Compare commits

..

233 Commits

Author SHA1 Message Date
e1f1bce9dc Fix invalid incremental decoding check.
(cherry picked from commit 95ea5226c8)

Change-Id: I80c2165aa9fdf43077db155d2d00e0e99db73eab
2023-10-06 11:53:43 +02:00
90d47113bc Fix OOB write in BuildHuffmanTable.
First, BuildHuffmanTable is called to check if the data is valid.
If it is and the table is not big enough, more memory is allocated.

This will make sure that valid (but unoptimized because of unbalanced
codes) streams are still decodable.
(cherry picked from commit 902bc91)

Change-Id: I3abe4db460dcac62c14a84832284c0b530630af2
2023-10-06 11:53:43 +02:00
bc99ef0699 Limit memory allocation when reading invalid Huffman codes.
This is a backported fix for: CVE-2020-36332

This is a merge of:
dce5d76431
39cb9aad85
067031eaed

Change-Id: Iab84d2ca459327cdcee1038499842d30370fe486
2023-10-06 11:53:40 +02:00
ece9684f52 update ChangeLog
Change-Id: I0d8730f3f5fb0b54404666bc0088fe29a3e55fc4
2016-12-15 15:44:21 -08:00
aa7744ca38 anim_util: quiet implicit conv warnings in 32-bit
the sizes are already validated by CheckSizeForOverflow(), add casts to
size_t to avoid -Wshorten-64-to-32

Change-Id: Ida9102c2104f4a334a0ad16d6e01a12bedfd4eec
(cherry picked from commit 1e2e25b0d4)
2016-12-15 11:51:44 -08:00
d9120271e7 jpegdec: correct ContextFill signature
fill_input_buffer returns a boolean

Change-Id: I208a1a862fa6c57cb5b73568b84055f734c1c36f
(cherry picked from commit cec7201447)
2016-12-15 11:51:28 -08:00
24eb39401b Remove some errors when compiling the code as C++.
This fixes some cases from
https://bugs.chromium.org/p/webp/issues/detail?id=137

Change-Id: I58f3a617bf973dbe4c5794004a01e2aea39ba53a
(cherry picked from commit 28ce304344)
2016-12-15 11:50:44 -08:00
a4a8e5f32c vwebp: clear canvas during resize w/o animation
this corrects the checkboard pattern displayed with transparent images

Change-Id: I5f46dbc9fa3893d61f5f1d4fda643ac030238f94
(cherry picked from commit 67c25ad5b4)
2016-12-14 11:26:24 -08:00
31ca2a8098 tiffdec: restore libtiff 3.9.x compatibility
use tsize_t which is equivalent (for now) to tmsize_t added in version 4

Change-Id: I7489ef7b39d9a3692a4b952de22476b245240ed1
2016-12-13 22:50:28 -08:00
b2f77b57a9 update NEWS
Change-Id: Ie74e9331378a254cc2265eaa1c94101db767e411
2016-12-13 18:51:20 -08:00
5ab6d9de1f AnimEncoder: avoid freeing uninitialized memory pointer.
In GenerateCandidates(), when candidate_ll->evaluate_ and
candidate_lossy->evaluate_ are both true, if lossless encoding
exits on error, candidate_ll->evaluate_ would not be correctly
reset. This will cause freeing uninitialized memory pointer in
SetFrame().

BUG=webp:322

Change-Id: I481b49a186e4fa3607ce71b4543a481083edf444
(cherry picked from commit 3ebe1c0003)
2016-12-13 18:18:57 -08:00
f29bf582df WebPAnimEncoder: If 'minimize_size' and 'allow_mixed' on, try lossy + lossless.
This improves compression by ~5% at default quality.

If only 'allow_mixed' is on (but 'minimize_size' isn't), we continue to
use a heuristic to try one of the two or both.

Change-Id: Ia573a73ea26ad25f9debff759eed69d2b0449e82
(cherry picked from commit 3f4042b52a)
2016-12-13 18:18:48 -08:00
df780e0eac fix a potential overflow with MALLOC_LIMIT
BUG=webp:321

Change-Id: Iab89dfe167fb394fcdffd3b2732d4ac9bef764b0
(cherry picked from commit 76bbcf2ed6)
2016-12-13 16:15:28 -08:00
218460cdd7 bump version to 0.5.2
libwebp{,decoder} - 0.5.2
libwebp libtool - 6.2.0
libwebpdecoder libtool - 2.2.0

mux - 0.3.2
libtool - 2.2.0

demux - 0.3.1
libtool - 2.1.0

Change-Id: Idf199415c325e6e9d157459a4e016ebba88c3f34
2016-12-12 17:36:12 -08:00
de7d654d0d update AUTHORS & .mailmap
Change-Id: If4d71ede51f91959e4990f3455c781a8bcf4d373
2016-12-12 17:36:09 -08:00
74a12b10d9 iosbuild.sh: add WebPDecoder.framework + encoder
WebPDecoder.framework replaces WebP.framework as the decode-only
framework.
WebP.framework now includes the full library allowing for use of the
encoder.

BUG=webp:307

Change-Id: Ic8139f201576bf94b0d4a31cb7cad0655cd8ba97
(cherry picked from commit 1d5046d1f9)
2016-12-12 12:50:49 -08:00
be7dcc088c AnimEncoder: Correctly skip a frame when sub-rectangle is empty.
Change-Id: I0d288bd9561b48cf5a1eae92a1b7106ba44c664e
(cherry picked from commit 1cc79e92ac)
2016-12-09 20:22:31 -08:00
408858308a Fix assertions in WebPRescalerExportRow()
Change-Id: I25711dd54e71c90a25f7b18e0ef9155e8151a15e
(cherry picked from commit 27b5d991e2)
2016-12-09 20:22:25 -08:00
8f38c72e11 fix a typo in WebPPictureYUVAToARGB's doc
method -> colorspace

Change-Id: I5c9a2ccc909c967a936758dde2cfce92eb95462a
(cherry picked from commit dc789ada44)
2016-12-09 17:27:59 -08:00
33ca93f909 systematically call WebPDemuxReleaseIterator() on dec->prev_iter_
Change-Id: I4a767134dcc52a7ee7c3bc5deb91012eaf7b6512
(cherry picked from commit aaf2a6a698)
2016-12-09 17:27:54 -08:00
76e190735b doc: use two's complement explicitly for uint8->int8 conversion
BUG=webp:225

Change-Id: I6bad131e275dbd992484e95a1b834010121281b8
(cherry picked from commit 13ae011e4c)
2016-12-09 17:27:50 -08:00
f91ba96306 Anim_encoder: correctly handle enc->prev_candidate_undecided_
Set enc->prev_candidate_undecided_ as 0 when a frame is not chosen
as a possible keyframe, so that the dispose method can be
dispose-to-background.

Change-Id: If2899f5dbc06fb53705fb8240072ab6440a6de12
(cherry picked from commit 29fedbf58b)
2016-12-09 16:58:28 -08:00
25d74e652e WebPPictureDistortion(): free() -> WebPSafeFree()
missed one!

Change-Id: I643170451b3ac07c748b70a9abfe8af17a716b24
(cherry picked from commit 32dead4ee3)
2016-12-09 16:58:19 -08:00
03f1c00877 mux/Makefile.am: add missing -lm
+ libwebpmux.pc

anim_encode.c relies on functions from math.h

BUG=webp:306

Change-Id: I3a8eb48febfd52bfbeb04f4dc615ccbed72926f7
(cherry picked from commit aaf2530cc3)
2016-12-09 15:03:08 -08:00
58410cd6dc fix bug in RefineUsingDistortion()
When try_both_modes=0 (that is: -m 0 or -m 1), and the mode is i4,
we were still sometimes falling back to (unexplored, uninitialized) i16 mode,
which resulted in a enc/dec mismatch.
This was mainly occurring for large images (when bit_limit is low enough)

We disable the fall-back by disabling bit_limit using a large MAX_COST threshold.

Change-Id: I0c60257595812bd813b239ff4c86703ddf63cbf8
(cherry picked from commit 0a3838ca77)
2016-12-08 15:48:16 -08:00
e168af8c6c fix filtering auto-adjustment
the min-distortion was quite too low. And we were also
considering the fully skipped macroblocks (nz=0) in the stats.
We need to have at least *some* non-zero dc coeffs (nz=0x100XXXX).

Fix also two typos in StoreMaxDelta: the v0/v1 comparison was wrong,
and the DCs[] coeffs are actually already in ZigZag order.

Change-Id: I602aaa74b36f7ce80017e506212c7d6fd9deba1f
(cherry picked from commit e4cd4daf74)
2016-12-08 15:48:08 -08:00
ed9dec41a5 fix doc and code snippet for WebPINewDecoder() doc
Change-Id: I1a75fdf60f0b9f1816be28f22613438bfe21752b
(cherry picked from commit e715285611)
2016-12-08 15:48:04 -08:00
3c49178f7d prevent 32b overflow for very large canvas_width / height
some multiplies here and there needed some extra checks
and error reporting. Even if width * height is guaranteed
to be < 2**32, we were multiplying by num_channels and
triggering a 32b overflow.
Some multiplies were not using size_t or uint64_t, additionally.

Change-Id: If2a35b94c8af204135f4b88a7fd63850aa381bbf
(cherry picked from commit 1c36440094)
2016-12-08 15:27:51 -08:00
9595f29010 fix anim_util.c compilation when HAVE_GIF is not defined.
Change-Id: I1a4f13a5799c020fb4b3ade3c7336f049443ab62
(cherry picked from commit 1effde7bcd)
2016-12-08 15:27:46 -08:00
7ec9552c22 Make gif transparent color to be transparent black
Change it from transparent white to transparent black, which matches
the transparent color assumed in Webp dispose-to-background method.

Also pre-multiply background colors before comparison in anim_diff,
just as what is done with regular pixel values.

Change-Id: I5a790522df21619c666ce499f73e42294ed276f2
(cherry picked from commit 43bd895879)
2016-12-08 15:27:40 -08:00
9871335fc8 Add a CMake option for WEBP_SWAP_16BIT_CSP.
Change-Id: I79ed6cbce9563bf7ca97fc2e10ec3e75b6b8ab5b
(cherry picked from commit 7b54e26bac)
2016-12-07 23:41:36 -08:00
0ae32226ce Fix missing cpu-features for Android.
Change-Id: Ief13b7c63b96194970b00a5732aa080f794e7ae6
(cherry picked from commit d2223d8d6c)
2016-12-07 23:41:28 -08:00
ab4c8056e0 cpu.cmake: improve webp_check_compiler_flag output
WEBP_HAVE_FLAG_LOCAL -> WEBP_HAVE_FLAG_${WEBP_SIMD_FLAG} this will
include the flag being tested in the output:
-- Performing Test WEBP_HAVE_FLAG_NEON

Change-Id: I1c0a143a857b16e4eb1fcf8b23c176380a5fef29
(cherry picked from commit ee1057e3b1)
2016-12-07 23:41:23 -08:00
eec5fa3a95 Provide support for CMake on Android studio 2.2.
Change-Id: Ib3508c20a220b29d42f963cf154cd9ab5236ed6e
(cherry picked from commit a80e8cfdf2)
2016-12-07 23:41:17 -08:00
004d569086 Split the main CMake file.
This will well isolate contributions for original code,
generated code and SIMD (especially for Android).

Change-Id: Ie47664decc7f43c2f57260a72cab951c347281a7
(cherry picked from commit 6c62841076)
2016-12-07 23:41:08 -08:00
4fe5d588bf Android.mk: use -fvisibility=hidden
brings the final libwebp.so size down 16/20K with arm64/armv7 builds
using ndk-r13

Change-Id: I20d8aba61d6b692b0fc32f4b271e2f9872f03c28
(cherry picked from commit de568abfdb)
2016-12-07 18:34:46 -08:00
bd63a31aab vwebp: ensure setenv() is available in stdlib.h
quiets a -Wimplicit-function-declaration with some configurations of gcc
(-std=c99).
_POSIX_C_SOURCE is preferred over _BSD_SOURCE with newer versions of
glibc

Change-Id: I378bffb13ba52ff5c4bad1433090dcc387e5d507
(cherry picked from commit bfab894739)
2016-12-07 18:30:54 -08:00
363a568131 vwebp: handle window resizing properly
The image is scaled to fit the whole viewport.
Avoid some oddities with offsets, etc.

removes some TODO.

Change-Id: I52fae9ca80a2feed234f32261c7f6358d7594e21
(cherry picked from commit 9310d19258)
2016-12-07 18:30:49 -08:00
a0d2753fcb lower WEBP_MAX_ALLOCABLE_MEMORY default
restrict to 2^34 for 64-bit targets, < 2^32 for 32-bit

Change-Id: Iff4ce40ae2c3c7fc119f018c2128dbe8f744341f
(cherry picked from commit b8384b53d6)
2016-12-07 18:30:44 -08:00
31fe11a57a fix infinite loop in case of PARTITION0 overflow
max_i4_header_bits_ could drop to zero for difficult image and trigger
a loop. Surprisingly, StatLoop() didn't have this bug.

Change-Id: Idc0f9eadef30a2b2f02041b994f25def30901e36
(cherry picked from commit 21e7537abe)
2016-12-07 18:30:39 -08:00
532215dd29 Change the rule of picking UV mode in MBAnalyzeBestUVMode()
Pick the mode with the smallest alpha.
It only affects m0, in which case the mode decision is not re-examined
later in VP8Decimate(). Tests on some natural content png images show
PSNR increase as well as visual quality improvement.

Change-Id: Iea997e718cd7477160fa05eb7cfb35f4cec2fa9a
(cherry picked from commit 1377ac2ec1)
2016-12-07 18:30:33 -08:00
9c75dbd39c cwebp.1: improve some grammar
Change-Id: Id849d7e0d7573f5b8d3b2e807d95e9c628f03b1e
(cherry picked from commit 5b46f7fc80)
2016-12-07 18:30:28 -08:00
af2e05cbdf vwebp: Clear previous frame when a key triggers a redraw
otherwise, transparent areas were accumulating.

Change-Id: I066a96a2bcf0cac750b3df0c02229542b1ed3473
(cherry picked from commit c0a27fd2af)
2016-12-07 18:30:23 -08:00
26ffa2962b Add descriptions of default configuration in help info.
Change-Id: I43188fab5f57bd45aa3e564df52e36cc37b1bb2f
(cherry picked from commit 74f6f9e793)
2016-12-07 18:30:17 -08:00
7416280d75 Fix an unsigned integer overflow error in enc/cost.h
Change-Id: I9774b59c417c185f09a61a115364b9642976a100
(cherry picked from commit 0b2c58a91c)
2016-12-07 18:29:51 -08:00
13cf1d2e41 Do token recording and counting in a single loop
Change-Id: I8afd3c486b210bd67888de03e91dde7f78276f89
(cherry picked from commit 0c0fb83211)
2016-12-07 18:29:44 -08:00
eb9a4b97c5 Reset segment id if we decide not to update segment map
This avoids potential encoder and decoder mismatch.

Change-Id: I5282d3e168afc6193033ad3fce8fbc35618ab2f5
(cherry picked from commit 386e4ba2f0)
2016-12-07 18:25:06 -08:00
42ebe3b783 configure: fix NEON flag detection under gcc 6
use a compile check on a separate file to avoid assuming using
arm_neon.h is safe to use without flags when just the file itself is
self-contained with GCC target pragmas.

BUG=webp:313

Change-Id: I48f92ae3e6e4a9468ea5b937c80a89ee40b2dcfd
(cherry picked from commit 0104d730bf)
2016-12-07 18:23:55 -08:00
83cbfa09a1 Import: use relative pointer offsets
avoids int rollover when working with large input

BUG=webp:312

Change-Id: I6ad9f93b6c4b665c559bff87716a7b847f66a20d
(cherry picked from commit 342e15f0ce)
2016-11-09 15:50:57 -08:00
a1ade40ed8 PreprocessARGB: use relative pointer offsets
avoids int rollover when working with large input

BUG=webp:312

Change-Id: I2881bec2884b550c966108beeff1bf0d8ef9f76b
(cherry picked from commit 1147ab4ee7)
2016-11-09 15:24:16 -08:00
fd4d090fd1 ConvertWRGBToYUV: use relative pointer offsets
avoids int rollover when working with large input

BUG=webp:312

Change-Id: I693cbb295df9cf94aa89294b19c0496bdbe84d18
(cherry picked from commit de9fa5074e)
2016-11-09 12:57:03 -08:00
9daad4598b ImportYUVAFromRGBA: use relative pointer offsets
avoids int rollover when working with large input

BUG=webp:312

Change-Id: I3d7b689be8d5751248a82d1021243d80d3f67203
(cherry picked from commit deb1b83199)
2016-11-09 12:56:49 -08:00
c284780f0a imageio_util: add ImgIoUtilCheckSizeArgumentsOverflow
and use it to validate decoder allocations. fixes a crash in jpegdec at
least.

BUG=webp:312

Change-Id: Ia940590098f29510add6aad10a8dfe9e9ea46bf4
(cherry picked from commit bc86b7a8a1)
2016-11-02 23:36:28 -07:00
e375080d8f gifdec,Remap: avoid out of bounds colormap read
make this function return success/failure.
an empty map or out of bounds read is treated as an error.

BUG=webp:316

Change-Id: Ic8651836915ea4dd8e0dc81ca8d5d3f247be1ff8
(cherry picked from commit cac9a36a23)
2016-11-02 23:10:22 -07:00
c222a053af additional fix for stride type as size_t
- remove the inclusion of format_constants.h
 - use incremental update of pointer, instead of arithmetic

(follow-up to e2affacc35)

Change-Id: I48420c8defc8d47339f54bc00e9da9617f08ab32
(cherry picked from commit 9f5c8eca79)
2016-11-02 23:09:49 -07:00
bb2336170b fix potential overflow when width * height * 4 >= (1<<32)
Mostly: avoid doing calculation like: ptr + j * stride
when stride is 'int'. Rather use size_t, or pointer increments (ptr += stride)
when possible.

BUG=webp:314

Change-Id: I81c684b515dd1ec4f601f32d50a6e821c4e46e20
(cherry picked from commit e2affacc35)
2016-11-02 23:09:49 -07:00
883d41fb40 gif2webp: fix crash with NULL extension data
DGifGetExtension() may successfully return, but the data pointer should
still be validated

BUG=webp:310

Change-Id: I6cfe617871fef2fe07887e5f48bb20f7ab7cfb35
(cherry picked from commit 806f6279ae)
2016-11-02 23:09:49 -07:00
3d97bb7514 update ChangeLog
Change-Id: I05b9ac2cf8fdd26e3f2e68780dca3e613bc2f732
2016-07-06 18:00:16 -07:00
deb54d915a Clarify the expected 'config' lifespan in WebPIDecode()
Change-Id: I08c880adc34d651a63fa2d07cfb07b1468a6df1b
2016-07-06 17:17:15 -07:00
c7e2d245d1 update ChangeLog
Change-Id: Icaaaeda53ecf24ecc9a2e09da78423235fc8133b
2016-06-27 16:49:43 -07:00
c7eb06f737 Fix corner case in CostManagerInit.
Change-Id: I91795d05eb78816d6d9a8cadc64d3814650d2aee
2016-06-27 20:01:44 +02:00
ab7937a5c7 gif2webp: normalize the number of .'s in the help message
Change-Id: I3a2412f391b3244c172ee28819e8e606a9674aa9
2016-06-24 19:29:43 -07:00
3cdec847a5 vwebp: normalize the number of .'s in the help message
Change-Id: Iadaa5a8f64cd8093eeeaf829ba609bef034d63b5
2016-06-24 19:29:43 -07:00
bdf6241e70 cwebp: normalize the number of .'s in the help message
Change-Id: I0208c01a671bd822ee707c6d8ead3d5348b3e2c6
2016-06-24 19:29:43 -07:00
06a38c7b1c fix rescaling bug: alpha plane wasn't filled with 0xff
(in case no alpha was present in the source .webp,
but user requested some)

Change-Id: I9011d38237907c60d6796a86bd2c72166aa80f27
2016-06-24 17:15:31 +02:00
319e37be13 Improve lossless compression.
This is essentially a revert of a3611513d2
and cfbcc5ece0.
Here is what happened: there was a corruption bug that eventually
got fixed by 0174d18d8b.
But before finding the root, a3611513d2
and cfbcc5ece0 hid the bug
by not imposing length of 1 when it was actually 2 or 3 (which does help
compression as a litteral is more efficient than an offset and a length
of size 2 or 3).

Change-Id: I6f18fc1f583a51ac9d8aab2508458264047cd493
2016-06-24 16:11:25 +02:00
447adbce1e 'our bug tracker' -> 'the bug tracker'
(less exclusive wording)
(also a good opportunity to update the date)

Change-Id: Ife221f3f8ac3c38db93ec91954123edc3d8a4a6b
2016-06-23 16:06:12 -07:00
97b9e64459 normalize the number of .'s in the help message
Change-Id: I2dbb36cbdc3cb93364d9e1dd6da10d3a5f9470af
2016-06-23 16:06:09 -07:00
bb50bf42b0 pngdec,ReadFunc: throw an error on invalid read
convert the assert() to an error check to avoid crashing when reading
malformed files.

BUG=webp:302

Change-Id: I25eed9cab5c0a439bd3411beacc83f3a27af2bbf
2016-06-22 23:09:30 -07:00
38063af131 decode.h,WebPGetInfo: normalize function comment
use true/false to match the documentation of other functions.

Change-Id: If059f8fb6d771a165e4682495fd8881b3dc0670f
2016-06-22 15:20:47 -07:00
9e8e1b7b2a Inline GetResidual for speed.
Change-Id: Ib4228e87dc448866229c0795ca68dabe777ef31c
2016-06-21 16:04:53 +02:00
7d58d1b7b9 Speed-up uniform-region processing.
Change-Id: I9a88d0ac97c31d19323c9505ebe21f375d2e96b8
2016-06-21 15:45:46 +02:00
23e29cb1e3 Merge "Fix a boundary case in BackwardReferencesHashChainDistanceOnly." into 0.5.1 2016-06-20 16:06:39 +00:00
0bb23b2cf7 free -> WebPSafeFree()
avoids unbalanced memory track at the end (w/ PRINT_MEM_INFO flag on)

Change-Id: I70da087f079198bcaacd0c81593f104058dcac69
2016-06-17 17:49:07 +02:00
e7b917726f Merge "DecodeImageData(): change the incorrect assert" into 0.5.1 2016-06-17 06:07:44 +00:00
2abfa54f95 DecodeImageData(): change the incorrect assert
this function can be called not to decode pixels, but simply
to finish processing (through process_func()) the already decoded
pixels.

Change-Id: I80485e92e3c47f0aa3389476dcb82745a243fc4a
2016-06-16 22:24:30 -07:00
5a48fcd8a1 Merge "configure: test for -Wfloat-conversion" 2016-06-16 17:57:58 +00:00
0174d18d8b Fix a boundary case in BackwardReferencesHashChainDistanceOnly.
The optimization for (len != MIN_LENGTH) actually only holds for
(len > MIN_LENGTH) but (len < MIN_LENGTH) can now happen as len can
be changed in the loop before.

Change-Id: I3f9f91a540206c80385c5fba96c3d64ab9536752
2016-06-16 19:22:28 +02:00
6a9c262aa8 Merge "Added MSA optimized transform functions" 2016-06-16 10:53:43 +00:00
cfbcc5ece0 Make sure to consider small distances in LZ77.
This could corrupt certain images since commit
a3611513d2

Change-Id: Ifbe43abaafe8efb27c62af18039fea5a9dc4e062
2016-06-16 09:14:11 +00:00
5e60c42a76 Added MSA optimized transform functions
1. TransformWHT
2. TransformTwo
3. TransformDC
4. TransformAC3

Change-Id: Ia3624cb4aed215bcaffce542b28794e643207039
2016-06-16 09:04:27 +00:00
3dc28d7643 configure: test for -Wfloat-conversion
Change-Id: Ib7d0372dc43994d66228f7dd3ce4dc3b80a5482d
2016-06-15 23:54:10 -07:00
f2a0946a7a add some asserts to delimit the perimeter of CostManager's operation
a small protection in a fairly complex code.

Change-Id: I920e10e1fc1c35da2cf486349417048d516ff2b9
2016-06-15 20:55:32 +02:00
9a583c66f9 fix invalid-write bug for alpha-decoding
On the non-fast path (use_8b_decode_=0) for decoding the alpha-mask,
we could end up requesting ApplyInverseTransform() with more rows
to process than NUM_ARGB_CACHE_ROWS. This could only happen on the
very last bottom rows of the image.

* ProcessRows() doesn't need to be fixed, since we never request more
  than NUM_ARGB_CACHE_ROWS rows. Added an assert for that.

* the use_8b_decode_=1 case doesn't use argb_cache_, but rather does
  the palette-decoding call directly. So, no problem here too.

Only the generic (and rather rare) case of calling ExtractAlphaRows()
was affected.

Change-Id: I58e28d590dcc08c24d237429b79614abcef1db7c
2016-06-15 16:46:19 +02:00
f66512db94 make gradlew executable
Change-Id: If3456966b193ff6710463364917522378ebeeee3
2016-06-14 23:42:41 -07:00
6fda58f137 backward_references: quiet double->int warning
since:
059aab4 Fix a compression regression for images with long uniform
regions.

Change-Id: I1783a74220961e8bc3bb42696e3412fe4bfc4ddb
2016-06-14 15:27:58 -07:00
a48cc9d201 Merge "Fix a compression regression for images with long uniform regions." into 0.5.1 2016-06-14 21:26:06 +00:00
cc2720c1d5 Merge "Revert an LZ77 boundary constant." into 0.5.1 2016-06-14 21:25:08 +00:00
059aab4fa1 Fix a compression regression for images with long uniform regions.
Change-Id: Id87a4ac2a22daaa71e8f3132e69703b9b3ddd752
2016-06-14 21:51:10 +02:00
b0c7e49e58 Check more backward matches with higher quality.
Change-Id: I3f0887b0b9b7f0e69758f51783807e1583b74be2
2016-06-14 21:50:03 +02:00
a3611513d2 Revert an LZ77 boundary constant.
This is getting back to the old behavior which is actually better for
compression and speed with the latest patches.

Change-Id: I35884bab02589297c25d6e1e66dc5f13e05f7aa7
2016-06-14 21:42:45 +02:00
8190374c2b README: fix typo
Change-Id: Id744d5f994bed6522f3be7d6b90e6b78bd8da587
2016-06-13 22:48:04 -07:00
7551db445e update NEWS
Change-Id: I022f975ed1b2f6d9ccbf4a3f4c4bd2113eafacfd
2016-06-13 20:06:37 -07:00
0fb2269c4d bump version to 0.5.1
libwebp{,decoder} - 0.5.1
libwebp libtool - 6.1.0
libwebpdecoder libtool - 2.1.0

mux - 0.3.1
libtool - 2.1.0

demux (no changes) - 0.3.0
libtool - 2.0.0

Change-Id: I6f51bfaccf33ff7a1492f3e8f888324d7afc0f4b
2016-06-13 19:10:21 -07:00
f45376101c update AUTHORS & .mailmap
Change-Id: Ia0e25b9af8dd66f16ca75bd35bcd3e5e41c0bdbd
2016-06-13 19:01:37 -07:00
3259571e7d Refactor GetColorPalette method.
This was defined (slightly differently) at two places. Created a common
method and moved to utils/utils.[hc].

Change-Id: I66c3ac6dea24e0cd2c0eaa5440f3142b4dbbe23b
2016-06-13 18:52:37 -07:00
1df5e26001 avoid using tmp histogram in PreparePair()
we don't need to store the resulting histogram, so no need to
call HistogramAddEval().
Allows some signature simplifications...

Change-Id: I3fff6c45f4a7c6179499c6078ff159df4ca0ac53
2016-06-12 14:32:22 -07:00
7685123a7f fix comment typos
Change-Id: I2a55e371dbf7e62b446f6bb732c8913b85633c49
2016-06-10 13:29:07 +00:00
a246b921a0 Speedup backward references.
In case where the same offset is found in consecutive pixels,
the cost computation from one pixel can be re-used for the next.

Change-Id: Ic03c7d4ab95f3612eafc703349cfefd75273c3d7
2016-06-09 20:05:15 +02:00
76d73f1835 Merge "CostManager: introduce a free-list of ~10 intervals" 2016-06-09 15:58:38 +00:00
eab39d8147 CostManager: introduce a free-list of ~10 intervals
and also recycle the malloc'd intervals
This avoids quite some malloc/free cycles during interval managment.

Change-Id: Ic2892e7c0260d0fca0e455d4728f261fb4c3800e
2016-06-09 01:50:50 -07:00
4c59aac0f9 Merge "mips msa webp configuration" 2016-06-09 05:39:53 +00:00
043c33f1ce Merge "Improve speed and compression in backward reference for lossless." 2016-06-08 19:54:51 +00:00
71be9b8c11 Merge "clarify variable names in HistogramRemap()" 2016-06-08 19:48:38 +00:00
0ba7fd70c6 Improve speed and compression in backward reference for lossless.
Change-Id: I664c5e68b036a2d424192962dbad873a2c70b826
2016-06-08 21:13:33 +02:00
0481d42ad8 CostManager: cache one interval and re-use it when possible
In a lot of cases, only one interval is used. This can cause
a lot of malloc/free cycles for only 56 bytes. By caching this
single interval and re-using it, we remove this cycle in most
frequent cases.

Change-Id: Ia22d583f60ae438c216612062316b20ecb34f029
2016-06-08 14:10:06 +00:00
41b7e6b56e Merge "histogram: fix bin calculation" 2016-06-08 11:59:16 +00:00
96c3d62496 histogram: fix bin calculation
It could overflow if min==max

Change-Id: I11580328712879fe310e3d9c8c5e10835c4faf86
also: collect GetHistoBinIndex() variant into a single function
2016-06-08 13:38:00 +02:00
fe9e31ef5e clarify variable names in HistogramRemap()
also simplify the loop by removing the initial special case

Change-Id: I4fc1b9eb14a42381a815354599137703c21ed9e6
2016-06-08 13:36:04 +02:00
ce3c824712 disable near-lossless quantization if palette is used
Change-Id: I3335e8f97f99d6d786e953161adbf246e85d80d3
2016-06-08 11:29:48 +02:00
e11da081f9 mips msa webp configuration
Change-Id: I886164d6d3d560b1249603d47391fddf20b5a3d4
2016-06-07 23:49:41 -07:00
5f8f998d0e mux: Presence of unknown chunks should trigger VP8X chunk output.
As per the spec
(https://developers.google.com/speed/webp/docs/riff_container), only the
extended file format can contain an unknown chunk. So, when assembling a
WebP file with muxer, whenever there is an unknown chunk present, we
should create a VP8X chunk (even though none of the features are
present).

BUG=webp:294

Change-Id: I5da52d311e1853d40063d0f5026100d4325effaa
2016-06-06 09:25:21 -07:00
cadec0b126 Merge "Sync mips32 and dsp_r2 YUV->RGB code with C verison" 2016-06-03 10:32:20 +00:00
d963775859 Compute the hash chain once and for all for lossless compression.
In some cases, the hash chain for a function is filled several
times:
- GetBackwardReferences -> CalculateBestCacheSize ->
BackwardReferencesLz77 that computes the hash chain
- GetBackwardReferences ->
(not always) BackwardReferencesTraceBackwards ->
BackwardReferencesHashChainDistanceOnly that computes the hash
chain in a slightly different way

Speed and compression performance are slightly changed (+ or -)
but will be homogneized in a later patch.

Change-Id: I43f0ecc7a9312c2ed6cdba1c0fabc6c5ad91c953
2016-06-03 11:42:13 +02:00
50a486656d Sync mips32 and dsp_r2 YUV->RGB code with C verison
Change-Id: Ibe12f5ef596b8922225b95c36b67955a3f8b9ae4
2016-06-03 10:42:11 +02:00
eee788e26a Merge "introduce a common signature for all image reader function" 2016-06-02 05:26:23 +00:00
d77b877cc9 introduce a common signature for all image reader function
-> WebPImageReader

Introduce a variant of image-guessing function that returns a reader
directly: WebPGuessImageReader()

Change-Id: I5ddc53024fcf941e33d997b2be6aa1a963d939ab
2016-06-01 22:51:41 +00:00
ca8d951980 remove some obsolete TODOs
Change-Id: Ied77b2dd7e3e5bb65524c0ac7b9a3fb6585cac57
2016-06-01 16:23:16 +02:00
ae2a7222ce collect all decoding utilities from examples/ in libexampledec.a
adds a generic examples/image_dec.[ch] entry point too.

WebPGuessImageType() can be used to infer image type.

Change-Id: I8337e7b6ad91863c9cf118e4791668d2d175079b
2016-06-01 05:39:55 +00:00
0b8ae8520f Merge "Move DitherCombine8x8 to dsp/dec.c" 2016-05-27 05:55:04 +00:00
77cad88589 Merge "ReadWebP: avoid conversion to ARGB if final format is YUVA" 2016-05-27 05:51:15 +00:00
ab8d669887 ReadWebP: avoid conversion to ARGB if final format is YUVA
Adds ExUtilCopyPlane() in example_utils.[ch], borrowed from
src/util/utils.h

(removes a TODO)

Change-Id: Iadfc19da3914c9439608f296e86225d279263566
2016-05-26 05:49:16 +00:00
f8b7ce9ef1 Merge "test pointer to NULL explicitly" 2016-05-25 06:46:11 +00:00
5df6f214e3 test pointer to NULL explicitly
Change-Id: I3de6f6b717a21c93f9d3144c66b7328b0b322853
2016-05-24 23:26:19 -07:00
77f21c9c39 Move DitherCombine8x8 to dsp/dec.c
To be later optimized in SSE2

Change-Id: I0de9c89eb5166f3319bb4b0500150de271ecac05
2016-05-24 23:14:41 -07:00
c9e6d865aa Add gradle support
Change-Id: I8d522e582959e1a17605430316c1506349a7193c
2016-05-24 15:35:20 +02:00
c65f41e816 Revert "Add gradle support"
This reverts commit bf731ede7e.

Change-Id: Iffabc2a69a25685ba367c26f54d84c43f2c19d68
2016-05-24 10:47:36 +00:00
bf731ede7e Add gradle support
Change-Id: I9e5ec253897f63cb163b3f732ff844262ba07db6
2016-05-24 11:05:27 +02:00
08333b8551 WebPAnimEncoder: Detect when canvas is modified, restore only when needed.
Change-Id: I6b45283e13c08c7d11c96500a65c75ce3637b06d
2016-05-23 11:17:23 -07:00
0209d7e6d6 Merge "speed-up MapToPalette() with binary search" 2016-05-23 14:09:34 +00:00
fdd29a3d3f speed-up MapToPalette() with binary search
goes from ~2% CPU to ~0.7% for large images
Change-Id: Ibd8a0fde9ba553f93157a49dcb7da0426209e404
2016-05-23 15:39:54 +02:00
cf4a651bb8 Revert "Refactor GetColorPalette method."
This reverts commit 169004b1d5.

this changes the ABI, so should bump versions and add a note to NEWS
when we're ready to expose it

Change-Id: Ic5bbd0aee2b6fd0f9d438a9effedf22fe0cec4bf
2016-05-20 17:05:33 -07:00
0a27aca3f8 Merge changes Idfa8ce83,I19adc9c4
* changes:
  WebPAnimEncoder: Restore original canvas between multiple encodes.
  Refactor GetColorPalette method.
2016-05-21 00:03:15 +00:00
f25c4406e6 WebPAnimEncoder: Restore original canvas between multiple encodes.
tl;dr
We do the following:
- Start with transparent value of 0x00000000 instead of 0x00ffffff, so that
WebPCleanupTransparentAreaLossless() is a no-op.
- Restore the original canvas after lossy encoding, to discard changes made by
WebPCleanupTransparentArea() before the next encode.

Explanation of why:
In the mixed mode, anim_encoder tries to encode using both lossless and lossy
compression. In fact, when "min_size" option is enabled, there are at most 4
encodes that can happen in this order:
- lossless with dispose none
- lossy with dispose none
- lossless with dispose background
- lossy with dispose background

But both lossless and lossy both potentially modify the canvas during encode
(for better compression):
- Lossless: WebPCleanupTransparentAreaLossless() turns all transparent pixels
to 0x00000000
- Lossy: WebPCleanupTransparentArea() flattens some transparent pixels

So, the result is that, sometimes we feed the modified canvas to the encoder
instead of the original one, which isn't the right thing to do.

This also applies to just lossless or just lossy encoding, as multiple encodes
happen (with the two dispose methods) in those cases too.

Change-Id: Idfa8ce831a1627014785ba7d0316c42f72594455
2016-05-20 15:07:50 -07:00
169004b1d5 Refactor GetColorPalette method.
This was defined (slightly differently) at two places. Created a common
method and moved to utils/utils.[hc].

Change-Id: I19adc9c48f2a4e2ec9d995e78add6f25172774c2
2016-05-20 15:06:38 -07:00
576362abd7 VP8LDoFillBitWindow: support big-endian in fast path
Change-Id: I577944fe0b85505766050dba5ab5aec48b30f541
2016-05-20 00:31:05 -07:00
ac49e4e4dc bit_reader.c: s/VP8L_USE_UNALIGNED_LOAD/VP8L_USE_FAST_LOAD/
no longer gate this on WEBP_FORCE_ALIGNED as WebPMemToUint32() provides
this service. replace that check with WORDS_BIGENDIAN as the block is
currently little-endian specific.

Change-Id: Ie04ec0179022d20dab53da878008ae049837782f
2016-05-19 23:14:02 -07:00
d39ceb58ac VP8LDoFillBitWindow: remove stale TODO
the read size may be fixed, but the offsets into buf_ are not. forcing
an aligned read then shifting or using a temporary would be costly. this
is less important now that WebPMemToUint32() is being used.

Change-Id: I357fec8f750969cce91987abebed2f95e27a835f
2016-05-19 23:07:10 -07:00
2ec2de1450 Merge "Speed-up BackwardReferencesHashChainDistanceOnly." 2016-05-19 05:17:03 +00:00
3e023c17cd Speed-up BackwardReferencesHashChainDistanceOnly.
Instead of comparing all the following pixels over len (which can
frequently reach the maximum MAX_LENGTH=4096 for some images),
intervals are stored and compared.

Change-Id: I0dafef6cc988dde3c1c03ae07305ac48901d60ee
2016-05-19 04:51:13 +00:00
f2e1efbeb7 Improve near lossless compression when a prediction filter is used.
The old implementation in enc/near_lossless.c performing a separate
preprocessing step is used only when a prediction filter is not used,
otherwise a new implementation integrated into lossless_enc.c is used.

It retains the same logic for converting near lossless quality into max
number of bits dropped, and for adjusting the number of bits based on
the smoothness of the image at a given pixel. As before, borders are not
changed.

Then, instead of quantizing raw component values, the residual after
subtract green and after prediction is quantized according to the
resulting number of bits, taking care to not cross the boundary between
255 and 0 after decoding. Ties are resolved by moving closer to the
prediction instead of by bankers’ rounding.

This results in about 15% size decrease for the same quality.

Change-Id: If3e9c388158c2e3e75ef88876703f40b932f671f
2016-05-18 20:59:02 +00:00
e15afbce5d dsp.h: fix ubsan macro name
copy and paste error in the previous commit, change
no_sanitize("unsigned-integer-overflow") from WEBP_UBSAN_IGNORE_UNDEF ->
WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW

Change-Id: Id178ee14df1f2c4923a91ce423241e26b60b5d32
2016-05-13 11:09:57 -07:00
e53c9ccb24 dsp.h: add WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW
for suppressing expected failures with -fsanitize=integer

Change-Id: I954cba45f0c96478b770ed7a6ac7491359cae075
2016-05-12 23:51:23 -07:00
af81fdb772 utils.h: quiet -fsanitize=undefined warnings
add WEBP_UBSAN_IGNORE_UNDEF to WebPMemToUint32() / WebPUint32ToMem()
when WEBP_FORCE_ALIGNED is unset

Change-Id: I726b2e708ce29681584eb10c8874d5cf1e798756
2016-05-11 23:47:52 -07:00
ea0be354a0 dsp.h: remove utils.h include
include utils.h directly where needed to allow utils.h to rely on
defines from dsp.h in a follow-up.

Change-Id: I32e26aaeb0b04ba60b3332f685f9a2be5a0a8d3d
2016-05-11 23:17:21 -07:00
cd276aecd0 utils/*.c: ../utils/utils.h -> ./utils.h
Change-Id: I0154e788c864f77d15d6c287df59c0a02e6db5e9
2016-05-11 23:08:32 -07:00
c892713104 utils/Makefile.am: add some missing headers
Change-Id: Id5b81337b92819038bc7006de8c30bfc8eb9def5
2016-05-11 23:03:52 -07:00
ea24e026aa Merge "dsp.h: add WEBP_UBSAN_IGNORE_UNDEF" 2016-05-11 06:21:45 +00:00
369e264e2e dsp.h: add WEBP_UBSAN_IGNORE_UNDEF
only defined when WEBP_FORCE_ALIGNED isn't. use it to quiet alignment
warnings VP8LoadNewBytes().

Change-Id: I710a74bb9375285974e97022540551a3f4eda414
2016-05-10 22:45:13 -07:00
0d020a7892 Merge "add runtime NEON detection" 2016-05-11 05:42:13 +00:00
5ee2136a71 Merge "add VP8LAddPixels() to lossless.h" 2016-05-10 22:39:29 +00:00
47435a6162 add VP8LAddPixels() to lossless.h
Change-Id: I67f9118f875affa32c47adfedf9df28b0ac9957b
2016-05-10 20:30:30 +00:00
8fa6ac68f0 remove two ubsan warnings
(regarding uint overflow)

Change-Id: I1a76e4b1268370b6b7d6a1aa93b99e57f55fd02e
2016-05-10 18:40:18 +00:00
74fb56fb5d add runtime NEON detection
configure gets 2 new options:
--enable-neon / --enable-neon-rtcd

the NEON modules are split to their own convenience lib and built with
auto-detected flags if none are given via CFLAGS.

the /proc/cpuinfo check will only be used for armv7 targets whose
toolchain does not enable NEON by default or didn't have NEON forced by
the CFLAGS from the environment.

Change-Id: I2755bc1d065d5d6ee6143b44978c2082f8bef1c5
2016-05-06 15:32:48 -07:00
4154a8395d MIPS update to new Unfilter API
Change-Id: I2b5960812954dfcabc84663382b9e032fd1eeb43
2016-05-05 15:50:34 +02:00
c80b9fc8fc Merge "cherry-pick decoder fix for 64-bit android devices" 2016-05-03 06:12:13 +00:00
6235147ed5 cherry-pick decoder fix for 64-bit android devices
This fixes decoders built against clang-3.8 (r11c). Without this change
bad conditional code would be generated causing all calls to
WebPParseHeaders() to return 4 (UNSUPPORTED_FEATURE).

Original fix:
https://android-review.googlesource.com/#/c/196123

Change-Id: Id4b4d84048d347cea110b6cf297ef9ef4fbed323
2016-05-02 22:27:42 -07:00
d41b8c432d configure: test for -Wformat-* w/-Wformat present
TEST_AND_ADD_CFLAGS tests flags independently from AM_CFLAGS. The lack
of -Wformat may cause -Wformat-* checks to fail (gcc 4.9), though in the
end they would have succeeded with -Wall present.

Change-Id: Ic8f0d5b8a3f75a0b105bdc4ddd16690878a30222
2016-05-02 19:30:29 -07:00
5f95589fae Fix WEBP_ALIGN in case the argument is a pointer to a type larger than a byte.
Change-Id: Id74a70aa02be813cdf3ab4407ce207bcd647a6a4
2016-05-02 12:40:37 -07:00
2309fd5c1d replace num_parts_ by num_parts_minus_one_ (unsigned)
This avoids spurious warnings and extra calculations.

Change-Id: Ibbe1b893cf89ee6b31a04d7bcbf92b4b357c8628
2016-05-02 12:19:01 -07:00
9629f4bcda SimplifySegments: quiet -Warray-bounds warning
the number of segments are previously validated, but an explicit check
is needed to avoid a warning under gcc-4.9
this is similar to the changes made in:
c8a87bb AssignSegments: quiet -Warray-bounds warning
3e7f34a AssignSegments: quiet array-bounds warning

Change-Id: Iec7d470be424390c66f769a19576021d0cd9a2fd
2016-05-02 12:17:49 -07:00
de47492e91 Merge "update the Unfilter API in dsp to process one row independently" 2016-04-22 17:33:41 +00:00
2102ccd091 update the Unfilter API in dsp to process one row independently
This will allow to work in-place on cropped area later.

Also sped up the inverse gradient filtering in SSE2 (~4%)

Change-Id: I463149eee95d36984328f163a1e17f8cabd87441
2016-04-21 08:10:45 +00:00
e3912d560b WebPAnimEncoder: Restore canvas before evaluating blending possibility.
Without this, the canvas may have been modified in-between two
GenerateCandidate() calls.

Change-Id: I0364a269caabdf282c30a8fa3c61896f0247342e
2016-04-19 16:13:45 -07:00
6e12e1e3d2 WebPAnimEncoder: Fix for single-frame optimization.
Change-Id: I2da8dc34ab9589b58cde0ecb05d71357d77f6b25
2016-04-15 12:10:07 -07:00
602f344a36 Merge changes I1d03acac,Ifcb64219
* changes:
  anim_diff: Add an experimental option for max inter-frame diff.
  WebPAnimEncoder: FlattenSimilarPixels(): look for similar
2016-04-07 19:13:56 +00:00
95ecccf6dc only apply color-mapping for alpha on the cropped area
This is only possible if the filtering is not VERTICAL or GRADIENT.
Otherwise, we need the spatial predictors and hence need the un-visible
part above crop_top row.

COLOR_INDEX transform is the only transform that is not predicted
from previous row. Applying the same for other transform (spatial
predict, ...) is going to be more involve and use an extra temporary row.

+ remove ApplyInverseTransformsAlpha()
(work is done directly within ExtractPalettedAlphaRows())

+ change back to using filter_ instead of unfilter_func_

Change-Id: I09e57efae4a4af00bde35f21ca6e3d73b35d7d43
2016-04-07 10:21:02 +02:00
47dd07080f anim_diff: Add an experimental option for max inter-frame diff.
After the introduction of lossy frame rectangles
we need equivalent option in anim_diff for merging similar frames.

Change-Id: I1d03acace396ec4cb0212586c6e8b8ec5b0b0bfc
2016-04-06 17:25:19 -07:00
aa809cfeb3 only allocate alpha_plane_ up to crop_bottom row
-> reduces memory consumption in case of cropping

work for bug #288

Change-Id: Icab6eb8e67e3c5c184a9363fffcd9f6acd0b311c
2016-04-06 14:07:16 +02:00
31f2b8d8e1 WebPAnimEncoder: FlattenSimilarPixels(): look for similar
not exactly same.

Based on lossy WebP quality setting, ignore minor differences when
flattening
similar blocks.

For 6k set, at default quality with '-min_size' option, improves
compression by 0.3%

Change-Id: Ifcb64219f941e869eb2643e231220b278aad4cd4
2016-04-06 00:17:19 -07:00
774dfbdc34 perform alpha filtering within the decoding loop
Change-Id: I221ae20ba879cd4f7b48d012db1bc5443d968e17
2016-04-06 00:13:14 -07:00
a4cae68de0 lossless decoding: only process decoded row up to last_row
there's some subtle changes:
  - DecodeAlphaData() may be called with pos==end because we don't want
    to decode more data (there's none left), but because we want to apply
    process_func() to all the unprocessed pixels already decoded
  - last_row is exclusive and should be understood as 'up to last_row'. Can be misleading.
  - VP8LDecodeAlphaImageStream() was testing dec->last_pixel_ for completion,
    which was wrong because last_pixel_ is the last *decoded* pixel, not the
    last *processed* one. -> test now uses last_row_, as expected

Change-Id: I1fb04ba25cd7a4775db9e3deee3e2ae80f9c0a75
2016-04-05 23:26:50 -07:00
238cdcdbe1 Only call WebPDequantizeLevels() on cropped area
this might change some crc slightly, since WebPDequantizeLevels()
performs an analysis pass, counting levels, which impacts the smoothing.
Now, the cropping area is not the same, so minor diffs are expected here
and there.

Change-Id: I3cce1e40c6f11c25b7c841044d637685c5740352
2016-04-05 13:12:47 +00:00
cf6c713a0b alpha: preparatory cleanup
* make ALPHNew/Delete static
* properly init ALPHDec::io_
* introduce AllocateAlphaPlane() and WebPDeallocateAlphaMemory()
* reorganize VP8DecompressAlphaRows()

but we're still allocate the full alpha-plane. Optim will come
in another patch since it's tricky

Change-Id: Ib6f190a40abb7926a71535b0ed67c39d0974e06a
2016-04-04 16:30:29 +02:00
b95ac0a221 Merge "VP8GetHeaders(): initialize VP8Io with sane value for crop/scale dimensions" 2016-04-04 13:50:05 +00:00
8923139414 VP8GetHeaders(): initialize VP8Io with sane value for crop/scale dimensions
so we can use crop_width/height elsewhere, instead of testing use_cropping

Change-Id: I41705db9f6889b036ecc167d73d1a1536dc33552
2016-04-04 09:21:55 +00:00
5828e1995e use_8b_decode -> use_8b_decode_
Change-Id: I78e2b26c9bbe1364e5d048794528f9290606b545
2016-04-04 09:00:14 +00:00
8dca0247d2 fix bug in alpha.c that was triggering a memory error in incremental mode
this change will be superseded by patch #335160 eventually, but until then
let's fix the problem temporarily.

Change-Id: Iafd979c2ff6801e3f1de4614870ca854a4747b04
2016-04-01 00:59:39 -07:00
9a950c5375 WebPAnimEncoder: Disable filtering when blending is used with lossy encoding.
When FlattenSimilarBlocks() was making some blocks transparent with
averaged RGB values, filtering in lossy compression was causing
blockiness just outside the edge of these blocks.
Disabling filtering for that particular case avoid these block
artifacts.

The total encoded size of the 6k GIF set remains roughly the same (in
fact, reduces a bit).

Change-Id: Ida71cbabd59d851e16d871f53d19473312b3cc77
2016-03-31 23:07:47 -07:00
eb423903a4 WebPAnimEncoder: choose max diff for framerect based on quality.
We pick a mapping with quality 0 mapping to max diff 32, to quality 100
mapping to max_diff 1.

For 6k GIF image set, this improves compression by:
4% at quality 0
0.05% at quality 75

Benefits the MovingThumbnailer test videos too.

Change-Id: I6838ce864d41e1e65311d26b9b8115a12390a253
2016-03-31 23:07:41 -07:00
ff0a94beda WebPAnimEncoder lossy: ignore small pixel differences for frame rectangles.
This way we can ignore some noisy pixels and get tighter frame
rectangles.

Some results:
- Correctness:
Tested that anim_diff reports all images are identical for lossless, and
similar min_psnr value for lossy and mixed modes.

Also checked output images visually to make sure there weren't any
obvious kinks.

- Compression:
A very tiny improvement for 6000 image GIF set we have (0.03%) for lossy
and mixed mode. For some of these images, frames get dropped
automatically as they have a very small diff from previous frame.

10 images from test_video_frames_png show a clear improvement in
compression though. This CL leads to 7 out of 9 lossy WebPs getting
smaller -- for one of them, this leads to a higher quality being picked
(as that’s still < 150 KB).

Change-Id: If539b9e77e1375aa15edc8f926933593a9865f1c
2016-03-31 20:58:42 -07:00
f80400843f gif2webp: Remove the 'prev_to_prev_canvas' buffer.
This was never used.

Change-Id: Icf50e658418b851af7b6c9c3f552cdfdfa68d6ae
2016-03-31 20:58:37 -07:00
6d8c07d375 Merge "WebPDequantizeLevels(): use stride in CountLevels()" 2016-03-31 07:49:56 +00:00
d96fe5e079 WebPDequantizeLevels(): use stride in CountLevels()
follow-up for patch #333712

Change-Id: I3f85e3fce9e1c9d1a862a14ba56134882807718f
2016-03-31 00:08:51 -07:00
ec1b2407a4 WebPPictureImport*: check output pointer
fixes crash with NULL output pointer in calls to simple encode api
(WebPEncodeRGB, etc.)

Change-Id: I91e7a1c0e070ea842b0a2a4ac54e981cac8629bf
2016-03-25 18:21:13 -07:00
c07687699b Merge "Revert "Re-enable encoding of alpha plane with color cache for next release."" 2016-03-25 07:29:07 +00:00
41f14bcbc5 WebPPictureImport*: check src pointer
fixes crash with NULL source pointer in calls to simple encode api
(WebPEncodeRGB, etc.)

Change-Id: I706d670c80298da5176aaa5ba0eb2238dd71a8f0
2016-03-24 22:52:01 -07:00
64eed38779 Pass stride parameter to WebPDequantizeLevels()
and also pass 'VP8Io* io' extra param to VP8DecompressAlphaRows()

This is somehow in preparation for some memory optimizations in
the 'cropping' case. For now, only the easy crop_bottom case is
optimized.

Change-Id: Ib54531ba057bf62b98422dbb6c181dda626c72c2
2016-03-18 15:36:58 +01:00
97934e2447 Revert "Re-enable encoding of alpha plane with color cache for next release."
This avoids generating file that would trigger a decoding bug
found in 0.4.0 -> 0.4.3 libwebp versions.

This reverts commit 6ecd72f845.

Change-Id: I4667cc8f7b851ba44479e3fe2b9d844b2c56fcf4
2016-03-18 11:01:54 +01:00
e88c4ca013 fix -m 2 mode-cost evaluation (causing partition0 overflow)
The mode's bits were not taken into account, which is ok for most of cases.
But in case of super large image, with 'easy' content, their overhead starts
mattering a lot and we were omitting to optimize for these.
Now, these mode bits have their own lambda values associated, limiting
the jerkiness. We also limit (for -m 2 only) the individual number of bits
to something that will prevent the partition 0 overflow.

removed the I4_PENALTY constant, which was a rather crude approximation.
Replaced by some q-dependent expression.

fixes issue #289

Change-Id: I956ae2d2308c339adc4706d52722f0bb61ccf18c
2016-03-11 20:34:45 +01:00
4562e83dc2 Merge "add extra meaning to WebPDecBuffer::is_external_memory" 2016-03-09 08:24:19 +00:00
abdb109f3b add extra meaning to WebPDecBuffer::is_external_memory
If value is '2', it means the buffer is a 'slow' one, like GPU-mapped memory.

This change is backward compatible (setting is_external_memory to 2
will be a no-op in previous libraries)

dwebp:  add flags to force a particular colorspace format
new flags is:
   -pixel_format {RGB,RGBA,BGR,BGRA,ARGB,RGBA_4444,RGB_565,
                  rgbA,bgrA,Argb,rgbA_4444,YUV,YUVA}
and also,external_memory {0,1,2}
These flags are mostly for debuggging purpose, and hence are not documented.

Change-Id: Iac88ce1e10b35163dd7af57f9660f062f5d8ed5e
2016-03-09 09:00:13 +01:00
875aec7044 enc_neon,cosmetics: break long comment
Change-Id: I88dff0271fef1cc6dd5888572bfe0f09f467b028
2016-03-08 23:33:21 -08:00
71e856cf84 GetMBSSIM,cosmetics: fix alignment
Change-Id: I884b3361484b48917fa4cba33cd1217ac51685f9
2016-03-08 23:26:10 -08:00
a90edffb7e fix missing 'extern' for SSIM function in dsp/
Change-Id: Id8143120f01065dc088f4e90bd930f8ea7c3ae5a
2016-03-08 10:27:46 -08:00
423ecaf484 move some SSIM-accumulation function for dsp/
This is in preparation for some SSE2 code.

And generally speaking, the whole SSIM code needs some
revamp: we're not averaging the SSIM value at each pixels
but just computing the overall SSIM value once, for the whole
plane. The former might be better than the latter.

Change-Id: I935784a917f84a18ef08dc5ec9a7b528abea46a5
2016-03-08 07:50:09 +01:00
f08e66245a Merge "Fix FindClosestDiscretized in near lossless:" 2016-03-04 09:34:16 +00:00
0d40cc5ea3 enc_neon,Disto4x4: remove an unnecessary transpose
based on the sse2 change in:
9960c31 Remove an unnecessary transposition in TTransform.

~9-10.5% faster at the function-level, < 1% overall

Change-Id: I44413369b230b250fb0dbc51ff2f17cfeda609b7
2016-03-03 16:18:59 -08:00
e8feb20e39 Fix FindClosestDiscretized in near lossless:
- The result is now indeed closest among possible results for all inputs, which
  was not the case for bits>4, where the mapping was not even monotonic because
  GetValAndDistance was correct only if the significant part of initial fit in
  a byte at most twice.

- The set of results for a larger number of bits dropped is a subset of values
  for a smaller number of bits dropped. This implies that subsequent
  discretizations for a smaller number of bits dropped do not change already
  discretized pixels, which improves the quality (changes do not accumulate)
  and compression density (values tend to repeat more often).

- Errors are more fairly distributed between upwards and downwards thanks to
  bankers’ rounding, which avoids images getting darker or lighter in overall.

- Deltas between discretized values are more repetitive. This improves
  compression density if delta encoding is used.

Also, the implementation is much shorter now.

Change-Id: I0a98e7d5255e91a7b9c193a156cf5405d9701f16
2016-03-02 12:47:22 +01:00
8200643017 anim_util: quiet static analysis warning
in ReadAnimatedWebP() frame_index is tied to the reported frame_count.

Change-Id: I69241e5d77a0b8bac1deabd992d0ad2ecf51a4ec
2016-02-23 11:13:38 -08:00
a6f23c49b2 Merge "AnimEncoder: Support progress hook and user data." 2016-02-20 04:25:57 +00:00
a5193774b0 Merge "Near lossless feature: fix some comments." 2016-02-20 03:51:15 +00:00
da98d31ced AnimEncoder: Support progress hook and user data.
Pass them along to internal 'pic' object, so that progress can be reported back
and user data can also be inspected.

Change-Id: Idb5d0d4a76d07283d704a86c5892e1ad7bda09fa
2016-02-19 19:50:30 -08:00
3335713169 Near lossless feature: fix some comments.
Change-Id: I2c5fc2a3b3fe5123d66b42bf148e361b4862dfb9
2016-02-19 19:26:39 -08:00
0beed01aa5 cosmetics: fix indent after 2f5e898
2f5e898 fix multiple allocation for transform buffer

Change-Id: Ied5c89c0040671e2eddf23c8b7a78e0d817dd18e
2016-02-19 19:22:34 -08:00
6753f35cac Merge "FTransformWHT optimization." 2016-02-19 09:38:04 +00:00
6583bb1a42 Improve SSE4.1 implementation of TTransform.
SSE4.1 is slower than the SSE2 implementation and this seems to
be due to a slow _mm_loadl_epi64 implementation by gcc
(hence a bug with my gcc 4.8) and a very slow _mm_hadd_epi32. Both
got confirmed by IACA and experiments.

Change-Id: I05607f66b7ccd8f4f42e000693aea583ffd5768f
2016-02-19 09:11:53 +01:00
7561d0c338 FTransformWHT optimization.
Data is packed sooner in the functions.

Change-Id: I018cfeca43f015ac755c7f209f9a97984cc0517b
2016-02-18 17:44:05 +01:00
7ccdb734c2 fix indentation after patch #328220
Change-Id: Iccfcf4deaed6b383b9f80ae84b5b0575a4e94b5f
2016-02-18 15:14:41 +01:00
6ec0d2a946 clarify the logic of the error path when decoding fails.
Change-Id: I2f86751ddafa4708dac3ffc9d6ec1f156e027a83
2016-02-18 14:58:18 +01:00
8aa352b256 Merge "Remove an unnecessary transposition in TTransform." 2016-02-18 08:15:10 +00:00
db86088426 Merge "remove useless #include" 2016-02-17 23:17:12 +00:00
9960c31685 Remove an unnecessary transposition in TTransform.
Change-Id: Ib715c2d5ba659cb2db9c6832875ba508cc2fca3e
2016-02-17 21:41:28 +01:00
6e36b51188 Small speedup in FTransform.
It removes two _mm_unpacklo_epi32 and two _mm_sub_epi16.

Change-Id: Icdf86259f796ba855d1cda5e9c0e99cb396cb351
2016-02-17 21:26:36 +01:00
9dbd4aad77 Merge "fix C and SIMD flags completion." 2016-02-17 19:22:20 +00:00
e60853eabc Add missing common_sse2.h file to makefile.unix
This was missed in bf2b4f114f

Change-Id: Ib0d2ac9f5b86196de2ac8feccbb9536530e991b2
2016-02-17 09:48:05 +01:00
696eb2b03a fix C and SIMD flags completion.
CMAKE_C_FLAGS is handled as a string and not a list by CMake.

Change-Id: Ia0cfb38817edef2f212a04b26601f6aea76a5684
2016-02-17 09:10:04 +01:00
2b4fe33e00 Merge "fix multiple allocation for transform buffer" 2016-02-17 07:27:23 +00:00
2f5e8986cf fix multiple allocation for transform buffer
We were not updating the current_width_, which is usually
not a problem, unless we use Delta Palette with small number
of colors
-> Addressed this re-entrancy problem by checking we have
enough capacity for transform buffer.

The problem is not currently visible, until we restrict
the number of gradient used in delta-palette to less than 16.
Then the buffers have different current_width_ and the problem
surfaces.

Change-Id: Icd84b919905d7789014bb6668bfb6813c93fb36e
2016-02-17 06:14:39 +01:00
bf2b4f114f Regroup common SSE code + optimization.
The transpose refactoring will help removing a transpose in a
later CL.

The horizontal add function helps removing a _mm_sad_epu8 in DC8uv
=> the latency/throughput went from 29/25 to 23/19

Change-Id: I5f3dfd4aad614eb079b1e83631e6a7cef49a3766
2016-02-16 18:34:34 +01:00
4ed650a13d force "-pass 6" if -psnr or -size is used but -pass isn't.
This is to prevent users shooting in the foot using -psnr or
-size alone and not getting the expected result.

Change-Id: I67a3289e4ec0a2a813c98807f2ec5e600f52dc63
2016-02-13 10:22:32 +01:00
3ef1ce98b9 yuv_sse2: fix -Wconstant-conversion warning
'implicit conversion from 'int' to 'short' changes value from 33050 to
-32486'

original patch:

https://codereview.chromium.org/1657313003/

Make libwebp build with -Wconstant-conversion from newer clangs.

After http://llvm.org/viewvc/llvm-project?rev=259271&view=rev, clang
points out that _mm_set1_epi16(33050) causes an overflow in the short
argument to _mm_set1_epi16().  Since there's no version that takes an
unsigned short, add an explicit cast to tell the compiler that this is
intentional.

No behavior change.

Change-Id: I6b4e3401b15cfbcc895f9e81b5c2dc59d43ffb9b
2016-02-02 14:52:11 -08:00
a7a03e9f96 Merge changes I4852d18f,I51ccb85d
* changes:
  gif2webp: set enc_options.verbose = 0 w/-quiet
  anim_encode,DefaultEncoderOptions: init verbose
2016-02-02 01:49:12 +00:00
5e122bd6d6 gif2webp: set enc_options.verbose = 0 w/-quiet
Change-Id: I4852d18f7c04f1f48684459f0ac5b002a8c2f27b
2016-02-01 17:00:51 -08:00
ab3c2583aa anim_encode,DefaultEncoderOptions: init verbose
default to disabled

broken since:
c13245c AnimEncoder: Add a GetError() method.

Change-Id: I51ccb85d5df338570512ec1d7430ad3229f93a9f
2016-02-01 17:00:19 -08:00
8f0dee773a Merge "configure: fix builtin detection w/-Werror" 2016-01-30 05:11:19 +00:00
4a7b85a967 cmake: fix builtin detection w/-Werror
and other warnings enabled (-Wunused-value)

Change-Id: Ie3605628b4cbfb5bfd1e847cc504cb96f2825388
2016-01-28 23:47:11 -08:00
b74657fb2e configure: fix builtin detection w/-Werror
and other warnings enabled (-Wunused-value)

Change-Id: Ic0080d559366a5254d0fa3ba939150445157087f
2016-01-28 23:45:35 -08:00
3661b98069 Add a CMakeLists.txt
The goal is not to replace our autotools configuration, but to
provide a minimal CMake file that can build the lib, cwebp and
dwebp.

Change-Id: I3be343bd698d118c5f00172449d232d87e868f23
2016-01-28 01:30:59 +01:00
75f4af4d54 remove useless #include
Change-Id: Id34f12ec94d8be6853fabd67609a6006ac99f152
2016-01-25 09:34:10 -08:00
6c1d763119 avoid Yoda style for comparison
Change-Id: I8ff9f96951e5e8a619f7132455dd281cbf91aa4d
2016-01-15 23:52:29 -08:00
8ce975ac82 SSE optimization for vector mismatch.
Change-Id: I564b822033b59d86635230f29ed6197e306a2c4f
2016-01-07 18:23:45 +01:00
7db53831a9 Merge tag 'v0.5.0'
libwebp-0.5.0
- 12/17/2015: version 0.5.0
  * miscellaneous bug & build fixes (issues #234, #258, #274, #275, #278)
  * encoder & decoder speed-ups on x86/ARM/MIPS for lossy & lossless
    - note! YUV->RGB conversion was sped-up, but the results will be slightly
      different from previous releases
  * various lossless encoder improvements
  * gif2webp improvements, -min_size option added
  * tools fully support input from stdin and output to stdout (issue #168)
  * New WebPAnimEncoder API for creating animations
  * New WebPAnimDecoder API for decoding animations
  * other API changes:
    - libwebp:
      WebPPictureSmartARGBToYUVA() (-pre 4 in cwebp)
      WebPConfig::exact (-exact in cwebp; -alpha_cleanup is now the default)
      WebPConfig::near_lossless (-near_lossless in cwebp)
      WebPFree() (free'ing webp allocated memory in other languages)
      WebPConfigLosslessPreset()
      WebPMemoryWriterClear()
    - libwebpdemux: removed experimental fragment related fields and functions
    - libwebpmux: WebPMuxSetCanvasSize()
  * new libwebpextras library with some uncommon import functions:
    WebPImportGray/WebPImportRGB565/WebPImportRGB4444

* tag 'v0.5.0':
  update ChangeLog
  faster rgb565/rgb4444/argb output
  update NEWS
  update AUTHORS
  update mailmap
  bump version to 0.5.0
  README: update help text, repo link

Change-Id: I21dc611cfd2a3cb6ed6ba5c455a5049fe615f7a1
2015-12-23 11:20:42 -08:00
129 changed files with 6793 additions and 2328 deletions

3
.gitignore vendored
View File

@ -31,3 +31,6 @@ src/webp/stamp-h1
*.pdb
/iosbuild
/WebP.framework
CMakeCache.txt
CMakeFiles/
cmake_install.cmake

View File

@ -8,3 +8,5 @@ Vikas Arora <vikasa@google.com>
<slobodan.prijic@imgtec.com> <Slobodan.Prijic@imgtec.com>
<vrabaud@google.com> <vincent.rabaud@gmail.com>
Tamar Levy <tamar.levy@intel.com>
<qrczak@google.com> <qrczak>
Hui Su <huisu@google.com>

View File

@ -2,6 +2,7 @@ Contributors:
- Charles Munger (clm at google dot com)
- Christian Duvivier (cduvivier at google dot com)
- Djordje Pesut (djordje dot pesut at imgtec dot com)
- Hui Su (huisu at google dot com)
- James Zern (jzern at google dot com)
- Jan Engelhardt (jengelh at medozas dot de)
- Johann (johann dot koenig at duck dot com)
@ -10,10 +11,13 @@ Contributors:
- Lode Vandevenne (lode at google dot com)
- Lou Quillio (louquillio at google dot com)
- Mans Rullgard (mans at mansr dot com)
- Marcin Kowalczyk (qrczak at google dot com)
- Martin Olsson (mnemo at minimum dot se)
- Mikołaj Zalewski (mikolajz at google dot com)
- Mislav Bradac (mislavm at google dot com)
- Nico Weber (thakis at chromium dot org)
- Noel Chromium (noel at chromium dot org)
- Parag Salasakar (img dot mips1 at gmail dot com)
- Pascal Massimino (pascal dot massimino at gmail dot com)
- Paweł Hajdan, Jr (phajdan dot jr at chromium dot org)
- Pierre Joye (pierre dot php at gmail dot com)

View File

@ -1,6 +1,7 @@
LOCAL_PATH := $(call my-dir)
WEBP_CFLAGS := -Wall -DANDROID -DHAVE_MALLOC_H -DHAVE_PTHREAD -DWEBP_USE_THREAD
WEBP_CFLAGS += -fvisibility=hidden
ifeq ($(APP_OPTIM),release)
WEBP_CFLAGS += -finline-functions -ffast-math \
@ -49,6 +50,7 @@ dsp_dec_srcs := \
src/dsp/dec_clip_tables.c \
src/dsp/dec_mips32.c \
src/dsp/dec_mips_dsp_r2.c \
src/dsp/dec_msa.c \
src/dsp/dec_neon.$(NEON) \
src/dsp/dec_sse2.c \
src/dsp/dec_sse41.c \

151
CMakeLists.txt Normal file
View File

@ -0,0 +1,151 @@
cmake_minimum_required(VERSION 2.8.7)
project(libwebp C)
# Options for coder / decoder executables.
option(WEBP_BUILD_CWEBP "Build the cwebp command line tool." OFF)
option(WEBP_BUILD_DWEBP "Build the dwebp command line tool." OFF)
option(WEBP_EXPERIMENTAL_FEATURES "Build with experimental features." OFF)
option(WEBP_FORCE_ALIGNED "Force aligned memory operations." OFF)
option(WEBP_ENABLE_SWAP_16BIT_CSP "Enable byte swap for 16 bit colorspaces." OFF)
set(WEBP_DEP_LIBRARIES)
set(WEBP_DEP_INCLUDE_DIRS)
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE "Release" CACHE
"Build type: Release, Debug or RelWithDebInfo" STRING FORCE
)
endif()
include(cmake/config.h.cmake)
################################################################################
# Options.
if(WEBP_ENABLE_SWAP_16BIT_CSP)
add_definitions(-DWEBP_SWAP_16BIT_CSP)
endif()
################################################################################
# Android only.
if(ANDROID)
include_directories(${ANDROID_NDK}/sources/android/cpufeatures)
add_library(cpufeatures STATIC
${ANDROID_NDK}/sources/android/cpufeatures/cpu-features.c
)
target_link_libraries(cpufeatures dl)
set(WEBP_DEP_LIBRARIES ${WEBP_DEP_LIBRARIES} cpufeatures)
set(WEBP_DEP_INCLUDE_DIRS ${WEBP_DEP_INCLUDE_DIRS}
${ANDROID_NDK}/sources/android/cpufeatures
)
endif()
################################################################################
# WebP source files.
# Read the Makefile.am to get the source files.
set(WEBP_SRCS)
function(parse_Makefile_am FOLDER WEBP_SRCS)
file(READ ${FOLDER}/Makefile.am MAKEFILE_AM)
string(REGEX MATCHALL "_SOURCES \\+= [^\n]*"
FILES_PER_LINE ${MAKEFILE_AM}
)
set(SRCS ${WEBP_SRCS})
foreach(FILES ${FILES_PER_LINE})
string(SUBSTRING ${FILES} 12 -1 FILES)
string(REGEX MATCHALL "[0-9a-z\\._]+"
FILES ${FILES}
)
foreach(FILE ${FILES})
list(APPEND SRCS ${FOLDER}/${FILE})
endforeach()
endforeach()
set(WEBP_SRCS ${SRCS} PARENT_SCOPE)
endfunction()
parse_Makefile_am(${CMAKE_CURRENT_SOURCE_DIR}/src/dec "${WEBP_SRCS}")
parse_Makefile_am(${CMAKE_CURRENT_SOURCE_DIR}/src/demux "${WEBP_SRCS}")
parse_Makefile_am(${CMAKE_CURRENT_SOURCE_DIR}/src/dsp "${WEBP_SRCS}")
parse_Makefile_am(${CMAKE_CURRENT_SOURCE_DIR}/src/enc "${WEBP_SRCS}")
parse_Makefile_am(${CMAKE_CURRENT_SOURCE_DIR}/src/utils "${WEBP_SRCS}")
# Remove the files specific to SIMD we don't use.
foreach(FILE ${WEBP_SIMD_FILES_NOT_TO_INCLUDE})
list(REMOVE_ITEM WEBP_SRCS ${FILE})
endforeach()
# Build the library.
add_definitions(-Wall)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/src/ ${WEBP_DEP_INCLUDE_DIRS})
add_library(webp ${WEBP_SRCS})
target_link_libraries(webp ${WEBP_DEP_LIBRARIES})
# Change the compile flags for SIMD files we use.
list(LENGTH WEBP_SIMD_FILES_TO_INCLUDE WEBP_SIMD_FILES_TO_INCLUDE_LENGTH)
math(EXPR WEBP_SIMD_FILES_TO_INCLUDE_RANGE
"${WEBP_SIMD_FILES_TO_INCLUDE_LENGTH}-1"
)
foreach(I_FILE RANGE ${WEBP_SIMD_FILES_TO_INCLUDE_RANGE})
list(GET WEBP_SIMD_FILES_TO_INCLUDE ${I_FILE} FILE)
list(GET WEBP_SIMD_FLAGS_TO_INCLUDE ${I_FILE} SIMD_COMPILE_FLAG)
set_source_files_properties(${FILE} PROPERTIES
COMPILE_FLAGS ${SIMD_COMPILE_FLAG}
)
endforeach()
# Build the executables if asked for.
if(WEBP_BUILD_CWEBP OR WEBP_BUILD_DWEBP)
# Example utility library.
set(exampleutil_SRCS
${CMAKE_CURRENT_SOURCE_DIR}/examples/example_util.c
${CMAKE_CURRENT_SOURCE_DIR}/examples/example_util.h
${CMAKE_CURRENT_SOURCE_DIR}/examples/stopwatch.h)
add_library(exampleutil ${exampleutil_SRCS})
target_link_libraries(exampleutil webp ${WEBP_DEP_LIBRARIES})
endif()
if(WEBP_BUILD_CWEBP)
# Image-decoding utility library.
set(exampledec_SRCS
${CMAKE_CURRENT_SOURCE_DIR}/examples/image_dec.c
${CMAKE_CURRENT_SOURCE_DIR}/examples/image_dec.h
${CMAKE_CURRENT_SOURCE_DIR}/examples/jpegdec.c
${CMAKE_CURRENT_SOURCE_DIR}/examples/jpegdec.h
${CMAKE_CURRENT_SOURCE_DIR}/examples/metadata.c
${CMAKE_CURRENT_SOURCE_DIR}/examples/metadata.h
${CMAKE_CURRENT_SOURCE_DIR}/examples/pngdec.c
${CMAKE_CURRENT_SOURCE_DIR}/examples/pngdec.h
${CMAKE_CURRENT_SOURCE_DIR}/examples/tiffdec.c
${CMAKE_CURRENT_SOURCE_DIR}/examples/tiffdec.h
${CMAKE_CURRENT_SOURCE_DIR}/examples/webpdec.c
${CMAKE_CURRENT_SOURCE_DIR}/examples/webpdec.h
${CMAKE_CURRENT_SOURCE_DIR}/examples/wicdec.c
${CMAKE_CURRENT_SOURCE_DIR}/examples/wicdec.h)
add_library(exampledec ${exampledec_SRCS})
target_link_libraries(exampledec webp ${WEBP_DEP_LIBRARIES}
${WEBP_DEP_IMG_LIBRARIES})
endif()
if(WEBP_BUILD_DWEBP)
# dwebp
include_directories(${WEBP_DEP_IMG_INCLUDE_DIRS})
add_executable(dwebp
${CMAKE_CURRENT_SOURCE_DIR}/examples/dwebp.c
${CMAKE_CURRENT_SOURCE_DIR}/examples/stopwatch.h
)
target_link_libraries(dwebp webp exampleutil ${WEBP_DEP_LIBRARIES}
${WEBP_DEP_IMG_LIBRARIES}
)
endif()
if(WEBP_BUILD_CWEBP)
# cwebp
include_directories(${WEBP_DEP_IMG_INCLUDE_DIRS})
add_executable(cwebp
${CMAKE_CURRENT_SOURCE_DIR}/examples/cwebp.c
${CMAKE_CURRENT_SOURCE_DIR}/examples/stopwatch.h)
target_link_libraries(cwebp exampledec webp exampleutil
${WEBP_DEP_LIBRARIES} ${WEBP_DEP_IMG_LIBRARIES}
)
endif()

232
ChangeLog
View File

@ -1,3 +1,233 @@
aa7744c anim_util: quiet implicit conv warnings in 32-bit
d912027 jpegdec: correct ContextFill signature
24eb394 Remove some errors when compiling the code as C++.
a4a8e5f vwebp: clear canvas during resize w/o animation
31ca2a8 tiffdec: restore libtiff 3.9.x compatibility
b2f77b5 update NEWS
5ab6d9d AnimEncoder: avoid freeing uninitialized memory pointer.
f29bf58 WebPAnimEncoder: If 'minimize_size' and 'allow_mixed' on, try lossy + lossless.
df780e0 fix a potential overflow with MALLOC_LIMIT
218460c bump version to 0.5.2
de7d654 update AUTHORS & .mailmap
74a12b1 iosbuild.sh: add WebPDecoder.framework + encoder
be7dcc0 AnimEncoder: Correctly skip a frame when sub-rectangle is empty.
4088583 Fix assertions in WebPRescalerExportRow()
8f38c72 fix a typo in WebPPictureYUVAToARGB's doc
33ca93f systematically call WebPDemuxReleaseIterator() on dec->prev_iter_
76e1907 doc: use two's complement explicitly for uint8->int8 conversion
f91ba96 Anim_encoder: correctly handle enc->prev_candidate_undecided_
25d74e6 WebPPictureDistortion(): free() -> WebPSafeFree()
03f1c00 mux/Makefile.am: add missing -lm
58410cd fix bug in RefineUsingDistortion()
e168af8 fix filtering auto-adjustment
ed9dec4 fix doc and code snippet for WebPINewDecoder() doc
3c49178 prevent 32b overflow for very large canvas_width / height
9595f29 fix anim_util.c compilation when HAVE_GIF is not defined.
7ec9552 Make gif transparent color to be transparent black
9871335 Add a CMake option for WEBP_SWAP_16BIT_CSP.
0ae3222 Fix missing cpu-features for Android.
ab4c805 cpu.cmake: improve webp_check_compiler_flag output
eec5fa3 Provide support for CMake on Android studio 2.2.
004d569 Split the main CMake file.
4fe5d58 Android.mk: use -fvisibility=hidden
bd63a31 vwebp: ensure setenv() is available in stdlib.h
363a568 vwebp: handle window resizing properly
a0d2753 lower WEBP_MAX_ALLOCABLE_MEMORY default
31fe11a fix infinite loop in case of PARTITION0 overflow
532215d Change the rule of picking UV mode in MBAnalyzeBestUVMode()
9c75dbd cwebp.1: improve some grammar
af2e05c vwebp: Clear previous frame when a key triggers a redraw
26ffa29 Add descriptions of default configuration in help info.
7416280 Fix an unsigned integer overflow error in enc/cost.h
13cf1d2 Do token recording and counting in a single loop
eb9a4b9 Reset segment id if we decide not to update segment map
42ebe3b configure: fix NEON flag detection under gcc 6
83cbfa0 Import: use relative pointer offsets
a1ade40 PreprocessARGB: use relative pointer offsets
fd4d090 ConvertWRGBToYUV: use relative pointer offsets
9daad45 ImportYUVAFromRGBA: use relative pointer offsets
c284780 imageio_util: add ImgIoUtilCheckSizeArgumentsOverflow
e375080 gifdec,Remap: avoid out of bounds colormap read
c222a05 additional fix for stride type as size_t
bb23361 fix potential overflow when width * height * 4 >= (1<<32)
883d41f gif2webp: fix crash with NULL extension data
3d97bb7 update ChangeLog (tag: v0.5.1, origin/0.5.1, 0.5.1)
deb54d9 Clarify the expected 'config' lifespan in WebPIDecode()
c7e2d24 update ChangeLog (tag: v0.5.1-rc5)
c7eb06f Fix corner case in CostManagerInit.
ab7937a gif2webp: normalize the number of .'s in the help message
3cdec84 vwebp: normalize the number of .'s in the help message
bdf6241 cwebp: normalize the number of .'s in the help message
06a38c7 fix rescaling bug: alpha plane wasn't filled with 0xff
319e37b Improve lossless compression.
447adbc 'our bug tracker' -> 'the bug tracker'
97b9e64 normalize the number of .'s in the help message
bb50bf4 pngdec,ReadFunc: throw an error on invalid read
38063af decode.h,WebPGetInfo: normalize function comment
9e8e1b7 Inline GetResidual for speed.
7d58d1b Speed-up uniform-region processing.
23e29cb Merge "Fix a boundary case in BackwardReferencesHashChainDistanceOnly." into 0.5.1
0bb23b2 free -> WebPSafeFree()
e7b9177 Merge "DecodeImageData(): change the incorrect assert" into 0.5.1
2abfa54 DecodeImageData(): change the incorrect assert
5a48fcd Merge "configure: test for -Wfloat-conversion"
0174d18 Fix a boundary case in BackwardReferencesHashChainDistanceOnly.
6a9c262 Merge "Added MSA optimized transform functions"
cfbcc5e Make sure to consider small distances in LZ77.
5e60c42 Added MSA optimized transform functions
3dc28d7 configure: test for -Wfloat-conversion
f2a0946 add some asserts to delimit the perimeter of CostManager's operation
9a583c6 fix invalid-write bug for alpha-decoding
f66512d make gradlew executable
6fda58f backward_references: quiet double->int warning
a48cc9d Merge "Fix a compression regression for images with long uniform regions." into 0.5.1
cc2720c Merge "Revert an LZ77 boundary constant." into 0.5.1
059aab4 Fix a compression regression for images with long uniform regions.
b0c7e49 Check more backward matches with higher quality.
a361151 Revert an LZ77 boundary constant.
8190374 README: fix typo
7551db4 update NEWS
0fb2269 bump version to 0.5.1
f453761 update AUTHORS & .mailmap
3259571 Refactor GetColorPalette method.
1df5e26 avoid using tmp histogram in PreparePair()
7685123 fix comment typos
a246b92 Speedup backward references.
76d73f1 Merge "CostManager: introduce a free-list of ~10 intervals"
eab39d8 CostManager: introduce a free-list of ~10 intervals
4c59aac Merge "mips msa webp configuration"
043c33f Merge "Improve speed and compression in backward reference for lossless."
71be9b8 Merge "clarify variable names in HistogramRemap()"
0ba7fd7 Improve speed and compression in backward reference for lossless.
0481d42 CostManager: cache one interval and re-use it when possible
41b7e6b Merge "histogram: fix bin calculation"
96c3d62 histogram: fix bin calculation
fe9e31e clarify variable names in HistogramRemap()
ce3c824 disable near-lossless quantization if palette is used
e11da08 mips msa webp configuration
5f8f998 mux: Presence of unknown chunks should trigger VP8X chunk output.
cadec0b Merge "Sync mips32 and dsp_r2 YUV->RGB code with C verison"
d963775 Compute the hash chain once and for all for lossless compression.
50a4866 Sync mips32 and dsp_r2 YUV->RGB code with C verison
eee788e Merge "introduce a common signature for all image reader function"
d77b877 introduce a common signature for all image reader function
ca8d951 remove some obsolete TODOs
ae2a722 collect all decoding utilities from examples/ in libexampledec.a
0b8ae85 Merge "Move DitherCombine8x8 to dsp/dec.c"
77cad88 Merge "ReadWebP: avoid conversion to ARGB if final format is YUVA"
ab8d669 ReadWebP: avoid conversion to ARGB if final format is YUVA
f8b7ce9 Merge "test pointer to NULL explicitly"
5df6f21 test pointer to NULL explicitly
77f21c9 Move DitherCombine8x8 to dsp/dec.c
c9e6d86 Add gradle support
c65f41e Revert "Add gradle support"
bf731ed Add gradle support
08333b8 WebPAnimEncoder: Detect when canvas is modified, restore only when needed.
0209d7e Merge "speed-up MapToPalette() with binary search"
fdd29a3 speed-up MapToPalette() with binary search
cf4a651 Revert "Refactor GetColorPalette method."
0a27aca Merge changes Idfa8ce83,I19adc9c4
f25c440 WebPAnimEncoder: Restore original canvas between multiple encodes.
169004b Refactor GetColorPalette method.
576362a VP8LDoFillBitWindow: support big-endian in fast path
ac49e4e bit_reader.c: s/VP8L_USE_UNALIGNED_LOAD/VP8L_USE_FAST_LOAD/
d39ceb5 VP8LDoFillBitWindow: remove stale TODO
2ec2de1 Merge "Speed-up BackwardReferencesHashChainDistanceOnly."
3e023c1 Speed-up BackwardReferencesHashChainDistanceOnly.
f2e1efb Improve near lossless compression when a prediction filter is used.
e15afbc dsp.h: fix ubsan macro name
e53c9cc dsp.h: add WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW
af81fdb utils.h: quiet -fsanitize=undefined warnings
ea0be35 dsp.h: remove utils.h include
cd276ae utils/*.c: ../utils/utils.h -> ./utils.h
c892713 utils/Makefile.am: add some missing headers
ea24e02 Merge "dsp.h: add WEBP_UBSAN_IGNORE_UNDEF"
369e264 dsp.h: add WEBP_UBSAN_IGNORE_UNDEF
0d020a7 Merge "add runtime NEON detection"
5ee2136 Merge "add VP8LAddPixels() to lossless.h"
47435a6 add VP8LAddPixels() to lossless.h
8fa6ac6 remove two ubsan warnings
74fb56f add runtime NEON detection
4154a83 MIPS update to new Unfilter API
c80b9fc Merge "cherry-pick decoder fix for 64-bit android devices"
6235147 cherry-pick decoder fix for 64-bit android devices
d41b8c4 configure: test for -Wformat-* w/-Wformat present
5f95589 Fix WEBP_ALIGN in case the argument is a pointer to a type larger than a byte.
2309fd5 replace num_parts_ by num_parts_minus_one_ (unsigned)
9629f4b SimplifySegments: quiet -Warray-bounds warning
de47492 Merge "update the Unfilter API in dsp to process one row independently"
2102ccd update the Unfilter API in dsp to process one row independently
e3912d5 WebPAnimEncoder: Restore canvas before evaluating blending possibility.
6e12e1e WebPAnimEncoder: Fix for single-frame optimization.
602f344 Merge changes I1d03acac,Ifcb64219
95ecccf only apply color-mapping for alpha on the cropped area
47dd070 anim_diff: Add an experimental option for max inter-frame diff.
aa809cf only allocate alpha_plane_ up to crop_bottom row
31f2b8d WebPAnimEncoder: FlattenSimilarPixels(): look for similar
774dfbd perform alpha filtering within the decoding loop
a4cae68 lossless decoding: only process decoded row up to last_row
238cdcd Only call WebPDequantizeLevels() on cropped area
cf6c713 alpha: preparatory cleanup
b95ac0a Merge "VP8GetHeaders(): initialize VP8Io with sane value for crop/scale dimensions"
8923139 VP8GetHeaders(): initialize VP8Io with sane value for crop/scale dimensions
5828e19 use_8b_decode -> use_8b_decode_
8dca024 fix bug in alpha.c that was triggering a memory error in incremental mode
9a950c5 WebPAnimEncoder: Disable filtering when blending is used with lossy encoding.
eb42390 WebPAnimEncoder: choose max diff for framerect based on quality.
ff0a94b WebPAnimEncoder lossy: ignore small pixel differences for frame rectangles.
f804008 gif2webp: Remove the 'prev_to_prev_canvas' buffer.
6d8c07d Merge "WebPDequantizeLevels(): use stride in CountLevels()"
d96fe5e WebPDequantizeLevels(): use stride in CountLevels()
ec1b240 WebPPictureImport*: check output pointer
c076876 Merge "Revert "Re-enable encoding of alpha plane with color cache for next release.""
41f14bc WebPPictureImport*: check src pointer
64eed38 Pass stride parameter to WebPDequantizeLevels()
97934e2 Revert "Re-enable encoding of alpha plane with color cache for next release."
e88c4ca fix -m 2 mode-cost evaluation (causing partition0 overflow)
4562e83 Merge "add extra meaning to WebPDecBuffer::is_external_memory"
abdb109 add extra meaning to WebPDecBuffer::is_external_memory
875aec7 enc_neon,cosmetics: break long comment
71e856c GetMBSSIM,cosmetics: fix alignment
a90edff fix missing 'extern' for SSIM function in dsp/
423ecaf move some SSIM-accumulation function for dsp/
f08e662 Merge "Fix FindClosestDiscretized in near lossless:"
0d40cc5 enc_neon,Disto4x4: remove an unnecessary transpose
e8feb20 Fix FindClosestDiscretized in near lossless:
8200643 anim_util: quiet static analysis warning
a6f23c4 Merge "AnimEncoder: Support progress hook and user data."
a519377 Merge "Near lossless feature: fix some comments."
da98d31 AnimEncoder: Support progress hook and user data.
3335713 Near lossless feature: fix some comments.
0beed01 cosmetics: fix indent after 2f5e898
6753f35 Merge "FTransformWHT optimization."
6583bb1 Improve SSE4.1 implementation of TTransform.
7561d0c FTransformWHT optimization.
7ccdb73 fix indentation after patch #328220
6ec0d2a clarify the logic of the error path when decoding fails.
8aa352b Merge "Remove an unnecessary transposition in TTransform."
db86088 Merge "remove useless #include"
9960c31 Remove an unnecessary transposition in TTransform.
6e36b51 Small speedup in FTransform.
9dbd4aa Merge "fix C and SIMD flags completion."
e60853e Add missing common_sse2.h file to makefile.unix
696eb2b fix C and SIMD flags completion.
2b4fe33 Merge "fix multiple allocation for transform buffer"
2f5e898 fix multiple allocation for transform buffer
bf2b4f1 Regroup common SSE code + optimization.
4ed650a force "-pass 6" if -psnr or -size is used but -pass isn't.
3ef1ce9 yuv_sse2: fix -Wconstant-conversion warning
a7a03e9 Merge changes I4852d18f,I51ccb85d
5e122bd gif2webp: set enc_options.verbose = 0 w/-quiet
ab3c258 anim_encode,DefaultEncoderOptions: init verbose
8f0dee7 Merge "configure: fix builtin detection w/-Werror"
4a7b85a cmake: fix builtin detection w/-Werror
b74657f configure: fix builtin detection w/-Werror
3661b98 Add a CMakeLists.txt
75f4af4 remove useless #include
6c1d763 avoid Yoda style for comparison
8ce975a SSE optimization for vector mismatch.
7db5383 Merge tag 'v0.5.0'
37f0494 update ChangeLog (tag: v0.5.0-rc1, tag: v0.5.0, origin/0.5.0, 0.5.0)
7e7b6cc faster rgb565/rgb4444/argb output
4c7f565 update NEWS
1f62b6b update AUTHORS
@ -140,7 +370,7 @@ f7c507a Merge "remove unnecessary #include "yuv.h""
7861578 for ReadXXXX() image-readers, use the value of pic->use_argb
14e4043 remove unnecessary #include "yuv.h"
469ba2c vwebp: fix incorrect clipping w/NO_BLEND
4b9186b update issue tracker url (master)
4b9186b update issue tracker url
d64d376 change WEBP_ALIGN_CST value to 31
f717b82 vp8l.c, cosmetics: fix indent after 95509f9
927ccdc Merge "fix alignment of allocated memory in AllocateTransformBuffer"

View File

@ -194,6 +194,7 @@ DSP_DEC_OBJS = \
$(DIROBJ)\dsp\dec_clip_tables.obj \
$(DIROBJ)\dsp\dec_mips32.obj \
$(DIROBJ)\dsp\dec_mips_dsp_r2.obj \
$(DIROBJ)\dsp\dec_msa.obj \
$(DIROBJ)\dsp\dec_neon.obj \
$(DIROBJ)\dsp\dec_sse2.obj \
$(DIROBJ)\dsp\dec_sse41.obj \
@ -244,6 +245,7 @@ EX_ANIM_UTIL_OBJS = \
$(DIROBJ)\examples\anim_util.obj \
EX_FORMAT_DEC_OBJS = \
$(DIROBJ)\examples\image_dec.obj \
$(DIROBJ)\examples\jpegdec.obj \
$(DIROBJ)\examples\metadata.obj \
$(DIROBJ)\examples\pngdec.obj \

27
NEWS
View File

@ -1,3 +1,30 @@
- 12/13/2016: version 0.5.2
This is a binary compatible release.
This release covers CVE-2016-8888 and CVE-2016-9085.
* further security related hardening in the tools; fixes to
gif2webp/AnimEncoder (issues #310, #314, #316, #322), cwebp/libwebp (issue
#312)
* full libwebp (encoder & decoder) iOS framework; libwebpdecoder
WebP.framework renamed to WebPDecoder.framework (issue #307)
* CMake support for Android Studio (2.2)
* miscellaneous build related fixes (issue #306, #313)
* miscellaneous documentation improvements (issue #225)
* minor lossy encoder fixes and improvements
- 6/14/2016: version 0.5.1
This is a binary compatible release.
* miscellaneous bug fixes (issues #280, #289)
* reverted alpha plane encoding with color cache for compatibility with
libwebp 0.4.0->0.4.3 (issues #291, #298)
* lossless encoding performance improvements
* memory reduction in both lossless encoding and decoding
* force mux output to be in the extended format (VP8X) when undefined chunks
are present (issue #294)
* gradle, cmake build support
* workaround for compiler bug causing 64-bit decode failures on android
devices using clang-3.8 in the r11c NDK
* various WebPAnimEncoder improvements
- 12/17/2015: version 0.5.0
* miscellaneous bug & build fixes (issues #234, #258, #274, #275, #278)
* encoder & decoder speed-ups on x86/ARM/MIPS for lossy & lossless

112
README
View File

@ -4,7 +4,7 @@
\__\__/\____/\_____/__/ ____ ___
/ _/ / \ \ / _ \/ _/
/ \_/ / / \ \ __/ \__
\____/____/\_____/_____/____/v0.5.0
\____/____/\_____/_____/____/v0.5.2
Description:
============
@ -20,7 +20,7 @@ https://chromium.googlesource.com/webm/libwebp
It is released under the same license as the WebM project.
See http://www.webmproject.org/license/software/ or the
file "COPYING" file for details. An additional intellectual
"COPYING" file for details. An additional intellectual
property rights grant can be found in the file PATENTS.
Building:
@ -84,6 +84,73 @@ be installed independently using a minor modification in the corresponding
Makefile.am configure files (see comments there). See './configure --help' for
more options.
Building for MIPS Linux:
------------------------
MIPS Linux toolchain stable available releases can be found at:
https://community.imgtec.com/developers/mips/tools/codescape-mips-sdk/available-releases/
# Add toolchain to PATH
export PATH=$PATH:/path/to/toolchain/bin
# 32-bit build for mips32r5 (p5600)
HOST=mips-mti-linux-gnu
MIPS_CFLAGS="-O3 -mips32r5 -mabi=32 -mtune=p5600 -mmsa -mfp64 \
-msched-weight -mload-store-pairs -fPIE"
MIPS_LDFLAGS="-mips32r5 -mabi=32 -mmsa -mfp64 -pie"
# 64-bit build for mips64r6 (i6400)
HOST=mips-img-linux-gnu
MIPS_CFLAGS="-O3 -mips64r6 -mabi=64 -mtune=i6400 -mmsa -mfp64 \
-msched-weight -mload-store-pairs -fPIE"
MIPS_LDFLAGS="-mips64r6 -mabi=64 -mmsa -mfp64 -pie"
./configure --host=${HOST} --build=`config.guess` \
CC="${HOST}-gcc -EL" \
CFLAGS="$MIPS_CFLAGS" \
LDFLAGS="$MIPS_LDFLAGS"
make
make install
CMake:
------
The support for CMake is minimal: it only helps you compile libwebp, cwebp and
dwebp.
Prerequisites:
A compiler (e.g., gcc with autotools) and CMake.
On a Debian-like system the following should install everything you need for a
minimal build:
$ sudo apt-get install build-essential cmake
When building from git sources, you will need to run cmake to generate the
configure script.
mkdir build && cd build && cmake ../
make
make install
If you also want cwebp or dwebp, you will need to enable them through CMake:
cmake -DWEBP_BUILD_CWEBP=ON -DWEBP_BUILD_DWEBP=ON ../
or through your favorite interface (like ccmake or cmake-qt-gui).
Gradle:
-------
The support for Gradle is minimal: it only helps you compile libwebp, cwebp and
dwebp and webpmux_example.
Prerequisites:
A compiler (e.g., gcc with autotools) and gradle.
On a Debian-like system the following should install everything you need for a
minimal build:
$ sudo apt-get install build-essential gradle
When building from git sources, you will need to run the Gradle wrapper with the
appropriate target, e.g. :
./gradlew buildAllExecutables
SWIG bindings:
--------------
@ -151,10 +218,11 @@ If input size (-s) for an image is not specified, it is
assumed to be a PNG, JPEG, TIFF or WebP file.
Options:
-h / -help ............ short help
-H / -longhelp ........ long help
-q <float> ............. quality factor (0:small..100:big)
-alpha_q <int> ......... transparency-compression quality (0..100)
-h / -help ............. short help
-H / -longhelp ......... long help
-q <float> ............. quality factor (0:small..100:big), default=75
-alpha_q <int> ......... transparency-compression quality (0..100),
default=100
-preset <string> ....... preset setting, one of:
default, photo, picture,
drawing, icon, text
@ -162,15 +230,15 @@ Options:
-z <int> ............... activates lossless preset with given
level in [0:fast, ..., 9:slowest]
-m <int> ............... compression method (0=fast, 6=slowest)
-segments <int> ........ number of segments to use (1..4)
-m <int> ............... compression method (0=fast, 6=slowest), default=4
-segments <int> ........ number of segments to use (1..4), default=4
-size <int> ............ target size (in bytes)
-psnr <float> .......... target PSNR (in dB. typically: 42)
-s <int> <int> ......... input size (width x height) for YUV
-sns <int> ............. spatial noise shaping (0:off, 100:max)
-f <int> ............... filter strength (0=off..100)
-sharpness <int> ....... filter sharpness (0:most .. 7:least sharp)
-sns <int> ............. spatial noise shaping (0:off, 100:max), default=50
-f <int> ............... filter strength (0=off..100), default=60
-sharpness <int> ....... filter sharpness (0:most .. 7:least sharp), default=0
-strong ................ use strong filter instead of simple (default)
-nostrong .............. use simple filter instead of strong
-partition_limit <int> . limit quality to fit the 512k limit on
@ -185,18 +253,18 @@ Options:
-print_ssim ............ prints averaged SSIM distortion
-print_lsim ............ prints local-similarity distortion
-d <file.pgm> .......... dump the compressed output (PGM file)
-alpha_method <int> .... transparency-compression method (0..1)
-alpha_method <int> .... transparency-compression method (0..1), default=1
-alpha_filter <string> . predictive filtering for alpha plane,
one of: none, fast (default) or best
-exact ................. preserve RGB values in transparent area
-exact ................. preserve RGB values in transparent area, default=off
-blend_alpha <hex> ..... blend colors against background color
expressed as RGB values written in
hexadecimal, e.g. 0xc0e0d0 for red=0xc0
green=0xe0 and blue=0xd0
-noalpha ............... discard any transparency information
-lossless .............. encode image losslessly
-lossless .............. encode image losslessly, default=off
-near_lossless <int> ... use near-lossless image
preprocessing (0..100=off)
preprocessing (0..100=off), default=100
-hint <string> ......... specify image characteristics hint,
one of: photo, picture or graph
@ -274,7 +342,7 @@ Use following options to convert into alternate image formats:
-yuv ......... save the raw YUV samples in flat layout
Other options are:
-version .... print version number and exit
-version ..... print version number and exit
-nofancy ..... don't use the fancy YUV420 upscaler
-nofilter .... disable in-loop filtering
-nodither .... disable dithering
@ -286,8 +354,8 @@ Use following options to convert into alternate image formats:
-flip ........ flip the output vertically
-alpha ....... only save the alpha plane
-incremental . use incremental decoding (useful for tests)
-h ....... this help message
-v ....... verbose (e.g. print encoding/decoding times)
-h ........... this help message
-v ........... verbose (e.g. print encoding/decoding times)
-quiet ....... quiet mode, don't print anything
-noasm ....... disable all assembly optimizations
@ -303,7 +371,7 @@ Usage: vwebp in_file [options]
Decodes the WebP image file and visualize it using OpenGL
Options are:
-version .... print version number and exit
-version ..... print version number and exit
-noicc ....... don't use the icc profile if present
-nofancy ..... don't use the fancy YUV420 upscaler
-nofilter .... disable in-loop filtering
@ -311,7 +379,7 @@ Options are:
-noalphadither disable alpha plane dithering
-mt .......... use multi-threading
-info ........ print info
-h ....... this help message
-h ........... this help message
Keyboard shortcuts:
'c' ................ toggle use of color profile
@ -353,7 +421,7 @@ vwebp.
Usage:
gif2webp [options] gif_file -o webp_file
Options:
-h / -help ............ this help
-h / -help ............. this help
-lossy ................. encode image using lossy compression
-mixed ................. for each frame in the image, pick lossy
or lossless compression heuristically
@ -637,7 +705,7 @@ an otherwise too-large picture. Some CPU can be saved too, incidentally.
Bugs:
=====
Please report all bugs to our issue tracker:
Please report all bugs to the issue tracker:
https://bugs.chromium.org/p/webp
Patches welcome! See this page to get started:
http://www.webmproject.org/code/contribute/submitting-patches/

View File

@ -1,7 +1,7 @@
 __ __ ____ ____ ____ __ __ _ __ __
/ \\/ \/ _ \/ _ \/ _ \/ \ \/ \___/_ / _\
\ / __/ _ \ __/ / / (_/ /__
\__\__/\_____/_____/__/ \__//_/\_____/__/___/v0.3.0
\__\__/\_____/_____/__/ \__//_/\_____/__/___/v0.3.2
Description:
@ -198,7 +198,7 @@ For a detailed AnimEncoder API reference, please refer to the header file
Bugs:
=====
Please report all bugs to our issue tracker:
Please report all bugs to the issue tracker:
https://bugs.chromium.org/p/webp
Patches welcome! See this page to get started:
http://www.webmproject.org/code/contribute/submitting-patches/

337
build.gradle Normal file
View File

@ -0,0 +1,337 @@
// Define dependencies.
buildscript {
repositories {
maven {
url "https://jcenter.bintray.com"
}
}
dependencies {
classpath "com.android.tools.build:gradle:${ANDROID_GRADLE_PLUGIN_VERSION}"
}
}
// Define versions in the project.
project.ext {
buildToolsVersion = "${BUILD_TOOLS_VERSION}"
compileSdkVersion = COMPILE_SDK_VERSION.toInteger()
}
// Core libraries and executables.
apply plugin: "c"
def NEON
model {
buildTypes {
debug
release
}
platforms {
arm {
architecture "arm"
}
arm64 {
architecture "arm64"
}
x86 {
architecture "x86"
}
x64 {
architecture "x86_64"
}
mips32r2
mips32r5
mips64r6
}
toolChains {
gcc(Gcc) {
target("mips32r2") {
cCompiler.args "-mips32r2"
}
target("mips32r5") {
cCompiler.args "-mips32r5"
}
target("mips64r6") {
cCompiler.args "-mips64r6"
}
}
}
binaries {
all {
if (toolChain in Gcc) {
cCompiler.args "-fPIC"
cCompiler.args "-Wall"
cCompiler.define "ANDROID"
cCompiler.define "HAVE_MALLOC_H"
}
// Optimizations.
if (buildType == buildTypes.release) {
if (toolChain in Gcc) {
cCompiler.args "-finline-functions"
cCompiler.args "-ffast-math"
cCompiler.args "-ffunction-sections"
cCompiler.args "-fdata-sections"
}
if (toolChain in Clang) {
cCompiler.args "-frename-registers -s"
}
}
// Check for NEON usage.
if (getTargetPlatform() == "arm" || getTargetPlatform() == "arm64") {
NEON = "c.neon"
} else {
NEON = "c"
}
}
// Link to pthread for shared libraries.
withType(SharedLibraryBinarySpec) {
if (toolChain in Gcc) {
cCompiler.define "HAVE_PTHREAD"
cCompiler.define "WEBP_USE_THREAD"
linker.args "-pthread"
}
}
}
components {
webp(NativeLibrarySpec) {
sources {
c {
source {
srcDir "src/dec"
include "alpha.c"
include "buffer.c"
include "frame.c"
include "idec.c"
include "io.c"
include "quant.c"
include "tree.c"
include "vp8.c"
include "vp8l.c"
include "webp.c"
srcDir "src/dsp"
include "alpha_processing.c"
include "alpha_processing_mips_dsp_r2.c"
include "alpha_processing_sse2.c"
include "alpha_processing_sse41.c"
include "argb.c"
include "argb_mips_dsp_r2.c"
include "argb_sse2.c"
include "cpu.c"
include "dec.c"
include "dec_clip_tables.c"
include "dec_mips32.c"
include "dec_mips_dsp_r2.c"
include "dec_msa.c"
include "dec_neon.$NEON"
include "dec_sse2.c"
include "dec_sse41.c"
include "filters.c"
include "filters_mips_dsp_r2.c"
include "filters_sse2.c"
include "lossless.c"
include "lossless_mips_dsp_r2.c"
include "lossless_neon.$NEON"
include "lossless_sse2.c"
include "rescaler.c"
include "rescaler_mips32.c"
include "rescaler_mips_dsp_r2.c"
include "rescaler_neon.$NEON"
include "rescaler_sse2.c"
include "upsampling.c"
include "upsampling_mips_dsp_r2.c"
include "upsampling_neon.$NEON"
include "upsampling_sse2.c"
include "yuv.c"
include "yuv_mips32.c"
include "yuv_mips_dsp_r2.c"
include "yuv_sse2.c"
srcDir "src/utils"
include "ans.c"
include "bit_reader.c"
include "color_cache.c"
include "filters.c"
include "huffman.c"
include "quant_levels_dec.c"
include "random.c"
include "rescaler.c"
include "thread.c"
include "utils.c"
srcDir "src/dsp"
include "cost.c"
include "cost_mips32.c"
include "cost_mips_dsp_r2.c"
include "cost_sse2.c"
include "enc.c"
include "enc_avx2.c"
include "enc_mips32.c"
include "enc_mips_dsp_r2.c"
include "enc_neon.$NEON"
include "enc_sse2.c"
include "enc_sse41.c"
include "lossless_enc.c"
include "lossless_enc_mips32.c"
include "lossless_enc_mips_dsp_r2.c"
include "lossless_enc_neon.$NEON"
include "lossless_enc_sse2.c"
include "lossless_enc_sse41.c"
srcDir "src/enc"
include "alpha.c"
include "analysis.c"
include "backward_references.c"
include "config.c"
include "cost.c"
include "delta_palettization.c"
include "filter.c"
include "frame.c"
include "histogram.c"
include "iterator.c"
include "near_lossless.c"
include "picture.c"
include "picture_csp.c"
include "picture_psnr.c"
include "picture_rescale.c"
include "picture_tools.c"
include "quant.c"
include "syntax.c"
include "token.c"
include "tree.c"
include "vp8l.c"
include "webpenc.c"
srcDir "src/utils"
include "bit_writer.c"
include "huffman_encode.c"
include "quant_levels.c"
}
exportedHeaders {
srcDir "src"
}
}
}
}
webpdemux(NativeLibrarySpec) {
sources {
c {
source {
srcDir "src/demux"
include "anim_decode.c"
include "demux.c"
}
}
}
}
webpmux(NativeLibrarySpec) {
sources {
c {
source {
srcDir "src/mux/"
include "anim_encode.c"
include "muxedit.c"
include "muxinternal.c"
include "muxread.c"
}
}
}
}
// Executables from examples.
example_util(NativeLibrarySpec) {
binaries {
all {
lib library: "webp", linkage: "static"
}
}
sources {
c {
source {
srcDir "./examples"
include "example_util.c"
}
}
}
}
example_dec(NativeLibrarySpec) {
binaries {
all {
lib library: "webp", linkage: "static"
}
}
sources {
c {
source {
srcDir "./examples"
include "image_dec.c"
include "jpegdec.c"
include "metadata.c"
include "pngdec.c"
include "tiffdec.c"
include "webpdec.c"
}
}
}
}
cwebp(NativeExecutableSpec) {
binaries {
all {
lib library: "example_util", linkage: "static"
lib library: "example_dec", linkage: "static"
lib library: "webp", linkage: "static"
}
}
sources {
c {
source {
srcDir "./examples"
include "cwebp.c"
}
}
}
}
dwebp(NativeExecutableSpec) {
binaries {
all {
lib library: "example_util", linkage: "static"
lib library: "webp"
}
}
sources {
c {
source {
srcDir "./examples"
include "dwebp.c"
}
}
}
}
webpmux_example(NativeExecutableSpec) {
binaries {
all {
lib library: "example_util", linkage: "static"
lib library: "webpmux", linkage: "static"
lib library: "webp"
}
}
sources {
c {
source {
srcDir "./examples"
include "webpmux.c"
}
}
}
}
}
tasks {
// Task to test all possible configurations.
buildAllExecutables(Task) {
dependsOn $.binaries.findAll { it.buildable }
}
}
}
// Task to generate the wrapper.
task wrapper(type: Wrapper) {
gradleVersion = '2.13'
}

141
cmake/config.h.cmake Normal file
View File

@ -0,0 +1,141 @@
# Generate the config.h to compile with specific intrinsics / libs.
## Check for compiler options.
include(CheckCSourceCompiles)
check_c_source_compiles("
int main(void) {
(void)__builtin_bswap16(0);
return 0;
}
"
HAVE_BUILTIN_BSWAP16
)
check_c_source_compiles("
int main(void) {
(void)__builtin_bswap32(0);
return 0;
}
"
HAVE_BUILTIN_BSWAP32
)
check_c_source_compiles("
int main(void) {
(void)__builtin_bswap64(0);
return 0;
}
"
HAVE_BUILTIN_BSWAP64
)
## Check for libraries.
find_package(Threads)
if(Threads_FOUND)
if(CMAKE_USE_PTHREADS_INIT)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pthread")
endif()
foreach(PTHREAD_TEST HAVE_PTHREAD_PRIO_INHERIT PTHREAD_CREATE_UNDETACHED)
check_c_source_compiles("
#include <pthread.h>
int main (void) {
int attr = ${PTHREAD_TEST};
return attr;
}
" ${PTHREAD_TEST}
)
endforeach()
list(APPEND WEBP_DEP_LIBRARIES ${CMAKE_THREAD_LIBS_INIT})
endif()
set(WEBP_USE_THREAD ${Threads_FOUND})
# TODO: this seems unused, check with autotools.
set(LT_OBJDIR ".libs/")
# Only useful for vwebp, so useless for now.
# find_package(OpenGL)
# set(WEBP_HAVE_GL ${OPENGL_FOUND})
# set(WEBP_DEP_INCLUDE_DIRS ${WEBP_DEP_INCLUDE_DIRS} ${OPENGL_INCLUDE_DIRS})
# set(WEBP_DEP_LIBRARIES ${WEBP_DEP_LIBRARIES} ${OPENGL_LIBRARIES})
# Find the standard C math library.
find_library(MATH_LIBRARY NAMES m)
if(MATH_LIBRARY)
list(APPEND WEBP_DEP_LIBRARIES ${MATH_LIBRARY})
endif()
# Find the standard image libraries.
set(WEBP_DEP_IMG_LIBRARIES)
set(WEBP_DEP_IMG_INCLUDE_DIRS)
foreach(I_LIB PNG JPEG TIFF GIF)
find_package(${I_LIB})
set(WEBP_HAVE_${I_LIB} ${${I_LIB}_FOUND})
if(${I_LIB}_FOUND)
list(APPEND WEBP_DEP_IMG_LIBRARIES ${${I_LIB}_LIBRARIES})
list(APPEND WEBP_DEP_IMG_INCLUDE_DIRS ${${I_LIB}_INCLUDE_DIRS})
endif()
endforeach()
## Check for specific headers.
include(CheckIncludeFiles)
check_include_files("stdlib.h;stdarg.h;string.h;float.h" STDC_HEADERS)
check_include_files(dlfcn.h HAVE_DLFCN_H)
check_include_files(GLUT/glut.h HAVE_GLUT_GLUT_H)
check_include_files(GL/glut.h HAVE_GL_GLUT_H)
check_include_files(inttypes.h HAVE_INTTYPES_H)
check_include_files(memory.h HAVE_MEMORY_H)
check_include_files(OpenGL/glut.h HAVE_OPENGL_GLUT_H)
check_include_files(shlwapi.h HAVE_SHLWAPI_H)
check_include_files(stdint.h HAVE_STDINT_H)
check_include_files(stdlib.h HAVE_STDLIB_H)
check_include_files(strings.h HAVE_STRINGS_H)
check_include_files(string.h HAVE_STRING_H)
check_include_files(sys/stat.h HAVE_SYS_STAT_H)
check_include_files(sys/types.h HAVE_SYS_TYPES_H)
check_include_files(unistd.h HAVE_UNISTD_H)
check_include_files(wincodec.h HAVE_WINCODEC_H)
check_include_files(windows.h HAVE_WINDOWS_H)
# Windows specifics
if(HAVE_WINCODEC_H)
list(APPEND WEBP_DEP_LIBRARIES shlwapi ole32 windowscodecs)
endif()
## Check for SIMD extensions.
include(${CMAKE_CURRENT_LIST_DIR}/cpu.cmake)
## Define extra info.
set(PACKAGE ${PROJECT_NAME})
set(PACKAGE_NAME ${PROJECT_NAME})
# Read from configure.ac.
file(READ ${CMAKE_CURRENT_SOURCE_DIR}/configure.ac CONFIGURE_AC)
string(REGEX MATCHALL "\\[([0-9a-z\\.:/]*)\\]"
CONFIGURE_AC_PACKAGE_INFO ${CONFIGURE_AC}
)
function(strip_bracket VAR)
string(LENGTH ${${VAR}} TMP_LEN)
math(EXPR TMP_LEN ${TMP_LEN}-2)
string(SUBSTRING ${${VAR}} 1 ${TMP_LEN} TMP_SUB)
set(${VAR} ${TMP_SUB} PARENT_SCOPE)
endfunction()
list(GET CONFIGURE_AC_PACKAGE_INFO 1 PACKAGE_VERSION)
strip_bracket(PACKAGE_VERSION)
list(GET CONFIGURE_AC_PACKAGE_INFO 2 PACKAGE_BUGREPORT)
strip_bracket(PACKAGE_BUGREPORT)
list(GET CONFIGURE_AC_PACKAGE_INFO 3 PACKAGE_URL)
strip_bracket(PACKAGE_URL)
# Build more info.
set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}")
set(PACKAGE_TARNAME ${PACKAGE_NAME})
set(VERSION ${PACKAGE_VERSION})
## Generate the config.h header.
configure_file(${CMAKE_CURRENT_LIST_DIR}/config.h.in
${CMAKE_CURRENT_BINARY_DIR}/include/webp/config.h)
add_definitions(-DHAVE_CONFIG_H)
# The webp folder is included as we reference config.h as
# ../webp/config.h or webp/config.h
include_directories(${CMAKE_CURRENT_BINARY_DIR}/include
${CMAKE_CURRENT_BINARY_DIR}/include/webp
)

146
cmake/config.h.in Normal file
View File

@ -0,0 +1,146 @@
/* Adapted from the autotools src/webp/config.h.in. */
/* Define if building universal (internal helper macro) */
/* TODO: handle properly in CMake */
#cmakedefine AC_APPLE_UNIVERSAL_BUILD 1
/* Set to 1 if __builtin_bswap16 is available */
#cmakedefine HAVE_BUILTIN_BSWAP16 1
/* Set to 1 if __builtin_bswap32 is available */
#cmakedefine HAVE_BUILTIN_BSWAP32 1
/* Set to 1 if __builtin_bswap64 is available */
#cmakedefine HAVE_BUILTIN_BSWAP64 1
/* Define to 1 if you have the <dlfcn.h> header file. */
#cmakedefine HAVE_DLFCN_H 1
/* Define to 1 if you have the <GLUT/glut.h> header file. */
#cmakedefine HAVE_GLUT_GLUT_H 1
/* Define to 1 if you have the <GL/glut.h> header file. */
#cmakedefine HAVE_GL_GLUT_H 1
/* Define to 1 if you have the <inttypes.h> header file. */
#cmakedefine HAVE_INTTYPES_H 1
/* Define to 1 if you have the <memory.h> header file. */
#cmakedefine HAVE_MEMORY_H 1
/* Define to 1 if you have the <OpenGL/glut.h> header file. */
#cmakedefine HAVE_OPENGL_GLUT_H 1
/* Have PTHREAD_PRIO_INHERIT. */
#cmakedefine HAVE_PTHREAD_PRIO_INHERIT @HAVE_PTHREAD_PRIO_INHERIT@
/* Define to 1 if you have the <shlwapi.h> header file. */
#cmakedefine HAVE_SHLWAPI_H 1
/* Define to 1 if you have the <stdint.h> header file. */
#cmakedefine HAVE_STDINT_H 1
/* Define to 1 if you have the <stdlib.h> header file. */
#cmakedefine HAVE_STDLIB_H 1
/* Define to 1 if you have the <strings.h> header file. */
#cmakedefine HAVE_STRINGS_H 1
/* Define to 1 if you have the <string.h> header file. */
#cmakedefine HAVE_STRING_H 1
/* Define to 1 if you have the <sys/stat.h> header file. */
#cmakedefine HAVE_SYS_STAT_H 1
/* Define to 1 if you have the <sys/types.h> header file. */
#cmakedefine HAVE_SYS_TYPES_H 1
/* Define to 1 if you have the <unistd.h> header file. */
#cmakedefine HAVE_UNISTD_H 1
/* Define to 1 if you have the <wincodec.h> header file. */
#cmakedefine HAVE_WINCODEC_H 1
/* Define to 1 if you have the <windows.h> header file. */
#cmakedefine HAVE_WINDOWS_H 1
/* Define to the sub-directory in which libtool stores uninstalled libraries.
*/
/* TODO: handle properly in CMake */
#cmakedefine LT_OBJDIR "@LT_OBJDIR@"
/* Name of package */
#cmakedefine PACKAGE "@PROJECT_NAME@"
/* Define to the address where bug reports for this package should be sent. */
#cmakedefine PACKAGE_BUGREPORT "@PACKAGE_BUGREPORT@"
/* Define to the full name of this package. */
#cmakedefine PACKAGE_NAME "@PACKAGE_NAME@"
/* Define to the full name and version of this package. */
#cmakedefine PACKAGE_STRING "@PACKAGE_STRING@"
/* Define to the one symbol short name of this package. */
#cmakedefine PACKAGE_TARNAME "@PACKAGE_TARNAME@"
/* Define to the home page for this package. */
#cmakedefine PACKAGE_URL "@PACKAGE_URL@"
/* Define to the version of this package. */
#cmakedefine PACKAGE_VERSION "@PACKAGE_VERSION@"
/* Define to necessary symbol if this constant uses a non-standard name on
your system. */
#cmakedefine PTHREAD_CREATE_JOINABLE 1
/* Define to 1 if you have the ANSI C header files. */
#cmakedefine STDC_HEADERS 1
/* Version number of package */
#cmakedefine VERSION "@VERSION@"
/* Enable experimental code */
#cmakedefine WEBP_EXPERIMENTAL_FEATURES 1
/* Define to 1 to force aligned memory operations */
#cmakedefine WEBP_FORCE_ALIGNED 1
/* Set to 1 if AVX2 is supported */
#cmakedefine WEBP_HAVE_AVX2 1
/* Set to 1 if GIF library is installed */
#cmakedefine WEBP_HAVE_GIF 1
/* Set to 1 if OpenGL is supported */
#cmakedefine WEBP_HAVE_GL 1
/* Set to 1 if JPEG library is installed */
#cmakedefine WEBP_HAVE_JPEG 1
/* Set to 1 if PNG library is installed */
#cmakedefine WEBP_HAVE_PNG 1
/* Set to 1 if SSE2 is supported */
#cmakedefine WEBP_HAVE_SSE2 1
/* Set to 1 if SSE4.1 is supported */
#cmakedefine WEBP_HAVE_SSE41 1
/* Set to 1 if TIFF library is installed */
#cmakedefine WEBP_HAVE_TIFF 1
/* Undefine this to disable thread support. */
#cmakedefine WEBP_USE_THREAD 1
/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most
significant byte first (like Motorola and SPARC, unlike Intel). */
#if defined AC_APPLE_UNIVERSAL_BUILD
# if defined __BIG_ENDIAN__
# define WORDS_BIGENDIAN 1
# endif
#else
# ifndef WORDS_BIGENDIAN
# undef WORDS_BIGENDIAN
# endif
#endif

96
cmake/cpu.cmake Normal file
View File

@ -0,0 +1,96 @@
## Check for SIMD extensions.
function(webp_check_compiler_flag WEBP_SIMD_FLAG)
unset(WEBP_HAVE_FLAG_${WEBP_SIMD_FLAG} CACHE)
check_c_source_compiles("
#include \"${CMAKE_CURRENT_LIST_DIR}/../src/dsp/dsp.h\"
int main(void) {
#if !defined(WEBP_USE_${WEBP_SIMD_FLAG})
this is not valid code
#endif
return 0;
}
" WEBP_HAVE_FLAG_${WEBP_SIMD_FLAG}
)
if(WEBP_HAVE_FLAG_${WEBP_SIMD_FLAG})
set(WEBP_HAVE_${WEBP_SIMD_FLAG} 1 PARENT_SCOPE)
else()
set(WEBP_HAVE_${WEBP_SIMD_FLAG} 0 PARENT_SCOPE)
endif()
endfunction()
# those are included in the names of WEBP_USE_* in c++ code.
set(WEBP_SIMD_FLAGS "SSE2;SSE41;AVX2;MIPS32;MIPS_DSP_R2;NEON;MSA")
set(WEBP_SIMD_FILE_EXTENSIONS "_sse2.c;_sse41.c;_avx2.c;_mips32.c;_mips_dsp_r2.c;_neon.c;_msa.c")
if(MSVC)
# MSVC does not have a SSE4 flag but AVX2 support implies
# SSE4 support.
set(SIMD_ENABLE_FLAGS "/arch:SSE2;/arch:AVX2;/arch:AVX2;;;;")
set(SIMD_DISABLE_FLAGS)
else()
set(SIMD_ENABLE_FLAGS "-msse2;-msse4.1;-mavx2;-mips32;-mdspr2;-mfpu=neon;-mmsa")
set(SIMD_DISABLE_FLAGS "-mno-sse2;-mno-sse4.1;-mno-avx2;;-mno-dspr2;;-mno-msa")
endif()
set(WEBP_SIMD_FILES_TO_NOT_INCLUDE)
set(WEBP_SIMD_FILES_TO_INCLUDE)
set(WEBP_SIMD_FLAGS_TO_INCLUDE)
if(${ANDROID})
if(${ANDROID_ABI} STREQUAL "armeabi-v7a")
# This is because Android studio uses the configuration
# "-march=armv7-a -mfloat-abi=softfp -mfpu=vfpv3-d16"
# that does not trigger neon optimizations but should
# (as this configuration does not exist anymore).
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=neon ")
endif()
endif()
list(LENGTH WEBP_SIMD_FLAGS WEBP_SIMD_FLAGS_LENGTH)
math(EXPR WEBP_SIMD_FLAGS_RANGE "${WEBP_SIMD_FLAGS_LENGTH} - 1")
foreach(I_SIMD RANGE ${WEBP_SIMD_FLAGS_RANGE})
list(GET WEBP_SIMD_FLAGS ${I_SIMD} WEBP_SIMD_FLAG)
# First try with no extra flag added as the compiler might have default flags
# (especially on Android).
unset(WEBP_HAVE_${WEBP_SIMD_FLAG} CACHE)
set(CMAKE_REQUIRED_FLAGS)
webp_check_compiler_flag(${WEBP_SIMD_FLAG})
if(NOT WEBP_HAVE_${WEBP_SIMD_FLAG})
list(GET SIMD_ENABLE_FLAGS ${I_SIMD} SIMD_COMPILE_FLAG)
set(CMAKE_REQUIRED_FLAGS ${SIMD_COMPILE_FLAG})
webp_check_compiler_flag(${WEBP_SIMD_FLAG})
else()
set(SIMD_COMPILE_FLAG " ")
endif()
# Check which files we should include or not.
list(GET WEBP_SIMD_FILE_EXTENSIONS ${I_SIMD} WEBP_SIMD_FILE_EXTENSION)
file(GLOB SIMD_FILES "${CMAKE_CURRENT_LIST_DIR}/../"
"src/dsp/*${WEBP_SIMD_FILE_EXTENSION}"
)
if(WEBP_HAVE_${WEBP_SIMD_FLAG})
# Memorize the file and flags.
foreach(FILE ${SIMD_FILES})
list(APPEND WEBP_SIMD_FILES_TO_INCLUDE ${FILE})
list(APPEND WEBP_SIMD_FLAGS_TO_INCLUDE ${SIMD_COMPILE_FLAG})
endforeach()
else()
# Remove the file from the list.
foreach(FILE ${SIMD_FILES})
list(APPEND WEBP_SIMD_FILES_NOT_TO_INCLUDE ${FILE})
endforeach()
# Explicitly disable SIMD.
if(SIMD_DISABLE_FLAGS)
list(GET SIMD_DISABLE_FLAGS ${I_SIMD} SIMD_COMPILE_FLAG)
include(CheckCCompilerFlag)
if(SIMD_COMPILE_FLAG)
unset(HAS_COMPILE_FLAG CACHE)
check_c_compiler_flag(${SIMD_COMPILE_FLAG} HAS_COMPILE_FLAG)
if(HAS_COMPILE_FLAG)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SIMD_COMPILE_FLAG}")
endif()
endif()
endif()
endif()
endforeach()

View File

@ -1,4 +1,4 @@
AC_INIT([libwebp], [0.5.0],
AC_INIT([libwebp], [0.5.2],
[https://bugs.chromium.org/p/webp],,
[http://developers.google.com/speed/webp])
AC_CANONICAL_HOST
@ -10,6 +10,7 @@ dnl === it must occur before LT_INIT (AC_PROG_LIBTOOL).
m4_ifdef([AM_PROG_AR], [AM_PROG_AR])
AC_PROG_LIBTOOL
AC_PROG_SED
AM_PROG_CC_C_O
dnl === Enable less verbose output when building.
@ -68,8 +69,9 @@ TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-fvisibility=hidden])
TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wall])
TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wdeclaration-after-statement])
TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wextra])
TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wformat-nonliteral])
TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wformat-security])
TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wfloat-conversion])
TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wformat -Wformat-nonliteral])
TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wformat -Wformat-security])
TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wmissing-declarations])
TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wmissing-prototypes])
TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wold-style-definition])
@ -178,6 +180,71 @@ AS_IF([test "x$enable_sse2" != "xno"], [
CFLAGS=$SAVED_CFLAGS])
AC_SUBST([SSE2_FLAGS])])
AC_ARG_ENABLE([neon],
AS_HELP_STRING([--disable-neon],
[Disable detection of NEON support
@<:@default=auto@:>@]))
AC_ARG_ENABLE([neon_rtcd],
AS_HELP_STRING([--disable-neon-rtcd],
[Disable runtime detection of NEON support via
/proc/cpuinfo on Linux hosts
@<:@default=auto@:>@]))
# For ARM(7) hosts:
# Both NEON flags unset and NEON support detected = build all modules with NEON
# NEON detected with the use of -mfpu=neon = build only NEON modules with NEON
AS_IF([test "x$enable_neon" != "xno"], [
case "$host_cpu" in
arm|armv7*)
# Test for NEON support without flags before falling back to -mfpu=neon
for flag in '' '-mfpu=neon'; do
LOCAL_NEON_FLAGS="$INTRINSICS_CFLAGS $NEON_FLAGS"
TEST_AND_ADD_CFLAGS([LOCAL_NEON_FLAGS], [$flag])
SAVED_CFLAGS=$CFLAGS
CFLAGS="$CFLAGS $LOCAL_NEON_FLAGS"
dnl Note AC_LANG_PROGRAM([]) uses an old-style main definition.
AC_COMPILE_IFELSE([AC_LANG_SOURCE([
#include <arm_neon.h>
int main(void) {
int8x8_t v = vdup_n_s8(0);
(void)v;
return 0;
}])],
[NEON_FLAGS="$(echo $LOCAL_NEON_FLAGS | $SED 's/^ *//')"
AS_IF([test -n "$NEON_FLAGS"], [
AS_IF([test "${host_os%%-*}" = "linux" -o \
"x$enable_neon_rtcd" = "xno"], [
CFLAGS=$SAVED_CFLAGS
AC_DEFINE(WEBP_HAVE_NEON, [1], [Set to 1 if NEON is supported])
break
],[
AC_MSG_WARN(m4_normalize([NEON runtime cpu-detection is
unavailable for ${host_os%%-*}. Force
with CFLAGS=-mfpu=neon or
--disable-neon-rtcd.]))
enable_neon_rtcd=no
NEON_FLAGS=""
])
],[
CFLAGS=$SAVED_CFLAGS
AC_DEFINE(WEBP_HAVE_NEON, [1], [Set to 1 if NEON is supported])
break
])])
CFLAGS=$SAVED_CFLAGS
done
AS_IF([test -n "$NEON_FLAGS"], [
# If NEON is available and rtcd is disabled apply NEON_FLAGS globally.
AS_IF([test "x$enable_neon_rtcd" = "xno"], [
AM_CFLAGS="$AM_CFLAGS $NEON_FLAGS"
NEON_FLAGS=""],
[AC_DEFINE(WEBP_HAVE_NEON_RTCD, [1],
[Set to 1 if runtime detection of NEON is enabled])])])
;;
esac
AC_SUBST([NEON_FLAGS])])
dnl === CLEAR_LIBVARS([var_pfx])
dnl === Clears <var_pfx>_{INCLUDES,LIBS}.
AC_DEFUN([CLEAR_LIBVARS], [$1_INCLUDES=""; $1_LIBS=""])
@ -221,7 +288,7 @@ dnl === AC_DEFINE'ing <define> if successful.
AC_DEFUN([CHECK_FOR_BUILTIN],
[AC_LANG_PUSH([C])
AC_MSG_CHECKING([for $1])
AC_LINK_IFELSE([AC_LANG_PROGRAM([], [$1($2)])],
AC_LINK_IFELSE([AC_LANG_PROGRAM([], [(void)$1($2)])],
[AC_MSG_RESULT([yes])
AC_DEFINE([$3], [1],
[Set to 1 if $1 is available])],

View File

@ -429,6 +429,11 @@ int8 ColorTransformDelta(int8 t, int8 c) {
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
A conversion from the 8-bit unsigned representation (uint8) to the 8-bit
signed one (int8) is required before calling ColorTransformDelta().
It should be performed using 8-bit two's complement (that is: uint8 range
\[128-255\] is mapped to the \[-128, -1\] range of its converted int8 value).
The multiplication is to be done using more precision (with at least
16-bit dynamics). The sign extension property of the shift operation
does not matter here: only the lowest 8 bits are used from the result,

View File

@ -15,6 +15,27 @@ LOCAL_MODULE := example_util
include $(BUILD_STATIC_LIBRARY)
################################################################################
# libexample_dec
include $(CLEAR_VARS)
LOCAL_SRC_FILES := \
image_dec.c \
jpegdec.c \
metadata.c \
pngdec.c \
tiffdec.c \
webpdec.c \
LOCAL_CFLAGS := $(WEBP_CFLAGS)
LOCAL_C_INCLUDES := $(LOCAL_PATH)/../src
LOCAL_MODULE := example_dec
include $(BUILD_STATIC_LIBRARY)
################################################################################
# cwebp
@ -24,15 +45,10 @@ include $(CLEAR_VARS)
# minor modification to their Android.mk files.
LOCAL_SRC_FILES := \
cwebp.c \
jpegdec.c \
metadata.c \
pngdec.c \
tiffdec.c \
webpdec.c \
LOCAL_CFLAGS := $(WEBP_CFLAGS)
LOCAL_C_INCLUDES := $(LOCAL_PATH)/../src
LOCAL_STATIC_LIBRARIES := example_util webp
LOCAL_STATIC_LIBRARIES := example_util example_dec webp
LOCAL_MODULE := cwebp

View File

@ -12,16 +12,26 @@ if BUILD_GIF2WEBP
bin_PROGRAMS += gif2webp
endif
noinst_LTLIBRARIES = libexampleutil.la
noinst_LTLIBRARIES = libexampleutil.la libexampledec.la
libexampleutil_la_SOURCES = example_util.c example_util.h stopwatch.h
libexampledec_la_SOURCES = image_dec.c image_dec.h
libexampledec_la_SOURCES += jpegdec.c jpegdec.h
libexampledec_la_SOURCES += metadata.c metadata.h
libexampledec_la_SOURCES += pngdec.c pngdec.h
libexampledec_la_SOURCES += tiffdec.c tiffdec.h
libexampledec_la_SOURCES += webpdec.c webpdec.h
libexampledec_la_SOURCES += wicdec.c wicdec.h
libexampledec_la_CPPFLAGS = $(JPEG_INCLUDES) $(PNG_INCLUDES) $(TIFF_INCLUDES)
libexampledec_la_CPPFLAGS += $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE)
if BUILD_ANIMDIFF
noinst_PROGRAMS = anim_diff
endif
anim_diff_SOURCES = anim_diff.c anim_util.c anim_util.h
anim_diff_CPPFLAGS = $(AM_CPPFLAGS) $(GIF_INCLUDES)
anim_diff_CPPFLAGS = $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE) $(GIF_INCLUDES)
anim_diff_LDADD = ../src/demux/libwebpdemux.la
anim_diff_LDADD += libexampleutil.la
anim_diff_LDADD += $(GIF_LIBS) -lm
@ -31,15 +41,9 @@ dwebp_CPPFLAGS = $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE)
dwebp_CPPFLAGS += $(JPEG_INCLUDES) $(PNG_INCLUDES)
dwebp_LDADD = libexampleutil.la $(PNG_LIBS) $(JPEG_LIBS)
cwebp_SOURCES = cwebp.c metadata.c metadata.h stopwatch.h
cwebp_SOURCES += jpegdec.c jpegdec.h
cwebp_SOURCES += pngdec.c pngdec.h
cwebp_SOURCES += tiffdec.c tiffdec.h
cwebp_SOURCES += webpdec.c webpdec.h
cwebp_SOURCES += wicdec.c wicdec.h
cwebp_SOURCES = cwebp.c stopwatch.h
cwebp_CPPFLAGS = $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE)
cwebp_CPPFLAGS += $(JPEG_INCLUDES) $(PNG_INCLUDES) $(TIFF_INCLUDES)
cwebp_LDADD = libexampleutil.la ../src/libwebp.la
cwebp_LDADD = libexampleutil.la libexampledec.la ../src/libwebp.la
cwebp_LDADD += $(JPEG_LIBS) $(PNG_LIBS) $(TIFF_LIBS)
gif2webp_SOURCES = gif2webp.c gifdec.c gifdec.h

View File

@ -13,6 +13,7 @@
//
// example: anim_diff foo.gif bar.webp
#include <assert.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h> // for 'strtod'.
@ -29,20 +30,67 @@ static int AdditionWillOverflow(int a, int b) {
return (b > 0) && (a > INT_MAX - b);
}
// Minimize number of frames by combining successive frames that have exact same
// ARGB data into a single longer duration frame.
static void MinimizeAnimationFrames(AnimatedImage* const img) {
static int FramesAreEqual(const uint8_t* const rgba1,
const uint8_t* const rgba2, int width, int height) {
const int stride = width * 4; // Always true for 'DecodedFrame.rgba'.
return !memcmp(rgba1, rgba2, stride * height);
}
static WEBP_INLINE int PixelsAreSimilar(uint32_t src, uint32_t dst,
int max_allowed_diff) {
const int src_a = (src >> 24) & 0xff;
const int src_r = (src >> 16) & 0xff;
const int src_g = (src >> 8) & 0xff;
const int src_b = (src >> 0) & 0xff;
const int dst_a = (dst >> 24) & 0xff;
const int dst_r = (dst >> 16) & 0xff;
const int dst_g = (dst >> 8) & 0xff;
const int dst_b = (dst >> 0) & 0xff;
return (abs(src_r * src_a - dst_r * dst_a) <= (max_allowed_diff * 255)) &&
(abs(src_g * src_a - dst_g * dst_a) <= (max_allowed_diff * 255)) &&
(abs(src_b * src_a - dst_b * dst_a) <= (max_allowed_diff * 255)) &&
(abs(src_a - dst_a) <= max_allowed_diff);
}
static int FramesAreSimilar(const uint8_t* const rgba1,
const uint8_t* const rgba2,
int width, int height, int max_allowed_diff) {
int i, j;
assert(max_allowed_diff > 0);
for (j = 0; j < height; ++j) {
for (i = 0; i < width; ++i) {
const int stride = width * 4;
const size_t offset = j * stride + i;
if (!PixelsAreSimilar(rgba1[offset], rgba2[offset], max_allowed_diff)) {
return 0;
}
}
}
return 1;
}
// Minimize number of frames by combining successive frames that have at max
// 'max_diff' difference per channel between corresponding pixels.
static void MinimizeAnimationFrames(AnimatedImage* const img, int max_diff) {
uint32_t i;
for (i = 1; i < img->num_frames; ++i) {
DecodedFrame* const frame1 = &img->frames[i - 1];
DecodedFrame* const frame2 = &img->frames[i];
const uint8_t* const rgba1 = frame1->rgba;
const uint8_t* const rgba2 = frame2->rgba;
int should_merge_frames = 0;
// If merging frames will result in integer overflow for 'duration',
// skip merging.
if (AdditionWillOverflow(frame1->duration, frame2->duration)) continue;
if (!memcmp(rgba1, rgba2, img->canvas_width * 4 * img->canvas_height)) {
// Merge 'i+1'th frame into 'i'th frame.
if (max_diff > 0) {
should_merge_frames = FramesAreSimilar(rgba1, rgba2, img->canvas_width,
img->canvas_height, max_diff);
} else {
should_merge_frames =
FramesAreEqual(rgba1, rgba2, img->canvas_width, img->canvas_height);
}
if (should_merge_frames) { // Merge 'i+1'th frame into 'i'th frame.
frame1->duration += frame2->duration;
if (i + 1 < img->num_frames) {
memmove(&img->frames[i], &img->frames[i + 1],
@ -62,6 +110,20 @@ static int CompareValues(uint32_t a, uint32_t b, const char* output_str) {
return 1;
}
static int CompareBackgroundColor(uint32_t bg1, uint32_t bg2, int premultiply) {
if (premultiply) {
const int alpha1 = (bg1 >> 24) & 0xff;
const int alpha2 = (bg2 >> 24) & 0xff;
if (alpha1 == 0 && alpha2 == 0) return 1;
}
if (bg1 != bg2) {
fprintf(stderr, "Background color mismatch: 0x%08x vs 0x%08x\n",
bg1, bg2);
return 0;
}
return 1;
}
// Note: As long as frame durations and reconstructed frames are identical, it
// is OK for other aspects like offsets, dispose/blend method to vary.
static int CompareAnimatedImagePair(const AnimatedImage* const img1,
@ -83,8 +145,8 @@ static int CompareAnimatedImagePair(const AnimatedImage* const img1,
if (is_multi_frame_image) { // Checks relevant for multi-frame images only.
ok = CompareValues(img1->loop_count, img2->loop_count,
"Loop count mismatch") && ok;
ok = CompareValues(img1->bgcolor, img2->bgcolor,
"Background color mismatch") && ok;
ok = CompareBackgroundColor(img1->bgcolor, img2->bgcolor,
premultiply) && ok;
}
for (i = 0; i < img1->num_frames; ++i) {
@ -125,6 +187,11 @@ static void Help(void) {
printf(" -min_psnr <float> ... minimum per-frame PSNR\n");
printf(" -raw_comparison ..... if this flag is not used, RGB is\n");
printf(" premultiplied before comparison\n");
#ifdef WEBP_EXPERIMENTAL_FEATURES
printf(" -max_diff <int> ..... maximum allowed difference per channel "
" between corresponding pixels in subsequent"
" frames\n");
#endif
}
int main(int argc, const char* argv[]) {
@ -135,6 +202,7 @@ int main(int argc, const char* argv[]) {
int got_input1 = 0;
int got_input2 = 0;
int premultiply = 1;
int max_diff = 0;
int i, c;
const char* files[2] = { NULL, NULL };
AnimatedImage images[2];
@ -168,6 +236,21 @@ int main(int argc, const char* argv[]) {
}
} else if (!strcmp(argv[c], "-raw_comparison")) {
premultiply = 0;
#ifdef WEBP_EXPERIMENTAL_FEATURES
} else if (!strcmp(argv[c], "-max_diff")) {
if (c < argc - 1) {
const char* const v = argv[++c];
char* end = NULL;
const int n = (int)strtol(v, &end, 10);
if (end == v) {
parse_error = 1;
fprintf(stderr, "Error! '%s' is not an integer.\n", v);
}
max_diff = n;
} else {
parse_error = 1;
}
#endif
} else {
if (!got_input1) {
files[0] = argv[c];
@ -201,7 +284,7 @@ int main(int argc, const char* argv[]) {
return_code = -2;
goto End;
} else {
MinimizeAnimationFrames(&images[i]);
MinimizeAnimationFrames(&images[i], max_diff);
}
}

View File

@ -39,13 +39,24 @@ static int IsFullFrame(int width, int height,
return (width == canvas_width && height == canvas_height);
}
static int CheckSizeForOverflow(uint64_t size) {
return (size == (size_t)size);
}
static int AllocateFrames(AnimatedImage* const image, uint32_t num_frames) {
uint32_t i;
const size_t rgba_size =
image->canvas_width * kNumChannels * image->canvas_height;
uint8_t* const mem = (uint8_t*)malloc(num_frames * rgba_size * sizeof(*mem));
DecodedFrame* const frames =
(DecodedFrame*)malloc(num_frames * sizeof(*frames));
uint8_t* mem = NULL;
DecodedFrame* frames = NULL;
const uint64_t rgba_size =
(uint64_t)image->canvas_width * kNumChannels * image->canvas_height;
const uint64_t total_size = (uint64_t)num_frames * rgba_size * sizeof(*mem);
const uint64_t total_frame_size = (uint64_t)num_frames * sizeof(*frames);
if (!CheckSizeForOverflow(total_size) ||
!CheckSizeForOverflow(total_frame_size)) {
return 0;
}
mem = (uint8_t*)malloc((size_t)total_size);
frames = (DecodedFrame*)malloc((size_t)total_frame_size);
if (mem == NULL || frames == NULL) {
free(mem);
@ -232,6 +243,7 @@ static int ReadAnimatedWebP(const char filename[],
fprintf(stderr, "Error decoding frame #%u\n", frame_index);
goto End;
}
assert(frame_index < anim_info.frame_count);
curr_frame = &image->frames[frame_index];
curr_rgba = curr_frame->rgba;
curr_frame->duration = timestamp - prev_frame_timestamp;
@ -264,6 +276,8 @@ static int ReadAnimatedWebP(const char filename[],
// -----------------------------------------------------------------------------
// GIF Decoding.
#ifdef WEBP_HAVE_GIF
// Returns true if this is a valid GIF bitstream.
static int IsGIF(const WebPData* const data) {
return data->size > GIF_STAMP_LEN &&
@ -272,8 +286,6 @@ static int IsGIF(const WebPData* const data) {
!memcmp(GIF89_STAMP, data->bytes, GIF_STAMP_LEN));
}
#ifdef WEBP_HAVE_GIF
// GIFLIB_MAJOR is only defined in libgif >= 4.2.0.
#if defined(GIFLIB_MAJOR) && defined(GIFLIB_MINOR)
# define LOCAL_GIF_VERSION ((GIFLIB_MAJOR << 8) | GIFLIB_MINOR)
@ -395,7 +407,7 @@ static uint32_t GetBackgroundColorGIF(GifFileType* gif) {
const ColorMapObject* const color_map = gif->SColorMap;
if (transparent_index != NO_TRANSPARENT_COLOR &&
gif->SBackGroundColor == transparent_index) {
return 0x00ffffff; // Special case: transparent white.
return 0x00000000; // Special case: transparent black.
} else if (color_map == NULL || color_map->Colors == NULL
|| gif->SBackGroundColor >= color_map->ColorCount) {
return 0xffffffff; // Invalid: assume white.
@ -664,6 +676,11 @@ static int ReadAnimatedGIF(const char filename[], AnimatedImage* const image,
#else
static int IsGIF(const WebPData* const data) {
(void)data;
return 0;
}
static int ReadAnimatedGIF(const char filename[], AnimatedImage* const image,
int dump_frames, const char dump_folder[]) {
(void)filename;

View File

@ -20,17 +20,10 @@
#include "webp/config.h"
#endif
#include "webp/encode.h"
#include "./example_util.h"
#include "./metadata.h"
#include "./image_dec.h"
#include "./stopwatch.h"
#include "./jpegdec.h"
#include "./pngdec.h"
#include "./tiffdec.h"
#include "./webpdec.h"
#include "./wicdec.h"
#include "webp/encode.h"
#ifndef WEBP_DLL
#ifdef __cplusplus
@ -50,13 +43,12 @@ static int verbose = 0;
static int ReadYUV(const uint8_t* const data, size_t data_size,
WebPPicture* const pic) {
int y;
const int use_argb = pic->use_argb;
const int uv_width = (pic->width + 1) / 2;
const int uv_height = (pic->height + 1) / 2;
const int y_plane_size = pic->width * pic->height;
const int uv_plane_size = uv_width * uv_height;
const size_t expected_data_size =
pic->width * pic->height + 2 * uv_plane_size;
const size_t expected_data_size = y_plane_size + 2 * uv_plane_size;
if (data_size != expected_data_size) {
fprintf(stderr,
@ -67,18 +59,12 @@ static int ReadYUV(const uint8_t* const data, size_t data_size,
pic->use_argb = 0;
if (!WebPPictureAlloc(pic)) return 0;
for (y = 0; y < pic->height; ++y) {
memcpy(pic->y + y * pic->y_stride, data + y * pic->width,
pic->width * sizeof(*pic->y));
}
for (y = 0; y < uv_height; ++y) {
const uint8_t* const uv_data = data + pic->height * pic->y_stride;
memcpy(pic->u + y * pic->uv_stride, uv_data + y * uv_width,
uv_width * sizeof(*uv_data));
memcpy(pic->v + y * pic->uv_stride, uv_data + y * uv_width + uv_plane_size,
uv_width * sizeof(*uv_data));
}
ExUtilCopyPlane(data, pic->width, pic->y, pic->y_stride,
pic->width, pic->height);
ExUtilCopyPlane(data + y_plane_size, uv_width,
pic->u, pic->uv_stride, uv_width, uv_height);
ExUtilCopyPlane(data + y_plane_size + uv_plane_size, uv_width,
pic->v, pic->uv_stride, uv_width, uv_height);
return use_argb ? WebPPictureYUVAToARGB(pic) : 1;
}
@ -109,34 +95,6 @@ static int ReadPicture(const char* const filename, WebPPicture* const pic,
#else // !HAVE_WINCODEC_H
typedef enum {
PNG_ = 0,
JPEG_,
TIFF_, // 'TIFF' clashes with libtiff
WEBP_,
UNSUPPORTED
} InputFileFormat;
static uint32_t GetBE32(const uint8_t buf[]) {
return ((uint32_t)buf[0] << 24) | (buf[1] << 16) | (buf[2] << 8) | buf[3];
}
static InputFileFormat GuessImageType(const uint8_t buf[12]) {
InputFileFormat format = UNSUPPORTED;
const uint32_t magic1 = GetBE32(buf + 0);
const uint32_t magic2 = GetBE32(buf + 8);
if (magic1 == 0x89504E47U) {
format = PNG_;
} else if (magic1 >= 0xFFD8FF00U && magic1 <= 0xFFD8FFFFU) {
format = JPEG_;
} else if (magic1 == 0x49492A00 || magic1 == 0x4D4D002A) {
format = TIFF_;
} else if (magic1 == 0x52494646 && magic2 == 0x57454250) {
format = WEBP_;
}
return format;
}
static int ReadPicture(const char* const filename, WebPPicture* const pic,
int keep_alpha, Metadata* const metadata) {
const uint8_t* data = NULL;
@ -147,19 +105,8 @@ static int ReadPicture(const char* const filename, WebPPicture* const pic,
if (!ok) goto End;
if (pic->width == 0 || pic->height == 0) {
ok = 0;
if (data_size >= 12) {
const InputFileFormat format = GuessImageType(data);
if (format == PNG_) {
ok = ReadPNG(data, data_size, pic, keep_alpha, metadata);
} else if (format == JPEG_) {
ok = ReadJPEG(data, data_size, pic, metadata);
} else if (format == TIFF_) {
ok = ReadTIFF(data, data_size, pic, keep_alpha, metadata);
} else if (format == WEBP_) {
ok = ReadWebP(data, data_size, pic, keep_alpha, metadata);
}
}
WebPImageReader reader = WebPGuessImageReader(data, data_size);
ok = (reader != NULL) && reader(data, data_size, pic, keep_alpha, metadata);
} else {
// If image size is specified, infer it as YUV format.
ok = ReadYUV(data, data_size, pic);
@ -567,11 +514,12 @@ static void HelpLong(void) {
printf("Windows builds can take as input any of the files handled by WIC.\n");
#endif
printf("\nOptions:\n");
printf(" -h / -help ............ short help\n");
printf(" -H / -longhelp ........ long help\n");
printf(" -q <float> ............. quality factor (0:small..100:big)\n");
printf(" -alpha_q <int> ......... transparency-compression quality "
"(0..100)\n");
printf(" -h / -help ............. short help\n");
printf(" -H / -longhelp ......... long help\n");
printf(" -q <float> ............. quality factor (0:small..100:big), "
"default=75\n");
printf(" -alpha_q <int> ......... transparency-compression quality (0..100),"
"\n default=100\n");
printf(" -preset <string> ....... preset setting, one of:\n");
printf(" default, photo, picture,\n");
printf(" drawing, icon, text\n");
@ -579,16 +527,20 @@ static void HelpLong(void) {
printf(" -z <int> ............... activates lossless preset with given\n"
" level in [0:fast, ..., 9:slowest]\n");
printf("\n");
printf(" -m <int> ............... compression method (0=fast, 6=slowest)\n");
printf(" -segments <int> ........ number of segments to use (1..4)\n");
printf(" -m <int> ............... compression method (0=fast, 6=slowest), "
"default=4\n");
printf(" -segments <int> ........ number of segments to use (1..4), "
"default=4\n");
printf(" -size <int> ............ target size (in bytes)\n");
printf(" -psnr <float> .......... target PSNR (in dB. typically: 42)\n");
printf("\n");
printf(" -s <int> <int> ......... input size (width x height) for YUV\n");
printf(" -sns <int> ............. spatial noise shaping (0:off, 100:max)\n");
printf(" -f <int> ............... filter strength (0=off..100)\n");
printf(" -sns <int> ............. spatial noise shaping (0:off, 100:max), "
"default=50\n");
printf(" -f <int> ............... filter strength (0=off..100), "
"default=60\n");
printf(" -sharpness <int> ....... "
"filter sharpness (0:most .. 7:least sharp)\n");
"filter sharpness (0:most .. 7:least sharp), default=0\n");
printf(" -strong ................ use strong filter instead "
"of simple (default)\n");
printf(" -nostrong .............. use simple filter instead of strong\n");
@ -605,19 +557,21 @@ static void HelpLong(void) {
printf(" -print_ssim ............ prints averaged SSIM distortion\n");
printf(" -print_lsim ............ prints local-similarity distortion\n");
printf(" -d <file.pgm> .......... dump the compressed output (PGM file)\n");
printf(" -alpha_method <int> .... transparency-compression method (0..1)\n");
printf(" -alpha_method <int> .... transparency-compression method (0..1), "
"default=1\n");
printf(" -alpha_filter <string> . predictive filtering for alpha plane,\n");
printf(" one of: none, fast (default) or best\n");
printf(" -exact ................. preserve RGB values in transparent area"
"\n");
printf(" -exact ................. preserve RGB values in transparent area, "
"default=off\n");
printf(" -blend_alpha <hex> ..... blend colors against background color\n"
" expressed as RGB values written in\n"
" hexadecimal, e.g. 0xc0e0d0 for red=0xc0\n"
" green=0xe0 and blue=0xd0\n");
printf(" -noalpha ............... discard any transparency information\n");
printf(" -lossless .............. encode image losslessly\n");
printf(" -lossless .............. encode image losslessly, default=off\n");
printf(" -near_lossless <int> ... use near-lossless image\n"
" preprocessing (0..100=off)\n");
" preprocessing (0..100=off), "
"default=100\n");
#ifdef WEBP_EXPERIMENTAL_FEATURES
printf(" -delta_palettization ... use delta palettization\n");
#endif // WEBP_EXPERIMENTAL_FEATURES
@ -972,6 +926,11 @@ int main(int argc, const char *argv[]) {
" encoding. Ignoring this option!\n");
}
}
// If a target size or PSNR was given, but somehow the -pass option was
// omitted, force a reasonable value.
if (config.target_size > 0 || config.target_PSNR > 0) {
if (config.pass == 1) config.pass = 6;
}
if (!WebPValidateConfig(&config)) {
fprintf(stderr, "Error! Invalid configuration.\n");

View File

@ -67,8 +67,13 @@ typedef enum {
PGM,
BMP,
TIFF,
YUV,
ALPHA_PLANE_ONLY // this is for experimenting only
RAW_YUV,
ALPHA_PLANE_ONLY, // this is for experimenting only
// forced colorspace output (for testing, mostly)
RGB, RGBA, BGR, BGRA, ARGB,
RGBA_4444, RGB_565,
rgbA, bgrA, Argb, rgbA_4444,
YUV, YUVA
} OutputFileFormat;
#ifdef HAVE_WINCODEC_H
@ -175,7 +180,7 @@ static int WritePNG(const char* out_file_name, int use_stdout,
const uint32_t height = buffer->height;
uint8_t* const rgb = buffer->u.RGBA.rgba;
const int stride = buffer->u.RGBA.stride;
const int has_alpha = (buffer->colorspace == MODE_BGRA);
const int has_alpha = WebPIsAlphaMode(buffer->colorspace);
return SUCCEEDED(WriteUsingWIC(out_file_name, use_stdout,
MAKE_REFGUID(GUID_ContainerFormatPng),
@ -193,7 +198,7 @@ static int WritePNG(FILE* out_file, const WebPDecBuffer* const buffer) {
const uint32_t height = buffer->height;
uint8_t* const rgb = buffer->u.RGBA.rgba;
const int stride = buffer->u.RGBA.stride;
const int has_alpha = (buffer->colorspace == MODE_RGBA);
const int has_alpha = WebPIsAlphaMode(buffer->colorspace);
volatile png_structp png;
volatile png_infop info;
png_uint_32 y;
@ -259,6 +264,24 @@ static int WritePPM(FILE* fout, const WebPDecBuffer* const buffer, int alpha) {
return 1;
}
// Save 16b mode (RGBA4444, RGB565, ...) for debugging purpose.
static int Write16bAsPGM(FILE* fout, const WebPDecBuffer* const buffer) {
const uint32_t width = buffer->width;
const uint32_t height = buffer->height;
const uint8_t* const rgba = buffer->u.RGBA.rgba;
const int stride = buffer->u.RGBA.stride;
const uint32_t bytes_per_px = 2;
uint32_t y;
fprintf(fout, "P5\n%u %u\n255\n", width * bytes_per_px, height);
for (y = 0; y < height; ++y) {
if (fwrite(rgba + y * stride, width, bytes_per_px, fout) != bytes_per_px) {
return 0;
}
}
return 1;
}
static void PutLE16(uint8_t* const dst, uint32_t value) {
dst[0] = (value >> 0) & 0xff;
dst[1] = (value >> 8) & 0xff;
@ -271,7 +294,7 @@ static void PutLE32(uint8_t* const dst, uint32_t value) {
#define BMP_HEADER_SIZE 54
static int WriteBMP(FILE* fout, const WebPDecBuffer* const buffer) {
const int has_alpha = (buffer->colorspace != MODE_BGR);
const int has_alpha = WebPIsAlphaMode(buffer->colorspace);
const uint32_t width = buffer->width;
const uint32_t height = buffer->height;
const uint8_t* const rgba = buffer->u.RGBA.rgba;
@ -332,7 +355,7 @@ static int WriteBMP(FILE* fout, const WebPDecBuffer* const buffer) {
#define TIFF_HEADER_SIZE (EXTRA_DATA_OFFSET + EXTRA_DATA_SIZE)
static int WriteTIFF(FILE* fout, const WebPDecBuffer* const buffer) {
const int has_alpha = (buffer->colorspace != MODE_RGB);
const int has_alpha = WebPIsAlphaMode(buffer->colorspace);
const uint32_t width = buffer->width;
const uint32_t height = buffer->height;
const uint8_t* const rgba = buffer->u.RGBA.rgba;
@ -418,7 +441,7 @@ static int WriteAlphaPlane(FILE* fout, const WebPDecBuffer* const buffer) {
// format=PGM: save a grayscale PGM file using the IMC4 layout
// (http://www.fourcc.org/yuv.php#IMC4). This is a very convenient format for
// viewing the samples, esp. for odd dimensions.
// format=YUV: just save the Y/U/V/A planes sequentially without header.
// format=RAW_YUV: just save the Y/U/V/A planes sequentially without header.
static int WritePGMOrYUV(FILE* fout, const WebPDecBuffer* const buffer,
OutputFileFormat format) {
const int width = buffer->width;
@ -426,7 +449,7 @@ static int WritePGMOrYUV(FILE* fout, const WebPDecBuffer* const buffer,
const WebPYUVABuffer* const yuv = &buffer->u.YUVA;
int ok = 1;
int y;
const int pad = (format == YUV) ? 0 : 1;
const int pad = (format == RAW_YUV) ? 0 : 1;
const int uv_width = (width + 1) / 2;
const int uv_height = (height + 1) / 2;
const int out_stride = (width + pad) & ~pad;
@ -487,7 +510,9 @@ static int SaveOutput(const WebPDecBuffer* const buffer,
}
}
if (format == PNG) {
if (format == PNG ||
format == RGBA || format == BGRA || format == ARGB ||
format == rgbA || format == bgrA || format == Argb) {
#ifdef HAVE_WINCODEC_H
ok &= WritePNG(out_file, use_stdout, buffer);
#else
@ -495,14 +520,17 @@ static int SaveOutput(const WebPDecBuffer* const buffer,
#endif
} else if (format == PAM) {
ok &= WritePPM(fout, buffer, 1);
} else if (format == PPM) {
} else if (format == PPM || format == RGB || format == BGR) {
ok &= WritePPM(fout, buffer, 0);
} else if (format == RGBA_4444 || format == RGB_565 || format == rgbA_4444) {
ok &= Write16bAsPGM(fout, buffer);
} else if (format == BMP) {
ok &= WriteBMP(fout, buffer);
} else if (format == TIFF) {
ok &= WriteTIFF(fout, buffer);
} else if (format == PGM || format == YUV) {
ok &= WritePGMOrYUV(fout, buffer, format);
} else if (format == PGM || format == RAW_YUV ||
format == YUV || format == YUVA) {
ok &= WritePGMOrYUV(fout, buffer, format == RAW_YUV ? RAW_YUV : PGM);
} else if (format == ALPHA_PLANE_ONLY) {
ok &= WriteAlphaPlane(fout, buffer);
}
@ -544,7 +572,7 @@ static void Help(void) {
" -yuv ......... save the raw YUV samples in flat layout\n"
"\n"
" Other options are:\n"
" -version .... print version number and exit\n"
" -version ..... print version number and exit\n"
" -nofancy ..... don't use the fancy YUV420 upscaler\n"
" -nofilter .... disable in-loop filtering\n"
" -nodither .... disable dithering\n"
@ -556,8 +584,8 @@ static void Help(void) {
" -flip ........ flip the output vertically\n"
" -alpha ....... only save the alpha plane\n"
" -incremental . use incremental decoding (useful for tests)\n"
" -h ....... this help message\n"
" -v ....... verbose (e.g. print encoding/decoding times)\n"
" -h ........... this help message\n"
" -v ........... verbose (e.g. print encoding/decoding times)\n"
" -quiet ....... quiet mode, don't print anything\n"
#ifndef WEBP_DLL
" -noasm ....... disable all assembly optimizations\n"
@ -569,6 +597,70 @@ static const char* const kFormatType[] = {
"unspecified", "lossy", "lossless"
};
static uint8_t* AllocateExternalBuffer(WebPDecoderConfig* config,
OutputFileFormat format,
int use_external_memory) {
uint8_t* external_buffer = NULL;
WebPDecBuffer* const output_buffer = &config->output;
int w = config->input.width;
int h = config->input.height;
if (config->options.use_scaling) {
w = config->options.scaled_width;
h = config->options.scaled_height;
} else if (config->options.use_cropping) {
w = config->options.crop_width;
h = config->options.crop_height;
}
if (format >= RGB && format <= rgbA_4444) {
const int bpp = (format == RGB || format == BGR) ? 3
: (format == RGBA_4444 || format == rgbA_4444 ||
format == RGB_565) ? 2
: 4;
uint32_t stride = bpp * w + 7; // <- just for exercising
external_buffer = (uint8_t*)malloc(stride * h);
if (external_buffer == NULL) return NULL;
output_buffer->u.RGBA.stride = stride;
output_buffer->u.RGBA.size = stride * h;
output_buffer->u.RGBA.rgba = external_buffer;
} else { // YUV and YUVA
const int has_alpha = WebPIsAlphaMode(output_buffer->colorspace);
uint8_t* tmp;
uint32_t stride = w + 3;
uint32_t uv_stride = (w + 1) / 2 + 13;
uint32_t total_size = stride * h * (has_alpha ? 2 : 1)
+ 2 * uv_stride * (h + 1) / 2;
assert(format >= YUV && format <= YUVA);
external_buffer = (uint8_t*)malloc(total_size);
if (external_buffer == NULL) return NULL;
tmp = external_buffer;
output_buffer->u.YUVA.y = tmp;
output_buffer->u.YUVA.y_stride = stride;
output_buffer->u.YUVA.y_size = stride * h;
tmp += output_buffer->u.YUVA.y_size;
if (has_alpha) {
output_buffer->u.YUVA.a = tmp;
output_buffer->u.YUVA.a_stride = stride;
output_buffer->u.YUVA.a_size = stride * h;
tmp += output_buffer->u.YUVA.a_size;
} else {
output_buffer->u.YUVA.a = NULL;
output_buffer->u.YUVA.a_stride = 0;
}
output_buffer->u.YUVA.u = tmp;
output_buffer->u.YUVA.u_stride = uv_stride;
output_buffer->u.YUVA.u_size = uv_stride * (h + 1) / 2;
tmp += output_buffer->u.YUVA.u_size;
output_buffer->u.YUVA.v = tmp;
output_buffer->u.YUVA.v_stride = uv_stride;
output_buffer->u.YUVA.v_size = uv_stride * (h + 1) / 2;
tmp += output_buffer->u.YUVA.v_size;
assert(tmp <= external_buffer + total_size);
}
output_buffer->is_external_memory = use_external_memory;
return external_buffer;
}
int main(int argc, const char *argv[]) {
int ok = 0;
const char *in_file = NULL;
@ -578,6 +670,10 @@ int main(int argc, const char *argv[]) {
WebPDecBuffer* const output_buffer = &config.output;
WebPBitstreamFeatures* const bitstream = &config.input;
OutputFileFormat format = PNG;
uint8_t* external_buffer = NULL;
int use_external_memory = 0;
const uint8_t* data = NULL;
int incremental = 0;
int c;
@ -617,7 +713,32 @@ int main(int argc, const char *argv[]) {
} else if (!strcmp(argv[c], "-pgm")) {
format = PGM;
} else if (!strcmp(argv[c], "-yuv")) {
format = YUV;
format = RAW_YUV;
} else if (!strcmp(argv[c], "-pixel_format") && c < argc - 1) {
const char* const fmt = argv[++c];
if (!strcmp(fmt, "RGB")) format = RGB;
else if (!strcmp(fmt, "RGBA")) format = RGBA;
else if (!strcmp(fmt, "BGR")) format = BGR;
else if (!strcmp(fmt, "BGRA")) format = BGRA;
else if (!strcmp(fmt, "ARGB")) format = ARGB;
else if (!strcmp(fmt, "RGBA_4444")) format = RGBA_4444;
else if (!strcmp(fmt, "RGB_565")) format = RGB_565;
else if (!strcmp(fmt, "rgbA")) format = rgbA;
else if (!strcmp(fmt, "bgrA")) format = bgrA;
else if (!strcmp(fmt, "Argb")) format = Argb;
else if (!strcmp(fmt, "rgbA_4444")) format = rgbA_4444;
else if (!strcmp(fmt, "YUV")) format = YUV;
else if (!strcmp(fmt, "YUVA")) format = YUVA;
else {
fprintf(stderr, "Can't parse pixel_format %s\n", fmt);
parse_error = 1;
}
} else if (!strcmp(argv[c], "-external_memory") && c < argc - 1) {
use_external_memory = ExUtilGetInt(argv[++c], 0, &parse_error);
parse_error |= (use_external_memory > 2 || use_external_memory < 0);
if (parse_error) {
fprintf(stderr, "Can't parse 'external_memory' value %s\n", argv[c]);
}
} else if (!strcmp(argv[c], "-mt")) {
config.options.use_threads = 1;
} else if (!strcmp(argv[c], "-alpha_dither")) {
@ -676,7 +797,6 @@ int main(int argc, const char *argv[]) {
{
VP8StatusCode status = VP8_STATUS_OK;
size_t data_size = 0;
const uint8_t* data = NULL;
if (!ExUtilLoadWebP(in_file, &data, &data_size, bitstream)) {
return -1;
}
@ -703,15 +823,33 @@ int main(int argc, const char *argv[]) {
bitstream->has_alpha ? MODE_rgbA : MODE_RGB;
break;
case PGM:
case YUV:
case RAW_YUV:
output_buffer->colorspace = bitstream->has_alpha ? MODE_YUVA : MODE_YUV;
break;
case ALPHA_PLANE_ONLY:
output_buffer->colorspace = MODE_YUVA;
break;
default:
free((void*)data);
return -1;
// forced modes:
case RGB: output_buffer->colorspace = MODE_RGB; break;
case RGBA: output_buffer->colorspace = MODE_RGBA; break;
case BGR: output_buffer->colorspace = MODE_BGR; break;
case BGRA: output_buffer->colorspace = MODE_BGRA; break;
case ARGB: output_buffer->colorspace = MODE_ARGB; break;
case RGBA_4444: output_buffer->colorspace = MODE_RGBA_4444; break;
case RGB_565: output_buffer->colorspace = MODE_RGB_565; break;
case rgbA: output_buffer->colorspace = MODE_rgbA; break;
case bgrA: output_buffer->colorspace = MODE_bgrA; break;
case Argb: output_buffer->colorspace = MODE_Argb; break;
case rgbA_4444: output_buffer->colorspace = MODE_rgbA_4444; break;
case YUV: output_buffer->colorspace = MODE_YUV; break;
case YUVA: output_buffer->colorspace = MODE_YUVA; break;
default: goto Exit;
}
if (use_external_memory > 0 && format >= RGB) {
external_buffer = AllocateExternalBuffer(&config, format,
use_external_memory);
if (external_buffer == NULL) goto Exit;
}
if (incremental) {
@ -720,7 +858,6 @@ int main(int argc, const char *argv[]) {
status = ExUtilDecodeWebP(data, data_size, verbose, &config);
}
free((void*)data);
ok = (status == VP8_STATUS_OK);
if (!ok) {
ExUtilPrintWebPError(in_file, status);
@ -750,6 +887,8 @@ int main(int argc, const char *argv[]) {
}
Exit:
WebPFreeDecBuffer(output_buffer);
free((void*)external_buffer);
free((void*)data);
return ok ? 0 : -1;
}

View File

@ -268,3 +268,21 @@ VP8StatusCode ExUtilDecodeWebPIncremental(
}
// -----------------------------------------------------------------------------
void ExUtilCopyPlane(const uint8_t* src, int src_stride,
uint8_t* dst, int dst_stride, int width, int height) {
while (height-- > 0) {
memcpy(dst, src, width * sizeof(*dst));
src += src_stride;
dst += dst_stride;
}
}
// -----------------------------------------------------------------------------
int ImgIoUtilCheckSizeArgumentsOverflow(uint64_t nmemb, size_t size) {
const uint64_t total_size = nmemb * size;
return (total_size == (size_t)total_size);
}
// -----------------------------------------------------------------------------

View File

@ -53,6 +53,17 @@ int ExUtilReadFromStdin(const uint8_t** data, size_t* data_size);
int ExUtilWriteFile(const char* const file_name,
const uint8_t* data, size_t data_size);
//------------------------------------------------------------------------------
// Copy width x height pixels from 'src' to 'dst' honoring the strides.
void ExUtilCopyPlane(const uint8_t* src, int src_stride,
uint8_t* dst, int dst_stride, int width, int height);
//------------------------------------------------------------------------------
// Returns 0 in case of overflow of nmemb * size.
int ImgIoUtilCheckSizeArgumentsOverflow(uint64_t nmemb, size_t size);
//------------------------------------------------------------------------------
// WebP decoding

View File

@ -55,7 +55,7 @@ static void Help(void) {
printf("Usage:\n");
printf(" gif2webp [options] gif_file -o webp_file\n");
printf("Options:\n");
printf(" -h / -help ............ this help\n");
printf(" -h / -help ............. this help\n");
printf(" -lossy ................. encode image using lossy compression\n");
printf(" -mixed ................. for each frame in the image, pick lossy\n"
" or lossless compression heuristically\n");
@ -98,7 +98,6 @@ int main(int argc, const char *argv[]) {
WebPPicture frame; // Frame rectangle only (not disposed).
WebPPicture curr_canvas; // Not disposed.
WebPPicture prev_canvas; // Disposed.
WebPPicture prev_to_prev_canvas; // Disposed.
WebPAnimEncoder* enc = NULL;
WebPAnimEncoderOptions enc_options;
@ -124,8 +123,7 @@ int main(int argc, const char *argv[]) {
if (!WebPConfigInit(&config) || !WebPAnimEncoderOptionsInit(&enc_options) ||
!WebPPictureInit(&frame) || !WebPPictureInit(&curr_canvas) ||
!WebPPictureInit(&prev_canvas) ||
!WebPPictureInit(&prev_to_prev_canvas)) {
!WebPPictureInit(&prev_canvas)) {
fprintf(stderr, "Error! Version mismatch!\n");
return -1;
}
@ -217,6 +215,7 @@ int main(int argc, const char *argv[]) {
return 0;
} else if (!strcmp(argv[c], "-quiet")) {
quiet = 1;
enc_options.verbose = 0;
} else if (!strcmp(argv[c], "-v")) {
verbose = 1;
enc_options.verbose = 1;
@ -304,7 +303,6 @@ int main(int argc, const char *argv[]) {
GIFClearPic(&frame, NULL);
WebPPictureCopy(&frame, &curr_canvas);
WebPPictureCopy(&frame, &prev_canvas);
WebPPictureCopy(&frame, &prev_to_prev_canvas);
// Background color.
GIFGetBackgroundColor(gif->SColorMap, gif->SBackGroundColor,
@ -341,7 +339,6 @@ int main(int argc, const char *argv[]) {
}
// Update canvases.
GIFCopyPixels(&prev_canvas, &prev_to_prev_canvas);
GIFDisposeFrame(orig_dispose, &gif_rect, &prev_canvas, &curr_canvas);
GIFCopyPixels(&curr_canvas, &prev_canvas);
@ -362,6 +359,8 @@ int main(int argc, const char *argv[]) {
if (DGifGetExtension(gif, &extension, &data) == GIF_ERROR) {
goto End;
}
if (data == NULL) continue;
switch (extension) {
case COMMENT_EXT_FUNC_CODE: {
break; // Do nothing for now.
@ -529,7 +528,6 @@ int main(int argc, const char *argv[]) {
WebPPictureFree(&frame);
WebPPictureFree(&curr_canvas);
WebPPictureFree(&prev_canvas);
WebPPictureFree(&prev_to_prev_canvas);
WebPAnimEncoderDelete(enc);
if (out != NULL && out_file != NULL) fclose(out);

View File

@ -21,7 +21,7 @@
#include "webp/encode.h"
#include "webp/mux_types.h"
#define GIF_TRANSPARENT_COLOR 0x00ffffff
#define GIF_TRANSPARENT_COLOR 0x00000000
#define GIF_WHITE_COLOR 0xffffffff
#define GIF_TRANSPARENT_MASK 0x01
#define GIF_DISPOSE_MASK 0x07
@ -81,20 +81,27 @@ int GIFReadGraphicsExtension(const GifByteType* const buf, int* const duration,
return 1;
}
static void Remap(const GifFileType* const gif, const uint8_t* const src,
int len, int transparent_index, uint32_t* dst) {
static int Remap(const GifFileType* const gif, const uint8_t* const src,
int len, int transparent_index, uint32_t* dst) {
int i;
const GifColorType* colors;
const ColorMapObject* const cmap =
gif->Image.ColorMap ? gif->Image.ColorMap : gif->SColorMap;
if (cmap == NULL) return;
if (cmap == NULL) return 1;
if (cmap->Colors == NULL || cmap->ColorCount <= 0) return 0;
colors = cmap->Colors;
for (i = 0; i < len; ++i) {
const GifColorType c = colors[src[i]];
dst[i] = (src[i] == transparent_index) ? GIF_TRANSPARENT_COLOR
: c.Blue | (c.Green << 8) | (c.Red << 16) | (0xff << 24);
if (src[i] == transparent_index) {
dst[i] = GIF_TRANSPARENT_COLOR;
} else if (src[i] < cmap->ColorCount) {
const GifColorType c = colors[src[i]];
dst[i] = c.Blue | (c.Green << 8) | (c.Red << 16) | (0xffu << 24);
} else {
return 0;
}
}
return 1;
}
int GIFReadFrame(GifFileType* const gif, int transparent_index,
@ -103,12 +110,18 @@ int GIFReadFrame(GifFileType* const gif, int transparent_index,
const GifImageDesc* const image_desc = &gif->Image;
uint32_t* dst = NULL;
uint8_t* tmp = NULL;
int ok = 0;
GIFFrameRect rect = {
const GIFFrameRect rect = {
image_desc->Left, image_desc->Top, image_desc->Width, image_desc->Height
};
const uint64_t memory_needed = 4 * rect.width * (uint64_t)rect.height;
int ok = 0;
*gif_rect = rect;
if (memory_needed != (size_t)memory_needed || memory_needed > (4ULL << 32)) {
fprintf(stderr, "Image is too large (%d x %d).", rect.width, rect.height);
return 0;
}
// Use a view for the sub-picture:
if (!WebPPictureView(picture, rect.x_offset, rect.y_offset,
rect.width, rect.height, &sub_image)) {
@ -127,20 +140,21 @@ int GIFReadFrame(GifFileType* const gif, int transparent_index,
const int interlace_jumps[] = { 8, 8, 4, 2 };
int pass;
for (pass = 0; pass < 4; ++pass) {
int y;
for (y = interlace_offsets[pass]; y < rect.height;
y += interlace_jumps[pass]) {
const size_t stride = (size_t)sub_image.argb_stride;
int y = interlace_offsets[pass];
uint32_t* row = dst + y * stride;
const size_t jump = interlace_jumps[pass] * stride;
for (; y < rect.height; y += interlace_jumps[pass], row += jump) {
if (DGifGetLine(gif, tmp, rect.width) == GIF_ERROR) goto End;
Remap(gif, tmp, rect.width, transparent_index,
dst + y * sub_image.argb_stride);
if (!Remap(gif, tmp, rect.width, transparent_index, row)) goto End;
}
}
} else { // Non-interlaced image.
int y;
for (y = 0; y < rect.height; ++y) {
uint32_t* ptr = dst;
for (y = 0; y < rect.height; ++y, ptr += sub_image.argb_stride) {
if (DGifGetLine(gif, tmp, rect.width) == GIF_ERROR) goto End;
Remap(gif, tmp, rect.width, transparent_index,
dst + y * sub_image.argb_stride);
if (!Remap(gif, tmp, rect.width, transparent_index, ptr)) goto End;
}
}
ok = 1;
@ -216,13 +230,11 @@ int GIFReadMetadata(GifFileType* const gif, GifByteType** const buf,
static void ClearRectangle(WebPPicture* const picture,
int left, int top, int width, int height) {
int j;
for (j = top; j < top + height; ++j) {
uint32_t* const dst = picture->argb + j * picture->argb_stride;
int i;
for (i = left; i < left + width; ++i) {
dst[i] = GIF_TRANSPARENT_COLOR;
}
int i, j;
const size_t stride = picture->argb_stride;
uint32_t* dst = picture->argb + top * stride + left;
for (j = 0; j < height; ++j, dst += stride) {
for (i = 0; i < width; ++i) dst[i] = GIF_TRANSPARENT_COLOR;
}
}
@ -246,29 +258,31 @@ void GIFDisposeFrame(GIFDisposeMethod dispose, const GIFFrameRect* const rect,
if (dispose == GIF_DISPOSE_BACKGROUND) {
GIFClearPic(curr_canvas, rect);
} else if (dispose == GIF_DISPOSE_RESTORE_PREVIOUS) {
const int src_stride = prev_canvas->argb_stride;
const uint32_t* const src =
prev_canvas->argb + rect->x_offset + rect->y_offset * src_stride;
const int dst_stride = curr_canvas->argb_stride;
uint32_t* const dst =
curr_canvas->argb + rect->x_offset + rect->y_offset * dst_stride;
const size_t src_stride = prev_canvas->argb_stride;
const uint32_t* const src = prev_canvas->argb + rect->x_offset
+ rect->y_offset * src_stride;
const size_t dst_stride = curr_canvas->argb_stride;
uint32_t* const dst = curr_canvas->argb + rect->x_offset
+ rect->y_offset * dst_stride;
assert(prev_canvas != NULL);
WebPCopyPlane((uint8_t*)src, 4 * src_stride, (uint8_t*)dst, 4 * dst_stride,
WebPCopyPlane((uint8_t*)src, (int)(4 * src_stride),
(uint8_t*)dst, (int)(4 * dst_stride),
4 * rect->width, rect->height);
}
}
void GIFBlendFrames(const WebPPicture* const src,
const GIFFrameRect* const rect, WebPPicture* const dst) {
int j;
int i, j;
const size_t src_stride = src->argb_stride;
const size_t dst_stride = dst->argb_stride;
assert(src->width == dst->width && src->height == dst->height);
for (j = rect->y_offset; j < rect->y_offset + rect->height; ++j) {
int i;
for (i = rect->x_offset; i < rect->x_offset + rect->width; ++i) {
const uint32_t src_pixel = src->argb[j * src->argb_stride + i];
const uint32_t src_pixel = src->argb[j * src_stride + i];
const int src_alpha = src_pixel >> 24;
if (src_alpha != 0) {
dst->argb[j * dst->argb_stride + i] = src_pixel;
dst->argb[j * dst_stride + i] = src_pixel;
}
}
}

46
examples/image_dec.c Normal file
View File

@ -0,0 +1,46 @@
// Copyright 2016 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Generic image-type guessing.
#include "./image_dec.h"
static WEBP_INLINE uint32_t GetBE32(const uint8_t buf[]) {
return ((uint32_t)buf[0] << 24) | (buf[1] << 16) | (buf[2] << 8) | buf[3];
}
WebPInputFileFormat WebPGuessImageType(const uint8_t* const data,
size_t data_size) {
WebPInputFileFormat format = WEBP_UNSUPPORTED_FORMAT;
if (data != NULL && data_size >= 12) {
const uint32_t magic1 = GetBE32(data + 0);
const uint32_t magic2 = GetBE32(data + 8);
if (magic1 == 0x89504E47U) {
format = WEBP_PNG_FORMAT;
} else if (magic1 >= 0xFFD8FF00U && magic1 <= 0xFFD8FFFFU) {
format = WEBP_JPEG_FORMAT;
} else if (magic1 == 0x49492A00 || magic1 == 0x4D4D002A) {
format = WEBP_TIFF_FORMAT;
} else if (magic1 == 0x52494646 && magic2 == 0x57454250) {
format = WEBP_WEBP_FORMAT;
}
}
return format;
}
WebPImageReader WebPGuessImageReader(const uint8_t* const data,
size_t data_size) {
switch (WebPGuessImageType(data, data_size)) {
case WEBP_PNG_FORMAT: return ReadPNG;
case WEBP_JPEG_FORMAT: return ReadJPEG;
case WEBP_TIFF_FORMAT: return ReadTIFF;
case WEBP_WEBP_FORMAT: return ReadWebP;
default: return NULL;
}
}

61
examples/image_dec.h Normal file
View File

@ -0,0 +1,61 @@
// Copyright 2016 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// All-in-one library to decode PNG/JPEG/WebP/TIFF/WIC input images.
//
// Author: Skal (pascal.massimino@gmail.com)
#ifndef WEBP_EXAMPLES_IMAGE_DEC_H_
#define WEBP_EXAMPLES_IMAGE_DEC_H_
#include "webp/types.h"
#ifdef HAVE_CONFIG_H
#include "webp/config.h"
#endif
#include "./metadata.h"
#include "./jpegdec.h"
#include "./pngdec.h"
#include "./tiffdec.h"
#include "./webpdec.h"
#include "./wicdec.h"
#ifdef __cplusplus
extern "C" {
#endif
typedef enum {
WEBP_PNG_FORMAT = 0,
WEBP_JPEG_FORMAT,
WEBP_TIFF_FORMAT,
WEBP_WEBP_FORMAT,
WEBP_UNSUPPORTED_FORMAT
} WebPInputFileFormat;
// Try to infer the image format. 'data_size' should be larger than 12.
// Returns WEBP_UNSUPPORTED_FORMAT if format can't be guess safely.
WebPInputFileFormat WebPGuessImageType(const uint8_t* const data,
size_t data_size);
// Signature for common image-reading functions (ReadPNG, ReadJPEG, ...)
typedef int (*WebPImageReader)(const uint8_t* const data, size_t data_size,
struct WebPPicture* const pic,
int keep_alpha, struct Metadata* const metadata);
// This function is similar to WebPGuessImageType(), but returns a
// suitable reader function. Or NULL if the image can't be guessed.
WebPImageReader WebPGuessImageReader(const uint8_t* const data,
size_t data_size);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // WEBP_EXAMPLES_IMAGE_DEC_H_

View File

@ -222,10 +222,10 @@ static void ContextInit(j_decompress_ptr cinfo) {
ctx->pub.bytes_in_buffer = ctx->data_size;
}
static int ContextFill(j_decompress_ptr cinfo) {
static boolean ContextFill(j_decompress_ptr cinfo) {
// we shouldn't get here.
ERREXIT(cinfo, JERR_FILE_READ);
return 0;
return FALSE;
}
static void ContextSkip(j_decompress_ptr cinfo, long jump_size) {
@ -255,15 +255,18 @@ static void ContextSetup(volatile struct jpeg_decompress_struct* const cinfo,
}
int ReadJPEG(const uint8_t* const data, size_t data_size,
WebPPicture* const pic, Metadata* const metadata) {
WebPPicture* const pic, int keep_alpha,
Metadata* const metadata) {
volatile int ok = 0;
int stride, width, height;
int width, height;
int64_t stride;
volatile struct jpeg_decompress_struct dinfo;
struct my_error_mgr jerr;
uint8_t* volatile rgb = NULL;
JSAMPROW buffer[1];
JPEGReadContext ctx;
(void)keep_alpha;
memset(&ctx, 0, sizeof(ctx));
ctx.data = data;
ctx.data_size = data_size;
@ -295,9 +298,14 @@ int ReadJPEG(const uint8_t* const data, size_t data_size,
width = dinfo.output_width;
height = dinfo.output_height;
stride = dinfo.output_width * dinfo.output_components * sizeof(*rgb);
stride = (int64_t)dinfo.output_width * dinfo.output_components * sizeof(*rgb);
rgb = (uint8_t*)malloc(stride * height);
if (stride != (int)stride ||
!ImgIoUtilCheckSizeArgumentsOverflow(stride, height)) {
goto End;
}
rgb = (uint8_t*)malloc((size_t)stride * height);
if (rgb == NULL) {
goto End;
}
@ -324,7 +332,7 @@ int ReadJPEG(const uint8_t* const data, size_t data_size,
// WebP conversion.
pic->width = width;
pic->height = height;
ok = WebPPictureImportRGB(pic, rgb, stride);
ok = WebPPictureImportRGB(pic, rgb, (int)stride);
if (!ok) goto Error;
End:
@ -333,11 +341,12 @@ int ReadJPEG(const uint8_t* const data, size_t data_size,
}
#else // !WEBP_HAVE_JPEG
int ReadJPEG(const uint8_t* const data, size_t data_size,
struct WebPPicture* const pic,
struct WebPPicture* const pic, int keep_alpha,
struct Metadata* const metadata) {
(void)data;
(void)data_size;
(void)pic;
(void)keep_alpha;
(void)metadata;
fprintf(stderr, "JPEG support not compiled. Please install the libjpeg "
"development package before building.\n");

View File

@ -25,8 +25,11 @@ struct WebPPicture;
// Reads a JPEG from 'data', returning the decoded output in 'pic'.
// The output is RGB or YUV depending on pic->use_argb value.
// Returns true on success.
// 'keep_alpha' has no effect, but is kept for coherence with other signatures
// for image readers.
int ReadJPEG(const uint8_t* const data, size_t data_size,
struct WebPPicture* const pic, struct Metadata* const metadata);
struct WebPPicture* const pic, int keep_alpha,
struct Metadata* const metadata);
#ifdef __cplusplus
} // extern "C"

View File

@ -18,7 +18,6 @@
#include <stdio.h>
#ifdef WEBP_HAVE_PNG
#include <assert.h>
#include <png.h>
#include <setjmp.h> // note: this must be included *after* png.h
#include <stdlib.h>
@ -198,7 +197,9 @@ typedef struct {
static void ReadFunc(png_structp png_ptr, png_bytep data, png_size_t length) {
PNGReadContext* const ctx = (PNGReadContext*)png_get_io_ptr(png_ptr);
assert(ctx->offset + length <= ctx->data_size);
if (ctx->data_size - ctx->offset < length) {
png_error(png_ptr, "ReadFunc: invalid read length (overflow)!");
}
memcpy(data, ctx->data + ctx->offset, length);
ctx->offset += length;
}
@ -216,7 +217,7 @@ int ReadPNG(const uint8_t* const data, size_t data_size,
int p;
volatile int ok = 0;
png_uint_32 width, height, y;
png_uint_32 stride;
int64_t stride;
uint8_t* volatile rgb = NULL;
context.data = data;
@ -269,8 +270,14 @@ int ReadPNG(const uint8_t* const data, size_t data_size,
num_passes = png_set_interlace_handling(png);
png_read_update_info(png, info);
stride = (has_alpha ? 4 : 3) * width * sizeof(*rgb);
rgb = (uint8_t*)malloc(stride * height);
stride = (int64_t)(has_alpha ? 4 : 3) * width * sizeof(*rgb);
if (stride != (int)stride ||
!ImgIoUtilCheckSizeArgumentsOverflow(stride, height)) {
goto Error;
}
rgb = (uint8_t*)malloc((size_t)stride * height);
if (rgb == NULL) goto Error;
for (p = 0; p < num_passes; ++p) {
for (y = 0; y < height; ++y) {

View File

@ -22,6 +22,7 @@
#include <tiffio.h>
#include "webp/encode.h"
#include "./example_util.h"
#include "./metadata.h"
static const struct {
@ -124,6 +125,7 @@ int ReadTIFF(const uint8_t* const data, size_t data_size,
MySize, MyMapFile, MyUnmapFile);
uint32 width, height;
uint32* raster;
int64_t alloc_size;
int ok = 0;
tdir_t dircount;
@ -144,7 +146,16 @@ int ReadTIFF(const uint8_t* const data, size_t data_size,
fprintf(stderr, "Error! Cannot retrieve TIFF image dimensions.\n");
goto End;
}
raster = (uint32*)_TIFFmalloc(width * height * sizeof(*raster));
if (!ImgIoUtilCheckSizeArgumentsOverflow((uint64_t)width * height,
sizeof(*raster))) {
goto End;
}
// _Tiffmalloc uses a signed type for size.
alloc_size = (int64_t)((uint64_t)width * height * sizeof(*raster));
if (alloc_size < 0 || alloc_size != (tsize_t)alloc_size) goto End;
raster = (uint32*)_TIFFmalloc((tsize_t)alloc_size);
if (raster != NULL) {
if (TIFFReadRGBAImageOriented(tif, width, height, raster,
ORIENTATION_TOPLEFT, 1)) {

View File

@ -14,6 +14,10 @@
#include "webp/config.h"
#endif
#if defined(__unix__) || defined(__CYGWIN__)
#define _POSIX_C_SOURCE 200112L // for setenv
#endif
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@ -63,6 +67,7 @@ static struct {
WebPIterator curr_frame;
WebPIterator prev_frame;
WebPChunkIterator iccp;
int viewport_width, viewport_height;
} kParams;
static void ClearPreviousPic(void) {
@ -242,18 +247,24 @@ static void HandleKey(unsigned char key, int pos_x, int pos_y) {
}
} else if (key == 'i') {
kParams.print_info = 1 - kParams.print_info;
// TODO(skal): handle refresh of animation's last-frame too. It's quite
// more involved though (need to save the previous frame).
if (!kParams.has_animation) ClearPreviousFrame();
glutPostRedisplay();
}
}
static void HandleReshape(int width, int height) {
// TODO(skal): proper handling of resize, esp. for large pictures.
// + key control of the zoom.
// TODO(skal): should we preserve aspect ratio?
// Also: handle larger-than-screen pictures correctly.
glViewport(0, 0, width, height);
glMatrixMode(GL_PROJECTION);
glLoadIdentity();
glMatrixMode(GL_MODELVIEW);
glLoadIdentity();
kParams.viewport_width = width;
kParams.viewport_height = height;
if (!kParams.has_animation) ClearPreviousFrame();
}
static void PrintString(const char* const text) {
@ -295,7 +306,8 @@ static void HandleDisplay(void) {
GLfloat xoff, yoff;
if (pic == NULL) return;
glPushMatrix();
glPixelZoom(1, -1);
glPixelZoom((GLfloat)(+1. / kParams.canvas_width * kParams.viewport_width),
(GLfloat)(-1. / kParams.canvas_height * kParams.viewport_height));
xoff = (GLfloat)(2. * curr->x_offset / kParams.canvas_width);
yoff = (GLfloat)(2. * curr->y_offset / kParams.canvas_height);
glRasterPos2f(-1.f + xoff, 1.f - yoff);
@ -304,8 +316,6 @@ static void HandleDisplay(void) {
if (prev->dispose_method == WEBP_MUX_DISPOSE_BACKGROUND ||
curr->blend_method == WEBP_MUX_NO_BLEND) {
// TODO(later): these offsets and those above should factor in window size.
// they will be incorrect if the window is resized.
// glScissor() takes window coordinates (0,0 at bottom left).
int window_x, window_y;
int frame_w, frame_h;
@ -325,6 +335,10 @@ static void HandleDisplay(void) {
}
glEnable(GL_SCISSOR_TEST);
// Only update the requested area, not the whole canvas.
window_x = window_x * kParams.viewport_width / kParams.canvas_width;
window_y = window_y * kParams.viewport_height / kParams.canvas_height;
frame_w = frame_w * kParams.viewport_width / kParams.canvas_width;
frame_h = frame_h * kParams.viewport_height / kParams.canvas_height;
glScissor(window_x, window_y, frame_w, frame_h);
glClear(GL_COLOR_BUFFER_BIT); // use clear color
@ -367,6 +381,7 @@ static void StartDisplay(void) {
glutInitWindowSize(width, height);
glutCreateWindow("WebP viewer");
glutDisplayFunc(HandleDisplay);
glutReshapeFunc(HandleReshape);
glutIdleFunc(NULL);
glutKeyboardFunc(HandleKey);
glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
@ -375,7 +390,6 @@ static void StartDisplay(void) {
GetColorf(kParams.bg_color, 8),
GetColorf(kParams.bg_color, 16),
GetColorf(kParams.bg_color, 24));
HandleReshape(width, height);
glClear(GL_COLOR_BUFFER_BIT);
DrawCheckerBoard();
}
@ -387,7 +401,7 @@ static void Help(void) {
printf("Usage: vwebp in_file [options]\n\n"
"Decodes the WebP image file and visualize it using OpenGL\n"
"Options are:\n"
" -version .... print version number and exit\n"
" -version ..... print version number and exit\n"
" -noicc ....... don't use the icc profile if present\n"
" -nofancy ..... don't use the fancy YUV420 upscaler\n"
" -nofilter .... disable in-loop filtering\n"
@ -395,7 +409,7 @@ static void Help(void) {
" -noalphadither disable alpha plane dithering\n"
" -mt .......... use multi-threading\n"
" -info ........ print info\n"
" -h ....... this help message\n"
" -h ........... this help message\n"
"\n"
"Keyboard shortcuts:\n"
" 'c' ................ toggle use of color profile\n"

View File

@ -45,18 +45,42 @@ int ReadWebP(const uint8_t* const data, size_t data_size,
}
{
const int has_alpha = keep_alpha && bitstream->has_alpha;
// TODO(skal): use MODE_YUV(A), depending on the expected
// input pic->use_argb. This would save some conversion steps.
output_buffer->colorspace = has_alpha ? MODE_RGBA : MODE_RGB;
if (pic->use_argb) {
output_buffer->colorspace = has_alpha ? MODE_RGBA : MODE_RGB;
} else {
output_buffer->colorspace = has_alpha ? MODE_YUVA : MODE_YUV;
}
status = ExUtilDecodeWebP(data, data_size, 0, &config);
if (status == VP8_STATUS_OK) {
const uint8_t* const rgba = output_buffer->u.RGBA.rgba;
const int stride = output_buffer->u.RGBA.stride;
pic->width = output_buffer->width;
pic->height = output_buffer->height;
ok = has_alpha ? WebPPictureImportRGBA(pic, rgba, stride)
: WebPPictureImportRGB(pic, rgba, stride);
if (pic->use_argb) {
const uint8_t* const rgba = output_buffer->u.RGBA.rgba;
const int stride = output_buffer->u.RGBA.stride;
ok = has_alpha ? WebPPictureImportRGBA(pic, rgba, stride)
: WebPPictureImportRGB(pic, rgba, stride);
} else {
pic->colorspace = has_alpha ? WEBP_YUV420A : WEBP_YUV420;
ok = WebPPictureAlloc(pic);
if (!ok) {
status = VP8_STATUS_OUT_OF_MEMORY;
} else {
const WebPYUVABuffer* const yuva = &output_buffer->u.YUVA;
const int uv_width = (pic->width + 1) >> 1;
const int uv_height = (pic->height + 1) >> 1;
ExUtilCopyPlane(yuva->y, yuva->y_stride,
pic->y, pic->y_stride, pic->width, pic->height);
ExUtilCopyPlane(yuva->u, yuva->u_stride,
pic->u, pic->uv_stride, uv_width, uv_height);
ExUtilCopyPlane(yuva->v, yuva->v_stride,
pic->v, pic->uv_stride, uv_width, uv_height);
if (has_alpha) {
ExUtilCopyPlane(yuva->a, yuva->a_stride,
pic->a, pic->a_stride, pic->width, pic->height);
}
}
}
}
}

View File

@ -274,7 +274,7 @@ int ReadPictureWithWIC(const char* const filename,
NULL
};
int has_alpha = 0;
int stride;
int64_t stride;
IFS(CoInitialize(NULL));
IFS(CoCreateInstance(MAKE_REFGUID(CLSID_WICImagingFactory), NULL,
@ -334,14 +334,19 @@ int ReadPictureWithWIC(const char* const filename,
// Decode.
IFS(IWICFormatConverter_GetSize(converter, &width, &height));
stride = importer->bytes_per_pixel * width * sizeof(*rgb);
stride = (int64_t)importer->bytes_per_pixel * width * sizeof(*rgb);
if (stride != (int)stride ||
!ImgIoUtilCheckSizeArgumentsOverflow(stride, height)) {
hr = E_FAIL;
}
if (SUCCEEDED(hr)) {
rgb = (BYTE*)malloc(stride * height);
rgb = (BYTE*)malloc((size_t)stride * height);
if (rgb == NULL)
hr = E_OUTOFMEMORY;
}
IFS(IWICFormatConverter_CopyPixels(converter, NULL,
stride, stride * height, rgb));
(UINT)stride, (UINT)stride * height, rgb));
// WebP conversion.
if (SUCCEEDED(hr)) {
@ -349,7 +354,7 @@ int ReadPictureWithWIC(const char* const filename,
pic->width = width;
pic->height = height;
pic->use_argb = 1; // For WIC, we always force to argb
ok = importer->import(pic, rgb, stride);
ok = importer->import(pic, rgb, (int)stride);
if (!ok) hr = E_FAIL;
}
if (SUCCEEDED(hr)) {

14
gradle.properties Normal file
View File

@ -0,0 +1,14 @@
# Project-wide Gradle settings.
# IDE (e.g. Android Studio) users:
# Gradle settings configured through the IDE *will override*
# any settings specified in this file.
# For more details on how to configure your build environment visit
# http://www.gradle.org/docs/current/userguide/build_environment.html
# Versions for gradle
BUILD_TOOLS_VERSION=23.0.3
COMPILE_SDK_VERSION=23
ANDROID_GRADLE_PLUGIN_VERSION=1.5.0
GRADLE_DOWNLOAD_TASK_VERSION=2.1.0

BIN
gradle/wrapper/gradle-wrapper.jar vendored Normal file

Binary file not shown.

View File

@ -0,0 +1,6 @@
#Thu May 12 17:06:25 CEST 2016
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-2.13-bin.zip

164
gradlew vendored Executable file
View File

@ -0,0 +1,164 @@
#!/usr/bin/env bash
##############################################################################
##
## Gradle start up script for UN*X
##
##############################################################################
# Attempt to set APP_HOME
# Resolve links: $0 may be a link
PRG="$0"
# Need this for relative symlinks.
while [ -h "$PRG" ] ; do
ls=`ls -ld "$PRG"`
link=`expr "$ls" : '.*-> \(.*\)$'`
if expr "$link" : '/.*' > /dev/null; then
PRG="$link"
else
PRG=`dirname "$PRG"`"/$link"
fi
done
SAVED="`pwd`"
cd "`dirname \"$PRG\"`/" >/dev/null
APP_HOME="`pwd -P`"
cd "$SAVED" >/dev/null
APP_NAME="Gradle"
APP_BASE_NAME=`basename "$0"`
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS=""
# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD="maximum"
warn ( ) {
echo "$*"
}
die ( ) {
echo
echo "$*"
echo
exit 1
}
# OS specific support (must be 'true' or 'false').
cygwin=false
msys=false
darwin=false
nonstop=false
case "`uname`" in
CYGWIN* )
cygwin=true
;;
Darwin* )
darwin=true
;;
MINGW* )
msys=true
;;
NONSTOP* )
nonstop=true
;;
esac
CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
# Determine the Java command to use to start the JVM.
if [ -n "$JAVA_HOME" ] ; then
if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
# IBM's JDK on AIX uses strange locations for the executables
JAVACMD="$JAVA_HOME/jre/sh/java"
else
JAVACMD="$JAVA_HOME/bin/java"
fi
if [ ! -x "$JAVACMD" ] ; then
die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi
else
JAVACMD="java"
which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi
# Increase the maximum file descriptors if we can.
if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
MAX_FD_LIMIT=`ulimit -H -n`
if [ $? -eq 0 ] ; then
if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
MAX_FD="$MAX_FD_LIMIT"
fi
ulimit -n $MAX_FD
if [ $? -ne 0 ] ; then
warn "Could not set maximum file descriptor limit: $MAX_FD"
fi
else
warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
fi
fi
# For Darwin, add options to specify how the application appears in the dock
if $darwin; then
GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
fi
# For Cygwin, switch paths to Windows format before running java
if $cygwin ; then
APP_HOME=`cygpath --path --mixed "$APP_HOME"`
CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
JAVACMD=`cygpath --unix "$JAVACMD"`
# We build the pattern for arguments to be converted via cygpath
ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
SEP=""
for dir in $ROOTDIRSRAW ; do
ROOTDIRS="$ROOTDIRS$SEP$dir"
SEP="|"
done
OURCYGPATTERN="(^($ROOTDIRS))"
# Add a user-defined pattern to the cygpath arguments
if [ "$GRADLE_CYGPATTERN" != "" ] ; then
OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
fi
# Now convert the arguments - kludge to limit ourselves to /bin/sh
i=0
for arg in "$@" ; do
CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option
if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
else
eval `echo args$i`="\"$arg\""
fi
i=$((i+1))
done
case $i in
(0) set -- ;;
(1) set -- "$args0" ;;
(2) set -- "$args0" "$args1" ;;
(3) set -- "$args0" "$args1" "$args2" ;;
(4) set -- "$args0" "$args1" "$args2" "$args3" ;;
(5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
(6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
(7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
(8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
(9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
esac
fi
# Split up the JVM_OPTS And GRADLE_OPTS values into an array, following the shell quoting and substitution rules
function splitJvmOpts() {
JVM_OPTS=("$@")
}
eval splitJvmOpts $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS
JVM_OPTS[${#JVM_OPTS[*]}]="-Dorg.gradle.appname=$APP_BASE_NAME"
exec "$JAVACMD" "${JVM_OPTS[@]}" -classpath "$CLASSPATH" org.gradle.wrapper.GradleWrapperMain "$@"

90
gradlew.bat vendored Normal file
View File

@ -0,0 +1,90 @@
@if "%DEBUG%" == "" @echo off
@rem ##########################################################################
@rem
@rem Gradle startup script for Windows
@rem
@rem ##########################################################################
@rem Set local scope for the variables with windows NT shell
if "%OS%"=="Windows_NT" setlocal
set DIRNAME=%~dp0
if "%DIRNAME%" == "" set DIRNAME=.
set APP_BASE_NAME=%~n0
set APP_HOME=%DIRNAME%
@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
set DEFAULT_JVM_OPTS=
@rem Find java.exe
if defined JAVA_HOME goto findJavaFromJavaHome
set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if "%ERRORLEVEL%" == "0" goto init
echo.
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.
goto fail
:findJavaFromJavaHome
set JAVA_HOME=%JAVA_HOME:"=%
set JAVA_EXE=%JAVA_HOME%/bin/java.exe
if exist "%JAVA_EXE%" goto init
echo.
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.
goto fail
:init
@rem Get command-line arguments, handling Windows variants
if not "%OS%" == "Windows_NT" goto win9xME_args
if "%@eval[2+2]" == "4" goto 4NT_args
:win9xME_args
@rem Slurp the command line arguments.
set CMD_LINE_ARGS=
set _SKIP=2
:win9xME_args_slurp
if "x%~1" == "x" goto execute
set CMD_LINE_ARGS=%*
goto execute
:4NT_args
@rem Get arguments from the 4NT Shell from JP Software
set CMD_LINE_ARGS=%$
:execute
@rem Setup the command line
set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
@rem Execute Gradle
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
:end
@rem End local scope for the variables with windows NT shell
if "%ERRORLEVEL%"=="0" goto mainEnd
:fail
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
rem the _cmd.exe /c_ return code!
if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
exit /b 1
:mainEnd
if "%OS%"=="Windows_NT" endlocal
:omega

View File

@ -1,10 +1,11 @@
#!/bin/bash
#
# This script generates 'WebP.framework'. An iOS app can decode WebP images
# by including 'WebP.framework'.
# This script generates 'WebP.framework' and 'WebPDecoder.framework'. An iOS
# app can decode WebP images by including 'WebPDecoder.framework' and both
# encode and decode WebP images by including 'WebP.framework'.
#
# Run ./iosbuild.sh to generate 'WebP.framework' under the current directory
# (previous build will be erased if it exists).
# Run ./iosbuild.sh to generate the frameworks under the current directory
# (the previous build will be erased if it exists).
#
# This script is inspired by the build script written by Carson McDonald.
# (http://www.ioncannon.net/programming/1483/using-webp-to-reduce-native-ios-app-size/).
@ -33,10 +34,12 @@ readonly SRCDIR=$(dirname $0)
readonly TOPDIR=$(pwd)
readonly BUILDDIR="${TOPDIR}/iosbuild"
readonly TARGETDIR="${TOPDIR}/WebP.framework"
readonly DECTARGETDIR="${TOPDIR}/WebPDecoder.framework"
readonly DEVELOPER=$(xcode-select --print-path)
readonly PLATFORMSROOT="${DEVELOPER}/Platforms"
readonly LIPO=$(xcrun -sdk iphoneos${SDK} -find lipo)
LIBLIST=''
DECLIBLIST=''
if [[ -z "${SDK}" ]]; then
echo "iOS SDK not available"
@ -50,10 +53,8 @@ else
echo "iOS SDK Version ${SDK}"
fi
rm -rf ${BUILDDIR}
rm -rf ${TARGETDIR}
mkdir -p ${BUILDDIR}
mkdir -p ${TARGETDIR}/Headers/
rm -rf ${BUILDDIR} ${TARGETDIR} ${DECTARGETDIR}
mkdir -p ${BUILDDIR} ${TARGETDIR}/Headers/ ${DECTARGETDIR}/Headers/
if [[ ! -e ${SRCDIR}/configure ]]; then
if ! (cd ${SRCDIR} && sh autogen.sh); then
@ -107,12 +108,13 @@ for PLATFORM in ${PLATFORMS}; do
CFLAGS="${CFLAGS}"
set +x
# run make only in the src/ directory to create libwebpdecoder.a
# run make only in the src/ directory to create libwebp.a/libwebpdecoder.a
cd src/
make V=0
make install
LIBLIST+=" ${ROOTDIR}/lib/libwebpdecoder.a"
LIBLIST+=" ${ROOTDIR}/lib/libwebp.a"
DECLIBLIST+=" ${ROOTDIR}/lib/libwebpdecoder.a"
make clean
cd ..
@ -122,3 +124,6 @@ done
cp -a ${SRCDIR}/src/webp/*.h ${TARGETDIR}/Headers/
${LIPO} -create ${LIBLIST} -output ${TARGETDIR}/WebP
cp -a ${SRCDIR}/src/webp/*.h ${DECTARGETDIR}/Headers/
${LIPO} -create ${DECLIBLIST} -output ${DECTARGETDIR}/WebPDecoder

View File

@ -89,6 +89,14 @@ endif
# EXTRA_FLAGS += -march=armv7-a -mfloat-abi=hard -mfpu=neon -mtune=cortex-a8
# -> seems to make the overall lib slower: -fno-split-wide-types
# MIPS (MSA) 32-bit build specific flags for mips32r5 (p5600):
# EXTRA_FLAGS += -mips32r5 -mabi=32 -mtune=p5600 -mmsa -mfp64
# EXTRA_FLAGS += -msched-weight -mload-store-pairs
# MIPS (MSA) 64-bit build specific flags for mips64r6 (i6400):
# EXTRA_FLAGS += -mips64r6 -mabi=64 -mtune=i6400 -mmsa -mfp64
# EXTRA_FLAGS += -msched-weight -mload-store-pairs
#### Nothing should normally be changed below this line ####
AR = ar
@ -130,6 +138,7 @@ DSP_DEC_OBJS = \
src/dsp/dec_clip_tables.o \
src/dsp/dec_mips32.o \
src/dsp/dec_mips_dsp_r2.o \
src/dsp/dec_msa.o \
src/dsp/dec_neon.o \
src/dsp/dec_sse2.o \
src/dsp/dec_sse41.o \
@ -201,6 +210,7 @@ ENC_OBJS = \
src/enc/webpenc.o \
EX_FORMAT_DEC_OBJS = \
examples/image_dec.o \
examples/jpegdec.o \
examples/metadata.o \
examples/pngdec.o \
@ -260,9 +270,11 @@ HDRS = \
src/dec/vp8i.h \
src/dec/vp8li.h \
src/dec/webpi.h \
src/dsp/common_sse2.h \
src/dsp/dsp.h \
src/dsp/lossless.h \
src/dsp/mips_macro.h \
src/dsp/msa_macro.h \
src/dsp/neon.h \
src/dsp/yuv.h \
src/enc/backward_references.h \
@ -320,6 +332,7 @@ src/utils/bit_writer.o: src/utils/endian_inl.h
$(CC) $(CFLAGS) $(CPPFLAGS) -c $< -o $@
examples/libanim_util.a: $(ANIM_UTIL_OBJS)
examples/libexample_dec.a: $(EX_FORMAT_DEC_OBJS)
examples/libexample_util.a: $(EX_UTIL_OBJS)
examples/libgifdec.a: $(GIFDEC_OBJS)
src/libwebpdecoder.a: $(LIBWEBPDECODER_OBJS)
@ -332,7 +345,7 @@ src/libwebpextras.a: $(LIBWEBPEXTRA_OBJS)
$(AR) $(ARFLAGS) $@ $^
examples/anim_diff: examples/anim_diff.o $(ANIM_UTIL_OBJS) $(GIFDEC_OBJS)
examples/cwebp: examples/cwebp.o $(EX_FORMAT_DEC_OBJS)
examples/cwebp: examples/cwebp.o
examples/dwebp: examples/dwebp.o
examples/gif2webp: examples/gif2webp.o $(GIFDEC_OBJS)
examples/vwebp: examples/vwebp.o
@ -343,7 +356,8 @@ examples/anim_diff: src/demux/libwebpdemux.a examples/libexample_util.a
examples/anim_diff: src/libwebp.a
examples/anim_diff: EXTRA_LIBS += $(GIF_LIBS)
examples/anim_diff: EXTRA_FLAGS += -DWEBP_HAVE_GIF
examples/cwebp: examples/libexample_util.a src/libwebp.a
examples/cwebp: examples/libexample_util.a examples/libexample_dec.a
examples/cwebp: src/libwebp.a
examples/cwebp: EXTRA_LIBS += $(CWEBP_LIBS)
examples/dwebp: examples/libexample_util.a src/libwebpdecoder.a
examples/dwebp: EXTRA_LIBS += $(DWEBP_LIBS)

View File

@ -1,5 +1,5 @@
.\" Hey, EMACS: -*- nroff -*-
.TH CWEBP 1 "December 14, 2015"
.TH CWEBP 1 "September 02, 2016"
.SH NAME
cwebp \- compress an image file to a WebP file
.SH SYNOPSIS
@ -117,17 +117,21 @@ or without alpha).
.TP
.BI \-size " int
Specify a target size (in bytes) to try and reach for the compressed output.
Compressor will make several pass of partial encoding in order to get as
close as possible to this target.
The compressor will make several passes of partial encoding in order to get as
close as possible to this target. If both \fB\-size\fP and \fB\-psnr\fP
are used, \fB\-size\fP value will prevail.
.TP
.BI \-psnr " float
Specify a target PSNR (in dB) to try and reach for the compressed output.
Compressor will make several pass of partial encoding in order to get as
close as possible to this target.
The compressor will make several passes of partial encoding in order to get as
close as possible to this target. If both \fB\-size\fP and \fB\-psnr\fP
are used, \fB\-size\fP value will prevail.
.TP
.BI \-pass " int
Set a maximum number of passes to use during the dichotomy used by
options \fB\-size\fP or \fB\-psnr\fP. Maximum value is 10, default is 1.
If options \fB\-size\fP or \fB\-psnr\fP were used, but \fB\-pass\fP wasn't
specified, a default value of '6' passes will be used.
.TP
.B \-af
Turns auto\-filter on. This algorithm will spend additional time optimizing
@ -167,7 +171,7 @@ Specify the amplitude of the spatial noise shaping. Spatial noise shaping
(or \fBsns\fP for short) refers to a general collection of built\-in algorithms
used to decide which area of the picture should use relatively less bits,
and where else to better transfer these bits. The possible range goes from
0 (algorithm is off) to 100 (the maximal effect). The default value is 80.
0 (algorithm is off) to 100 (the maximal effect). The default value is 50.
.TP
.BI \-segments " int
Change the number of partitions to use during the segmentation of the
@ -218,7 +222,7 @@ Report encoding progress in percent.
Do not print anything.
.TP
.B \-short
Only print brief information (output file size and PSNR) for testing purpose.
Only print brief information (output file size and PSNR) for testing purposes.
.TP
.BI \-map " int
Output additional ASCII\-map of encoding information. Possible map values
@ -277,7 +281,7 @@ Note: each input format may not support all combinations.
Disable all assembly optimizations.
.SH BUGS
Please report all bugs to our issue tracker:
Please report all bugs to the issue tracker:
https://bugs.chromium.org/p/webp
.br
Patches welcome! See this page to get started:

View File

@ -1,5 +1,5 @@
.\" Hey, EMACS: -*- nroff -*-
.TH DWEBP 1 "December 11, 2015"
.TH DWEBP 1 "June 23, 2016"
.SH NAME
dwebp \- decompress a WebP file to an image file
.SH SYNOPSIS
@ -108,7 +108,7 @@ Print extra information (decoding time in particular).
Disable all assembly optimizations.
.SH BUGS
Please report all bugs to our issue tracker:
Please report all bugs to the issue tracker:
https://bugs.chromium.org/p/webp
.br
Patches welcome! See this page to get started:

View File

@ -1,5 +1,5 @@
.\" Hey, EMACS: -*- nroff -*-
.TH GIF2WEBP 1 "December 11, 2015"
.TH GIF2WEBP 1 "June 23, 2016"
.SH NAME
gif2webp \- Convert a GIF image to WebP
.SH SYNOPSIS
@ -117,7 +117,7 @@ Print extra information.
Do not print anything.
.SH BUGS
Please report all bugs to our issue tracker:
Please report all bugs to the issue tracker:
https://bugs.chromium.org/p/webp
.br
Patches welcome! See this page to get started:

View File

@ -1,5 +1,5 @@
.\" Hey, EMACS: -*- nroff -*-
.TH VWEBP 1 "December 11, 2015"
.TH VWEBP 1 "June 23, 2016"
.SH NAME
vwebp \- decompress a WebP file and display it in a window
.SH SYNOPSIS
@ -63,7 +63,7 @@ Overlay file information.
Quit.
.SH BUGS
Please report all bugs to our issue tracker:
Please report all bugs to the issue tracker:
https://bugs.chromium.org/p/webp
.br
Patches welcome! See this page to get started:

View File

@ -1,5 +1,5 @@
.\" Hey, EMACS: -*- nroff -*-
.TH WEBPMUX 1 "December 11, 2015"
.TH WEBPMUX 1 "June 23, 2016"
.SH NAME
webpmux \- create animated WebP files from non\-animated WebP images, extract
frames from animated WebP images, and manage XMP/EXIF metadata and ICC profile.
@ -128,7 +128,7 @@ Output file in WebP format.
The nature of EXIF, XMP and ICC data is not checked and is assumed to be valid.
.SH BUGS
Please report all bugs to our issue tracker:
Please report all bugs to the issue tracker:
https://bugs.chromium.org/p/webp
.br
Patches welcome! See this page to get started:

View File

@ -38,7 +38,7 @@ libwebp_la_LIBADD += utils/libwebputils.la
# other than the ones listed on the command line, i.e., after linking, it will
# not have unresolved symbols. Some platforms (Windows among them) require all
# symbols in shared libraries to be resolved at library creation.
libwebp_la_LDFLAGS = -no-undefined -version-info 6:0:0
libwebp_la_LDFLAGS = -no-undefined -version-info 6:2:0
libwebpincludedir = $(includedir)/webp
pkgconfig_DATA = libwebp.pc
@ -50,7 +50,7 @@ if BUILD_LIBWEBPDECODER
libwebpdecoder_la_LIBADD += dsp/libwebpdspdecode.la
libwebpdecoder_la_LIBADD += utils/libwebputilsdecode.la
libwebpdecoder_la_LDFLAGS = -no-undefined -version-info 2:0:0
libwebpdecoder_la_LDFLAGS = -no-undefined -version-info 2:2:0
pkgconfig_DATA += libwebpdecoder.pc
endif

View File

@ -23,12 +23,14 @@
//------------------------------------------------------------------------------
// ALPHDecoder object.
ALPHDecoder* ALPHNew(void) {
// Allocates a new alpha decoder instance.
static ALPHDecoder* ALPHNew(void) {
ALPHDecoder* const dec = (ALPHDecoder*)WebPSafeCalloc(1ULL, sizeof(*dec));
return dec;
}
void ALPHDelete(ALPHDecoder* const dec) {
// Clears and deallocates an alpha decoder instance.
static void ALPHDelete(ALPHDecoder* const dec) {
if (dec != NULL) {
VP8LDelete(dec->vp8l_dec_);
dec->vp8l_dec_ = NULL;
@ -44,24 +46,28 @@ void ALPHDelete(ALPHDecoder* const dec) {
// Returns false in case of error in alpha header (data too short, invalid
// compression method or filter, error in lossless header data etc).
static int ALPHInit(ALPHDecoder* const dec, const uint8_t* data,
size_t data_size, int width, int height, uint8_t* output) {
size_t data_size, const VP8Io* const src_io,
uint8_t* output) {
int ok = 0;
const uint8_t* const alpha_data = data + ALPHA_HEADER_LEN;
const size_t alpha_data_size = data_size - ALPHA_HEADER_LEN;
int rsrv;
VP8Io* const io = &dec->io_;
assert(width > 0 && height > 0);
assert(data != NULL && output != NULL);
assert(data != NULL && output != NULL && src_io != NULL);
dec->width_ = width;
dec->height_ = height;
VP8FiltersInit();
dec->output_ = output;
dec->width_ = src_io->width;
dec->height_ = src_io->height;
assert(dec->width_ > 0 && dec->height_ > 0);
if (data_size <= ALPHA_HEADER_LEN) {
return 0;
}
dec->method_ = (data[0] >> 0) & 0x03;
dec->filter_ = (data[0] >> 2) & 0x03;
dec->filter_ = (WEBP_FILTER_TYPE)((data[0] >> 2) & 0x03);
dec->pre_processing_ = (data[0] >> 4) & 0x03;
rsrv = (data[0] >> 6) & 0x03;
if (dec->method_ < ALPHA_NO_COMPRESSION ||
@ -72,14 +78,28 @@ static int ALPHInit(ALPHDecoder* const dec, const uint8_t* data,
return 0;
}
// Copy the necessary parameters from src_io to io
VP8InitIo(io);
WebPInitCustomIo(NULL, io);
io->opaque = dec;
io->width = src_io->width;
io->height = src_io->height;
io->use_cropping = src_io->use_cropping;
io->crop_left = src_io->crop_left;
io->crop_right = src_io->crop_right;
io->crop_top = src_io->crop_top;
io->crop_bottom = src_io->crop_bottom;
// No need to copy the scaling parameters.
if (dec->method_ == ALPHA_NO_COMPRESSION) {
const size_t alpha_decoded_size = dec->width_ * dec->height_;
ok = (alpha_data_size >= alpha_decoded_size);
} else {
assert(dec->method_ == ALPHA_LOSSLESS_COMPRESSION);
ok = VP8LDecodeAlphaHeader(dec, alpha_data, alpha_data_size, output);
ok = VP8LDecodeAlphaHeader(dec, alpha_data, alpha_data_size);
}
VP8FiltersInit();
return ok;
}
@ -90,15 +110,30 @@ static int ALPHInit(ALPHDecoder* const dec, const uint8_t* data,
static int ALPHDecode(VP8Decoder* const dec, int row, int num_rows) {
ALPHDecoder* const alph_dec = dec->alph_dec_;
const int width = alph_dec->width_;
const int height = alph_dec->height_;
WebPUnfilterFunc unfilter_func = WebPUnfilters[alph_dec->filter_];
uint8_t* const output = dec->alpha_plane_;
const int height = alph_dec->io_.crop_bottom;
if (alph_dec->method_ == ALPHA_NO_COMPRESSION) {
const size_t offset = row * width;
const size_t num_pixels = num_rows * width;
assert(dec->alpha_data_size_ >= ALPHA_HEADER_LEN + offset + num_pixels);
memcpy(dec->alpha_plane_ + offset,
dec->alpha_data_ + ALPHA_HEADER_LEN + offset, num_pixels);
int y;
const uint8_t* prev_line = dec->alpha_prev_line_;
const uint8_t* deltas = dec->alpha_data_ + ALPHA_HEADER_LEN + row * width;
uint8_t* dst = dec->alpha_plane_ + row * width;
assert(deltas <= &dec->alpha_data_[dec->alpha_data_size_]);
if (alph_dec->filter_ != WEBP_FILTER_NONE) {
assert(WebPUnfilters[alph_dec->filter_] != NULL);
for (y = 0; y < num_rows; ++y) {
WebPUnfilters[alph_dec->filter_](prev_line, deltas, dst, width);
prev_line = dst;
dst += width;
deltas += width;
}
} else {
for (y = 0; y < num_rows; ++y) {
memcpy(dst, deltas, width * sizeof(*dst));
prev_line = dst;
dst += width;
deltas += width;
}
}
dec->alpha_prev_line_ = prev_line;
} else { // alph_dec->method_ == ALPHA_LOSSLESS_COMPRESSION
assert(alph_dec->vp8l_dec_ != NULL);
if (!VP8LDecodeAlphaImageStream(alph_dec, row + num_rows)) {
@ -106,62 +141,92 @@ static int ALPHDecode(VP8Decoder* const dec, int row, int num_rows) {
}
}
if (unfilter_func != NULL) {
unfilter_func(width, height, width, row, num_rows, output);
}
if (row + num_rows == dec->pic_hdr_.height_) {
if (row + num_rows >= height) {
dec->is_alpha_decoded_ = 1;
}
return 1;
}
static int AllocateAlphaPlane(VP8Decoder* const dec, const VP8Io* const io) {
const int stride = io->width;
const int height = io->crop_bottom;
const uint64_t alpha_size = (uint64_t)stride * height;
assert(dec->alpha_plane_mem_ == NULL);
dec->alpha_plane_mem_ =
(uint8_t*)WebPSafeMalloc(alpha_size, sizeof(*dec->alpha_plane_));
if (dec->alpha_plane_mem_ == NULL) {
return 0;
}
dec->alpha_plane_ = dec->alpha_plane_mem_;
dec->alpha_prev_line_ = NULL;
return 1;
}
void WebPDeallocateAlphaMemory(VP8Decoder* const dec) {
assert(dec != NULL);
WebPSafeFree(dec->alpha_plane_mem_);
dec->alpha_plane_mem_ = NULL;
dec->alpha_plane_ = NULL;
ALPHDelete(dec->alph_dec_);
dec->alph_dec_ = NULL;
}
//------------------------------------------------------------------------------
// Main entry point.
const uint8_t* VP8DecompressAlphaRows(VP8Decoder* const dec,
const VP8Io* const io,
int row, int num_rows) {
const int width = dec->pic_hdr_.width_;
const int height = dec->pic_hdr_.height_;
const int width = io->width;
const int height = io->crop_bottom;
assert(dec != NULL && io != NULL);
if (row < 0 || num_rows <= 0 || row + num_rows > height) {
return NULL; // sanity check.
}
if (row == 0) {
// Initialize decoding.
assert(dec->alpha_plane_ != NULL);
dec->alph_dec_ = ALPHNew();
if (dec->alph_dec_ == NULL) return NULL;
if (!ALPHInit(dec->alph_dec_, dec->alpha_data_, dec->alpha_data_size_,
width, height, dec->alpha_plane_)) {
ALPHDelete(dec->alph_dec_);
dec->alph_dec_ = NULL;
return NULL;
}
// if we allowed use of alpha dithering, check whether it's needed at all
if (dec->alph_dec_->pre_processing_ != ALPHA_PREPROCESSED_LEVELS) {
dec->alpha_dithering_ = 0; // disable dithering
} else {
num_rows = height; // decode everything in one pass
}
}
if (!dec->is_alpha_decoded_) {
int ok = 0;
assert(dec->alph_dec_ != NULL);
ok = ALPHDecode(dec, row, num_rows);
if (ok && dec->alpha_dithering_ > 0) {
ok = WebPDequantizeLevels(dec->alpha_plane_, width, height,
dec->alpha_dithering_);
if (dec->alph_dec_ == NULL) { // Initialize decoder.
dec->alph_dec_ = ALPHNew();
if (dec->alph_dec_ == NULL) return NULL;
if (!AllocateAlphaPlane(dec, io)) goto Error;
if (!ALPHInit(dec->alph_dec_, dec->alpha_data_, dec->alpha_data_size_,
io, dec->alpha_plane_)) {
goto Error;
}
// if we allowed use of alpha dithering, check whether it's needed at all
if (dec->alph_dec_->pre_processing_ != ALPHA_PREPROCESSED_LEVELS) {
dec->alpha_dithering_ = 0; // disable dithering
} else {
num_rows = height - row; // decode everything in one pass
}
}
if (!ok || dec->is_alpha_decoded_) {
assert(dec->alph_dec_ != NULL);
assert(row + num_rows <= height);
if (!ALPHDecode(dec, row, num_rows)) goto Error;
if (dec->is_alpha_decoded_) { // finished?
ALPHDelete(dec->alph_dec_);
dec->alph_dec_ = NULL;
if (dec->alpha_dithering_ > 0) {
uint8_t* const alpha = dec->alpha_plane_ + io->crop_top * width
+ io->crop_left;
if (!WebPDequantizeLevels(alpha,
io->crop_right - io->crop_left,
io->crop_bottom - io->crop_top,
width, dec->alpha_dithering_)) {
goto Error;
}
}
}
if (!ok) return NULL; // Error.
}
// Return a pointer to the current decoded row.
return dec->alpha_plane_ + row * width;
Error:
WebPDeallocateAlphaMemory(dec);
return NULL;
}

View File

@ -32,19 +32,18 @@ struct ALPHDecoder {
int pre_processing_;
struct VP8LDecoder* vp8l_dec_;
VP8Io io_;
int use_8b_decode; // Although alpha channel requires only 1 byte per
// pixel, sometimes VP8LDecoder may need to allocate
// 4 bytes per pixel internally during decode.
int use_8b_decode_; // Although alpha channel requires only 1 byte per
// pixel, sometimes VP8LDecoder may need to allocate
// 4 bytes per pixel internally during decode.
uint8_t* output_;
const uint8_t* prev_line_; // last output row (or NULL)
};
//------------------------------------------------------------------------------
// internal functions. Not public.
// Allocates a new alpha decoder instance.
ALPHDecoder* ALPHNew(void);
// Clears and deallocates an alpha decoder instance.
void ALPHDelete(ALPHDecoder* const dec);
// Deallocate memory associated to dec->alpha_plane_ decoding
void WebPDeallocateAlphaMemory(VP8Decoder* const dec);
//------------------------------------------------------------------------------

View File

@ -92,7 +92,7 @@ static VP8StatusCode AllocateBuffer(WebPDecBuffer* const buffer) {
return VP8_STATUS_INVALID_PARAM;
}
if (!buffer->is_external_memory && buffer->private_memory == NULL) {
if (buffer->is_external_memory <= 0 && buffer->private_memory == NULL) {
uint8_t* output;
int uv_stride = 0, a_stride = 0;
uint64_t uv_size = 0, a_size = 0, total_size;
@ -227,7 +227,7 @@ int WebPInitDecBufferInternal(WebPDecBuffer* buffer, int version) {
void WebPFreeDecBuffer(WebPDecBuffer* buffer) {
if (buffer != NULL) {
if (!buffer->is_external_memory) {
if (buffer->is_external_memory <= 0) {
WebPSafeFree(buffer->private_memory);
}
buffer->private_memory = NULL;
@ -256,5 +256,45 @@ void WebPGrabDecBuffer(WebPDecBuffer* const src, WebPDecBuffer* const dst) {
}
}
//------------------------------------------------------------------------------
VP8StatusCode WebPCopyDecBufferPixels(const WebPDecBuffer* const src_buf,
WebPDecBuffer* const dst_buf) {
assert(src_buf != NULL && dst_buf != NULL);
assert(src_buf->colorspace == dst_buf->colorspace);
dst_buf->width = src_buf->width;
dst_buf->height = src_buf->height;
if (CheckDecBuffer(dst_buf) != VP8_STATUS_OK) {
return VP8_STATUS_INVALID_PARAM;
}
if (WebPIsRGBMode(src_buf->colorspace)) {
const WebPRGBABuffer* const src = &src_buf->u.RGBA;
const WebPRGBABuffer* const dst = &dst_buf->u.RGBA;
WebPCopyPlane(src->rgba, src->stride, dst->rgba, dst->stride,
src_buf->width * kModeBpp[src_buf->colorspace],
src_buf->height);
} else {
const WebPYUVABuffer* const src = &src_buf->u.YUVA;
const WebPYUVABuffer* const dst = &dst_buf->u.YUVA;
WebPCopyPlane(src->y, src->y_stride, dst->y, dst->y_stride,
src_buf->width, src_buf->height);
WebPCopyPlane(src->u, src->u_stride, dst->u, dst->u_stride,
(src_buf->width + 1) / 2, (src_buf->height + 1) / 2);
WebPCopyPlane(src->v, src->v_stride, dst->v, dst->v_stride,
(src_buf->width + 1) / 2, (src_buf->height + 1) / 2);
if (WebPIsAlphaMode(src_buf->colorspace)) {
WebPCopyPlane(src->a, src->a_stride, dst->a, dst->a_stride,
src_buf->width, src_buf->height);
}
}
return VP8_STATUS_OK;
}
int WebPAvoidSlowMemory(const WebPDecBuffer* const output,
const WebPBitstreamFeatures* const features) {
assert(output != NULL);
return (output->is_external_memory >= 2) &&
WebPIsPremultipliedMode(output->colorspace) &&
(features != NULL && features->has_alpha);
}
//------------------------------------------------------------------------------

View File

@ -316,6 +316,9 @@ static void PrecomputeFilterStrengths(VP8Decoder* const dec) {
//------------------------------------------------------------------------------
// Dithering
// minimal amp that will provide a non-zero dithering effect
#define MIN_DITHER_AMP 4
#define DITHER_AMP_TAB_SIZE 12
static const int kQuantToDitherAmp[DITHER_AMP_TAB_SIZE] = {
// roughly, it's dqm->uv_mat_[1]
@ -356,27 +359,14 @@ void VP8InitDithering(const WebPDecoderOptions* const options,
}
}
// minimal amp that will provide a non-zero dithering effect
#define MIN_DITHER_AMP 4
#define DITHER_DESCALE 4
#define DITHER_DESCALE_ROUNDER (1 << (DITHER_DESCALE - 1))
#define DITHER_AMP_BITS 8
#define DITHER_AMP_CENTER (1 << DITHER_AMP_BITS)
// Convert to range: [-2,2] for dither=50, [-4,4] for dither=100
static void Dither8x8(VP8Random* const rg, uint8_t* dst, int bps, int amp) {
int i, j;
for (j = 0; j < 8; ++j) {
for (i = 0; i < 8; ++i) {
// TODO: could be made faster with SSE2
const int bits =
VP8RandomBits2(rg, DITHER_AMP_BITS + 1, amp) - DITHER_AMP_CENTER;
// Convert to range: [-2,2] for dither=50, [-4,4] for dither=100
const int delta = (bits + DITHER_DESCALE_ROUNDER) >> DITHER_DESCALE;
const int v = (int)dst[i] + delta;
dst[i] = (v < 0) ? 0 : (v > 255) ? 255u : (uint8_t)v;
}
dst += bps;
uint8_t dither[64];
int i;
for (i = 0; i < 8 * 8; ++i) {
dither[i] = VP8RandomBits2(rg, VP8_DITHER_AMP_BITS + 1, amp);
}
VP8DitherCombine8x8(dither, dst, bps);
}
static void DitherRow(VP8Decoder* const dec) {
@ -462,7 +452,7 @@ static int FinishRow(VP8Decoder* const dec, VP8Io* const io) {
if (dec->alpha_data_ != NULL && y_start < y_end) {
// TODO(skal): testing presence of alpha with dec->alpha_data_ is not a
// good idea.
io->a = VP8DecompressAlphaRows(dec, y_start, y_end - y_start);
io->a = VP8DecompressAlphaRows(dec, io, y_start, y_end - y_start);
if (io->a == NULL) {
return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR,
"Could not decode alpha data.");

View File

@ -70,7 +70,9 @@ struct WebPIDecoder {
VP8Io io_;
MemBuffer mem_; // input memory buffer.
WebPDecBuffer output_; // output buffer (when no external one is supplied)
WebPDecBuffer output_; // output buffer (when no external one is supplied,
// or if the external one has slow-memory)
WebPDecBuffer* final_output_; // Slow-memory output to copy to eventually.
size_t chunk_size_; // Compressed VP8/VP8L size extracted from Header.
int last_mb_y_; // last row reached for intra-mode decoding
@ -118,9 +120,9 @@ static void DoRemap(WebPIDecoder* const idec, ptrdiff_t offset) {
if (idec->dec_ != NULL) {
if (!idec->is_lossless_) {
VP8Decoder* const dec = (VP8Decoder*)idec->dec_;
const int last_part = dec->num_parts_ - 1;
const uint32_t last_part = dec->num_parts_minus_one_;
if (offset != 0) {
int p;
uint32_t p;
for (p = 0; p <= last_part; ++p) {
VP8RemapBitReader(dec->parts_ + p, offset);
}
@ -132,7 +134,6 @@ static void DoRemap(WebPIDecoder* const idec, ptrdiff_t offset) {
}
{
const uint8_t* const last_start = dec->parts_[last_part].buf_;
assert(last_part >= 0);
VP8BitReaderSetBuffer(&dec->parts_[last_part], last_start,
mem->buf_ + mem->end_ - last_start);
}
@ -249,10 +250,16 @@ static VP8StatusCode FinishDecoding(WebPIDecoder* const idec) {
idec->state_ = STATE_DONE;
if (options != NULL && options->flip) {
return WebPFlipBuffer(output);
} else {
return VP8_STATUS_OK;
const VP8StatusCode status = WebPFlipBuffer(output);
if (status != VP8_STATUS_OK) return status;
}
if (idec->final_output_ != NULL) {
WebPCopyDecBufferPixels(output, idec->final_output_); // do the slow-copy
WebPFreeDecBuffer(&idec->output_);
*output = *idec->final_output_;
idec->final_output_ = NULL;
}
return VP8_STATUS_OK;
}
//------------------------------------------------------------------------------
@ -457,19 +464,20 @@ static VP8StatusCode DecodeRemaining(WebPIDecoder* const idec) {
}
for (; dec->mb_x_ < dec->mb_w_; ++dec->mb_x_) {
VP8BitReader* const token_br =
&dec->parts_[dec->mb_y_ & (dec->num_parts_ - 1)];
&dec->parts_[dec->mb_y_ & dec->num_parts_minus_one_];
MBContext context;
SaveContext(dec, token_br, &context);
if (!VP8DecodeMB(dec, token_br)) {
// We shouldn't fail when MAX_MB data was available
if (dec->num_parts_ == 1 && MemDataSize(&idec->mem_) > MAX_MB_SIZE) {
if (dec->num_parts_minus_one_ == 0 &&
MemDataSize(&idec->mem_) > MAX_MB_SIZE) {
return IDecError(idec, VP8_STATUS_BITSTREAM_ERROR);
}
RestoreContext(&context, dec, token_br);
return VP8_STATUS_SUSPENDED;
}
// Release buffer only if there is only one partition
if (dec->num_parts_ == 1) {
if (dec->num_parts_minus_one_ == 0) {
idec->mem_.start_ = token_br->buf_ - idec->mem_.buf_;
assert(idec->mem_.start_ <= idec->mem_.end_);
}
@ -575,9 +583,10 @@ static VP8StatusCode IDecode(WebPIDecoder* idec) {
}
//------------------------------------------------------------------------------
// Public functions
// Internal constructor
WebPIDecoder* WebPINewDecoder(WebPDecBuffer* output_buffer) {
static WebPIDecoder* NewDecoder(WebPDecBuffer* const output_buffer,
const WebPBitstreamFeatures* const features) {
WebPIDecoder* idec = (WebPIDecoder*)WebPSafeCalloc(1ULL, sizeof(*idec));
if (idec == NULL) {
return NULL;
@ -593,25 +602,46 @@ WebPIDecoder* WebPINewDecoder(WebPDecBuffer* output_buffer) {
VP8InitIo(&idec->io_);
WebPResetDecParams(&idec->params_);
idec->params_.output = (output_buffer != NULL) ? output_buffer
: &idec->output_;
if (output_buffer == NULL || WebPAvoidSlowMemory(output_buffer, features)) {
idec->params_.output = &idec->output_;
idec->final_output_ = output_buffer;
if (output_buffer != NULL) {
idec->params_.output->colorspace = output_buffer->colorspace;
}
} else {
idec->params_.output = output_buffer;
idec->final_output_ = NULL;
}
WebPInitCustomIo(&idec->params_, &idec->io_); // Plug the I/O functions.
return idec;
}
//------------------------------------------------------------------------------
// Public functions
WebPIDecoder* WebPINewDecoder(WebPDecBuffer* output_buffer) {
return NewDecoder(output_buffer, NULL);
}
WebPIDecoder* WebPIDecode(const uint8_t* data, size_t data_size,
WebPDecoderConfig* config) {
WebPIDecoder* idec;
WebPBitstreamFeatures tmp_features;
WebPBitstreamFeatures* const features =
(config == NULL) ? &tmp_features : &config->input;
memset(&tmp_features, 0, sizeof(tmp_features));
// Parse the bitstream's features, if requested:
if (data != NULL && data_size > 0 && config != NULL) {
if (WebPGetFeatures(data, data_size, &config->input) != VP8_STATUS_OK) {
if (data != NULL && data_size > 0) {
if (WebPGetFeatures(data, data_size, features) != VP8_STATUS_OK) {
return NULL;
}
}
// Create an instance of the incremental decoder
idec = WebPINewDecoder(config ? &config->output : NULL);
idec = (config != NULL) ? NewDecoder(&config->output, features)
: NewDecoder(NULL, features);
if (idec == NULL) {
return NULL;
}
@ -645,11 +675,11 @@ void WebPIDelete(WebPIDecoder* idec) {
WebPIDecoder* WebPINewRGB(WEBP_CSP_MODE mode, uint8_t* output_buffer,
size_t output_buffer_size, int output_stride) {
const int is_external_memory = (output_buffer != NULL);
const int is_external_memory = (output_buffer != NULL) ? 1 : 0;
WebPIDecoder* idec;
if (mode >= MODE_YUV) return NULL;
if (!is_external_memory) { // Overwrite parameters to sane values.
if (is_external_memory == 0) { // Overwrite parameters to sane values.
output_buffer_size = 0;
output_stride = 0;
} else { // A buffer was passed. Validate the other params.
@ -671,11 +701,11 @@ WebPIDecoder* WebPINewYUVA(uint8_t* luma, size_t luma_size, int luma_stride,
uint8_t* u, size_t u_size, int u_stride,
uint8_t* v, size_t v_size, int v_stride,
uint8_t* a, size_t a_size, int a_stride) {
const int is_external_memory = (luma != NULL);
const int is_external_memory = (luma != NULL) ? 1 : 0;
WebPIDecoder* idec;
WEBP_CSP_MODE colorspace;
if (!is_external_memory) { // Overwrite parameters to sane values.
if (is_external_memory == 0) { // Overwrite parameters to sane values.
luma_size = u_size = v_size = a_size = 0;
luma_stride = u_stride = v_stride = a_stride = 0;
u = v = a = NULL;
@ -783,6 +813,9 @@ static const WebPDecBuffer* GetOutputBuffer(const WebPIDecoder* const idec) {
if (idec->state_ <= STATE_VP8_PARTS0) {
return NULL;
}
if (idec->final_output_ != NULL) {
return NULL; // not yet slow-copied
}
return idec->params_.output;
}
@ -792,7 +825,7 @@ const WebPDecBuffer* WebPIDecodedArea(const WebPIDecoder* idec,
const WebPDecBuffer* const src = GetOutputBuffer(idec);
if (left != NULL) *left = 0;
if (top != NULL) *top = 0;
if (src) {
if (src != NULL) {
if (width != NULL) *width = src->width;
if (height != NULL) *height = idec->params_.last_y;
} else {

View File

@ -119,6 +119,14 @@ static int EmitFancyRGB(const VP8Io* const io, WebPDecParams* const p) {
//------------------------------------------------------------------------------
static void FillAlphaPlane(uint8_t* dst, int w, int h, int stride) {
int j;
for (j = 0; j < h; ++j) {
memset(dst, 0xff, w * sizeof(*dst));
dst += stride;
}
}
static int EmitAlphaYUV(const VP8Io* const io, WebPDecParams* const p,
int expected_num_lines_out) {
const uint8_t* alpha = io->a;
@ -137,10 +145,7 @@ static int EmitAlphaYUV(const VP8Io* const io, WebPDecParams* const p,
}
} else if (buf->a != NULL) {
// the user requested alpha, but there is none, set it to opaque.
for (j = 0; j < mb_h; ++j) {
memset(dst, 0xff, mb_w * sizeof(*dst));
dst += buf->a_stride;
}
FillAlphaPlane(dst, mb_w, mb_h, buf->a_stride);
}
return 0;
}
@ -269,8 +274,8 @@ static int EmitRescaledYUV(const VP8Io* const io, WebPDecParams* const p) {
static int EmitRescaledAlphaYUV(const VP8Io* const io, WebPDecParams* const p,
int expected_num_lines_out) {
const WebPYUVABuffer* const buf = &p->output->u.YUVA;
if (io->a != NULL) {
const WebPYUVABuffer* const buf = &p->output->u.YUVA;
uint8_t* dst_y = buf->y + p->last_y * buf->y_stride;
const uint8_t* src_a = buf->a + p->last_y * buf->a_stride;
const int num_lines_out = Rescale(io->a, io->width, io->mb_h, &p->scaler_a);
@ -280,6 +285,11 @@ static int EmitRescaledAlphaYUV(const VP8Io* const io, WebPDecParams* const p,
WebPMultRows(dst_y, buf->y_stride, src_a, buf->a_stride,
p->scaler_a.dst_width, num_lines_out, 1);
}
} else if (buf->a != NULL) {
// the user requested alpha, but there is none, set it to opaque.
assert(p->last_y + expected_num_lines_out <= io->scaled_height);
FillAlphaPlane(buf->a + p->last_y * buf->a_stride,
io->scaled_width, expected_num_lines_out, buf->a_stride);
}
return 0;
}

View File

@ -50,7 +50,7 @@ VP8Decoder* VP8New(void) {
SetOk(dec);
WebPGetWorkerInterface()->Init(&dec->worker_);
dec->ready_ = 0;
dec->num_parts_ = 1;
dec->num_parts_minus_one_ = 0;
}
return dec;
}
@ -194,8 +194,8 @@ static VP8StatusCode ParsePartitions(VP8Decoder* const dec,
size_t last_part;
size_t p;
dec->num_parts_ = 1 << VP8GetValue(br, 2);
last_part = dec->num_parts_ - 1;
dec->num_parts_minus_one_ = (1 << VP8GetValue(br, 2)) - 1;
last_part = dec->num_parts_minus_one_;
if (size < 3 * last_part) {
// we can't even read the sizes with sz[]! That's a failure.
return VP8_STATUS_NOT_ENOUGH_DATA;
@ -303,15 +303,22 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) {
dec->mb_w_ = (pic_hdr->width_ + 15) >> 4;
dec->mb_h_ = (pic_hdr->height_ + 15) >> 4;
// Setup default output area (can be later modified during io->setup())
io->width = pic_hdr->width_;
io->height = pic_hdr->height_;
io->use_scaling = 0;
// IMPORTANT! use some sane dimensions in crop_* and scaled_* fields.
// So they can be used interchangeably without always testing for
// 'use_cropping'.
io->use_cropping = 0;
io->crop_top = 0;
io->crop_left = 0;
io->crop_right = io->width;
io->crop_bottom = io->height;
io->use_scaling = 0;
io->scaled_width = io->width;
io->scaled_height = io->height;
io->mb_w = io->width; // sanity check
io->mb_h = io->height; // ditto
@ -579,7 +586,7 @@ static int ParseFrame(VP8Decoder* const dec, VP8Io* io) {
for (dec->mb_y_ = 0; dec->mb_y_ < dec->br_mb_y_; ++dec->mb_y_) {
// Parse bitstream for this row.
VP8BitReader* const token_br =
&dec->parts_[dec->mb_y_ & (dec->num_parts_ - 1)];
&dec->parts_[dec->mb_y_ & dec->num_parts_minus_one_];
if (!VP8ParseIntraModeRow(&dec->br_, dec)) {
return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA,
"Premature end-of-partition0 encountered.");
@ -649,8 +656,7 @@ void VP8Clear(VP8Decoder* const dec) {
return;
}
WebPGetWorkerInterface()->End(&dec->worker_);
ALPHDelete(dec->alph_dec_);
dec->alph_dec_ = NULL;
WebPDeallocateAlphaMemory(dec);
WebPSafeFree(dec->mem_);
dec->mem_ = NULL;
dec->mem_size_ = 0;
@ -659,4 +665,3 @@ void VP8Clear(VP8Decoder* const dec) {
}
//------------------------------------------------------------------------------

View File

@ -32,7 +32,7 @@ extern "C" {
// version numbers
#define DEC_MAJ_VERSION 0
#define DEC_MIN_VERSION 5
#define DEC_REV_VERSION 0
#define DEC_REV_VERSION 2
// YUV-cache parameters. Cache is 32-bytes wide (= one cacheline).
// Constraints are: We need to store one 16x16 block of luma samples (y),
@ -209,8 +209,8 @@ struct VP8Decoder {
int tl_mb_x_, tl_mb_y_; // top-left MB that must be in-loop filtered
int br_mb_x_, br_mb_y_; // last bottom-right MB that must be decoded
// number of partitions.
int num_parts_;
// number of partitions minus one.
uint32_t num_parts_minus_one_;
// per-partition boolean decoders.
VP8BitReader parts_[MAX_NUM_PARTITIONS];
@ -258,9 +258,11 @@ struct VP8Decoder {
struct ALPHDecoder* alph_dec_; // alpha-plane decoder object
const uint8_t* alpha_data_; // compressed alpha data (if present)
size_t alpha_data_size_;
int is_alpha_decoded_; // true if alpha_data_ is decoded in alpha_plane_
uint8_t* alpha_plane_; // output. Persistent, contains the whole data.
int alpha_dithering_; // derived from decoding options (0=off, 100=full).
int is_alpha_decoded_; // true if alpha_data_ is decoded in alpha_plane_
uint8_t* alpha_plane_mem_; // memory allocated for alpha_plane_
uint8_t* alpha_plane_; // output. Persistent, contains the whole data.
const uint8_t* alpha_prev_line_; // last decoded alpha row (or NULL)
int alpha_dithering_; // derived from decoding options (0=off, 100=full)
};
//------------------------------------------------------------------------------
@ -306,6 +308,7 @@ int VP8DecodeMB(VP8Decoder* const dec, VP8BitReader* const token_br);
// in alpha.c
const uint8_t* VP8DecompressAlphaRows(VP8Decoder* const dec,
const VP8Io* const io,
int row, int num_rows);
//------------------------------------------------------------------------------

View File

@ -252,11 +252,11 @@ static int ReadHuffmanCodeLengths(
int symbol;
int max_symbol;
int prev_code_len = DEFAULT_CODE_LENGTH;
HuffmanCode table[1 << LENGTHS_TABLE_BITS];
HuffmanTables tables;
if (!VP8LBuildHuffmanTable(table, LENGTHS_TABLE_BITS,
code_length_code_lengths,
NUM_CODE_LENGTH_CODES)) {
if (!VP8LHuffmanTablesAllocate(1 << LENGTHS_TABLE_BITS, &tables) ||
!VP8LBuildHuffmanTable(&tables, LENGTHS_TABLE_BITS,
code_length_code_lengths, NUM_CODE_LENGTH_CODES)) {
goto End;
}
@ -276,7 +276,7 @@ static int ReadHuffmanCodeLengths(
int code_len;
if (max_symbol-- == 0) break;
VP8LFillBitWindow(br);
p = &table[VP8LPrefetchBits(br) & LENGTHS_TABLE_MASK];
p = &tables.curr_segment->start[VP8LPrefetchBits(br) & LENGTHS_TABLE_MASK];
VP8LSetBitPos(br, br->bit_pos_ + p->bits);
code_len = p->value;
if (code_len < kCodeLengthLiterals) {
@ -299,6 +299,7 @@ static int ReadHuffmanCodeLengths(
ok = 1;
End:
VP8LHuffmanTablesDeallocate(&tables);
if (!ok) dec->status_ = VP8_STATUS_BITSTREAM_ERROR;
return ok;
}
@ -306,7 +307,8 @@ static int ReadHuffmanCodeLengths(
// 'code_lengths' is pre-allocated temporary buffer, used for creating Huffman
// tree.
static int ReadHuffmanCode(int alphabet_size, VP8LDecoder* const dec,
int* const code_lengths, HuffmanCode* const table) {
int* const code_lengths,
HuffmanTables* const table) {
int ok = 0;
int size = 0;
VP8LBitReader* const br = &dec->br_;
@ -361,12 +363,18 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize,
VP8LMetadata* const hdr = &dec->hdr_;
uint32_t* huffman_image = NULL;
HTreeGroup* htree_groups = NULL;
HuffmanCode* huffman_tables = NULL;
HuffmanCode* next = NULL;
HuffmanTables* huffman_tables = &hdr->huffman_tables_;
int num_htree_groups = 1;
int num_htree_groups_max = 1;
int max_alphabet_size = 0;
int* code_lengths = NULL;
const int table_size = kTableSize[color_cache_bits];
int* mapping = NULL;
int ok = 0;
// Check the table has been 0 initialized (through InitMetadata).
assert(huffman_tables->root.start == NULL);
assert(huffman_tables->curr_segment == NULL);
if (allow_recursion && VP8LReadBits(br, 1)) {
// use meta Huffman codes.
@ -383,10 +391,36 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize,
// The huffman data is stored in red and green bytes.
const int group = (huffman_image[i] >> 8) & 0xffff;
huffman_image[i] = group;
if (group >= num_htree_groups) {
num_htree_groups = group + 1;
if (group >= num_htree_groups_max) {
num_htree_groups_max = group + 1;
}
}
// Check the validity of num_htree_groups_max. If it seems too big, use a
// smaller value for later. This will prevent big memory allocations to end
// up with a bad bitstream anyway.
// The value of 1000 is totally arbitrary. We know that num_htree_groups_max
// is smaller than (1 << 16) and should be smaller than the number of pixels
// (though the format allows it to be bigger).
if (num_htree_groups_max > 1000 || num_htree_groups_max > xsize * ysize) {
// Create a mapping from the used indices to the minimal set of used
// values [0, num_htree_groups)
mapping = (int*)WebPSafeMalloc(num_htree_groups_max, sizeof(*mapping));
if (mapping == NULL) {
dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
goto Error;
}
// -1 means a value is unmapped, and therefore unused in the Huffman
// image.
memset(mapping, 0xff, num_htree_groups_max * sizeof(*mapping));
for (num_htree_groups = 0, i = 0; i < huffman_pixs; ++i) {
// Get the current mapping for the group and remap the Huffman image.
int* const mapped_group = &mapping[huffman_image[i]];
if (*mapped_group == -1) *mapped_group = num_htree_groups++;
huffman_image[i] = *mapped_group;
}
} else {
num_htree_groups = num_htree_groups_max;
}
}
if (br->eos_) goto Error;
@ -402,83 +436,99 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize,
}
}
huffman_tables = (HuffmanCode*)WebPSafeMalloc(num_htree_groups * table_size,
sizeof(*huffman_tables));
htree_groups = VP8LHtreeGroupsNew(num_htree_groups);
code_lengths = (int*)WebPSafeCalloc((uint64_t)max_alphabet_size,
sizeof(*code_lengths));
if (htree_groups == NULL || code_lengths == NULL || huffman_tables == NULL) {
if (htree_groups == NULL || code_lengths == NULL ||
!VP8LHuffmanTablesAllocate(num_htree_groups * table_size,
huffman_tables)) {
dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
goto Error;
}
next = huffman_tables;
for (i = 0; i < num_htree_groups; ++i) {
HTreeGroup* const htree_group = &htree_groups[i];
HuffmanCode** const htrees = htree_group->htrees;
int size;
int total_size = 0;
int is_trivial_literal = 1;
int max_bits = 0;
for (j = 0; j < HUFFMAN_CODES_PER_META_CODE; ++j) {
int alphabet_size = kAlphabetSize[j];
htrees[j] = next;
if (j == 0 && color_cache_bits > 0) {
alphabet_size += 1 << color_cache_bits;
}
size = ReadHuffmanCode(alphabet_size, dec, code_lengths, next);
if (size == 0) {
goto Error;
}
if (is_trivial_literal && kLiteralMap[j] == 1) {
is_trivial_literal = (next->bits == 0);
}
total_size += next->bits;
next += size;
if (j <= ALPHA) {
int local_max_bits = code_lengths[0];
int k;
for (k = 1; k < alphabet_size; ++k) {
if (code_lengths[k] > local_max_bits) {
local_max_bits = code_lengths[k];
}
for (i = 0; i < num_htree_groups_max; ++i) {
// If the index "i" is unused in the Huffman image, just make sure the
// coefficients are valid but do not store them.
if (mapping != NULL && mapping[i] == -1) {
for (j = 0; j < HUFFMAN_CODES_PER_META_CODE; ++j) {
int alphabet_size = kAlphabetSize[j];
if (j == 0 && color_cache_bits > 0) {
alphabet_size += (1 << color_cache_bits);
}
// Passing in NULL so that nothing gets filled.
if (!ReadHuffmanCode(alphabet_size, dec, code_lengths, NULL)) {
goto Error;
}
max_bits += local_max_bits;
}
}
htree_group->is_trivial_literal = is_trivial_literal;
htree_group->is_trivial_code = 0;
if (is_trivial_literal) {
const int red = htrees[RED][0].value;
const int blue = htrees[BLUE][0].value;
const int alpha = htrees[ALPHA][0].value;
htree_group->literal_arb =
((uint32_t)alpha << 24) | (red << 16) | blue;
if (total_size == 0 && htrees[GREEN][0].value < NUM_LITERAL_CODES) {
htree_group->is_trivial_code = 1;
htree_group->literal_arb |= htrees[GREEN][0].value << 8;
} else {
HTreeGroup* const htree_group =
&htree_groups[(mapping == NULL) ? i : mapping[i]];
HuffmanCode** const htrees = htree_group->htrees;
int size;
int total_size = 0;
int is_trivial_literal = 1;
int max_bits = 0;
for (j = 0; j < HUFFMAN_CODES_PER_META_CODE; ++j) {
int alphabet_size = kAlphabetSize[j];
if (j == 0 && color_cache_bits > 0) {
alphabet_size += (1 << color_cache_bits);
}
size =
ReadHuffmanCode(alphabet_size, dec, code_lengths, huffman_tables);
htrees[j] = huffman_tables->curr_segment->curr_table;
if (size == 0) {
goto Error;
}
if (is_trivial_literal && kLiteralMap[j] == 1) {
is_trivial_literal = (htrees[j]->bits == 0);
}
total_size += htrees[j]->bits;
huffman_tables->curr_segment->curr_table += size;
if (j <= ALPHA) {
int local_max_bits = code_lengths[0];
int k;
for (k = 1; k < alphabet_size; ++k) {
if (code_lengths[k] > local_max_bits) {
local_max_bits = code_lengths[k];
}
}
max_bits += local_max_bits;
}
}
htree_group->is_trivial_literal = is_trivial_literal;
htree_group->is_trivial_code = 0;
if (is_trivial_literal) {
const int red = htrees[RED][0].value;
const int blue = htrees[BLUE][0].value;
const int alpha = htrees[ALPHA][0].value;
htree_group->literal_arb = ((uint32_t)alpha << 24) | (red << 16) | blue;
if (total_size == 0 && htrees[GREEN][0].value < NUM_LITERAL_CODES) {
htree_group->is_trivial_code = 1;
htree_group->literal_arb |= htrees[GREEN][0].value << 8;
}
}
htree_group->use_packed_table =
!htree_group->is_trivial_code && (max_bits < HUFFMAN_PACKED_BITS);
if (htree_group->use_packed_table) BuildPackedTable(htree_group);
}
htree_group->use_packed_table = !htree_group->is_trivial_code &&
(max_bits < HUFFMAN_PACKED_BITS);
if (htree_group->use_packed_table) BuildPackedTable(htree_group);
}
WebPSafeFree(code_lengths);
ok = 1;
// All OK. Finalize pointers and return.
// All OK. Finalize pointers.
hdr->huffman_image_ = huffman_image;
hdr->num_htree_groups_ = num_htree_groups;
hdr->htree_groups_ = htree_groups;
hdr->huffman_tables_ = huffman_tables;
return 1;
Error:
WebPSafeFree(code_lengths);
WebPSafeFree(huffman_image);
WebPSafeFree(huffman_tables);
VP8LHtreeGroupsFree(htree_groups);
return 0;
WebPSafeFree(mapping);
if (!ok) {
WebPSafeFree(huffman_image);
VP8LHuffmanTablesDeallocate(huffman_tables);
VP8LHtreeGroupsFree(htree_groups);
}
return ok;
}
//------------------------------------------------------------------------------
@ -714,34 +764,22 @@ static void ApplyInverseTransforms(VP8LDecoder* const dec, int num_rows,
}
}
// Special method for paletted alpha data.
static void ApplyInverseTransformsAlpha(VP8LDecoder* const dec, int num_rows,
const uint8_t* const rows) {
const int start_row = dec->last_row_;
const int end_row = start_row + num_rows;
const uint8_t* rows_in = rows;
uint8_t* rows_out = (uint8_t*)dec->io_->opaque + dec->io_->width * start_row;
VP8LTransform* const transform = &dec->transforms_[0];
assert(dec->next_transform_ == 1);
assert(transform->type_ == COLOR_INDEXING_TRANSFORM);
VP8LColorIndexInverseTransformAlpha(transform, start_row, end_row, rows_in,
rows_out);
}
// Processes (transforms, scales & color-converts) the rows decoded after the
// last call.
static void ProcessRows(VP8LDecoder* const dec, int row) {
const uint32_t* const rows = dec->pixels_ + dec->width_ * dec->last_row_;
const int num_rows = row - dec->last_row_;
if (num_rows <= 0) return; // Nothing to be done.
ApplyInverseTransforms(dec, num_rows, rows);
// Emit output.
{
assert(row <= dec->io_->crop_bottom);
// We can't process more than NUM_ARGB_CACHE_ROWS at a time (that's the size
// of argb_cache_), but we currently don't need more than that.
assert(num_rows <= NUM_ARGB_CACHE_ROWS);
if (num_rows > 0) { // Emit output.
VP8Io* const io = dec->io_;
uint8_t* rows_data = (uint8_t*)dec->argb_cache_;
const int in_stride = io->width * sizeof(uint32_t); // in unit of RGBA
ApplyInverseTransforms(dec, num_rows, rows);
if (!SetCropWindow(io, dec->last_row_, row, &rows_data, in_stride)) {
// Nothing to output (this time).
} else {
@ -786,14 +824,46 @@ static int Is8bOptimizable(const VP8LMetadata* const hdr) {
return 1;
}
static void ExtractPalettedAlphaRows(VP8LDecoder* const dec, int row) {
const int num_rows = row - dec->last_row_;
const uint8_t* const in =
(uint8_t*)dec->pixels_ + dec->width_ * dec->last_row_;
if (num_rows > 0) {
ApplyInverseTransformsAlpha(dec, num_rows, in);
static void AlphaApplyFilter(ALPHDecoder* const alph_dec,
int first_row, int last_row,
uint8_t* out, int stride) {
if (alph_dec->filter_ != WEBP_FILTER_NONE) {
int y;
const uint8_t* prev_line = alph_dec->prev_line_;
assert(WebPUnfilters[alph_dec->filter_] != NULL);
for (y = first_row; y < last_row; ++y) {
WebPUnfilters[alph_dec->filter_](prev_line, out, out, stride);
prev_line = out;
out += stride;
}
alph_dec->prev_line_ = prev_line;
}
dec->last_row_ = dec->last_out_row_ = row;
}
static void ExtractPalettedAlphaRows(VP8LDecoder* const dec, int last_row) {
// For vertical and gradient filtering, we need to decode the part above the
// crop_top row, in order to have the correct spatial predictors.
ALPHDecoder* const alph_dec = (ALPHDecoder*)dec->io_->opaque;
const int top_row =
(alph_dec->filter_ == WEBP_FILTER_NONE ||
alph_dec->filter_ == WEBP_FILTER_HORIZONTAL) ? dec->io_->crop_top
: dec->last_row_;
const int first_row = (dec->last_row_ < top_row) ? top_row : dec->last_row_;
assert(last_row <= dec->io_->crop_bottom);
if (last_row > first_row) {
// Special method for paletted alpha data. We only process the cropped area.
const int width = dec->io_->width;
uint8_t* out = alph_dec->output_ + width * first_row;
const uint8_t* const in =
(uint8_t*)dec->pixels_ + dec->width_ * first_row;
VP8LTransform* const transform = &dec->transforms_[0];
assert(dec->next_transform_ == 1);
assert(transform->type_ == COLOR_INDEXING_TRANSFORM);
VP8LColorIndexInverseTransformAlpha(transform, first_row, last_row,
in, out);
AlphaApplyFilter(alph_dec, first_row, last_row, out, width);
}
dec->last_row_ = dec->last_out_row_ = last_row;
}
//------------------------------------------------------------------------------
@ -922,14 +992,14 @@ static int DecodeAlphaData(VP8LDecoder* const dec, uint8_t* const data,
int col = dec->last_pixel_ % width;
VP8LBitReader* const br = &dec->br_;
VP8LMetadata* const hdr = &dec->hdr_;
const HTreeGroup* htree_group = GetHtreeGroupForPos(hdr, col, row);
int pos = dec->last_pixel_; // current position
const int end = width * height; // End of data
const int last = width * last_row; // Last pixel to decode
const int len_code_limit = NUM_LITERAL_CODES + NUM_LENGTH_CODES;
const int mask = hdr->huffman_mask_;
assert(htree_group != NULL);
assert(pos < end);
const HTreeGroup* htree_group =
(pos < last) ? GetHtreeGroupForPos(hdr, col, row) : NULL;
assert(pos <= end);
assert(last_row <= height);
assert(Is8bOptimizable(hdr));
@ -939,6 +1009,7 @@ static int DecodeAlphaData(VP8LDecoder* const dec, uint8_t* const data,
if ((col & mask) == 0) {
htree_group = GetHtreeGroupForPos(hdr, col, row);
}
assert(htree_group != NULL);
VP8LFillBitWindow(br);
code = ReadSymbol(htree_group->htrees[GREEN], br);
if (code < NUM_LITERAL_CODES) { // Literal
@ -948,7 +1019,7 @@ static int DecodeAlphaData(VP8LDecoder* const dec, uint8_t* const data,
if (col >= width) {
col = 0;
++row;
if (row % NUM_ARGB_CACHE_ROWS == 0) {
if (row <= last_row && (row % NUM_ARGB_CACHE_ROWS == 0)) {
ExtractPalettedAlphaRows(dec, row);
}
}
@ -971,7 +1042,7 @@ static int DecodeAlphaData(VP8LDecoder* const dec, uint8_t* const data,
while (col >= width) {
col -= width;
++row;
if (row % NUM_ARGB_CACHE_ROWS == 0) {
if (row <= last_row && (row % NUM_ARGB_CACHE_ROWS == 0)) {
ExtractPalettedAlphaRows(dec, row);
}
}
@ -985,7 +1056,7 @@ static int DecodeAlphaData(VP8LDecoder* const dec, uint8_t* const data,
assert(br->eos_ == VP8LIsEndOfStream(br));
}
// Process the remaining rows corresponding to last row-block.
ExtractPalettedAlphaRows(dec, row);
ExtractPalettedAlphaRows(dec, row > last_row ? last_row : row);
End:
if (!ok || (br->eos_ && pos < end)) {
@ -1025,7 +1096,6 @@ static int DecodeImageData(VP8LDecoder* const dec, uint32_t* const data,
int col = dec->last_pixel_ % width;
VP8LBitReader* const br = &dec->br_;
VP8LMetadata* const hdr = &dec->hdr_;
HTreeGroup* htree_group = GetHtreeGroupForPos(hdr, col, row);
uint32_t* src = data + dec->last_pixel_;
uint32_t* last_cached = src;
uint32_t* const src_end = data + width * height; // End of data
@ -1036,8 +1106,9 @@ static int DecodeImageData(VP8LDecoder* const dec, uint32_t* const data,
VP8LColorCache* const color_cache =
(hdr->color_cache_size_ > 0) ? &hdr->color_cache_ : NULL;
const int mask = hdr->huffman_mask_;
assert(htree_group != NULL);
assert(src < src_end);
const HTreeGroup* htree_group =
(src < src_last) ? GetHtreeGroupForPos(hdr, col, row) : NULL;
assert(dec->last_row_ < last_row);
assert(src_last <= src_end);
while (src < src_last) {
@ -1049,7 +1120,10 @@ static int DecodeImageData(VP8LDecoder* const dec, uint32_t* const data,
// Only update when changing tile. Note we could use this test:
// if "((((prev_col ^ col) | prev_row ^ row)) > mask)" -> tile changed
// but that's actually slower and needs storing the previous col/row.
if ((col & mask) == 0) htree_group = GetHtreeGroupForPos(hdr, col, row);
if ((col & mask) == 0) {
htree_group = GetHtreeGroupForPos(hdr, col, row);
}
assert(htree_group != NULL);
if (htree_group->is_trivial_code) {
*src = htree_group->literal_arb;
goto AdvanceByOne;
@ -1080,8 +1154,10 @@ static int DecodeImageData(VP8LDecoder* const dec, uint32_t* const data,
if (col >= width) {
col = 0;
++row;
if ((row % NUM_ARGB_CACHE_ROWS == 0) && (process_func != NULL)) {
process_func(dec, row);
if (process_func != NULL) {
if (row <= last_row && (row % NUM_ARGB_CACHE_ROWS == 0)) {
process_func(dec, row);
}
}
if (color_cache != NULL) {
while (last_cached < src) {
@ -1108,8 +1184,10 @@ static int DecodeImageData(VP8LDecoder* const dec, uint32_t* const data,
while (col >= width) {
col -= width;
++row;
if ((row % NUM_ARGB_CACHE_ROWS == 0) && (process_func != NULL)) {
process_func(dec, row);
if (process_func != NULL) {
if (row <= last_row && (row % NUM_ARGB_CACHE_ROWS == 0)) {
process_func(dec, row);
}
}
}
// Because of the check done above (before 'src' was incremented by
@ -1135,12 +1213,24 @@ static int DecodeImageData(VP8LDecoder* const dec, uint32_t* const data,
assert(br->eos_ == VP8LIsEndOfStream(br));
}
if (dec->incremental_ && br->eos_ && src < src_end) {
br->eos_ = VP8LIsEndOfStream(br);
// In incremental decoding:
// br->eos_ && src < src_last: if 'br' reached the end of the buffer and
// 'src_last' has not been reached yet, there is not enough data. 'dec' has to
// be reset until there is more data.
// !br->eos_ && src < src_last: this cannot happen as either the buffer is
// fully read, either enough has been read to reach 'src_last'.
// src >= src_last: 'src_last' is reached, all is fine. 'src' can actually go
// beyond 'src_last' in case the image is cropped and an LZ77 goes further.
// The buffer might have been enough or there is some left. 'br->eos_' does
// not matter.
assert(!dec->incremental_ || (br->eos_ && src < src_last) || src >= src_last);
if (dec->incremental_ && br->eos_ && src < src_last) {
RestoreState(dec);
} else if (!br->eos_) {
} else if ((dec->incremental_ && src >= src_last) || !br->eos_) {
// Process the remaining rows corresponding to last row-block.
if (process_func != NULL) {
process_func(dec, row);
process_func(dec, row > last_row ? last_row : row);
}
dec->status_ = VP8_STATUS_OK;
dec->last_pixel_ = (int)(src - data); // end-of-scan marker
@ -1255,7 +1345,7 @@ static void ClearMetadata(VP8LMetadata* const hdr) {
assert(hdr != NULL);
WebPSafeFree(hdr->huffman_image_);
WebPSafeFree(hdr->huffman_tables_);
VP8LHuffmanTablesDeallocate(&hdr->huffman_tables_);
VP8LHtreeGroupsFree(hdr->htree_groups_);
VP8LColorCacheClear(&hdr->color_cache_);
VP8LColorCacheClear(&hdr->saved_color_cache_);
@ -1438,46 +1528,51 @@ static int AllocateInternalBuffers8b(VP8LDecoder* const dec) {
//------------------------------------------------------------------------------
// Special row-processing that only stores the alpha data.
static void ExtractAlphaRows(VP8LDecoder* const dec, int row) {
const int num_rows = row - dec->last_row_;
const uint32_t* const in = dec->pixels_ + dec->width_ * dec->last_row_;
static void ExtractAlphaRows(VP8LDecoder* const dec, int last_row) {
int cur_row = dec->last_row_;
int num_rows = last_row - cur_row;
const uint32_t* in = dec->pixels_ + dec->width_ * cur_row;
if (num_rows <= 0) return; // Nothing to be done.
ApplyInverseTransforms(dec, num_rows, in);
// Extract alpha (which is stored in the green plane).
{
assert(last_row <= dec->io_->crop_bottom);
while (num_rows > 0) {
const int num_rows_to_process =
(num_rows > NUM_ARGB_CACHE_ROWS) ? NUM_ARGB_CACHE_ROWS : num_rows;
// Extract alpha (which is stored in the green plane).
ALPHDecoder* const alph_dec = (ALPHDecoder*)dec->io_->opaque;
uint8_t* const output = alph_dec->output_;
const int width = dec->io_->width; // the final width (!= dec->width_)
const int cache_pixs = width * num_rows;
uint8_t* const dst = (uint8_t*)dec->io_->opaque + width * dec->last_row_;
const int cache_pixs = width * num_rows_to_process;
uint8_t* const dst = output + width * cur_row;
const uint32_t* const src = dec->argb_cache_;
int i;
ApplyInverseTransforms(dec, num_rows_to_process, in);
for (i = 0; i < cache_pixs; ++i) dst[i] = (src[i] >> 8) & 0xff;
AlphaApplyFilter(alph_dec,
cur_row, cur_row + num_rows_to_process, dst, width);
num_rows -= num_rows_to_process;
in += num_rows_to_process * dec->width_;
cur_row += num_rows_to_process;
}
dec->last_row_ = dec->last_out_row_ = row;
assert(cur_row == last_row);
dec->last_row_ = dec->last_out_row_ = last_row;
}
int VP8LDecodeAlphaHeader(ALPHDecoder* const alph_dec,
const uint8_t* const data, size_t data_size,
uint8_t* const output) {
const uint8_t* const data, size_t data_size) {
int ok = 0;
VP8LDecoder* dec;
VP8Io* io;
VP8LDecoder* dec = VP8LNew();
if (dec == NULL) return 0;
assert(alph_dec != NULL);
alph_dec->vp8l_dec_ = VP8LNew();
if (alph_dec->vp8l_dec_ == NULL) return 0;
dec = alph_dec->vp8l_dec_;
alph_dec->vp8l_dec_ = dec;
dec->width_ = alph_dec->width_;
dec->height_ = alph_dec->height_;
dec->io_ = &alph_dec->io_;
io = dec->io_;
VP8InitIo(io);
WebPInitCustomIo(NULL, io); // Just a sanity Init. io won't be used.
io->opaque = output;
io->width = alph_dec->width_;
io->height = alph_dec->height_;
dec->io_->opaque = alph_dec;
dec->io_->width = alph_dec->width_;
dec->io_->height = alph_dec->height_;
dec->status_ = VP8_STATUS_OK;
VP8LInitBitReader(&dec->br_, data, data_size);
@ -1492,11 +1587,11 @@ int VP8LDecodeAlphaHeader(ALPHDecoder* const alph_dec,
if (dec->next_transform_ == 1 &&
dec->transforms_[0].type_ == COLOR_INDEXING_TRANSFORM &&
Is8bOptimizable(&dec->hdr_)) {
alph_dec->use_8b_decode = 1;
alph_dec->use_8b_decode_ = 1;
ok = AllocateInternalBuffers8b(dec);
} else {
// Allocate internal buffers (note that dec->width_ may have changed here).
alph_dec->use_8b_decode = 0;
alph_dec->use_8b_decode_ = 0;
ok = AllocateInternalBuffers32b(dec, alph_dec->width_);
}
@ -1515,12 +1610,12 @@ int VP8LDecodeAlphaImageStream(ALPHDecoder* const alph_dec, int last_row) {
assert(dec != NULL);
assert(last_row <= dec->height_);
if (dec->last_pixel_ == dec->width_ * dec->height_) {
if (dec->last_row_ >= last_row) {
return 1; // done
}
// Decode (with special row processing).
return alph_dec->use_8b_decode ?
return alph_dec->use_8b_decode_ ?
DecodeAlphaData(dec, (uint8_t*)dec->pixels_, dec->width_, dec->height_,
last_row) :
DecodeImageData(dec, dec->pixels_, dec->width_, dec->height_,
@ -1565,7 +1660,7 @@ int VP8LDecodeImage(VP8LDecoder* const dec) {
// Sanity checks.
if (dec == NULL) return 0;
assert(dec->hdr_.huffman_tables_ != NULL);
assert(dec->hdr_.huffman_tables_.root.start != NULL);
assert(dec->hdr_.htree_groups_ != NULL);
assert(dec->hdr_.num_htree_groups_ > 0);
@ -1611,7 +1706,7 @@ int VP8LDecodeImage(VP8LDecoder* const dec) {
// Decode.
if (!DecodeImageData(dec, dec->pixels_, dec->width_, dec->height_,
dec->height_, ProcessRows)) {
io->crop_bottom, ProcessRows)) {
goto Err;
}

View File

@ -51,7 +51,7 @@ typedef struct {
uint32_t *huffman_image_;
int num_htree_groups_;
HTreeGroup *htree_groups_;
HuffmanCode *huffman_tables_;
HuffmanTables huffman_tables_;
} VP8LMetadata;
typedef struct VP8LDecoder VP8LDecoder;
@ -100,8 +100,7 @@ struct ALPHDecoder; // Defined in dec/alphai.h.
// Decodes image header for alpha data stored using lossless compression.
// Returns false in case of error.
int VP8LDecodeAlphaHeader(struct ALPHDecoder* const alph_dec,
const uint8_t* const data, size_t data_size,
uint8_t* const output);
const uint8_t* const data, size_t data_size);
// Decodes *at least* 'last_row' rows of alpha. If some of the initial rows are
// already decoded in previous call(s), it will resume decoding from where it

View File

@ -415,7 +415,8 @@ static VP8StatusCode ParseHeadersInternal(const uint8_t* data,
}
VP8StatusCode WebPParseHeaders(WebPHeaderStructure* const headers) {
VP8StatusCode status;
// status is marked volatile as a workaround for a clang-3.8 (aarch64) bug
volatile VP8StatusCode status;
int has_animation = 0;
assert(headers != NULL);
// fill out headers, ignore width/height/has_alpha.
@ -512,10 +513,12 @@ static VP8StatusCode DecodeInto(const uint8_t* const data, size_t data_size,
if (status != VP8_STATUS_OK) {
WebPFreeDecBuffer(params->output);
}
if (params->options != NULL && params->options->flip) {
status = WebPFlipBuffer(params->output);
} else {
if (params->options != NULL && params->options->flip) {
// This restores the original stride values if options->flip was used
// during the call to WebPAllocateDecBuffer above.
status = WebPFlipBuffer(params->output);
}
}
return status;
}
@ -758,9 +761,24 @@ VP8StatusCode WebPDecode(const uint8_t* data, size_t data_size,
}
WebPResetDecParams(&params);
params.output = &config->output;
params.options = &config->options;
status = DecodeInto(data, data_size, &params);
params.output = &config->output;
if (WebPAvoidSlowMemory(params.output, &config->input)) {
// decoding to slow memory: use a temporary in-mem buffer to decode into.
WebPDecBuffer in_mem_buffer;
WebPInitDecBuffer(&in_mem_buffer);
in_mem_buffer.colorspace = config->output.colorspace;
in_mem_buffer.width = config->input.width;
in_mem_buffer.height = config->input.height;
params.output = &in_mem_buffer;
status = DecodeInto(data, data_size, &params);
if (status == VP8_STATUS_OK) { // do the slow-copy
status = WebPCopyDecBufferPixels(&in_mem_buffer, &config->output);
}
WebPFreeDecBuffer(&in_mem_buffer);
} else {
status = DecodeInto(data, data_size, &params);
}
return status;
}
@ -809,7 +827,7 @@ int WebPIoInitFromOptions(const WebPDecoderOptions* const options,
}
// Filter
io->bypass_filtering = options && options->bypass_filtering;
io->bypass_filtering = (options != NULL) && options->bypass_filtering;
// Fancy upsampler
#ifdef FANCY_UPSAMPLING
@ -826,4 +844,3 @@ int WebPIoInitFromOptions(const WebPDecoderOptions* const options,
}
//------------------------------------------------------------------------------

View File

@ -45,11 +45,20 @@ struct WebPDecParams {
OutputFunc emit; // output RGB or YUV samples
OutputAlphaFunc emit_alpha; // output alpha channel
OutputRowFunc emit_alpha_row; // output one line of rescaled alpha values
WebPDecBuffer* final_output; // In case the user supplied a slow-memory
// output, we decode image in temporary buffer
// (this::output) and copy it here.
WebPDecBuffer tmp_buffer; // this::output will point to this one in case
// of slow memory.
};
// Should be called first, before any use of the WebPDecParams object.
void WebPResetDecParams(WebPDecParams* const params);
// Delete all memory (after an error occurred, for instance)
void WebPFreeDecParams(WebPDecParams* const params);
//------------------------------------------------------------------------------
// Header parsing helpers
@ -107,13 +116,23 @@ VP8StatusCode WebPAllocateDecBuffer(int width, int height,
VP8StatusCode WebPFlipBuffer(WebPDecBuffer* const buffer);
// Copy 'src' into 'dst' buffer, making sure 'dst' is not marked as owner of the
// memory (still held by 'src').
// memory (still held by 'src'). No pixels are copied.
void WebPCopyDecBuffer(const WebPDecBuffer* const src,
WebPDecBuffer* const dst);
// Copy and transfer ownership from src to dst (beware of parameter order!)
void WebPGrabDecBuffer(WebPDecBuffer* const src, WebPDecBuffer* const dst);
// Copy pixels from 'src' into a *preallocated* 'dst' buffer. Returns
// VP8_STATUS_INVALID_PARAM if the 'dst' is not set up correctly for the copy.
VP8StatusCode WebPCopyDecBufferPixels(const WebPDecBuffer* const src,
WebPDecBuffer* const dst);
// Returns true if decoding will be slow with the current configuration
// and bitstream features.
int WebPAvoidSlowMemory(const WebPDecBuffer* const output,
const WebPBitstreamFeatures* const features);
//------------------------------------------------------------------------------
#ifdef __cplusplus

View File

@ -9,6 +9,6 @@ libwebpdemuxinclude_HEADERS += ../webp/mux_types.h
libwebpdemuxinclude_HEADERS += ../webp/types.h
libwebpdemux_la_LIBADD = ../libwebp.la
libwebpdemux_la_LDFLAGS = -no-undefined -version-info 2:0:0
libwebpdemux_la_LDFLAGS = -no-undefined -version-info 2:1:0
libwebpdemuxincludedir = $(includedir)/webp
pkgconfig_DATA = libwebpdemux.pc

View File

@ -112,18 +112,15 @@ WebPAnimDecoder* WebPAnimDecoderNewInternal(
dec->info_.bgcolor = WebPDemuxGetI(dec->demux_, WEBP_FF_BACKGROUND_COLOR);
dec->info_.frame_count = WebPDemuxGetI(dec->demux_, WEBP_FF_FRAME_COUNT);
{
const int canvas_bytes =
dec->info_.canvas_width * NUM_CHANNELS * dec->info_.canvas_height;
// Note: calloc() because we fill frame with zeroes as well.
dec->curr_frame_ = WebPSafeCalloc(1ULL, canvas_bytes);
if (dec->curr_frame_ == NULL) goto Error;
dec->prev_frame_disposed_ = WebPSafeCalloc(1ULL, canvas_bytes);
if (dec->prev_frame_disposed_ == NULL) goto Error;
}
// Note: calloc() because we fill frame with zeroes as well.
dec->curr_frame_ = (uint8_t*)WebPSafeCalloc(
dec->info_.canvas_width * NUM_CHANNELS, dec->info_.canvas_height);
if (dec->curr_frame_ == NULL) goto Error;
dec->prev_frame_disposed_ = (uint8_t*)WebPSafeCalloc(
dec->info_.canvas_width * NUM_CHANNELS, dec->info_.canvas_height);
if (dec->prev_frame_disposed_ == NULL) goto Error;
WebPAnimDecoderReset(dec);
return dec;
Error:
@ -144,9 +141,13 @@ static int IsFullFrame(int width, int height, int canvas_width,
}
// Clear the canvas to transparent.
static void ZeroFillCanvas(uint8_t* buf, uint32_t canvas_width,
uint32_t canvas_height) {
memset(buf, 0, canvas_width * NUM_CHANNELS * canvas_height);
static int ZeroFillCanvas(uint8_t* buf, uint32_t canvas_width,
uint32_t canvas_height) {
const uint64_t size =
(uint64_t)canvas_width * canvas_height * NUM_CHANNELS * sizeof(*buf);
if (size != (size_t)size) return 0;
memset(buf, 0, (size_t)size);
return 1;
}
// Clear given frame rectangle to transparent.
@ -162,10 +163,13 @@ static void ZeroFillFrameRect(uint8_t* buf, int buf_stride, int x_offset,
}
// Copy width * height pixels from 'src' to 'dst'.
static void CopyCanvas(const uint8_t* src, uint8_t* dst,
uint32_t width, uint32_t height) {
static int CopyCanvas(const uint8_t* src, uint8_t* dst,
uint32_t width, uint32_t height) {
const uint64_t size = (uint64_t)width * height * NUM_CHANNELS;
if (size != (size_t)size) return 0;
assert(src != NULL && dst != NULL);
memcpy(dst, src, width * NUM_CHANNELS * height);
memcpy(dst, src, (size_t)size);
return 1;
}
// Returns true if the current frame is a key-frame.
@ -328,9 +332,14 @@ int WebPAnimDecoderGetNext(WebPAnimDecoder* dec,
is_key_frame = IsKeyFrame(&iter, &dec->prev_iter_,
dec->prev_frame_was_keyframe_, width, height);
if (is_key_frame) {
ZeroFillCanvas(dec->curr_frame_, width, height);
if (!ZeroFillCanvas(dec->curr_frame_, width, height)) {
goto Error;
}
} else {
CopyCanvas(dec->prev_frame_disposed_, dec->curr_frame_, width, height);
if (!CopyCanvas(dec->prev_frame_disposed_, dec->curr_frame_,
width, height)) {
goto Error;
}
}
// Decode.
@ -393,6 +402,7 @@ int WebPAnimDecoderGetNext(WebPAnimDecoder* dec,
// Update info of the previous frame and dispose it for the next iteration.
dec->prev_frame_timestamp_ = timestamp;
WebPDemuxReleaseIterator(&dec->prev_iter_);
dec->prev_iter_ = iter;
dec->prev_frame_was_keyframe_ = is_key_frame;
CopyCanvas(dec->curr_frame_, dec->prev_frame_disposed_, width, height);
@ -421,6 +431,7 @@ int WebPAnimDecoderHasMoreFrames(const WebPAnimDecoder* dec) {
void WebPAnimDecoderReset(WebPAnimDecoder* dec) {
if (dec != NULL) {
dec->prev_frame_timestamp_ = 0;
WebPDemuxReleaseIterator(&dec->prev_iter_);
memset(&dec->prev_iter_, 0, sizeof(dec->prev_iter_));
dec->prev_frame_was_keyframe_ = 0;
dec->next_frame_ = 1;
@ -434,6 +445,7 @@ const WebPDemuxer* WebPAnimDecoderGetDemuxer(const WebPAnimDecoder* dec) {
void WebPAnimDecoderDelete(WebPAnimDecoder* dec) {
if (dec != NULL) {
WebPDemuxReleaseIterator(&dec->prev_iter_);
WebPDemuxDelete(dec->demux_);
WebPSafeFree(dec->curr_frame_);
WebPSafeFree(dec->prev_frame_disposed_);

View File

@ -25,7 +25,7 @@
#define DMUX_MAJ_VERSION 0
#define DMUX_MIN_VERSION 3
#define DMUX_REV_VERSION 0
#define DMUX_REV_VERSION 1
typedef struct {
size_t start_; // start location of the data

View File

@ -1,6 +1,8 @@
noinst_LTLIBRARIES = libwebpdsp.la libwebpdsp_avx2.la
noinst_LTLIBRARIES += libwebpdsp_sse2.la libwebpdspdecode_sse2.la
noinst_LTLIBRARIES += libwebpdsp_sse41.la libwebpdspdecode_sse41.la
noinst_LTLIBRARIES += libwebpdsp_neon.la libwebpdspdecode_neon.la
noinst_LTLIBRARIES += libwebpdspdecode_msa.la
if BUILD_LIBWEBPDECODER
noinst_LTLIBRARIES += libwebpdspdecode.la
@ -12,28 +14,24 @@ commondir = $(includedir)/webp
COMMON_SOURCES =
COMMON_SOURCES += alpha_processing.c
COMMON_SOURCES += alpha_processing_mips_dsp_r2.c
COMMON_SOURCES += common_sse2.h
COMMON_SOURCES += cpu.c
COMMON_SOURCES += dec.c
COMMON_SOURCES += dec_clip_tables.c
COMMON_SOURCES += dec_mips32.c
COMMON_SOURCES += dec_mips_dsp_r2.c
COMMON_SOURCES += dec_neon.c
COMMON_SOURCES += dsp.h
COMMON_SOURCES += filters.c
COMMON_SOURCES += filters_mips_dsp_r2.c
COMMON_SOURCES += lossless.c
COMMON_SOURCES += lossless.h
COMMON_SOURCES += lossless_mips_dsp_r2.c
COMMON_SOURCES += lossless_neon.c
COMMON_SOURCES += mips_macro.h
COMMON_SOURCES += neon.h
COMMON_SOURCES += rescaler.c
COMMON_SOURCES += rescaler_mips32.c
COMMON_SOURCES += rescaler_mips_dsp_r2.c
COMMON_SOURCES += rescaler_neon.c
COMMON_SOURCES += upsampling.c
COMMON_SOURCES += upsampling_mips_dsp_r2.c
COMMON_SOURCES += upsampling_neon.c
COMMON_SOURCES += yuv.c
COMMON_SOURCES += yuv.h
COMMON_SOURCES += yuv_mips32.c
@ -48,11 +46,9 @@ ENC_SOURCES += cost_mips_dsp_r2.c
ENC_SOURCES += enc.c
ENC_SOURCES += enc_mips32.c
ENC_SOURCES += enc_mips_dsp_r2.c
ENC_SOURCES += enc_neon.c
ENC_SOURCES += lossless_enc.c
ENC_SOURCES += lossless_enc_mips32.c
ENC_SOURCES += lossless_enc_mips_dsp_r2.c
ENC_SOURCES += lossless_enc_neon.c
libwebpdsp_avx2_la_SOURCES =
libwebpdsp_avx2_la_SOURCES += enc_avx2.c
@ -76,6 +72,21 @@ libwebpdspdecode_sse2_la_SOURCES += yuv_sse2.c
libwebpdspdecode_sse2_la_CPPFLAGS = $(libwebpdsp_sse2_la_CPPFLAGS)
libwebpdspdecode_sse2_la_CFLAGS = $(libwebpdsp_sse2_la_CFLAGS)
libwebpdspdecode_neon_la_SOURCES =
libwebpdspdecode_neon_la_SOURCES += dec_neon.c
libwebpdspdecode_neon_la_SOURCES += lossless_neon.c
libwebpdspdecode_neon_la_SOURCES += neon.h
libwebpdspdecode_neon_la_SOURCES += rescaler_neon.c
libwebpdspdecode_neon_la_SOURCES += upsampling_neon.c
libwebpdspdecode_neon_la_CPPFLAGS = $(libwebpdsp_neon_la_CPPFLAGS)
libwebpdspdecode_neon_la_CFLAGS = $(libwebpdsp_neon_la_CFLAGS)
libwebpdspdecode_msa_la_SOURCES =
libwebpdspdecode_msa_la_SOURCES += dec_msa.c
libwebpdspdecode_msa_la_SOURCES += msa_macro.h
libwebpdspdecode_msa_la_CPPFLAGS = $(libwebpdsp_la_CPPFLAGS)
libwebpdspdecode_msa_la_CFLAGS = $(AM_CFLAGS)
libwebpdsp_sse2_la_SOURCES =
libwebpdsp_sse2_la_SOURCES += argb_sse2.c
libwebpdsp_sse2_la_SOURCES += cost_sse2.c
@ -92,6 +103,13 @@ libwebpdsp_sse41_la_CPPFLAGS = $(libwebpdsp_la_CPPFLAGS)
libwebpdsp_sse41_la_CFLAGS = $(AM_CFLAGS) $(SSE41_FLAGS)
libwebpdsp_sse41_la_LIBADD = libwebpdspdecode_sse41.la
libwebpdsp_neon_la_SOURCES =
libwebpdsp_neon_la_SOURCES += enc_neon.c
libwebpdsp_neon_la_SOURCES += lossless_enc_neon.c
libwebpdsp_neon_la_CPPFLAGS = $(libwebpdsp_la_CPPFLAGS)
libwebpdsp_neon_la_CFLAGS = $(AM_CFLAGS) $(NEON_FLAGS)
libwebpdsp_neon_la_LIBADD = libwebpdspdecode_neon.la
libwebpdsp_la_SOURCES = $(COMMON_SOURCES) $(ENC_SOURCES)
noinst_HEADERS =
@ -105,6 +123,8 @@ libwebpdsp_la_LDFLAGS = -lm
libwebpdsp_la_LIBADD =
libwebpdsp_la_LIBADD += libwebpdsp_avx2.la libwebpdsp_sse2.la
libwebpdsp_la_LIBADD += libwebpdsp_sse41.la
libwebpdsp_la_LIBADD += libwebpdsp_neon.la
libwebpdsp_la_LIBADD += libwebpdspdecode_msa.la
if BUILD_LIBWEBPDECODER
libwebpdspdecode_la_SOURCES = $(COMMON_SOURCES)
@ -114,4 +134,6 @@ if BUILD_LIBWEBPDECODER
libwebpdspdecode_la_LIBADD =
libwebpdspdecode_la_LIBADD += libwebpdspdecode_sse2.la
libwebpdspdecode_la_LIBADD += libwebpdspdecode_sse41.la
libwebpdspdecode_la_LIBADD += libwebpdspdecode_neon.la
libwebpdspdecode_la_LIBADD += libwebpdspdecode_msa.la
endif

109
src/dsp/common_sse2.h Normal file
View File

@ -0,0 +1,109 @@
// Copyright 2016 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// SSE2 code common to several files.
//
// Author: Vincent Rabaud (vrabaud@google.com)
#ifndef WEBP_DSP_COMMON_SSE2_H_
#define WEBP_DSP_COMMON_SSE2_H_
#ifdef __cplusplus
extern "C" {
#endif
#if defined(WEBP_USE_SSE2)
#include <emmintrin.h>
//------------------------------------------------------------------------------
// Quite useful macro for debugging. Left here for convenience.
#if 0
#include <stdio.h>
static WEBP_INLINE void PrintReg(const __m128i r, const char* const name,
int size) {
int n;
union {
__m128i r;
uint8_t i8[16];
uint16_t i16[8];
uint32_t i32[4];
uint64_t i64[2];
} tmp;
tmp.r = r;
fprintf(stderr, "%s\t: ", name);
if (size == 8) {
for (n = 0; n < 16; ++n) fprintf(stderr, "%.2x ", tmp.i8[n]);
} else if (size == 16) {
for (n = 0; n < 8; ++n) fprintf(stderr, "%.4x ", tmp.i16[n]);
} else if (size == 32) {
for (n = 0; n < 4; ++n) fprintf(stderr, "%.8x ", tmp.i32[n]);
} else {
for (n = 0; n < 2; ++n) fprintf(stderr, "%.16lx ", tmp.i64[n]);
}
fprintf(stderr, "\n");
}
#endif
//------------------------------------------------------------------------------
// Math functions.
// Return the sum of all the 8b in the register.
static WEBP_INLINE int VP8HorizontalAdd8b(const __m128i* const a) {
const __m128i zero = _mm_setzero_si128();
const __m128i sad8x2 = _mm_sad_epu8(*a, zero);
// sum the two sads: sad8x2[0:1] + sad8x2[8:9]
const __m128i sum = _mm_add_epi32(sad8x2, _mm_shuffle_epi32(sad8x2, 2));
return _mm_cvtsi128_si32(sum);
}
// Transpose two 4x4 16b matrices horizontally stored in registers.
static WEBP_INLINE void VP8Transpose_2_4x4_16b(
const __m128i* const in0, const __m128i* const in1,
const __m128i* const in2, const __m128i* const in3, __m128i* const out0,
__m128i* const out1, __m128i* const out2, __m128i* const out3) {
// Transpose the two 4x4.
// a00 a01 a02 a03 b00 b01 b02 b03
// a10 a11 a12 a13 b10 b11 b12 b13
// a20 a21 a22 a23 b20 b21 b22 b23
// a30 a31 a32 a33 b30 b31 b32 b33
const __m128i transpose0_0 = _mm_unpacklo_epi16(*in0, *in1);
const __m128i transpose0_1 = _mm_unpacklo_epi16(*in2, *in3);
const __m128i transpose0_2 = _mm_unpackhi_epi16(*in0, *in1);
const __m128i transpose0_3 = _mm_unpackhi_epi16(*in2, *in3);
// a00 a10 a01 a11 a02 a12 a03 a13
// a20 a30 a21 a31 a22 a32 a23 a33
// b00 b10 b01 b11 b02 b12 b03 b13
// b20 b30 b21 b31 b22 b32 b23 b33
const __m128i transpose1_0 = _mm_unpacklo_epi32(transpose0_0, transpose0_1);
const __m128i transpose1_1 = _mm_unpacklo_epi32(transpose0_2, transpose0_3);
const __m128i transpose1_2 = _mm_unpackhi_epi32(transpose0_0, transpose0_1);
const __m128i transpose1_3 = _mm_unpackhi_epi32(transpose0_2, transpose0_3);
// a00 a10 a20 a30 a01 a11 a21 a31
// b00 b10 b20 b30 b01 b11 b21 b31
// a02 a12 a22 a32 a03 a13 a23 a33
// b02 b12 a22 b32 b03 b13 b23 b33
*out0 = _mm_unpacklo_epi64(transpose1_0, transpose1_1);
*out1 = _mm_unpackhi_epi64(transpose1_0, transpose1_1);
*out2 = _mm_unpacklo_epi64(transpose1_2, transpose1_3);
*out3 = _mm_unpackhi_epi64(transpose1_2, transpose1_3);
// a00 a10 a20 a30 b00 b10 b20 b30
// a01 a11 a21 a31 b01 b11 b21 b31
// a02 a12 a22 a32 b02 b12 b22 b32
// a03 a13 a23 a33 b03 b13 b23 b33
}
#endif // WEBP_USE_SSE2
#ifdef __cplusplus
} // extern "C"
#endif
#endif // WEBP_DSP_COMMON_SSE2_H_

View File

@ -13,6 +13,11 @@
#include "./dsp.h"
#if defined(WEBP_HAVE_NEON_RTCD)
#include <stdio.h>
#include <string.h>
#endif
#if defined(WEBP_ANDROID_NEON)
#include <cpu-features.h>
#endif
@ -142,13 +147,33 @@ VP8CPUInfo VP8GetCPUInfo = AndroidCPUInfo;
// define a dummy function to enable turning off NEON at runtime by setting
// VP8DecGetCPUInfo = NULL
static int armCPUInfo(CPUFeature feature) {
(void)feature;
if (feature != kNEON) return 0;
#if defined(__linux__) && defined(WEBP_HAVE_NEON_RTCD)
{
int has_neon = 0;
char line[200];
FILE* const cpuinfo = fopen("/proc/cpuinfo", "r");
if (cpuinfo == NULL) return 0;
while (fgets(line, sizeof(line), cpuinfo)) {
if (!strncmp(line, "Features", 8)) {
if (strstr(line, " neon ") != NULL) {
has_neon = 1;
break;
}
}
}
fclose(cpuinfo);
return has_neon;
}
#else
return 1;
#endif
}
VP8CPUInfo VP8GetCPUInfo = armCPUInfo;
#elif defined(WEBP_USE_MIPS32) || defined(WEBP_USE_MIPS_DSP_R2)
#elif defined(WEBP_USE_MIPS32) || defined(WEBP_USE_MIPS_DSP_R2) || \
defined(WEBP_USE_MSA)
static int mipsCPUInfo(CPUFeature feature) {
if ((feature == kMIPS32) || (feature == kMIPSdspR2)) {
if ((feature == kMIPS32) || (feature == kMIPSdspR2) || (feature == kMSA)) {
return 1;
} else {
return 0;

View File

@ -13,6 +13,7 @@
#include "./dsp.h"
#include "../dec/vp8i.h"
#include "../utils/utils.h"
//------------------------------------------------------------------------------
@ -238,7 +239,7 @@ VP8PredFunc VP8PredLuma16[NUM_B_DC_MODES];
//------------------------------------------------------------------------------
// 4x4
#define AVG3(a, b, c) (((a) + 2 * (b) + (c) + 2) >> 2)
#define AVG3(a, b, c) ((uint8_t)(((a) + 2 * (b) + (c) + 2) >> 2))
#define AVG2(a, b) (((a) + (b) + 1) >> 1)
static void VE4(uint8_t* dst) { // vertical
@ -654,6 +655,23 @@ static void HFilter8i(uint8_t* u, uint8_t* v, int stride,
//------------------------------------------------------------------------------
static void DitherCombine8x8(const uint8_t* dither, uint8_t* dst,
int dst_stride) {
int i, j;
for (j = 0; j < 8; ++j) {
for (i = 0; i < 8; ++i) {
const int delta0 = dither[i] - VP8_DITHER_AMP_CENTER;
const int delta1 =
(delta0 + VP8_DITHER_DESCALE_ROUNDER) >> VP8_DITHER_DESCALE;
dst[i] = clip_8b((int)dst[i] + delta1);
}
dst += dst_stride;
dither += 8;
}
}
//------------------------------------------------------------------------------
VP8DecIdct2 VP8Transform;
VP8DecIdct VP8TransformAC3;
VP8DecIdct VP8TransformUV;
@ -673,11 +691,15 @@ VP8SimpleFilterFunc VP8SimpleHFilter16;
VP8SimpleFilterFunc VP8SimpleVFilter16i;
VP8SimpleFilterFunc VP8SimpleHFilter16i;
void (*VP8DitherCombine8x8)(const uint8_t* dither, uint8_t* dst,
int dst_stride);
extern void VP8DspInitSSE2(void);
extern void VP8DspInitSSE41(void);
extern void VP8DspInitNEON(void);
extern void VP8DspInitMIPS32(void);
extern void VP8DspInitMIPSdspR2(void);
extern void VP8DspInitMSA(void);
static volatile VP8CPUInfo dec_last_cpuinfo_used =
(VP8CPUInfo)&dec_last_cpuinfo_used;
@ -734,6 +756,8 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8DspInit(void) {
VP8PredChroma8[5] = DC8uvNoLeft;
VP8PredChroma8[6] = DC8uvNoTopLeft;
VP8DitherCombine8x8 = DitherCombine8x8;
// If defined, use CPUInfo() to overwrite some pointers with faster versions.
if (VP8GetCPUInfo != NULL) {
#if defined(WEBP_USE_SSE2)
@ -760,6 +784,11 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8DspInit(void) {
if (VP8GetCPUInfo(kMIPSdspR2)) {
VP8DspInitMIPSdspR2();
}
#endif
#if defined(WEBP_USE_MSA)
if (VP8GetCPUInfo(kMSA)) {
VP8DspInitMSA();
}
#endif
}
dec_last_cpuinfo_used = VP8GetCPUInfo;

172
src/dsp/dec_msa.c Normal file
View File

@ -0,0 +1,172 @@
// Copyright 2016 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// MSA version of dsp functions
//
// Author(s): Prashant Patil (prashant.patil@imgtec.com)
#include "./dsp.h"
#if defined(WEBP_USE_MSA)
#include "./msa_macro.h"
//------------------------------------------------------------------------------
// Transforms
#define IDCT_1D_W(in0, in1, in2, in3, out0, out1, out2, out3) { \
v4i32 a1_m, b1_m, c1_m, d1_m; \
v4i32 c_tmp1_m, c_tmp2_m, d_tmp1_m, d_tmp2_m; \
const v4i32 cospi8sqrt2minus1 = __msa_fill_w(20091); \
const v4i32 sinpi8sqrt2 = __msa_fill_w(35468); \
\
a1_m = in0 + in2; \
b1_m = in0 - in2; \
c_tmp1_m = (in1 * sinpi8sqrt2) >> 16; \
c_tmp2_m = in3 + ((in3 * cospi8sqrt2minus1) >> 16); \
c1_m = c_tmp1_m - c_tmp2_m; \
d_tmp1_m = in1 + ((in1 * cospi8sqrt2minus1) >> 16); \
d_tmp2_m = (in3 * sinpi8sqrt2) >> 16; \
d1_m = d_tmp1_m + d_tmp2_m; \
BUTTERFLY_4(a1_m, b1_m, c1_m, d1_m, out0, out1, out2, out3); \
}
#define MULT1(a) ((((a) * 20091) >> 16) + (a))
#define MULT2(a) (((a) * 35468) >> 16)
static void TransformOne(const int16_t* in, uint8_t* dst) {
v8i16 input0, input1;
v4i32 in0, in1, in2, in3, hz0, hz1, hz2, hz3, vt0, vt1, vt2, vt3;
v4i32 res0, res1, res2, res3;
const v16i8 zero = { 0 };
v16i8 dest0, dest1, dest2, dest3;
LD_SH2(in, 8, input0, input1);
UNPCK_SH_SW(input0, in0, in1);
UNPCK_SH_SW(input1, in2, in3);
IDCT_1D_W(in0, in1, in2, in3, hz0, hz1, hz2, hz3);
TRANSPOSE4x4_SW_SW(hz0, hz1, hz2, hz3, hz0, hz1, hz2, hz3);
IDCT_1D_W(hz0, hz1, hz2, hz3, vt0, vt1, vt2, vt3);
SRARI_W4_SW(vt0, vt1, vt2, vt3, 3);
TRANSPOSE4x4_SW_SW(vt0, vt1, vt2, vt3, vt0, vt1, vt2, vt3);
LD_SB4(dst, BPS, dest0, dest1, dest2, dest3);
ILVR_B4_SW(zero, dest0, zero, dest1, zero, dest2, zero, dest3,
res0, res1, res2, res3);
ILVR_H4_SW(zero, res0, zero, res1, zero, res2, zero, res3,
res0, res1, res2, res3);
ADD4(res0, vt0, res1, vt1, res2, vt2, res3, vt3, res0, res1, res2, res3);
CLIP_SW4_0_255(res0, res1, res2, res3);
PCKEV_B2_SW(res0, res1, res2, res3, vt0, vt1);
res0 = (v4i32)__msa_pckev_b((v16i8)vt0, (v16i8)vt1);
ST4x4_UB(res0, res0, 3, 2, 1, 0, dst, BPS);
}
static void TransformTwo(const int16_t* in, uint8_t* dst, int do_two) {
TransformOne(in, dst);
if (do_two) {
TransformOne(in + 16, dst + 4);
}
}
static void TransformWHT(const int16_t* in, int16_t* out) {
v8i16 input0, input1;
const v8i16 mask0 = { 0, 1, 2, 3, 8, 9, 10, 11 };
const v8i16 mask1 = { 4, 5, 6, 7, 12, 13, 14, 15 };
const v8i16 mask2 = { 0, 4, 8, 12, 1, 5, 9, 13 };
const v8i16 mask3 = { 3, 7, 11, 15, 2, 6, 10, 14 };
v8i16 tmp0, tmp1, tmp2, tmp3;
v8i16 out0, out1;
LD_SH2(in, 8, input0, input1);
input1 = SLDI_SH(input1, input1, 8);
tmp0 = input0 + input1;
tmp1 = input0 - input1;
VSHF_H2_SH(tmp0, tmp1, tmp0, tmp1, mask0, mask1, tmp2, tmp3);
out0 = tmp2 + tmp3;
out1 = tmp2 - tmp3;
VSHF_H2_SH(out0, out1, out0, out1, mask2, mask3, input0, input1);
tmp0 = input0 + input1;
tmp1 = input0 - input1;
VSHF_H2_SH(tmp0, tmp1, tmp0, tmp1, mask0, mask1, tmp2, tmp3);
tmp0 = tmp2 + tmp3;
tmp1 = tmp2 - tmp3;
ADDVI_H2_SH(tmp0, 3, tmp1, 3, out0, out1);
SRAI_H2_SH(out0, out1, 3);
out[0] = __msa_copy_s_h(out0, 0);
out[16] = __msa_copy_s_h(out0, 4);
out[32] = __msa_copy_s_h(out1, 0);
out[48] = __msa_copy_s_h(out1, 4);
out[64] = __msa_copy_s_h(out0, 1);
out[80] = __msa_copy_s_h(out0, 5);
out[96] = __msa_copy_s_h(out1, 1);
out[112] = __msa_copy_s_h(out1, 5);
out[128] = __msa_copy_s_h(out0, 2);
out[144] = __msa_copy_s_h(out0, 6);
out[160] = __msa_copy_s_h(out1, 2);
out[176] = __msa_copy_s_h(out1, 6);
out[192] = __msa_copy_s_h(out0, 3);
out[208] = __msa_copy_s_h(out0, 7);
out[224] = __msa_copy_s_h(out1, 3);
out[240] = __msa_copy_s_h(out1, 7);
}
static void TransformDC(const int16_t* in, uint8_t* dst) {
const int DC = (in[0] + 4) >> 3;
const v8i16 tmp0 = __msa_fill_h(DC);
ADDBLK_ST4x4_UB(tmp0, tmp0, tmp0, tmp0, dst, BPS);
}
static void TransformAC3(const int16_t* in, uint8_t* dst) {
const int a = in[0] + 4;
const int c4 = MULT2(in[4]);
const int d4 = MULT1(in[4]);
const int in2 = MULT2(in[1]);
const int in3 = MULT1(in[1]);
v4i32 tmp0 = { 0 };
v4i32 out0 = __msa_fill_w(a + d4);
v4i32 out1 = __msa_fill_w(a + c4);
v4i32 out2 = __msa_fill_w(a - c4);
v4i32 out3 = __msa_fill_w(a - d4);
v4i32 res0, res1, res2, res3;
const v4i32 zero = { 0 };
v16u8 dest0, dest1, dest2, dest3;
INSERT_W4_SW(in3, in2, -in2, -in3, tmp0);
ADD4(out0, tmp0, out1, tmp0, out2, tmp0, out3, tmp0,
out0, out1, out2, out3);
SRAI_W4_SW(out0, out1, out2, out3, 3);
LD_UB4(dst, BPS, dest0, dest1, dest2, dest3);
ILVR_B4_SW(zero, dest0, zero, dest1, zero, dest2, zero, dest3,
res0, res1, res2, res3);
ILVR_H4_SW(zero, res0, zero, res1, zero, res2, zero, res3,
res0, res1, res2, res3);
ADD4(res0, out0, res1, out1, res2, out2, res3, out3, res0, res1, res2, res3);
CLIP_SW4_0_255(res0, res1, res2, res3);
PCKEV_B2_SW(res0, res1, res2, res3, out0, out1);
res0 = (v4i32)__msa_pckev_b((v16i8)out0, (v16i8)out1);
ST4x4_UB(res0, res0, 3, 2, 1, 0, dst, BPS);
}
//------------------------------------------------------------------------------
// Entry point
extern void VP8DspInitMSA(void);
WEBP_TSAN_IGNORE_FUNCTION void VP8DspInitMSA(void) {
VP8TransformWHT = TransformWHT;
VP8Transform = TransformTwo;
VP8TransformDC = TransformDC;
VP8TransformAC3 = TransformAC3;
}
#else // !WEBP_USE_MSA
WEBP_DSP_INIT_STUB(VP8DspInitMSA)
#endif // WEBP_USE_MSA

View File

@ -21,7 +21,9 @@
// #define USE_TRANSFORM_AC3
#include <emmintrin.h>
#include "./common_sse2.h"
#include "../dec/vp8i.h"
#include "../utils/utils.h"
//------------------------------------------------------------------------------
// Transforms (Paragraph 14.4)
@ -102,34 +104,7 @@ static void Transform(const int16_t* in, uint8_t* dst, int do_two) {
const __m128i tmp3 = _mm_sub_epi16(a, d);
// Transpose the two 4x4.
// a00 a01 a02 a03 b00 b01 b02 b03
// a10 a11 a12 a13 b10 b11 b12 b13
// a20 a21 a22 a23 b20 b21 b22 b23
// a30 a31 a32 a33 b30 b31 b32 b33
const __m128i transpose0_0 = _mm_unpacklo_epi16(tmp0, tmp1);
const __m128i transpose0_1 = _mm_unpacklo_epi16(tmp2, tmp3);
const __m128i transpose0_2 = _mm_unpackhi_epi16(tmp0, tmp1);
const __m128i transpose0_3 = _mm_unpackhi_epi16(tmp2, tmp3);
// a00 a10 a01 a11 a02 a12 a03 a13
// a20 a30 a21 a31 a22 a32 a23 a33
// b00 b10 b01 b11 b02 b12 b03 b13
// b20 b30 b21 b31 b22 b32 b23 b33
const __m128i transpose1_0 = _mm_unpacklo_epi32(transpose0_0, transpose0_1);
const __m128i transpose1_1 = _mm_unpacklo_epi32(transpose0_2, transpose0_3);
const __m128i transpose1_2 = _mm_unpackhi_epi32(transpose0_0, transpose0_1);
const __m128i transpose1_3 = _mm_unpackhi_epi32(transpose0_2, transpose0_3);
// a00 a10 a20 a30 a01 a11 a21 a31
// b00 b10 b20 b30 b01 b11 b21 b31
// a02 a12 a22 a32 a03 a13 a23 a33
// b02 b12 a22 b32 b03 b13 b23 b33
T0 = _mm_unpacklo_epi64(transpose1_0, transpose1_1);
T1 = _mm_unpackhi_epi64(transpose1_0, transpose1_1);
T2 = _mm_unpacklo_epi64(transpose1_2, transpose1_3);
T3 = _mm_unpackhi_epi64(transpose1_2, transpose1_3);
// a00 a10 a20 a30 b00 b10 b20 b30
// a01 a11 a21 a31 b01 b11 b21 b31
// a02 a12 a22 a32 b02 b12 b22 b32
// a03 a13 a23 a33 b03 b13 b23 b33
VP8Transpose_2_4x4_16b(&tmp0, &tmp1, &tmp2, &tmp3, &T0, &T1, &T2, &T3);
}
// Horizontal pass and subsequent transpose.
@ -164,34 +139,8 @@ static void Transform(const int16_t* in, uint8_t* dst, int do_two) {
const __m128i shifted3 = _mm_srai_epi16(tmp3, 3);
// Transpose the two 4x4.
// a00 a01 a02 a03 b00 b01 b02 b03
// a10 a11 a12 a13 b10 b11 b12 b13
// a20 a21 a22 a23 b20 b21 b22 b23
// a30 a31 a32 a33 b30 b31 b32 b33
const __m128i transpose0_0 = _mm_unpacklo_epi16(shifted0, shifted1);
const __m128i transpose0_1 = _mm_unpacklo_epi16(shifted2, shifted3);
const __m128i transpose0_2 = _mm_unpackhi_epi16(shifted0, shifted1);
const __m128i transpose0_3 = _mm_unpackhi_epi16(shifted2, shifted3);
// a00 a10 a01 a11 a02 a12 a03 a13
// a20 a30 a21 a31 a22 a32 a23 a33
// b00 b10 b01 b11 b02 b12 b03 b13
// b20 b30 b21 b31 b22 b32 b23 b33
const __m128i transpose1_0 = _mm_unpacklo_epi32(transpose0_0, transpose0_1);
const __m128i transpose1_1 = _mm_unpacklo_epi32(transpose0_2, transpose0_3);
const __m128i transpose1_2 = _mm_unpackhi_epi32(transpose0_0, transpose0_1);
const __m128i transpose1_3 = _mm_unpackhi_epi32(transpose0_2, transpose0_3);
// a00 a10 a20 a30 a01 a11 a21 a31
// b00 b10 b20 b30 b01 b11 b21 b31
// a02 a12 a22 a32 a03 a13 a23 a33
// b02 b12 a22 b32 b03 b13 b23 b33
T0 = _mm_unpacklo_epi64(transpose1_0, transpose1_1);
T1 = _mm_unpackhi_epi64(transpose1_0, transpose1_1);
T2 = _mm_unpacklo_epi64(transpose1_2, transpose1_3);
T3 = _mm_unpackhi_epi64(transpose1_2, transpose1_3);
// a00 a10 a20 a30 b00 b10 b20 b30
// a01 a11 a21 a31 b01 b11 b21 b31
// a02 a12 a22 a32 b02 b12 b22 b32
// a03 a13 a23 a33 b03 b13 b23 b33
VP8Transpose_2_4x4_16b(&shifted0, &shifted1, &shifted2, &shifted3, &T0, &T1,
&T2, &T3);
}
// Add inverse transform to 'dst' and store.

View File

@ -17,6 +17,7 @@
#include <smmintrin.h>
#include "../dec/vp8i.h"
#include "../utils/utils.h"
static void HE16(uint8_t* dst) { // horizontal
int j;

View File

@ -14,8 +14,11 @@
#ifndef WEBP_DSP_DSP_H_
#define WEBP_DSP_DSP_H_
#ifdef HAVE_CONFIG_H
#include "../webp/config.h"
#endif
#include "../webp/types.h"
#include "../utils/utils.h"
#ifdef __cplusplus
extern "C" {
@ -72,7 +75,8 @@ extern "C" {
// The intrinsics currently cause compiler errors with arm-nacl-gcc and the
// inline assembly would need to be modified for use with Native Client.
#if (defined(__ARM_NEON__) || defined(WEBP_ANDROID_NEON) || \
defined(__aarch64__)) && !defined(__native_client__)
defined(__aarch64__) || defined(WEBP_HAVE_NEON)) && \
!defined(__native_client__)
#define WEBP_USE_NEON
#endif
@ -92,6 +96,10 @@ extern "C" {
#endif
#endif
#if defined(__mips_msa) && defined(__mips_isa_rev) && (__mips_isa_rev >= 5)
#define WEBP_USE_MSA
#endif
// This macro prevents thread_sanitizer from reporting known concurrent writes.
#define WEBP_TSAN_IGNORE_FUNCTION
#if defined(__has_feature)
@ -101,6 +109,27 @@ extern "C" {
#endif
#endif
#define WEBP_UBSAN_IGNORE_UNDEF
#define WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW
#if !defined(WEBP_FORCE_ALIGNED) && defined(__clang__) && \
defined(__has_attribute)
#if __has_attribute(no_sanitize)
// This macro prevents the undefined behavior sanitizer from reporting
// failures. This is only meant to silence unaligned loads on platforms that
// are known to support them.
#undef WEBP_UBSAN_IGNORE_UNDEF
#define WEBP_UBSAN_IGNORE_UNDEF \
__attribute__((no_sanitize("undefined")))
// This macro prevents the undefined behavior sanitizer from reporting
// failures related to unsigned integer overflows. This is only meant to
// silence cases where this well defined behavior is expected.
#undef WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW
#define WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW \
__attribute__((no_sanitize("unsigned-integer-overflow")))
#endif
#endif
typedef enum {
kSSE2,
kSSE3,
@ -109,7 +138,8 @@ typedef enum {
kAVX2,
kNEON,
kMIPS32,
kMIPSdspR2
kMIPSdspR2,
kMSA
} CPUFeature;
// returns true if the CPU supports the feature.
typedef int (*VP8CPUInfo)(CPUFeature feature);
@ -151,6 +181,8 @@ typedef int (*VP8Metric)(const uint8_t* pix, const uint8_t* ref);
extern VP8Metric VP8SSE16x16, VP8SSE16x8, VP8SSE8x8, VP8SSE4x4;
typedef int (*VP8WMetric)(const uint8_t* pix, const uint8_t* ref,
const uint16_t* const weights);
// The weights for VP8TDisto4x4 and VP8TDisto16x16 contain a row-major
// 4 by 4 symmetric matrix.
extern VP8WMetric VP8TDisto4x4, VP8TDisto16x16;
typedef void (*VP8BlockCopy)(const uint8_t* src, uint8_t* dst);
@ -213,6 +245,35 @@ extern VP8GetResidualCostFunc VP8GetResidualCost;
// must be called before anything using the above
void VP8EncDspCostInit(void);
//------------------------------------------------------------------------------
// SSIM utils
// struct for accumulating statistical moments
typedef struct {
double w; // sum(w_i) : sum of weights
double xm, ym; // sum(w_i * x_i), sum(w_i * y_i)
double xxm, xym, yym; // sum(w_i * x_i * x_i), etc.
} VP8DistoStats;
#define VP8_SSIM_KERNEL 3 // total size of the kernel: 2 * VP8_SSIM_KERNEL + 1
typedef void (*VP8SSIMAccumulateClippedFunc)(const uint8_t* src1, int stride1,
const uint8_t* src2, int stride2,
int xo, int yo, // center position
int W, int H, // plane dimension
VP8DistoStats* const stats);
// This version is called with the guarantee that you can load 8 bytes and
// 8 rows at offset src1 and src2
typedef void (*VP8SSIMAccumulateFunc)(const uint8_t* src1, int stride1,
const uint8_t* src2, int stride2,
VP8DistoStats* const stats);
extern VP8SSIMAccumulateFunc VP8SSIMAccumulate; // unclipped / unchecked
extern VP8SSIMAccumulateClippedFunc VP8SSIMAccumulateClipped; // with clipping
// must be called before using any of the above directly
void VP8SSIMDspInit(void);
//------------------------------------------------------------------------------
// Decoding
@ -265,6 +326,15 @@ extern VP8LumaFilterFunc VP8HFilter16i;
extern VP8ChromaFilterFunc VP8VFilter8i; // filtering u and v altogether
extern VP8ChromaFilterFunc VP8HFilter8i;
// Dithering. Combines dithering values (centered around 128) with dst[],
// according to: dst[] = clip(dst[] + (((dither[]-128) + 8) >> 4)
#define VP8_DITHER_DESCALE 4
#define VP8_DITHER_DESCALE_ROUNDER (1 << (VP8_DITHER_DESCALE - 1))
#define VP8_DITHER_AMP_BITS 7
#define VP8_DITHER_AMP_CENTER (1 << VP8_DITHER_AMP_BITS)
extern void (*VP8DitherCombine8x8)(const uint8_t* dither, uint8_t* dst,
int dst_stride);
// must be called before anything using the above
void VP8DspInit(void);
@ -472,8 +542,10 @@ typedef enum { // Filter types.
typedef void (*WebPFilterFunc)(const uint8_t* in, int width, int height,
int stride, uint8_t* out);
typedef void (*WebPUnfilterFunc)(int width, int height, int stride,
int row, int num_rows, uint8_t* data);
// In-place un-filtering.
// Warning! 'prev_line' pointer can be equal to 'cur_line' or 'preds'.
typedef void (*WebPUnfilterFunc)(const uint8_t* prev_line, const uint8_t* preds,
uint8_t* cur_line, int width);
// Filter the given data using the given predictor.
// 'in' corresponds to a 2-dimensional pixel array of size (stride * height)

View File

@ -69,7 +69,7 @@ static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
// Convert coefficients to bin.
for (k = 0; k < 16; ++k) {
const int v = abs(out[k]) >> 3; // TODO(skal): add rounding?
const int v = abs(out[k]) >> 3;
const int clipped_value = clip_max(v, MAX_COEFF_THRESH);
++distribution[clipped_value];
}
@ -335,7 +335,7 @@ static void Intra16Preds(uint8_t* dst,
// luma 4x4 prediction
#define DST(x, y) dst[(x) + (y) * BPS]
#define AVG3(a, b, c) (((a) + 2 * (b) + (c) + 2) >> 2)
#define AVG3(a, b, c) ((uint8_t)(((a) + 2 * (b) + (c) + 2) >> 2))
#define AVG2(a, b) (((a) + (b) + 1) >> 1)
static void VE4(uint8_t* dst, const uint8_t* top) { // vertical
@ -559,6 +559,7 @@ static int SSE4x4(const uint8_t* a, const uint8_t* b) {
// Hadamard transform
// Returns the weighted sum of the absolute value of transformed coefficients.
// w[] contains a row-major 4 by 4 symmetric matrix.
static int TTransform(const uint8_t* in, const uint16_t* w) {
int sum = 0;
int tmp[16];
@ -636,7 +637,7 @@ static int QuantizeBlock(int16_t in[16], int16_t out[16],
int level = QUANTDIV(coeff, iQ, B);
if (level > MAX_LEVEL) level = MAX_LEVEL;
if (sign) level = -level;
in[j] = level * Q;
in[j] = level * (int)Q;
out[n] = level;
if (level) last = n;
} else {
@ -670,7 +671,7 @@ static int QuantizeBlockWHT(int16_t in[16], int16_t out[16],
int level = QUANTDIV(coeff, iQ, B);
if (level > MAX_LEVEL) level = MAX_LEVEL;
if (sign) level = -level;
in[j] = level * Q;
in[j] = level * (int)Q;
out[n] = level;
if (level) last = n;
} else {
@ -701,6 +702,68 @@ static void Copy16x8(const uint8_t* src, uint8_t* dst) {
Copy(src, dst, 16, 8);
}
//------------------------------------------------------------------------------
static void SSIMAccumulateClipped(const uint8_t* src1, int stride1,
const uint8_t* src2, int stride2,
int xo, int yo, int W, int H,
VP8DistoStats* const stats) {
const int ymin = (yo - VP8_SSIM_KERNEL < 0) ? 0 : yo - VP8_SSIM_KERNEL;
const int ymax = (yo + VP8_SSIM_KERNEL > H - 1) ? H - 1
: yo + VP8_SSIM_KERNEL;
const int xmin = (xo - VP8_SSIM_KERNEL < 0) ? 0 : xo - VP8_SSIM_KERNEL;
const int xmax = (xo + VP8_SSIM_KERNEL > W - 1) ? W - 1
: xo + VP8_SSIM_KERNEL;
int x, y;
src1 += ymin * stride1;
src2 += ymin * stride2;
for (y = ymin; y <= ymax; ++y, src1 += stride1, src2 += stride2) {
for (x = xmin; x <= xmax; ++x) {
const int s1 = src1[x];
const int s2 = src2[x];
stats->w += 1;
stats->xm += s1;
stats->ym += s2;
stats->xxm += s1 * s1;
stats->xym += s1 * s2;
stats->yym += s2 * s2;
}
}
}
static void SSIMAccumulate(const uint8_t* src1, int stride1,
const uint8_t* src2, int stride2,
VP8DistoStats* const stats) {
int x, y;
for (y = 0; y <= 2 * VP8_SSIM_KERNEL; ++y, src1 += stride1, src2 += stride2) {
for (x = 0; x <= 2 * VP8_SSIM_KERNEL; ++x) {
const int s1 = src1[x];
const int s2 = src2[x];
stats->w += 1;
stats->xm += s1;
stats->ym += s2;
stats->xxm += s1 * s1;
stats->xym += s1 * s2;
stats->yym += s2 * s2;
}
}
}
VP8SSIMAccumulateFunc VP8SSIMAccumulate;
VP8SSIMAccumulateClippedFunc VP8SSIMAccumulateClipped;
static volatile VP8CPUInfo ssim_last_cpuinfo_used =
(VP8CPUInfo)&ssim_last_cpuinfo_used;
WEBP_TSAN_IGNORE_FUNCTION void VP8SSIMDspInit(void) {
if (ssim_last_cpuinfo_used == VP8GetCPUInfo) return;
VP8SSIMAccumulate = SSIMAccumulate;
VP8SSIMAccumulateClipped = SSIMAccumulateClipped;
ssim_last_cpuinfo_used = VP8GetCPUInfo;
}
//------------------------------------------------------------------------------
// Initialization

View File

@ -1393,8 +1393,6 @@ static void FTransformWHT(const int16_t* in, int16_t* out) {
"absq_s.ph %[temp1], %[temp1] \n\t" \
"absq_s.ph %[temp2], %[temp2] \n\t" \
"absq_s.ph %[temp3], %[temp3] \n\t" \
/* TODO(skal): add rounding ? shra_r.ph : shra.ph */ \
/* for following 4 instructions */ \
"shra.ph %[temp0], %[temp0], 3 \n\t" \
"shra.ph %[temp1], %[temp1], 3 \n\t" \
"shra.ph %[temp2], %[temp2], 3 \n\t" \

View File

@ -560,21 +560,6 @@ static void FTransformWHT(const int16_t* src, int16_t* out) {
// a 26ae, b 26ae
// a 37bf, b 37bf
//
static WEBP_INLINE uint8x8x4_t DistoTranspose4x4U8(uint8x8x4_t d4_in) {
const uint8x8x2_t d2_tmp0 = vtrn_u8(d4_in.val[0], d4_in.val[1]);
const uint8x8x2_t d2_tmp1 = vtrn_u8(d4_in.val[2], d4_in.val[3]);
const uint16x4x2_t d2_tmp2 = vtrn_u16(vreinterpret_u16_u8(d2_tmp0.val[0]),
vreinterpret_u16_u8(d2_tmp1.val[0]));
const uint16x4x2_t d2_tmp3 = vtrn_u16(vreinterpret_u16_u8(d2_tmp0.val[1]),
vreinterpret_u16_u8(d2_tmp1.val[1]));
d4_in.val[0] = vreinterpret_u8_u16(d2_tmp2.val[0]);
d4_in.val[2] = vreinterpret_u8_u16(d2_tmp2.val[1]);
d4_in.val[1] = vreinterpret_u8_u16(d2_tmp3.val[0]);
d4_in.val[3] = vreinterpret_u8_u16(d2_tmp3.val[1]);
return d4_in;
}
static WEBP_INLINE int16x8x4_t DistoTranspose4x4S16(int16x8x4_t q4_in) {
const int16x8x2_t q2_tmp0 = vtrnq_s16(q4_in.val[0], q4_in.val[1]);
const int16x8x2_t q2_tmp1 = vtrnq_s16(q4_in.val[2], q4_in.val[3]);
@ -589,41 +574,40 @@ static WEBP_INLINE int16x8x4_t DistoTranspose4x4S16(int16x8x4_t q4_in) {
return q4_in;
}
static WEBP_INLINE int16x8x4_t DistoHorizontalPass(const uint8x8x4_t d4_in) {
static WEBP_INLINE int16x8x4_t DistoHorizontalPass(const int16x8x4_t q4_in) {
// {a0, a1} = {in[0] + in[2], in[1] + in[3]}
// {a3, a2} = {in[0] - in[2], in[1] - in[3]}
const int16x8_t q_a0 = vreinterpretq_s16_u16(vaddl_u8(d4_in.val[0],
d4_in.val[2]));
const int16x8_t q_a1 = vreinterpretq_s16_u16(vaddl_u8(d4_in.val[1],
d4_in.val[3]));
const int16x8_t q_a3 = vreinterpretq_s16_u16(vsubl_u8(d4_in.val[0],
d4_in.val[2]));
const int16x8_t q_a2 = vreinterpretq_s16_u16(vsubl_u8(d4_in.val[1],
d4_in.val[3]));
const int16x8_t q_a0 = vaddq_s16(q4_in.val[0], q4_in.val[2]);
const int16x8_t q_a1 = vaddq_s16(q4_in.val[1], q4_in.val[3]);
const int16x8_t q_a3 = vsubq_s16(q4_in.val[0], q4_in.val[2]);
const int16x8_t q_a2 = vsubq_s16(q4_in.val[1], q4_in.val[3]);
int16x8x4_t q4_out;
// tmp[0] = a0 + a1
// tmp[1] = a3 + a2
// tmp[2] = a3 - a2
// tmp[3] = a0 - a1
INIT_VECTOR4(q4_out,
vaddq_s16(q_a0, q_a1), vaddq_s16(q_a3, q_a2),
vsubq_s16(q_a3, q_a2), vsubq_s16(q_a0, q_a1));
vabsq_s16(vaddq_s16(q_a0, q_a1)),
vabsq_s16(vaddq_s16(q_a3, q_a2)),
vabdq_s16(q_a3, q_a2), vabdq_s16(q_a0, q_a1));
return q4_out;
}
static WEBP_INLINE int16x8x4_t DistoVerticalPass(int16x8x4_t q4_in) {
const int16x8_t q_a0 = vaddq_s16(q4_in.val[0], q4_in.val[2]);
const int16x8_t q_a1 = vaddq_s16(q4_in.val[1], q4_in.val[3]);
const int16x8_t q_a2 = vsubq_s16(q4_in.val[1], q4_in.val[3]);
const int16x8_t q_a3 = vsubq_s16(q4_in.val[0], q4_in.val[2]);
static WEBP_INLINE int16x8x4_t DistoVerticalPass(const uint8x8x4_t q4_in) {
const int16x8_t q_a0 = vreinterpretq_s16_u16(vaddl_u8(q4_in.val[0],
q4_in.val[2]));
const int16x8_t q_a1 = vreinterpretq_s16_u16(vaddl_u8(q4_in.val[1],
q4_in.val[3]));
const int16x8_t q_a2 = vreinterpretq_s16_u16(vsubl_u8(q4_in.val[1],
q4_in.val[3]));
const int16x8_t q_a3 = vreinterpretq_s16_u16(vsubl_u8(q4_in.val[0],
q4_in.val[2]));
int16x8x4_t q4_out;
q4_in.val[0] = vaddq_s16(q_a0, q_a1);
q4_in.val[1] = vaddq_s16(q_a3, q_a2);
q4_in.val[2] = vabdq_s16(q_a3, q_a2);
q4_in.val[3] = vabdq_s16(q_a0, q_a1);
q4_in.val[0] = vabsq_s16(q4_in.val[0]);
q4_in.val[1] = vabsq_s16(q4_in.val[1]);
return q4_in;
INIT_VECTOR4(q4_out,
vaddq_s16(q_a0, q_a1), vaddq_s16(q_a3, q_a2),
vsubq_s16(q_a3, q_a2), vsubq_s16(q_a0, q_a1));
return q4_out;
}
static WEBP_INLINE int16x4x4_t DistoLoadW(const uint16_t* w) {
@ -667,6 +651,7 @@ static WEBP_INLINE int32x2_t DistoSum(const int16x8x4_t q4_in,
// Hadamard transform
// Returns the weighted sum of the absolute value of transformed coefficients.
// w[] contains a row-major 4 by 4 symmetric matrix.
static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
const uint16_t* const w) {
uint32x2_t d_in_ab_0123 = vdup_n_u32(0);
@ -691,18 +676,19 @@ static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
vreinterpret_u8_u32(d_in_ab_cdef));
{
// horizontal pass
const uint8x8x4_t d4_t = DistoTranspose4x4U8(d4_in);
const int16x8x4_t q4_h = DistoHorizontalPass(d4_t);
// Vertical pass first to avoid a transpose (vertical and horizontal passes
// are commutative because w/kWeightY is symmetric) and subsequent
// transpose.
const int16x8x4_t q4_v = DistoVerticalPass(d4_in);
const int16x4x4_t d4_w = DistoLoadW(w);
// vertical pass
const int16x8x4_t q4_t = DistoTranspose4x4S16(q4_h);
const int16x8x4_t q4_v = DistoVerticalPass(q4_t);
int32x2_t d_sum = DistoSum(q4_v, d4_w);
// horizontal pass
const int16x8x4_t q4_t = DistoTranspose4x4S16(q4_v);
const int16x8x4_t q4_h = DistoHorizontalPass(q4_t);
int32x2_t d_sum = DistoSum(q4_h, d4_w);
// abs(sum2 - sum1) >> 5
d_sum = vabs_s32(d_sum);
d_sum = vshr_n_s32(d_sum, 5);
d_sum = vshr_n_s32(d_sum, 5);
return vget_lane_s32(d_sum, 0);
}
}

View File

@ -17,38 +17,10 @@
#include <stdlib.h> // for abs()
#include <emmintrin.h>
#include "./common_sse2.h"
#include "../enc/cost.h"
#include "../enc/vp8enci.h"
//------------------------------------------------------------------------------
// Quite useful macro for debugging. Left here for convenience.
#if 0
#include <stdio.h>
static void PrintReg(const __m128i r, const char* const name, int size) {
int n;
union {
__m128i r;
uint8_t i8[16];
uint16_t i16[8];
uint32_t i32[4];
uint64_t i64[2];
} tmp;
tmp.r = r;
fprintf(stderr, "%s\t: ", name);
if (size == 8) {
for (n = 0; n < 16; ++n) fprintf(stderr, "%.2x ", tmp.i8[n]);
} else if (size == 16) {
for (n = 0; n < 8; ++n) fprintf(stderr, "%.4x ", tmp.i16[n]);
} else if (size == 32) {
for (n = 0; n < 4; ++n) fprintf(stderr, "%.8x ", tmp.i32[n]);
} else {
for (n = 0; n < 2; ++n) fprintf(stderr, "%.16lx ", tmp.i64[n]);
}
fprintf(stderr, "\n");
}
#endif
//------------------------------------------------------------------------------
// Transforms (Paragraph 14.4)
@ -131,34 +103,7 @@ static void ITransform(const uint8_t* ref, const int16_t* in, uint8_t* dst,
const __m128i tmp3 = _mm_sub_epi16(a, d);
// Transpose the two 4x4.
// a00 a01 a02 a03 b00 b01 b02 b03
// a10 a11 a12 a13 b10 b11 b12 b13
// a20 a21 a22 a23 b20 b21 b22 b23
// a30 a31 a32 a33 b30 b31 b32 b33
const __m128i transpose0_0 = _mm_unpacklo_epi16(tmp0, tmp1);
const __m128i transpose0_1 = _mm_unpacklo_epi16(tmp2, tmp3);
const __m128i transpose0_2 = _mm_unpackhi_epi16(tmp0, tmp1);
const __m128i transpose0_3 = _mm_unpackhi_epi16(tmp2, tmp3);
// a00 a10 a01 a11 a02 a12 a03 a13
// a20 a30 a21 a31 a22 a32 a23 a33
// b00 b10 b01 b11 b02 b12 b03 b13
// b20 b30 b21 b31 b22 b32 b23 b33
const __m128i transpose1_0 = _mm_unpacklo_epi32(transpose0_0, transpose0_1);
const __m128i transpose1_1 = _mm_unpacklo_epi32(transpose0_2, transpose0_3);
const __m128i transpose1_2 = _mm_unpackhi_epi32(transpose0_0, transpose0_1);
const __m128i transpose1_3 = _mm_unpackhi_epi32(transpose0_2, transpose0_3);
// a00 a10 a20 a30 a01 a11 a21 a31
// b00 b10 b20 b30 b01 b11 b21 b31
// a02 a12 a22 a32 a03 a13 a23 a33
// b02 b12 a22 b32 b03 b13 b23 b33
T0 = _mm_unpacklo_epi64(transpose1_0, transpose1_1);
T1 = _mm_unpackhi_epi64(transpose1_0, transpose1_1);
T2 = _mm_unpacklo_epi64(transpose1_2, transpose1_3);
T3 = _mm_unpackhi_epi64(transpose1_2, transpose1_3);
// a00 a10 a20 a30 b00 b10 b20 b30
// a01 a11 a21 a31 b01 b11 b21 b31
// a02 a12 a22 a32 b02 b12 b22 b32
// a03 a13 a23 a33 b03 b13 b23 b33
VP8Transpose_2_4x4_16b(&tmp0, &tmp1, &tmp2, &tmp3, &T0, &T1, &T2, &T3);
}
// Horizontal pass and subsequent transpose.
@ -193,34 +138,8 @@ static void ITransform(const uint8_t* ref, const int16_t* in, uint8_t* dst,
const __m128i shifted3 = _mm_srai_epi16(tmp3, 3);
// Transpose the two 4x4.
// a00 a01 a02 a03 b00 b01 b02 b03
// a10 a11 a12 a13 b10 b11 b12 b13
// a20 a21 a22 a23 b20 b21 b22 b23
// a30 a31 a32 a33 b30 b31 b32 b33
const __m128i transpose0_0 = _mm_unpacklo_epi16(shifted0, shifted1);
const __m128i transpose0_1 = _mm_unpacklo_epi16(shifted2, shifted3);
const __m128i transpose0_2 = _mm_unpackhi_epi16(shifted0, shifted1);
const __m128i transpose0_3 = _mm_unpackhi_epi16(shifted2, shifted3);
// a00 a10 a01 a11 a02 a12 a03 a13
// a20 a30 a21 a31 a22 a32 a23 a33
// b00 b10 b01 b11 b02 b12 b03 b13
// b20 b30 b21 b31 b22 b32 b23 b33
const __m128i transpose1_0 = _mm_unpacklo_epi32(transpose0_0, transpose0_1);
const __m128i transpose1_1 = _mm_unpacklo_epi32(transpose0_2, transpose0_3);
const __m128i transpose1_2 = _mm_unpackhi_epi32(transpose0_0, transpose0_1);
const __m128i transpose1_3 = _mm_unpackhi_epi32(transpose0_2, transpose0_3);
// a00 a10 a20 a30 a01 a11 a21 a31
// b00 b10 b20 b30 b01 b11 b21 b31
// a02 a12 a22 a32 a03 a13 a23 a33
// b02 b12 a22 b32 b03 b13 b23 b33
T0 = _mm_unpacklo_epi64(transpose1_0, transpose1_1);
T1 = _mm_unpackhi_epi64(transpose1_0, transpose1_1);
T2 = _mm_unpacklo_epi64(transpose1_2, transpose1_3);
T3 = _mm_unpackhi_epi64(transpose1_2, transpose1_3);
// a00 a10 a20 a30 b00 b10 b20 b30
// a01 a11 a21 a31 b01 b11 b21 b31
// a02 a12 a22 a32 b02 b12 b22 b32
// a03 a13 a23 a33 b03 b13 b23 b33
VP8Transpose_2_4x4_16b(&shifted0, &shifted1, &shifted2, &shifted3, &T0, &T1,
&T2, &T3);
}
// Add inverse transform to 'ref' and store.
@ -373,42 +292,42 @@ static void FTransformPass2(const __m128i* const v01, const __m128i* const v32,
static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
const __m128i zero = _mm_setzero_si128();
// Load src and convert to 16b.
// Load src.
const __m128i src0 = _mm_loadl_epi64((const __m128i*)&src[0 * BPS]);
const __m128i src1 = _mm_loadl_epi64((const __m128i*)&src[1 * BPS]);
const __m128i src2 = _mm_loadl_epi64((const __m128i*)&src[2 * BPS]);
const __m128i src3 = _mm_loadl_epi64((const __m128i*)&src[3 * BPS]);
const __m128i src_0 = _mm_unpacklo_epi8(src0, zero);
const __m128i src_1 = _mm_unpacklo_epi8(src1, zero);
const __m128i src_2 = _mm_unpacklo_epi8(src2, zero);
const __m128i src_3 = _mm_unpacklo_epi8(src3, zero);
// Load ref and convert to 16b.
// 00 01 02 03 *
// 10 11 12 13 *
// 20 21 22 23 *
// 30 31 32 33 *
// Shuffle.
const __m128i src_0 = _mm_unpacklo_epi16(src0, src1);
const __m128i src_1 = _mm_unpacklo_epi16(src2, src3);
// 00 01 10 11 02 03 12 13 * * ...
// 20 21 30 31 22 22 32 33 * * ...
// Load ref.
const __m128i ref0 = _mm_loadl_epi64((const __m128i*)&ref[0 * BPS]);
const __m128i ref1 = _mm_loadl_epi64((const __m128i*)&ref[1 * BPS]);
const __m128i ref2 = _mm_loadl_epi64((const __m128i*)&ref[2 * BPS]);
const __m128i ref3 = _mm_loadl_epi64((const __m128i*)&ref[3 * BPS]);
const __m128i ref_0 = _mm_unpacklo_epi8(ref0, zero);
const __m128i ref_1 = _mm_unpacklo_epi8(ref1, zero);
const __m128i ref_2 = _mm_unpacklo_epi8(ref2, zero);
const __m128i ref_3 = _mm_unpacklo_epi8(ref3, zero);
// Compute difference. -> 00 01 02 03 00 00 00 00
const __m128i diff0 = _mm_sub_epi16(src_0, ref_0);
const __m128i diff1 = _mm_sub_epi16(src_1, ref_1);
const __m128i diff2 = _mm_sub_epi16(src_2, ref_2);
const __m128i diff3 = _mm_sub_epi16(src_3, ref_3);
const __m128i ref_0 = _mm_unpacklo_epi16(ref0, ref1);
const __m128i ref_1 = _mm_unpacklo_epi16(ref2, ref3);
// Unpack and shuffle
// 00 01 02 03 0 0 0 0
// 10 11 12 13 0 0 0 0
// 20 21 22 23 0 0 0 0
// 30 31 32 33 0 0 0 0
const __m128i shuf01 = _mm_unpacklo_epi32(diff0, diff1);
const __m128i shuf23 = _mm_unpacklo_epi32(diff2, diff3);
// Convert both to 16 bit.
const __m128i src_0_16b = _mm_unpacklo_epi8(src_0, zero);
const __m128i src_1_16b = _mm_unpacklo_epi8(src_1, zero);
const __m128i ref_0_16b = _mm_unpacklo_epi8(ref_0, zero);
const __m128i ref_1_16b = _mm_unpacklo_epi8(ref_1, zero);
// Compute the difference.
const __m128i row01 = _mm_sub_epi16(src_0_16b, ref_0_16b);
const __m128i row23 = _mm_sub_epi16(src_1_16b, ref_1_16b);
__m128i v01, v32;
// First pass
FTransformPass1(&shuf01, &shuf23, &v01, &v32);
FTransformPass1(&row01, &row23, &v01, &v32);
// Second pass
FTransformPass2(&v01, &v32, out);
@ -463,8 +382,7 @@ static void FTransform2(const uint8_t* src, const uint8_t* ref, int16_t* out) {
}
static void FTransformWHTRow(const int16_t* const in, __m128i* const out) {
const __m128i kMult1 = _mm_set_epi16(0, 0, 0, 0, 1, 1, 1, 1);
const __m128i kMult2 = _mm_set_epi16(0, 0, 0, 0, -1, 1, -1, 1);
const __m128i kMult = _mm_set_epi16(-1, 1, -1, 1, 1, 1, 1, 1);
const __m128i src0 = _mm_loadl_epi64((__m128i*)&in[0 * 16]);
const __m128i src1 = _mm_loadl_epi64((__m128i*)&in[1 * 16]);
const __m128i src2 = _mm_loadl_epi64((__m128i*)&in[2 * 16]);
@ -473,33 +391,38 @@ static void FTransformWHTRow(const int16_t* const in, __m128i* const out) {
const __m128i A23 = _mm_unpacklo_epi16(src2, src3); // A2 A3 | ...
const __m128i B0 = _mm_adds_epi16(A01, A23); // a0 | a1 | ...
const __m128i B1 = _mm_subs_epi16(A01, A23); // a3 | a2 | ...
const __m128i C0 = _mm_unpacklo_epi32(B0, B1); // a0 | a1 | a3 | a2
const __m128i C1 = _mm_unpacklo_epi32(B1, B0); // a3 | a2 | a0 | a1
const __m128i D0 = _mm_madd_epi16(C0, kMult1); // out0, out1
const __m128i D1 = _mm_madd_epi16(C1, kMult2); // out2, out3
*out = _mm_unpacklo_epi64(D0, D1);
const __m128i C0 = _mm_unpacklo_epi32(B0, B1); // a0 | a1 | a3 | a2 | ...
const __m128i C1 = _mm_unpacklo_epi32(B1, B0); // a3 | a2 | a0 | a1 | ...
const __m128i D = _mm_unpacklo_epi64(C0, C1); // a0 a1 a3 a2 a3 a2 a0 a1
*out = _mm_madd_epi16(D, kMult);
}
static void FTransformWHT(const int16_t* in, int16_t* out) {
// Input is 12b signed.
__m128i row0, row1, row2, row3;
// Rows are 14b signed.
FTransformWHTRow(in + 0 * 64, &row0);
FTransformWHTRow(in + 1 * 64, &row1);
FTransformWHTRow(in + 2 * 64, &row2);
FTransformWHTRow(in + 3 * 64, &row3);
{
// The a* are 15b signed.
const __m128i a0 = _mm_add_epi32(row0, row2);
const __m128i a1 = _mm_add_epi32(row1, row3);
const __m128i a2 = _mm_sub_epi32(row1, row3);
const __m128i a3 = _mm_sub_epi32(row0, row2);
const __m128i b0 = _mm_srai_epi32(_mm_add_epi32(a0, a1), 1);
const __m128i b1 = _mm_srai_epi32(_mm_add_epi32(a3, a2), 1);
const __m128i b2 = _mm_srai_epi32(_mm_sub_epi32(a3, a2), 1);
const __m128i b3 = _mm_srai_epi32(_mm_sub_epi32(a0, a1), 1);
const __m128i out0 = _mm_packs_epi32(b0, b1);
const __m128i out1 = _mm_packs_epi32(b2, b3);
_mm_storeu_si128((__m128i*)&out[0], out0);
_mm_storeu_si128((__m128i*)&out[8], out1);
const __m128i a0a3 = _mm_packs_epi32(a0, a3);
const __m128i a1a2 = _mm_packs_epi32(a1, a2);
// The b* are 16b signed.
const __m128i b0b1 = _mm_add_epi16(a0a3, a1a2);
const __m128i b3b2 = _mm_sub_epi16(a0a3, a1a2);
const __m128i tmp_b2b3 = _mm_unpackhi_epi64(b3b2, b3b2);
const __m128i b2b3 = _mm_unpacklo_epi64(tmp_b2b3, b3b2);
_mm_storeu_si128((__m128i*)&out[0], _mm_srai_epi16(b0b1, 1));
_mm_storeu_si128((__m128i*)&out[8], _mm_srai_epi16(b2b3, 1));
}
}
@ -692,12 +615,10 @@ static WEBP_INLINE void TrueMotion(uint8_t* dst, const uint8_t* left,
static WEBP_INLINE void DC8uv(uint8_t* dst, const uint8_t* left,
const uint8_t* top) {
const __m128i zero = _mm_setzero_si128();
const __m128i top_values = _mm_loadl_epi64((const __m128i*)top);
const __m128i left_values = _mm_loadl_epi64((const __m128i*)left);
const __m128i sum_top = _mm_sad_epu8(top_values, zero);
const __m128i sum_left = _mm_sad_epu8(left_values, zero);
const int DC = _mm_cvtsi128_si32(sum_top) + _mm_cvtsi128_si32(sum_left) + 8;
const __m128i combined = _mm_unpacklo_epi64(top_values, left_values);
const int DC = VP8HorizontalAdd8b(&combined) + 8;
Put8x8uv(DC >> 4, dst);
}
@ -735,27 +656,16 @@ static WEBP_INLINE void DC8uvMode(uint8_t* dst, const uint8_t* left,
static WEBP_INLINE void DC16(uint8_t* dst, const uint8_t* left,
const uint8_t* top) {
const __m128i zero = _mm_setzero_si128();
const __m128i top_row = _mm_load_si128((const __m128i*)top);
const __m128i left_row = _mm_load_si128((const __m128i*)left);
const __m128i sad8x2 = _mm_sad_epu8(top_row, zero);
// sum the two sads: sad8x2[0:1] + sad8x2[8:9]
const __m128i sum_top = _mm_add_epi16(sad8x2, _mm_shuffle_epi32(sad8x2, 2));
const __m128i sad8x2_left = _mm_sad_epu8(left_row, zero);
// sum the two sads: sad8x2[0:1] + sad8x2[8:9]
const __m128i sum_left =
_mm_add_epi16(sad8x2_left, _mm_shuffle_epi32(sad8x2_left, 2));
const int DC = _mm_cvtsi128_si32(sum_top) + _mm_cvtsi128_si32(sum_left) + 16;
const int DC =
VP8HorizontalAdd8b(&top_row) + VP8HorizontalAdd8b(&left_row) + 16;
Put16(DC >> 5, dst);
}
static WEBP_INLINE void DC16NoLeft(uint8_t* dst, const uint8_t* top) {
const __m128i zero = _mm_setzero_si128();
const __m128i top_row = _mm_load_si128((const __m128i*)top);
const __m128i sad8x2 = _mm_sad_epu8(top_row, zero);
// sum the two sads: sad8x2[0:1] + sad8x2[8:9]
const __m128i sum = _mm_add_epi16(sad8x2, _mm_shuffle_epi32(sad8x2, 2));
const int DC = _mm_cvtsi128_si32(sum) + 8;
const int DC = VP8HorizontalAdd8b(&top_row) + 8;
Put16(DC >> 4, dst);
}
@ -1142,15 +1052,15 @@ static int SSE4x4(const uint8_t* a, const uint8_t* b) {
// reconstructed samples.
// Hadamard transform
// Returns the difference between the weighted sum of the absolute value of
// transformed coefficients.
// Returns the weighted sum of the absolute value of transformed coefficients.
// w[] contains a row-major 4 by 4 symmetric matrix.
static int TTransform(const uint8_t* inA, const uint8_t* inB,
const uint16_t* const w) {
int32_t sum[4];
__m128i tmp_0, tmp_1, tmp_2, tmp_3;
const __m128i zero = _mm_setzero_si128();
// Load, combine and transpose inputs.
// Load and combine inputs.
{
const __m128i inA_0 = _mm_loadl_epi64((const __m128i*)&inA[BPS * 0]);
const __m128i inA_1 = _mm_loadl_epi64((const __m128i*)&inA[BPS * 1]);
@ -1162,37 +1072,22 @@ static int TTransform(const uint8_t* inA, const uint8_t* inB,
const __m128i inB_3 = _mm_loadl_epi64((const __m128i*)&inB[BPS * 3]);
// Combine inA and inB (we'll do two transforms in parallel).
const __m128i inAB_0 = _mm_unpacklo_epi8(inA_0, inB_0);
const __m128i inAB_1 = _mm_unpacklo_epi8(inA_1, inB_1);
const __m128i inAB_2 = _mm_unpacklo_epi8(inA_2, inB_2);
const __m128i inAB_3 = _mm_unpacklo_epi8(inA_3, inB_3);
// a00 b00 a01 b01 a02 b03 a03 b03 0 0 0 0 0 0 0 0
// a10 b10 a11 b11 a12 b12 a13 b13 0 0 0 0 0 0 0 0
// a20 b20 a21 b21 a22 b22 a23 b23 0 0 0 0 0 0 0 0
// a30 b30 a31 b31 a32 b32 a33 b33 0 0 0 0 0 0 0 0
// Transpose the two 4x4, discarding the filling zeroes.
const __m128i transpose0_0 = _mm_unpacklo_epi8(inAB_0, inAB_2);
const __m128i transpose0_1 = _mm_unpacklo_epi8(inAB_1, inAB_3);
// a00 a20 b00 b20 a01 a21 b01 b21 a02 a22 b02 b22 a03 a23 b03 b23
// a10 a30 b10 b30 a11 a31 b11 b31 a12 a32 b12 b32 a13 a33 b13 b33
const __m128i transpose1_0 = _mm_unpacklo_epi8(transpose0_0, transpose0_1);
const __m128i transpose1_1 = _mm_unpackhi_epi8(transpose0_0, transpose0_1);
// a00 a10 a20 a30 b00 b10 b20 b30 a01 a11 a21 a31 b01 b11 b21 b31
// a02 a12 a22 a32 b02 b12 b22 b32 a03 a13 a23 a33 b03 b13 b23 b33
// Convert to 16b.
tmp_0 = _mm_unpacklo_epi8(transpose1_0, zero);
tmp_1 = _mm_unpackhi_epi8(transpose1_0, zero);
tmp_2 = _mm_unpacklo_epi8(transpose1_1, zero);
tmp_3 = _mm_unpackhi_epi8(transpose1_1, zero);
// a00 a10 a20 a30 b00 b10 b20 b30
// a01 a11 a21 a31 b01 b11 b21 b31
// a02 a12 a22 a32 b02 b12 b22 b32
// a03 a13 a23 a33 b03 b13 b23 b33
const __m128i inAB_0 = _mm_unpacklo_epi32(inA_0, inB_0);
const __m128i inAB_1 = _mm_unpacklo_epi32(inA_1, inB_1);
const __m128i inAB_2 = _mm_unpacklo_epi32(inA_2, inB_2);
const __m128i inAB_3 = _mm_unpacklo_epi32(inA_3, inB_3);
tmp_0 = _mm_unpacklo_epi8(inAB_0, zero);
tmp_1 = _mm_unpacklo_epi8(inAB_1, zero);
tmp_2 = _mm_unpacklo_epi8(inAB_2, zero);
tmp_3 = _mm_unpacklo_epi8(inAB_3, zero);
// a00 a01 a02 a03 b00 b01 b02 b03
// a10 a11 a12 a13 b10 b11 b12 b13
// a20 a21 a22 a23 b20 b21 b22 b23
// a30 a31 a32 a33 b30 b31 b32 b33
}
// Horizontal pass and subsequent transpose.
// Vertical pass first to avoid a transpose (vertical and horizontal passes
// are commutative because w/kWeightY is symmetric) and subsequent transpose.
{
// Calculate a and b (two 4x4 at once).
const __m128i a0 = _mm_add_epi16(tmp_0, tmp_2);
@ -1209,33 +1104,10 @@ static int TTransform(const uint8_t* inA, const uint8_t* inB,
// a30 a31 a32 a33 b30 b31 b32 b33
// Transpose the two 4x4.
const __m128i transpose0_0 = _mm_unpacklo_epi16(b0, b1);
const __m128i transpose0_1 = _mm_unpacklo_epi16(b2, b3);
const __m128i transpose0_2 = _mm_unpackhi_epi16(b0, b1);
const __m128i transpose0_3 = _mm_unpackhi_epi16(b2, b3);
// a00 a10 a01 a11 a02 a12 a03 a13
// a20 a30 a21 a31 a22 a32 a23 a33
// b00 b10 b01 b11 b02 b12 b03 b13
// b20 b30 b21 b31 b22 b32 b23 b33
const __m128i transpose1_0 = _mm_unpacklo_epi32(transpose0_0, transpose0_1);
const __m128i transpose1_1 = _mm_unpacklo_epi32(transpose0_2, transpose0_3);
const __m128i transpose1_2 = _mm_unpackhi_epi32(transpose0_0, transpose0_1);
const __m128i transpose1_3 = _mm_unpackhi_epi32(transpose0_2, transpose0_3);
// a00 a10 a20 a30 a01 a11 a21 a31
// b00 b10 b20 b30 b01 b11 b21 b31
// a02 a12 a22 a32 a03 a13 a23 a33
// b02 b12 a22 b32 b03 b13 b23 b33
tmp_0 = _mm_unpacklo_epi64(transpose1_0, transpose1_1);
tmp_1 = _mm_unpackhi_epi64(transpose1_0, transpose1_1);
tmp_2 = _mm_unpacklo_epi64(transpose1_2, transpose1_3);
tmp_3 = _mm_unpackhi_epi64(transpose1_2, transpose1_3);
// a00 a10 a20 a30 b00 b10 b20 b30
// a01 a11 a21 a31 b01 b11 b21 b31
// a02 a12 a22 a32 b02 b12 b22 b32
// a03 a13 a23 a33 b03 b13 b23 b33
VP8Transpose_2_4x4_16b(&b0, &b1, &b2, &b3, &tmp_0, &tmp_1, &tmp_2, &tmp_3);
}
// Vertical pass and difference of weighted sums.
// Horizontal pass and difference of weighted sums.
{
// Load all inputs.
const __m128i w_0 = _mm_loadu_si128((const __m128i*)&w[0]);

View File

@ -17,6 +17,7 @@
#include <smmintrin.h>
#include <stdlib.h> // for abs()
#include "./common_sse2.h"
#include "../enc/vp8enci.h"
//------------------------------------------------------------------------------
@ -67,55 +68,45 @@ static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
// reconstructed samples.
// Hadamard transform
// Returns the difference between the weighted sum of the absolute value of
// transformed coefficients.
// Returns the weighted sum of the absolute value of transformed coefficients.
// w[] contains a row-major 4 by 4 symmetric matrix.
static int TTransform(const uint8_t* inA, const uint8_t* inB,
const uint16_t* const w) {
int32_t sum[4];
__m128i tmp_0, tmp_1, tmp_2, tmp_3;
// Load, combine and transpose inputs.
// Load and combine inputs.
{
const __m128i inA_0 = _mm_loadl_epi64((const __m128i*)&inA[BPS * 0]);
const __m128i inA_1 = _mm_loadl_epi64((const __m128i*)&inA[BPS * 1]);
const __m128i inA_2 = _mm_loadl_epi64((const __m128i*)&inA[BPS * 2]);
const __m128i inA_0 = _mm_loadu_si128((const __m128i*)&inA[BPS * 0]);
const __m128i inA_1 = _mm_loadu_si128((const __m128i*)&inA[BPS * 1]);
const __m128i inA_2 = _mm_loadu_si128((const __m128i*)&inA[BPS * 2]);
// In SSE4.1, with gcc 4.8 at least (maybe other versions),
// _mm_loadu_si128 is faster than _mm_loadl_epi64. But for the last lump
// of inA and inB, _mm_loadl_epi64 is still used not to have an out of
// bound read.
const __m128i inA_3 = _mm_loadl_epi64((const __m128i*)&inA[BPS * 3]);
const __m128i inB_0 = _mm_loadl_epi64((const __m128i*)&inB[BPS * 0]);
const __m128i inB_1 = _mm_loadl_epi64((const __m128i*)&inB[BPS * 1]);
const __m128i inB_2 = _mm_loadl_epi64((const __m128i*)&inB[BPS * 2]);
const __m128i inB_0 = _mm_loadu_si128((const __m128i*)&inB[BPS * 0]);
const __m128i inB_1 = _mm_loadu_si128((const __m128i*)&inB[BPS * 1]);
const __m128i inB_2 = _mm_loadu_si128((const __m128i*)&inB[BPS * 2]);
const __m128i inB_3 = _mm_loadl_epi64((const __m128i*)&inB[BPS * 3]);
// Combine inA and inB (we'll do two transforms in parallel).
const __m128i inAB_0 = _mm_unpacklo_epi8(inA_0, inB_0);
const __m128i inAB_1 = _mm_unpacklo_epi8(inA_1, inB_1);
const __m128i inAB_2 = _mm_unpacklo_epi8(inA_2, inB_2);
const __m128i inAB_3 = _mm_unpacklo_epi8(inA_3, inB_3);
// a00 b00 a01 b01 a02 b03 a03 b03 0 0 0 0 0 0 0 0
// a10 b10 a11 b11 a12 b12 a13 b13 0 0 0 0 0 0 0 0
// a20 b20 a21 b21 a22 b22 a23 b23 0 0 0 0 0 0 0 0
// a30 b30 a31 b31 a32 b32 a33 b33 0 0 0 0 0 0 0 0
// Transpose the two 4x4, discarding the filling zeroes.
const __m128i transpose0_0 = _mm_unpacklo_epi8(inAB_0, inAB_2);
const __m128i transpose0_1 = _mm_unpacklo_epi8(inAB_1, inAB_3);
// a00 a20 b00 b20 a01 a21 b01 b21 a02 a22 b02 b22 a03 a23 b03 b23
// a10 a30 b10 b30 a11 a31 b11 b31 a12 a32 b12 b32 a13 a33 b13 b33
const __m128i transpose1_0 = _mm_unpacklo_epi8(transpose0_0, transpose0_1);
const __m128i transpose1_1 = _mm_unpackhi_epi8(transpose0_0, transpose0_1);
// a00 a10 a20 a30 b00 b10 b20 b30 a01 a11 a21 a31 b01 b11 b21 b31
// a02 a12 a22 a32 b02 b12 b22 b32 a03 a13 a23 a33 b03 b13 b23 b33
// Convert to 16b.
tmp_0 = _mm_cvtepu8_epi16(transpose1_0);
tmp_1 = _mm_cvtepu8_epi16(_mm_srli_si128(transpose1_0, 8));
tmp_2 = _mm_cvtepu8_epi16(transpose1_1);
tmp_3 = _mm_cvtepu8_epi16(_mm_srli_si128(transpose1_1, 8));
// a00 a10 a20 a30 b00 b10 b20 b30
// a01 a11 a21 a31 b01 b11 b21 b31
// a02 a12 a22 a32 b02 b12 b22 b32
// a03 a13 a23 a33 b03 b13 b23 b33
const __m128i inAB_0 = _mm_unpacklo_epi32(inA_0, inB_0);
const __m128i inAB_1 = _mm_unpacklo_epi32(inA_1, inB_1);
const __m128i inAB_2 = _mm_unpacklo_epi32(inA_2, inB_2);
const __m128i inAB_3 = _mm_unpacklo_epi32(inA_3, inB_3);
tmp_0 = _mm_cvtepu8_epi16(inAB_0);
tmp_1 = _mm_cvtepu8_epi16(inAB_1);
tmp_2 = _mm_cvtepu8_epi16(inAB_2);
tmp_3 = _mm_cvtepu8_epi16(inAB_3);
// a00 a01 a02 a03 b00 b01 b02 b03
// a10 a11 a12 a13 b10 b11 b12 b13
// a20 a21 a22 a23 b20 b21 b22 b23
// a30 a31 a32 a33 b30 b31 b32 b33
}
// Horizontal pass and subsequent transpose.
// Vertical pass first to avoid a transpose (vertical and horizontal passes
// are commutative because w/kWeightY is symmetric) and subsequent transpose.
{
// Calculate a and b (two 4x4 at once).
const __m128i a0 = _mm_add_epi16(tmp_0, tmp_2);
@ -132,33 +123,10 @@ static int TTransform(const uint8_t* inA, const uint8_t* inB,
// a30 a31 a32 a33 b30 b31 b32 b33
// Transpose the two 4x4.
const __m128i transpose0_0 = _mm_unpacklo_epi16(b0, b1);
const __m128i transpose0_1 = _mm_unpacklo_epi16(b2, b3);
const __m128i transpose0_2 = _mm_unpackhi_epi16(b0, b1);
const __m128i transpose0_3 = _mm_unpackhi_epi16(b2, b3);
// a00 a10 a01 a11 a02 a12 a03 a13
// a20 a30 a21 a31 a22 a32 a23 a33
// b00 b10 b01 b11 b02 b12 b03 b13
// b20 b30 b21 b31 b22 b32 b23 b33
const __m128i transpose1_0 = _mm_unpacklo_epi32(transpose0_0, transpose0_1);
const __m128i transpose1_1 = _mm_unpacklo_epi32(transpose0_2, transpose0_3);
const __m128i transpose1_2 = _mm_unpackhi_epi32(transpose0_0, transpose0_1);
const __m128i transpose1_3 = _mm_unpackhi_epi32(transpose0_2, transpose0_3);
// a00 a10 a20 a30 a01 a11 a21 a31
// b00 b10 b20 b30 b01 b11 b21 b31
// a02 a12 a22 a32 a03 a13 a23 a33
// b02 b12 a22 b32 b03 b13 b23 b33
tmp_0 = _mm_unpacklo_epi64(transpose1_0, transpose1_1);
tmp_1 = _mm_unpackhi_epi64(transpose1_0, transpose1_1);
tmp_2 = _mm_unpacklo_epi64(transpose1_2, transpose1_3);
tmp_3 = _mm_unpackhi_epi64(transpose1_2, transpose1_3);
// a00 a10 a20 a30 b00 b10 b20 b30
// a01 a11 a21 a31 b01 b11 b21 b31
// a02 a12 a22 a32 b02 b12 b22 b32
// a03 a13 a23 a33 b03 b13 b23 b33
VP8Transpose_2_4x4_16b(&b0, &b1, &b2, &b3, &tmp_0, &tmp_1, &tmp_2, &tmp_3);
}
// Vertical pass and difference of weighted sums.
// Horizontal pass and difference of weighted sums.
{
// Load all inputs.
const __m128i w_0 = _mm_loadu_si128((const __m128i*)&w[0]);
@ -195,11 +163,9 @@ static int TTransform(const uint8_t* inA, const uint8_t* inB,
// difference of weighted sums
A_b2 = _mm_sub_epi32(A_b0, B_b0);
// cascading summation of the differences
B_b0 = _mm_hadd_epi32(A_b2, A_b2);
B_b2 = _mm_hadd_epi32(B_b0, B_b0);
return _mm_cvtsi128_si32(B_b2);
_mm_storeu_si128((__m128i*)&sum[0], A_b2);
}
return sum[0] + sum[1] + sum[2] + sum[3];
}
static int Disto4x4(const uint8_t* const a, const uint8_t* const b,

View File

@ -184,19 +184,40 @@ static void GradientFilter(const uint8_t* data, int width, int height,
//------------------------------------------------------------------------------
static void VerticalUnfilter(int width, int height, int stride, int row,
int num_rows, uint8_t* data) {
DoVerticalFilter(data, width, height, stride, row, num_rows, 1, data);
static void HorizontalUnfilter(const uint8_t* prev, const uint8_t* in,
uint8_t* out, int width) {
uint8_t pred = (prev == NULL) ? 0 : prev[0];
int i;
for (i = 0; i < width; ++i) {
out[i] = pred + in[i];
pred = out[i];
}
}
static void HorizontalUnfilter(int width, int height, int stride, int row,
int num_rows, uint8_t* data) {
DoHorizontalFilter(data, width, height, stride, row, num_rows, 1, data);
static void VerticalUnfilter(const uint8_t* prev, const uint8_t* in,
uint8_t* out, int width) {
if (prev == NULL) {
HorizontalUnfilter(NULL, in, out, width);
} else {
int i;
for (i = 0; i < width; ++i) out[i] = prev[i] + in[i];
}
}
static void GradientUnfilter(int width, int height, int stride, int row,
int num_rows, uint8_t* data) {
DoGradientFilter(data, width, height, stride, row, num_rows, 1, data);
static void GradientUnfilter(const uint8_t* prev, const uint8_t* in,
uint8_t* out, int width) {
if (prev == NULL) {
HorizontalUnfilter(NULL, in, out, width);
} else {
uint8_t top = prev[0], top_left = top, left = top;
int i;
for (i = 0; i < width; ++i) {
top = prev[i]; // need to read this first, in case prev==out
left = in[i] + GradientPredictor(left, top, top_left);
top_left = top;
out[i] = left;
}
}
}
//------------------------------------------------------------------------------

View File

@ -33,10 +33,6 @@
assert(row >= 0 && num_rows > 0 && row + num_rows <= height); \
(void)height; // Silence unused warning.
// if INVERSE
// preds == &dst[-1] == &src[-1]
// else
// preds == &src[-1] != &dst[-1]
#define DO_PREDICT_LINE(SRC, DST, LENGTH, INVERSE) do { \
const uint8_t* psrc = (uint8_t*)(SRC); \
uint8_t* pdst = (uint8_t*)(DST); \
@ -45,27 +41,28 @@
__asm__ volatile ( \
".set push \n\t" \
".set noreorder \n\t" \
"srl %[temp0], %[length], 0x2 \n\t" \
"srl %[temp0], %[length], 2 \n\t" \
"beqz %[temp0], 4f \n\t" \
" andi %[temp6], %[length], 0x3 \n\t" \
" andi %[temp6], %[length], 3 \n\t" \
".if " #INVERSE " \n\t" \
"lbu %[temp1], -1(%[src]) \n\t" \
"1: \n\t" \
"lbu %[temp1], -1(%[dst]) \n\t" \
"lbu %[temp2], 0(%[src]) \n\t" \
"lbu %[temp3], 1(%[src]) \n\t" \
"lbu %[temp4], 2(%[src]) \n\t" \
"lbu %[temp5], 3(%[src]) \n\t" \
"addu %[temp1], %[temp1], %[temp2] \n\t" \
"addu %[temp2], %[temp1], %[temp3] \n\t" \
"addu %[temp3], %[temp2], %[temp4] \n\t" \
"addu %[temp4], %[temp3], %[temp5] \n\t" \
"sb %[temp1], 0(%[dst]) \n\t" \
"sb %[temp2], 1(%[dst]) \n\t" \
"sb %[temp3], 2(%[dst]) \n\t" \
"sb %[temp4], 3(%[dst]) \n\t" \
"addiu %[src], %[src], 4 \n\t" \
"addiu %[temp0], %[temp0], -1 \n\t" \
"addu %[temp2], %[temp2], %[temp1] \n\t" \
"addu %[temp3], %[temp3], %[temp2] \n\t" \
"addu %[temp4], %[temp4], %[temp3] \n\t" \
"addu %[temp1], %[temp5], %[temp4] \n\t" \
"sb %[temp2], -4(%[src]) \n\t" \
"sb %[temp3], -3(%[src]) \n\t" \
"sb %[temp4], -2(%[src]) \n\t" \
"bnez %[temp0], 1b \n\t" \
" sb %[temp1], -1(%[src]) \n\t" \
" addiu %[dst], %[dst], 4 \n\t" \
".else \n\t" \
"1: \n\t" \
"ulw %[temp1], -1(%[src]) \n\t" \
@ -81,16 +78,16 @@
"beqz %[temp6], 3f \n\t" \
" nop \n\t" \
"2: \n\t" \
"lbu %[temp1], -1(%[src]) \n\t" \
"lbu %[temp2], 0(%[src]) \n\t" \
"addiu %[src], %[src], 1 \n\t" \
".if " #INVERSE " \n\t" \
"lbu %[temp1], -1(%[dst]) \n\t" \
"addu %[temp3], %[temp1], %[temp2] \n\t" \
"sb %[temp3], -1(%[src]) \n\t" \
".else \n\t" \
"lbu %[temp1], -1(%[src]) \n\t" \
"subu %[temp3], %[temp1], %[temp2] \n\t" \
"sb %[temp3], 0(%[dst]) \n\t" \
".endif \n\t" \
"addiu %[src], %[src], 1 \n\t" \
"sb %[temp3], 0(%[dst]) \n\t" \
"addiu %[temp6], %[temp6], -1 \n\t" \
"bnez %[temp6], 2b \n\t" \
" addiu %[dst], %[dst], 1 \n\t" \
@ -105,12 +102,8 @@
} while (0)
static WEBP_INLINE void PredictLine(const uint8_t* src, uint8_t* dst,
int length, int inverse) {
if (inverse) {
DO_PREDICT_LINE(src, dst, length, 1);
} else {
DO_PREDICT_LINE(src, dst, length, 0);
}
int length) {
DO_PREDICT_LINE(src, dst, length, 0);
}
#define DO_PREDICT_LINE_VERTICAL(SRC, PRED, DST, LENGTH, INVERSE) do { \
@ -172,16 +165,12 @@ static WEBP_INLINE void PredictLine(const uint8_t* src, uint8_t* dst,
); \
} while (0)
#define PREDICT_LINE_ONE_PASS(SRC, PRED, DST, INVERSE) do { \
#define PREDICT_LINE_ONE_PASS(SRC, PRED, DST) do { \
int temp1, temp2, temp3; \
__asm__ volatile ( \
"lbu %[temp1], 0(%[src]) \n\t" \
"lbu %[temp2], 0(%[pred]) \n\t" \
".if " #INVERSE " \n\t" \
"addu %[temp3], %[temp1], %[temp2] \n\t" \
".else \n\t" \
"subu %[temp3], %[temp1], %[temp2] \n\t" \
".endif \n\t" \
"sb %[temp3], 0(%[dst]) \n\t" \
: [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3) \
: [pred]"r"((PRED)), [dst]"r"((DST)), [src]"r"((SRC)) \
@ -192,10 +181,10 @@ static WEBP_INLINE void PredictLine(const uint8_t* src, uint8_t* dst,
//------------------------------------------------------------------------------
// Horizontal filter.
#define FILTER_LINE_BY_LINE(INVERSE) do { \
#define FILTER_LINE_BY_LINE do { \
while (row < last_row) { \
PREDICT_LINE_ONE_PASS(in, preds - stride, out, INVERSE); \
DO_PREDICT_LINE(in + 1, out + 1, width - 1, INVERSE); \
PREDICT_LINE_ONE_PASS(in, preds - stride, out); \
DO_PREDICT_LINE(in + 1, out + 1, width - 1, 0); \
++row; \
preds += stride; \
in += stride; \
@ -206,19 +195,19 @@ static WEBP_INLINE void PredictLine(const uint8_t* src, uint8_t* dst,
static WEBP_INLINE void DoHorizontalFilter(const uint8_t* in,
int width, int height, int stride,
int row, int num_rows,
int inverse, uint8_t* out) {
uint8_t* out) {
const uint8_t* preds;
const size_t start_offset = row * stride;
const int last_row = row + num_rows;
SANITY_CHECK(in, out);
in += start_offset;
out += start_offset;
preds = inverse ? out : in;
preds = in;
if (row == 0) {
// Leftmost pixel is the same as input for topmost scanline.
out[0] = in[0];
PredictLine(in + 1, out + 1, width - 1, inverse);
PredictLine(in + 1, out + 1, width - 1);
row = 1;
preds += stride;
in += stride;
@ -226,31 +215,21 @@ static WEBP_INLINE void DoHorizontalFilter(const uint8_t* in,
}
// Filter line-by-line.
if (inverse) {
FILTER_LINE_BY_LINE(1);
} else {
FILTER_LINE_BY_LINE(0);
}
FILTER_LINE_BY_LINE;
}
#undef FILTER_LINE_BY_LINE
static void HorizontalFilter(const uint8_t* data, int width, int height,
int stride, uint8_t* filtered_data) {
DoHorizontalFilter(data, width, height, stride, 0, height, 0, filtered_data);
}
static void HorizontalUnfilter(int width, int height, int stride, int row,
int num_rows, uint8_t* data) {
DoHorizontalFilter(data, width, height, stride, row, num_rows, 1, data);
DoHorizontalFilter(data, width, height, stride, 0, height, filtered_data);
}
//------------------------------------------------------------------------------
// Vertical filter.
#define FILTER_LINE_BY_LINE(INVERSE) do { \
#define FILTER_LINE_BY_LINE do { \
while (row < last_row) { \
DO_PREDICT_LINE_VERTICAL(in, preds, out, width, INVERSE); \
DO_PREDICT_LINE_VERTICAL(in, preds, out, width, 0); \
++row; \
preds += stride; \
in += stride; \
@ -260,21 +239,20 @@ static void HorizontalUnfilter(int width, int height, int stride, int row,
static WEBP_INLINE void DoVerticalFilter(const uint8_t* in,
int width, int height, int stride,
int row, int num_rows,
int inverse, uint8_t* out) {
int row, int num_rows, uint8_t* out) {
const uint8_t* preds;
const size_t start_offset = row * stride;
const int last_row = row + num_rows;
SANITY_CHECK(in, out);
in += start_offset;
out += start_offset;
preds = inverse ? out : in;
preds = in;
if (row == 0) {
// Very first top-left pixel is copied.
out[0] = in[0];
// Rest of top scan-line is left-predicted.
PredictLine(in + 1, out + 1, width - 1, inverse);
PredictLine(in + 1, out + 1, width - 1);
row = 1;
in += stride;
out += stride;
@ -284,24 +262,13 @@ static WEBP_INLINE void DoVerticalFilter(const uint8_t* in,
}
// Filter line-by-line.
if (inverse) {
FILTER_LINE_BY_LINE(1);
} else {
FILTER_LINE_BY_LINE(0);
}
FILTER_LINE_BY_LINE;
}
#undef FILTER_LINE_BY_LINE
#undef DO_PREDICT_LINE_VERTICAL
static void VerticalFilter(const uint8_t* data, int width, int height,
int stride, uint8_t* filtered_data) {
DoVerticalFilter(data, width, height, stride, 0, height, 0, filtered_data);
}
static void VerticalUnfilter(int width, int height, int stride, int row,
int num_rows, uint8_t* data) {
DoVerticalFilter(data, width, height, stride, row, num_rows, 1, data);
DoVerticalFilter(data, width, height, stride, 0, height, filtered_data);
}
//------------------------------------------------------------------------------
@ -321,10 +288,10 @@ static WEBP_INLINE int GradientPredictor(uint8_t a, uint8_t b, uint8_t c) {
return temp0;
}
#define FILTER_LINE_BY_LINE(INVERSE, PREDS, OPERATION) do { \
#define FILTER_LINE_BY_LINE(PREDS, OPERATION) do { \
while (row < last_row) { \
int w; \
PREDICT_LINE_ONE_PASS(in, PREDS - stride, out, INVERSE); \
PREDICT_LINE_ONE_PASS(in, PREDS - stride, out); \
for (w = 1; w < width; ++w) { \
const int pred = GradientPredictor(PREDS[w - 1], \
PREDS[w - stride], \
@ -339,20 +306,19 @@ static WEBP_INLINE int GradientPredictor(uint8_t a, uint8_t b, uint8_t c) {
static WEBP_INLINE void DoGradientFilter(const uint8_t* in,
int width, int height, int stride,
int row, int num_rows,
int inverse, uint8_t* out) {
int row, int num_rows, uint8_t* out) {
const uint8_t* preds;
const size_t start_offset = row * stride;
const int last_row = row + num_rows;
SANITY_CHECK(in, out);
in += start_offset;
out += start_offset;
preds = inverse ? out : in;
preds = in;
// left prediction for top scan-line
if (row == 0) {
out[0] = in[0];
PredictLine(in + 1, out + 1, width - 1, inverse);
PredictLine(in + 1, out + 1, width - 1);
row = 1;
preds += stride;
in += stride;
@ -360,25 +326,49 @@ static WEBP_INLINE void DoGradientFilter(const uint8_t* in,
}
// Filter line-by-line.
if (inverse) {
FILTER_LINE_BY_LINE(1, out, +);
} else {
FILTER_LINE_BY_LINE(0, in, -);
}
FILTER_LINE_BY_LINE(in, -);
}
#undef FILTER_LINE_BY_LINE
static void GradientFilter(const uint8_t* data, int width, int height,
int stride, uint8_t* filtered_data) {
DoGradientFilter(data, width, height, stride, 0, height, 0, filtered_data);
DoGradientFilter(data, width, height, stride, 0, height, filtered_data);
}
static void GradientUnfilter(int width, int height, int stride, int row,
int num_rows, uint8_t* data) {
DoGradientFilter(data, width, height, stride, row, num_rows, 1, data);
//------------------------------------------------------------------------------
static void HorizontalUnfilter(const uint8_t* prev, const uint8_t* in,
uint8_t* out, int width) {
out[0] = in[0] + (prev == NULL ? 0 : prev[0]);
DO_PREDICT_LINE(in + 1, out + 1, width - 1, 1);
}
static void VerticalUnfilter(const uint8_t* prev, const uint8_t* in,
uint8_t* out, int width) {
if (prev == NULL) {
HorizontalUnfilter(NULL, in, out, width);
} else {
DO_PREDICT_LINE_VERTICAL(in, prev, out, width, 1);
}
}
static void GradientUnfilter(const uint8_t* prev, const uint8_t* in,
uint8_t* out, int width) {
if (prev == NULL) {
HorizontalUnfilter(NULL, in, out, width);
} else {
uint8_t top = prev[0], top_left = top, left = top;
int i;
for (i = 0; i < width; ++i) {
top = prev[i]; // need to read this first, in case prev==dst
left = in[i] + GradientPredictor(left, top, top_left);
top_left = top;
out[i] = left;
}
}
}
#undef DO_PREDICT_LINE_VERTICAL
#undef PREDICT_LINE_ONE_PASS
#undef DO_PREDICT_LINE
#undef SANITY_CHECK
@ -389,13 +379,13 @@ static void GradientUnfilter(int width, int height, int stride, int row,
extern void VP8FiltersInitMIPSdspR2(void);
WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInitMIPSdspR2(void) {
WebPFilters[WEBP_FILTER_HORIZONTAL] = HorizontalFilter;
WebPFilters[WEBP_FILTER_VERTICAL] = VerticalFilter;
WebPFilters[WEBP_FILTER_GRADIENT] = GradientFilter;
WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter;
WebPUnfilters[WEBP_FILTER_VERTICAL] = VerticalUnfilter;
WebPUnfilters[WEBP_FILTER_GRADIENT] = GradientUnfilter;
WebPFilters[WEBP_FILTER_HORIZONTAL] = HorizontalFilter;
WebPFilters[WEBP_FILTER_VERTICAL] = VerticalFilter;
WebPFilters[WEBP_FILTER_GRADIENT] = GradientFilter;
}
#else // !WEBP_USE_MIPS_DSP_R2

View File

@ -33,82 +33,39 @@
(void)height; // Silence unused warning.
static void PredictLineTop(const uint8_t* src, const uint8_t* pred,
uint8_t* dst, int length, int inverse) {
uint8_t* dst, int length) {
int i;
const int max_pos = length & ~31;
assert(length >= 0);
if (inverse) {
for (i = 0; i < max_pos; i += 32) {
const __m128i A0 = _mm_loadu_si128((const __m128i*)&src[i + 0]);
const __m128i A1 = _mm_loadu_si128((const __m128i*)&src[i + 16]);
const __m128i B0 = _mm_loadu_si128((const __m128i*)&pred[i + 0]);
const __m128i B1 = _mm_loadu_si128((const __m128i*)&pred[i + 16]);
const __m128i C0 = _mm_add_epi8(A0, B0);
const __m128i C1 = _mm_add_epi8(A1, B1);
_mm_storeu_si128((__m128i*)&dst[i + 0], C0);
_mm_storeu_si128((__m128i*)&dst[i + 16], C1);
}
for (; i < length; ++i) dst[i] = src[i] + pred[i];
} else {
for (i = 0; i < max_pos; i += 32) {
const __m128i A0 = _mm_loadu_si128((const __m128i*)&src[i + 0]);
const __m128i A1 = _mm_loadu_si128((const __m128i*)&src[i + 16]);
const __m128i B0 = _mm_loadu_si128((const __m128i*)&pred[i + 0]);
const __m128i B1 = _mm_loadu_si128((const __m128i*)&pred[i + 16]);
const __m128i C0 = _mm_sub_epi8(A0, B0);
const __m128i C1 = _mm_sub_epi8(A1, B1);
_mm_storeu_si128((__m128i*)&dst[i + 0], C0);
_mm_storeu_si128((__m128i*)&dst[i + 16], C1);
}
for (; i < length; ++i) dst[i] = src[i] - pred[i];
for (i = 0; i < max_pos; i += 32) {
const __m128i A0 = _mm_loadu_si128((const __m128i*)&src[i + 0]);
const __m128i A1 = _mm_loadu_si128((const __m128i*)&src[i + 16]);
const __m128i B0 = _mm_loadu_si128((const __m128i*)&pred[i + 0]);
const __m128i B1 = _mm_loadu_si128((const __m128i*)&pred[i + 16]);
const __m128i C0 = _mm_sub_epi8(A0, B0);
const __m128i C1 = _mm_sub_epi8(A1, B1);
_mm_storeu_si128((__m128i*)&dst[i + 0], C0);
_mm_storeu_si128((__m128i*)&dst[i + 16], C1);
}
for (; i < length; ++i) dst[i] = src[i] - pred[i];
}
// Special case for left-based prediction (when preds==dst-1 or preds==src-1).
static void PredictLineLeft(const uint8_t* src, uint8_t* dst, int length,
int inverse) {
static void PredictLineLeft(const uint8_t* src, uint8_t* dst, int length) {
int i;
if (length <= 0) return;
if (inverse) {
const int max_pos = length & ~7;
__m128i last = _mm_set_epi32(0, 0, 0, dst[-1]);
for (i = 0; i < max_pos; i += 8) {
const __m128i A0 = _mm_loadl_epi64((const __m128i*)(src + i));
const __m128i A1 = _mm_add_epi8(A0, last);
const __m128i A2 = _mm_slli_si128(A1, 1);
const __m128i A3 = _mm_add_epi8(A1, A2);
const __m128i A4 = _mm_slli_si128(A3, 2);
const __m128i A5 = _mm_add_epi8(A3, A4);
const __m128i A6 = _mm_slli_si128(A5, 4);
const __m128i A7 = _mm_add_epi8(A5, A6);
_mm_storel_epi64((__m128i*)(dst + i), A7);
last = _mm_srli_epi64(A7, 56);
}
for (; i < length; ++i) dst[i] = src[i] + dst[i - 1];
} else {
const int max_pos = length & ~31;
for (i = 0; i < max_pos; i += 32) {
const __m128i A0 = _mm_loadu_si128((const __m128i*)(src + i + 0 ));
const __m128i B0 = _mm_loadu_si128((const __m128i*)(src + i + 0 - 1));
const __m128i A1 = _mm_loadu_si128((const __m128i*)(src + i + 16 ));
const __m128i B1 = _mm_loadu_si128((const __m128i*)(src + i + 16 - 1));
const __m128i C0 = _mm_sub_epi8(A0, B0);
const __m128i C1 = _mm_sub_epi8(A1, B1);
_mm_storeu_si128((__m128i*)(dst + i + 0), C0);
_mm_storeu_si128((__m128i*)(dst + i + 16), C1);
}
for (; i < length; ++i) dst[i] = src[i] - src[i - 1];
}
}
static void PredictLineC(const uint8_t* src, const uint8_t* pred,
uint8_t* dst, int length, int inverse) {
int i;
if (inverse) {
for (i = 0; i < length; ++i) dst[i] = src[i] + pred[i];
} else {
for (i = 0; i < length; ++i) dst[i] = src[i] - pred[i];
const int max_pos = length & ~31;
assert(length >= 0);
for (i = 0; i < max_pos; i += 32) {
const __m128i A0 = _mm_loadu_si128((const __m128i*)(src + i + 0 ));
const __m128i B0 = _mm_loadu_si128((const __m128i*)(src + i + 0 - 1));
const __m128i A1 = _mm_loadu_si128((const __m128i*)(src + i + 16 ));
const __m128i B1 = _mm_loadu_si128((const __m128i*)(src + i + 16 - 1));
const __m128i C0 = _mm_sub_epi8(A0, B0);
const __m128i C1 = _mm_sub_epi8(A1, B1);
_mm_storeu_si128((__m128i*)(dst + i + 0), C0);
_mm_storeu_si128((__m128i*)(dst + i + 16), C1);
}
for (; i < length; ++i) dst[i] = src[i] - src[i - 1];
}
//------------------------------------------------------------------------------
@ -117,21 +74,18 @@ static void PredictLineC(const uint8_t* src, const uint8_t* pred,
static WEBP_INLINE void DoHorizontalFilter(const uint8_t* in,
int width, int height, int stride,
int row, int num_rows,
int inverse, uint8_t* out) {
const uint8_t* preds;
uint8_t* out) {
const size_t start_offset = row * stride;
const int last_row = row + num_rows;
SANITY_CHECK(in, out);
in += start_offset;
out += start_offset;
preds = inverse ? out : in;
if (row == 0) {
// Leftmost pixel is the same as input for topmost scanline.
out[0] = in[0];
PredictLineLeft(in + 1, out + 1, width - 1, inverse);
PredictLineLeft(in + 1, out + 1, width - 1);
row = 1;
preds += stride;
in += stride;
out += stride;
}
@ -139,10 +93,9 @@ static WEBP_INLINE void DoHorizontalFilter(const uint8_t* in,
// Filter line-by-line.
while (row < last_row) {
// Leftmost pixel is predicted from above.
PredictLineC(in, preds - stride, out, 1, inverse);
PredictLineLeft(in + 1, out + 1, width - 1, inverse);
out[0] = in[0] - in[-stride];
PredictLineLeft(in + 1, out + 1, width - 1);
++row;
preds += stride;
in += stride;
out += stride;
}
@ -153,34 +106,27 @@ static WEBP_INLINE void DoHorizontalFilter(const uint8_t* in,
static WEBP_INLINE void DoVerticalFilter(const uint8_t* in,
int width, int height, int stride,
int row, int num_rows,
int inverse, uint8_t* out) {
const uint8_t* preds;
int row, int num_rows, uint8_t* out) {
const size_t start_offset = row * stride;
const int last_row = row + num_rows;
SANITY_CHECK(in, out);
in += start_offset;
out += start_offset;
preds = inverse ? out : in;
if (row == 0) {
// Very first top-left pixel is copied.
out[0] = in[0];
// Rest of top scan-line is left-predicted.
PredictLineLeft(in + 1, out + 1, width - 1, inverse);
PredictLineLeft(in + 1, out + 1, width - 1);
row = 1;
in += stride;
out += stride;
} else {
// We are starting from in-between. Make sure 'preds' points to prev row.
preds -= stride;
}
// Filter line-by-line.
while (row < last_row) {
PredictLineTop(in, preds, out, width, inverse);
PredictLineTop(in, in - stride, out, width);
++row;
preds += stride;
in += stride;
out += stride;
}
@ -219,6 +165,101 @@ static void GradientPredictDirect(const uint8_t* const row,
}
}
static WEBP_INLINE void DoGradientFilter(const uint8_t* in,
int width, int height, int stride,
int row, int num_rows,
uint8_t* out) {
const size_t start_offset = row * stride;
const int last_row = row + num_rows;
SANITY_CHECK(in, out);
in += start_offset;
out += start_offset;
// left prediction for top scan-line
if (row == 0) {
out[0] = in[0];
PredictLineLeft(in + 1, out + 1, width - 1);
row = 1;
in += stride;
out += stride;
}
// Filter line-by-line.
while (row < last_row) {
out[0] = in[0] - in[-stride];
GradientPredictDirect(in + 1, in + 1 - stride, out + 1, width - 1);
++row;
in += stride;
out += stride;
}
}
#undef SANITY_CHECK
//------------------------------------------------------------------------------
static void HorizontalFilter(const uint8_t* data, int width, int height,
int stride, uint8_t* filtered_data) {
DoHorizontalFilter(data, width, height, stride, 0, height, filtered_data);
}
static void VerticalFilter(const uint8_t* data, int width, int height,
int stride, uint8_t* filtered_data) {
DoVerticalFilter(data, width, height, stride, 0, height, filtered_data);
}
static void GradientFilter(const uint8_t* data, int width, int height,
int stride, uint8_t* filtered_data) {
DoGradientFilter(data, width, height, stride, 0, height, filtered_data);
}
//------------------------------------------------------------------------------
// Inverse transforms
static void HorizontalUnfilter(const uint8_t* prev, const uint8_t* in,
uint8_t* out, int width) {
int i;
__m128i last;
out[0] = in[0] + (prev == NULL ? 0 : prev[0]);
if (width <= 1) return;
last = _mm_set_epi32(0, 0, 0, out[0]);
for (i = 1; i + 8 <= width; i += 8) {
const __m128i A0 = _mm_loadl_epi64((const __m128i*)(in + i));
const __m128i A1 = _mm_add_epi8(A0, last);
const __m128i A2 = _mm_slli_si128(A1, 1);
const __m128i A3 = _mm_add_epi8(A1, A2);
const __m128i A4 = _mm_slli_si128(A3, 2);
const __m128i A5 = _mm_add_epi8(A3, A4);
const __m128i A6 = _mm_slli_si128(A5, 4);
const __m128i A7 = _mm_add_epi8(A5, A6);
_mm_storel_epi64((__m128i*)(out + i), A7);
last = _mm_srli_epi64(A7, 56);
}
for (; i < width; ++i) out[i] = in[i] + out[i - 1];
}
static void VerticalUnfilter(const uint8_t* prev, const uint8_t* in,
uint8_t* out, int width) {
if (prev == NULL) {
HorizontalUnfilter(NULL, in, out, width);
} else {
int i;
const int max_pos = width & ~31;
assert(width >= 0);
for (i = 0; i < max_pos; i += 32) {
const __m128i A0 = _mm_loadu_si128((const __m128i*)&in[i + 0]);
const __m128i A1 = _mm_loadu_si128((const __m128i*)&in[i + 16]);
const __m128i B0 = _mm_loadu_si128((const __m128i*)&prev[i + 0]);
const __m128i B1 = _mm_loadu_si128((const __m128i*)&prev[i + 16]);
const __m128i C0 = _mm_add_epi8(A0, B0);
const __m128i C1 = _mm_add_epi8(A1, B1);
_mm_storeu_si128((__m128i*)&out[i + 0], C0);
_mm_storeu_si128((__m128i*)&out[i + 16], C1);
}
for (; i < width; ++i) out[i] = in[i] + prev[i];
}
}
static void GradientPredictInverse(const uint8_t* const in,
const uint8_t* const top,
uint8_t* const row, int length) {
@ -232,25 +273,24 @@ static void GradientPredictInverse(const uint8_t* const in,
const __m128i tmp1 = _mm_loadl_epi64((const __m128i*)&top[i - 1]);
const __m128i B = _mm_unpacklo_epi8(tmp0, zero);
const __m128i C = _mm_unpacklo_epi8(tmp1, zero);
const __m128i tmp2 = _mm_loadl_epi64((const __m128i*)&in[i]);
const __m128i D = _mm_unpacklo_epi8(tmp2, zero); // base input
const __m128i D = _mm_loadl_epi64((const __m128i*)&in[i]); // base input
const __m128i E = _mm_sub_epi16(B, C); // unclipped gradient basis B - C
__m128i out = zero; // accumulator for output
__m128i mask_hi = _mm_set_epi32(0, 0, 0, 0xff);
int k = 8;
while (1) {
const __m128i tmp3 = _mm_add_epi16(A, E); // delta = A + B - C
const __m128i tmp4 = _mm_min_epi16(tmp3, mask_hi);
const __m128i tmp5 = _mm_max_epi16(tmp4, zero); // clipped delta
const __m128i tmp6 = _mm_add_epi16(tmp5, D); // add to in[] values
A = _mm_and_si128(tmp6, mask_hi); // 1-complement clip
out = _mm_or_si128(out, A); // accumulate output
const __m128i tmp3 = _mm_add_epi16(A, E); // delta = A + B - C
const __m128i tmp4 = _mm_packus_epi16(tmp3, zero); // saturate delta
const __m128i tmp5 = _mm_add_epi8(tmp4, D); // add to in[]
A = _mm_and_si128(tmp5, mask_hi); // 1-complement clip
out = _mm_or_si128(out, A); // accumulate output
if (--k == 0) break;
A = _mm_slli_si128(A, 2); // rotate left sample
mask_hi = _mm_slli_si128(mask_hi, 2); // rotate mask
A = _mm_slli_si128(A, 1); // rotate left sample
mask_hi = _mm_slli_si128(mask_hi, 1); // rotate mask
A = _mm_unpacklo_epi8(A, zero); // convert 8b->16b
}
A = _mm_srli_si128(A, 14); // prepare left sample for next iteration
_mm_storel_epi64((__m128i*)&row[i], _mm_packus_epi16(out, zero));
A = _mm_srli_si128(A, 7); // prepare left sample for next iteration
_mm_storel_epi64((__m128i*)&row[i], out);
}
for (; i < length; ++i) {
row[i] = in[i] + GradientPredictorC(row[i - 1], top[i], top[i - 1]);
@ -258,76 +298,14 @@ static void GradientPredictInverse(const uint8_t* const in,
}
}
static WEBP_INLINE void DoGradientFilter(const uint8_t* in,
int width, int height, int stride,
int row, int num_rows,
int inverse, uint8_t* out) {
const size_t start_offset = row * stride;
const int last_row = row + num_rows;
SANITY_CHECK(in, out);
in += start_offset;
out += start_offset;
// left prediction for top scan-line
if (row == 0) {
out[0] = in[0];
PredictLineLeft(in + 1, out + 1, width - 1, inverse);
row = 1;
in += stride;
out += stride;
static void GradientUnfilter(const uint8_t* prev, const uint8_t* in,
uint8_t* out, int width) {
if (prev == NULL) {
HorizontalUnfilter(NULL, in, out, width);
} else {
out[0] = in[0] + prev[0]; // predict from above
GradientPredictInverse(in + 1, prev + 1, out + 1, width - 1);
}
// Filter line-by-line.
while (row < last_row) {
if (inverse) {
PredictLineC(in, out - stride, out, 1, inverse); // predict from above
GradientPredictInverse(in + 1, out + 1 - stride, out + 1, width - 1);
} else {
PredictLineC(in, in - stride, out, 1, inverse);
GradientPredictDirect(in + 1, in + 1 - stride, out + 1, width - 1);
}
++row;
in += stride;
out += stride;
}
}
#undef SANITY_CHECK
//------------------------------------------------------------------------------
static void HorizontalFilter(const uint8_t* data, int width, int height,
int stride, uint8_t* filtered_data) {
DoHorizontalFilter(data, width, height, stride, 0, height, 0, filtered_data);
}
static void VerticalFilter(const uint8_t* data, int width, int height,
int stride, uint8_t* filtered_data) {
DoVerticalFilter(data, width, height, stride, 0, height, 0, filtered_data);
}
static void GradientFilter(const uint8_t* data, int width, int height,
int stride, uint8_t* filtered_data) {
DoGradientFilter(data, width, height, stride, 0, height, 0, filtered_data);
}
//------------------------------------------------------------------------------
static void VerticalUnfilter(int width, int height, int stride, int row,
int num_rows, uint8_t* data) {
DoVerticalFilter(data, width, height, stride, row, num_rows, 1, data);
}
static void HorizontalUnfilter(int width, int height, int stride, int row,
int num_rows, uint8_t* data) {
DoHorizontalFilter(data, width, height, stride, row, num_rows, 1, data);
}
static void GradientUnfilter(int width, int height, int stride, int row,
int num_rows, uint8_t* data) {
DoGradientFilter(data, width, height, stride, row, num_rows, 1, data);
}
//------------------------------------------------------------------------------

View File

@ -28,9 +28,7 @@
// In-place sum of each component with mod 256.
static WEBP_INLINE void AddPixelsEq(uint32_t* a, uint32_t b) {
const uint32_t alpha_and_green = (*a & 0xff00ff00u) + (b & 0xff00ff00u);
const uint32_t red_and_blue = (*a & 0x00ff00ffu) + (b & 0x00ff00ffu);
*a = (alpha_and_green & 0xff00ff00u) | (red_and_blue & 0x00ff00ffu);
*a = VP8LAddPixels(*a, b);
}
static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) {

View File

@ -158,7 +158,8 @@ void VP8LCollectColorBlueTransforms_C(const uint32_t* argb, int stride,
void VP8LResidualImage(int width, int height, int bits, int low_effort,
uint32_t* const argb, uint32_t* const argb_scratch,
uint32_t* const image, int exact);
uint32_t* const image, int near_lossless, int exact,
int used_subtract_green);
void VP8LColorSpaceTransform(int width, int height, int bits, int quality,
uint32_t* const argb, uint32_t* image);
@ -172,6 +173,17 @@ static WEBP_INLINE uint32_t VP8LSubSampleSize(uint32_t size,
return (size + (1 << sampling_bits) - 1) >> sampling_bits;
}
// Converts near lossless quality into max number of bits shaved off.
static WEBP_INLINE int VP8LNearLosslessBits(int near_lossless_quality) {
// 100 -> 0
// 80..99 -> 1
// 60..79 -> 2
// 40..59 -> 3
// 20..39 -> 4
// 0..19 -> 5
return 5 - near_lossless_quality / 20;
}
// -----------------------------------------------------------------------------
// Faster logarithm for integers. Small values use a look-up table.
@ -262,6 +274,11 @@ extern VP8LHistogramAddFunc VP8LHistogramAdd;
// -----------------------------------------------------------------------------
// PrefixEncode()
typedef int (*VP8LVectorMismatchFunc)(const uint32_t* const array1,
const uint32_t* const array2, int length);
// Returns the first index where array1 and array2 are different.
extern VP8LVectorMismatchFunc VP8LVectorMismatch;
static WEBP_INLINE int VP8LBitsLog2Ceiling(uint32_t n) {
const int log_floor = BitsLog2Floor(n);
if (n == (n & ~(n - 1))) // zero or a power of two.
@ -324,7 +341,14 @@ static WEBP_INLINE void VP8LPrefixEncode(int distance, int* const code,
}
}
// In-place difference of each component with mod 256.
// Sum of each component, mod 256.
static WEBP_INLINE uint32_t VP8LAddPixels(uint32_t a, uint32_t b) {
const uint32_t alpha_and_green = (a & 0xff00ff00u) + (b & 0xff00ff00u);
const uint32_t red_and_blue = (a & 0x00ff00ffu) + (b & 0x00ff00ffu);
return (alpha_and_green & 0xff00ff00u) | (red_and_blue & 0x00ff00ffu);
}
// Difference of each component, mod 256.
static WEBP_INLINE uint32_t VP8LSubPixels(uint32_t a, uint32_t b) {
const uint32_t alpha_and_green =
0x00ff00ffu + (a & 0xff00ff00u) - (b & 0xff00ff00u);

View File

@ -382,6 +382,7 @@ static float FastLog2Slow(uint32_t v) {
// Mostly used to reduce code size + readability
static WEBP_INLINE int GetMin(int a, int b) { return (a > b) ? b : a; }
static WEBP_INLINE int GetMax(int a, int b) { return (a < b) ? b : a; }
//------------------------------------------------------------------------------
// Methods to calculate Entropy (Shannon).
@ -551,18 +552,204 @@ static WEBP_INLINE uint32_t Predict(VP8LPredictorFunc pred_func,
}
}
static int MaxDiffBetweenPixels(uint32_t p1, uint32_t p2) {
const int diff_a = abs((int)(p1 >> 24) - (int)(p2 >> 24));
const int diff_r = abs((int)((p1 >> 16) & 0xff) - (int)((p2 >> 16) & 0xff));
const int diff_g = abs((int)((p1 >> 8) & 0xff) - (int)((p2 >> 8) & 0xff));
const int diff_b = abs((int)(p1 & 0xff) - (int)(p2 & 0xff));
return GetMax(GetMax(diff_a, diff_r), GetMax(diff_g, diff_b));
}
static int MaxDiffAroundPixel(uint32_t current, uint32_t up, uint32_t down,
uint32_t left, uint32_t right) {
const int diff_up = MaxDiffBetweenPixels(current, up);
const int diff_down = MaxDiffBetweenPixels(current, down);
const int diff_left = MaxDiffBetweenPixels(current, left);
const int diff_right = MaxDiffBetweenPixels(current, right);
return GetMax(GetMax(diff_up, diff_down), GetMax(diff_left, diff_right));
}
static uint32_t AddGreenToBlueAndRed(uint32_t argb) {
const uint32_t green = (argb >> 8) & 0xff;
uint32_t red_blue = argb & 0x00ff00ffu;
red_blue += (green << 16) | green;
red_blue &= 0x00ff00ffu;
return (argb & 0xff00ff00u) | red_blue;
}
static void MaxDiffsForRow(int width, int stride, const uint32_t* const argb,
uint8_t* const max_diffs, int used_subtract_green) {
uint32_t current, up, down, left, right;
int x;
if (width <= 2) return;
current = argb[0];
right = argb[1];
if (used_subtract_green) {
current = AddGreenToBlueAndRed(current);
right = AddGreenToBlueAndRed(right);
}
// max_diffs[0] and max_diffs[width - 1] are never used.
for (x = 1; x < width - 1; ++x) {
up = argb[-stride + x];
down = argb[stride + x];
left = current;
current = right;
right = argb[x + 1];
if (used_subtract_green) {
up = AddGreenToBlueAndRed(up);
down = AddGreenToBlueAndRed(down);
right = AddGreenToBlueAndRed(right);
}
max_diffs[x] = MaxDiffAroundPixel(current, up, down, left, right);
}
}
// Quantize the difference between the actual component value and its prediction
// to a multiple of quantization, working modulo 256, taking care not to cross
// a boundary (inclusive upper limit).
static uint8_t NearLosslessComponent(uint8_t value, uint8_t predict,
uint8_t boundary, int quantization) {
const int residual = (value - predict) & 0xff;
const int boundary_residual = (boundary - predict) & 0xff;
const int lower = residual & ~(quantization - 1);
const int upper = lower + quantization;
// Resolve ties towards a value closer to the prediction (i.e. towards lower
// if value comes after prediction and towards upper otherwise).
const int bias = ((boundary - value) & 0xff) < boundary_residual;
if (residual - lower < upper - residual + bias) {
// lower is closer to residual than upper.
if (residual > boundary_residual && lower <= boundary_residual) {
// Halve quantization step to avoid crossing boundary. This midpoint is
// on the same side of boundary as residual because midpoint >= residual
// (since lower is closer than upper) and residual is above the boundary.
return lower + (quantization >> 1);
}
return lower;
} else {
// upper is closer to residual than lower.
if (residual <= boundary_residual && upper > boundary_residual) {
// Halve quantization step to avoid crossing boundary. This midpoint is
// on the same side of boundary as residual because midpoint <= residual
// (since upper is closer than lower) and residual is below the boundary.
return lower + (quantization >> 1);
}
return upper & 0xff;
}
}
// Quantize every component of the difference between the actual pixel value and
// its prediction to a multiple of a quantization (a power of 2, not larger than
// max_quantization which is a power of 2, smaller than max_diff). Take care if
// value and predict have undergone subtract green, which means that red and
// blue are represented as offsets from green.
static uint32_t NearLossless(uint32_t value, uint32_t predict,
int max_quantization, int max_diff,
int used_subtract_green) {
int quantization;
uint8_t new_green = 0;
uint8_t green_diff = 0;
uint8_t a, r, g, b;
if (max_diff <= 2) {
return VP8LSubPixels(value, predict);
}
quantization = max_quantization;
while (quantization >= max_diff) {
quantization >>= 1;
}
if ((value >> 24) == 0 || (value >> 24) == 0xff) {
// Preserve transparency of fully transparent or fully opaque pixels.
a = ((value >> 24) - (predict >> 24)) & 0xff;
} else {
a = NearLosslessComponent(value >> 24, predict >> 24, 0xff, quantization);
}
g = NearLosslessComponent((value >> 8) & 0xff, (predict >> 8) & 0xff, 0xff,
quantization);
if (used_subtract_green) {
// The green offset will be added to red and blue components during decoding
// to obtain the actual red and blue values.
new_green = ((predict >> 8) + g) & 0xff;
// The amount by which green has been adjusted during quantization. It is
// subtracted from red and blue for compensation, to avoid accumulating two
// quantization errors in them.
green_diff = (new_green - (value >> 8)) & 0xff;
}
r = NearLosslessComponent(((value >> 16) - green_diff) & 0xff,
(predict >> 16) & 0xff, 0xff - new_green,
quantization);
b = NearLosslessComponent((value - green_diff) & 0xff, predict & 0xff,
0xff - new_green, quantization);
return ((uint32_t)a << 24) | ((uint32_t)r << 16) | ((uint32_t)g << 8) | b;
}
// Returns the difference between the pixel and its prediction. In case of a
// lossy encoding, updates the source image to avoid propagating the deviation
// further to pixels which depend on the current pixel for their predictions.
static WEBP_INLINE uint32_t GetResidual(int width, int height,
uint32_t* const upper_row,
uint32_t* const current_row,
const uint8_t* const max_diffs,
int mode, VP8LPredictorFunc pred_func,
int x, int y, int max_quantization,
int exact, int used_subtract_green) {
const uint32_t predict = Predict(pred_func, x, y, current_row, upper_row);
uint32_t residual;
if (max_quantization == 1 || mode == 0 || y == 0 || y == height - 1 ||
x == 0 || x == width - 1) {
residual = VP8LSubPixels(current_row[x], predict);
} else {
residual = NearLossless(current_row[x], predict, max_quantization,
max_diffs[x], used_subtract_green);
// Update the source image.
current_row[x] = VP8LAddPixels(predict, residual);
// x is never 0 here so we do not need to update upper_row like below.
}
if (!exact && (current_row[x] & kMaskAlpha) == 0) {
// If alpha is 0, cleanup RGB. We can choose the RGB values of the residual
// for best compression. The prediction of alpha itself can be non-zero and
// must be kept though. We choose RGB of the residual to be 0.
residual &= kMaskAlpha;
// Update the source image.
current_row[x] = predict & ~kMaskAlpha;
// The prediction for the rightmost pixel in a row uses the leftmost pixel
// in that row as its top-right context pixel. Hence if we change the
// leftmost pixel of current_row, the corresponding change must be applied
// to upper_row as well where top-right context is being read from.
if (x == 0 && y != 0) upper_row[width] = current_row[0];
}
return residual;
}
// Returns best predictor and updates the accumulated histogram.
// If max_quantization > 1, assumes that near lossless processing will be
// applied, quantizing residuals to multiples of quantization levels up to
// max_quantization (the actual quantization level depends on smoothness near
// the given pixel).
static int GetBestPredictorForTile(int width, int height,
int tile_x, int tile_y, int bits,
int accumulated[4][256],
const uint32_t* const argb_scratch,
int exact) {
uint32_t* const argb_scratch,
const uint32_t* const argb,
int max_quantization,
int exact, int used_subtract_green) {
const int kNumPredModes = 14;
const int col_start = tile_x << bits;
const int row_start = tile_y << bits;
const int start_x = tile_x << bits;
const int start_y = tile_y << bits;
const int tile_size = 1 << bits;
const int max_y = GetMin(tile_size, height - row_start);
const int max_x = GetMin(tile_size, width - col_start);
const int max_y = GetMin(tile_size, height - start_y);
const int max_x = GetMin(tile_size, width - start_x);
// Whether there exist columns just outside the tile.
const int have_left = (start_x > 0);
const int have_right = (max_x < width - start_x);
// Position and size of the strip covering the tile and adjacent columns if
// they exist.
const int context_start_x = start_x - have_left;
const int context_width = max_x + have_left + have_right;
// The width of upper_row and current_row is one pixel larger than image width
// to allow the top right pixel to point to the leftmost pixel of the next row
// when at the right edge.
uint32_t* upper_row = argb_scratch;
uint32_t* current_row = upper_row + width + 1;
uint8_t* const max_diffs = (uint8_t*)(current_row + width + 1);
float best_diff = MAX_DIFF_COST;
int best_mode = 0;
int mode;
@ -571,28 +758,46 @@ static int GetBestPredictorForTile(int width, int height,
// Need pointers to be able to swap arrays.
int (*histo_argb)[256] = histo_stack_1;
int (*best_histo)[256] = histo_stack_2;
int i, j;
for (mode = 0; mode < kNumPredModes; ++mode) {
const uint32_t* current_row = argb_scratch;
const VP8LPredictorFunc pred_func = VP8LPredictors[mode];
float cur_diff;
int y;
int relative_y;
memset(histo_argb, 0, sizeof(histo_stack_1));
for (y = 0; y < max_y; ++y) {
int x;
const int row = row_start + y;
const uint32_t* const upper_row = current_row;
current_row = upper_row + width;
for (x = 0; x < max_x; ++x) {
const int col = col_start + x;
const uint32_t predict =
Predict(pred_func, col, row, current_row, upper_row);
uint32_t residual = VP8LSubPixels(current_row[col], predict);
if (!exact && (current_row[col] & kMaskAlpha) == 0) {
residual &= kMaskAlpha; // See CopyTileWithPrediction.
}
UpdateHisto(histo_argb, residual);
if (start_y > 0) {
// Read the row above the tile which will become the first upper_row.
// Include a pixel to the left if it exists; include a pixel to the right
// in all cases (wrapping to the leftmost pixel of the next row if it does
// not exist).
memcpy(current_row + context_start_x,
argb + (start_y - 1) * width + context_start_x,
sizeof(*argb) * (max_x + have_left + 1));
}
for (relative_y = 0; relative_y < max_y; ++relative_y) {
const int y = start_y + relative_y;
int relative_x;
uint32_t* tmp = upper_row;
upper_row = current_row;
current_row = tmp;
// Read current_row. Include a pixel to the left if it exists; include a
// pixel to the right in all cases except at the bottom right corner of
// the image (wrapping to the leftmost pixel of the next row if it does
// not exist in the current row).
memcpy(current_row + context_start_x,
argb + y * width + context_start_x,
sizeof(*argb) * (max_x + have_left + (y + 1 < height)));
if (max_quantization > 1 && y >= 1 && y + 1 < height) {
MaxDiffsForRow(context_width, width, argb + y * width + context_start_x,
max_diffs + context_start_x, used_subtract_green);
}
for (relative_x = 0; relative_x < max_x; ++relative_x) {
const int x = start_x + relative_x;
UpdateHisto(histo_argb,
GetResidual(width, height, upper_row, current_row,
max_diffs, mode, pred_func, x, y,
max_quantization, exact, used_subtract_green));
}
}
cur_diff = PredictionCostSpatialHistogram(
@ -615,71 +820,82 @@ static int GetBestPredictorForTile(int width, int height,
return best_mode;
}
// Converts pixels of the image to residuals with respect to predictions.
// If max_quantization > 1, applies near lossless processing, quantizing
// residuals to multiples of quantization levels up to max_quantization
// (the actual quantization level depends on smoothness near the given pixel).
static void CopyImageWithPrediction(int width, int height,
int bits, uint32_t* const modes,
uint32_t* const argb_scratch,
uint32_t* const argb,
int low_effort, int exact) {
int low_effort, int max_quantization,
int exact, int used_subtract_green) {
const int tiles_per_row = VP8LSubSampleSize(width, bits);
const int mask = (1 << bits) - 1;
// The row size is one pixel longer to allow the top right pixel to point to
// the leftmost pixel of the next row when at the right edge.
uint32_t* current_row = argb_scratch;
uint32_t* upper_row = argb_scratch + width + 1;
// The width of upper_row and current_row is one pixel larger than image width
// to allow the top right pixel to point to the leftmost pixel of the next row
// when at the right edge.
uint32_t* upper_row = argb_scratch;
uint32_t* current_row = upper_row + width + 1;
uint8_t* current_max_diffs = (uint8_t*)(current_row + width + 1);
uint8_t* lower_max_diffs = current_max_diffs + width;
int y;
VP8LPredictorFunc pred_func =
low_effort ? VP8LPredictors[kPredLowEffort] : NULL;
int mode = 0;
VP8LPredictorFunc pred_func = NULL;
for (y = 0; y < height; ++y) {
int x;
uint32_t* tmp = upper_row;
uint32_t* const tmp32 = upper_row;
upper_row = current_row;
current_row = tmp;
memcpy(current_row, argb + y * width, sizeof(*current_row) * width);
current_row[width] = (y + 1 < height) ? argb[(y + 1) * width] : ARGB_BLACK;
current_row = tmp32;
memcpy(current_row, argb + y * width,
sizeof(*argb) * (width + (y + 1 < height)));
if (low_effort) {
for (x = 0; x < width; ++x) {
const uint32_t predict =
Predict(pred_func, x, y, current_row, upper_row);
const uint32_t predict = Predict(VP8LPredictors[kPredLowEffort], x, y,
current_row, upper_row);
argb[y * width + x] = VP8LSubPixels(current_row[x], predict);
}
} else {
if (max_quantization > 1) {
// Compute max_diffs for the lower row now, because that needs the
// contents of argb for the current row, which we will overwrite with
// residuals before proceeding with the next row.
uint8_t* const tmp8 = current_max_diffs;
current_max_diffs = lower_max_diffs;
lower_max_diffs = tmp8;
if (y + 2 < height) {
MaxDiffsForRow(width, width, argb + (y + 1) * width, lower_max_diffs,
used_subtract_green);
}
}
for (x = 0; x < width; ++x) {
uint32_t predict, residual;
if ((x & mask) == 0) {
const int mode =
(modes[(y >> bits) * tiles_per_row + (x >> bits)] >> 8) & 0xff;
mode = (modes[(y >> bits) * tiles_per_row + (x >> bits)] >> 8) & 0xff;
pred_func = VP8LPredictors[mode];
}
predict = Predict(pred_func, x, y, current_row, upper_row);
residual = VP8LSubPixels(current_row[x], predict);
if (!exact && (current_row[x] & kMaskAlpha) == 0) {
// If alpha is 0, cleanup RGB. We can choose the RGB values of the
// residual for best compression. The prediction of alpha itself can
// be non-zero and must be kept though. We choose RGB of the residual
// to be 0.
residual &= kMaskAlpha;
// Update input image so that next predictions use correct RGB value.
current_row[x] = predict & ~kMaskAlpha;
if (x == 0 && y != 0) upper_row[width] = current_row[x];
}
argb[y * width + x] = residual;
argb[y * width + x] = GetResidual(
width, height, upper_row, current_row, current_max_diffs, mode,
pred_func, x, y, max_quantization, exact, used_subtract_green);
}
}
}
}
// Finds the best predictor for each tile, and converts the image to residuals
// with respect to predictions. If near_lossless_quality < 100, applies
// near lossless processing, shaving off more bits of residuals for lower
// qualities.
void VP8LResidualImage(int width, int height, int bits, int low_effort,
uint32_t* const argb, uint32_t* const argb_scratch,
uint32_t* const image, int exact) {
const int max_tile_size = 1 << bits;
uint32_t* const image, int near_lossless_quality,
int exact, int used_subtract_green) {
const int tiles_per_row = VP8LSubSampleSize(width, bits);
const int tiles_per_col = VP8LSubSampleSize(height, bits);
uint32_t* const upper_row = argb_scratch;
uint32_t* const current_tile_rows = argb_scratch + width;
int tile_y;
int histo[4][256];
const int max_quantization = 1 << VP8LNearLosslessBits(near_lossless_quality);
if (low_effort) {
int i;
for (i = 0; i < tiles_per_row * tiles_per_col; ++i) {
@ -688,26 +904,19 @@ void VP8LResidualImage(int width, int height, int bits, int low_effort,
} else {
memset(histo, 0, sizeof(histo));
for (tile_y = 0; tile_y < tiles_per_col; ++tile_y) {
const int tile_y_offset = tile_y * max_tile_size;
const int this_tile_height =
(tile_y < tiles_per_col - 1) ? max_tile_size : height - tile_y_offset;
int tile_x;
if (tile_y > 0) {
memcpy(upper_row, current_tile_rows + (max_tile_size - 1) * width,
width * sizeof(*upper_row));
}
memcpy(current_tile_rows, &argb[tile_y_offset * width],
this_tile_height * width * sizeof(*current_tile_rows));
for (tile_x = 0; tile_x < tiles_per_row; ++tile_x) {
const int pred = GetBestPredictorForTile(width, height, tile_x, tile_y,
bits, (int (*)[256])histo, argb_scratch, exact);
bits, histo, argb_scratch, argb, max_quantization, exact,
used_subtract_green);
image[tile_y * tiles_per_row + tile_x] = ARGB_BLACK | (pred << 8);
}
}
}
CopyImageWithPrediction(width, height, bits,
image, argb_scratch, argb, low_effort, exact);
CopyImageWithPrediction(width, height, bits, image, argb_scratch, argb,
low_effort, max_quantization, exact,
used_subtract_green);
}
void VP8LSubtractGreenFromBlueAndRed_C(uint32_t* argb_data, int num_pixels) {
@ -1053,6 +1262,17 @@ void VP8LColorSpaceTransform(int width, int height, int bits, int quality,
}
//------------------------------------------------------------------------------
static int VectorMismatch(const uint32_t* const array1,
const uint32_t* const array2, int length) {
int match_len = 0;
while (match_len < length && array1[match_len] == array2[match_len]) {
++match_len;
}
return match_len;
}
// Bundles multiple (1, 2, 4 or 8) pixels into a single pixel.
void VP8LBundleColorMap(const uint8_t* const row, int width,
int xbits, uint32_t* const dst) {
@ -1149,6 +1369,8 @@ GetEntropyUnrefinedHelperFunc VP8LGetEntropyUnrefinedHelper;
VP8LHistogramAddFunc VP8LHistogramAdd;
VP8LVectorMismatchFunc VP8LVectorMismatch;
extern void VP8LEncDspInitSSE2(void);
extern void VP8LEncDspInitSSE41(void);
extern void VP8LEncDspInitNEON(void);
@ -1181,6 +1403,8 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInit(void) {
VP8LHistogramAdd = HistogramAdd;
VP8LVectorMismatch = VectorMismatch;
// If defined, use CPUInfo() to overwrite some pointers with faster versions.
if (VP8GetCPUInfo != NULL) {
#if defined(WEBP_USE_SSE2)

View File

@ -324,6 +324,57 @@ static float CombinedShannonEntropy(const int X[256], const int Y[256]) {
#undef ANALYZE_X_OR_Y
#undef ANALYZE_XY
//------------------------------------------------------------------------------
static int VectorMismatch(const uint32_t* const array1,
const uint32_t* const array2, int length) {
int match_len;
if (length >= 12) {
__m128i A0 = _mm_loadu_si128((const __m128i*)&array1[0]);
__m128i A1 = _mm_loadu_si128((const __m128i*)&array2[0]);
match_len = 0;
do {
// Loop unrolling and early load both provide a speedup of 10% for the
// current function. Also, max_limit can be MAX_LENGTH=4096 at most.
const __m128i cmpA = _mm_cmpeq_epi32(A0, A1);
const __m128i B0 =
_mm_loadu_si128((const __m128i*)&array1[match_len + 4]);
const __m128i B1 =
_mm_loadu_si128((const __m128i*)&array2[match_len + 4]);
if (_mm_movemask_epi8(cmpA) != 0xffff) break;
match_len += 4;
{
const __m128i cmpB = _mm_cmpeq_epi32(B0, B1);
A0 = _mm_loadu_si128((const __m128i*)&array1[match_len + 4]);
A1 = _mm_loadu_si128((const __m128i*)&array2[match_len + 4]);
if (_mm_movemask_epi8(cmpB) != 0xffff) break;
match_len += 4;
}
} while (match_len + 12 < length);
} else {
match_len = 0;
// Unroll the potential first two loops.
if (length >= 4 &&
_mm_movemask_epi8(_mm_cmpeq_epi32(
_mm_loadu_si128((const __m128i*)&array1[0]),
_mm_loadu_si128((const __m128i*)&array2[0]))) == 0xffff) {
match_len = 4;
if (length >= 8 &&
_mm_movemask_epi8(_mm_cmpeq_epi32(
_mm_loadu_si128((const __m128i*)&array1[4]),
_mm_loadu_si128((const __m128i*)&array2[4]))) == 0xffff)
match_len = 8;
}
}
while (match_len < length && array1[match_len] == array2[match_len]) {
++match_len;
}
return match_len;
}
//------------------------------------------------------------------------------
// Entry point
@ -336,6 +387,7 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitSSE2(void) {
VP8LCollectColorRedTransforms = CollectColorRedTransforms;
VP8LHistogramAdd = HistogramAdd;
VP8LCombinedShannonEntropy = CombinedShannonEntropy;
VP8LVectorMismatch = VectorMismatch;
}
#else // !WEBP_USE_SSE2

555
src/dsp/msa_macro.h Normal file
View File

@ -0,0 +1,555 @@
// Copyright 2016 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// MSA common macros
//
// Author(s): Prashant Patil (prashant.patil@imgtec.com)
#ifndef WEBP_DSP_MSA_MACRO_H_
#define WEBP_DSP_MSA_MACRO_H_
#include <stdint.h>
#include <msa.h>
#if defined(__clang__)
#define CLANG_BUILD
#endif
#ifdef CLANG_BUILD
#define ADDVI_H(a, b) __msa_addvi_h((v8i16)a, b)
#define SRAI_H(a, b) __msa_srai_h((v8i16)a, b)
#define SRAI_W(a, b) __msa_srai_w((v4i32)a, b)
#else
#define ADDVI_H(a, b) (a + b)
#define SRAI_H(a, b) (a >> b)
#define SRAI_W(a, b) (a >> b)
#endif
#define LD_B(RTYPE, psrc) *((RTYPE*)(psrc))
#define LD_UB(...) LD_B(v16u8, __VA_ARGS__)
#define LD_SB(...) LD_B(v16i8, __VA_ARGS__)
#define LD_H(RTYPE, psrc) *((RTYPE*)(psrc))
#define LD_UH(...) LD_H(v8u16, __VA_ARGS__)
#define LD_SH(...) LD_H(v8i16, __VA_ARGS__)
#define LD_W(RTYPE, psrc) *((RTYPE*)(psrc))
#define LD_UW(...) LD_W(v4u32, __VA_ARGS__)
#define LD_SW(...) LD_W(v4i32, __VA_ARGS__)
#define ST_B(RTYPE, in, pdst) *((RTYPE*)(pdst)) = in
#define ST_UB(...) ST_B(v16u8, __VA_ARGS__)
#define ST_SB(...) ST_B(v16i8, __VA_ARGS__)
#define ST_H(RTYPE, in, pdst) *((RTYPE*)(pdst)) = in
#define ST_UH(...) ST_H(v8u16, __VA_ARGS__)
#define ST_SH(...) ST_H(v8i16, __VA_ARGS__)
#define ST_W(RTYPE, in, pdst) *((RTYPE*)(pdst)) = in
#define ST_UW(...) ST_W(v4u32, __VA_ARGS__)
#define ST_SW(...) ST_W(v4i32, __VA_ARGS__)
#define MSA_LOAD_FUNC(TYPE, INSTR, FUNC_NAME) \
static inline TYPE FUNC_NAME(const void* const psrc) { \
const uint8_t* const psrc_m = (const uint8_t*)psrc; \
TYPE val_m; \
asm volatile ( \
"" #INSTR " %[val_m], %[psrc_m] \n\t" \
: [val_m] "=r" (val_m) \
: [psrc_m] "m" (*psrc_m)); \
return val_m; \
}
#define MSA_LOAD(psrc, FUNC_NAME) FUNC_NAME(psrc)
#define MSA_STORE_FUNC(TYPE, INSTR, FUNC_NAME) \
static inline void FUNC_NAME(TYPE val, void* const pdst) { \
uint8_t* const pdst_m = (uint8_t*)pdst; \
TYPE val_m = val; \
asm volatile ( \
" " #INSTR " %[val_m], %[pdst_m] \n\t" \
: [pdst_m] "=m" (*pdst_m) \
: [val_m] "r" (val_m)); \
}
#define MSA_STORE(val, pdst, FUNC_NAME) FUNC_NAME(val, pdst)
#if (__mips_isa_rev >= 6)
MSA_LOAD_FUNC(uint16_t, lh, msa_lh);
#define LH(psrc) MSA_LOAD(psrc, msa_lh)
MSA_LOAD_FUNC(uint32_t, lw, msa_lw);
#define LW(psrc) MSA_LOAD(psrc, msa_lw)
#if (__mips == 64)
MSA_LOAD_FUNC(uint64_t, ld, msa_ld);
#define LD(psrc) MSA_LOAD(psrc, msa_ld)
#else // !(__mips == 64)
#define LD(psrc) ((((uint64_t)MSA_LOAD(psrc + 4, msa_lw)) << 32) | \
MSA_LOAD(psrc, msa_lw))
#endif // (__mips == 64)
MSA_STORE_FUNC(uint16_t, sh, msa_sh);
#define SH(val, pdst) MSA_STORE(val, pdst, msa_sh)
MSA_STORE_FUNC(uint32_t, sw, msa_sw);
#define SW(val, pdst) MSA_STORE(val, pdst, msa_sw)
MSA_STORE_FUNC(uint64_t, sd, msa_sd);
#define SD(val, pdst) MSA_STORE(val, pdst, msa_sd)
#else // !(__mips_isa_rev >= 6)
MSA_LOAD_FUNC(uint16_t, ulh, msa_ulh);
#define LH(psrc) MSA_LOAD(psrc, msa_ulh)
MSA_LOAD_FUNC(uint32_t, ulw, msa_ulw);
#define LW(psrc) MSA_LOAD(psrc, msa_ulw)
#if (__mips == 64)
MSA_LOAD_FUNC(uint64_t, uld, msa_uld);
#define LD(psrc) MSA_LOAD(psrc, msa_uld)
#else // !(__mips == 64)
#define LD(psrc) ((((uint64_t)MSA_LOAD(psrc + 4, msa_ulw)) << 32) | \
MSA_LOAD(psrc, msa_ulw))
#endif // (__mips == 64)
MSA_STORE_FUNC(uint16_t, ush, msa_ush);
#define SH(val, pdst) MSA_STORE(val, pdst, msa_ush)
MSA_STORE_FUNC(uint32_t, usw, msa_usw);
#define SW(val, pdst) MSA_STORE(val, pdst, msa_usw)
#define SD(val, pdst) { \
uint8_t* const pdst_sd_m = (uint8_t*)(pdst); \
const uint32_t val0_m = (uint32_t)(val & 0x00000000FFFFFFFF); \
const uint32_t val1_m = (uint32_t)((val >> 32) & 0x00000000FFFFFFFF); \
SW(val0_m, pdst_sd_m); \
SW(val1_m, pdst_sd_m + 4); \
}
#endif // (__mips_isa_rev >= 6)
/* Description : Load 4 words with stride
* Arguments : Inputs - psrc, stride
* Outputs - out0, out1, out2, out3
* Details : Load word in 'out0' from (psrc)
* Load word in 'out1' from (psrc + stride)
* Load word in 'out2' from (psrc + 2 * stride)
* Load word in 'out3' from (psrc + 3 * stride)
*/
#define LW4(psrc, stride, out0, out1, out2, out3) { \
const uint8_t* ptmp = (const uint8_t*)psrc; \
out0 = LW(ptmp); \
ptmp += stride; \
out1 = LW(ptmp); \
ptmp += stride; \
out2 = LW(ptmp); \
ptmp += stride; \
out3 = LW(ptmp); \
}
/* Description : Store 4 words with stride
* Arguments : Inputs - in0, in1, in2, in3, pdst, stride
* Details : Store word from 'in0' to (pdst)
* Store word from 'in1' to (pdst + stride)
* Store word from 'in2' to (pdst + 2 * stride)
* Store word from 'in3' to (pdst + 3 * stride)
*/
#define SW4(in0, in1, in2, in3, pdst, stride) { \
uint8_t* ptmp = (uint8_t*)pdst; \
SW(in0, ptmp); \
ptmp += stride; \
SW(in1, ptmp); \
ptmp += stride; \
SW(in2, ptmp); \
ptmp += stride; \
SW(in3, ptmp); \
}
/* Description : Load vectors with 16 byte elements with stride
* Arguments : Inputs - psrc, stride
* Outputs - out0, out1
* Return Type - as per RTYPE
* Details : Load 16 byte elements in 'out0' from (psrc)
* Load 16 byte elements in 'out1' from (psrc + stride)
*/
#define LD_B2(RTYPE, psrc, stride, out0, out1) { \
out0 = LD_B(RTYPE, psrc); \
out1 = LD_B(RTYPE, psrc + stride); \
}
#define LD_UB2(...) LD_B2(v16u8, __VA_ARGS__)
#define LD_SB2(...) LD_B2(v16i8, __VA_ARGS__)
#define LD_B4(RTYPE, psrc, stride, out0, out1, out2, out3) { \
LD_B2(RTYPE, psrc, stride, out0, out1); \
LD_B2(RTYPE, psrc + 2 * stride , stride, out2, out3); \
}
#define LD_UB4(...) LD_B4(v16u8, __VA_ARGS__)
#define LD_SB4(...) LD_B4(v16i8, __VA_ARGS__)
/* Description : Load vectors with 8 halfword elements with stride
* Arguments : Inputs - psrc, stride
* Outputs - out0, out1
* Details : Load 8 halfword elements in 'out0' from (psrc)
* Load 8 halfword elements in 'out1' from (psrc + stride)
*/
#define LD_H2(RTYPE, psrc, stride, out0, out1) { \
out0 = LD_H(RTYPE, psrc); \
out1 = LD_H(RTYPE, psrc + stride); \
}
#define LD_UH2(...) LD_H2(v8u16, __VA_ARGS__)
#define LD_SH2(...) LD_H2(v8i16, __VA_ARGS__)
/* Description : Store 4x4 byte block to destination memory from input vector
* Arguments : Inputs - in0, in1, pdst, stride
* Details : 'Idx0' word element from input vector 'in0' is copied to the
* GP register and stored to (pdst)
* 'Idx1' word element from input vector 'in0' is copied to the
* GP register and stored to (pdst + stride)
* 'Idx2' word element from input vector 'in0' is copied to the
* GP register and stored to (pdst + 2 * stride)
* 'Idx3' word element from input vector 'in0' is copied to the
* GP register and stored to (pdst + 3 * stride)
*/
#define ST4x4_UB(in0, in1, idx0, idx1, idx2, idx3, pdst, stride) { \
uint8_t* const pblk_4x4_m = (uint8_t*)pdst; \
const uint32_t out0_m = __msa_copy_s_w((v4i32)in0, idx0); \
const uint32_t out1_m = __msa_copy_s_w((v4i32)in0, idx1); \
const uint32_t out2_m = __msa_copy_s_w((v4i32)in1, idx2); \
const uint32_t out3_m = __msa_copy_s_w((v4i32)in1, idx3); \
SW4(out0_m, out1_m, out2_m, out3_m, pblk_4x4_m, stride); \
}
/* Description : Immediate number of elements to slide
* Arguments : Inputs - in0, in1, slide_val
* Outputs - out
* Return Type - as per RTYPE
* Details : Byte elements from 'in1' vector are slid into 'in0' by
* value specified in the 'slide_val'
*/
#define SLDI_B(RTYPE, in0, in1, slide_val) \
(RTYPE)__msa_sldi_b((v16i8)in0, (v16i8)in1, slide_val) \
#define SLDI_UB(...) SLDI_B(v16u8, __VA_ARGS__)
#define SLDI_SB(...) SLDI_B(v16i8, __VA_ARGS__)
#define SLDI_SH(...) SLDI_B(v8i16, __VA_ARGS__)
/* Description : Shuffle halfword vector elements as per mask vector
* Arguments : Inputs - in0, in1, in2, in3, mask0, mask1
* Outputs - out0, out1
* Return Type - as per RTYPE
* Details : halfword elements from 'in0' & 'in1' are copied selectively to
* 'out0' as per control vector 'mask0'
*/
#define VSHF_H2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) { \
out0 = (RTYPE)__msa_vshf_h((v8i16)mask0, (v8i16)in1, (v8i16)in0); \
out1 = (RTYPE)__msa_vshf_h((v8i16)mask1, (v8i16)in3, (v8i16)in2); \
}
#define VSHF_H2_UH(...) VSHF_H2(v8u16, __VA_ARGS__)
#define VSHF_H2_SH(...) VSHF_H2(v8i16, __VA_ARGS__)
/* Description : Clips all signed halfword elements of input vector
* between 0 & 255
* Arguments : Input/output - val
* Return Type - signed halfword
*/
#define CLIP_SH_0_255(val) { \
const v8i16 max_m = __msa_ldi_h(255); \
val = __msa_maxi_s_h((v8i16)val, 0); \
val = __msa_min_s_h(max_m, (v8i16)val); \
}
#define CLIP_SH2_0_255(in0, in1) { \
CLIP_SH_0_255(in0); \
CLIP_SH_0_255(in1); \
}
/* Description : Clips all signed word elements of input vector
* between 0 & 255
* Arguments : Input/output - val
* Return Type - signed word
*/
#define CLIP_SW_0_255(val) { \
const v4i32 max_m = __msa_ldi_w(255); \
val = __msa_maxi_s_w((v4i32)val, 0); \
val = __msa_min_s_w(max_m, (v4i32)val); \
}
#define CLIP_SW4_0_255(in0, in1, in2, in3) { \
CLIP_SW_0_255(in0); \
CLIP_SW_0_255(in1); \
CLIP_SW_0_255(in2); \
CLIP_SW_0_255(in3); \
}
/* Description : Set element n input vector to GPR value
* Arguments : Inputs - in0, in1, in2, in3
* Output - out
* Return Type - as per RTYPE
* Details : Set element 0 in vector 'out' to value specified in 'in0'
*/
#define INSERT_W2(RTYPE, in0, in1, out) { \
out = (RTYPE)__msa_insert_w((v4i32)out, 0, in0); \
out = (RTYPE)__msa_insert_w((v4i32)out, 1, in1); \
}
#define INSERT_W2_UB(...) INSERT_W2(v16u8, __VA_ARGS__)
#define INSERT_W2_SB(...) INSERT_W2(v16i8, __VA_ARGS__)
#define INSERT_W4(RTYPE, in0, in1, in2, in3, out) { \
out = (RTYPE)__msa_insert_w((v4i32)out, 0, in0); \
out = (RTYPE)__msa_insert_w((v4i32)out, 1, in1); \
out = (RTYPE)__msa_insert_w((v4i32)out, 2, in2); \
out = (RTYPE)__msa_insert_w((v4i32)out, 3, in3); \
}
#define INSERT_W4_UB(...) INSERT_W4(v16u8, __VA_ARGS__)
#define INSERT_W4_SB(...) INSERT_W4(v16i8, __VA_ARGS__)
#define INSERT_W4_SW(...) INSERT_W4(v4i32, __VA_ARGS__)
/* Description : Interleave right half of byte elements from vectors
* Arguments : Inputs - in0, in1, in2, in3
* Outputs - out0, out1
* Return Type - as per RTYPE
* Details : Right half of byte elements of 'in0' and 'in1' are interleaved
* and written to out0.
*/
#define ILVR_B2(RTYPE, in0, in1, in2, in3, out0, out1) { \
out0 = (RTYPE)__msa_ilvr_b((v16i8)in0, (v16i8)in1); \
out1 = (RTYPE)__msa_ilvr_b((v16i8)in2, (v16i8)in3); \
}
#define ILVR_B2_UB(...) ILVR_B2(v16u8, __VA_ARGS__)
#define ILVR_B2_SB(...) ILVR_B2(v16i8, __VA_ARGS__)
#define ILVR_B2_UH(...) ILVR_B2(v8u16, __VA_ARGS__)
#define ILVR_B2_SH(...) ILVR_B2(v8i16, __VA_ARGS__)
#define ILVR_B2_SW(...) ILVR_B2(v4i32, __VA_ARGS__)
#define ILVR_B4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
out0, out1, out2, out3) { \
ILVR_B2(RTYPE, in0, in1, in2, in3, out0, out1); \
ILVR_B2(RTYPE, in4, in5, in6, in7, out2, out3); \
}
#define ILVR_B4_UB(...) ILVR_B4(v16u8, __VA_ARGS__)
#define ILVR_B4_SB(...) ILVR_B4(v16i8, __VA_ARGS__)
#define ILVR_B4_UH(...) ILVR_B4(v8u16, __VA_ARGS__)
#define ILVR_B4_SH(...) ILVR_B4(v8i16, __VA_ARGS__)
#define ILVR_B4_SW(...) ILVR_B4(v4i32, __VA_ARGS__)
/* Description : Interleave right half of halfword elements from vectors
* Arguments : Inputs - in0, in1, in2, in3
* Outputs - out0, out1
* Return Type - as per RTYPE
* Details : Right half of halfword elements of 'in0' and 'in1' are
* interleaved and written to 'out0'.
*/
#define ILVR_H2(RTYPE, in0, in1, in2, in3, out0, out1) { \
out0 = (RTYPE)__msa_ilvr_h((v8i16)in0, (v8i16)in1); \
out1 = (RTYPE)__msa_ilvr_h((v8i16)in2, (v8i16)in3); \
}
#define ILVR_H2_UB(...) ILVR_H2(v16u8, __VA_ARGS__)
#define ILVR_H2_SH(...) ILVR_H2(v8i16, __VA_ARGS__)
#define ILVR_H2_SW(...) ILVR_H2(v4i32, __VA_ARGS__)
#define ILVR_H4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
out0, out1, out2, out3) { \
ILVR_H2(RTYPE, in0, in1, in2, in3, out0, out1); \
ILVR_H2(RTYPE, in4, in5, in6, in7, out2, out3); \
}
#define ILVR_H4_UB(...) ILVR_H4(v16u8, __VA_ARGS__)
#define ILVR_H4_SH(...) ILVR_H4(v8i16, __VA_ARGS__)
#define ILVR_H4_SW(...) ILVR_H4(v4i32, __VA_ARGS__)
/* Description : Interleave right half of double word elements from vectors
* Arguments : Inputs - in0, in1, in2, in3
* Outputs - out0, out1
* Return Type - as per RTYPE
* Details : Right half of double word elements of 'in0' and 'in1' are
* interleaved and written to 'out0'.
*/
#define ILVR_D2(RTYPE, in0, in1, in2, in3, out0, out1) { \
out0 = (RTYPE)__msa_ilvr_d((v2i64)in0, (v2i64)in1); \
out1 = (RTYPE)__msa_ilvr_d((v2i64)in2, (v2i64)in3); \
}
#define ILVR_D2_UB(...) ILVR_D2(v16u8, __VA_ARGS__)
#define ILVR_D2_SB(...) ILVR_D2(v16i8, __VA_ARGS__)
#define ILVR_D2_SH(...) ILVR_D2(v8i16, __VA_ARGS__)
#define ILVRL_H2(RTYPE, in0, in1, out0, out1) { \
out0 = (RTYPE)__msa_ilvr_h((v8i16)in0, (v8i16)in1); \
out1 = (RTYPE)__msa_ilvl_h((v8i16)in0, (v8i16)in1); \
}
#define ILVRL_H2_UB(...) ILVRL_H2(v16u8, __VA_ARGS__)
#define ILVRL_H2_SB(...) ILVRL_H2(v16i8, __VA_ARGS__)
#define ILVRL_H2_SH(...) ILVRL_H2(v8i16, __VA_ARGS__)
#define ILVRL_H2_SW(...) ILVRL_H2(v4i32, __VA_ARGS__)
#define ILVRL_H2_UW(...) ILVRL_H2(v4u32, __VA_ARGS__)
#define ILVRL_W2(RTYPE, in0, in1, out0, out1) { \
out0 = (RTYPE)__msa_ilvr_w((v4i32)in0, (v4i32)in1); \
out1 = (RTYPE)__msa_ilvl_w((v4i32)in0, (v4i32)in1); \
}
#define ILVRL_W2_UB(...) ILVRL_W2(v16u8, __VA_ARGS__)
#define ILVRL_W2_SH(...) ILVRL_W2(v8i16, __VA_ARGS__)
#define ILVRL_W2_SW(...) ILVRL_W2(v4i32, __VA_ARGS__)
/* Description : Pack even byte elements of vector pairs
* Arguments : Inputs - in0, in1, in2, in3
* Outputs - out0, out1
* Return Type - as per RTYPE
* Details : Even byte elements of 'in0' are copied to the left half of
* 'out0' & even byte elements of 'in1' are copied to the right
* half of 'out0'.
*/
#define PCKEV_B2(RTYPE, in0, in1, in2, in3, out0, out1) { \
out0 = (RTYPE)__msa_pckev_b((v16i8)in0, (v16i8)in1); \
out1 = (RTYPE)__msa_pckev_b((v16i8)in2, (v16i8)in3); \
}
#define PCKEV_B2_SB(...) PCKEV_B2(v16i8, __VA_ARGS__)
#define PCKEV_B2_UB(...) PCKEV_B2(v16u8, __VA_ARGS__)
#define PCKEV_B2_SH(...) PCKEV_B2(v8i16, __VA_ARGS__)
#define PCKEV_B2_SW(...) PCKEV_B2(v4i32, __VA_ARGS__)
/* Description : Arithmetic immediate shift right all elements of word vector
* Arguments : Inputs - in0, in1, shift
* Outputs - in place operation
* Return Type - as per input vector RTYPE
* Details : Each element of vector 'in0' is right shifted by 'shift' and
* the result is written in-place. 'shift' is a GP variable.
*/
#define SRAI_W2(RTYPE, in0, in1, shift_val) { \
in0 = (RTYPE)SRAI_W(in0, shift_val); \
in1 = (RTYPE)SRAI_W(in1, shift_val); \
}
#define SRAI_W2_SW(...) SRAI_W2(v4i32, __VA_ARGS__)
#define SRAI_W2_UW(...) SRAI_W2(v4u32, __VA_ARGS__)
#define SRAI_W4(RTYPE, in0, in1, in2, in3, shift_val) { \
SRAI_W2(RTYPE, in0, in1, shift_val); \
SRAI_W2(RTYPE, in2, in3, shift_val); \
}
#define SRAI_W4_SW(...) SRAI_W4(v4i32, __VA_ARGS__)
#define SRAI_W4_UW(...) SRAI_W4(v4u32, __VA_ARGS__)
/* Description : Arithmetic shift right all elements of half-word vector
* Arguments : Inputs - in0, in1, shift
* Outputs - in place operation
* Return Type - as per input vector RTYPE
* Details : Each element of vector 'in0' is right shifted by 'shift' and
* the result is written in-place. 'shift' is a GP variable.
*/
#define SRAI_H2(RTYPE, in0, in1, shift_val) { \
in0 = (RTYPE)SRAI_H(in0, shift_val); \
in1 = (RTYPE)SRAI_H(in1, shift_val); \
}
#define SRAI_H2_SH(...) SRAI_H2(v8i16, __VA_ARGS__)
#define SRAI_H2_UH(...) SRAI_H2(v8u16, __VA_ARGS__)
/* Description : Arithmetic rounded shift right all elements of word vector
* Arguments : Inputs - in0, in1, shift
* Outputs - in place operation
* Return Type - as per input vector RTYPE
* Details : Each element of vector 'in0' is right shifted by 'shift' and
* the result is written in-place. 'shift' is a GP variable.
*/
#define SRARI_W2(RTYPE, in0, in1, shift) { \
in0 = (RTYPE)__msa_srari_w((v4i32)in0, shift); \
in1 = (RTYPE)__msa_srari_w((v4i32)in1, shift); \
}
#define SRARI_W2_SW(...) SRARI_W2(v4i32, __VA_ARGS__)
#define SRARI_W4(RTYPE, in0, in1, in2, in3, shift) { \
SRARI_W2(RTYPE, in0, in1, shift); \
SRARI_W2(RTYPE, in2, in3, shift); \
}
#define SRARI_W4_SH(...) SRARI_W4(v8i16, __VA_ARGS__)
#define SRARI_W4_UW(...) SRARI_W4(v4u32, __VA_ARGS__)
#define SRARI_W4_SW(...) SRARI_W4(v4i32, __VA_ARGS__)
/* Description : Addition of 2 pairs of half-word vectors
* Arguments : Inputs - in0, in1, in2, in3
* Outputs - out0, out1
* Details : Each element in 'in0' is added to 'in1' and result is written
* to 'out0'.
*/
#define ADDVI_H2(RTYPE, in0, in1, in2, in3, out0, out1) { \
out0 = (RTYPE)ADDVI_H(in0, in1); \
out1 = (RTYPE)ADDVI_H(in2, in3); \
}
#define ADDVI_H2_SH(...) ADDVI_H2(v8i16, __VA_ARGS__)
#define ADDVI_H2_UH(...) ADDVI_H2(v8u16, __VA_ARGS__)
/* Description : Addition of 2 pairs of vectors
* Arguments : Inputs - in0, in1, in2, in3
* Outputs - out0, out1
* Details : Each element in 'in0' is added to 'in1' and result is written
* to 'out0'.
*/
#define ADD2(in0, in1, in2, in3, out0, out1) { \
out0 = in0 + in1; \
out1 = in2 + in3; \
}
#define ADD4(in0, in1, in2, in3, in4, in5, in6, in7, \
out0, out1, out2, out3) { \
ADD2(in0, in1, in2, in3, out0, out1); \
ADD2(in4, in5, in6, in7, out2, out3); \
}
/* Description : Sign extend halfword elements from input vector and return
* the result in pair of vectors
* Arguments : Input - in (halfword vector)
* Outputs - out0, out1 (sign extended word vectors)
* Return Type - signed word
* Details : Sign bit of halfword elements from input vector 'in' is
* extracted and interleaved right with same vector 'in0' to
* generate 4 signed word elements in 'out0'
* Then interleaved left with same vector 'in0' to
* generate 4 signed word elements in 'out1'
*/
#define UNPCK_SH_SW(in, out0, out1) { \
const v8i16 tmp_m = __msa_clti_s_h((v8i16)in, 0); \
ILVRL_H2_SW(tmp_m, in, out0, out1); \
}
/* Description : Butterfly of 4 input vectors
* Arguments : Inputs - in0, in1, in2, in3
* Outputs - out0, out1, out2, out3
* Details : Butterfly operation
*/
#define BUTTERFLY_4(in0, in1, in2, in3, out0, out1, out2, out3) { \
out0 = in0 + in3; \
out1 = in1 + in2; \
out2 = in1 - in2; \
out3 = in0 - in3; \
}
/* Description : Transpose 4x4 block with word elements in vectors
* Arguments : Inputs - in0, in1, in2, in3
* Outputs - out0, out1, out2, out3
* Return Type - as per RTYPE
*/
#define TRANSPOSE4x4_W(RTYPE, in0, in1, in2, in3, out0, out1, out2, out3) { \
v4i32 s0_m, s1_m, s2_m, s3_m; \
ILVRL_W2_SW(in1, in0, s0_m, s1_m); \
ILVRL_W2_SW(in3, in2, s2_m, s3_m); \
out0 = (RTYPE)__msa_ilvr_d((v2i64)s2_m, (v2i64)s0_m); \
out1 = (RTYPE)__msa_ilvl_d((v2i64)s2_m, (v2i64)s0_m); \
out2 = (RTYPE)__msa_ilvr_d((v2i64)s3_m, (v2i64)s1_m); \
out3 = (RTYPE)__msa_ilvl_d((v2i64)s3_m, (v2i64)s1_m); \
}
#define TRANSPOSE4x4_SW_SW(...) TRANSPOSE4x4_W(v4i32, __VA_ARGS__)
/* Description : Add block 4x4
* Arguments : Inputs - in0, in1, in2, in3, pdst, stride
* Details : Least significant 4 bytes from each input vector are added to
* the destination bytes, clipped between 0-255 and stored.
*/
#define ADDBLK_ST4x4_UB(in0, in1, in2, in3, pdst, stride) { \
uint32_t src0_m, src1_m, src2_m, src3_m; \
v8i16 inp0_m, inp1_m, res0_m, res1_m; \
v16i8 dst0_m = { 0 }; \
v16i8 dst1_m = { 0 }; \
const v16i8 zero_m = { 0 }; \
ILVR_D2_SH(in1, in0, in3, in2, inp0_m, inp1_m); \
LW4(pdst, stride, src0_m, src1_m, src2_m, src3_m); \
INSERT_W2_SB(src0_m, src1_m, dst0_m); \
INSERT_W2_SB(src2_m, src3_m, dst1_m); \
ILVR_B2_SH(zero_m, dst0_m, zero_m, dst1_m, res0_m, res1_m); \
ADD2(res0_m, inp0_m, res1_m, inp1_m, res0_m, res1_m); \
CLIP_SH2_0_255(res0_m, res1_m); \
PCKEV_B2_SB(res0_m, res0_m, res1_m, res1_m, dst0_m, dst1_m); \
ST4x4_UB(dst0_m, dst1_m, 0, 1, 0, 1, pdst, stride); \
}
#endif /* WEBP_DSP_MSA_MACRO_H_ */

View File

@ -173,10 +173,10 @@ void WebPRescalerExportRow(WebPRescaler* const wrk) {
WebPRescalerExportRowExpand(wrk);
} else if (wrk->fxy_scale) {
WebPRescalerExportRowShrink(wrk);
} else { // very special case for src = dst = 1x1
} else { // special case
int i;
assert(wrk->src_height == wrk->dst_height && wrk->x_add == 1);
assert(wrk->src_width == 1 && wrk->dst_width <= 2);
assert(wrk->src_height == 1 && wrk->dst_height == 1);
for (i = 0; i < wrk->num_channels * wrk->dst_width; ++i) {
wrk->dst[i] = wrk->irow[i];
wrk->irow[i] = 0;

View File

@ -18,6 +18,7 @@
#include <assert.h>
#include "../utils/rescaler.h"
#include "../utils/utils.h"
//------------------------------------------------------------------------------
// Implementations of critical functions ImportRow / ExportRow

View File

@ -14,9 +14,7 @@
#include "./dsp.h"
// Code is disabled for now, in favor of the plain-C version
// TODO(djordje.pesut): adapt the code to reflect the C-version.
#if 0 // defined(WEBP_USE_MIPS_DSP_R2)
#if defined(WEBP_USE_MIPS_DSP_R2)
#include <assert.h>
#include "./yuv.h"
@ -24,21 +22,21 @@
#if !defined(WEBP_YUV_USE_TABLE)
#define YUV_TO_RGB(Y, U, V, R, G, B) do { \
const int t1 = kYScale * Y; \
const int t2 = kVToG * V; \
R = kVToR * V; \
G = kUToG * U; \
B = kUToB * U; \
const int t1 = MultHi(Y, 19077); \
const int t2 = MultHi(V, 13320); \
R = MultHi(V, 26149); \
G = MultHi(U, 6419); \
B = MultHi(U, 33050); \
R = t1 + R; \
G = t1 - G; \
B = t1 + B; \
R = R + kRCst; \
G = G - t2 + kGCst; \
B = B + kBCst; \
R = R - 14234; \
G = G - t2 + 8708; \
B = B - 17685; \
__asm__ volatile ( \
"shll_s.w %[" #R "], %[" #R "], 9 \n\t" \
"shll_s.w %[" #G "], %[" #G "], 9 \n\t" \
"shll_s.w %[" #B "], %[" #B "], 9 \n\t" \
"shll_s.w %[" #R "], %[" #R "], 17 \n\t" \
"shll_s.w %[" #G "], %[" #G "], 17 \n\t" \
"shll_s.w %[" #B "], %[" #B "], 17 \n\t" \
"precrqu_s.qb.ph %[" #R "], %[" #R "], $zero \n\t" \
"precrqu_s.qb.ph %[" #G "], %[" #G "], $zero \n\t" \
"precrqu_s.qb.ph %[" #B "], %[" #B "], $zero \n\t" \
@ -279,6 +277,6 @@ WEBP_DSP_INIT_STUB(WebPInitYUV444ConvertersMIPSdspR2)
#endif // WEBP_USE_MIPS_DSP_R2
#if 1 // !(defined(FANCY_UPSAMPLING) && defined(WEBP_USE_MIPS_DSP_R2))
#if !(defined(FANCY_UPSAMPLING) && defined(WEBP_USE_MIPS_DSP_R2))
WEBP_DSP_INIT_STUB(WebPInitUpsamplersMIPSdspR2)
#endif

View File

@ -14,8 +14,7 @@
#include "./dsp.h"
// Code is disabled for now, in favor of the plain-C version
#if 0 // defined(WEBP_USE_MIPS32)
#if defined(WEBP_USE_MIPS32)
#include "./yuv.h"
@ -29,19 +28,19 @@ static void FUNC_NAME(const uint8_t* y, \
int i, r, g, b; \
int temp0, temp1, temp2, temp3, temp4; \
for (i = 0; i < (len >> 1); i++) { \
temp1 = kVToR * v[0]; \
temp3 = kVToG * v[0]; \
temp2 = kUToG * u[0]; \
temp4 = kUToB * u[0]; \
temp0 = kYScale * y[0]; \
temp1 += kRCst; \
temp3 -= kGCst; \
temp1 = MultHi(v[0], 26149); \
temp3 = MultHi(v[0], 13320); \
temp2 = MultHi(u[0], 6419); \
temp4 = MultHi(u[0], 33050); \
temp0 = MultHi(y[0], 19077); \
temp1 -= 14234; \
temp3 -= 8708; \
temp2 += temp3; \
temp4 += kBCst; \
temp4 -= 17685; \
r = VP8Clip8(temp0 + temp1); \
g = VP8Clip8(temp0 - temp2); \
b = VP8Clip8(temp0 + temp4); \
temp0 = kYScale * y[1]; \
temp0 = MultHi(y[1], 19077); \
dst[R] = r; \
dst[G] = g; \
dst[B] = b; \
@ -59,15 +58,15 @@ static void FUNC_NAME(const uint8_t* y, \
dst += 2 * XSTEP; \
} \
if (len & 1) { \
temp1 = kVToR * v[0]; \
temp3 = kVToG * v[0]; \
temp2 = kUToG * u[0]; \
temp4 = kUToB * u[0]; \
temp0 = kYScale * y[0]; \
temp1 += kRCst; \
temp3 -= kGCst; \
temp1 = MultHi(v[0], 26149); \
temp3 = MultHi(v[0], 13320); \
temp2 = MultHi(u[0], 6419); \
temp4 = MultHi(u[0], 33050); \
temp0 = MultHi(y[0], 19077); \
temp1 -= 14234; \
temp3 -= 8708; \
temp2 += temp3; \
temp4 += kBCst; \
temp4 -= 17685; \
r = VP8Clip8(temp0 + temp1); \
g = VP8Clip8(temp0 - temp2); \
b = VP8Clip8(temp0 + temp4); \

View File

@ -14,8 +14,7 @@
#include "./dsp.h"
// Code is disabled for now, in favor of the plain-C version
#if 0 // defined(WEBP_USE_MIPS_DSP_R2)
#if defined(WEBP_USE_MIPS_DSP_R2)
#include "./yuv.h"
@ -31,10 +30,10 @@
"mul %[temp2], %[t_con_3], %[temp4] \n\t" \
"mul %[temp4], %[t_con_4], %[temp4] \n\t" \
"mul %[temp0], %[t_con_5], %[temp0] \n\t" \
"addu %[temp1], %[temp1], %[t_con_6] \n\t" \
"subu %[temp1], %[temp1], %[t_con_6] \n\t" \
"subu %[temp3], %[temp3], %[t_con_7] \n\t" \
"addu %[temp2], %[temp2], %[temp3] \n\t" \
"addu %[temp4], %[temp4], %[t_con_8] \n\t" \
"subu %[temp4], %[temp4], %[t_con_8] \n\t" \
#define ROW_FUNC_PART_2(R, G, B, K) \
"addu %[temp5], %[temp0], %[temp1] \n\t" \
@ -43,12 +42,12 @@
".if " #K " \n\t" \
"lbu %[temp0], 1(%[y]) \n\t" \
".endif \n\t" \
"shll_s.w %[temp5], %[temp5], 9 \n\t" \
"shll_s.w %[temp6], %[temp6], 9 \n\t" \
"shll_s.w %[temp5], %[temp5], 17 \n\t" \
"shll_s.w %[temp6], %[temp6], 17 \n\t" \
".if " #K " \n\t" \
"mul %[temp0], %[t_con_5], %[temp0] \n\t" \
".endif \n\t" \
"shll_s.w %[temp7], %[temp7], 9 \n\t" \
"shll_s.w %[temp7], %[temp7], 17 \n\t" \
"precrqu_s.qb.ph %[temp5], %[temp5], $zero \n\t" \
"precrqu_s.qb.ph %[temp6], %[temp6], $zero \n\t" \
"precrqu_s.qb.ph %[temp7], %[temp7], $zero \n\t" \
@ -75,14 +74,14 @@ static void FUNC_NAME(const uint8_t* y, \
uint8_t* dst, int len) { \
int i; \
uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7; \
const int t_con_1 = kVToR; \
const int t_con_2 = kVToG; \
const int t_con_3 = kUToG; \
const int t_con_4 = kUToB; \
const int t_con_5 = kYScale; \
const int t_con_6 = kRCst; \
const int t_con_7 = kGCst; \
const int t_con_8 = kBCst; \
const int t_con_1 = 26149; \
const int t_con_2 = 13320; \
const int t_con_3 = 6419; \
const int t_con_4 = 33050; \
const int t_con_5 = 19077; \
const int t_con_6 = 14234; \
const int t_con_7 = 8708; \
const int t_con_8 = 17685; \
for (i = 0; i < (len >> 1); i++) { \
__asm__ volatile ( \
ROW_FUNC_PART_1() \

View File

@ -33,7 +33,8 @@ static void ConvertYUV444ToRGB(const __m128i* const Y0,
const __m128i k19077 = _mm_set1_epi16(19077);
const __m128i k26149 = _mm_set1_epi16(26149);
const __m128i k14234 = _mm_set1_epi16(14234);
const __m128i k33050 = _mm_set1_epi16(33050);
// 33050 doesn't fit in a signed short: only use this with unsigned arithmetic
const __m128i k33050 = _mm_set1_epi16((short)33050);
const __m128i k17685 = _mm_set1_epi16(17685);
const __m128i k6419 = _mm_set1_epi16(6419);
const __m128i k13320 = _mm_set1_epi16(13320);

View File

@ -79,7 +79,11 @@ static int EncodeLossless(const uint8_t* const data, int width, int height,
config.quality = 8.f * effort_level;
assert(config.quality >= 0 && config.quality <= 100.f);
ok = (VP8LEncodeStream(&config, &picture, bw) == VP8_ENC_OK);
// TODO(urvang): Temporary fix to avoid generating images that trigger
// a decoder bug related to alpha with color cache.
// See: https://code.google.com/p/webp/issues/detail?id=239
// Need to re-enable this later.
ok = (VP8LEncodeStream(&config, &picture, bw, 0 /*use_cache*/) == VP8_ENC_OK);
WebPPictureFree(&picture);
ok = ok && !bw->error_;
if (!ok) {
@ -118,7 +122,6 @@ static int EncodeAlphaInternal(const uint8_t* const data, int width, int height,
assert(method >= ALPHA_NO_COMPRESSION);
assert(method <= ALPHA_LOSSLESS_COMPRESSION);
assert(sizeof(header) == ALPHA_HEADER_LEN);
// TODO(skal): have a common function and #define's to validate alpha params.
filter_func = WebPFilters[filter];
if (filter_func != NULL) {

View File

@ -307,6 +307,7 @@ static int MBAnalyzeBestIntra4Mode(VP8EncIterator* const it,
static int MBAnalyzeBestUVMode(VP8EncIterator* const it) {
int best_alpha = DEFAULT_ALPHA;
int smallest_alpha = 0;
int best_mode = 0;
const int max_mode = MAX_UV_MODE;
int mode;
@ -322,6 +323,10 @@ static int MBAnalyzeBestUVMode(VP8EncIterator* const it) {
alpha = GetAlpha(&histo);
if (IS_BETTER_ALPHA(alpha, best_alpha)) {
best_alpha = alpha;
}
// The best prediction mode tends to be the one with the smallest alpha.
if (mode == 0 || alpha < smallest_alpha) {
smallest_alpha = alpha;
best_mode = mode;
}
}

File diff suppressed because it is too large Load Diff

View File

@ -115,11 +115,12 @@ static WEBP_INLINE uint32_t PixOrCopyDistance(const PixOrCopy* const p) {
typedef struct VP8LHashChain VP8LHashChain;
struct VP8LHashChain {
// Stores the most recently added position with the given hash value.
int32_t hash_to_first_index_[HASH_SIZE];
// chain_[pos] stores the previous position with the same hash value
// for every pixel in the image.
int32_t* chain_;
// The 20 most significant bits contain the offset at which the best match
// is found. These 20 bits are the limit defined by GetWindowSizeForHashChain
// (through WINDOW_SIZE = 1<<20).
// The lower 12 bits contain the length of the match. The 12 bit limit is
// defined in MaxFindCopyLength with MAX_LENGTH=4096.
uint32_t* offset_length_;
// This is the maximum size of the hash_chain that can be constructed.
// Typically this is the pixel count (width x height) for a given image.
int size_;
@ -127,6 +128,9 @@ struct VP8LHashChain {
// Must be called first, to set size.
int VP8LHashChainInit(VP8LHashChain* const p, int size);
// Pre-compute the best matches for argb.
int VP8LHashChainFill(VP8LHashChain* const p, int quality,
const uint32_t* const argb, int xsize, int ysize);
void VP8LHashChainClear(VP8LHashChain* const p); // release memory
// -----------------------------------------------------------------------------
@ -192,8 +196,8 @@ static WEBP_INLINE void VP8LRefsCursorNext(VP8LRefsCursor* const c) {
// refs[0] or refs[1].
VP8LBackwardRefs* VP8LGetBackwardReferences(
int width, int height, const uint32_t* const argb, int quality,
int low_effort, int* const cache_bits, VP8LHashChain* const hash_chain,
VP8LBackwardRefs refs[2]);
int low_effort, int* const cache_bits,
const VP8LHashChain* const hash_chain, VP8LBackwardRefs refs[2]);
#ifdef __cplusplus
}

View File

@ -281,18 +281,6 @@ int VP8GetCostUV(VP8EncIterator* const it, const VP8ModeScore* const rd) {
//------------------------------------------------------------------------------
// Recording of token probabilities.
// Record proba context used
static int Record(int bit, proba_t* const stats) {
proba_t p = *stats;
if (p >= 0xffff0000u) { // an overflow is inbound.
p = ((p + 1u) >> 1) & 0x7fff7fffu; // -> divide the stats by 2.
}
// record bit count (lower 16 bits) and increment total count (upper 16 bits).
p += 0x00010000u + bit;
*stats = p;
return bit;
}
// We keep the table-free variant around for reference, in case.
#define USE_LEVEL_CODE_TABLE
@ -303,31 +291,31 @@ int VP8RecordCoeffs(int ctx, const VP8Residual* const res) {
// should be stats[VP8EncBands[n]], but it's equivalent for n=0 or 1
proba_t* s = res->stats[n][ctx];
if (res->last < 0) {
Record(0, s + 0);
VP8RecordStats(0, s + 0);
return 0;
}
while (n <= res->last) {
int v;
Record(1, s + 0); // order of record doesn't matter
VP8RecordStats(1, s + 0); // order of record doesn't matter
while ((v = res->coeffs[n++]) == 0) {
Record(0, s + 1);
VP8RecordStats(0, s + 1);
s = res->stats[VP8EncBands[n]][0];
}
Record(1, s + 1);
if (!Record(2u < (unsigned int)(v + 1), s + 2)) { // v = -1 or 1
VP8RecordStats(1, s + 1);
if (!VP8RecordStats(2u < (unsigned int)(v + 1), s + 2)) { // v = -1 or 1
s = res->stats[VP8EncBands[n]][1];
} else {
v = abs(v);
#if !defined(USE_LEVEL_CODE_TABLE)
if (!Record(v > 4, s + 3)) {
if (Record(v != 2, s + 4))
Record(v == 4, s + 5);
} else if (!Record(v > 10, s + 6)) {
Record(v > 6, s + 7);
} else if (!Record((v >= 3 + (8 << 2)), s + 8)) {
Record((v >= 3 + (8 << 1)), s + 9);
if (!VP8RecordStats(v > 4, s + 3)) {
if (VP8RecordStats(v != 2, s + 4))
VP8RecordStats(v == 4, s + 5);
} else if (!VP8RecordStats(v > 10, s + 6)) {
VP8RecordStats(v > 6, s + 7);
} else if (!VP8RecordStats((v >= 3 + (8 << 2)), s + 8)) {
VP8RecordStats((v >= 3 + (8 << 1)), s + 9);
} else {
Record((v >= 3 + (8 << 3)), s + 10);
VP8RecordStats((v >= 3 + (8 << 3)), s + 10);
}
#else
if (v > MAX_VARIABLE_LEVEL) {
@ -340,14 +328,14 @@ int VP8RecordCoeffs(int ctx, const VP8Residual* const res) {
int i;
for (i = 0; (pattern >>= 1) != 0; ++i) {
const int mask = 2 << i;
if (pattern & 1) Record(!!(bits & mask), s + 3 + i);
if (pattern & 1) VP8RecordStats(!!(bits & mask), s + 3 + i);
}
}
#endif
s = res->stats[VP8EncBands[n]][2];
}
}
if (n < 16) Record(0, s + 0);
if (n < 16) VP8RecordStats(0, s + 0);
return 1;
}

View File

@ -41,6 +41,20 @@ void VP8InitResidual(int first, int coeff_type,
int VP8RecordCoeffs(int ctx, const VP8Residual* const res);
// Record proba context used.
static WEBP_INLINE int VP8RecordStats(int bit, proba_t* const stats) {
proba_t p = *stats;
// An overflow is inbound. Note we handle this at 0xfffe0000u instead of
// 0xffff0000u to make sure p + 1u does not overflow.
if (p >= 0xfffe0000u) {
p = ((p + 1u) >> 1) & 0x7fff7fffu; // -> divide the stats by 2.
}
// record bit count (lower 16 bits) and increment total count (upper 16 bits).
p += 0x00010000u + bit;
*stats = p;
return bit;
}
// Cost of coding one event with probability 'proba'.
static WEBP_INLINE int VP8BitCost(int bit, uint8_t proba) {
return !bit ? VP8EntropyCost[proba] : VP8EntropyCost[255 - proba];

View File

@ -107,10 +107,9 @@ static void DoFilter(const VP8EncIterator* const it, int level) {
//------------------------------------------------------------------------------
// SSIM metric
enum { KERNEL = 3 };
static const double kMinValue = 1.e-10; // minimal threshold
void VP8SSIMAddStats(const DistoStats* const src, DistoStats* const dst) {
void VP8SSIMAddStats(const VP8DistoStats* const src, VP8DistoStats* const dst) {
dst->w += src->w;
dst->xm += src->xm;
dst->ym += src->ym;
@ -119,32 +118,7 @@ void VP8SSIMAddStats(const DistoStats* const src, DistoStats* const dst) {
dst->yym += src->yym;
}
static void VP8SSIMAccumulate(const uint8_t* src1, int stride1,
const uint8_t* src2, int stride2,
int xo, int yo, int W, int H,
DistoStats* const stats) {
const int ymin = (yo - KERNEL < 0) ? 0 : yo - KERNEL;
const int ymax = (yo + KERNEL > H - 1) ? H - 1 : yo + KERNEL;
const int xmin = (xo - KERNEL < 0) ? 0 : xo - KERNEL;
const int xmax = (xo + KERNEL > W - 1) ? W - 1 : xo + KERNEL;
int x, y;
src1 += ymin * stride1;
src2 += ymin * stride2;
for (y = ymin; y <= ymax; ++y, src1 += stride1, src2 += stride2) {
for (x = xmin; x <= xmax; ++x) {
const int s1 = src1[x];
const int s2 = src2[x];
stats->w += 1;
stats->xm += s1;
stats->ym += s2;
stats->xxm += s1 * s1;
stats->xym += s1 * s2;
stats->yym += s2 * s2;
}
}
}
double VP8SSIMGet(const DistoStats* const stats) {
double VP8SSIMGet(const VP8DistoStats* const stats) {
const double xmxm = stats->xm * stats->xm;
const double ymym = stats->ym * stats->ym;
const double xmym = stats->xm * stats->ym;
@ -165,7 +139,7 @@ double VP8SSIMGet(const DistoStats* const stats) {
return (fden != 0.) ? fnum / fden : kMinValue;
}
double VP8SSIMGetSquaredError(const DistoStats* const s) {
double VP8SSIMGetSquaredError(const VP8DistoStats* const s) {
if (s->w > 0.) {
const double iw2 = 1. / (s->w * s->w);
const double sxx = s->xxm * s->w - s->xm * s->xm;
@ -177,34 +151,66 @@ double VP8SSIMGetSquaredError(const DistoStats* const s) {
return kMinValue;
}
void VP8SSIMAccumulatePlane(const uint8_t* src1, int stride1,
const uint8_t* src2, int stride2,
int W, int H, DistoStats* const stats) {
int x, y;
for (y = 0; y < H; ++y) {
for (x = 0; x < W; ++x) {
VP8SSIMAccumulate(src1, stride1, src2, stride2, x, y, W, H, stats);
}
#define LIMIT(A, M) ((A) > (M) ? (M) : (A))
static void VP8SSIMAccumulateRow(const uint8_t* src1, int stride1,
const uint8_t* src2, int stride2,
int y, int W, int H,
VP8DistoStats* const stats) {
int x = 0;
const int w0 = LIMIT(VP8_SSIM_KERNEL, W);
for (x = 0; x < w0; ++x) {
VP8SSIMAccumulateClipped(src1, stride1, src2, stride2, x, y, W, H, stats);
}
for (; x <= W - 8 + VP8_SSIM_KERNEL; ++x) {
VP8SSIMAccumulate(
src1 + (y - VP8_SSIM_KERNEL) * stride1 + (x - VP8_SSIM_KERNEL), stride1,
src2 + (y - VP8_SSIM_KERNEL) * stride2 + (x - VP8_SSIM_KERNEL), stride2,
stats);
}
for (; x < W; ++x) {
VP8SSIMAccumulateClipped(src1, stride1, src2, stride2, x, y, W, H, stats);
}
}
void VP8SSIMAccumulatePlane(const uint8_t* src1, int stride1,
const uint8_t* src2, int stride2,
int W, int H, VP8DistoStats* const stats) {
int x, y;
const int h0 = LIMIT(VP8_SSIM_KERNEL, H);
const int h1 = LIMIT(VP8_SSIM_KERNEL, H - VP8_SSIM_KERNEL);
for (y = 0; y < h0; ++y) {
for (x = 0; x < W; ++x) {
VP8SSIMAccumulateClipped(src1, stride1, src2, stride2, x, y, W, H, stats);
}
}
for (; y < h1; ++y) {
VP8SSIMAccumulateRow(src1, stride1, src2, stride2, y, W, H, stats);
}
for (; y < H; ++y) {
for (x = 0; x < W; ++x) {
VP8SSIMAccumulateClipped(src1, stride1, src2, stride2, x, y, W, H, stats);
}
}
}
#undef LIMIT
static double GetMBSSIM(const uint8_t* yuv1, const uint8_t* yuv2) {
int x, y;
DistoStats s = { .0, .0, .0, .0, .0, .0 };
VP8DistoStats s = { .0, .0, .0, .0, .0, .0 };
// compute SSIM in a 10 x 10 window
for (x = 3; x < 13; x++) {
for (y = 3; y < 13; y++) {
VP8SSIMAccumulate(yuv1 + Y_OFF_ENC, BPS, yuv2 + Y_OFF_ENC, BPS,
x, y, 16, 16, &s);
for (y = VP8_SSIM_KERNEL; y < 16 - VP8_SSIM_KERNEL; y++) {
for (x = VP8_SSIM_KERNEL; x < 16 - VP8_SSIM_KERNEL; x++) {
VP8SSIMAccumulateClipped(yuv1 + Y_OFF_ENC, BPS, yuv2 + Y_OFF_ENC, BPS,
x, y, 16, 16, &s);
}
}
for (x = 1; x < 7; x++) {
for (y = 1; y < 7; y++) {
VP8SSIMAccumulate(yuv1 + U_OFF_ENC, BPS, yuv2 + U_OFF_ENC, BPS,
x, y, 8, 8, &s);
VP8SSIMAccumulate(yuv1 + V_OFF_ENC, BPS, yuv2 + V_OFF_ENC, BPS,
x, y, 8, 8, &s);
VP8SSIMAccumulateClipped(yuv1 + U_OFF_ENC, BPS, yuv2 + U_OFF_ENC, BPS,
x, y, 8, 8, &s);
VP8SSIMAccumulateClipped(yuv1 + V_OFF_ENC, BPS, yuv2 + V_OFF_ENC, BPS,
x, y, 8, 8, &s);
}
}
return VP8SSIMGet(&s);
@ -222,6 +228,7 @@ void VP8InitFilter(VP8EncIterator* const it) {
(*it->lf_stats_)[s][i] = 0;
}
}
VP8SSIMDspInit();
}
}

View File

@ -185,6 +185,13 @@ static int GetProba(int a, int b) {
: (255 * a + total / 2) / total; // rounded proba
}
static void ResetSegments(VP8Encoder* const enc) {
int n;
for (n = 0; n < enc->mb_w_ * enc->mb_h_; ++n) {
enc->mb_info_[n].segment_ = 0;
}
}
static void SetSegmentProbas(VP8Encoder* const enc) {
int p[NUM_MB_SEGMENTS] = { 0 };
int n;
@ -206,6 +213,7 @@ static void SetSegmentProbas(VP8Encoder* const enc) {
enc->segment_hdr_.update_map_ =
(probas[0] != 255) || (probas[1] != 255) || (probas[2] != 255);
if (!enc->segment_hdr_.update_map_) ResetSegments(enc);
enc->segment_hdr_.size_ =
p[0] * (VP8BitCost(0, probas[0]) + VP8BitCost(0, probas[1])) +
p[1] * (VP8BitCost(0, probas[0]) + VP8BitCost(1, probas[1])) +
@ -406,9 +414,7 @@ static int RecordTokens(VP8EncIterator* const it, const VP8ModeScore* const rd,
VP8InitResidual(0, 1, enc, &res);
VP8SetResidualCoeffs(rd->y_dc_levels, &res);
it->top_nz_[8] = it->left_nz_[8] =
VP8RecordCoeffTokens(ctx, 1,
res.first, res.last, res.coeffs, tokens);
VP8RecordCoeffs(ctx, &res);
VP8RecordCoeffTokens(ctx, &res, tokens);
VP8InitResidual(1, 0, enc, &res);
} else {
VP8InitResidual(0, 3, enc, &res);
@ -420,9 +426,7 @@ static int RecordTokens(VP8EncIterator* const it, const VP8ModeScore* const rd,
const int ctx = it->top_nz_[x] + it->left_nz_[y];
VP8SetResidualCoeffs(rd->y_ac_levels[x + y * 4], &res);
it->top_nz_[x] = it->left_nz_[y] =
VP8RecordCoeffTokens(ctx, res.coeff_type,
res.first, res.last, res.coeffs, tokens);
VP8RecordCoeffs(ctx, &res);
VP8RecordCoeffTokens(ctx, &res, tokens);
}
}
@ -434,9 +438,7 @@ static int RecordTokens(VP8EncIterator* const it, const VP8ModeScore* const rd,
const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y];
VP8SetResidualCoeffs(rd->uv_levels[ch * 2 + x + y * 2], &res);
it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] =
VP8RecordCoeffTokens(ctx, 2,
res.first, res.last, res.coeffs, tokens);
VP8RecordCoeffs(ctx, &res);
VP8RecordCoeffTokens(ctx, &res, tokens);
}
}
}
@ -814,7 +816,7 @@ int VP8EncTokenLoop(VP8Encoder* const enc) {
num_pass_left, stats.last_value, stats.value,
stats.last_q, stats.q, stats.dq);
#endif
if (size_p0 > PARTITION0_SIZE_LIMIT) {
if (enc->max_i4_header_bits_ > 0 && size_p0 > PARTITION0_SIZE_LIMIT) {
++num_pass_left;
enc->max_i4_header_bits_ >>= 1; // strengthen header bit limitation...
continue; // ...and start over

View File

@ -386,29 +386,27 @@ static void UpdateHistogramCost(VP8LHistogram* const h) {
}
static int GetBinIdForEntropy(double min, double max, double val) {
const double range = max - min + 1e-6;
const double delta = val - min;
return (int)(NUM_PARTITIONS * delta / range);
const double range = max - min;
if (range > 0.) {
const double delta = val - min;
return (int)((NUM_PARTITIONS - 1e-6) * delta / range);
} else {
return 0;
}
}
static int GetHistoBinIndexLowEffort(
const VP8LHistogram* const h, const DominantCostRange* const c) {
const int bin_id = GetBinIdForEntropy(c->literal_min_, c->literal_max_,
h->literal_cost_);
static int GetHistoBinIndex(const VP8LHistogram* const h,
const DominantCostRange* const c, int low_effort) {
int bin_id = GetBinIdForEntropy(c->literal_min_, c->literal_max_,
h->literal_cost_);
assert(bin_id < NUM_PARTITIONS);
return bin_id;
}
static int GetHistoBinIndex(
const VP8LHistogram* const h, const DominantCostRange* const c) {
const int bin_id =
GetBinIdForEntropy(c->blue_min_, c->blue_max_, h->blue_cost_) +
NUM_PARTITIONS * GetBinIdForEntropy(c->red_min_, c->red_max_,
h->red_cost_) +
NUM_PARTITIONS * NUM_PARTITIONS * GetBinIdForEntropy(c->literal_min_,
c->literal_max_,
h->literal_cost_);
assert(bin_id < BIN_SIZE);
if (!low_effort) {
bin_id = bin_id * NUM_PARTITIONS
+ GetBinIdForEntropy(c->red_min_, c->red_max_, h->red_cost_);
bin_id = bin_id * NUM_PARTITIONS
+ GetBinIdForEntropy(c->blue_min_, c->blue_max_, h->blue_cost_);
assert(bin_id < BIN_SIZE);
}
return bin_id;
}
@ -469,16 +467,13 @@ static void HistogramAnalyzeEntropyBin(VP8LHistogramSet* const image_histo,
// bin-hash histograms on three of the dominant (literal, red and blue)
// symbol costs.
for (i = 0; i < histo_size; ++i) {
int num_histos;
VP8LHistogram* const histo = histograms[i];
const int16_t bin_id = low_effort ?
(int16_t)GetHistoBinIndexLowEffort(histo, &cost_range) :
(int16_t)GetHistoBinIndex(histo, &cost_range);
const VP8LHistogram* const histo = histograms[i];
const int bin_id = GetHistoBinIndex(histo, &cost_range, low_effort);
const int bin_offset = bin_id * bin_depth;
// bin_map[n][0] for every bin 'n' maintains the counter for the number of
// histograms in that bin.
// Get and increment the num_histos in that bin.
num_histos = ++bin_map[bin_offset];
const int num_histos = ++bin_map[bin_offset];
assert(bin_offset + num_histos < bin_depth * BIN_SIZE);
// Add histogram i'th index at num_histos (last) position in the bin_map.
bin_map[bin_offset + num_histos] = i;
@ -597,8 +592,8 @@ static int HistoQueueInit(HistoQueue* const histo_queue, const int max_index) {
histo_queue->max_size = max_index * max_index;
// We allocate max_size + 1 because the last element at index "size" is
// used as temporary data (and it could be up to max_size).
histo_queue->queue = WebPSafeMalloc(histo_queue->max_size + 1,
sizeof(*histo_queue->queue));
histo_queue->queue = (HistogramPair*)WebPSafeMalloc(
histo_queue->max_size + 1, sizeof(*histo_queue->queue));
return histo_queue->queue != NULL;
}
@ -636,8 +631,11 @@ static void UpdateQueueFront(HistoQueue* const histo_queue) {
// -----------------------------------------------------------------------------
static void PreparePair(VP8LHistogram** histograms, int idx1, int idx2,
HistogramPair* const pair,
VP8LHistogram* const histos) {
HistogramPair* const pair) {
VP8LHistogram* h1;
VP8LHistogram* h2;
double sum_cost;
if (idx1 > idx2) {
const int tmp = idx2;
idx2 = idx1;
@ -645,21 +643,24 @@ static void PreparePair(VP8LHistogram** histograms, int idx1, int idx2,
}
pair->idx1 = idx1;
pair->idx2 = idx2;
pair->cost_diff =
HistogramAddEval(histograms[idx1], histograms[idx2], histos, 0);
pair->cost_combo = histos->bit_cost_;
h1 = histograms[idx1];
h2 = histograms[idx2];
sum_cost = h1->bit_cost_ + h2->bit_cost_;
pair->cost_combo = 0.;
GetCombinedHistogramEntropy(h1, h2, sum_cost, &pair->cost_combo);
pair->cost_diff = pair->cost_combo - sum_cost;
}
// Combines histograms by continuously choosing the one with the highest cost
// reduction.
static int HistogramCombineGreedy(VP8LHistogramSet* const image_histo,
VP8LHistogram* const histos) {
static int HistogramCombineGreedy(VP8LHistogramSet* const image_histo) {
int ok = 0;
int image_histo_size = image_histo->size;
int i, j;
VP8LHistogram** const histograms = image_histo->histograms;
// Indexes of remaining histograms.
int* const clusters = WebPSafeMalloc(image_histo_size, sizeof(*clusters));
int* const clusters =
(int*)WebPSafeMalloc(image_histo_size, sizeof(*clusters));
// Priority queue of histogram pairs.
HistoQueue histo_queue;
@ -672,8 +673,7 @@ static int HistogramCombineGreedy(VP8LHistogramSet* const image_histo,
clusters[i] = i;
for (j = i + 1; j < image_histo_size; ++j) {
// Initialize positions array.
PreparePair(histograms, i, j, &histo_queue.queue[histo_queue.size],
histos);
PreparePair(histograms, i, j, &histo_queue.queue[histo_queue.size]);
UpdateQueueFront(&histo_queue);
}
}
@ -715,7 +715,7 @@ static int HistogramCombineGreedy(VP8LHistogramSet* const image_histo,
for (i = 0; i < image_histo_size; ++i) {
if (clusters[i] != idx1) {
PreparePair(histograms, idx1, clusters[i],
&histo_queue.queue[histo_queue.size], histos);
&histo_queue.queue[histo_queue.size]);
UpdateQueueFront(&histo_queue);
}
}
@ -736,11 +736,10 @@ static int HistogramCombineGreedy(VP8LHistogramSet* const image_histo,
return ok;
}
static VP8LHistogram* HistogramCombineStochastic(
VP8LHistogramSet* const image_histo,
VP8LHistogram* tmp_histo,
VP8LHistogram* best_combo,
int quality, int min_cluster_size) {
static void HistogramCombineStochastic(VP8LHistogramSet* const image_histo,
VP8LHistogram* tmp_histo,
VP8LHistogram* best_combo,
int quality, int min_cluster_size) {
int iter;
uint32_t seed = 0;
int tries_with_no_success = 0;
@ -800,7 +799,6 @@ static VP8LHistogram* HistogramCombineStochastic(
}
}
image_histo->size = image_histo_size;
return best_combo;
}
// -----------------------------------------------------------------------------
@ -808,24 +806,23 @@ static VP8LHistogram* HistogramCombineStochastic(
// Find the best 'out' histogram for each of the 'in' histograms.
// Note: we assume that out[]->bit_cost_ is already up-to-date.
static void HistogramRemap(const VP8LHistogramSet* const orig_histo,
const VP8LHistogramSet* const image_histo,
static void HistogramRemap(const VP8LHistogramSet* const in,
const VP8LHistogramSet* const out,
uint16_t* const symbols) {
int i;
VP8LHistogram** const orig_histograms = orig_histo->histograms;
VP8LHistogram** const histograms = image_histo->histograms;
const int orig_histo_size = orig_histo->size;
const int image_histo_size = image_histo->size;
if (image_histo_size > 1) {
for (i = 0; i < orig_histo_size; ++i) {
VP8LHistogram** const in_histo = in->histograms;
VP8LHistogram** const out_histo = out->histograms;
const int in_size = in->size;
const int out_size = out->size;
if (out_size > 1) {
for (i = 0; i < in_size; ++i) {
int best_out = 0;
double best_bits =
HistogramAddThresh(histograms[0], orig_histograms[i], MAX_COST);
double best_bits = MAX_COST;
int k;
for (k = 1; k < image_histo_size; ++k) {
for (k = 0; k < out_size; ++k) {
const double cur_bits =
HistogramAddThresh(histograms[k], orig_histograms[i], best_bits);
if (cur_bits < best_bits) {
HistogramAddThresh(out_histo[k], in_histo[i], best_bits);
if (k == 0 || cur_bits < best_bits) {
best_bits = cur_bits;
best_out = k;
}
@ -833,20 +830,20 @@ static void HistogramRemap(const VP8LHistogramSet* const orig_histo,
symbols[i] = best_out;
}
} else {
assert(image_histo_size == 1);
for (i = 0; i < orig_histo_size; ++i) {
assert(out_size == 1);
for (i = 0; i < in_size; ++i) {
symbols[i] = 0;
}
}
// Recompute each out based on raw and symbols.
for (i = 0; i < image_histo_size; ++i) {
HistogramClear(histograms[i]);
for (i = 0; i < out_size; ++i) {
HistogramClear(out_histo[i]);
}
for (i = 0; i < orig_histo_size; ++i) {
for (i = 0; i < in_size; ++i) {
const int idx = symbols[i];
VP8LHistogramAdd(orig_histograms[i], histograms[idx], histograms[idx]);
VP8LHistogramAdd(in_histo[i], out_histo[idx], out_histo[idx]);
}
}
@ -920,11 +917,10 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize,
const float x = quality / 100.f;
// cubic ramp between 1 and MAX_HISTO_GREEDY:
const int threshold_size = (int)(1 + (x * x * x) * (MAX_HISTO_GREEDY - 1));
cur_combo = HistogramCombineStochastic(image_histo,
tmp_histos->histograms[0],
cur_combo, quality, threshold_size);
HistogramCombineStochastic(image_histo, tmp_histos->histograms[0],
cur_combo, quality, threshold_size);
if ((image_histo->size <= threshold_size) &&
!HistogramCombineGreedy(image_histo, cur_combo)) {
!HistogramCombineGreedy(image_histo)) {
goto Error;
}
}

View File

@ -14,6 +14,7 @@
// Author: Jyrki Alakuijala (jyrki@google.com)
// Converted to C by Aleksander Kramarz (akramarz@google.com)
#include <assert.h>
#include <stdlib.h>
#include "../dsp/lossless.h"
@ -23,42 +24,14 @@
#define MIN_DIM_FOR_NEAR_LOSSLESS 64
#define MAX_LIMIT_BITS 5
// Computes quantized pixel value and distance from original value.
static void GetValAndDistance(int a, int initial, int bits,
int* const val, int* const distance) {
const int mask = ~((1 << bits) - 1);
*val = (initial & mask) | (initial >> (8 - bits));
*distance = 2 * abs(a - *val);
}
// Clamps the value to range [0, 255].
static int Clamp8b(int val) {
const int min_val = 0;
const int max_val = 0xff;
return (val < min_val) ? min_val : (val > max_val) ? max_val : val;
}
// Quantizes values {a, a+(1<<bits), a-(1<<bits)} and returns the nearest one.
// Quantizes the value up or down to a multiple of 1<<bits (or to 255),
// choosing the closer one, resolving ties using bankers' rounding.
static int FindClosestDiscretized(int a, int bits) {
int best_val = a, i;
int min_distance = 256;
for (i = -1; i <= 1; ++i) {
int candidate, distance;
const int val = Clamp8b(a + i * (1 << bits));
GetValAndDistance(a, val, bits, &candidate, &distance);
if (i != 0) {
++distance;
}
// Smallest distance but favor i == 0 over i == -1 and i == 1
// since that keeps the overall intensity more constant in the
// images.
if (distance < min_distance) {
min_distance = distance;
best_val = candidate;
}
}
return best_val;
const int mask = (1 << bits) - 1;
const int biased = a + (mask >> 1) + ((a >> bits) & 1);
assert(bits > 0);
if (biased > 0xff) return 0xff;
return biased & ~mask;
}
// Applies FindClosestDiscretized to all channels of pixel.
@ -124,22 +97,11 @@ static void NearLossless(int xsize, int ysize, uint32_t* argb,
}
}
static int QualityToLimitBits(int quality) {
// quality mapping:
// 0..19 -> 5
// 0..39 -> 4
// 0..59 -> 3
// 0..79 -> 2
// 0..99 -> 1
// 100 -> 0
return MAX_LIMIT_BITS - quality / 20;
}
int VP8ApplyNearLossless(int xsize, int ysize, uint32_t* argb, int quality) {
int i;
uint32_t* const copy_buffer =
(uint32_t*)WebPSafeMalloc(xsize * 3, sizeof(*copy_buffer));
const int limit_bits = QualityToLimitBits(quality);
const int limit_bits = VP8LNearLosslessBits(quality);
assert(argb != NULL);
assert(limit_bits >= 0);
assert(limit_bits <= MAX_LIMIT_BITS);

Some files were not shown because too many files have changed in this diff Show More