Compare commits

..

155 Commits

Author SHA1 Message Date
a289d8e774 update ChangeLog
Change-Id: Ia1e4669e6270faa6daae6306f47baa31488f119d
2017-11-25 19:01:58 -08:00
c10a493caf vwebp: disable double buffering on windows & mac
this results in flickering with animated webp + alpha. disabling the
option is a workaround to restore the behavior to the previous release.

BUG=webp:365

Change-Id: Iac7fcc0d483837e76cc54ad3f26c4e0e5511e31d
2017-11-25 18:22:39 -08:00
0d4466c2b4 webp_to_sdl.c: fix file mode
Change-Id: I1ca8506d40a60cd6db7f6f0d63a4431848bb0ea9
2017-11-25 13:52:03 -08:00
1b27bf8b76 WEBP_REDUCE_SIZE: disable all rescaler code
BUG=webp:355

Change-Id: Id87cb11902e3fb8544a214308526ea9665ce8440
(cherry picked from commit 0df22b9eed)
2017-11-24 22:40:15 -08:00
126be10950 webpinfo: add -version option
Change-Id: I5861d5ccd2119dd6749dc70b65fd145b5a732f98
2017-11-24 14:21:05 -08:00
9add62b581 bump version to 0.6.1
libwebp{,decoder} - 0.6.1
libwebp libtool - 7.1.0
libwebpdecoder libtool - 3.1.0

mux - 0.4.1
libtool - 3.1.0

demux - 0.3.3
libtool - 2.3.0

Change-Id: If4a95c6e9829d4a608028ee9258b5c2b7af60c37
2017-11-24 21:03:05 +00:00
d3e2614493 update NEWS
Change-Id: I66c8abe05b54558030a8555d80010506730ecbe9
2017-11-24 12:21:44 +00:00
2edda639b2 README: add webpinfo section
Change-Id: Iee4a4ecbe562d6154f627ba62524cd1871a06564
2017-11-23 23:58:22 -08:00
9ca568ef82 Merge "right-size some tables" 2017-11-24 06:28:02 +00:00
31f1995cc5 Merge "SSE2 implementation of HasAlphaXXX" 2017-11-24 06:25:58 +00:00
a80c46bd87 SSE2 implementation of HasAlphaXXX
Change-Id: I2548d9a0c252e20ee3cf5f4be736a3703671ecb4
HasAlpha32b: ~3-4x faster
HasAlpha8b: ~7-8x faster
2017-11-23 15:02:21 +01:00
083507f244 right-size some tables
Change-Id: I5e894fd3f0f4b666512108495d8cecb34f65e119
2017-11-23 08:40:58 +00:00
2e5785b233 anim_utils.c: remove warning when !defined(WEBP_HAVE_GIF)
BUG=webp:355
Change-Id: I57d77672260771bb0c2ab3c802388f2d62b1f187
2017-11-23 00:22:33 -08:00
b299c47eac add WEBP_REDUCE_SIZE
remove auto-filter (-af) support and make WebPPictureCopy,
WebPPictureIsView, WebPPictureView, WebPPictureCrop, and
WebPPictureRescale noops.

Change-Id: If39d512cc268a0015298a1138dbc94feb86575e5
2017-11-22 17:35:39 -08:00
f593d71a64 enc: disable pic->stats/extra_info w/WEBP_DISABLE_STATS
Change-Id: I4ca3fa45710bd6bbe131b2ae047b1e268241657c
2017-11-22 17:04:01 -08:00
541179a9a5 Merge "predictor_enc: fix build w/--disable-near-lossless" 2017-11-23 01:02:59 +00:00
5755a7ec53 predictor_enc: fix build w/--disable-near-lossless
Change-Id: I0d01cdc77e72663f1cf778e3cf3066cd54aa5439
2017-11-22 15:02:21 -08:00
eab5bab74f add WEBP_DISABLE_STATS
use to to make WebPPictureDistortion & WebPPlaneDistortion noops and
clear some ssim code.

Change-Id: I9b50b2318b7a114632e5a237a4002f64e95afbbc
2017-11-22 12:41:17 -08:00
8052c585b3 remove some petty TODOs from vwebp.
they're rather low-prio anyway.

Change-Id: I76dd74fcfb1c974c6f8a074472455d3f0b202e01
2017-11-22 19:22:52 +00:00
c245343dcb move LOAD8x4 and STORE8x2 closer to their use location
Change-Id: I674821732d3e607123070e4bbba87d9359c9a4ec
2017-11-21 23:44:39 -08:00
b9e734fd5c dec,cosmetics: normalize function naming style
Change-Id: I33a2d1b4133db7a6d56d506f5c19670f0268cecd
2017-11-21 14:31:34 -08:00
c188d546b3 dec: harmonize function suffixes
BUG=webp:355

Change-Id: Iabdfd3fbde906c2e35a7d7c080a8512425eb8ccb
2017-11-21 13:00:25 -08:00
28c5ac8104 dec_sse41: harmonize function suffixes
BUG=webp:355

Change-Id: Id55f7b2e6288d1d0885d8451fbc59771222073d6
2017-11-21 12:47:06 -08:00
e65b72a368 Merge "introduce WebPHasAlpha8b and WebPHasAlpha32b" 2017-11-21 06:21:44 +00:00
b94cee98fb dec_sse2: remove HE8uv_SSE2
with gcc-4.8, clang-4.0.1/5 this is no faster (actually up to 2x slower)
than the code generated for memset (0x01010... * dst[-1]). shuffles in
sse4 recover a bit, but performance is still down.

Change-Id: Ie85e8353f8ede559d0b05a1d388787fd18ecc80f
2017-11-20 20:34:05 -08:00
44a0ee3fa7 introduce WebPHasAlpha8b and WebPHasAlpha32b
Rewrote WebPPictureHasTransparency() to use them (even for argb).
This is 10% faster, for some reasons.

SSE2 version should be straightforward.
Removes a TODO.

Change-Id: I7ad5848fc5e355e2df505dbcd5a0f42fb6cbab41
2017-11-20 15:20:29 +01:00
aebf59ac50 Merge "WebPPictureAllocARGB: align argb allocation" 2017-11-17 07:27:41 +00:00
c184665ecd WebPPictureAllocARGB: align argb allocation
Change-Id: Ib390e8bbb97b38316a38af6a33e8a26bd050ee16
2017-11-16 20:19:09 -08:00
3daf7509c2 WebPParseHeaders: remove obsolete animation TODO
The WebPDemux and WebPAnimDecoder APIs are provided for the purpose of
animated webp parsing and decoding. No major changes are currently
planned for the libwebp API.

Change-Id: I2758ecda195b0c4091572d5731a0a85fa3716303
2017-11-16 20:18:21 -08:00
80285d97ad cmake: avoid security warnings under msvc
fopen, sscanf are error checked and only used in the examples. this
matches Makefile.vc.

Change-Id: I411c3ace6b5db092656d6b03dc5b438bd70616fc
2017-11-15 18:41:41 -08:00
650eac5542 cmake: don't set -Wall with MSVC
take the generator default (/W3); /Wall produces too many warnings that
can't be acted on.

Change-Id: I112d0f46456af2758ddfee6becc098447ca50b6f
2017-11-15 18:29:15 -08:00
c462cd0065 Remove useless code.
The casts are to the same type and the #define not used.

Change-Id: I8d69c3b9dde7a1c53c2ba5a026a653d8c2e1d2a7
2017-11-08 10:52:49 +01:00
01a98217ad Merge "remove WebPWorkerImpl declaration from the header" 2017-11-06 20:37:08 +00:00
3c49fc47e7 Merge "thread_utils: fix potentially bad call to Execute" 2017-11-06 20:36:13 +00:00
fde2782ecb thread_utils: fix potentially bad call to Execute
We must use the Interface, and avoid fwd decl.

Change-Id: I18d77a009a29921b6e3694de4df494952b11a83f
2017-11-05 16:39:29 -08:00
2a270c1df5 remove WebPWorkerImpl declaration from the header
BUG=webp:355

Change-Id: Ia4efce4e8f3745e5cdcac495f4a79a8c03062d88
2017-11-05 12:31:48 -08:00
f1f437cc89 remove mention of 'lossy-only parameters' from the doc
it's confusing and mildly accurate.

BUG=webp:355

Change-Id: Ie9667bcdf429f1092b8a523a473391b741164523
2017-11-05 19:38:24 +00:00
3879074d99 Merge "WebPMemToUint32: remove ptr cast to int" 2017-10-31 06:41:05 +00:00
04b029d236 WebPMemToUint32: remove ptr cast to int
this can result in an alignment hint on arm causing a SIGBUS. casting
the input ptr to anything aside from its type is unnecessary for memcpy
and is contrary to the intent of this function.

Change-Id: I9a4d3f4be90f80cd8c3e96ccbe557e51e34cf7a5
2017-10-30 17:08:46 -07:00
b7971d0e22 dsp: avoid defining _C functions w/NEON builds
when targeting NEON C functions with NEON equivalents won't be used, but
will contribute to binary size. the same goes for sse2, etc., but this
change is primarily concerned with binary sizes for android arm targets.

note '-noasm' or otherwise modifying VP8GetCPUInfo will have no effect
on the use of NEON functions.

this decision can be overridden by defining WEBP_DSP_OMIT_C_CODE to 0.

Change-Id: I47bd453c84a3d341ca39bc986a39eb9c785aface
2017-10-27 10:54:56 -07:00
6ba98764e8 webpdec: correct alloc size check w/use_argb
the allocation is always 4 x width regardless of the presence of alpha

Change-Id: I154bd7e5c0190d37abd669e17e18911ebb7e066c
2017-10-26 23:17:11 -07:00
5cfb3b0f6c normalize include guards
some fell out of sync after:
668e1dd4 src/{dec,enc,utils}: give filenames a unique suffix

Change-Id: I280d3b3f44797f3bfb4835784add50a41cdd3793
2017-10-21 00:06:23 -07:00
f433205ee3 Merge changes Ia17c7dfc,I75423abb,Ia2f716b4,I161caa14,I4210081a, ...
* changes:
  {dec,enc}_neon: harmonize function suffixes x2
  upsampling_neon: harmonize function suffixes
  yuv_neon: harmonize function suffixes
  rescaler_neon: harmonize function suffixes
  lossless_neon: harmonize function suffixes
  lossless_enc_neon: harmonize function suffixes
  filters_neon,cosmetics: fix indent
  enc_neon: harmonize function suffixes
  dec_neon: harmonize function suffixes
2017-10-21 06:34:14 +00:00
8d033b14d7 {dec,enc}_neon: harmonize function suffixes x2
+ neon.h

BUG=webp:355

Change-Id: Ia17c7dfc7d61742a4758823675a2d556a739c389
2017-10-20 19:00:53 -07:00
0295e9815d upsampling_neon: harmonize function suffixes
BUG=webp:355

Change-Id: I75423abbe0bcea3c98a42e412cc2116be81b5d08
2017-10-20 19:00:53 -07:00
d572c4e52b yuv_neon: harmonize function suffixes
BUG=webp:355

Change-Id: Ia2f716b459950c18717b062175197d1e6419bf2a
2017-10-20 19:00:53 -07:00
ab9c2500db rescaler_neon: harmonize function suffixes
BUG=webp:355

Change-Id: I161caa14f7ebbc3ae978b1722472625a77d0a4a4
2017-10-20 19:00:53 -07:00
93e0ce27f4 lossless_neon: harmonize function suffixes
BUG=webp:355

Change-Id: I4210081a39800b5c2589c443da237269908af666
2017-10-20 19:00:53 -07:00
22fbc50edd lossless_enc_neon: harmonize function suffixes
BUG=webp:355

Change-Id: I462facaeade4f0f4fc1e96895493306d095a6a9a
2017-10-20 19:00:53 -07:00
447875b47b filters_neon,cosmetics: fix indent
BUG=webp:355

Change-Id: I9df1119f1ea94868f75253a92c2e878c9290f744
2017-10-20 19:00:29 -07:00
e51bdd439c remove unused VP8TokenToStats() function
BUG=webp:355

Change-Id: I0ad6f13003ef7201431c54c7db395254191de67c
2017-10-20 04:29:45 -07:00
785da7eadd enc_neon: harmonize function suffixes
BUG=webp:355

Change-Id: Ie59efd271d16f12d21f3c800667dfc0980dc2e68
2017-10-20 00:18:32 -07:00
bc1a251fcf dec_neon: harmonize function suffixes
BUG=webp:355

Change-Id: I61c9a0c9e24515322955e04afd8c4ea6a44b9319
2017-10-20 00:14:18 -07:00
61e535f1ac dsp/lossless: workaround gcc-4.8 bug on arm
and all older versions.
force Sub3() to not be inlined, otherwise the code in Select() will be
incorrect.

extends the check add previously in:
637b3888 dsp/lossless: workaround gcc-4.9 bug on arm

BUG=webp:363

Change-Id: I1403b558f8660b764f3a570a3326822d5ef0be29
2017-10-19 13:05:48 -07:00
68b2eab7df cwebp: fix alpha reporting w/lossless & metadata
the incorrect bit was being extracted from the lossless bitstream header
causing the alpha flag in VP8X to be misreported. previously the
signature byte was ignored in the calculation of the offset.

since:
8ba1bf61 Stricter check for presence of alpha when writing lossless
images

BUG=webp:361

Change-Id: I7c618b5f01a37f5e4b799dee11a7949efaf88046
2017-10-18 12:35:56 -07:00
30042faa9a WebPDemuxGetI: add doc details around WebPFormatFeature
Change-Id: Id5f17686a8dbd6a6a0ba354d9216a8b89a51597c
2017-10-17 19:59:16 +00:00
0a17f4712c Merge "WIP: list includes as descendants of the project dir" 2017-10-11 08:21:42 +00:00
a439972175 WIP: list includes as descendants of the project dir
#include "(.|..)/..." -> #include "src/..."

Change-Id: I772880aa097a770722043c8a4393552ba38a89b6
2017-10-10 23:04:05 -07:00
0827570873 Merge "Make sure we reach the full range for alpha blending." 2017-10-09 09:21:21 +00:00
d361a6a733 yuv_sse2: harmonize function suffixes
BUG=webp:355

Change-Id: I02a66f7446c75a10c3ce4766235e5767617d0dce
2017-10-08 14:06:34 -07:00
6921aa6f0c upsampling_sse2: harmonize function suffixes
BUG=webp:355

Change-Id: I3a02cc717eb7506bd87511d6a17ab1691e84f72c
2017-10-08 14:06:30 -07:00
08c67d3ed1 ssim_sse2: harmonize function suffixes
BUG=webp:355

Change-Id: I1282559888118b8cb0a46b7f0aa627d26b8838f5
2017-10-08 14:06:24 -07:00
582a1b572a rescaler_sse2: harmonize function suffixes
BUG=webp:355

Change-Id: I978fd826ff90149c0ffd9d7607dcc6f88082d3e6
2017-10-08 14:06:19 -07:00
2c1b18ba2f lossless_sse2: harmonize function suffixes
BUG=webp:355

Change-Id: I59d828800c2ab2a36e0ea90f629b74bd57207411
2017-10-08 14:06:14 -07:00
0ac46e818b lossless_enc_sse2: harmonize function suffixes
BUG=webp:355

Change-Id: I06c64416103c3f3fc0519dd46d64b0a35f9798e4
2017-10-08 14:06:05 -07:00
bc634d57c2 enc_sse2: harmonize function suffixes
BUG=webp:355

Change-Id: Idd2f289fcf99f12bf36494111b07a8906c99c826
2017-10-08 14:05:59 -07:00
bcb7347c2b dec_sse2: harmonize function suffixes
BUG=webp:355

Change-Id: Ic0390a4a24a5d8caff5b8af9fc9d59769ec533b1
2017-10-07 15:14:03 -07:00
e14ad93c0a Make sure we reach the full range for alpha blending.
255*255*257>>16 == 254 while we want 255.

BUG=webp:360

Change-Id: I2b9ac18f8802145f5a3d500c149ad9eceacbd75b
2017-10-05 16:53:57 +02:00
7038ca8d52 demux,StoreFrame: restore hdr size check to min req
avoids over reading if the reported ANMF payload is < 8 bytes.

likely broken since:
81b8a741 Design change in ANMF and FRGM chunks:

Change-Id: I3e267bafea348a50545587dea8fafb2199c6b650
2017-10-03 23:18:18 -07:00
fb3daad604 cpu: fix ssse3 check
ssse3 is bit #9 in ecx, bit 1 is sse3. this only controls the check for
slow ssse3 and likely had no ill effect.

Change-Id: I84ce73dc480e1cdbd085e37be06f3f402116c201
2017-09-29 16:27:47 -07:00
be590e0644 Merge "Fix CMake redefinition for HAVE_CPU_FEATURES_H" 2017-09-26 05:59:05 +00:00
35f736e1ec Fix CMake redefinition for HAVE_CPU_FEATURES_H
It is still redefined, but to the same constant,
hence no more warning as mentioned on BUG=webp:358

Change-Id: I80a834c139d5d60cd693d468b0e2ea399729ab3e
2017-09-25 14:43:20 +02:00
a5216efc8c Fix integer overflow warning.
Though the overflow could happen, it does not change the
end results.

Change-Id: I1b84e022a0776d35eab5c5c4fb7d3563f5667bfa
2017-09-25 11:02:22 +02:00
a9c8916b87 decode.h,WebPIDecGetRGB: clarify output ptr validity
*last_y, *width, *height, *stride are only valid on non-NULL return

Change-Id: Iee2eeb29dd36392e2e7876d47df182a81dbb41ce
2017-09-22 16:58:33 -07:00
3c74c645ca gif2webp: handle 1-frame case properly + fix anim_diff
follow-up to commit b4e046778

Change-Id: I3c7617d09682262ca7929e6a3e56777b163cce45
2017-09-20 22:21:15 +00:00
c7f295d30c Merge "gif2webp: introduce -loop_compatibility option" 2017-09-20 09:13:37 +00:00
b4e046778d gif2webp: introduce -loop_compatibility option
This patch fixes the compatibility for loop-count handling.
This aims at addressing the change in Chrome handling of loop-count
prior to M63.

Before M63: loop-count interpretation was aligned to GIF's behaviour in
Chrome, but incompatible with WebP's spec. In particular, you couldn't
loop exactly once.

Post-M63: loop-count in WebP is really the total number of loops. Gif2webp
will convert correctly from a GIF source by adjusting the loop count.

Note: The Chrome version can be retrieved from the User-Agent
string (chrome://version).
An M63 version will contain the pattern:
   Chrome/63.x.xxxx.xx
for instance.

Change-Id: Ie6dc13227e6498f4d7af2f09247913648997648a
2017-09-20 07:37:49 +00:00
f78da3dea6 add LOCAL_CLANG_PREREQ and avoid WORK_AROUND_GCC w/3.8+
this results in a 15-20% speedup for lossy decoding on a N5/S6/CM1

BUG=webp:339

Change-Id: Icdeb84c3e0b8908147ac276b4d8f76c3d565b735
2017-09-19 20:59:49 -07:00
01c426f1e7 define WEBP_USE_INTRINSICS w/gcc-4.9+
32-bit builds are neutral to slightly faster using ndk r15c on a
N5/S6/CM1

BUG=webp:339

Change-Id: I94b9442e0ceaf2f5edb2b4026bc8b99cd77c918b
2017-09-19 20:59:43 -07:00
8635973dc3 use sdl-config (if available) to determine the link flags
this fixes the compilation of vwebp_sdl on MacOS.

BUG=webp:355

Change-Id: I48c5607f31965b220db6bf707cff22b7157e0bb6
2017-09-06 20:35:48 +00:00
e9459382b0 use CPPFLAGS before CFLAGS
this way, -Isrc/ is used *before* system-wide -I locations
like /opt/local/include/ for instance.

Change-Id: I83c97775aff87695720a0ff3fca5c3cf2bb62a09
2017-09-03 21:13:33 -07:00
4a9d788e40 Merge "Android.mk,mips: fix clang build with r15" 2017-09-01 06:32:06 +00:00
4fbdc9fb12 Android.mk,mips: fix clang build with r15
-integrated-as is now required, the opposite of r14

Change-Id: Ic478b2b3b933e66e7d159030eac29f58743eecda
2017-08-31 22:42:14 -07:00
a80fcc4ae1 ifdef code not used by Chrome/Android.
Change-Id: Id086f6fd602b1fe3dc9034764b6a920a696ff1d2
2017-08-31 14:02:05 +02:00
3993af127e Fix signed integer overflows.
Change-Id: I62c9949f0edac58d69d991d6be5f85ae9e4d62a9
2017-08-31 11:56:42 +02:00
f66f94ef36 anim_dump: small tool to dump frames from animated WebP
dumps frames to PNG, PAM or TIFF files.


Change-Id: I86a4d7e235cb7040cf5bbcae28270cb5a84be087
2017-08-30 06:12:03 +00:00
6eba857b75 Merge "rationalize the Makefile.am" 2017-08-29 21:05:20 +00:00
c5e34fba66 function definition cleanup
Change-Id: I1f6bb4405f09c0c863d0bd1c77340636abdbae69
2017-08-29 14:09:35 +02:00
3822762a6c rationalize the Makefile.am
one library addition per line, etc...

BUG=webp:355

Change-Id: I95761dea598a382db5632c5187210937e129ff75
2017-08-29 00:00:14 -07:00
501ef6e4e9 configure style fix: animdiff -> anim_diff
BUG=webp:355

Change-Id: I4a90a11e075bf30aaa34a0b4c08c9038b0eb8f60
2017-08-29 05:41:33 +00:00
f8bdc26821 Merge "protect against NULL dump_folder[] value in ReadAnimatedImage()" 2017-08-29 05:41:16 +00:00
23bfc652fe protect against NULL dump_folder[] value in ReadAnimatedImage()
BUG=webp:355

Change-Id: If449ed4b0bd7a11ce7c646eabe2a7b58a1abf009
2017-08-28 20:57:22 +00:00
8dc3d71ba0 cosmetics,ReadAnimatedWebP: correct function comment
BUG=webp:355

Change-Id: If671b67067ee6ebaa63e9710ddbb7d59b33dbc3b
2017-08-28 11:43:26 -07:00
5bd40066cc Merge changes I66a64a0a,I4d2e520f
* changes:
  cosmetics,webpinfo: remove an else after a return
  cosmetics,cwebp: remove an else after a return
2017-08-27 06:03:09 +00:00
7945575c92 cosmetics,webpinfo: remove an else after a return
this is redundant with an if block that returns

BUG=webp:355

Change-Id: I66a64a0a4932c5f419f6171684b72088c43e7136
2017-08-26 14:22:39 -07:00
8729fa1102 cosmetics,cwebp: remove an else after a return
this is redundant with an if block that returns

BUG=webp:355

Change-Id: I4d2e520f89a8e144ab13aa3696f305365e584a0e
2017-08-26 14:22:32 -07:00
f324b7f9ba cosmetics: normalize fn proto & decl param names
BUG=webp:355

Change-Id: If020eb3b064cdb11853d1fa47058acae34752ce3
2017-08-26 14:17:31 -07:00
869eb36983 CMake cleanups.
Change-Id: I177c856eb06e8fb2847a0d483114198a2a0a5dae
2017-08-25 14:01:56 +02:00
289e62a313 Remove declaration of unimplemented VP8ApplyNearLosslessPredict
Change-Id: I1bebea099a06bccfdd103ba0557320d6ad68af38
2017-08-23 17:16:29 +02:00
20a94186ce pnmdec,PAM: validate depth before calculating bytes_per_px
fixes potential integer overflow with corrupt files.

Change-Id: I03e27fb14ad559dcd47e2704afdb6be9a163e02e
2017-08-19 13:56:06 -07:00
34130afe8b anim_encode: fix integer overflow
calculate the file duration using unsigned math. this could still result
in an incorrect average duration calculation if there were multiple
rollovers. caching the duration is an option if it was desirable to
support such an extreme case.

Change-Id: I3875d94d081fec947c03a857055df6e27ff5351d
2017-08-18 16:52:57 -07:00
42c79aa66b Merge "Encoder: harmonize function suffixes" 2017-08-09 18:13:57 +00:00
b09307dcde Encoder: harmonize function suffixes
BUG=webp:355

Change-Id: Ia2fe95db7dfb303f3f64e390d43bc41b8933256c
2017-08-09 02:41:01 +00:00
bed0456d58 Merge "SSIM: harmonize the function suffix" 2017-08-09 02:37:39 +00:00
54f6a3cf3a lossless_sse2.c: fix some missed suffix changes
BUG=webp:355

Change-Id: If830e3169a4021899ed850aa7edfd94b81fa2cf9
2017-08-08 14:19:05 -07:00
088f1dcce8 SSIM: harmonize the function suffix
BUG=webp:355

Change-Id: I751852ddb2abb7319e41e6c7d022ac4f288b4d08
2017-08-08 08:52:06 -07:00
86fc4dd9f4 webpdec: use ImgIoUtilCheckSizeArgumentsOverflow
...prior to allocating a Picture. this is consistent with the other
readers and allows the allocation size to be limited at compile time

BUG=webp:355

Change-Id: Ib8e027ef863489b1e0f9e2a1403c3836da3ef48d
2017-08-08 07:02:02 -07:00
08ea9ecde3 imageio: add ability restrict max image size
WEBP_MAX_IMAGE_SIZE can be defined to control this limit.
Set it to 1.5GiB w/--config=asan-fuzzer to avoid OOM with large resolution
images. This limit leaves some headroom over the single image max of 2^14 *
2^14 * 4

BUG=webp:355

Change-Id: I4d48eb0a063638297a842582e0229dfd5a54df5f
2017-08-08 07:02:02 -07:00
6f9daa4a3a jpegdec,ReadError: fix leaks on error
everything post jpeg decoder creation should go through the error path
to ensure it's cleaned up properly

Change-Id: If78b4529e40797c67c3d0e624af1c036badea674
2017-08-08 07:02:00 -07:00
a0f72a4fe0 VP8LTransformColorFunc: drop an non-respected 'const' from the signature.
BUG=webp:355

Change-Id: Ie99bf377a55db2950bfbac9423bfe0967623ea5d
2017-08-07 19:05:01 -07:00
8c934902cd Merge "Lossess dec: harmonize the function suffixes" 2017-08-08 02:04:10 +00:00
622242aaba Lossess dec: harmonize the function suffixes
BUG=webp:355

Change-Id: I445d64df6aa2e347f41e7af306be12a77e2ac6a5
2017-08-07 18:22:41 -07:00
1411f02761 Lossless Enc: harmonize the function suffixes
BUG=webp:355

Change-Id: I8baf506bd2a27095b956ef22a862b071f60c0d72
2017-08-07 18:02:07 -07:00
24ad2e3c99 add const to two variables
Change-Id: I97374ccbf118baa59425346ffc439036a4482bf4
2017-08-07 23:02:15 +02:00
46efe062b8 Merge "Allow the lossless cruncher to work for alpha." 2017-08-07 21:00:02 +00:00
8c3f9a4706 Speed-up LZ77.
No need to look for a bigger maximum reach if we reach the end
of the image.

Change-Id: I56b166f9266f15cdff5aa59a75559972db19858f
2017-08-07 21:01:27 +02:00
1aef4c710b Allow the lossless cruncher to work for alpha.
Change-Id: I7185e75404fae8c739e0536026f4687d25decad7
2017-08-07 20:58:05 +02:00
b8821dbd81 Improve the box LZ77 speed.
For a pixel, we look for the longest match starting in a window around it.
For the following pixel, the previous result can be used and smaller
search window is used.

Change-Id: Ice16f9a7c8754099d068380848f0d77de3f756ac
2017-08-06 20:15:20 +02:00
7beed2807b add missing ()s to macro parameters
BUG=webp:355

Change-Id: I616c6d3540d6551edd1b1cfdb5bffcf0a044c90f
2017-08-04 17:02:53 -07:00
6473d20b3e Merge "fix Android standalone toolchain build" 2017-08-04 18:25:21 +00:00
dcefed950b Merge "build.gradle: fix arm64 build" 2017-08-03 02:21:26 +00:00
0c83a8bc69 Merge "yuv: harmonize suffix naming" 2017-08-02 06:35:36 +00:00
c6d1db4b36 fix Android standalone toolchain build
add a check for cpu-features.h and rework some of the ifdef's around
android + neon. for android builds with cpu-features enabled the
*_neon.c files will still need to be flagged correctly (with e.g.,
.c.neon in Android.mk) to properly build them.

BUG=webp:353

Change-Id: I905ce305af0a204e560b915d8665093a3edaceb9
2017-08-01 22:59:03 -07:00
663a6d9d2e unify the ALTERNATE_CODE flag usage
Pattern is now:
 #if !defined(FLAG)
 #define FLAG 0   // ALTERNATE_CODE
 #endif
...
 #if (FLAG == 1)
 ...
 #else
  ...
 #endif    // FLAG
...

Removed some unused code / flags:
  WEBP_YUV_USE_TABLE, WEBP_REFERENCE_IMPLEMENTATION,
  experimental code,  VP8YUVInit(), ...

BUG=webp:355

Change-Id: I98deb9189446a4cfd665c13ea8aa1ce6a308c63f
2017-08-01 20:49:29 -07:00
73ea9f2702 yuv: harmonize suffix naming
BUG=webp:355

Change-Id: I403c4b3cdfc55b3b1648f98a1d189326a3e660a3
2017-08-01 20:40:00 -07:00
c71b68ac45 build.gradle: fix arm64 build
only armv7-a uses .c.neon, arm64 has NEON by default; this matches the
Android.mk

Change-Id: Iee2bcf6a532cf54e31866d9db6923c3f0a1d88ba
2017-08-01 18:31:13 -07:00
c4568b47fd Rescaler: harmonize the suffix naming
BUG=webp:355

Change-Id: I7720502c62f96c780793d3d881eac7b3afae1418
2017-08-01 23:49:44 +00:00
6cb13b0532 Merge "alpha_processing: harmonize the naming suffixes to be _C()" 2017-08-01 03:38:03 +00:00
83a3e69a20 Merge "simplify WEBP_EXTERN macro" 2017-08-01 03:29:12 +00:00
7295fde2e6 Merge "filters: harmonize the suffixes naming to _SSE2(), _C(), etc." 2017-08-01 01:55:48 +00:00
8e42ba4c80 simplify WEBP_EXTERN macro
including the type in the macro doesn't bring much benefit to ordering,
current platforms work with a prefix, this would be insufficient if the
attribute needed to follow the function prototype. this form makes it
easier to override on the command line.

BUG=webp:355

Change-Id: Iba41ec0bb319403054be0e899c4cc472dd932fd9
2017-07-31 18:27:52 -07:00
331ab34bcd cost*.c: harmonize the suffix namings
BUG=webp:355

Change-Id: Ic2e60eaab71cdffe1ebf93fc36aaa3eb25bbf08d
2017-07-31 17:18:32 -07:00
b161f670f8 filters: harmonize the suffixes naming to _SSE2(), _C(), etc.
BUG=webp:355

Change-Id: I28f464eb13444c3046332cdda3c547f81700ecf4
2017-08-01 00:09:05 +00:00
dec5e4d330 alpha_processing: harmonize the naming suffixes to be _C()
BUG=webp:355

Change-Id: Iae8221cd34957764ead21aa46abfc320e5514a4b
2017-07-31 23:34:24 +00:00
6878d42720 fix memory leak in SDL_Init()
we use a static guard to only call SDL_Init() once.

Another option would be to call SDL_Quit(), but it doesn't seem to be
doing anything.

Also, fix the HTML code and add 'use strict' directive.

BUG=webp:354

Change-Id: I3c6421e2c1c8cc200556cd4092a0ead9a8b054ef
2017-07-31 12:47:02 -07:00
461ae5551b Merge "configure: fix warnings in sdl check" 2017-07-18 07:48:34 +00:00
62486a2206 configure: test for -Wundef
Change-Id: Ia4bcde5276ae9572c5b3152e578d59a4e3552988
2017-07-11 16:12:36 -07:00
92982609bc dsp.h: fix -Wundef w/__mips_dsp_rev
Change-Id: I552a543c7b039774041b43ace75b0cbea566b119
2017-07-11 16:12:32 -07:00
0265cede89 configure: fix warnings in sdl check
correct shell syntax, drop trailing ','

Change-Id: I0535bdf92f03e17902a844d8e7d742b351ce4822
2017-07-11 15:38:18 -07:00
88c73d8a7a backward_references_enc.h: fix WINDOW_SIZE_BITS check
this check was relocated in:
b903b80c Split cost-based backward references in its own file.

quiets -Wundef

Change-Id: I7f7a4773fb8cc77ca9f671b11f50d5db2275d415
2017-07-11 15:36:14 -07:00
4ea49f6b82 rescaler_sse2.c: fix WEBP_RESCALER_FIX -> _RFIX typo
quiets -Wundef

Change-Id: I8f1facf401b6f1ab393005c93086ac3e2ae354d5
2017-07-11 15:35:27 -07:00
1b526638b8 Clean-up some CMake
- some image libraries define _INCLUDE_DIR and not INCLUDE_DIRS.
- CMakePushCheckState is a non-destructive way of changing the
CMAKE_REQUIRED_* variables.

Change-Id: I7d318a0ddf9754a5c047f47ba1713f9f94e1ec1c
2017-07-10 16:29:33 +02:00
87f57a4b62 Merge "cmake: fix gif lib detection when cross compiling" 2017-07-07 07:55:48 +00:00
b34a9db1a1 cosmetics,dec_sse2: remove some redundant comments
Change-Id: I5a59d6dde9b6638b318f36d51d0d53870a3de273
2017-07-06 23:19:18 -07:00
471c5755fc cmake: fix gif lib detection when cross compiling
GIF find_package only locates the header and library, it doesn't fail
compile tests when detecting the version, but falls back to 3 (as of at
least cmake 3.7.2). Make sure the library links to avoid incorrect
detection when cross compiling.

Change-Id: I0224180bbdf800ab261a9333236730b23c006e69
2017-07-06 15:33:31 -07:00
c793417a3c cmake: disable gif2webp if gif lib isn't found
Change-Id: I218d5c0e3f4a82905c743705382cf02d26b29e6d
2017-07-06 15:33:01 -07:00
dcbc1c881a cmake: split gif detection from IMG deps
gifdec isn't part of imageio lib, it's only used by gif2webp.

Change-Id: I70bff378a32f8fb2ebb8a5a7701049ffff7f7992
2017-07-06 15:32:55 -07:00
66ad84f0f9 Merge "muxread: remove unreachable code" 2017-07-05 23:48:45 +00:00
50ec3ab790 muxread: remove unreachable code
IDX_UNKNOWN chunks are handled separately in WebPMuxGetChunk()

Change-Id: Iaa0d3a1cd1080264c27671637551f52f71d73b07
2017-07-05 15:47:50 -07:00
7d67a1646d Lossy encoding: smoothen transparent areas to improve compression
If "exact" is false, we can modify the luma samples in fully transparent
areas to facilitate lossy compression. Experiments on some PNG images
show compression improvement of more than 20%.

Change-Id: I1a728cfa920a6652bc1f600d87c01f7f648c4942
2017-07-05 10:03:01 -07:00
e50650c77f Merge "fix signature for DISABLE_TOKEN_BUFFER compilation" 2017-07-02 07:33:52 +00:00
671d2567d4 fix signature for DISABLE_TOKEN_BUFFER compilation
Change-Id: Idb7aa2503c7870ef3b15a23325832428d47ae7c6
2017-07-01 14:46:28 -07:00
d67555809f cpu.cmake: use unique flag to test simd disable flags
this avoids the result being cached in a common variable causing
potentially incorrect results in future tests and improves the logging
output

BUG=webp:351

Change-Id: I4b3110beec38f505fc8f835c3ea689a7d81a36bb
2017-06-28 19:13:34 -07:00
28914528e1 Merge "Remove the argb* files." 2017-06-25 15:57:17 +00:00
8acb4942f7 Remove the argb* files.
Half of the functionality was duplicated.
The rest is about the alpha channel handling so we
might as well put it in the appropriate file.

Change-Id: I8d5ef0afce82cc4842ab7132fd97995c42e6140a
2017-06-25 14:44:33 +02:00
206 changed files with 4852 additions and 5435 deletions

View File

@ -55,9 +55,6 @@ dsp_dec_srcs := \
src/dsp/alpha_processing_neon.$(NEON) \
src/dsp/alpha_processing_sse2.c \
src/dsp/alpha_processing_sse41.c \
src/dsp/argb.c \
src/dsp/argb_mips_dsp_r2.c \
src/dsp/argb_sse2.c \
src/dsp/cpu.c \
src/dsp/dec.c \
src/dsp/dec_clip_tables.c \

View File

@ -4,17 +4,17 @@ project(libwebp C)
# Options for coder / decoder executables.
option(WEBP_ENABLE_SIMD "Enable any SIMD optimization." ON)
option(WEBP_ENABLE_WASM "Enable WebAssembly optimizations." OFF)
option(WEBP_BUILD_CWEBP "Build the cwebp command line tool." OFF)
option(WEBP_BUILD_DWEBP "Build the dwebp command line tool." OFF)
option(WEBP_BUILD_GIF2WEBP "Build the gif2webp conversion tool." OFF)
option(WEBP_BUILD_IMG2WEBP "Build the img2webp animation tool." OFF)
option(WEBP_BUILD_WEBPINFO "Build the webpinfo command line tool." OFF)
option(WEBP_BUILD_WEBP_JS "Emscripten build of webp.js." OFF)
option(WEBP_ENABLE_NEAR_LOSSLESS "Enable near-lossless encoding" ON)
option(WEBP_EXPERIMENTAL_FEATURES "Build with experimental features." OFF)
option(WEBP_ENABLE_SWAP_16BIT_CSP "Enable byte swap for 16 bit colorspaces." OFF)
if(WEBP_BUILD_WEBP_JS OR WEBP_ENABLE_WASM)
if(WEBP_BUILD_WEBP_JS)
set(WEBP_ENABLE_SIMD OFF)
endif()
@ -27,19 +27,13 @@ if(NOT CMAKE_BUILD_TYPE)
)
endif()
include(cmake/config.h.cmake)
# Extract the version of the library.
file(READ ${CMAKE_CURRENT_SOURCE_DIR}/configure.ac SOURCE_FILE)
string(REGEX MATCH "[0-9.]+" WEBP_VERSION ${SOURCE_FILE})
# Include dependencies.
include(cmake/deps.cmake)
################################################################################
# Options.
if(WEBP_ENABLE_SWAP_16BIT_CSP)
add_definitions(-DWEBP_SWAP_16BIT_CSP)
endif()
if(WEBP_ENABLE_WASM)
add_definitions(-DWEBP_USE_WASM)
add_definitions(-DWEBP_SWAP_16BIT_CSP=1)
endif()
################################################################################
@ -54,7 +48,10 @@ if(ANDROID)
set(WEBP_DEP_INCLUDE_DIRS ${WEBP_DEP_INCLUDE_DIRS}
${ANDROID_NDK}/sources/android/cpufeatures
)
add_definitions(-DHAVE_CPU_FEATURES_H)
add_definitions(-DHAVE_CPU_FEATURES_H=1)
set(HAVE_CPU_FEATURES_H 1)
else()
set(HAVE_CPU_FEATURES_H 0)
endif()
################################################################################
@ -106,8 +103,13 @@ endforeach()
### Define the mandatory libraries.
# Build the webpdecoder library.
add_definitions(-Wall)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/src/ ${WEBP_DEP_INCLUDE_DIRS})
if(MSVC)
# avoid security warnings for e.g., fopen() used in the examples.
add_definitions(-D_CRT_SECURE_NO_WARNINGS)
else()
add_definitions(-Wall)
endif()
include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${WEBP_DEP_INCLUDE_DIRS})
add_library(webpdecode OBJECT ${WEBP_DEC_SRCS})
add_library(webpdspdecode OBJECT ${WEBP_DSP_COMMON_SRCS} ${WEBP_DSP_DEC_SRCS})
add_library(webputilsdecode OBJECT ${WEBP_UTILS_COMMON_SRCS}
@ -145,13 +147,13 @@ function(parse_version FILE NAME VAR)
set(${VAR} "${VERSION}" PARENT_SCOPE)
endfunction()
parse_version(Makefile.am webp WEBP_WEBP_SOVERSION)
set_target_properties(webp PROPERTIES VERSION ${WEBP_VERSION}
set_target_properties(webp PROPERTIES VERSION ${PACKAGE_VERSION}
SOVERSION ${WEBP_WEBP_SOVERSION})
parse_version(Makefile.am webpdecoder WEBP_DECODER_SOVERSION)
set_target_properties(webpdecoder PROPERTIES VERSION ${WEBP_VERSION}
set_target_properties(webpdecoder PROPERTIES VERSION ${PACKAGE_VERSION}
SOVERSION ${WEBP_DECODER_SOVERSION})
parse_version(demux/Makefile.am webpdemux WEBP_DEMUX_SOVERSION)
set_target_properties(webpdemux PROPERTIES VERSION ${WEBP_VERSION}
set_target_properties(webpdemux PROPERTIES VERSION ${PACKAGE_VERSION}
SOVERSION ${WEBP_DEMUX_SOVERSION})
# Define the libraries to install.
@ -167,11 +169,9 @@ math(EXPR WEBP_SIMD_FILES_TO_INCLUDE_RANGE
foreach(I_FILE RANGE ${WEBP_SIMD_FILES_TO_INCLUDE_RANGE})
list(GET WEBP_SIMD_FILES_TO_INCLUDE ${I_FILE} FILE)
list(GET WEBP_SIMD_FLAGS_TO_INCLUDE ${I_FILE} SIMD_COMPILE_FLAG)
if(NOT ${SIMD_COMPILE_FLAG} STREQUAL "NOTFOUND")
set_source_files_properties(${FILE} PROPERTIES
COMPILE_FLAGS ${SIMD_COMPILE_FLAG}
)
endif()
set_source_files_properties(${FILE} PROPERTIES
COMPILE_FLAGS ${SIMD_COMPILE_FLAG}
)
endforeach()
# Build the executables if asked for.
@ -200,6 +200,10 @@ if(WEBP_BUILD_CWEBP OR WEBP_BUILD_DWEBP OR
"imageenc_[^ ]*")
add_library(imageenc ${IMAGEENC_SRCS})
target_link_libraries(imageenc webp)
set_property(TARGET exampleutil imageioutil imagedec imageenc
PROPERTY INCLUDE_DIRECTORIES
${CMAKE_CURRENT_SOURCE_DIR}/src ${CMAKE_CURRENT_BINARY_DIR}/src)
endif()
if(WEBP_BUILD_DWEBP)
@ -210,6 +214,8 @@ if(WEBP_BUILD_DWEBP)
add_executable(dwebp ${DWEBP_SRCS})
target_link_libraries(dwebp exampleutil imagedec imageenc webpdecoder)
install(TARGETS dwebp RUNTIME DESTINATION bin)
set_property(TARGET dwebp PROPERTY INCLUDE_DIRECTORIES
${CMAKE_CURRENT_SOURCE_DIR}/src ${CMAKE_CURRENT_BINARY_DIR}/src)
endif()
if(WEBP_BUILD_CWEBP)
@ -220,6 +226,12 @@ if(WEBP_BUILD_CWEBP)
add_executable(cwebp ${CWEBP_SRCS})
target_link_libraries(cwebp exampleutil imagedec webp)
install(TARGETS cwebp RUNTIME DESTINATION bin)
set_property(TARGET cwebp PROPERTY INCLUDE_DIRECTORIES
${CMAKE_CURRENT_SOURCE_DIR}/src ${CMAKE_CURRENT_BINARY_DIR}/src)
endif()
if(WEBP_BUILD_GIF2WEBP AND NOT GIF_FOUND)
unset(WEBP_BUILD_GIF2WEBP CACHE)
endif()
if(WEBP_BUILD_GIF2WEBP OR WEBP_BUILD_IMG2WEBP)
@ -228,7 +240,7 @@ if(WEBP_BUILD_GIF2WEBP OR WEBP_BUILD_IMG2WEBP)
add_library(webpmux ${WEBP_MUX_SRCS})
target_link_libraries(webpmux webp)
parse_version(mux/Makefile.am webpmux WEBP_MUX_SOVERSION)
set_target_properties(webpmux PROPERTIES VERSION ${WEBP_VERSION}
set_target_properties(webpmux PROPERTIES VERSION ${PACKAGE_VERSION}
SOVERSION ${WEBP_MUX_SOVERSION})
list(APPEND INSTALLED_LIBRARIES webpmux)
endif()
@ -242,6 +254,8 @@ if(WEBP_BUILD_GIF2WEBP)
target_link_libraries(gif2webp exampleutil imageioutil webp webpmux
${WEBP_DEP_GIF_LIBRARIES})
install(TARGETS gif2webp RUNTIME DESTINATION bin)
set_property(TARGET gif2webp PROPERTY INCLUDE_DIRECTORIES
${CMAKE_CURRENT_SOURCE_DIR}/src ${CMAKE_CURRENT_BINARY_DIR}/src)
endif()
if(WEBP_BUILD_IMG2WEBP)
@ -252,6 +266,8 @@ if(WEBP_BUILD_IMG2WEBP)
add_executable(img2webp ${IMG2WEBP_SRCS})
target_link_libraries(img2webp exampleutil imagedec imageioutil webp webpmux)
install(TARGETS img2webp RUNTIME DESTINATION bin)
set_property(TARGET img2webp PROPERTY INCLUDE_DIRECTORIES
${CMAKE_CURRENT_SOURCE_DIR}/src ${CMAKE_CURRENT_BINARY_DIR}/src)
endif()
if (WEBP_BUILD_WEBPINFO)
@ -262,6 +278,8 @@ if (WEBP_BUILD_WEBPINFO)
add_executable(webpinfo ${WEBPINFO_SRCS})
target_link_libraries(webpinfo exampleutil imageioutil)
install(TARGETS webpinfo RUNTIME DESTINATION bin)
set_property(TARGET webpinfo PROPERTY INCLUDE_DIRECTORIES
${CMAKE_CURRENT_SOURCE_DIR}/src ${CMAKE_CURRENT_BINARY_DIR}/src)
endif()
if(WEBP_BUILD_WEBP_JS)
@ -269,6 +287,7 @@ if(WEBP_BUILD_WEBP_JS)
add_executable(webp_js
${CMAKE_CURRENT_SOURCE_DIR}/extras/webp_to_sdl.c)
target_link_libraries(webp_js webpdecoder SDL)
set(WEBP_HAVE_SDL 1)
set_target_properties(webp_js PROPERTIES LINK_FLAGS
"-s EXPORTED_FUNCTIONS='[\"_WebpToSDL\"]' -s INVOKE_RUN=0")
set_target_properties(webp_js PROPERTIES OUTPUT_NAME webp)
@ -286,6 +305,14 @@ if(WEBP_BUILD_WEBP_JS)
target_compile_definitions(webpdecoder PUBLIC EMSCRIPTEN)
endif()
# Generate the config.h file.
configure_file(${CMAKE_CURRENT_LIST_DIR}/cmake/config.h.in
${CMAKE_CURRENT_BINARY_DIR}/src/webp/config.h)
add_definitions(-DHAVE_CONFIG_H)
# The webp folder is included as we reference config.h as
# ../webp/config.h or webp/config.h
include_directories(${CMAKE_CURRENT_BINARY_DIR})
# Install the different headers and libraries.
install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/src/webp/decode.h
${CMAKE_CURRENT_SOURCE_DIR}/src/webp/demux.h
@ -302,7 +329,7 @@ install(TARGETS ${INSTALLED_LIBRARIES}
include(CMakePackageConfigHelpers)
write_basic_package_version_file(
"${CMAKE_CURRENT_BINARY_DIR}/WebPConfigVersion.cmake"
VERSION ${WEBP_VERSION}
VERSION ${PACKAGE_VERSION}
COMPATIBILITY AnyNewerVersion
)

283
ChangeLog
View File

@ -1,9 +1,292 @@
c10a493c vwebp: disable double buffering on windows & mac
0d4466c2 webp_to_sdl.c: fix file mode
1b27bf8b WEBP_REDUCE_SIZE: disable all rescaler code
126be109 webpinfo: add -version option
9add62b5 bump version to 0.6.1
d3e26144 update NEWS
2edda639 README: add webpinfo section
9ca568ef Merge "right-size some tables"
31f1995c Merge "SSE2 implementation of HasAlphaXXX"
a80c46bd SSE2 implementation of HasAlphaXXX
083507f2 right-size some tables
2e5785b2 anim_utils.c: remove warning when !defined(WEBP_HAVE_GIF)
b299c47e add WEBP_REDUCE_SIZE
f593d71a enc: disable pic->stats/extra_info w/WEBP_DISABLE_STATS
541179a9 Merge "predictor_enc: fix build w/--disable-near-lossless"
5755a7ec predictor_enc: fix build w/--disable-near-lossless
eab5bab7 add WEBP_DISABLE_STATS
8052c585 remove some petty TODOs from vwebp.
c245343d move LOAD8x4 and STORE8x2 closer to their use location
b9e734fd dec,cosmetics: normalize function naming style
c188d546 dec: harmonize function suffixes
28c5ac81 dec_sse41: harmonize function suffixes
e65b72a3 Merge "introduce WebPHasAlpha8b and WebPHasAlpha32b"
b94cee98 dec_sse2: remove HE8uv_SSE2
44a0ee3f introduce WebPHasAlpha8b and WebPHasAlpha32b
aebf59ac Merge "WebPPictureAllocARGB: align argb allocation"
c184665e WebPPictureAllocARGB: align argb allocation
3daf7509 WebPParseHeaders: remove obsolete animation TODO
80285d97 cmake: avoid security warnings under msvc
650eac55 cmake: don't set -Wall with MSVC
c462cd00 Remove useless code.
01a98217 Merge "remove WebPWorkerImpl declaration from the header"
3c49fc47 Merge "thread_utils: fix potentially bad call to Execute"
fde2782e thread_utils: fix potentially bad call to Execute
2a270c1d remove WebPWorkerImpl declaration from the header
f1f437cc remove mention of 'lossy-only parameters' from the doc
3879074d Merge "WebPMemToUint32: remove ptr cast to int"
04b029d2 WebPMemToUint32: remove ptr cast to int
b7971d0e dsp: avoid defining _C functions w/NEON builds
6ba98764 webpdec: correct alloc size check w/use_argb
5cfb3b0f normalize include guards
f433205e Merge changes Ia17c7dfc,I75423abb,Ia2f716b4,I161caa14,I4210081a, ...
8d033b14 {dec,enc}_neon: harmonize function suffixes x2
0295e981 upsampling_neon: harmonize function suffixes
d572c4e5 yuv_neon: harmonize function suffixes
ab9c2500 rescaler_neon: harmonize function suffixes
93e0ce27 lossless_neon: harmonize function suffixes
22fbc50e lossless_enc_neon: harmonize function suffixes
447875b4 filters_neon,cosmetics: fix indent
e51bdd43 remove unused VP8TokenToStats() function
785da7ea enc_neon: harmonize function suffixes
bc1a251f dec_neon: harmonize function suffixes
61e535f1 dsp/lossless: workaround gcc-4.8 bug on arm
68b2eab7 cwebp: fix alpha reporting w/lossless & metadata
30042faa WebPDemuxGetI: add doc details around WebPFormatFeature
0a17f471 Merge "WIP: list includes as descendants of the project dir"
a4399721 WIP: list includes as descendants of the project dir
08275708 Merge "Make sure we reach the full range for alpha blending."
d361a6a7 yuv_sse2: harmonize function suffixes
6921aa6f upsampling_sse2: harmonize function suffixes
08c67d3e ssim_sse2: harmonize function suffixes
582a1b57 rescaler_sse2: harmonize function suffixes
2c1b18ba lossless_sse2: harmonize function suffixes
0ac46e81 lossless_enc_sse2: harmonize function suffixes
bc634d57 enc_sse2: harmonize function suffixes
bcb7347c dec_sse2: harmonize function suffixes
e14ad93c Make sure we reach the full range for alpha blending.
7038ca8d demux,StoreFrame: restore hdr size check to min req
fb3daad6 cpu: fix ssse3 check
be590e06 Merge "Fix CMake redefinition for HAVE_CPU_FEATURES_H"
35f736e1 Fix CMake redefinition for HAVE_CPU_FEATURES_H
a5216efc Fix integer overflow warning.
a9c8916b decode.h,WebPIDecGetRGB: clarify output ptr validity
3c74c645 gif2webp: handle 1-frame case properly + fix anim_diff
c7f295d3 Merge "gif2webp: introduce -loop_compatibility option"
b4e04677 gif2webp: introduce -loop_compatibility option
f78da3de add LOCAL_CLANG_PREREQ and avoid WORK_AROUND_GCC w/3.8+
01c426f1 define WEBP_USE_INTRINSICS w/gcc-4.9+
8635973d use sdl-config (if available) to determine the link flags
e9459382 use CPPFLAGS before CFLAGS
4a9d788e Merge "Android.mk,mips: fix clang build with r15"
4fbdc9fb Android.mk,mips: fix clang build with r15
a80fcc4a ifdef code not used by Chrome/Android.
3993af12 Fix signed integer overflows.
f66f94ef anim_dump: small tool to dump frames from animated WebP
6eba857b Merge "rationalize the Makefile.am"
c5e34fba function definition cleanup
3822762a rationalize the Makefile.am
501ef6e4 configure style fix: animdiff -> anim_diff
f8bdc268 Merge "protect against NULL dump_folder[] value in ReadAnimatedImage()"
23bfc652 protect against NULL dump_folder[] value in ReadAnimatedImage()
8dc3d71b cosmetics,ReadAnimatedWebP: correct function comment
5bd40066 Merge changes I66a64a0a,I4d2e520f
7945575c cosmetics,webpinfo: remove an else after a return
8729fa11 cosmetics,cwebp: remove an else after a return
f324b7f9 cosmetics: normalize fn proto & decl param names
869eb369 CMake cleanups.
289e62a3 Remove declaration of unimplemented VP8ApplyNearLosslessPredict
20a94186 pnmdec,PAM: validate depth before calculating bytes_per_px
34130afe anim_encode: fix integer overflow
42c79aa6 Merge "Encoder: harmonize function suffixes"
b09307dc Encoder: harmonize function suffixes
bed0456d Merge "SSIM: harmonize the function suffix"
54f6a3cf lossless_sse2.c: fix some missed suffix changes
088f1dcc SSIM: harmonize the function suffix
86fc4dd9 webpdec: use ImgIoUtilCheckSizeArgumentsOverflow
08ea9ecd imageio: add ability restrict max image size
6f9daa4a jpegdec,ReadError: fix leaks on error
a0f72a4f VP8LTransformColorFunc: drop an non-respected 'const' from the signature.
8c934902 Merge "Lossess dec: harmonize the function suffixes"
622242aa Lossess dec: harmonize the function suffixes
1411f027 Lossless Enc: harmonize the function suffixes
24ad2e3c add const to two variables
46efe062 Merge "Allow the lossless cruncher to work for alpha."
8c3f9a47 Speed-up LZ77.
1aef4c71 Allow the lossless cruncher to work for alpha.
b8821dbd Improve the box LZ77 speed.
7beed280 add missing ()s to macro parameters
6473d20b Merge "fix Android standalone toolchain build"
dcefed95 Merge "build.gradle: fix arm64 build"
0c83a8bc Merge "yuv: harmonize suffix naming"
c6d1db4b fix Android standalone toolchain build
663a6d9d unify the ALTERNATE_CODE flag usage
73ea9f27 yuv: harmonize suffix naming
c71b68ac build.gradle: fix arm64 build
c4568b47 Rescaler: harmonize the suffix naming
6cb13b05 Merge "alpha_processing: harmonize the naming suffixes to be _C()"
83a3e69a Merge "simplify WEBP_EXTERN macro"
7295fde2 Merge "filters: harmonize the suffixes naming to _SSE2(), _C(), etc."
8e42ba4c simplify WEBP_EXTERN macro
331ab34b cost*.c: harmonize the suffix namings
b161f670 filters: harmonize the suffixes naming to _SSE2(), _C(), etc.
dec5e4d3 alpha_processing: harmonize the naming suffixes to be _C()
6878d427 fix memory leak in SDL_Init()
461ae555 Merge "configure: fix warnings in sdl check"
62486a22 configure: test for -Wundef
92982609 dsp.h: fix -Wundef w/__mips_dsp_rev
0265cede configure: fix warnings in sdl check
88c73d8a backward_references_enc.h: fix WINDOW_SIZE_BITS check
4ea49f6b rescaler_sse2.c: fix WEBP_RESCALER_FIX -> _RFIX typo
1b526638 Clean-up some CMake
87f57a4b Merge "cmake: fix gif lib detection when cross compiling"
b34a9db1 cosmetics,dec_sse2: remove some redundant comments
471c5755 cmake: fix gif lib detection when cross compiling
c793417a cmake: disable gif2webp if gif lib isn't found
dcbc1c88 cmake: split gif detection from IMG deps
66ad84f0 Merge "muxread: remove unreachable code"
50ec3ab7 muxread: remove unreachable code
7d67a164 Lossy encoding: smoothen transparent areas to improve compression
e50650c7 Merge "fix signature for DISABLE_TOKEN_BUFFER compilation"
671d2567 fix signature for DISABLE_TOKEN_BUFFER compilation
d6755580 cpu.cmake: use unique flag to test simd disable flags
28914528 Merge "Remove the argb* files."
8acb4942 Remove the argb* files.
3b62347b README: correct cmake invocation note
7ca0df13 Have the SSE2 version of PackARGB use common code.
7b250459 Merge "Re-use the transformed image when trying several LZ77 in lossless."
e132072f Re-use the transformed image when trying several LZ77 in lossless.
5d7a50ef Get code to compile in C++.
7b012987 configure: test for -Wparentheses-equality
f0569adb Fix man pages for multi-threading.
f1d5a397 multithread cruncher: only copy stats when picture->stats != NULL
f8c2ac15 Multi-thread the lossless cruncher.
a88c6522 Merge "Integrate a new LZ77 looking for matches in the neighborhood of a pixel only."
8f6df1d0 Unroll Predictors 10, 11 and 12.
355c3d1b Integrate a new LZ77 looking for matches in the neighborhood of a pixel only.
a1779a01 Refactor LZ77 handling in preparation for a new method.
67de68b5 Android.mk/build.gradle: fix mips build with clang from r14b
f209a548 Use the plane code and not the distance when computing statistics.
b903b80c Split cost-based backward references in its own file.
498cad34 Cosmetic changes in backward reference.
e4eb4587 lossless, VP8LTransformColor_C: make sure no overflow happens with colors.
af6deaff webpinfo: handle alpha flag mismatch
7caef29b Fix typo that creeped in.
39e19f92 Merge "near lossless: fix unsigned int overflow warnings."
9bbc0891 near lossless: fix unsigned int overflow warnings.
e1118d62 Merge "cosmetics,FindClosestDiscretized: use uint in mask creation"
186bc9b7 Merge "webpinfo: tolerate ALPH+VP8L"
b5887297 cosmetics,FindClosestDiscretized: use uint in mask creation
f1784aee near_lossless,FindClosestDiscretized: use unsigned ops
0d20abb3 webpinfo: tolerate ALPH+VP8L
972104b3 webpmux: tolerate false positive Alpha flag
dd7e83cc tiffdec,ReadTIFF: ensure data_size is < tsize_t max
d988eb7b tiffdec,MyRead: quiet -Wshorten-64-to-32 warning
dabda707 webpinfo: add support to parse Alpha bitstream
4c117643 webpinfo: correct background color output, BGRA->ARGB
defc98d7 Doc: clarify the role of quality in WebPConfig.
d78ff780 Merge "Fix code to compile with C++."
c8f14093 Fix code to compile with C++.
497dc6a7 pnmdec: sanitize invalid header output
d78e5867 Merge "configure: test for -Wconstant-conversion"
481e91eb Merge "pnmdec,PAM: set bytes_per_px based on depth when missing"
93b12753 configure: test for -Wconstant-conversion
645f0c53 pnmdec,PAM: set bytes_per_px based on depth when missing
e9154605 Merge "vwebp: activate GLUT double-buffering"
818d795b vwebp: activate GLUT double-buffering
d63e6f4b Add a man page for webpinfo
4d708435 Merge "NEON: implement ConvertRGB24ToY/BGR24/ARGB/RGBA32ToUV/ARGBToUV"
faf42213 NEON: implement ConvertRGB24ToY/BGR24/ARGB/RGBA32ToUV/ARGBToUV
b4d576fa Install man pages with CMake.
cbc1b921 webpinfo: add features to parse bitstream header
e644c556 Fix bad bit writer initialization.
b62cdad2 Merge "Implement a cruncher for lossless at method 6."
da3e4dfb use the exact constant for the gamma transfer function
a9c701e0 Merge "tiffdec: fix EXTRASAMPLES check"
adab8ce0 Implement a cruncher for lossless at method 6.
1b92b237 Merge "Fix VP8ApplyNearLossless to respect const and stride."
1923ff02 tiffdec: fix EXTRASAMPLES check
97cce5ba tiffdec: only request EXTRASAMPLES w/> 3 samples/px
0dcd85b6 Fix VP8ApplyNearLossless to respect const and stride.
f7682189 yuv: rationalize the C/SSE2 function naming
52245424 NEON implementation of some Sharp-YUV420 functions
690efd82 Avoid several backward reference copies.
4bb1f607 src/dec/vp8_dec.h, cosmetics: fix comments
285748be cmake: build/install webpinfo
78fd199c backward_references_enc.c: clear -Wshadow warnings
ae836410 WebPLog2FloorC: clear -Wshadow warning
d0b7404e Merge "WASM support"
134e314f WASM support
c08adb6f Merge "VP8LEnc: remove use of BitsLog2Ceiling()"
28c37ebd VP8LEnc: remove use of BitsLog2Ceiling()
2cb58ab2 webpinfo: output format as a human readable string
bb175a93 Merge "rename some symbols clashing with MSVC headers"
39eda658 Remove a duplicated pixel hash implementation.
36b8274d rename some symbols clashing with MSVC headers
274daf54 Add webpinfo tool.
ec5036e4 add explicit reference to /usr/local/{lib,inc}
18f0dfac Merge "fix TIFF encoder regarding rgbA/RGBA"
4e2b0b50 Merge "webpdec.h: fix a doc typo"
e2eeabff Merge "Install binaries, libraries and headers in CMake."
836607e6 webpdec.h: fix a doc typo
9273e441 fix TIFF encoder regarding rgbA/RGBA
17e3c11f Add limited PAM decoding support
5f624871 Install binaries, libraries and headers in CMake.
976adac1 Merge "lossless incremental decoding: fix missing eos_ test"
f8fad4fa lossless incremental decoding: fix missing eos_ test
27415d41 Merge "vwebp_sdl: fix the makefile.unix"
49566182 Merge "ImgIoUtilWriteFile(): use ImgIoUtilSetBinaryMode"
6f75a51b Analyze the transform entropy on the whole image.
a5e4e3af Use palette only if we can in entropy analysis.
75a9c3c4 Improve compression by better entropy analysis.
39cf6f4f vwebp_sdl: fix the makefile.unix
699b0416 ImgIoUtilWriteFile(): use ImgIoUtilSetBinaryMode
7d985bd1 Fix small entropy analysis bug.
6e7caf06 Optimize the color cache size.
833c9219 More efficient stochastic histogram merge.
5183326b Refactor the greedy histogram merge.
99f6f462 Merge "histogram_enc.c,MyRand: s/ul/u/ for unsigned constants"
80a22186 ssim.c: remove dead include
a128dfff histogram_enc.c,MyRand: s/ul/u/ for unsigned constants
693bf74e move the SSIM calculation code in ssim.c / ssim_sse2.c
10d791ca Merge "Fix the random generator in HistogramCombineStochastic."
fa63a966 Fix the random generator in HistogramCombineStochastic.
16be192f VP8LSetBitPos: remove the eos_ setting
027151ca don't erase the surface before blitting.
4105d565 disable WEBP_USE_XXX optimisations when EMSCRIPTEN is defined
9ee32a75 Merge "WebP-JS: emscripten-based Javascript decoder"
ca9f7b7d WebP-JS: emscripten-based Javascript decoder
868aa690 Perform greedy histogram merge in a unified way.
5b393f2d Merge "fix path typo for vwebp_sdl in Makefile.vc"
e0012bea CMake: only use libwebpdecoder for building dwebp
84c2a7b0 fix path typo for vwebp_sdl in Makefile.vc
1b0e4abf Merge "Add a flag to disable SIMD optimizations."
32263250 Add a flag to disable SIMD optimizations.
b494fdec optimize the ARGB->ARGB Import to use memcpy
f1536039 Merge "ReadWebP: decode directly into a pre-allocated buffer"
e69ed291 ReadWebP: decode directly into a pre-allocated buffer
57d8de8a Merge "vwebp_sdl: simple viewer based on SDL"
5cfd4ebc LZ77 interval speedups. Faster, smaller, simpler.
1e7ad88b PNM header decoder: add some basic numerical validation
17c7890c Merge "Add a decoder only library for WebP in CMake."
be733786 Merge "Add clang build fix for MSA"
03cda0e4 Add a decoder only library for WebP in CMake.
aa893914 Add clang build fix for MSA
31a92e97 Merge "imageio: add limited PNM support for reading"
dcf9d82a imageio: add limited PNM support for reading
6524fcd6 vwebp_sdl: simple viewer based on SDL
6cf24a24 get_disto: fix reference file read
43d472aa Merge tag 'v0.6.0'
50d1a848 update ChangeLog (tag: v0.6.0, origin/0.6.0, 0.6.0)
20a7fea0 extras/Makefile.am: fix libwebpextras.la reference
415f3ffe update ChangeLog (tag: v0.6.0-rc3)
3c6d1224 update NEWS
ee4a4141 update AUTHORS
32ed856f Fix "all|no frames are keyframes" settings.
1c3190b6 Merge "Fix "all|no frames are keyframes" settings."
f4dc56fd disable GradientUnfilter_NEON
4f3e3bbd disable GradientUnfilter_NEON
2dc0bdca Fix "all|no frames are keyframes" settings.
0d8e0588 img2webp: treat -loop as a no-op w/single images
b0450139 ReadImage(): restore size reporting
0ad3b4ef update ChangeLog (tag: v0.6.0-rc2)

View File

@ -29,7 +29,7 @@ PLATFORM_LDFLAGS = /SAFESEH
NOLOGO = /nologo
CCNODBG = cl.exe $(NOLOGO) /O2 /DNDEBUG
CCDEBUG = cl.exe $(NOLOGO) /Od /Gm /Zi /D_DEBUG /RTC1
CFLAGS = /Isrc $(NOLOGO) /W3 /EHsc /c
CFLAGS = /I. /Isrc $(NOLOGO) /W3 /EHsc /c
CFLAGS = $(CFLAGS) /DWIN32 /D_CRT_SECURE_NO_WARNINGS /DWIN32_LEAN_AND_MEAN
LDFLAGS = /LARGEADDRESSAWARE /MANIFEST /NXCOMPAT /DYNAMICBASE
LDFLAGS = $(LDFLAGS) $(PLATFORM_LDFLAGS)
@ -155,6 +155,7 @@ CFGSET = TRUE
!MESSAGE - all - build (de)mux-based targets for CFG
!MESSAGE - gif2webp - requires libgif & >= VS2013
!MESSAGE - anim_diff - requires libgif & >= VS2013
!MESSAGE - anim_dump
!MESSAGE
!MESSAGE RTLIBCFG controls the runtime library linkage - 'static' or 'dynamic'.
!MESSAGE 'legacy' will produce a Windows 2000 compatible library.
@ -233,9 +234,6 @@ DSP_DEC_OBJS = \
$(DIROBJ)\dsp\yuv_sse2.obj \
DSP_ENC_OBJS = \
$(DIROBJ)\dsp\argb.obj \
$(DIROBJ)\dsp\argb_mips_dsp_r2.obj \
$(DIROBJ)\dsp\argb_sse2.obj \
$(DIROBJ)\dsp\cost.obj \
$(DIROBJ)\dsp\cost_mips32.obj \
$(DIROBJ)\dsp\cost_mips_dsp_r2.obj \
@ -358,10 +356,15 @@ all: ex $(EXTRA_EXAMPLES)
# C99 support which is only available from VS2013 onward.
gif2webp: $(DIRBIN)\gif2webp.exe
anim_diff: $(DIRBIN)\anim_diff.exe
anim_dump: $(DIRBIN)\anim_dump.exe
$(DIRBIN)\anim_diff.exe: $(DIROBJ)\examples\anim_diff.obj $(EX_ANIM_UTIL_OBJS)
$(DIRBIN)\anim_diff.exe: $(EX_UTIL_OBJS) $(IMAGEIO_UTIL_OBJS)
$(DIRBIN)\anim_diff.exe: $(EX_GIF_DEC_OBJS) $(LIBWEBPDEMUX) $(LIBWEBP)
$(DIRBIN)\anim_dump.exe: $(DIROBJ)\examples\anim_dump.obj $(EX_ANIM_UTIL_OBJS)
$(DIRBIN)\anim_dump.exe: $(EX_UTIL_OBJS) $(IMAGEIO_UTIL_OBJS)
$(DIRBIN)\anim_dump.exe: $(EX_GIF_DEC_OBJS) $(LIBWEBPDEMUX) $(LIBWEBP)
$(DIRBIN)\anim_dump.exe: $(IMAGEIO_ENC_OBJS)
$(DIRBIN)\cwebp.exe: $(DIROBJ)\examples\cwebp.obj $(IMAGEIO_DEC_OBJS)
$(DIRBIN)\cwebp.exe: $(IMAGEIO_UTIL_OBJS)
$(DIRBIN)\dwebp.exe: $(DIROBJ)\examples\dwebp.obj $(IMAGEIO_DEC_OBJS)
@ -444,7 +447,7 @@ $(OUTPUT_DIRS):
$(DIROBJ)\$(DLLINC):
@echo #ifndef WEBP_DLL_H_ > $@
@echo #define WEBP_DLL_H_ >> $@
@echo #define WEBP_EXTERN(type) __declspec(dllexport) type >> $@
@echo #define WEBP_EXTERN __declspec(dllexport) >> $@
@echo #endif /* WEBP_DLL_H_ */ >> $@
.SUFFIXES: .c .obj .res .exe
@ -456,6 +459,9 @@ $(DIROBJ)\dsp\enc_avx2.obj: src\dsp\enc_avx2.c
$(DIROBJ)\examples\anim_diff.obj: examples\anim_diff.c
$(CC) $(CFLAGS) /DWEBP_HAVE_GIF /Fd$(LIBWEBP_PDBNAME) \
/Fo$(DIROBJ)\examples\ examples\$(@B).c
$(DIROBJ)\examples\anim_dump.obj: examples\anim_dump.c
$(CC) $(CFLAGS) /DWEBP_HAVE_GIF /Fd$(LIBWEBP_PDBNAME) \
/Fo$(DIROBJ)\examples\ examples\$(@B).c
$(DIROBJ)\examples\anim_util.obj: examples\anim_util.c
$(CC) $(CFLAGS) /DWEBP_HAVE_GIF /Fd$(LIBWEBP_PDBNAME) \
/Fo$(DIROBJ)\examples\ examples\$(@B).c

13
NEWS
View File

@ -1,3 +1,16 @@
- 11/24/2017: version 0.6.1
This is a binary compatible release.
* lossless performance and compression improvements + a new 'cruncher' mode
(-m 6 -q 100)
* ARM performance improvements with clang (15-20% w/ndk r15c, issue #339)
* webp-js: emscripten/webassembly based javascript decoder
* miscellaneous bug & build fixes (issue #329, #332, #343, #353, #360, #361,
#363)
Tool updates / additions:
added webpinfo - prints file format information (issue #330)
gif2webp - loop behavior modified to match Chrome M63+ (crbug.com/649264);
'-loop_compatibility' can be used for the old behavior
- 1/26/2017: version 0.6.0
* lossless performance and compression improvements
* miscellaneous performance improvements (SSE2, NEON, MSA)

25
README
View File

@ -4,7 +4,7 @@
\__\__/\____/\_____/__/ ____ ___
/ _/ / \ \ / _ \/ _/
/ \_/ / / \ \ __/ \__
\____/____/\_____/_____/____/v0.6.0
\____/____/\_____/_____/____/v0.6.1
Description:
============
@ -113,8 +113,8 @@ make install
CMake:
------
With CMake, you can compile libwebp, cwebp, dwebp, gif2web, img2webp and the
JS bindings.
With CMake, you can compile libwebp, cwebp, dwebp, gif2web, img2webp, webpinfo
and the JS bindings.
Prerequisites:
A compiler (e.g., gcc with autotools) and CMake.
@ -367,6 +367,23 @@ Use following options to convert into alternate image formats:
-quiet ....... quiet mode, don't print anything
-noasm ....... disable all assembly optimizations
WebP file analysis tool:
========================
'webpinfo' can be used to print out the chunk level structure and bitstream
header information of WebP files. It can also check if the files are of valid
WebP format.
Usage: webpinfo [options] in_files
Note: there could be multiple input files;
options must come before input files.
Options:
-version ........... Print version number and exit.
-quiet ............. Do not show chunk parsing information.
-diag .............. Show parsing error diagnosis.
-summary ........... Show chunk stats summary.
-bitstream_info .... Parse bitstream header.
Visualization tool:
===================
@ -477,6 +494,8 @@ Options:
-metadata <string> ..... comma separated list of metadata to
copy from the input to the output if present
Valid values: all, none, icc, xmp (default)
-loop_compatibility .... use compatibility mode for Chrome
version prior to M62 (inclusive)
-mt .................... use multi-threading if available
-version ............... print version number and exit

View File

@ -1,7 +1,7 @@
 __ __ ____ ____ ____ __ __ _ __ __
/ \\/ \/ _ \/ _ \/ _ \/ \ \/ \___/_ / _\
\ / __/ _ \ __/ / / (_/ /__
\__\__/\_____/_____/__/ \__//_/\_____/__/___/v0.4.0
\__\__/\_____/_____/__/ \__//_/\_____/__/___/v0.4.1
Description:

View File

@ -1,91 +0,0 @@
Description:
============
This file describes the compilation of libwebp using portable intrinsics /
WebAssembly (wasm) to native targets using clang and CMake.
Prerequisites:
==============
- cmake 2.8+
- clang 3.9+ for portable intrinsics support; as wasm progresses a tip of tree
build may be necessary.
Building:
=========
- configure the project with CMake using:
$ mkdir -p build && \
cd build && \
cmake -DWEBP_BUILD_DWEBP=1 -DCMAKE_C_COMPILER=clang -DWEBP_ENABLE_WASM=1 ../
- compile dwebp using 'make'.
- Note this currently generates native executables only and is incompatible
with -DWEBP_BUILD_WEBP_JS.
Build options:
==============
- platform specific multiply high (mulhi) implementation, disabled by default.
arm: -DCMAKE_C_FLAGS='-DENABLE_NEON_BUILTIN_MULHI_INT16X8 ...'
x86: -DCMAKE_C_FLAGS='-DENABLE_X86_BUILTIN_MULHI_INT16X8 ...'
Cross compilation:
==================
- arm toolchains can be obtained from:
http://www.linaro.org/downloads/
- the android ndk can be obtained from:
https://developer.android.com/ndk/downloads/index.html
armv7:
------
Android:
$ ./android-ndk-r15b/build/tools/make_standalone_toolchain.py \
--arch arm --api 24 --stl gnustl --install-dir /opt/android-arm-24
$ mkdir -p build && cd build
$ cmake ../libwebp \
-DWEBP_BUILD_DWEBP=1 \
-DCMAKE_C_COMPILER=/opt/android-arm-24/bin/clang \
-DCMAKE_PREFIX_PATH=/opt/android-arm-24/sysroot/usr/lib \
-DCMAKE_C_FLAGS=-fPIE \
-DCMAKE_EXE_LINKER_FLAGS=-Wl,-pie \
-DCMAKE_BUILD_TYPE=Release \
-DWEBP_ENABLE_WASM=1
Linux:
$ gcc_arm=/opt/gcc-arm; target=arm-linux-gnueabihf
$ mkdir -p build && cd build
$ cmake ../libwebp -DWEBP_BUILD_DWEBP=1 -DWEBP_ENABLE_WASM=1 \
-DCMAKE_C_COMPILER=clang \
-DCMAKE_C_FLAGS="--target=$target --gcc-toolchain=$gcc_arm --sysroot=$gcc_arm/$target/libc -march=armv7-a -mfpu=neon" \
-DCMAKE_PREFIX_PATH=$gcc_arm/$target/libc/usr
aarch64 / arm64:
----------------
Android:
$ ./android-ndk-r15b/build/tools/make_standalone_toolchain.py \
--arch arm64 --api 24 --stl gnustl --install-dir /opt/android-arm64-24
$ mkdir -p build && cd build
$ cmake ../libwebp \
-DWEBP_BUILD_DWEBP=1 \
-DCMAKE_C_COMPILER=/opt/android-arm64-24/bin/clang \
-DCMAKE_PREFIX_PATH=/opt/android-arm64-24/sysroot/usr/lib \
-DCMAKE_C_FLAGS=-fPIE \
-DCMAKE_EXE_LINKER_FLAGS=-Wl,-pie \
-DCMAKE_BUILD_TYPE=Release \
-DWEBP_ENABLE_WASM=1
Linux:
$ gcc_arm=/opt/gcc-aarch64; target=aarch64-linux-gnu
$ mkdir -p build && cd build
$ cmake ../libwebp -DWEBP_BUILD_DWEBP=1 -DWEBP_ENABLE_WASM=1 \
-DCMAKE_C_COMPILER=clang \
-DCMAKE_C_FLAGS="--target=$target --gcc-toolchain=$gcc_arm --sysroot=$gcc_arm/$target/libc" \
-DCMAKE_PREFIX_PATH=$gcc_arm/$target/libc/usr

View File

@ -31,11 +31,6 @@ using Emscripten and CMake.
- that's it! Upon completion, you should have the webp.js and
webp.js.mem files generated.
- Note this generates both webp_js and webp_wasm without any SIMD enabled due
to bugs with this toolchain associated with the SSE2 code.
-DWEBP_ENABLE_WASM is currently meant to generate native (x86, arm)
executables (dwebp, cwebp) and is incompatible with -DWEBP_BUILD_WEBP_JS.
The callable JavaScript function is WebPToSDL(), which decodes a raw WebP
bitstream into a canvas. See webp_js/index.html for a simple usage sample.

View File

@ -82,12 +82,14 @@ model {
}
}
// Check for NEON usage.
if (getTargetPlatform() == "arm" || getTargetPlatform() == "arm64") {
if (getTargetPlatform() == "arm") {
NEON = "c.neon"
cCompiler.define "HAVE_CPU_FEATURES_H"
} else {
NEON = "c"
}
cCompiler.args "-I" + file(".").absolutePath
}
// Link to pthread for shared libraries.
withType(SharedLibraryBinarySpec) {
@ -120,9 +122,6 @@ model {
include "alpha_processing_neon.$NEON"
include "alpha_processing_sse2.c"
include "alpha_processing_sse41.c"
include "argb.c"
include "argb_mips_dsp_r2.c"
include "argb_sse2.c"
include "cpu.c"
include "dec.c"
include "dec_clip_tables.c"

View File

@ -13,6 +13,9 @@
/* Set to 1 if __builtin_bswap64 is available */
#cmakedefine HAVE_BUILTIN_BSWAP64 1
/* Define to 1 if you have the <cpu-features.h> header file. */
#cmakedefine HAVE_CPU_FEATURES_H 1
/* Define to 1 if you have the <dlfcn.h> header file. */
#cmakedefine HAVE_DLFCN_H 1
@ -115,9 +118,19 @@
/* Set to 1 if JPEG library is installed */
#cmakedefine WEBP_HAVE_JPEG 1
/* Set to 1 if NEON is supported */
#cmakedefine WEBP_HAVE_NEON
/* Set to 1 if runtime detection of NEON is enabled */
/* TODO: handle properly in CMake */
#cmakedefine WEBP_HAVE_NEON_RTCD
/* Set to 1 if PNG library is installed */
#cmakedefine WEBP_HAVE_PNG 1
/* Set to 1 if SDL library is installed */
#cmakedefine WEBP_HAVE_SDL 1
/* Set to 1 if SSE2 is supported */
#cmakedefine WEBP_HAVE_SSE2 1
@ -127,6 +140,9 @@
/* Set to 1 if TIFF library is installed */
#cmakedefine WEBP_HAVE_TIFF 1
/* Enable near lossless encoding */
#cmakedefine WEBP_NEAR_LOSSLESS 1
/* Undefine this to disable thread support. */
#cmakedefine WEBP_USE_THREAD 1

View File

@ -1,4 +1,5 @@
## Check for SIMD extensions.
include(CMakePushCheckState)
function(webp_check_compiler_flag WEBP_SIMD_FLAG ENABLE_SIMD)
if(NOT ENABLE_SIMD)
@ -7,6 +8,8 @@ function(webp_check_compiler_flag WEBP_SIMD_FLAG ENABLE_SIMD)
return()
endif()
unset(WEBP_HAVE_FLAG_${WEBP_SIMD_FLAG} CACHE)
cmake_push_check_state()
set(CMAKE_REQUIRED_INCLUDES ${CMAKE_CURRENT_SOURCE_DIR})
check_c_source_compiles("
#include \"${CMAKE_CURRENT_LIST_DIR}/../src/dsp/dsp.h\"
int main(void) {
@ -17,6 +20,7 @@ function(webp_check_compiler_flag WEBP_SIMD_FLAG ENABLE_SIMD)
}
" WEBP_HAVE_FLAG_${WEBP_SIMD_FLAG}
)
cmake_pop_check_state()
if(WEBP_HAVE_FLAG_${WEBP_SIMD_FLAG})
set(WEBP_HAVE_${WEBP_SIMD_FLAG} 1 PARENT_SCOPE)
else()
@ -60,6 +64,7 @@ foreach(I_SIMD RANGE ${WEBP_SIMD_FLAGS_RANGE})
# First try with no extra flag added as the compiler might have default flags
# (especially on Android).
unset(WEBP_HAVE_${WEBP_SIMD_FLAG} CACHE)
cmake_push_check_state()
set(CMAKE_REQUIRED_FLAGS)
webp_check_compiler_flag(${WEBP_SIMD_FLAG} ${WEBP_ENABLE_SIMD})
if(NOT WEBP_HAVE_${WEBP_SIMD_FLAG})
@ -85,11 +90,8 @@ foreach(I_SIMD RANGE ${WEBP_SIMD_FLAGS_RANGE})
foreach(FILE ${SIMD_FILES})
list(APPEND WEBP_SIMD_FILES_NOT_TO_INCLUDE ${FILE})
endforeach()
# Explicitly disable SIMD. Avoid this with WASM to avoid an ICE with clang:
# https://bugs.chromium.org/p/webp/issues/detail?id=350
# WASM overrides the native SIMD so building it in is harmless aside from
# binary size.
if(NOT WEBP_ENABLE_WASM AND SIMD_DISABLE_FLAGS)
# Explicitly disable SIMD.
if(SIMD_DISABLE_FLAGS)
list(GET SIMD_DISABLE_FLAGS ${I_SIMD} SIMD_COMPILE_FLAG)
include(CheckCCompilerFlag)
if(SIMD_COMPILE_FLAG)
@ -104,11 +106,12 @@ foreach(I_SIMD RANGE ${WEBP_SIMD_FLAGS_RANGE})
set(COMMON_PATTERNS)
endif()
set(CMAKE_REQUIRED_DEFINITIONS ${SIMD_COMPILE_FLAG})
check_c_source_compiles("int main(void) {return 0;}" FLAG2
check_c_source_compiles("int main(void) {return 0;}"
FLAG_${SIMD_COMPILE_FLAG}
FAIL_REGEX "warning: argument unused during compilation:"
${COMMON_PATTERNS}
)
if(NOT FLAG2)
if(NOT FLAG_${SIMD_COMPILE_FLAG})
unset(HAS_COMPILE_FLAG CACHE)
endif()
endif()
@ -118,14 +121,5 @@ foreach(I_SIMD RANGE ${WEBP_SIMD_FLAGS_RANGE})
endif()
endif()
endif()
cmake_pop_check_state()
endforeach()
## Add *_wasm.c files if enabled.
if(WEBP_ENABLE_WASM)
file(GLOB SIMD_FILES "${CMAKE_CURRENT_LIST_DIR}/../"
"src/dsp/*_wasm.c"
)
foreach(FILE ${SIMD_FILES})
list(APPEND WEBP_SIMD_FILES_TO_INCLUDE ${FILE})
endforeach()
endif()

View File

@ -70,18 +70,43 @@ foreach(I_LIB PNG JPEG TIFF)
set(WEBP_HAVE_${I_LIB} ${${I_LIB}_FOUND})
if(${I_LIB}_FOUND)
list(APPEND WEBP_DEP_IMG_LIBRARIES ${${I_LIB}_LIBRARIES})
list(APPEND WEBP_DEP_IMG_INCLUDE_DIRS ${${I_LIB}_INCLUDE_DIRS})
list(APPEND WEBP_DEP_IMG_INCLUDE_DIRS
${${I_LIB}_INCLUDE_DIR} ${${I_LIB}_INCLUDE_DIRS})
endif()
endforeach()
if(WEBP_DEP_IMG_INCLUDE_DIRS)
list(REMOVE_DUPLICATES WEBP_DEP_IMG_INCLUDE_DIRS)
endif()
# GIF detection, gifdec isn't part of the imageio lib.
include(CMakePushCheckState)
set(WEBP_DEP_GIF_LIBRARIES)
set(WEBP_DEP_GIF_INCLUDE_DIRS)
find_package(GIF)
set(WEBP_HAVE_GIF ${GIF_FOUND})
if(GIF_FOUND)
list(APPEND WEBP_DEP_GIF_LIBRARIES ${GIF_LIBRARIES})
list(APPEND WEBP_DEP_GIF_INCLUDE_DIRS ${GIF_INCLUDE_DIR})
# GIF find_package only locates the header and library, it doesn't fail
# compile tests when detecting the version, but falls back to 3 (as of at
# least cmake 3.7.2). Make sure the library links to avoid incorrect
# detection when cross compiling.
cmake_push_check_state()
set(CMAKE_REQUIRED_LIBRARIES ${GIF_LIBRARIES})
set(CMAKE_REQUIRED_INCLUDES ${GIF_INCLUDE_DIR})
check_c_source_compiles("
#include <gif_lib.h>
int main(void) {
(void)DGifOpenFileHandle;
return 0;
}
" GIF_COMPILES
)
cmake_pop_check_state()
if(GIF_COMPILES)
list(APPEND WEBP_DEP_GIF_LIBRARIES ${GIF_LIBRARIES})
list(APPEND WEBP_DEP_GIF_INCLUDE_DIRS ${GIF_INCLUDE_DIR})
else()
unset(GIF_FOUND)
endif()
endif()
## Check for specific headers.
@ -139,13 +164,3 @@ strip_bracket(PACKAGE_URL)
set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}")
set(PACKAGE_TARNAME ${PACKAGE_NAME})
set(VERSION ${PACKAGE_VERSION})
## Generate the config.h header.
configure_file(${CMAKE_CURRENT_LIST_DIR}/config.h.in
${CMAKE_CURRENT_BINARY_DIR}/include/webp/config.h)
add_definitions(-DHAVE_CONFIG_H)
# The webp folder is included as we reference config.h as
# ../webp/config.h or webp/config.h
include_directories(${CMAKE_CURRENT_BINARY_DIR}/include
${CMAKE_CURRENT_BINARY_DIR}/include/webp
)

View File

@ -1,4 +1,4 @@
AC_INIT([libwebp], [0.6.0],
AC_INIT([libwebp], [0.6.1],
[https://bugs.chromium.org/p/webp],,
[http://developers.google.com/speed/webp])
AC_CANONICAL_HOST
@ -79,6 +79,7 @@ TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wold-style-definition])
TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wparentheses-equality])
TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wshadow])
TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wshorten-64-to-32])
TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wundef])
TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wunreachable-code])
TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wunused-but-set-variable])
TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wunused])
@ -444,12 +445,12 @@ AS_IF([test "x$enable_sdl" != "xno"], [
CLEAR_LIBVARS([SDL])
WITHLIB_OPTION([sdl], [SDL])
$sdl_header = "no";
sdl_header="no"
LIBCHECK_PROLOGUE([SDL])
AC_CHECK_HEADER([SDL/SDL.h], [sdl_header="SDL_SDL.h"],
[AC_CHECK_HEADER([SDL.h], [sdl_header="SDL.h"],
[AC_MSG_WARN(SDL library not available - no sdl.h)])])
if test x"$sdl_header" != "xno" ; then
if test x"$sdl_header" != "xno"; then
AC_CHECK_LIB(SDL, SDL_Init,
[SDL_LIBS="-lSDL"
SDL_INCLUDES="-DWEBP_HAVE_SDL"
@ -458,14 +459,14 @@ AS_IF([test "x$enable_sdl" != "xno"], [
sdl_support=yes
],
AC_MSG_WARN(Optional SDL library not found),
[$MATH_LIBS]),
if test x"$sdl_header" == "xSDL.h" ; then
[$MATH_LIBS])
if test x"$sdl_header" = "xSDL.h"; then
SDL_INCLUDES="$SDL_INCLUDES -DWEBP_HAVE_JUST_SDL_H"
fi
fi
LIBCHECK_EPILOGUE([SDL])
if test "$sdl_support" = "yes" ; then
if test "$sdl_support" = "yes"; then
build_vwebp_sdl=yes
fi
])
@ -589,7 +590,7 @@ AS_IF([test "x$enable_gif" != "xno"], [
if test "$gif_support" = "yes" -a \
"$enable_libwebpdemux" = "yes"; then
build_animdiff=yes
build_anim_diff=yes
fi
if test "$gif_support" = "yes" -a \
@ -597,7 +598,7 @@ AS_IF([test "x$enable_gif" != "xno"], [
build_gif2webp=yes
fi
])
AM_CONDITIONAL([BUILD_ANIMDIFF], [test "${build_animdiff}" = "yes"])
AM_CONDITIONAL([BUILD_ANIMDIFF], [test "${build_anim_diff}" = "yes"])
AM_CONDITIONAL([BUILD_GIF2WEBP], [test "${build_gif2webp}" = "yes"])
if test "$enable_libwebpmux" = "yes"; then
@ -662,7 +663,7 @@ if test "$enable_wic" = "yes"; then
fi
esac
dnl === If --enable-swap-16bit-csp is defined, add -DWEBP_SWAP_16BIT_CSP
dnl === If --enable-swap-16bit-csp is defined, add -DWEBP_SWAP_16BIT_CSP=1
USE_SWAP_16BIT_CSP=""
AC_MSG_CHECKING(if --enable-swap-16bit-csp option is specified)
@ -670,7 +671,7 @@ AC_ARG_ENABLE([swap-16bit-csp],
AS_HELP_STRING([--enable-swap-16bit-csp],
[Enable byte swap for 16 bit colorspaces]))
if test "$enable_swap_16bit_csp" = "yes"; then
USE_SWAP_16BIT_CSP="-DWEBP_SWAP_16BIT_CSP"
USE_SWAP_16BIT_CSP="-DWEBP_SWAP_16BIT_CSP=1"
fi
AC_MSG_RESULT(${enable_swap_16bit_csp-no})
AC_SUBST(USE_SWAP_16BIT_CSP)
@ -688,6 +689,21 @@ fi
AC_MSG_RESULT(${enable_experimental-no})
AC_SUBST(USE_EXPERIMENTAL_CODE)
dnl === If --disable-near-lossless is defined, add -DWEBP_NEAR_LOSSLESS=0
AC_DEFINE(WEBP_NEAR_LOSSLESS, [1], [Enable near lossless encoding])
AC_MSG_CHECKING(if --disable-near-lossless option is specified)
AC_ARG_ENABLE([near_lossless],
AS_HELP_STRING([--disable-near-lossless],
[Disable near lossless encoding]),
[], [enable_near_lossless=yes])
if test "$enable_near_lossless" = "no"; then
AC_DEFINE(WEBP_NEAR_LOSSLESS, [0], [Enable near lossless encoding])
AC_MSG_RESULT([yes])
else
AC_MSG_RESULT([no])
fi
dnl === Check whether libwebpmux should be built
AC_MSG_CHECKING(whether libwebpmux is to be built)
AC_ARG_ENABLE([libwebpmux],
@ -762,7 +778,7 @@ dwebp : yes
PNG : ${png_support-no}
WIC : ${wic_support-no}
GIF support : ${gif_support-no}
anim_diff : ${build_animdiff-no}
anim_diff : ${build_anim_diff-no}
gif2webp : ${build_gif2webp-no}
img2webp : ${build_img2webp-no}
webpmux : ${enable_libwebpmux-no}

View File

@ -2,7 +2,7 @@ AM_CPPFLAGS += -I$(top_builddir)/src -I$(top_srcdir)/src
bin_PROGRAMS = dwebp cwebp
if BUILD_ANIMDIFF
noinst_PROGRAMS = anim_diff
noinst_PROGRAMS = anim_diff anim_dump
endif
if BUILD_GIF2WEBP
bin_PROGRAMS += gif2webp
@ -27,20 +27,36 @@ libexample_util_la_LIBADD = ../src/libwebp.la
anim_diff_SOURCES = anim_diff.c anim_util.c anim_util.h
anim_diff_CPPFLAGS = $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE) $(GIF_INCLUDES)
anim_diff_LDADD = ../src/demux/libwebpdemux.la
anim_diff_LDADD += libexample_util.la ../imageio/libimageio_util.la
anim_diff_LDADD =
anim_diff_LDADD += ../src/demux/libwebpdemux.la
anim_diff_LDADD += libexample_util.la
anim_diff_LDADD += ../imageio/libimageio_util.la
anim_diff_LDADD += $(GIF_LIBS) -lm
anim_dump_SOURCES = anim_dump.c anim_util.c anim_util.h
anim_dump_CPPFLAGS = $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE) $(PNG_INCLUDES)
anim_dump_CPPFLAGS += $(GIF_INCLUDES)
anim_dump_LDADD =
anim_dump_LDADD += ../src/demux/libwebpdemux.la
anim_dump_LDADD += libexample_util.la
anim_dump_LDADD += ../imageio/libimageio_util.la
anim_dump_LDADD += ../imageio/libimageenc.la
anim_dump_LDADD += $(PNG_LIBS) $(GIF_LIBS) $(TIFF_LIBS) -lm
cwebp_SOURCES = cwebp.c stopwatch.h
cwebp_CPPFLAGS = $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE)
cwebp_LDADD = libexample_util.la ../imageio/libimageio_util.la
cwebp_LDADD += ../imageio/libimagedec.la ../src/libwebp.la
cwebp_LDADD =
cwebp_LDADD += libexample_util.la
cwebp_LDADD += ../imageio/libimageio_util.la
cwebp_LDADD += ../imageio/libimagedec.la
cwebp_LDADD += ../src/libwebp.la
cwebp_LDADD += $(JPEG_LIBS) $(PNG_LIBS) $(TIFF_LIBS)
dwebp_SOURCES = dwebp.c stopwatch.h
dwebp_CPPFLAGS = $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE)
dwebp_CPPFLAGS += $(JPEG_INCLUDES) $(PNG_INCLUDES)
dwebp_LDADD = libexample_util.la
dwebp_LDADD =
dwebp_LDADD += libexample_util.la
dwebp_LDADD += ../imageio/libimagedec.la
dwebp_LDADD += ../imageio/libimageenc.la
dwebp_LDADD += ../imageio/libimageio_util.la
@ -49,35 +65,52 @@ dwebp_LDADD +=$(PNG_LIBS) $(JPEG_LIBS)
gif2webp_SOURCES = gif2webp.c gifdec.c gifdec.h
gif2webp_CPPFLAGS = $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE) $(GIF_INCLUDES)
gif2webp_LDADD = libexample_util.la ../imageio/libimageio_util.la
gif2webp_LDADD += ../src/mux/libwebpmux.la ../src/libwebp.la $(GIF_LIBS)
gif2webp_LDADD =
gif2webp_LDADD += libexample_util.la
gif2webp_LDADD += ../imageio/libimageio_util.la
gif2webp_LDADD += ../src/mux/libwebpmux.la
gif2webp_LDADD += ../src/libwebp.la
gif2webp_LDADD += $(GIF_LIBS)
vwebp_SOURCES = vwebp.c
vwebp_CPPFLAGS = $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE) $(GL_INCLUDES)
vwebp_LDADD = libexample_util.la ../imageio/libimageio_util.la
vwebp_LDADD += ../src/demux/libwebpdemux.la $(GL_LIBS)
vwebp_LDADD =
vwebp_LDADD += libexample_util.la
vwebp_LDADD += ../imageio/libimageio_util.la
vwebp_LDADD += ../src/demux/libwebpdemux.la
vwebp_LDADD += $(GL_LIBS)
webpmux_SOURCES = webpmux.c
webpmux_CPPFLAGS = $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE)
webpmux_LDADD = libexample_util.la ../imageio/libimageio_util.la
webpmux_LDADD += ../src/mux/libwebpmux.la ../src/libwebp.la
webpmux_LDADD =
webpmux_LDADD += libexample_util.la
webpmux_LDADD += ../imageio/libimageio_util.la
webpmux_LDADD += ../src/mux/libwebpmux.la
webpmux_LDADD += ../src/libwebp.la
img2webp_SOURCES = img2webp.c
img2webp_CPPFLAGS = $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE)
img2webp_LDADD = libexample_util.la ../imageio/libimageio_util.la
img2webp_LDADD =
img2webp_LDADD += libexample_util.la
img2webp_LDADD += ../imageio/libimageio_util.la
img2webp_LDADD += ../imageio/libimagedec.la
img2webp_LDADD += ../src/mux/libwebpmux.la ../src/libwebp.la
img2webp_LDADD += ../src/mux/libwebpmux.la
img2webp_LDADD += ../src/libwebp.la
img2webp_LDADD += $(PNG_LIBS) $(JPEG_LIBS) $(TIFF_LIBS)
webpinfo_SOURCES = webpinfo.c
webpinfo_CPPFLAGS = $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE)
webpinfo_LDADD = libexample_util.la ../imageio/libimageio_util.la
webpinfo_LDADD =
webpinfo_LDADD += libexample_util.la
webpinfo_LDADD += ../imageio/libimageio_util.la
webpinfo_LDADD += ../src/libwebp.la
if BUILD_LIBWEBPDECODER
anim_diff_LDADD += ../src/libwebpdecoder.la
anim_dump_LDADD += ../src/libwebpdecoder.la
vwebp_LDADD += ../src/libwebpdecoder.la
else
anim_diff_LDADD += ../src/libwebp.la
anim_dump_LDADD += ../src/libwebp.la
vwebp_LDADD += ../src/libwebp.la
endif

104
examples/anim_dump.c Normal file
View File

@ -0,0 +1,104 @@
// Copyright 2017 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Decodes an animated WebP file and dumps the decoded frames as PNG or TIFF.
//
// Author: Skal (pascal.massimino@gmail.com)
#include <stdio.h>
#include <string.h> // for 'strcmp'.
#include "./anim_util.h"
#include "webp/decode.h"
#include "../imageio/image_enc.h"
#if defined(_MSC_VER) && _MSC_VER < 1900
#define snprintf _snprintf
#endif
static void Help(void) {
printf("Usage: anim_dump [options] files...\n");
printf("\nOptions:\n");
printf(" -folder <string> .... dump folder (default: '.')\n");
printf(" -prefix <string> .... prefix for dumped frames "
"(default: 'dump_')\n");
printf(" -tiff ............... save frames as TIFF\n");
printf(" -pam ................ save frames as PAM\n");
}
int main(int argc, const char* argv[]) {
int error = 0;
const char* dump_folder = ".";
const char* prefix = "dump_";
const char* suffix = "png";
WebPOutputFileFormat format = PNG;
int c;
if (argc < 2) {
Help();
return -1;
}
for (c = 1; !error && c < argc; ++c) {
if (!strcmp(argv[c], "-folder")) {
if (c + 1 == argc) {
fprintf(stderr, "missing argument after option '%s'\n", argv[c]);
error = 1;
break;
}
dump_folder = argv[++c];
} else if (!strcmp(argv[c], "-prefix")) {
if (c + 1 == argc) {
fprintf(stderr, "missing argument after option '%s'\n", argv[c]);
error = 1;
break;
}
prefix = argv[++c];
} else if (!strcmp(argv[c], "-tiff")) {
format = TIFF;
suffix = "tiff";
} else if (!strcmp(argv[c], "-pam")) {
format = PAM;
suffix = "pam";
} else {
uint32_t i;
AnimatedImage image;
const char* const file = argv[c];
memset(&image, 0, sizeof(image));
printf("Decoding file: %s as %s/%sxxxx.%s\n",
file, dump_folder, prefix, suffix);
if (!ReadAnimatedImage(file, &image, 0, NULL)) {
fprintf(stderr, "Error decoding file: %s\n Aborting.\n", file);
error = 1;
break;
}
for (i = 0; !error && i < image.num_frames; ++i) {
char out_file[1024];
WebPDecBuffer buffer;
WebPInitDecBuffer(&buffer);
buffer.colorspace = MODE_RGBA;
buffer.is_external_memory = 1;
buffer.width = image.canvas_width;
buffer.height = image.canvas_height;
buffer.u.RGBA.rgba = image.frames[i].rgba;
buffer.u.RGBA.stride = buffer.width * sizeof(uint32_t);
buffer.u.RGBA.size = buffer.u.RGBA.stride * buffer.height;
snprintf(out_file, sizeof(out_file), "%s/%s%.4d.%s",
dump_folder, prefix, i, suffix);
if (!WebPSaveImage(&buffer, format, out_file)) {
fprintf(stderr, "Error while saving image '%s'\n", out_file);
error = 1;
}
WebPFreeDecBuffer(&buffer);
}
ClearAnimatedImage(&image);
}
}
return error ? 1 : 0;
}

View File

@ -16,7 +16,7 @@
#include <stdio.h>
#include <string.h>
#ifdef WEBP_HAVE_GIF
#if defined(WEBP_HAVE_GIF)
#include <gif_lib.h>
#endif
#include "webp/format_constants.h"
@ -33,11 +33,13 @@ static const int kNumChannels = 4;
// -----------------------------------------------------------------------------
// Common utilities.
#if defined(WEBP_HAVE_GIF)
// Returns true if the frame covers the full canvas.
static int IsFullFrame(int width, int height,
int canvas_width, int canvas_height) {
return (width == canvas_width && height == canvas_height);
}
#endif // WEBP_HAVE_GIF
static int CheckSizeForOverflow(uint64_t size) {
return (size == (size_t)size);
@ -85,6 +87,7 @@ void ClearAnimatedImage(AnimatedImage* const image) {
}
}
#if defined(WEBP_HAVE_GIF)
// Clear the canvas to transparent.
static void ZeroFillCanvas(uint8_t* rgba,
uint32_t canvas_width, uint32_t canvas_height) {
@ -126,6 +129,7 @@ static void CopyFrameRectangle(const uint8_t* src, uint8_t* dst, int stride,
dst += stride;
}
}
#endif // WEBP_HAVE_GIF
// Canonicalize all transparent pixels to transparent black to aid comparison.
static void CleanupTransparentPixels(uint32_t* rgba,
@ -152,6 +156,8 @@ static int DumpFrame(const char filename[], const char dump_folder[],
FILE* f = NULL;
const char* row;
if (dump_folder == NULL) dump_folder = ".";
base_name = strrchr(filename, '/');
base_name = (base_name == NULL) ? filename : base_name + 1;
max_len = strlen(dump_folder) + 1 + strlen(base_name)
@ -200,7 +206,7 @@ static int IsWebP(const WebPData* const webp_data) {
return (WebPGetInfo(webp_data->bytes, webp_data->size, NULL, NULL) != 0);
}
// Read animated WebP bitstream 'file_str' into 'AnimatedImage' struct.
// Read animated WebP bitstream 'webp_data' into 'AnimatedImage' struct.
static int ReadAnimatedWebP(const char filename[],
const WebPData* const webp_data,
AnimatedImage* const image, int dump_frames,
@ -278,7 +284,7 @@ static int ReadAnimatedWebP(const char filename[],
// -----------------------------------------------------------------------------
// GIF Decoding.
#ifdef WEBP_HAVE_GIF
#if defined(WEBP_HAVE_GIF)
// Returns true if this is a valid GIF bitstream.
static int IsGIF(const WebPData* const data) {
@ -423,6 +429,11 @@ static uint32_t GetBackgroundColorGIF(GifFileType* gif) {
}
// Find appropriate app extension and get loop count from the next extension.
// We use Chrome's interpretation of the 'loop_count' semantics:
// if not present -> loop once
// if present and loop_count == 0, return 0 ('infinite').
// if present and loop_count != 0, it's the number of *extra* loops
// so we need to return loop_count + 1 as total loop number.
static uint32_t GetLoopCountGIF(const GifFileType* const gif) {
int i;
for (i = 0; i < gif->ImageCount; ++i) {
@ -440,12 +451,13 @@ static uint32_t GetLoopCountGIF(const GifFileType* const gif) {
if (signature_is_ok &&
eb2->Function == CONTINUE_EXT_FUNC_CODE && eb2->ByteCount >= 3 &&
eb2->Bytes[0] == 1) {
return ((uint32_t)(eb2->Bytes[2]) << 8) +
((uint32_t)(eb2->Bytes[1]) << 0);
const uint32_t extra_loop = ((uint32_t)(eb2->Bytes[2]) << 8) +
((uint32_t)(eb2->Bytes[1]) << 0);
return (extra_loop > 0) ? extra_loop + 1 : 0;
}
}
}
return 0; // Default.
return 1; // Default.
}
// Get duration of 'n'th frame in milliseconds.

View File

@ -463,8 +463,9 @@ static int WriteWebPWithMetadata(FILE* const out,
} else {
const int is_lossless = !memcmp(webp, "VP8L", kTagSize);
if (is_lossless) {
// Presence of alpha is stored in the 29th bit of VP8L data.
if (webp[kChunkHeaderSize + 3] & (1 << 5)) flags |= kAlphaFlag;
// Presence of alpha is stored in the 37th bit (29th after the
// signature) of VP8L data.
if (webp[kChunkHeaderSize + 4] & (1 << 4)) flags |= kAlphaFlag;
}
ok = ok && (fwrite(kVP8XHeader, kChunkHeaderSize, 1, out) == 1);
ok = ok && WriteLE32(out, flags);
@ -486,10 +487,10 @@ static int WriteWebPWithMetadata(FILE* const out,
*metadata_written |= METADATA_XMP;
}
return ok;
} else {
// No metadata, just write the original image file.
return (fwrite(webp, webp_size, 1, out) == 1);
}
// No metadata, just write the original image file.
return (fwrite(webp, webp_size, 1, out) == 1);
}
//------------------------------------------------------------------------------

View File

@ -72,8 +72,10 @@ static void Help(void) {
printf(" -metadata <string> ..... comma separated list of metadata to\n");
printf(" ");
printf("copy from the input to the output if present\n");
printf(" "
"Valid values: all, none, icc, xmp (default)\n");
printf(" ");
printf("Valid values: all, none, icc, xmp (default)\n");
printf(" -loop_compatibility .... use compatibility mode for Chrome\n");
printf(" version prior to M62 (inclusive)\n");
printf(" -mt .................... use multi-threading if available\n");
printf("\n");
printf(" -version ............... print version number and exit\n");
@ -104,7 +106,7 @@ int main(int argc, const char *argv[]) {
WebPAnimEncoderOptions enc_options;
WebPConfig config;
int is_first_frame = 1; // Whether we are processing the first frame.
int frame_number = 0; // Whether we are processing the first frame.
int done;
int c;
int quiet = 0;
@ -115,8 +117,9 @@ int main(int argc, const char *argv[]) {
int stored_icc = 0; // Whether we have already stored an ICC profile.
WebPData xmp_data;
int stored_xmp = 0; // Whether we have already stored an XMP profile.
int loop_count = 0;
int loop_count = 0; // default: infinite
int stored_loop_count = 0; // Whether we have found an explicit loop count.
int loop_compatibility = 0;
WebPMux* mux = NULL;
int default_kmin = 1; // Whether to use default kmin value.
@ -151,6 +154,8 @@ int main(int argc, const char *argv[]) {
} else if (!strcmp(argv[c], "-mixed")) {
enc_options.allow_mixed = 1;
config.lossless = 0;
} else if (!strcmp(argv[c], "-loop_compatibility")) {
loop_compatibility = 1;
} else if (!strcmp(argv[c], "-q") && c < argc - 1) {
config.quality = ExUtilGetFloat(argv[++c], &parse_error);
} else if (!strcmp(argv[c], "-m") && c < argc - 1) {
@ -277,7 +282,7 @@ int main(int argc, const char *argv[]) {
if (!DGifGetImageDesc(gif)) goto End;
if (is_first_frame) {
if (frame_number == 0) {
if (verbose) {
printf("Canvas screen: %d x %d\n", gif->SWidth, gif->SHeight);
}
@ -319,7 +324,6 @@ int main(int argc, const char *argv[]) {
"a memory error.\n");
goto End;
}
is_first_frame = 0;
}
// Some even more broken GIF can have sub-rect with zero width/height.
@ -336,7 +340,11 @@ int main(int argc, const char *argv[]) {
GIFBlendFrames(&frame, &gif_rect, &curr_canvas);
if (!WebPAnimEncoderAdd(enc, &curr_canvas, frame_timestamp, &config)) {
fprintf(stderr, "%s\n", WebPAnimEncoderGetError(enc));
fprintf(stderr, "Error while adding frame #%d: %s\n", frame_number,
WebPAnimEncoderGetError(enc));
goto End;
} else {
++frame_number;
}
// Update canvases.
@ -386,7 +394,7 @@ int main(int argc, const char *argv[]) {
if (verbose) {
fprintf(stderr, "Loop count: %d\n", loop_count);
}
stored_loop_count = (loop_count != 0);
stored_loop_count = loop_compatibility ? (loop_count != 0) : 1;
} else { // An extension containing metadata.
// We only store the first encountered chunk of each type, and
// only if requested by the user.
@ -443,6 +451,23 @@ int main(int argc, const char *argv[]) {
goto End;
}
if (!loop_compatibility) {
if (!stored_loop_count) {
// if no loop-count element is seen, the default is '1' (loop-once)
// and we need to signal it explicitly in WebP. Note however that
// in case there's a single frame, we still don't need to store it.
if (frame_number > 1) {
stored_loop_count = 1;
loop_count = 1;
}
} else if (loop_count > 0) {
// adapt GIF's semantic to WebP's (except in the infinite-loop case)
loop_count += 1;
}
}
// loop_count of 0 is the default (infinite), so no need to signal it
if (loop_count == 0) stored_loop_count = 0;
if (stored_loop_count || stored_icc || stored_xmp) {
// Re-mux to add loop count and/or metadata as needed.
mux = WebPMuxCreate(&webp_data, 1);

View File

@ -248,9 +248,9 @@ static void HandleKey(unsigned char key, int pos_x, int pos_y) {
}
}
} else if (key == 'i') {
// Note: doesn't handle refresh of animation's last-frame (it's quite
// more involved to do, since you need to save the previous frame).
kParams.print_info = 1 - kParams.print_info;
// TODO(skal): handle refresh of animation's last-frame too. It's quite
// more involved though (need to save the previous frame).
if (!kParams.has_animation) ClearPreviousFrame();
glutPostRedisplay();
} else if (key == 'd') {
@ -260,8 +260,8 @@ static void HandleKey(unsigned char key, int pos_x, int pos_y) {
}
static void HandleReshape(int width, int height) {
// TODO(skal): should we preserve aspect ratio?
// Also: handle larger-than-screen pictures correctly.
// Note: reshape doesn't preserve aspect ratio, and might
// be handling larger-than-screen pictures incorrectly.
glViewport(0, 0, width, height);
glMatrixMode(GL_PROJECTION);
glLoadIdentity();
@ -378,13 +378,23 @@ static void HandleDisplay(void) {
}
}
glPopMatrix();
#if defined(__APPLE__) || defined(_WIN32)
glFlush();
#else
glutSwapBuffers();
#endif
}
static void StartDisplay(void) {
const int width = kParams.canvas_width;
const int height = kParams.canvas_height;
// TODO(webp:365) GLUT_DOUBLE results in flickering / old frames to be
// partially displayed with animated webp + alpha.
#if defined(__APPLE__) || defined(_WIN32)
glutInitDisplayMode(GLUT_RGBA);
#else
glutInitDisplayMode(GLUT_DOUBLE | GLUT_RGBA);
#endif
glutInitWindowSize(width, height);
glutCreateWindow("WebP viewer");
glutDisplayFunc(HandleDisplay);

View File

@ -233,20 +233,20 @@ static int GetSignedBits(const uint8_t* const data, size_t data_size, size_t nb,
return 1;
}
#define GET_BITS(v, n) \
do { \
if (!GetBits(data, data_size, n, &v, bit_pos)) { \
LOG_ERROR("Truncated lossy bitstream."); \
return WEBP_INFO_TRUNCATED_DATA; \
} \
#define GET_BITS(v, n) \
do { \
if (!GetBits(data, data_size, n, &(v), bit_pos)) { \
LOG_ERROR("Truncated lossy bitstream."); \
return WEBP_INFO_TRUNCATED_DATA; \
} \
} while (0)
#define GET_SIGNED_BITS(v, n) \
do { \
if (!GetSignedBits(data, data_size, n, &v, bit_pos)) { \
LOG_ERROR("Truncated lossy bitstream."); \
return WEBP_INFO_TRUNCATED_DATA; \
} \
#define GET_SIGNED_BITS(v, n) \
do { \
if (!GetSignedBits(data, data_size, n, &(v), bit_pos)) { \
LOG_ERROR("Truncated lossy bitstream."); \
return WEBP_INFO_TRUNCATED_DATA; \
} \
} while (0)
static WebPInfoStatus ParseLossySegmentHeader(const WebPInfo* const webp_info,
@ -462,12 +462,12 @@ static int LLGetBits(const uint8_t* const data, size_t data_size, size_t nb,
return 1;
}
#define LL_GET_BITS(v, n) \
do { \
if (!LLGetBits(data, data_size, n, &v, bit_pos)) { \
LOG_ERROR("Truncated lossless bitstream."); \
return WEBP_INFO_TRUNCATED_DATA; \
} \
#define LL_GET_BITS(v, n) \
do { \
if (!LLGetBits(data, data_size, n, &(v), bit_pos)) { \
LOG_ERROR("Truncated lossless bitstream."); \
return WEBP_INFO_TRUNCATED_DATA; \
} \
} while (0)
static WebPInfoStatus ParseLosslessTransform(WebPInfo* const webp_info,
@ -817,9 +817,8 @@ static WebPInfoStatus ProcessImageChunk(const ChunkData* const chunk_data,
if (webp_info->seen_image_subchunk_) {
LOG_ERROR("Consecutive VP8/VP8L sub-chunks in an ANMF chunk.");
return WEBP_INFO_PARSE_ERROR;
} else {
webp_info->seen_image_subchunk_ = 1;
}
webp_info->seen_image_subchunk_ = 1;
} else {
if (webp_info->chunk_counts_[CHUNK_VP8] ||
webp_info->chunk_counts_[CHUNK_VP8L]) {
@ -873,9 +872,9 @@ static WebPInfoStatus ProcessALPHChunk(const ChunkData* const chunk_data,
if (webp_info->seen_alpha_subchunk_) {
LOG_ERROR("Consecutive ALPH sub-chunks in an ANMF chunk.");
return WEBP_INFO_PARSE_ERROR;
} else {
webp_info->seen_alpha_subchunk_ = 1;
}
webp_info->seen_alpha_subchunk_ = 1;
if (webp_info->seen_image_subchunk_) {
LOG_ERROR("ALPHA sub-chunk detected after VP8 sub-chunk "
"in an ANMF chunk.");
@ -1107,6 +1106,7 @@ static void HelpLong(void) {
"Note: there could be multiple input files;\n"
" options must come before input files.\n"
"Options:\n"
" -version ........... Print version number and exit.\n"
" -quiet ............. Do not show chunk parsing information.\n"
" -diag .............. Show parsing error diagnosis.\n"
" -summary ........... Show chunk stats summary.\n"
@ -1140,6 +1140,11 @@ int main(int argc, const char* argv[]) {
show_summary = 1;
} else if (!strcmp(argv[c], "-bitstream_info")) {
parse_bitstream = 1;
} else if (!strcmp(argv[c], "-version")) {
const int version = WebPGetDecoderVersion();
printf("WebP Decoder version: %d.%d.%d\n",
(version >> 16) & 0xff, (version >> 8) & 0xff, version & 0xff);
return 0;
} else { // Assume the remaining are all input files.
break;
}

View File

@ -1,3 +1,4 @@
AM_CPPFLAGS += -I$(top_builddir) -I$(top_srcdir)
AM_CPPFLAGS += -I$(top_builddir)/src -I$(top_srcdir)/src
noinst_LTLIBRARIES = libwebpextras.la
@ -19,18 +20,22 @@ endif
get_disto_SOURCES = get_disto.c
get_disto_CPPFLAGS = $(AM_CPPFLAGS)
get_disto_LDADD = ../imageio/libimageio_util.la ../imageio/libimagedec.la
get_disto_LDADD =
get_disto_LDADD += ../imageio/libimageio_util.la
get_disto_LDADD += ../imageio/libimagedec.la
get_disto_LDADD += ../src/libwebp.la
get_disto_LDADD += $(PNG_LIBS) $(JPEG_LIBS) $(TIFF_LIBS)
webp_quality_SOURCES = webp_quality.c
webp_quality_CPPFLAGS = $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE)
webp_quality_LDADD = ../imageio/libimageio_util.la
webp_quality_LDADD =
webp_quality_LDADD += ../imageio/libimageio_util.la
webp_quality_LDADD += libwebpextras.la
webp_quality_LDADD += ../src/libwebp.la
vwebp_sdl_SOURCES = vwebp_sdl.c webp_to_sdl.c webp_to_sdl.h
vwebp_sdl_CPPFLAGS = $(AM_CPPFLAGS) $(SDL_INCLUDES)
vwebp_sdl_LDADD = ../imageio/libimageio_util.la
vwebp_sdl_LDADD =
vwebp_sdl_LDADD += ../imageio/libimageio_util.la
vwebp_sdl_LDADD += ../src/libwebp.la
vwebp_sdl_LDADD += $(SDL_LIBS)

View File

@ -10,7 +10,7 @@
// Additional WebP utilities.
//
#include "./extras.h"
#include "extras/extras.h"
#include "webp/format_constants.h"
#include <assert.h>
@ -18,7 +18,7 @@
#define XTRA_MAJ_VERSION 0
#define XTRA_MIN_VERSION 1
#define XTRA_REV_VERSION 0
#define XTRA_REV_VERSION 1
//------------------------------------------------------------------------------

View File

@ -25,28 +25,28 @@ extern "C" {
// Returns the version number of the extras library, packed in hexadecimal using
// 8bits for each of major/minor/revision. E.g: v2.5.7 is 0x020507.
WEBP_EXTERN(int) WebPGetExtrasVersion(void);
WEBP_EXTERN int WebPGetExtrasVersion(void);
//------------------------------------------------------------------------------
// Ad-hoc colorspace importers.
// Import luma sample (gray scale image) into 'picture'. The 'picture'
// width and height must be set prior to calling this function.
WEBP_EXTERN(int) WebPImportGray(const uint8_t* gray, WebPPicture* picture);
WEBP_EXTERN int WebPImportGray(const uint8_t* gray, WebPPicture* picture);
// Import rgb sample in RGB565 packed format into 'picture'. The 'picture'
// width and height must be set prior to calling this function.
WEBP_EXTERN(int) WebPImportRGB565(const uint8_t* rgb565, WebPPicture* pic);
WEBP_EXTERN int WebPImportRGB565(const uint8_t* rgb565, WebPPicture* pic);
// Import rgb sample in RGB4444 packed format into 'picture'. The 'picture'
// width and height must be set prior to calling this function.
WEBP_EXTERN(int) WebPImportRGB4444(const uint8_t* rgb4444, WebPPicture* pic);
WEBP_EXTERN int WebPImportRGB4444(const uint8_t* rgb4444, WebPPicture* pic);
// Import a color mapped image. The number of colors is less or equal to
// MAX_PALETTE_SIZE. 'pic' must have been initialized. Its content, if any,
// will be discarded. Returns 'false' in case of error, or if indexed[] contains
// invalid indices.
WEBP_EXTERN(int)
WEBP_EXTERN int
WebPImportColorMappedARGB(const uint8_t* indexed, int indexed_stride,
const uint32_t palette[], int palette_size,
WebPPicture* pic);
@ -59,7 +59,7 @@ WebPImportColorMappedARGB(const uint8_t* indexed, int indexed_stride,
// Otherwise (lossy bitstream), the returned value is in the range [0..100].
// Any error (invalid bitstream, animated WebP, incomplete header, etc.)
// will return a value of -1.
WEBP_EXTERN(int) VP8EstimateQuality(const uint8_t* const data, size_t size);
WEBP_EXTERN int VP8EstimateQuality(const uint8_t* const data, size_t size);
//------------------------------------------------------------------------------

View File

@ -24,8 +24,8 @@
#include <string.h>
#include "webp/encode.h"
#include "../imageio/image_dec.h"
#include "../imageio/imageio_util.h"
#include "imageio/image_dec.h"
#include "imageio/imageio_util.h"
static size_t ReadPicture(const char* const filename, WebPPicture* const pic,
int keep_alpha) {

View File

@ -11,7 +11,7 @@
//
// Author: Skal (pascal.massimino@gmail.com)
#include "./extras.h"
#include "extras/extras.h"
#include "webp/decode.h"
#include <math.h>

View File

@ -24,7 +24,7 @@
#include "webp_to_sdl.h"
#include "webp/decode.h"
#include "../imageio/imageio_util.h"
#include "imageio/imageio_util.h"
#if defined(WEBP_HAVE_JUST_SDL_H)
#include <SDL.h>

View File

@ -11,8 +11,8 @@
#include <stdlib.h>
#include <string.h>
#include "./extras.h"
#include "../imageio/imageio_util.h"
#include "extras/extras.h"
#include "imageio/imageio_util.h"
int main(int argc, const char *argv[]) {
int c;

7
extras/webp_to_sdl.c Executable file → Normal file
View File

@ -28,6 +28,7 @@
#include <SDL/SDL.h>
#endif
static int init_ok = 0;
int WebpToSDL(const char* data, unsigned int data_size) {
int ok = 0;
VP8StatusCode status;
@ -42,7 +43,10 @@ int WebpToSDL(const char* data, unsigned int data_size) {
return 1;
}
SDL_Init(SDL_INIT_VIDEO);
if (!init_ok) {
SDL_Init(SDL_INIT_VIDEO);
init_ok = 1;
}
status = WebPGetFeatures((uint8_t*)data, (size_t)data_size, &config.input);
if (status != VP8_STATUS_OK) goto Error;
@ -97,6 +101,7 @@ int WebpToSDL(const char* data, unsigned int data_size) {
Error:
SDL_FreeSurface(surface);
SDL_FreeSurface(screen);
WebPFreeDecBuffer(output);
return ok;
}

View File

@ -1,13 +1,18 @@
AM_CPPFLAGS += -I$(top_builddir)/src -I$(top_srcdir)/src
noinst_LTLIBRARIES = libimageio_util.la libimagedec.la libimageenc.la
noinst_LTLIBRARIES =
noinst_LTLIBRARIES += libimageio_util.la
noinst_LTLIBRARIES += libimagedec.la
noinst_LTLIBRARIES += libimageenc.la
noinst_HEADERS =
noinst_HEADERS += ../src/webp/decode.h
noinst_HEADERS += ../src/webp/types.h
libimageio_util_la_SOURCES = imageio_util.c imageio_util.h
libimageio_util_la_SOURCES =
libimageio_util_la_SOURCES += imageio_util.c imageio_util.h
libimagedec_la_SOURCES = image_dec.c image_dec.h
libimagedec_la_SOURCES =
libimagedec_la_SOURCES += image_dec.c image_dec.h
libimagedec_la_SOURCES += jpegdec.c jpegdec.h
libimagedec_la_SOURCES += metadata.c metadata.h
libimagedec_la_SOURCES += pngdec.c pngdec.h
@ -18,6 +23,7 @@ libimagedec_la_SOURCES += wicdec.c wicdec.h
libimagedec_la_CPPFLAGS = $(JPEG_INCLUDES) $(PNG_INCLUDES) $(TIFF_INCLUDES)
libimagedec_la_CPPFLAGS += $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE)
libimageenc_la_SOURCES = image_enc.c image_enc.h
libimageenc_la_SOURCES =
libimageenc_la_SOURCES += image_enc.c image_enc.h
libimageenc_la_CPPFLAGS = $(JPEG_INCLUDES) $(PNG_INCLUDES) $(TIFF_INCLUDES)
libimageenc_la_CPPFLAGS += $(AM_CPPFLAGS) $(USE_EXPERIMENTAL_CODE)

View File

@ -542,22 +542,24 @@ int WebPWriteYUV(FILE* fout, const WebPDecBuffer* const buffer) {
// Generic top-level call
int WebPSaveImage(const WebPDecBuffer* const buffer,
WebPOutputFileFormat format, const char* const out_file) {
WebPOutputFileFormat format,
const char* const out_file_name) {
FILE* fout = NULL;
int needs_open_file = 1;
const int use_stdout = (out_file != NULL) && !strcmp(out_file, "-");
const int use_stdout = (out_file_name != NULL) && !strcmp(out_file_name, "-");
int ok = 1;
if (buffer == NULL || out_file == NULL) return 0;
if (buffer == NULL || out_file_name == NULL) return 0;
#ifdef HAVE_WINCODEC_H
needs_open_file = (format != PNG);
#endif
if (needs_open_file) {
fout = use_stdout ? ImgIoUtilSetBinaryMode(stdout) : fopen(out_file, "wb");
fout = use_stdout ? ImgIoUtilSetBinaryMode(stdout)
: fopen(out_file_name, "wb");
if (fout == NULL) {
fprintf(stderr, "Error opening output file %s\n", out_file);
fprintf(stderr, "Error opening output file %s\n", out_file_name);
return 0;
}
}
@ -566,7 +568,7 @@ int WebPSaveImage(const WebPDecBuffer* const buffer,
format == RGBA || format == BGRA || format == ARGB ||
format == rgbA || format == bgrA || format == Argb) {
#ifdef HAVE_WINCODEC_H
ok &= WebPWritePNG(out_file, use_stdout, buffer);
ok &= WebPWritePNG(out_file_name, use_stdout, buffer);
#else
ok &= WebPWritePNG(fout, buffer);
#endif

View File

@ -137,7 +137,11 @@ void ImgIoUtilCopyPlane(const uint8_t* src, int src_stride,
int ImgIoUtilCheckSizeArgumentsOverflow(uint64_t nmemb, size_t size) {
const uint64_t total_size = nmemb * size;
return (total_size == (size_t)total_size);
int ok = (total_size == (size_t)total_size);
#if defined(WEBP_MAX_IMAGE_SIZE)
ok = ok && (total_size <= (uint64_t)WEBP_MAX_IMAGE_SIZE);
#endif
return ok;
}
// -----------------------------------------------------------------------------

View File

@ -304,18 +304,18 @@ int ReadJPEG(const uint8_t* const data, size_t data_size,
if (stride != (int)stride ||
!ImgIoUtilCheckSizeArgumentsOverflow(stride, height)) {
goto End;
goto Error;
}
rgb = (uint8_t*)malloc((size_t)stride * height);
if (rgb == NULL) {
goto End;
goto Error;
}
buffer[0] = (JSAMPLE*)rgb;
while (dinfo.output_scanline < dinfo.output_height) {
if (jpeg_read_scanlines((j_decompress_ptr)&dinfo, buffer, 1) != 1) {
goto End;
goto Error;
}
buffer[0] += stride;
}

View File

@ -117,8 +117,13 @@ static size_t ReadPAMFields(PNMInfo* const info, size_t off) {
}
}
if (!(info->seen_flags & TUPLE_FLAG)) {
info->seen_flags |= TUPLE_FLAG;
info->bytes_per_px = info->depth * (info->max_value > 255 ? 2 : 1);
if (info->depth > 0 && info->depth <= 4) {
info->seen_flags |= TUPLE_FLAG;
info->bytes_per_px = info->depth * (info->max_value > 255 ? 2 : 1);
} else {
fprintf(stderr, "PAM: invalid bitdepth (%d).\n", info->depth);
return 0;
}
}
if (info->seen_flags != ALL_NEEDED_FLAGS) {
fprintf(stderr, "PAM: incomplete header.\n");

View File

@ -141,10 +141,21 @@ int ReadWebP(const uint8_t* const data, size_t data_size,
do {
const int has_alpha = keep_alpha && bitstream->has_alpha;
uint64_t stride;
pic->width = bitstream->width;
pic->height = bitstream->height;
if (!pic->use_argb) pic->colorspace = has_alpha ? WEBP_YUV420A
: WEBP_YUV420;
if (pic->use_argb) {
stride = (uint64_t)bitstream->width * 4;
} else {
stride = (uint64_t)bitstream->width * (has_alpha ? 5 : 3) / 2;
pic->colorspace = has_alpha ? WEBP_YUV420A : WEBP_YUV420;
}
if (!ImgIoUtilCheckSizeArgumentsOverflow(stride, bitstream->height)) {
status = VP8_STATUS_OUT_OF_MEMORY;
break;
}
ok = WebPPictureAlloc(pic);
if (!ok) {
status = VP8_STATUS_OUT_OF_MEMORY;

View File

@ -34,6 +34,16 @@ else
GL_LIBS = -lglut -lGL
endif
# SDL flags: use sdl-config if it exists
SDL_CONFIG = $(shell sdl-config --version 2> /dev/null)
ifneq ($(SDL_CONFIG),)
SDL_LIBS = $(shell sdl-config --libs)
SDL_FLAGS = $(shell sdl-config --cflags)
else
# use best-guess
SDL_LIBS = -lSDL
SDL_FLAGS =
endif
# To install libraries on Mac OS X:
# 1. Install MacPorts (http://www.macports.org/install.php)
@ -57,7 +67,7 @@ endif
# EXTRA_FLAGS += -DWEBP_EXPERIMENTAL_FEATURES
# Extra flags to enable byte swap for 16 bit colorspaces.
# EXTRA_FLAGS += -DWEBP_SWAP_16BIT_CSP
# EXTRA_FLAGS += -DWEBP_SWAP_16BIT_CSP=1
# Extra flags to enable multi-threading
EXTRA_FLAGS += -DWEBP_USE_THREAD
@ -103,7 +113,7 @@ endif
AR = ar
ARFLAGS = r
CPPFLAGS = -Isrc/ -Wall
CPPFLAGS = -I. -Isrc/ -Wall
CFLAGS = -O3 -DNDEBUG $(EXTRA_FLAGS)
CC = gcc
INSTALL = install
@ -173,9 +183,6 @@ DSP_DEC_OBJS = \
src/dsp/yuv_sse2.o \
DSP_ENC_OBJS = \
src/dsp/argb.o \
src/dsp/argb_mips_dsp_r2.o \
src/dsp/argb_sse2.o \
src/dsp/cost.o \
src/dsp/cost_mips32.o \
src/dsp/cost_mips_dsp_r2.o \
@ -335,7 +342,8 @@ OUT_LIBS += src/libwebp.a
EXTRA_LIB = extras/libwebpextras.a
OUT_EXAMPLES = examples/cwebp examples/dwebp
EXTRA_EXAMPLES = examples/gif2webp examples/vwebp examples/webpmux \
examples/anim_diff examples/img2webp examples/webpinfo
examples/anim_diff examples/anim_dump \
examples/img2webp examples/webpinfo
OTHER_EXAMPLES = extras/get_disto extras/webp_quality extras/vwebp_sdl
OUTPUT = $(OUT_LIBS) $(OUT_EXAMPLES)
@ -363,7 +371,7 @@ src/utils/bit_reader_utils.o: src/utils/endian_inl_utils.h
src/utils/bit_writer_utils.o: src/utils/endian_inl_utils.h
%.o: %.c $(HDRS)
$(CC) $(CFLAGS) $(CPPFLAGS) -c $< -o $@
$(CC) $(CPPFLAGS) $(CFLAGS) -c $< -o $@
examples/libanim_util.a: $(ANIM_UTIL_OBJS)
examples/libexample_util.a: $(EX_UTIL_OBJS)
@ -381,6 +389,7 @@ src/demux/libwebpdemux.a: $(LIBWEBPDEMUX_OBJS)
$(AR) $(ARFLAGS) $@ $^
examples/anim_diff: examples/anim_diff.o $(ANIM_UTIL_OBJS) $(GIFDEC_OBJS)
examples/anim_dump: examples/anim_dump.o $(ANIM_UTIL_OBJS)
examples/cwebp: examples/cwebp.o
examples/dwebp: examples/dwebp.o
examples/gif2webp: examples/gif2webp.o $(GIFDEC_OBJS)
@ -394,6 +403,13 @@ examples/anim_diff: src/demux/libwebpdemux.a examples/libexample_util.a
examples/anim_diff: imageio/libimageio_util.a src/libwebp.a
examples/anim_diff: EXTRA_LIBS += $(GIF_LIBS)
examples/anim_diff: EXTRA_FLAGS += -DWEBP_HAVE_GIF
examples/anim_dump: examples/libanim_util.a
examples/anim_dump: src/demux/libwebpdemux.a
examples/anim_dump: examples/libexample_util.a
examples/anim_dump: imageio/libimageio_util.a
examples/anim_dump: imageio/libimageenc.a
examples/anim_dump: src/libwebp.a
examples/anim_dump: EXTRA_LIBS += $(GIF_LIBS) $(DWEBP_LIBS)
examples/cwebp: examples/libexample_util.a
examples/cwebp: imageio/libimagedec.a
examples/cwebp: imageio/libimageio_util.a
@ -434,8 +450,8 @@ extras/vwebp_sdl: extras/vwebp_sdl.o
extras/vwebp_sdl: extras/webp_to_sdl.o
extras/vwebp_sdl: imageio/libimageio_util.a
extras/vwebp_sdl: src/libwebp.a
extras/vwebp_sdl: EXTRA_FLAGS += -DWEBP_HAVE_SDL
extras/vwebp_sdl: EXTRA_LIBS += -lSDL
extras/vwebp_sdl: EXTRA_FLAGS += -DWEBP_HAVE_SDL $(SDL_FLAGS)
extras/vwebp_sdl: EXTRA_LIBS += $(SDL_LIBS)
$(OUT_EXAMPLES) $(EXTRA_EXAMPLES) $(OTHER_EXAMPLES):
$(CC) -o $@ $^ $(LDFLAGS)

View File

@ -1,5 +1,5 @@
.\" Hey, EMACS: -*- nroff -*-
.TH GIF2WEBP 1 "January 25, 2017"
.TH GIF2WEBP 1 "September 20, 2017"
.SH NAME
gif2webp \- Convert a GIF image to WebP
.SH SYNOPSIS
@ -109,6 +109,9 @@ the range of 20 to 50.
.TP
.B \-mt
Use multi-threading for encoding, if possible.
.B \-loop_compatibility
If enabled, handle the loop information in a compatible fashion for Chrome
version prior to M62 (inclusive) and Firefox.
.TP
.B \-v
Print extra information.

View File

@ -1,5 +1,5 @@
.\" Hey, EMACS: -*- nroff -*-
.TH WEBPINFO 1 "May 08, 2017"
.TH WEBPINFO 1 "November 24, 2017"
.SH NAME
webpinfo \- print out the chunk level structure of WebP files
along with basic integrity checks.
@ -22,16 +22,19 @@ WebP format.
.SH OPTIONS
.TP
.B -quiet
.B \-version
Print the version number (as major.minor.revision) and exit.
.TP
.B \-quiet
Do not show chunk parsing information.
.TP
.B -diag
.B \-diag
Show parsing error diagnosis.
.TP
.B -summary
.B \-summary
Show chunk stats summary.
.TP
.BI -bitstream_info
.BI \-bitstream_info
Parse bitstream header.
.TP
.B \-h, \-help

View File

@ -22,6 +22,7 @@ commondir = $(includedir)/webp
libwebp_la_SOURCES =
libwebpinclude_HEADERS =
libwebpinclude_HEADERS += webp/encode.h
noinst_HEADERS =
noinst_HEADERS += webp/format_constants.h
@ -35,7 +36,7 @@ libwebp_la_LIBADD += utils/libwebputils.la
# other than the ones listed on the command line, i.e., after linking, it will
# not have unresolved symbols. Some platforms (Windows among them) require all
# symbols in shared libraries to be resolved at library creation.
libwebp_la_LDFLAGS = -no-undefined -version-info 7:0:0
libwebp_la_LDFLAGS = -no-undefined -version-info 7:1:0
libwebpincludedir = $(includedir)/webp
pkgconfig_DATA = libwebp.pc
@ -47,7 +48,7 @@ if BUILD_LIBWEBPDECODER
libwebpdecoder_la_LIBADD += dsp/libwebpdspdecode.la
libwebpdecoder_la_LIBADD += utils/libwebputilsdecode.la
libwebpdecoder_la_LDFLAGS = -no-undefined -version-info 3:0:0
libwebpdecoder_la_LDFLAGS = -no-undefined -version-info 3:1:0
pkgconfig_DATA += libwebpdecoder.pc
endif

View File

@ -1,3 +1,4 @@
AM_CPPFLAGS += -I$(top_builddir) -I$(top_srcdir)
noinst_LTLIBRARIES = libwebpdecode.la
libwebpdecode_la_SOURCES =

View File

@ -12,13 +12,13 @@
// Author: Skal (pascal.massimino@gmail.com)
#include <stdlib.h>
#include "./alphai_dec.h"
#include "./vp8i_dec.h"
#include "./vp8li_dec.h"
#include "../dsp/dsp.h"
#include "../utils/quant_levels_dec_utils.h"
#include "../utils/utils.h"
#include "../webp/format_constants.h"
#include "src/dec/alphai_dec.h"
#include "src/dec/vp8i_dec.h"
#include "src/dec/vp8li_dec.h"
#include "src/dsp/dsp.h"
#include "src/utils/quant_levels_dec_utils.h"
#include "src/utils/utils.h"
#include "src/webp/format_constants.h"
//------------------------------------------------------------------------------
// ALPHDecoder object.

View File

@ -11,11 +11,11 @@
//
// Author: Urvang (urvang@google.com)
#ifndef WEBP_DEC_ALPHAI_H_
#define WEBP_DEC_ALPHAI_H_
#ifndef WEBP_DEC_ALPHAI_DEC_H_
#define WEBP_DEC_ALPHAI_DEC_H_
#include "./webpi_dec.h"
#include "../utils/filters_utils.h"
#include "src/dec/webpi_dec.h"
#include "src/utils/filters_utils.h"
#ifdef __cplusplus
extern "C" {
@ -51,4 +51,4 @@ void WebPDeallocateAlphaMemory(VP8Decoder* const dec);
} // extern "C"
#endif
#endif /* WEBP_DEC_ALPHAI_H_ */
#endif /* WEBP_DEC_ALPHAI_DEC_H_ */

View File

@ -13,15 +13,15 @@
#include <stdlib.h>
#include "./vp8i_dec.h"
#include "./webpi_dec.h"
#include "../utils/utils.h"
#include "src/dec/vp8i_dec.h"
#include "src/dec/webpi_dec.h"
#include "src/utils/utils.h"
//------------------------------------------------------------------------------
// WebPDecBuffer
// Number of bytes per pixel for the different color-spaces.
static const int kModeBpp[MODE_LAST] = {
static const uint8_t kModeBpp[MODE_LAST] = {
3, 4, 3, 4, 4, 2, 2,
4, 4, 4, 2, // pre-multiplied modes
1, 1 };
@ -36,7 +36,7 @@ static int IsValidColorspace(int webp_csp_mode) {
// strictly speaking, the very last (or first, if flipped) row
// doesn't require padding.
#define MIN_BUFFER_SIZE(WIDTH, HEIGHT, STRIDE) \
(uint64_t)(STRIDE) * ((HEIGHT) - 1) + (WIDTH)
((uint64_t)(STRIDE) * ((HEIGHT) - 1) + (WIDTH))
static VP8StatusCode CheckDecBuffer(const WebPDecBuffer* const buffer) {
int ok = 1;
@ -98,9 +98,14 @@ static VP8StatusCode AllocateBuffer(WebPDecBuffer* const buffer) {
uint64_t uv_size = 0, a_size = 0, total_size;
// We need memory and it hasn't been allocated yet.
// => initialize output buffer, now that dimensions are known.
const int stride = w * kModeBpp[mode];
const uint64_t size = (uint64_t)stride * h;
int stride;
uint64_t size;
if ((uint64_t)w * kModeBpp[mode] >= (1ull << 32)) {
return VP8_STATUS_INVALID_PARAM;
}
stride = w * kModeBpp[mode];
size = (uint64_t)stride * h;
if (!WebPIsRGBMode(mode)) {
uv_stride = (w + 1) / 2;
uv_size = (uint64_t)uv_stride * ((h + 1) / 2);
@ -169,11 +174,11 @@ VP8StatusCode WebPFlipBuffer(WebPDecBuffer* const buffer) {
return VP8_STATUS_OK;
}
VP8StatusCode WebPAllocateDecBuffer(int w, int h,
VP8StatusCode WebPAllocateDecBuffer(int width, int height,
const WebPDecoderOptions* const options,
WebPDecBuffer* const out) {
WebPDecBuffer* const buffer) {
VP8StatusCode status;
if (out == NULL || w <= 0 || h <= 0) {
if (buffer == NULL || width <= 0 || height <= 0) {
return VP8_STATUS_INVALID_PARAM;
}
if (options != NULL) { // First, apply options if there is any.
@ -182,33 +187,39 @@ VP8StatusCode WebPAllocateDecBuffer(int w, int h,
const int ch = options->crop_height;
const int x = options->crop_left & ~1;
const int y = options->crop_top & ~1;
if (x < 0 || y < 0 || cw <= 0 || ch <= 0 || x + cw > w || y + ch > h) {
if (x < 0 || y < 0 || cw <= 0 || ch <= 0 ||
x + cw > width || y + ch > height) {
return VP8_STATUS_INVALID_PARAM; // out of frame boundary.
}
w = cw;
h = ch;
width = cw;
height = ch;
}
if (options->use_scaling) {
#if !defined(WEBP_REDUCE_SIZE)
int scaled_width = options->scaled_width;
int scaled_height = options->scaled_height;
if (!WebPRescalerGetScaledDimensions(
w, h, &scaled_width, &scaled_height)) {
width, height, &scaled_width, &scaled_height)) {
return VP8_STATUS_INVALID_PARAM;
}
w = scaled_width;
h = scaled_height;
width = scaled_width;
height = scaled_height;
#else
return VP8_STATUS_INVALID_PARAM; // rescaling not supported
#endif
}
}
out->width = w;
out->height = h;
buffer->width = width;
buffer->height = height;
// Then, allocate buffer for real.
status = AllocateBuffer(out);
status = AllocateBuffer(buffer);
if (status != VP8_STATUS_OK) return status;
// Use the stride trick if vertical flip is needed.
if (options != NULL && options->flip) {
status = WebPFlipBuffer(out);
status = WebPFlipBuffer(buffer);
}
return status;
}

View File

@ -11,8 +11,8 @@
//
// Author: Skal (pascal.massimino@gmail.com)
#ifndef WEBP_DEC_COMMON_H_
#define WEBP_DEC_COMMON_H_
#ifndef WEBP_DEC_COMMON_DEC_H_
#define WEBP_DEC_COMMON_DEC_H_
// intra prediction modes
enum { B_DC_PRED = 0, // 4x4 modes
@ -51,4 +51,4 @@ enum { MB_FEATURE_TREE_PROBS = 3,
NUM_PROBAS = 11
};
#endif // WEBP_DEC_COMMON_H_
#endif // WEBP_DEC_COMMON_DEC_H_

View File

@ -12,13 +12,13 @@
// Author: Skal (pascal.massimino@gmail.com)
#include <stdlib.h>
#include "./vp8i_dec.h"
#include "../utils/utils.h"
#include "src/dec/vp8i_dec.h"
#include "src/utils/utils.h"
//------------------------------------------------------------------------------
// Main reconstruction function.
static const int kScan[16] = {
static const uint16_t kScan[16] = {
0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS,
0 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS,
0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS,
@ -320,7 +320,7 @@ static void PrecomputeFilterStrengths(VP8Decoder* const dec) {
#define MIN_DITHER_AMP 4
#define DITHER_AMP_TAB_SIZE 12
static const int kQuantToDitherAmp[DITHER_AMP_TAB_SIZE] = {
static const uint8_t kQuantToDitherAmp[DITHER_AMP_TAB_SIZE] = {
// roughly, it's dqm->uv_mat_[1]
8, 7, 6, 4, 4, 2, 2, 2, 1, 1, 1, 1
};
@ -728,7 +728,7 @@ static int AllocateMemory(VP8Decoder* const dec) {
}
mem = (uint8_t*)dec->mem_;
dec->intra_t_ = (uint8_t*)mem;
dec->intra_t_ = mem;
mem += intra_pred_mode_size;
dec->yuv_t_ = (VP8TopSamples*)mem;
@ -750,7 +750,7 @@ static int AllocateMemory(VP8Decoder* const dec) {
mem = (uint8_t*)WEBP_ALIGN(mem);
assert((yuv_size & WEBP_ALIGN_CST) == 0);
dec->yuv_b_ = (uint8_t*)mem;
dec->yuv_b_ = mem;
mem += yuv_size;
dec->mb_data_ = (VP8MBData*)mem;
@ -766,7 +766,7 @@ static int AllocateMemory(VP8Decoder* const dec) {
const int extra_rows = kFilterExtraRows[dec->filter_type_];
const int extra_y = extra_rows * dec->cache_y_stride_;
const int extra_uv = (extra_rows / 2) * dec->cache_uv_stride_;
dec->cache_y_ = ((uint8_t*)mem) + extra_y;
dec->cache_y_ = mem + extra_y;
dec->cache_u_ = dec->cache_y_
+ 16 * num_caches * dec->cache_y_stride_ + extra_uv;
dec->cache_v_ = dec->cache_u_
@ -776,7 +776,7 @@ static int AllocateMemory(VP8Decoder* const dec) {
mem += cache_size;
// alpha plane
dec->alpha_plane_ = alpha_size ? (uint8_t*)mem : NULL;
dec->alpha_plane_ = alpha_size ? mem : NULL;
mem += alpha_size;
assert(mem <= (uint8_t*)dec->mem_ + dec->mem_size_);

View File

@ -15,10 +15,10 @@
#include <string.h>
#include <stdlib.h>
#include "./alphai_dec.h"
#include "./webpi_dec.h"
#include "./vp8i_dec.h"
#include "../utils/utils.h"
#include "src/dec/alphai_dec.h"
#include "src/dec/webpi_dec.h"
#include "src/dec/vp8i_dec.h"
#include "src/utils/utils.h"
// In append mode, buffer allocations increase as multiples of this value.
// Needs to be a power of 2.
@ -673,12 +673,12 @@ void WebPIDelete(WebPIDecoder* idec) {
//------------------------------------------------------------------------------
// Wrapper toward WebPINewDecoder
WebPIDecoder* WebPINewRGB(WEBP_CSP_MODE mode, uint8_t* output_buffer,
WebPIDecoder* WebPINewRGB(WEBP_CSP_MODE csp, uint8_t* output_buffer,
size_t output_buffer_size, int output_stride) {
const int is_external_memory = (output_buffer != NULL) ? 1 : 0;
WebPIDecoder* idec;
if (mode >= MODE_YUV) return NULL;
if (csp >= MODE_YUV) return NULL;
if (is_external_memory == 0) { // Overwrite parameters to sane values.
output_buffer_size = 0;
output_stride = 0;
@ -689,7 +689,7 @@ WebPIDecoder* WebPINewRGB(WEBP_CSP_MODE mode, uint8_t* output_buffer,
}
idec = WebPINewDecoder(NULL);
if (idec == NULL) return NULL;
idec->output_.colorspace = mode;
idec->output_.colorspace = csp;
idec->output_.is_external_memory = is_external_memory;
idec->output_.u.RGBA.rgba = output_buffer;
idec->output_.u.RGBA.stride = output_stride;

View File

@ -13,11 +13,11 @@
#include <assert.h>
#include <stdlib.h>
#include "../dec/vp8i_dec.h"
#include "./webpi_dec.h"
#include "../dsp/dsp.h"
#include "../dsp/yuv.h"
#include "../utils/utils.h"
#include "src/dec/vp8i_dec.h"
#include "src/dec/webpi_dec.h"
#include "src/dsp/dsp.h"
#include "src/dsp/yuv.h"
#include "src/utils/utils.h"
//------------------------------------------------------------------------------
// Main YUV<->RGB conversion functions
@ -212,7 +212,7 @@ static int EmitAlphaRGBA4444(const VP8Io* const io, WebPDecParams* const p,
int num_rows;
const int start_y = GetAlphaSourceRow(io, &alpha, &num_rows);
uint8_t* const base_rgba = buf->rgba + start_y * buf->stride;
#ifdef WEBP_SWAP_16BIT_CSP
#if (WEBP_SWAP_16BIT_CSP == 1)
uint8_t* alpha_dst = base_rgba;
#else
uint8_t* alpha_dst = base_rgba + 1;
@ -241,6 +241,7 @@ static int EmitAlphaRGBA4444(const VP8Io* const io, WebPDecParams* const p,
//------------------------------------------------------------------------------
// YUV rescaling (no final RGB conversion needed)
#if !defined(WEBP_REDUCE_SIZE)
static int Rescale(const uint8_t* src, int src_stride,
int new_lines, WebPRescaler* const wrk) {
int num_lines_out = 0;
@ -431,7 +432,7 @@ static int ExportAlphaRGBA4444(WebPDecParams* const p, int y_pos,
int max_lines_out) {
const WebPRGBABuffer* const buf = &p->output->u.RGBA;
uint8_t* const base_rgba = buf->rgba + y_pos * buf->stride;
#ifdef WEBP_SWAP_16BIT_CSP
#if (WEBP_SWAP_16BIT_CSP == 1)
uint8_t* alpha_dst = base_rgba;
#else
uint8_t* alpha_dst = base_rgba + 1;
@ -541,6 +542,8 @@ static int InitRGBRescaler(const VP8Io* const io, WebPDecParams* const p) {
return 1;
}
#endif // WEBP_REDUCE_SIZE
//------------------------------------------------------------------------------
// Default custom functions
@ -561,10 +564,14 @@ static int CustomSetup(VP8Io* io) {
WebPInitUpsamplers();
}
if (io->use_scaling) {
#if !defined(WEBP_REDUCE_SIZE)
const int ok = is_rgb ? InitRGBRescaler(io, p) : InitYUVRescaler(io, p);
if (!ok) {
return 0; // memory error
}
#else
return 0; // rescaling support not compiled
#endif
} else {
if (is_rgb) {
WebPInitSamplers();
@ -598,9 +605,6 @@ static int CustomSetup(VP8Io* io) {
}
}
if (is_rgb) {
VP8YUVInit();
}
return 1;
}

View File

@ -11,7 +11,7 @@
//
// Author: Skal (pascal.massimino@gmail.com)
#include "./vp8i_dec.h"
#include "src/dec/vp8i_dec.h"
static WEBP_INLINE int clip(int v, int M) {
return v < 0 ? 0 : v > M ? M : v;

View File

@ -11,15 +11,19 @@
//
// Author: Skal (pascal.massimino@gmail.com)
#include "./vp8i_dec.h"
#include "../utils/bit_reader_inl_utils.h"
#include "src/dec/vp8i_dec.h"
#include "src/utils/bit_reader_inl_utils.h"
#if !defined(USE_GENERIC_TREE)
#if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__)
// using a table is ~1-2% slower on ARM. Prefer the coded-tree approach then.
#define USE_GENERIC_TREE
#define USE_GENERIC_TREE 1 // ALTERNATE_CODE
#else
#define USE_GENERIC_TREE 0
#endif
#endif // USE_GENERIC_TREE
#ifdef USE_GENERIC_TREE
#if (USE_GENERIC_TREE == 1)
static const int8_t kYModesIntra4[18] = {
-B_DC_PRED, 1,
-B_TM_PRED, 2,
@ -317,7 +321,7 @@ static void ParseIntraMode(VP8BitReader* const br,
int x;
for (x = 0; x < 4; ++x) {
const uint8_t* const prob = kBModesProba[top[x]][ymode];
#ifdef USE_GENERIC_TREE
#if (USE_GENERIC_TREE == 1)
// Generic tree-parsing
int i = kYModesIntra4[VP8GetBit(br, prob[0])];
while (i > 0) {
@ -335,7 +339,7 @@ static void ParseIntraMode(VP8BitReader* const br,
(!VP8GetBit(br, prob[6]) ? B_LD_PRED :
(!VP8GetBit(br, prob[7]) ? B_VL_PRED :
(!VP8GetBit(br, prob[8]) ? B_HD_PRED : B_HU_PRED)));
#endif // USE_GENERIC_TREE
#endif // USE_GENERIC_TREE
top[x] = ymode;
}
memcpy(modes, top, 4 * sizeof(*top));
@ -498,7 +502,7 @@ static const uint8_t
// Paragraph 9.9
static const int kBands[16 + 1] = {
static const uint8_t kBands[16 + 1] = {
0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7,
0 // extra entry as sentinel
};

View File

@ -13,12 +13,12 @@
#include <stdlib.h>
#include "./alphai_dec.h"
#include "./vp8i_dec.h"
#include "./vp8li_dec.h"
#include "./webpi_dec.h"
#include "../utils/bit_reader_inl_utils.h"
#include "../utils/utils.h"
#include "src/dec/alphai_dec.h"
#include "src/dec/vp8i_dec.h"
#include "src/dec/vp8li_dec.h"
#include "src/dec/webpi_dec.h"
#include "src/utils/bit_reader_inl_utils.h"
#include "src/utils/utils.h"
//------------------------------------------------------------------------------

View File

@ -11,10 +11,10 @@
//
// Author: Skal (pascal.massimino@gmail.com)
#ifndef WEBP_WEBP_DECODE_VP8_H_
#define WEBP_WEBP_DECODE_VP8_H_
#ifndef WEBP_DEC_VP8_DEC_H_
#define WEBP_DEC_VP8_DEC_H_
#include "../webp/decode.h"
#include "src/webp/decode.h"
#ifdef __cplusplus
extern "C" {
@ -157,24 +157,24 @@ void VP8Delete(VP8Decoder* const dec);
// Miscellaneous VP8/VP8L bitstream probing functions.
// Returns true if the next 3 bytes in data contain the VP8 signature.
WEBP_EXTERN(int) VP8CheckSignature(const uint8_t* const data, size_t data_size);
WEBP_EXTERN int VP8CheckSignature(const uint8_t* const data, size_t data_size);
// Validates the VP8 data-header and retrieves basic header information viz
// width and height. Returns 0 in case of formatting error. *width/*height
// can be passed NULL.
WEBP_EXTERN(int) VP8GetInfo(
WEBP_EXTERN int VP8GetInfo(
const uint8_t* data,
size_t data_size, // data available so far
size_t chunk_size, // total data size expected in the chunk
int* const width, int* const height);
// Returns true if the next byte(s) in data is a VP8L signature.
WEBP_EXTERN(int) VP8LCheckSignature(const uint8_t* const data, size_t size);
WEBP_EXTERN int VP8LCheckSignature(const uint8_t* const data, size_t size);
// Validates the VP8L data-header and retrieves basic header information viz
// width, height and alpha. Returns 0 in case of formatting error.
// width/height/has_alpha can be passed NULL.
WEBP_EXTERN(int) VP8LGetInfo(
WEBP_EXTERN int VP8LGetInfo(
const uint8_t* data, size_t data_size, // data available so far
int* const width, int* const height, int* const has_alpha);
@ -182,4 +182,4 @@ WEBP_EXTERN(int) VP8LGetInfo(
} // extern "C"
#endif
#endif /* WEBP_WEBP_DECODE_VP8_H_ */
#endif /* WEBP_DEC_VP8_DEC_H_ */

View File

@ -11,16 +11,16 @@
//
// Author: Skal (pascal.massimino@gmail.com)
#ifndef WEBP_DEC_VP8I_H_
#define WEBP_DEC_VP8I_H_
#ifndef WEBP_DEC_VP8I_DEC_H_
#define WEBP_DEC_VP8I_DEC_H_
#include <string.h> // for memcpy()
#include "./common_dec.h"
#include "./vp8li_dec.h"
#include "../utils/bit_reader_utils.h"
#include "../utils/random_utils.h"
#include "../utils/thread_utils.h"
#include "../dsp/dsp.h"
#include "src/dec/common_dec.h"
#include "src/dec/vp8li_dec.h"
#include "src/utils/bit_reader_utils.h"
#include "src/utils/random_utils.h"
#include "src/utils/thread_utils.h"
#include "src/dsp/dsp.h"
#ifdef __cplusplus
extern "C" {
@ -32,7 +32,7 @@ extern "C" {
// version numbers
#define DEC_MAJ_VERSION 0
#define DEC_MIN_VERSION 6
#define DEC_REV_VERSION 0
#define DEC_REV_VERSION 1
// YUV-cache parameters. Cache is 32-bytes wide (= one cacheline).
// Constraints are: We need to store one 16x16 block of luma samples (y),
@ -57,7 +57,6 @@ extern "C" {
// '|' = left sample, '-' = top sample, '+' = top-left sample
// 't' = extra top-right sample for 4x4 modes
#define YUV_SIZE (BPS * 17 + BPS * 9)
#define Y_SIZE (BPS * 17)
#define Y_OFF (BPS * 1 + 8)
#define U_OFF (Y_OFF + BPS * 16 + BPS)
#define V_OFF (U_OFF + 16)
@ -317,4 +316,4 @@ const uint8_t* VP8DecompressAlphaRows(VP8Decoder* const dec,
} // extern "C"
#endif
#endif /* WEBP_DEC_VP8I_H_ */
#endif /* WEBP_DEC_VP8I_DEC_H_ */

View File

@ -14,22 +14,22 @@
#include <stdlib.h>
#include "./alphai_dec.h"
#include "./vp8li_dec.h"
#include "../dsp/dsp.h"
#include "../dsp/lossless.h"
#include "../dsp/lossless_common.h"
#include "../dsp/yuv.h"
#include "../utils/endian_inl_utils.h"
#include "../utils/huffman_utils.h"
#include "../utils/utils.h"
#include "src/dec/alphai_dec.h"
#include "src/dec/vp8li_dec.h"
#include "src/dsp/dsp.h"
#include "src/dsp/lossless.h"
#include "src/dsp/lossless_common.h"
#include "src/dsp/yuv.h"
#include "src/utils/endian_inl_utils.h"
#include "src/utils/huffman_utils.h"
#include "src/utils/utils.h"
#define NUM_ARGB_CACHE_ROWS 16
static const int kCodeLengthLiterals = 16;
static const int kCodeLengthRepeatCode = 16;
static const int kCodeLengthExtraBits[3] = { 2, 3, 7 };
static const int kCodeLengthRepeatOffsets[3] = { 3, 3, 11 };
static const uint8_t kCodeLengthExtraBits[3] = { 2, 3, 7 };
static const uint8_t kCodeLengthRepeatOffsets[3] = { 3, 3, 11 };
// -----------------------------------------------------------------------------
// Five Huffman codes are used at each meta code:
@ -86,7 +86,7 @@ static const uint8_t kCodeToPlane[CODE_TO_PLANE_CODES] = {
// All values computed for 8-bit first level lookup with Mark Adler's tool:
// http://www.hdfgroup.org/ftp/lib-external/zlib/zlib-1.2.5/examples/enough.c
#define FIXED_TABLE_SIZE (630 * 3 + 410)
static const int kTableSize[12] = {
static const uint16_t kTableSize[12] = {
FIXED_TABLE_SIZE + 654,
FIXED_TABLE_SIZE + 656,
FIXED_TABLE_SIZE + 658,
@ -485,6 +485,7 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize,
//------------------------------------------------------------------------------
// Scaling.
#if !defined(WEBP_REDUCE_SIZE)
static int AllocateAndInitRescaler(VP8LDecoder* const dec, VP8Io* const io) {
const int num_channels = 4;
const int in_width = io->mb_w;
@ -516,10 +517,13 @@ static int AllocateAndInitRescaler(VP8LDecoder* const dec, VP8Io* const io) {
out_width, out_height, 0, num_channels, work);
return 1;
}
#endif // WEBP_REDUCE_SIZE
//------------------------------------------------------------------------------
// Export to ARGB
#if !defined(WEBP_REDUCE_SIZE)
// We have special "export" function since we need to convert from BGRA
static int Export(WebPRescaler* const rescaler, WEBP_CSP_MODE colorspace,
int rgba_stride, uint8_t* const rgba) {
@ -561,6 +565,8 @@ static int EmitRescaledRowsRGBA(const VP8LDecoder* const dec,
return num_lines_out;
}
#endif // WEBP_REDUCE_SIZE
// Emit rows without any scaling.
static int EmitRows(WEBP_CSP_MODE colorspace,
const uint8_t* row_in, int in_stride,
@ -746,9 +752,12 @@ static void ProcessRows(VP8LDecoder* const dec, int row) {
if (WebPIsRGBMode(output->colorspace)) { // convert to RGBA
const WebPRGBABuffer* const buf = &output->u.RGBA;
uint8_t* const rgba = buf->rgba + dec->last_out_row_ * buf->stride;
const int num_rows_out = io->use_scaling ?
const int num_rows_out =
#if !defined(WEBP_REDUCE_SIZE)
io->use_scaling ?
EmitRescaledRowsRGBA(dec, rows_data, in_stride, io->mb_h,
rgba, buf->stride) :
#endif // WEBP_REDUCE_SIZE
EmitRows(output->colorspace, rows_data, in_stride,
io->mb_w, io->mb_h, rgba, buf->stride);
// Update 'last_out_row_'.
@ -1632,12 +1641,19 @@ int VP8LDecodeImage(VP8LDecoder* const dec) {
if (!AllocateInternalBuffers32b(dec, io->width)) goto Err;
#if !defined(WEBP_REDUCE_SIZE)
if (io->use_scaling && !AllocateAndInitRescaler(dec, io)) goto Err;
if (io->use_scaling || WebPIsPremultipliedMode(dec->output_->colorspace)) {
// need the alpha-multiply functions for premultiplied output or rescaling
WebPInitAlphaProcessing();
}
#else
if (io->use_scaling) {
dec->status_ = VP8_STATUS_INVALID_PARAM;
goto Err;
}
#endif
if (!WebPIsRGBMode(dec->output_->colorspace)) {
WebPInitConvertARGBToYUV();
if (dec->output_->u.YUVA.a != NULL) WebPInitAlphaProcessing();

View File

@ -12,14 +12,14 @@
// Author: Skal (pascal.massimino@gmail.com)
// Vikas Arora(vikaas.arora@gmail.com)
#ifndef WEBP_DEC_VP8LI_H_
#define WEBP_DEC_VP8LI_H_
#ifndef WEBP_DEC_VP8LI_DEC_H_
#define WEBP_DEC_VP8LI_DEC_H_
#include <string.h> // for memcpy()
#include "./webpi_dec.h"
#include "../utils/bit_reader_utils.h"
#include "../utils/color_cache_utils.h"
#include "../utils/huffman_utils.h"
#include "src/dec/webpi_dec.h"
#include "src/utils/bit_reader_utils.h"
#include "src/utils/color_cache_utils.h"
#include "src/utils/huffman_utils.h"
#ifdef __cplusplus
extern "C" {
@ -132,4 +132,4 @@ void VP8LDelete(VP8LDecoder* const dec);
} // extern "C"
#endif
#endif /* WEBP_DEC_VP8LI_H_ */
#endif /* WEBP_DEC_VP8LI_DEC_H_ */

View File

@ -13,11 +13,11 @@
#include <stdlib.h>
#include "./vp8i_dec.h"
#include "./vp8li_dec.h"
#include "./webpi_dec.h"
#include "../utils/utils.h"
#include "../webp/mux_types.h" // ALPHA_FLAG
#include "src/dec/vp8i_dec.h"
#include "src/dec/vp8li_dec.h"
#include "src/dec/webpi_dec.h"
#include "src/utils/utils.h"
#include "src/webp/mux_types.h" // ALPHA_FLAG
//------------------------------------------------------------------------------
// RIFF layout is:
@ -421,7 +421,9 @@ VP8StatusCode WebPParseHeaders(WebPHeaderStructure* const headers) {
NULL, NULL, NULL, &has_animation,
NULL, headers);
if (status == VP8_STATUS_OK || status == VP8_STATUS_NOT_ENOUGH_DATA) {
// TODO(jzern): full support of animation frames will require API additions.
// The WebPDemux API + libwebp can be used to decode individual
// uncomposited frames or the WebPAnimDecoder can be used to fully
// reconstruct them (see webp/demux.h).
if (has_animation) {
status = VP8_STATUS_UNSUPPORTED_FEATURE;
}

View File

@ -11,15 +11,15 @@
//
// Author: somnath@google.com (Somnath Banerjee)
#ifndef WEBP_DEC_WEBPI_H_
#define WEBP_DEC_WEBPI_H_
#ifndef WEBP_DEC_WEBPI_DEC_H_
#define WEBP_DEC_WEBPI_DEC_H_
#ifdef __cplusplus
extern "C" {
#endif
#include "../utils/rescaler_utils.h"
#include "./vp8_dec.h"
#include "src/utils/rescaler_utils.h"
#include "src/dec/vp8_dec.h"
//------------------------------------------------------------------------------
// WebPDecParams: Decoding output parameters. Transient internal object.
@ -130,4 +130,4 @@ int WebPAvoidSlowMemory(const WebPDecBuffer* const output,
} // extern "C"
#endif
#endif /* WEBP_DEC_WEBPI_H_ */
#endif /* WEBP_DEC_WEBPI_DEC_H_ */

View File

@ -1,3 +1,4 @@
AM_CPPFLAGS += -I$(top_builddir) -I$(top_srcdir)
lib_LTLIBRARIES = libwebpdemux.la
libwebpdemux_la_SOURCES =
@ -9,6 +10,6 @@ libwebpdemuxinclude_HEADERS += ../webp/mux_types.h
libwebpdemuxinclude_HEADERS += ../webp/types.h
libwebpdemux_la_LIBADD = ../libwebp.la
libwebpdemux_la_LDFLAGS = -no-undefined -version-info 2:2:0
libwebpdemux_la_LDFLAGS = -no-undefined -version-info 2:3:0
libwebpdemuxincludedir = $(includedir)/webp
pkgconfig_DATA = libwebpdemux.pc

View File

@ -11,15 +11,15 @@
//
#ifdef HAVE_CONFIG_H
#include "../webp/config.h"
#include "src/webp/config.h"
#endif
#include <assert.h>
#include <string.h>
#include "../utils/utils.h"
#include "../webp/decode.h"
#include "../webp/demux.h"
#include "src/utils/utils.h"
#include "src/webp/decode.h"
#include "src/webp/demux.h"
#define NUM_CHANNELS 4

View File

@ -11,21 +11,21 @@
//
#ifdef HAVE_CONFIG_H
#include "../webp/config.h"
#include "src/webp/config.h"
#endif
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include "../utils/utils.h"
#include "../webp/decode.h" // WebPGetFeatures
#include "../webp/demux.h"
#include "../webp/format_constants.h"
#include "src/utils/utils.h"
#include "src/webp/decode.h" // WebPGetFeatures
#include "src/webp/demux.h"
#include "src/webp/format_constants.h"
#define DMUX_MAJ_VERSION 0
#define DMUX_MIN_VERSION 3
#define DMUX_REV_VERSION 2
#define DMUX_REV_VERSION 3
typedef struct {
size_t start_; // start location of the data
@ -205,12 +205,14 @@ static void SetFrameInfo(size_t start_offset, size_t size,
frame->complete_ = complete;
}
// Store image bearing chunks to 'frame'.
// Store image bearing chunks to 'frame'. 'min_size' is an optional size
// requirement, it may be zero.
static ParseStatus StoreFrame(int frame_num, uint32_t min_size,
MemBuffer* const mem, Frame* const frame) {
int alpha_chunks = 0;
int image_chunks = 0;
int done = (MemDataSize(mem) < min_size);
int done = (MemDataSize(mem) < CHUNK_HEADER_SIZE ||
MemDataSize(mem) < min_size);
ParseStatus status = PARSE_OK;
if (done) return PARSE_NEED_MORE_DATA;
@ -401,9 +403,9 @@ static ParseStatus ParseSingleImage(WebPDemuxer* const dmux) {
frame = (Frame*)WebPSafeCalloc(1ULL, sizeof(*frame));
if (frame == NULL) return PARSE_ERROR;
// For the single image case we allow parsing of a partial frame, but we need
// at least CHUNK_HEADER_SIZE for parsing.
status = StoreFrame(1, CHUNK_HEADER_SIZE, &dmux->mem_, frame);
// For the single image case we allow parsing of a partial frame, so no
// minimum size is imposed here.
status = StoreFrame(1, 0, &dmux->mem_, frame);
if (status != PARSE_ERROR) {
const int has_alpha = !!(dmux->feature_flags_ & ALPHA_FLAG);
// Clear any alpha when the alpha flag is missing.

View File

@ -6,8 +6,8 @@
LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US
VS_VERSION_INFO VERSIONINFO
FILEVERSION 0,3,0,2
PRODUCTVERSION 0,3,0,2
FILEVERSION 0,3,0,3
PRODUCTVERSION 0,3,0,3
FILEFLAGSMASK 0x3fL
#ifdef _DEBUG
FILEFLAGS 0x1L
@ -24,12 +24,12 @@ BEGIN
BEGIN
VALUE "CompanyName", "Google, Inc."
VALUE "FileDescription", "libwebpdemux DLL"
VALUE "FileVersion", "0.3.2"
VALUE "FileVersion", "0.3.3"
VALUE "InternalName", "libwebpdemux.dll"
VALUE "LegalCopyright", "Copyright (C) 2017"
VALUE "OriginalFilename", "libwebpdemux.dll"
VALUE "ProductName", "WebP Image Demuxer"
VALUE "ProductVersion", "0.3.2"
VALUE "ProductVersion", "0.3.3"
END
END
BLOCK "VarFileInfo"

View File

@ -1,9 +1,15 @@
noinst_LTLIBRARIES = libwebpdsp.la libwebpdsp_avx2.la
noinst_LTLIBRARIES += libwebpdsp_sse2.la libwebpdspdecode_sse2.la
noinst_LTLIBRARIES += libwebpdsp_sse41.la libwebpdspdecode_sse41.la
noinst_LTLIBRARIES += libwebpdsp_neon.la libwebpdspdecode_neon.la
noinst_LTLIBRARIES += libwebpdsp_msa.la libwebpdspdecode_msa.la
noinst_LTLIBRARIES += libwebpdspdecode_wasm.la
AM_CPPFLAGS += -I$(top_builddir) -I$(top_srcdir)
noinst_LTLIBRARIES =
noinst_LTLIBRARIES += libwebpdsp.la
noinst_LTLIBRARIES += libwebpdsp_avx2.la
noinst_LTLIBRARIES += libwebpdsp_sse2.la
noinst_LTLIBRARIES += libwebpdspdecode_sse2.la
noinst_LTLIBRARIES += libwebpdsp_sse41.la
noinst_LTLIBRARIES += libwebpdspdecode_sse41.la
noinst_LTLIBRARIES += libwebpdsp_neon.la
noinst_LTLIBRARIES += libwebpdspdecode_neon.la
noinst_LTLIBRARIES += libwebpdsp_msa.la
noinst_LTLIBRARIES += libwebpdspdecode_msa.la
if BUILD_LIBWEBPDECODER
noinst_LTLIBRARIES += libwebpdspdecode.la
@ -40,8 +46,6 @@ COMMON_SOURCES += yuv_mips32.c
COMMON_SOURCES += yuv_mips_dsp_r2.c
ENC_SOURCES =
ENC_SOURCES += argb.c
ENC_SOURCES += argb_mips_dsp_r2.c
ENC_SOURCES += cost.c
ENC_SOURCES += cost_mips32.c
ENC_SOURCES += cost_mips_dsp_r2.c
@ -97,12 +101,7 @@ libwebpdspdecode_msa_la_SOURCES += upsampling_msa.c
libwebpdspdecode_msa_la_CPPFLAGS = $(libwebpdsp_msa_la_CPPFLAGS)
libwebpdspdecode_msa_la_CFLAGS = $(libwebpdsp_msa_la_CFLAGS)
# WASM is not fully integrated into configure; the addition here keeps source
# extraction by cmake simple.
libwebpdspdecode_wasm_la_SOURCES = dec_wasm.c
libwebpdsp_sse2_la_SOURCES =
libwebpdsp_sse2_la_SOURCES += argb_sse2.c
libwebpdsp_sse2_la_SOURCES += cost_sse2.c
libwebpdsp_sse2_la_SOURCES += enc_sse2.c
libwebpdsp_sse2_la_SOURCES += lossless_enc_sse2.c
@ -143,7 +142,8 @@ libwebpdsp_la_CPPFLAGS += $(AM_CPPFLAGS)
libwebpdsp_la_CPPFLAGS += $(USE_EXPERIMENTAL_CODE) $(USE_SWAP_16BIT_CSP)
libwebpdsp_la_LDFLAGS = -lm
libwebpdsp_la_LIBADD =
libwebpdsp_la_LIBADD += libwebpdsp_avx2.la libwebpdsp_sse2.la
libwebpdsp_la_LIBADD += libwebpdsp_avx2.la
libwebpdsp_la_LIBADD += libwebpdsp_sse2.la
libwebpdsp_la_LIBADD += libwebpdsp_sse41.la
libwebpdsp_la_LIBADD += libwebpdsp_neon.la
libwebpdsp_la_LIBADD += libwebpdsp_msa.la

View File

@ -12,10 +12,13 @@
// Author: Skal (pascal.massimino@gmail.com)
#include <assert.h>
#include "./dsp.h"
#include "src/dsp/dsp.h"
// Tables can be faster on some platform but incur some extra binary size (~2k).
// #define USE_TABLES_FOR_ALPHA_MULT
#if !defined(USE_TABLES_FOR_ALPHA_MULT)
#define USE_TABLES_FOR_ALPHA_MULT 0 // ALTERNATE_CODE
#endif
// -----------------------------------------------------------------------------
@ -29,7 +32,7 @@ static uint32_t Mult(uint8_t x, uint32_t mult) {
return v;
}
#ifdef USE_TABLES_FOR_ALPHA_MULT
#if (USE_TABLES_FOR_ALPHA_MULT == 1)
static const uint32_t kMultTables[2][256] = {
{ // (255u << MFIX) / alpha
@ -132,9 +135,9 @@ static WEBP_INLINE uint32_t GetScale(uint32_t a, int inverse) {
return inverse ? (255u << MFIX) / a : a * KINV_255;
}
#endif // USE_TABLES_FOR_ALPHA_MULT
#endif // USE_TABLES_FOR_ALPHA_MULT
void WebPMultARGBRowC(uint32_t* const ptr, int width, int inverse) {
void WebPMultARGBRow_C(uint32_t* const ptr, int width, int inverse) {
int x;
for (x = 0; x < width; ++x) {
const uint32_t argb = ptr[x];
@ -154,8 +157,8 @@ void WebPMultARGBRowC(uint32_t* const ptr, int width, int inverse) {
}
}
void WebPMultRowC(uint8_t* const ptr, const uint8_t* const alpha,
int width, int inverse) {
void WebPMultRow_C(uint8_t* const ptr, const uint8_t* const alpha,
int width, int inverse) {
int x;
for (x = 0; x < width; ++x) {
const uint32_t a = alpha[x];
@ -217,8 +220,9 @@ void WebPMultRows(uint8_t* ptr, int stride,
#define PREMULTIPLY(x, m) (((x) * (m) + (1U << 23)) >> 24)
#endif
static void ApplyAlphaMultiply(uint8_t* rgba, int alpha_first,
int w, int h, int stride) {
#if !WEBP_NEON_OMIT_C_CODE
static void ApplyAlphaMultiply_C(uint8_t* rgba, int alpha_first,
int w, int h, int stride) {
while (h-- > 0) {
uint8_t* const rgb = rgba + (alpha_first ? 1 : 0);
const uint8_t* const alpha = rgba + (alpha_first ? 0 : 3);
@ -235,6 +239,7 @@ static void ApplyAlphaMultiply(uint8_t* rgba, int alpha_first,
rgba += stride;
}
}
#endif // !WEBP_NEON_OMIT_C_CODE
#undef MULTIPLIER
#undef PREMULTIPLY
@ -254,9 +259,9 @@ static WEBP_INLINE uint8_t multiply(uint8_t x, uint32_t m) {
return (x * m) >> 16;
}
static WEBP_INLINE void ApplyAlphaMultiply4444(uint8_t* rgba4444,
int w, int h, int stride,
int rg_byte_pos /* 0 or 1 */) {
static WEBP_INLINE void ApplyAlphaMultiply4444_C(uint8_t* rgba4444,
int w, int h, int stride,
int rg_byte_pos /* 0 or 1 */) {
while (h-- > 0) {
int i;
for (i = 0; i < w; ++i) {
@ -275,15 +280,16 @@ static WEBP_INLINE void ApplyAlphaMultiply4444(uint8_t* rgba4444,
}
#undef MULTIPLIER
static void ApplyAlphaMultiply_16b(uint8_t* rgba4444,
int w, int h, int stride) {
#ifdef WEBP_SWAP_16BIT_CSP
ApplyAlphaMultiply4444(rgba4444, w, h, stride, 1);
static void ApplyAlphaMultiply_16b_C(uint8_t* rgba4444,
int w, int h, int stride) {
#if (WEBP_SWAP_16BIT_CSP == 1)
ApplyAlphaMultiply4444_C(rgba4444, w, h, stride, 1);
#else
ApplyAlphaMultiply4444(rgba4444, w, h, stride, 0);
ApplyAlphaMultiply4444_C(rgba4444, w, h, stride, 0);
#endif
}
#if !WEBP_NEON_OMIT_C_CODE
static int DispatchAlpha_C(const uint8_t* alpha, int alpha_stride,
int width, int height,
uint8_t* dst, int dst_stride) {
@ -338,6 +344,36 @@ static void ExtractGreen_C(const uint32_t* argb, uint8_t* alpha, int size) {
int i;
for (i = 0; i < size; ++i) alpha[i] = argb[i] >> 8;
}
#endif // !WEBP_NEON_OMIT_C_CODE
//------------------------------------------------------------------------------
static int HasAlpha8b_C(const uint8_t* src, int length) {
while (length-- > 0) if (*src++ != 0xff) return 1;
return 0;
}
static int HasAlpha32b_C(const uint8_t* src, int length) {
int x;
for (x = 0; length-- > 0; x += 4) if (src[x] != 0xff) return 1;
return 0;
}
//------------------------------------------------------------------------------
// Simple channel manipulations.
static WEBP_INLINE uint32_t MakeARGB32(int a, int r, int g, int b) {
return (((uint32_t)a << 24) | (r << 16) | (g << 8) | b);
}
static void PackRGB_C(const uint8_t* r, const uint8_t* g, const uint8_t* b,
int len, int step, uint32_t* out) {
int i, offset = 0;
for (i = 0; i < len; ++i) {
out[i] = MakeARGB32(0xff, r[offset], g[offset], b[offset]);
offset += step;
}
}
void (*WebPApplyAlphaMultiply)(uint8_t*, int, int, int, int);
void (*WebPApplyAlphaMultiply4444)(uint8_t*, int, int, int);
@ -345,6 +381,11 @@ int (*WebPDispatchAlpha)(const uint8_t*, int, int, int, uint8_t*, int);
void (*WebPDispatchAlphaToGreen)(const uint8_t*, int, int, int, uint32_t*, int);
int (*WebPExtractAlpha)(const uint8_t*, int, int, int, uint8_t*, int);
void (*WebPExtractGreen)(const uint32_t* argb, uint8_t* alpha, int size);
void (*WebPPackRGB)(const uint8_t* r, const uint8_t* g, const uint8_t* b,
int len, int step, uint32_t* out);
int (*WebPHasAlpha8b)(const uint8_t* src, int length);
int (*WebPHasAlpha32b)(const uint8_t* src, int length);
//------------------------------------------------------------------------------
// Init function
@ -360,15 +401,21 @@ static volatile VP8CPUInfo alpha_processing_last_cpuinfo_used =
WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessing(void) {
if (alpha_processing_last_cpuinfo_used == VP8GetCPUInfo) return;
WebPMultARGBRow = WebPMultARGBRowC;
WebPMultRow = WebPMultRowC;
WebPApplyAlphaMultiply = ApplyAlphaMultiply;
WebPApplyAlphaMultiply4444 = ApplyAlphaMultiply_16b;
WebPMultARGBRow = WebPMultARGBRow_C;
WebPMultRow = WebPMultRow_C;
WebPApplyAlphaMultiply4444 = ApplyAlphaMultiply_16b_C;
WebPPackRGB = PackRGB_C;
#if !WEBP_NEON_OMIT_C_CODE
WebPApplyAlphaMultiply = ApplyAlphaMultiply_C;
WebPDispatchAlpha = DispatchAlpha_C;
WebPDispatchAlphaToGreen = DispatchAlphaToGreen_C;
WebPExtractAlpha = ExtractAlpha_C;
WebPExtractGreen = ExtractGreen_C;
#endif
WebPHasAlpha8b = HasAlpha8b_C;
WebPHasAlpha32b = HasAlpha32b_C;
// If defined, use CPUInfo() to overwrite some pointers with faster versions.
if (VP8GetCPUInfo != NULL) {
@ -382,16 +429,31 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessing(void) {
#endif
}
#endif
#if defined(WEBP_USE_NEON)
if (VP8GetCPUInfo(kNEON)) {
WebPInitAlphaProcessingNEON();
}
#endif
#if defined(WEBP_USE_MIPS_DSP_R2)
if (VP8GetCPUInfo(kMIPSdspR2)) {
WebPInitAlphaProcessingMIPSdspR2();
}
#endif
}
#if defined(WEBP_USE_NEON)
if (WEBP_NEON_OMIT_C_CODE ||
(VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
WebPInitAlphaProcessingNEON();
}
#endif
assert(WebPMultARGBRow != NULL);
assert(WebPMultRow != NULL);
assert(WebPApplyAlphaMultiply != NULL);
assert(WebPApplyAlphaMultiply4444 != NULL);
assert(WebPDispatchAlpha != NULL);
assert(WebPDispatchAlphaToGreen != NULL);
assert(WebPExtractAlpha != NULL);
assert(WebPExtractGreen != NULL);
assert(WebPPackRGB != NULL);
assert(WebPHasAlpha8b != NULL);
assert(WebPHasAlpha32b != NULL);
alpha_processing_last_cpuinfo_used = VP8GetCPUInfo;
}

View File

@ -12,13 +12,13 @@
// Author(s): Branimir Vasic (branimir.vasic@imgtec.com)
// Djordje Pesut (djordje.pesut@imgtec.com)
#include "./dsp.h"
#include "src/dsp/dsp.h"
#if defined(WEBP_USE_MIPS_DSP_R2)
static int DispatchAlpha(const uint8_t* alpha, int alpha_stride,
int width, int height,
uint8_t* dst, int dst_stride) {
static int DispatchAlpha_MIPSdspR2(const uint8_t* alpha, int alpha_stride,
int width, int height,
uint8_t* dst, int dst_stride) {
uint32_t alpha_mask = 0xffffffff;
int i, j, temp0;
@ -79,7 +79,8 @@ static int DispatchAlpha(const uint8_t* alpha, int alpha_stride,
return (alpha_mask != 0xff);
}
static void MultARGBRow(uint32_t* const ptr, int width, int inverse) {
static void MultARGBRow_MIPSdspR2(uint32_t* const ptr, int width,
int inverse) {
int x;
const uint32_t c_00ffffff = 0x00ffffffu;
const uint32_t c_ff000000 = 0xff000000u;
@ -124,14 +125,54 @@ static void MultARGBRow(uint32_t* const ptr, int width, int inverse) {
}
}
static void PackRGB_MIPSdspR2(const uint8_t* r, const uint8_t* g,
const uint8_t* b, int len, int step,
uint32_t* out) {
int temp0, temp1, temp2, offset;
const int rest = len & 1;
const int a = 0xff;
const uint32_t* const loop_end = out + len - rest;
__asm__ volatile (
"xor %[offset], %[offset], %[offset] \n\t"
"beq %[loop_end], %[out], 0f \n\t"
"2: \n\t"
"lbux %[temp0], %[offset](%[r]) \n\t"
"lbux %[temp1], %[offset](%[g]) \n\t"
"lbux %[temp2], %[offset](%[b]) \n\t"
"ins %[temp0], %[a], 16, 16 \n\t"
"ins %[temp2], %[temp1], 16, 16 \n\t"
"addiu %[out], %[out], 4 \n\t"
"precr.qb.ph %[temp0], %[temp0], %[temp2] \n\t"
"sw %[temp0], -4(%[out]) \n\t"
"addu %[offset], %[offset], %[step] \n\t"
"bne %[loop_end], %[out], 2b \n\t"
"0: \n\t"
"beq %[rest], $zero, 1f \n\t"
"lbux %[temp0], %[offset](%[r]) \n\t"
"lbux %[temp1], %[offset](%[g]) \n\t"
"lbux %[temp2], %[offset](%[b]) \n\t"
"ins %[temp0], %[a], 16, 16 \n\t"
"ins %[temp2], %[temp1], 16, 16 \n\t"
"precr.qb.ph %[temp0], %[temp0], %[temp2] \n\t"
"sw %[temp0], 0(%[out]) \n\t"
"1: \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[offset]"=&r"(offset), [out]"+&r"(out)
: [a]"r"(a), [r]"r"(r), [g]"r"(g), [b]"r"(b), [step]"r"(step),
[loop_end]"r"(loop_end), [rest]"r"(rest)
: "memory"
);
}
//------------------------------------------------------------------------------
// Entry point
extern void WebPInitAlphaProcessingMIPSdspR2(void);
WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessingMIPSdspR2(void) {
WebPDispatchAlpha = DispatchAlpha;
WebPMultARGBRow = MultARGBRow;
WebPDispatchAlpha = DispatchAlpha_MIPSdspR2;
WebPMultARGBRow = MultARGBRow_MIPSdspR2;
WebPPackRGB = PackRGB_MIPSdspR2;
}
#else // !WEBP_USE_MIPS_DSP_R2

View File

@ -11,11 +11,11 @@
//
// Author: Skal (pascal.massimino@gmail.com)
#include "./dsp.h"
#include "src/dsp/dsp.h"
#if defined(WEBP_USE_NEON)
#include "./neon.h"
#include "src/dsp/neon.h"
//------------------------------------------------------------------------------

View File

@ -11,16 +11,16 @@
//
// Author: Skal (pascal.massimino@gmail.com)
#include "./dsp.h"
#include "src/dsp/dsp.h"
#if defined(WEBP_USE_SSE2)
#include <emmintrin.h>
//------------------------------------------------------------------------------
static int DispatchAlpha(const uint8_t* alpha, int alpha_stride,
int width, int height,
uint8_t* dst, int dst_stride) {
static int DispatchAlpha_SSE2(const uint8_t* alpha, int alpha_stride,
int width, int height,
uint8_t* dst, int dst_stride) {
// alpha_and stores an 'and' operation of all the alpha[] values. The final
// value is not 0xff if any of the alpha[] is not equal to 0xff.
uint32_t alpha_and = 0xff;
@ -72,9 +72,9 @@ static int DispatchAlpha(const uint8_t* alpha, int alpha_stride,
return (alpha_and != 0xff);
}
static void DispatchAlphaToGreen(const uint8_t* alpha, int alpha_stride,
int width, int height,
uint32_t* dst, int dst_stride) {
static void DispatchAlphaToGreen_SSE2(const uint8_t* alpha, int alpha_stride,
int width, int height,
uint32_t* dst, int dst_stride) {
int i, j;
const __m128i zero = _mm_setzero_si128();
const int limit = width & ~15;
@ -98,9 +98,9 @@ static void DispatchAlphaToGreen(const uint8_t* alpha, int alpha_stride,
}
}
static int ExtractAlpha(const uint8_t* argb, int argb_stride,
int width, int height,
uint8_t* alpha, int alpha_stride) {
static int ExtractAlpha_SSE2(const uint8_t* argb, int argb_stride,
int width, int height,
uint8_t* alpha, int alpha_stride) {
// alpha_and stores an 'and' operation of all the alpha[] values. The final
// value is not 0xff if any of the alpha[] is not equal to 0xff.
uint32_t alpha_and = 0xff;
@ -210,6 +210,61 @@ static void ApplyAlphaMultiply_SSE2(uint8_t* rgba, int alpha_first,
#undef MULTIPLIER
#undef PREMULTIPLY
//------------------------------------------------------------------------------
// Alpha detection
static int HasAlpha8b_SSE2(const uint8_t* src, int length) {
const __m128i all_0xff = _mm_set1_epi8(0xff);
int i = 0;
for (; i + 16 <= length; i += 16) {
const __m128i v = _mm_loadu_si128((const __m128i*)(src + i));
const __m128i bits = _mm_cmpeq_epi8(v, all_0xff);
const int mask = _mm_movemask_epi8(bits);
if (mask != 0xffff) return 1;
}
for (; i < length; ++i) if (src[i] != 0xff) return 1;
return 0;
}
static int HasAlpha32b_SSE2(const uint8_t* src, int length) {
const __m128i alpha_mask = _mm_set1_epi32(0xff);
const __m128i all_0xff = _mm_set1_epi8(0xff);
int i = 0;
// We don't know if we can access the last 3 bytes after the last alpha
// value 'src[4 * length - 4]' (because we don't know if alpha is the first
// or the last byte of the quadruplet). Hence the '-3' protection below.
length = length * 4 - 3; // size in bytes
for (; i + 64 <= length; i += 64) {
const __m128i a0 = _mm_loadu_si128((const __m128i*)(src + i + 0));
const __m128i a1 = _mm_loadu_si128((const __m128i*)(src + i + 16));
const __m128i a2 = _mm_loadu_si128((const __m128i*)(src + i + 32));
const __m128i a3 = _mm_loadu_si128((const __m128i*)(src + i + 48));
const __m128i b0 = _mm_and_si128(a0, alpha_mask);
const __m128i b1 = _mm_and_si128(a1, alpha_mask);
const __m128i b2 = _mm_and_si128(a2, alpha_mask);
const __m128i b3 = _mm_and_si128(a3, alpha_mask);
const __m128i c0 = _mm_packs_epi32(b0, b1);
const __m128i c1 = _mm_packs_epi32(b2, b3);
const __m128i d = _mm_packus_epi16(c0, c1);
const __m128i bits = _mm_cmpeq_epi8(d, all_0xff);
const int mask = _mm_movemask_epi8(bits);
if (mask != 0xffff) return 1;
}
for (; i + 32 <= length; i += 32) {
const __m128i a0 = _mm_loadu_si128((const __m128i*)(src + i + 0));
const __m128i a1 = _mm_loadu_si128((const __m128i*)(src + i + 16));
const __m128i b0 = _mm_and_si128(a0, alpha_mask);
const __m128i b1 = _mm_and_si128(a1, alpha_mask);
const __m128i c = _mm_packs_epi32(b0, b1);
const __m128i d = _mm_packus_epi16(c, c);
const __m128i bits = _mm_cmpeq_epi8(d, all_0xff);
const int mask = _mm_movemask_epi8(bits);
if (mask != 0xffff) return 1;
}
for (; i <= length; i += 4) if (src[i] != 0xff) return 1;
return 0;
}
// -----------------------------------------------------------------------------
// Apply alpha value to rows
@ -238,7 +293,7 @@ static void MultARGBRow_SSE2(uint32_t* const ptr, int width, int inverse) {
}
}
width -= x;
if (width > 0) WebPMultARGBRowC(ptr + x, width, inverse);
if (width > 0) WebPMultARGBRow_C(ptr + x, width, inverse);
}
static void MultRow_SSE2(uint8_t* const ptr, const uint8_t* const alpha,
@ -261,7 +316,7 @@ static void MultRow_SSE2(uint8_t* const ptr, const uint8_t* const alpha,
}
}
width -= x;
if (width > 0) WebPMultRowC(ptr + x, alpha + x, width, inverse);
if (width > 0) WebPMultRow_C(ptr + x, alpha + x, width, inverse);
}
//------------------------------------------------------------------------------
@ -273,9 +328,12 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessingSSE2(void) {
WebPMultARGBRow = MultARGBRow_SSE2;
WebPMultRow = MultRow_SSE2;
WebPApplyAlphaMultiply = ApplyAlphaMultiply_SSE2;
WebPDispatchAlpha = DispatchAlpha;
WebPDispatchAlphaToGreen = DispatchAlphaToGreen;
WebPExtractAlpha = ExtractAlpha;
WebPDispatchAlpha = DispatchAlpha_SSE2;
WebPDispatchAlphaToGreen = DispatchAlphaToGreen_SSE2;
WebPExtractAlpha = ExtractAlpha_SSE2;
WebPHasAlpha8b = HasAlpha8b_SSE2;
WebPHasAlpha32b = HasAlpha32b_SSE2;
}
#else // !WEBP_USE_SSE2

View File

@ -11,7 +11,7 @@
//
// Author: Skal (pascal.massimino@gmail.com)
#include "./dsp.h"
#include "src/dsp/dsp.h"
#if defined(WEBP_USE_SSE41)
@ -19,9 +19,9 @@
//------------------------------------------------------------------------------
static int ExtractAlpha(const uint8_t* argb, int argb_stride,
int width, int height,
uint8_t* alpha, int alpha_stride) {
static int ExtractAlpha_SSE41(const uint8_t* argb, int argb_stride,
int width, int height,
uint8_t* alpha, int alpha_stride) {
// alpha_and stores an 'and' operation of all the alpha[] values. The final
// value is not 0xff if any of the alpha[] is not equal to 0xff.
uint32_t alpha_and = 0xff;
@ -82,7 +82,7 @@ static int ExtractAlpha(const uint8_t* argb, int argb_stride,
extern void WebPInitAlphaProcessingSSE41(void);
WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessingSSE41(void) {
WebPExtractAlpha = ExtractAlpha;
WebPExtractAlpha = ExtractAlpha_SSE41;
}
#else // !WEBP_USE_SSE41

View File

@ -1,68 +0,0 @@
// Copyright 2014 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// ARGB making functions.
//
// Author: Djordje Pesut (djordje.pesut@imgtec.com)
#include "./dsp.h"
static WEBP_INLINE uint32_t MakeARGB32(int a, int r, int g, int b) {
return (((uint32_t)a << 24) | (r << 16) | (g << 8) | b);
}
static void PackARGB(const uint8_t* a, const uint8_t* r, const uint8_t* g,
const uint8_t* b, int len, uint32_t* out) {
int i;
for (i = 0; i < len; ++i) {
out[i] = MakeARGB32(a[4 * i], r[4 * i], g[4 * i], b[4 * i]);
}
}
static void PackRGB(const uint8_t* r, const uint8_t* g, const uint8_t* b,
int len, int step, uint32_t* out) {
int i, offset = 0;
for (i = 0; i < len; ++i) {
out[i] = MakeARGB32(0xff, r[offset], g[offset], b[offset]);
offset += step;
}
}
void (*VP8PackARGB)(const uint8_t*, const uint8_t*, const uint8_t*,
const uint8_t*, int, uint32_t*);
void (*VP8PackRGB)(const uint8_t*, const uint8_t*, const uint8_t*,
int, int, uint32_t*);
extern void VP8EncDspARGBInitMIPSdspR2(void);
extern void VP8EncDspARGBInitSSE2(void);
static volatile VP8CPUInfo argb_last_cpuinfo_used =
(VP8CPUInfo)&argb_last_cpuinfo_used;
WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspARGBInit(void) {
if (argb_last_cpuinfo_used == VP8GetCPUInfo) return;
VP8PackARGB = PackARGB;
VP8PackRGB = PackRGB;
// If defined, use CPUInfo() to overwrite some pointers with faster versions.
if (VP8GetCPUInfo != NULL) {
#if defined(WEBP_USE_SSE2)
if (VP8GetCPUInfo(kSSE2)) {
VP8EncDspARGBInitSSE2();
}
#endif
#if defined(WEBP_USE_MIPS_DSP_R2)
if (VP8GetCPUInfo(kMIPSdspR2)) {
VP8EncDspARGBInitMIPSdspR2();
}
#endif
}
argb_last_cpuinfo_used = VP8GetCPUInfo;
}

View File

@ -1,110 +0,0 @@
// Copyright 2014 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// ARGB making functions (mips version).
//
// Author: Djordje Pesut (djordje.pesut@imgtec.com)
#include "./dsp.h"
#if defined(WEBP_USE_MIPS_DSP_R2)
static void PackARGB(const uint8_t* a, const uint8_t* r, const uint8_t* g,
const uint8_t* b, int len, uint32_t* out) {
int temp0, temp1, temp2, temp3, offset;
const int rest = len & 1;
const uint32_t* const loop_end = out + len - rest;
const int step = 4;
__asm__ volatile (
"xor %[offset], %[offset], %[offset] \n\t"
"beq %[loop_end], %[out], 0f \n\t"
"2: \n\t"
"lbux %[temp0], %[offset](%[a]) \n\t"
"lbux %[temp1], %[offset](%[r]) \n\t"
"lbux %[temp2], %[offset](%[g]) \n\t"
"lbux %[temp3], %[offset](%[b]) \n\t"
"ins %[temp1], %[temp0], 16, 16 \n\t"
"ins %[temp3], %[temp2], 16, 16 \n\t"
"addiu %[out], %[out], 4 \n\t"
"precr.qb.ph %[temp0], %[temp1], %[temp3] \n\t"
"sw %[temp0], -4(%[out]) \n\t"
"addu %[offset], %[offset], %[step] \n\t"
"bne %[loop_end], %[out], 2b \n\t"
"0: \n\t"
"beq %[rest], $zero, 1f \n\t"
"lbux %[temp0], %[offset](%[a]) \n\t"
"lbux %[temp1], %[offset](%[r]) \n\t"
"lbux %[temp2], %[offset](%[g]) \n\t"
"lbux %[temp3], %[offset](%[b]) \n\t"
"ins %[temp1], %[temp0], 16, 16 \n\t"
"ins %[temp3], %[temp2], 16, 16 \n\t"
"precr.qb.ph %[temp0], %[temp1], %[temp3] \n\t"
"sw %[temp0], 0(%[out]) \n\t"
"1: \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [offset]"=&r"(offset), [out]"+&r"(out)
: [a]"r"(a), [r]"r"(r), [g]"r"(g), [b]"r"(b), [step]"r"(step),
[loop_end]"r"(loop_end), [rest]"r"(rest)
: "memory"
);
}
static void PackRGB(const uint8_t* r, const uint8_t* g, const uint8_t* b,
int len, int step, uint32_t* out) {
int temp0, temp1, temp2, offset;
const int rest = len & 1;
const int a = 0xff;
const uint32_t* const loop_end = out + len - rest;
__asm__ volatile (
"xor %[offset], %[offset], %[offset] \n\t"
"beq %[loop_end], %[out], 0f \n\t"
"2: \n\t"
"lbux %[temp0], %[offset](%[r]) \n\t"
"lbux %[temp1], %[offset](%[g]) \n\t"
"lbux %[temp2], %[offset](%[b]) \n\t"
"ins %[temp0], %[a], 16, 16 \n\t"
"ins %[temp2], %[temp1], 16, 16 \n\t"
"addiu %[out], %[out], 4 \n\t"
"precr.qb.ph %[temp0], %[temp0], %[temp2] \n\t"
"sw %[temp0], -4(%[out]) \n\t"
"addu %[offset], %[offset], %[step] \n\t"
"bne %[loop_end], %[out], 2b \n\t"
"0: \n\t"
"beq %[rest], $zero, 1f \n\t"
"lbux %[temp0], %[offset](%[r]) \n\t"
"lbux %[temp1], %[offset](%[g]) \n\t"
"lbux %[temp2], %[offset](%[b]) \n\t"
"ins %[temp0], %[a], 16, 16 \n\t"
"ins %[temp2], %[temp1], 16, 16 \n\t"
"precr.qb.ph %[temp0], %[temp0], %[temp2] \n\t"
"sw %[temp0], 0(%[out]) \n\t"
"1: \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[offset]"=&r"(offset), [out]"+&r"(out)
: [a]"r"(a), [r]"r"(r), [g]"r"(g), [b]"r"(b), [step]"r"(step),
[loop_end]"r"(loop_end), [rest]"r"(rest)
: "memory"
);
}
//------------------------------------------------------------------------------
// Entry point
extern void VP8EncDspARGBInitMIPSdspR2(void);
WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspARGBInitMIPSdspR2(void) {
VP8PackARGB = PackARGB;
VP8PackRGB = PackRGB;
}
#else // !WEBP_USE_MIPS_DSP_R2
WEBP_DSP_INIT_STUB(VP8EncDspARGBInitMIPSdspR2)
#endif // WEBP_USE_MIPS_DSP_R2

View File

@ -1,53 +0,0 @@
// Copyright 2014 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// ARGB making functions (SSE2 version).
//
// Author: Skal (pascal.massimino@gmail.com)
#include "./dsp.h"
#include "./lossless.h"
#if defined(WEBP_USE_SSE2)
#include <assert.h>
#include <emmintrin.h>
#include <string.h>
static void PackARGB(const uint8_t* a, const uint8_t* r, const uint8_t* g,
const uint8_t* b, int len, uint32_t* out) {
(void)a;
if (g == r + 1) { // RGBA input order. Need to swap R and B.
assert(b == r + 2);
assert(a == r + 3);
VP8LConvertBGRAToRGBA((const uint32_t*)r, len, (uint8_t*)out);
} else {
assert(g == b + 1);
assert(r == b + 2);
assert(a == b + 3);
memcpy(out, b, len * 4);
}
}
//------------------------------------------------------------------------------
// Entry point
extern void VP8EncDspARGBInitSSE2(void);
extern void VP8LDspInitSSE2(void);
WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspARGBInitSSE2(void) {
VP8LDspInitSSE2();
VP8PackARGB = PackARGB;
}
#else // !WEBP_USE_SSE2
WEBP_DSP_INIT_STUB(VP8EncDspARGBInitSSE2)
#endif // WEBP_USE_SSE2

View File

@ -9,8 +9,8 @@
//
// Author: Skal (pascal.massimino@gmail.com)
#include "./dsp.h"
#include "../enc/cost_enc.h"
#include "src/dsp/dsp.h"
#include "src/enc/cost_enc.h"
//------------------------------------------------------------------------------
// Boolean-cost cost table
@ -319,7 +319,7 @@ const uint8_t VP8EncBands[16 + 1] = {
//------------------------------------------------------------------------------
// Mode costs
static int GetResidualCost(int ctx0, const VP8Residual* const res) {
static int GetResidualCost_C(int ctx0, const VP8Residual* const res) {
int n = res->first;
// should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 1
const int p0 = res->prob[n][ctx0][0];
@ -354,8 +354,8 @@ static int GetResidualCost(int ctx0, const VP8Residual* const res) {
return cost;
}
static void SetResidualCoeffs(const int16_t* const coeffs,
VP8Residual* const res) {
static void SetResidualCoeffs_C(const int16_t* const coeffs,
VP8Residual* const res) {
int n;
res->last = -1;
assert(res->first == 0 || coeffs[0] == 0);
@ -384,8 +384,8 @@ static volatile VP8CPUInfo cost_last_cpuinfo_used =
WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspCostInit(void) {
if (cost_last_cpuinfo_used == VP8GetCPUInfo) return;
VP8GetResidualCost = GetResidualCost;
VP8SetResidualCoeffs = SetResidualCoeffs;
VP8GetResidualCost = GetResidualCost_C;
VP8SetResidualCoeffs = SetResidualCoeffs_C;
// If defined, use CPUInfo() to overwrite some pointers with faster versions.
if (VP8GetCPUInfo != NULL) {

View File

@ -9,13 +9,13 @@
//
// Author: Djordje Pesut (djordje.pesut@imgtec.com)
#include "./dsp.h"
#include "src/dsp/dsp.h"
#if defined(WEBP_USE_MIPS32)
#include "../enc/cost_enc.h"
#include "src/enc/cost_enc.h"
static int GetResidualCost(int ctx0, const VP8Residual* const res) {
static int GetResidualCost_MIPS32(int ctx0, const VP8Residual* const res) {
int temp0, temp1;
int v_reg, ctx_reg;
int n = res->first;
@ -96,8 +96,8 @@ static int GetResidualCost(int ctx0, const VP8Residual* const res) {
return cost;
}
static void SetResidualCoeffs(const int16_t* const coeffs,
VP8Residual* const res) {
static void SetResidualCoeffs_MIPS32(const int16_t* const coeffs,
VP8Residual* const res) {
const int16_t* p_coeffs = (int16_t*)coeffs;
int temp0, temp1, temp2, n, n1;
assert(res->first == 0 || coeffs[0] == 0);
@ -143,8 +143,8 @@ static void SetResidualCoeffs(const int16_t* const coeffs,
extern void VP8EncDspCostInitMIPS32(void);
WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspCostInitMIPS32(void) {
VP8GetResidualCost = GetResidualCost;
VP8SetResidualCoeffs = SetResidualCoeffs;
VP8GetResidualCost = GetResidualCost_MIPS32;
VP8SetResidualCoeffs = SetResidualCoeffs_MIPS32;
}
#else // !WEBP_USE_MIPS32

View File

@ -9,13 +9,13 @@
//
// Author: Djordje Pesut (djordje.pesut@imgtec.com)
#include "./dsp.h"
#include "src/dsp/dsp.h"
#if defined(WEBP_USE_MIPS_DSP_R2)
#include "../enc/cost_enc.h"
#include "src/enc/cost_enc.h"
static int GetResidualCost(int ctx0, const VP8Residual* const res) {
static int GetResidualCost_MIPSdspR2(int ctx0, const VP8Residual* const res) {
int temp0, temp1;
int v_reg, ctx_reg;
int n = res->first;
@ -97,7 +97,7 @@ static int GetResidualCost(int ctx0, const VP8Residual* const res) {
extern void VP8EncDspCostInitMIPSdspR2(void);
WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspCostInitMIPSdspR2(void) {
VP8GetResidualCost = GetResidualCost;
VP8GetResidualCost = GetResidualCost_MIPSdspR2;
}
#else // !WEBP_USE_MIPS_DSP_R2

View File

@ -11,19 +11,19 @@
//
// Author: Skal (pascal.massimino@gmail.com)
#include "./dsp.h"
#include "src/dsp/dsp.h"
#if defined(WEBP_USE_SSE2)
#include <emmintrin.h>
#include "../enc/cost_enc.h"
#include "../enc/vp8i_enc.h"
#include "../utils/utils.h"
#include "src/enc/cost_enc.h"
#include "src/enc/vp8i_enc.h"
#include "src/utils/utils.h"
//------------------------------------------------------------------------------
static void SetResidualCoeffsSSE2(const int16_t* const coeffs,
VP8Residual* const res) {
static void SetResidualCoeffs_SSE2(const int16_t* const coeffs,
VP8Residual* const res) {
const __m128i c0 = _mm_loadu_si128((const __m128i*)(coeffs + 0));
const __m128i c1 = _mm_loadu_si128((const __m128i*)(coeffs + 8));
// Use SSE2 to compare 16 values with a single instruction.
@ -42,7 +42,7 @@ static void SetResidualCoeffsSSE2(const int16_t* const coeffs,
res->coeffs = coeffs;
}
static int GetResidualCostSSE2(int ctx0, const VP8Residual* const res) {
static int GetResidualCost_SSE2(int ctx0, const VP8Residual* const res) {
uint8_t levels[16], ctxs[16];
uint16_t abs_levels[16];
int n = res->first;
@ -108,8 +108,8 @@ static int GetResidualCostSSE2(int ctx0, const VP8Residual* const res) {
extern void VP8EncDspCostInitSSE2(void);
WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspCostInitSSE2(void) {
VP8SetResidualCoeffs = SetResidualCoeffsSSE2;
VP8GetResidualCost = GetResidualCostSSE2;
VP8SetResidualCoeffs = SetResidualCoeffs_SSE2;
VP8GetResidualCost = GetResidualCost_SSE2;
}
#else // !WEBP_USE_SSE2

View File

@ -11,7 +11,7 @@
//
// Author: Christian Duvivier (cduvivier@google.com)
#include "./dsp.h"
#include "src/dsp/dsp.h"
#if defined(WEBP_HAVE_NEON_RTCD)
#include <stdio.h>
@ -23,13 +23,11 @@
#endif
//------------------------------------------------------------------------------
// x86/x86-64 micro-arch detection.
// SSE2 detection.
//
// skip x86 specific code for WASM builds
#if defined(WEBP_USE_WASM)
// apple/darwin gcc-4.0.1 defines __PIC__, but not __pic__ with -fPIC.
#elif (defined(__pic__) || defined(__PIC__)) && defined(__i386__)
#if (defined(__pic__) || defined(__PIC__)) && defined(__i386__)
static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
__asm__ volatile (
"mov %%ebx, %%edi\n"
@ -65,10 +63,8 @@ static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
#define GetCPUInfo __cpuid
#endif
// skip xgetbv definition for WASM builds
#if defined(WEBP_USE_WASM)
// NaCl has no support for xgetbv or the raw opcode.
#elif !defined(__native_client__) && (defined(__i386__) || defined(__x86_64__))
#if !defined(__native_client__) && (defined(__i386__) || defined(__x86_64__))
static WEBP_INLINE uint64_t xgetbv(void) {
const uint32_t ecx = 0;
uint32_t eax, edx;
@ -98,19 +94,7 @@ static WEBP_INLINE uint64_t xgetbv(void) {
#define xgetbv() 0U // no AVX for older x64 or unrecognized toolchains.
#endif
//------------------------------------------------------------------------------
// Platform specific VP8CPUInfo functions.
//
// WASM needs to precede platform specific architecture checks as the defines
// will still be present when building this target.
#if defined(WEBP_USE_WASM)
static int wasmCPUInfo(CPUFeature feature) {
if (feature != kWASM) return 0;
return 1;
}
VP8CPUInfo VP8GetCPUInfo = wasmCPUInfo;
#elif defined(__i386__) || defined(__x86_64__) || defined(WEBP_MSC_SSE2)
#if defined(__i386__) || defined(__x86_64__) || defined(WEBP_MSC_SSE2)
// helper function for run-time detection of slow SSSE3 platforms
static int CheckSlowModel(int info) {
@ -159,7 +143,7 @@ static int x86CPUInfo(CPUFeature feature) {
return !!(cpu_info[2] & (1 << 0));
}
if (feature == kSlowSSSE3) {
if (is_intel && (cpu_info[2] & (1 << 0))) { // SSSE3?
if (is_intel && (cpu_info[2] & (1 << 9))) { // SSSE3?
return CheckSlowModel(cpu_info[0]);
}
return 0;

View File

@ -11,9 +11,11 @@
//
// Author: Skal (pascal.massimino@gmail.com)
#include "./dsp.h"
#include "../dec/vp8i_dec.h"
#include "../utils/utils.h"
#include <assert.h>
#include "src/dsp/dsp.h"
#include "src/dec/vp8i_dec.h"
#include "src/utils/utils.h"
//------------------------------------------------------------------------------
@ -25,7 +27,7 @@ static WEBP_INLINE uint8_t clip_8b(int v) {
// Transforms (Paragraph 14.4)
#define STORE(x, y, v) \
dst[x + y * BPS] = clip_8b(dst[x + y * BPS] + ((v) >> 3))
dst[(x) + (y) * BPS] = clip_8b(dst[(x) + (y) * BPS] + ((v) >> 3))
#define STORE2(y, dc, d, c) do { \
const int DC = (dc); \
@ -38,7 +40,8 @@ static WEBP_INLINE uint8_t clip_8b(int v) {
#define MUL1(a) ((((a) * 20091) >> 16) + (a))
#define MUL2(a) (((a) * 35468) >> 16)
static void TransformOne(const int16_t* in, uint8_t* dst) {
#if !WEBP_NEON_OMIT_C_CODE
static void TransformOne_C(const int16_t* in, uint8_t* dst) {
int C[4 * 4], *tmp;
int i;
tmp = C;
@ -78,7 +81,7 @@ static void TransformOne(const int16_t* in, uint8_t* dst) {
}
// Simplified transform when only in[0], in[1] and in[4] are non-zero
static void TransformAC3(const int16_t* in, uint8_t* dst) {
static void TransformAC3_C(const int16_t* in, uint8_t* dst) {
const int a = in[0] + 4;
const int c4 = MUL2(in[4]);
const int d4 = MUL1(in[4]);
@ -93,19 +96,21 @@ static void TransformAC3(const int16_t* in, uint8_t* dst) {
#undef MUL2
#undef STORE2
static void TransformTwo(const int16_t* in, uint8_t* dst, int do_two) {
TransformOne(in, dst);
static void TransformTwo_C(const int16_t* in, uint8_t* dst, int do_two) {
TransformOne_C(in, dst);
if (do_two) {
TransformOne(in + 16, dst + 4);
TransformOne_C(in + 16, dst + 4);
}
}
#endif // !WEBP_NEON_OMIT_C_CODE
static void TransformUV(const int16_t* in, uint8_t* dst) {
static void TransformUV_C(const int16_t* in, uint8_t* dst) {
VP8Transform(in + 0 * 16, dst, 1);
VP8Transform(in + 2 * 16, dst + 4 * BPS, 1);
}
static void TransformDC(const int16_t* in, uint8_t* dst) {
#if !WEBP_NEON_OMIT_C_CODE
static void TransformDC_C(const int16_t* in, uint8_t* dst) {
const int DC = in[0] + 4;
int i, j;
for (j = 0; j < 4; ++j) {
@ -114,8 +119,9 @@ static void TransformDC(const int16_t* in, uint8_t* dst) {
}
}
}
#endif // !WEBP_NEON_OMIT_C_CODE
static void TransformDCUV(const int16_t* in, uint8_t* dst) {
static void TransformDCUV_C(const int16_t* in, uint8_t* dst) {
if (in[0 * 16]) VP8TransformDC(in + 0 * 16, dst);
if (in[1 * 16]) VP8TransformDC(in + 1 * 16, dst + 4);
if (in[2 * 16]) VP8TransformDC(in + 2 * 16, dst + 4 * BPS);
@ -127,7 +133,8 @@ static void TransformDCUV(const int16_t* in, uint8_t* dst) {
//------------------------------------------------------------------------------
// Paragraph 14.3
static void TransformWHT(const int16_t* in, int16_t* out) {
#if !WEBP_NEON_OMIT_C_CODE
static void TransformWHT_C(const int16_t* in, int16_t* out) {
int tmp[16];
int i;
for (i = 0; i < 4; ++i) {
@ -153,6 +160,7 @@ static void TransformWHT(const int16_t* in, int16_t* out) {
out += 64;
}
}
#endif // !WEBP_NEON_OMIT_C_CODE
void (*VP8TransformWHT)(const int16_t* in, int16_t* out);
@ -161,6 +169,7 @@ void (*VP8TransformWHT)(const int16_t* in, int16_t* out);
#define DST(x, y) dst[(x) + (y) * BPS]
#if !WEBP_NEON_OMIT_C_CODE
static WEBP_INLINE void TrueMotion(uint8_t* dst, int size) {
const uint8_t* top = dst - BPS;
const uint8_t* const clip0 = VP8kclip1 - top[-1];
@ -174,21 +183,21 @@ static WEBP_INLINE void TrueMotion(uint8_t* dst, int size) {
dst += BPS;
}
}
static void TM4(uint8_t* dst) { TrueMotion(dst, 4); }
static void TM8uv(uint8_t* dst) { TrueMotion(dst, 8); }
static void TM16(uint8_t* dst) { TrueMotion(dst, 16); }
static void TM4_C(uint8_t* dst) { TrueMotion(dst, 4); }
static void TM8uv_C(uint8_t* dst) { TrueMotion(dst, 8); }
static void TM16_C(uint8_t* dst) { TrueMotion(dst, 16); }
//------------------------------------------------------------------------------
// 16x16
static void VE16(uint8_t* dst) { // vertical
static void VE16_C(uint8_t* dst) { // vertical
int j;
for (j = 0; j < 16; ++j) {
memcpy(dst + j * BPS, dst - BPS, 16);
}
}
static void HE16(uint8_t* dst) { // horizontal
static void HE16_C(uint8_t* dst) { // horizontal
int j;
for (j = 16; j > 0; --j) {
memset(dst, dst[-1], 16);
@ -203,7 +212,7 @@ static WEBP_INLINE void Put16(int v, uint8_t* dst) {
}
}
static void DC16(uint8_t* dst) { // DC
static void DC16_C(uint8_t* dst) { // DC
int DC = 16;
int j;
for (j = 0; j < 16; ++j) {
@ -212,7 +221,7 @@ static void DC16(uint8_t* dst) { // DC
Put16(DC >> 5, dst);
}
static void DC16NoTop(uint8_t* dst) { // DC with top samples not available
static void DC16NoTop_C(uint8_t* dst) { // DC with top samples not available
int DC = 8;
int j;
for (j = 0; j < 16; ++j) {
@ -221,7 +230,7 @@ static void DC16NoTop(uint8_t* dst) { // DC with top samples not available
Put16(DC >> 4, dst);
}
static void DC16NoLeft(uint8_t* dst) { // DC with left samples not available
static void DC16NoLeft_C(uint8_t* dst) { // DC with left samples not available
int DC = 8;
int i;
for (i = 0; i < 16; ++i) {
@ -230,9 +239,10 @@ static void DC16NoLeft(uint8_t* dst) { // DC with left samples not available
Put16(DC >> 4, dst);
}
static void DC16NoTopLeft(uint8_t* dst) { // DC with no top and left samples
static void DC16NoTopLeft_C(uint8_t* dst) { // DC with no top and left samples
Put16(0x80, dst);
}
#endif // !WEBP_NEON_OMIT_C_CODE
VP8PredFunc VP8PredLuma16[NUM_B_DC_MODES];
@ -242,7 +252,8 @@ VP8PredFunc VP8PredLuma16[NUM_B_DC_MODES];
#define AVG3(a, b, c) ((uint8_t)(((a) + 2 * (b) + (c) + 2) >> 2))
#define AVG2(a, b) (((a) + (b) + 1) >> 1)
static void VE4(uint8_t* dst) { // vertical
#if !WEBP_NEON_OMIT_C_CODE
static void VE4_C(uint8_t* dst) { // vertical
const uint8_t* top = dst - BPS;
const uint8_t vals[4] = {
AVG3(top[-1], top[0], top[1]),
@ -255,8 +266,9 @@ static void VE4(uint8_t* dst) { // vertical
memcpy(dst + i * BPS, vals, sizeof(vals));
}
}
#endif // !WEBP_NEON_OMIT_C_CODE
static void HE4(uint8_t* dst) { // horizontal
static void HE4_C(uint8_t* dst) { // horizontal
const int A = dst[-1 - BPS];
const int B = dst[-1];
const int C = dst[-1 + BPS];
@ -268,7 +280,8 @@ static void HE4(uint8_t* dst) { // horizontal
WebPUint32ToMem(dst + 3 * BPS, 0x01010101U * AVG3(D, E, E));
}
static void DC4(uint8_t* dst) { // DC
#if !WEBP_NEON_OMIT_C_CODE
static void DC4_C(uint8_t* dst) { // DC
uint32_t dc = 4;
int i;
for (i = 0; i < 4; ++i) dc += dst[i - BPS] + dst[-1 + i * BPS];
@ -276,7 +289,7 @@ static void DC4(uint8_t* dst) { // DC
for (i = 0; i < 4; ++i) memset(dst + i * BPS, dc, 4);
}
static void RD4(uint8_t* dst) { // Down-right
static void RD4_C(uint8_t* dst) { // Down-right
const int I = dst[-1 + 0 * BPS];
const int J = dst[-1 + 1 * BPS];
const int K = dst[-1 + 2 * BPS];
@ -295,7 +308,7 @@ static void RD4(uint8_t* dst) { // Down-right
DST(3, 0) = AVG3(D, C, B);
}
static void LD4(uint8_t* dst) { // Down-Left
static void LD4_C(uint8_t* dst) { // Down-Left
const int A = dst[0 - BPS];
const int B = dst[1 - BPS];
const int C = dst[2 - BPS];
@ -312,8 +325,9 @@ static void LD4(uint8_t* dst) { // Down-Left
DST(3, 2) = DST(2, 3) = AVG3(F, G, H);
DST(3, 3) = AVG3(G, H, H);
}
#endif // !WEBP_NEON_OMIT_C_CODE
static void VR4(uint8_t* dst) { // Vertical-Right
static void VR4_C(uint8_t* dst) { // Vertical-Right
const int I = dst[-1 + 0 * BPS];
const int J = dst[-1 + 1 * BPS];
const int K = dst[-1 + 2 * BPS];
@ -335,7 +349,7 @@ static void VR4(uint8_t* dst) { // Vertical-Right
DST(3, 1) = AVG3(B, C, D);
}
static void VL4(uint8_t* dst) { // Vertical-Left
static void VL4_C(uint8_t* dst) { // Vertical-Left
const int A = dst[0 - BPS];
const int B = dst[1 - BPS];
const int C = dst[2 - BPS];
@ -357,7 +371,7 @@ static void VL4(uint8_t* dst) { // Vertical-Left
DST(3, 3) = AVG3(F, G, H);
}
static void HU4(uint8_t* dst) { // Horizontal-Up
static void HU4_C(uint8_t* dst) { // Horizontal-Up
const int I = dst[-1 + 0 * BPS];
const int J = dst[-1 + 1 * BPS];
const int K = dst[-1 + 2 * BPS];
@ -372,7 +386,7 @@ static void HU4(uint8_t* dst) { // Horizontal-Up
DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
}
static void HD4(uint8_t* dst) { // Horizontal-Down
static void HD4_C(uint8_t* dst) { // Horizontal-Down
const int I = dst[-1 + 0 * BPS];
const int J = dst[-1 + 1 * BPS];
const int K = dst[-1 + 2 * BPS];
@ -404,14 +418,15 @@ VP8PredFunc VP8PredLuma4[NUM_BMODES];
//------------------------------------------------------------------------------
// Chroma
static void VE8uv(uint8_t* dst) { // vertical
#if !WEBP_NEON_OMIT_C_CODE
static void VE8uv_C(uint8_t* dst) { // vertical
int j;
for (j = 0; j < 8; ++j) {
memcpy(dst + j * BPS, dst - BPS, 8);
}
}
static void HE8uv(uint8_t* dst) { // horizontal
static void HE8uv_C(uint8_t* dst) { // horizontal
int j;
for (j = 0; j < 8; ++j) {
memset(dst, dst[-1], 8);
@ -427,7 +442,7 @@ static WEBP_INLINE void Put8x8uv(uint8_t value, uint8_t* dst) {
}
}
static void DC8uv(uint8_t* dst) { // DC
static void DC8uv_C(uint8_t* dst) { // DC
int dc0 = 8;
int i;
for (i = 0; i < 8; ++i) {
@ -436,7 +451,7 @@ static void DC8uv(uint8_t* dst) { // DC
Put8x8uv(dc0 >> 4, dst);
}
static void DC8uvNoLeft(uint8_t* dst) { // DC with no left samples
static void DC8uvNoLeft_C(uint8_t* dst) { // DC with no left samples
int dc0 = 4;
int i;
for (i = 0; i < 8; ++i) {
@ -445,7 +460,7 @@ static void DC8uvNoLeft(uint8_t* dst) { // DC with no left samples
Put8x8uv(dc0 >> 3, dst);
}
static void DC8uvNoTop(uint8_t* dst) { // DC with no top samples
static void DC8uvNoTop_C(uint8_t* dst) { // DC with no top samples
int dc0 = 4;
int i;
for (i = 0; i < 8; ++i) {
@ -454,17 +469,19 @@ static void DC8uvNoTop(uint8_t* dst) { // DC with no top samples
Put8x8uv(dc0 >> 3, dst);
}
static void DC8uvNoTopLeft(uint8_t* dst) { // DC with nothing
static void DC8uvNoTopLeft_C(uint8_t* dst) { // DC with nothing
Put8x8uv(0x80, dst);
}
#endif // !WEBP_NEON_OMIT_C_CODE
VP8PredFunc VP8PredChroma8[NUM_B_DC_MODES];
//------------------------------------------------------------------------------
// Edge filtering functions
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
// 4 pixels in, 2 pixels out
static WEBP_INLINE void do_filter2(uint8_t* p, int step) {
static WEBP_INLINE void DoFilter2_C(uint8_t* p, int step) {
const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
const int a = 3 * (q0 - p0) + VP8ksclip1[p1 - q1]; // in [-893,892]
const int a1 = VP8ksclip2[(a + 4) >> 3]; // in [-16,15]
@ -474,7 +491,7 @@ static WEBP_INLINE void do_filter2(uint8_t* p, int step) {
}
// 4 pixels in, 4 pixels out
static WEBP_INLINE void do_filter4(uint8_t* p, int step) {
static WEBP_INLINE void DoFilter4_C(uint8_t* p, int step) {
const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
const int a = 3 * (q0 - p0);
const int a1 = VP8ksclip2[(a + 4) >> 3];
@ -487,7 +504,7 @@ static WEBP_INLINE void do_filter4(uint8_t* p, int step) {
}
// 6 pixels in, 6 pixels out
static WEBP_INLINE void do_filter6(uint8_t* p, int step) {
static WEBP_INLINE void DoFilter6_C(uint8_t* p, int step) {
const int p2 = p[-3*step], p1 = p[-2*step], p0 = p[-step];
const int q0 = p[0], q1 = p[step], q2 = p[2*step];
const int a = VP8ksclip1[3 * (q0 - p0) + VP8ksclip1[p1 - q1]];
@ -503,18 +520,22 @@ static WEBP_INLINE void do_filter6(uint8_t* p, int step) {
p[ 2*step] = VP8kclip1[q2 - a3];
}
static WEBP_INLINE int hev(const uint8_t* p, int step, int thresh) {
static WEBP_INLINE int Hev(const uint8_t* p, int step, int thresh) {
const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
return (VP8kabs0[p1 - p0] > thresh) || (VP8kabs0[q1 - q0] > thresh);
}
#endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
static WEBP_INLINE int needs_filter(const uint8_t* p, int step, int t) {
#if !WEBP_NEON_OMIT_C_CODE
static WEBP_INLINE int NeedsFilter_C(const uint8_t* p, int step, int t) {
const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];
return ((4 * VP8kabs0[p0 - q0] + VP8kabs0[p1 - q1]) <= t);
}
#endif // !WEBP_NEON_OMIT_C_CODE
static WEBP_INLINE int needs_filter2(const uint8_t* p,
int step, int t, int it) {
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
static WEBP_INLINE int NeedsFilter2_C(const uint8_t* p,
int step, int t, int it) {
const int p3 = p[-4 * step], p2 = p[-3 * step], p1 = p[-2 * step];
const int p0 = p[-step], q0 = p[0];
const int q1 = p[step], q2 = p[2 * step], q3 = p[3 * step];
@ -523,140 +544,159 @@ static WEBP_INLINE int needs_filter2(const uint8_t* p,
VP8kabs0[p1 - p0] <= it && VP8kabs0[q3 - q2] <= it &&
VP8kabs0[q2 - q1] <= it && VP8kabs0[q1 - q0] <= it;
}
#endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
//------------------------------------------------------------------------------
// Simple In-loop filtering (Paragraph 15.2)
static void SimpleVFilter16(uint8_t* p, int stride, int thresh) {
#if !WEBP_NEON_OMIT_C_CODE
static void SimpleVFilter16_C(uint8_t* p, int stride, int thresh) {
int i;
const int thresh2 = 2 * thresh + 1;
for (i = 0; i < 16; ++i) {
if (needs_filter(p + i, stride, thresh2)) {
do_filter2(p + i, stride);
if (NeedsFilter_C(p + i, stride, thresh2)) {
DoFilter2_C(p + i, stride);
}
}
}
static void SimpleHFilter16(uint8_t* p, int stride, int thresh) {
static void SimpleHFilter16_C(uint8_t* p, int stride, int thresh) {
int i;
const int thresh2 = 2 * thresh + 1;
for (i = 0; i < 16; ++i) {
if (needs_filter(p + i * stride, 1, thresh2)) {
do_filter2(p + i * stride, 1);
if (NeedsFilter_C(p + i * stride, 1, thresh2)) {
DoFilter2_C(p + i * stride, 1);
}
}
}
static void SimpleVFilter16i(uint8_t* p, int stride, int thresh) {
static void SimpleVFilter16i_C(uint8_t* p, int stride, int thresh) {
int k;
for (k = 3; k > 0; --k) {
p += 4 * stride;
SimpleVFilter16(p, stride, thresh);
SimpleVFilter16_C(p, stride, thresh);
}
}
static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) {
static void SimpleHFilter16i_C(uint8_t* p, int stride, int thresh) {
int k;
for (k = 3; k > 0; --k) {
p += 4;
SimpleHFilter16(p, stride, thresh);
SimpleHFilter16_C(p, stride, thresh);
}
}
#endif // !WEBP_NEON_OMIT_C_CODE
//------------------------------------------------------------------------------
// Complex In-loop filtering (Paragraph 15.3)
static WEBP_INLINE void FilterLoop26(uint8_t* p,
int hstride, int vstride, int size,
int thresh, int ithresh, int hev_thresh) {
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
static WEBP_INLINE void FilterLoop26_C(uint8_t* p,
int hstride, int vstride, int size,
int thresh, int ithresh,
int hev_thresh) {
const int thresh2 = 2 * thresh + 1;
while (size-- > 0) {
if (needs_filter2(p, hstride, thresh2, ithresh)) {
if (hev(p, hstride, hev_thresh)) {
do_filter2(p, hstride);
if (NeedsFilter2_C(p, hstride, thresh2, ithresh)) {
if (Hev(p, hstride, hev_thresh)) {
DoFilter2_C(p, hstride);
} else {
do_filter6(p, hstride);
DoFilter6_C(p, hstride);
}
}
p += vstride;
}
}
static WEBP_INLINE void FilterLoop24(uint8_t* p,
int hstride, int vstride, int size,
int thresh, int ithresh, int hev_thresh) {
static WEBP_INLINE void FilterLoop24_C(uint8_t* p,
int hstride, int vstride, int size,
int thresh, int ithresh,
int hev_thresh) {
const int thresh2 = 2 * thresh + 1;
while (size-- > 0) {
if (needs_filter2(p, hstride, thresh2, ithresh)) {
if (hev(p, hstride, hev_thresh)) {
do_filter2(p, hstride);
if (NeedsFilter2_C(p, hstride, thresh2, ithresh)) {
if (Hev(p, hstride, hev_thresh)) {
DoFilter2_C(p, hstride);
} else {
do_filter4(p, hstride);
DoFilter4_C(p, hstride);
}
}
p += vstride;
}
}
#endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
#if !WEBP_NEON_OMIT_C_CODE
// on macroblock edges
static void VFilter16(uint8_t* p, int stride,
int thresh, int ithresh, int hev_thresh) {
FilterLoop26(p, stride, 1, 16, thresh, ithresh, hev_thresh);
static void VFilter16_C(uint8_t* p, int stride,
int thresh, int ithresh, int hev_thresh) {
FilterLoop26_C(p, stride, 1, 16, thresh, ithresh, hev_thresh);
}
static void HFilter16(uint8_t* p, int stride,
int thresh, int ithresh, int hev_thresh) {
FilterLoop26(p, 1, stride, 16, thresh, ithresh, hev_thresh);
static void HFilter16_C(uint8_t* p, int stride,
int thresh, int ithresh, int hev_thresh) {
FilterLoop26_C(p, 1, stride, 16, thresh, ithresh, hev_thresh);
}
// on three inner edges
static void VFilter16i(uint8_t* p, int stride,
int thresh, int ithresh, int hev_thresh) {
static void VFilter16i_C(uint8_t* p, int stride,
int thresh, int ithresh, int hev_thresh) {
int k;
for (k = 3; k > 0; --k) {
p += 4 * stride;
FilterLoop24(p, stride, 1, 16, thresh, ithresh, hev_thresh);
FilterLoop24_C(p, stride, 1, 16, thresh, ithresh, hev_thresh);
}
}
#endif // !WEBP_NEON_OMIT_C_CODE
static void HFilter16i(uint8_t* p, int stride,
int thresh, int ithresh, int hev_thresh) {
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
static void HFilter16i_C(uint8_t* p, int stride,
int thresh, int ithresh, int hev_thresh) {
int k;
for (k = 3; k > 0; --k) {
p += 4;
FilterLoop24(p, 1, stride, 16, thresh, ithresh, hev_thresh);
FilterLoop24_C(p, 1, stride, 16, thresh, ithresh, hev_thresh);
}
}
#endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
#if !WEBP_NEON_OMIT_C_CODE
// 8-pixels wide variant, for chroma filtering
static void VFilter8(uint8_t* u, uint8_t* v, int stride,
int thresh, int ithresh, int hev_thresh) {
FilterLoop26(u, stride, 1, 8, thresh, ithresh, hev_thresh);
FilterLoop26(v, stride, 1, 8, thresh, ithresh, hev_thresh);
static void VFilter8_C(uint8_t* u, uint8_t* v, int stride,
int thresh, int ithresh, int hev_thresh) {
FilterLoop26_C(u, stride, 1, 8, thresh, ithresh, hev_thresh);
FilterLoop26_C(v, stride, 1, 8, thresh, ithresh, hev_thresh);
}
#endif // !WEBP_NEON_OMIT_C_CODE
static void HFilter8(uint8_t* u, uint8_t* v, int stride,
int thresh, int ithresh, int hev_thresh) {
FilterLoop26(u, 1, stride, 8, thresh, ithresh, hev_thresh);
FilterLoop26(v, 1, stride, 8, thresh, ithresh, hev_thresh);
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
static void HFilter8_C(uint8_t* u, uint8_t* v, int stride,
int thresh, int ithresh, int hev_thresh) {
FilterLoop26_C(u, 1, stride, 8, thresh, ithresh, hev_thresh);
FilterLoop26_C(v, 1, stride, 8, thresh, ithresh, hev_thresh);
}
#endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
static void VFilter8i(uint8_t* u, uint8_t* v, int stride,
int thresh, int ithresh, int hev_thresh) {
FilterLoop24(u + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
FilterLoop24(v + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
#if !WEBP_NEON_OMIT_C_CODE
static void VFilter8i_C(uint8_t* u, uint8_t* v, int stride,
int thresh, int ithresh, int hev_thresh) {
FilterLoop24_C(u + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
FilterLoop24_C(v + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
}
#endif // !WEBP_NEON_OMIT_C_CODE
static void HFilter8i(uint8_t* u, uint8_t* v, int stride,
int thresh, int ithresh, int hev_thresh) {
FilterLoop24(u + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
FilterLoop24(v + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
static void HFilter8i_C(uint8_t* u, uint8_t* v, int stride,
int thresh, int ithresh, int hev_thresh) {
FilterLoop24_C(u + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
FilterLoop24_C(v + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
}
#endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
//------------------------------------------------------------------------------
static void DitherCombine8x8(const uint8_t* dither, uint8_t* dst,
int dst_stride) {
static void DitherCombine8x8_C(const uint8_t* dither, uint8_t* dst,
int dst_stride) {
int i, j;
for (j = 0; j < 8; ++j) {
for (i = 0; i < 8; ++i) {
@ -700,7 +740,6 @@ extern void VP8DspInitNEON(void);
extern void VP8DspInitMIPS32(void);
extern void VP8DspInitMIPSdspR2(void);
extern void VP8DspInitMSA(void);
extern void VP8DspInitWASM(void);
static volatile VP8CPUInfo dec_last_cpuinfo_used =
(VP8CPUInfo)&dec_last_cpuinfo_used;
@ -710,54 +749,66 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8DspInit(void) {
VP8InitClipTables();
VP8TransformWHT = TransformWHT;
VP8Transform = TransformTwo;
VP8TransformUV = TransformUV;
VP8TransformDC = TransformDC;
VP8TransformDCUV = TransformDCUV;
VP8TransformAC3 = TransformAC3;
#if !WEBP_NEON_OMIT_C_CODE
VP8TransformWHT = TransformWHT_C;
VP8Transform = TransformTwo_C;
VP8TransformDC = TransformDC_C;
VP8TransformAC3 = TransformAC3_C;
#endif
VP8TransformUV = TransformUV_C;
VP8TransformDCUV = TransformDCUV_C;
VP8VFilter16 = VFilter16;
VP8HFilter16 = HFilter16;
VP8VFilter8 = VFilter8;
VP8HFilter8 = HFilter8;
VP8VFilter16i = VFilter16i;
VP8HFilter16i = HFilter16i;
VP8VFilter8i = VFilter8i;
VP8HFilter8i = HFilter8i;
VP8SimpleVFilter16 = SimpleVFilter16;
VP8SimpleHFilter16 = SimpleHFilter16;
VP8SimpleVFilter16i = SimpleVFilter16i;
VP8SimpleHFilter16i = SimpleHFilter16i;
#if !WEBP_NEON_OMIT_C_CODE
VP8VFilter16 = VFilter16_C;
VP8VFilter16i = VFilter16i_C;
VP8HFilter16 = HFilter16_C;
VP8VFilter8 = VFilter8_C;
VP8VFilter8i = VFilter8i_C;
VP8SimpleVFilter16 = SimpleVFilter16_C;
VP8SimpleHFilter16 = SimpleHFilter16_C;
VP8SimpleVFilter16i = SimpleVFilter16i_C;
VP8SimpleHFilter16i = SimpleHFilter16i_C;
#endif
VP8PredLuma4[0] = DC4;
VP8PredLuma4[1] = TM4;
VP8PredLuma4[2] = VE4;
VP8PredLuma4[3] = HE4;
VP8PredLuma4[4] = RD4;
VP8PredLuma4[5] = VR4;
VP8PredLuma4[6] = LD4;
VP8PredLuma4[7] = VL4;
VP8PredLuma4[8] = HD4;
VP8PredLuma4[9] = HU4;
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
VP8HFilter16i = HFilter16i_C;
VP8HFilter8 = HFilter8_C;
VP8HFilter8i = HFilter8i_C;
#endif
VP8PredLuma16[0] = DC16;
VP8PredLuma16[1] = TM16;
VP8PredLuma16[2] = VE16;
VP8PredLuma16[3] = HE16;
VP8PredLuma16[4] = DC16NoTop;
VP8PredLuma16[5] = DC16NoLeft;
VP8PredLuma16[6] = DC16NoTopLeft;
#if !WEBP_NEON_OMIT_C_CODE
VP8PredLuma4[0] = DC4_C;
VP8PredLuma4[1] = TM4_C;
VP8PredLuma4[2] = VE4_C;
VP8PredLuma4[4] = RD4_C;
VP8PredLuma4[6] = LD4_C;
#endif
VP8PredChroma8[0] = DC8uv;
VP8PredChroma8[1] = TM8uv;
VP8PredChroma8[2] = VE8uv;
VP8PredChroma8[3] = HE8uv;
VP8PredChroma8[4] = DC8uvNoTop;
VP8PredChroma8[5] = DC8uvNoLeft;
VP8PredChroma8[6] = DC8uvNoTopLeft;
VP8PredLuma4[3] = HE4_C;
VP8PredLuma4[5] = VR4_C;
VP8PredLuma4[7] = VL4_C;
VP8PredLuma4[8] = HD4_C;
VP8PredLuma4[9] = HU4_C;
VP8DitherCombine8x8 = DitherCombine8x8;
#if !WEBP_NEON_OMIT_C_CODE
VP8PredLuma16[0] = DC16_C;
VP8PredLuma16[1] = TM16_C;
VP8PredLuma16[2] = VE16_C;
VP8PredLuma16[3] = HE16_C;
VP8PredLuma16[4] = DC16NoTop_C;
VP8PredLuma16[5] = DC16NoLeft_C;
VP8PredLuma16[6] = DC16NoTopLeft_C;
VP8PredChroma8[0] = DC8uv_C;
VP8PredChroma8[1] = TM8uv_C;
VP8PredChroma8[2] = VE8uv_C;
VP8PredChroma8[3] = HE8uv_C;
VP8PredChroma8[4] = DC8uvNoTop_C;
VP8PredChroma8[5] = DC8uvNoLeft_C;
VP8PredChroma8[6] = DC8uvNoTopLeft_C;
#endif
VP8DitherCombine8x8 = DitherCombine8x8_C;
// If defined, use CPUInfo() to overwrite some pointers with faster versions.
if (VP8GetCPUInfo != NULL) {
@ -771,11 +822,6 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8DspInit(void) {
#endif
}
#endif
#if defined(WEBP_USE_NEON)
if (VP8GetCPUInfo(kNEON)) {
VP8DspInitNEON();
}
#endif
#if defined(WEBP_USE_MIPS32)
if (VP8GetCPUInfo(kMIPS32)) {
VP8DspInitMIPS32();
@ -790,12 +836,59 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8DspInit(void) {
if (VP8GetCPUInfo(kMSA)) {
VP8DspInitMSA();
}
#endif
#if defined(WEBP_USE_WASM)
if (VP8GetCPUInfo(kWASM)) {
VP8DspInitWASM();
}
#endif
}
#if defined(WEBP_USE_NEON)
if (WEBP_NEON_OMIT_C_CODE ||
(VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
VP8DspInitNEON();
}
#endif
assert(VP8TransformWHT != NULL);
assert(VP8Transform != NULL);
assert(VP8TransformDC != NULL);
assert(VP8TransformAC3 != NULL);
assert(VP8TransformUV != NULL);
assert(VP8TransformDCUV != NULL);
assert(VP8VFilter16 != NULL);
assert(VP8HFilter16 != NULL);
assert(VP8VFilter8 != NULL);
assert(VP8HFilter8 != NULL);
assert(VP8VFilter16i != NULL);
assert(VP8HFilter16i != NULL);
assert(VP8VFilter8i != NULL);
assert(VP8HFilter8i != NULL);
assert(VP8SimpleVFilter16 != NULL);
assert(VP8SimpleHFilter16 != NULL);
assert(VP8SimpleVFilter16i != NULL);
assert(VP8SimpleHFilter16i != NULL);
assert(VP8PredLuma4[0] != NULL);
assert(VP8PredLuma4[1] != NULL);
assert(VP8PredLuma4[2] != NULL);
assert(VP8PredLuma4[3] != NULL);
assert(VP8PredLuma4[4] != NULL);
assert(VP8PredLuma4[5] != NULL);
assert(VP8PredLuma4[6] != NULL);
assert(VP8PredLuma4[7] != NULL);
assert(VP8PredLuma4[8] != NULL);
assert(VP8PredLuma4[9] != NULL);
assert(VP8PredLuma16[0] != NULL);
assert(VP8PredLuma16[1] != NULL);
assert(VP8PredLuma16[2] != NULL);
assert(VP8PredLuma16[3] != NULL);
assert(VP8PredLuma16[4] != NULL);
assert(VP8PredLuma16[5] != NULL);
assert(VP8PredLuma16[6] != NULL);
assert(VP8PredChroma8[0] != NULL);
assert(VP8PredChroma8[1] != NULL);
assert(VP8PredChroma8[2] != NULL);
assert(VP8PredChroma8[3] != NULL);
assert(VP8PredChroma8[4] != NULL);
assert(VP8PredChroma8[5] != NULL);
assert(VP8PredChroma8[6] != NULL);
assert(VP8DitherCombine8x8 != NULL);
dec_last_cpuinfo_used = VP8GetCPUInfo;
}

View File

@ -11,11 +11,14 @@
//
// Author: Skal (pascal.massimino@gmail.com)
#include "./dsp.h"
#include "src/dsp/dsp.h"
#define USE_STATIC_TABLES // undefine to have run-time table initialization
// define to 0 to have run-time table initialization
#if !defined(USE_STATIC_TABLES)
#define USE_STATIC_TABLES 1 // ALTERNATE_CODE
#endif
#ifdef USE_STATIC_TABLES
#if (USE_STATIC_TABLES == 1)
static const uint8_t abs0[255 + 255 + 1] = {
0xff, 0xfe, 0xfd, 0xfc, 0xfb, 0xfa, 0xf9, 0xf8, 0xf7, 0xf6, 0xf5, 0xf4,
@ -337,7 +340,7 @@ static uint8_t clip1[255 + 511 + 1];
// and make sure it's set to true _last_ (so as to be thread-safe)
static volatile int tables_ok = 0;
#endif
#endif // USE_STATIC_TABLES
const int8_t* const VP8ksclip1 = (const int8_t*)&sclip1[1020];
const int8_t* const VP8ksclip2 = (const int8_t*)&sclip2[112];
@ -345,7 +348,7 @@ const uint8_t* const VP8kclip1 = &clip1[255];
const uint8_t* const VP8kabs0 = &abs0[255];
WEBP_TSAN_IGNORE_FUNCTION void VP8InitClipTables(void) {
#if !defined(USE_STATIC_TABLES)
#if (USE_STATIC_TABLES == 0)
int i;
if (!tables_ok) {
for (i = -255; i <= 255; ++i) {

View File

@ -12,11 +12,11 @@
// Author(s): Djordje Pesut (djordje.pesut@imgtec.com)
// Jovan Zelincevic (jovan.zelincevic@imgtec.com)
#include "./dsp.h"
#include "src/dsp/dsp.h"
#if defined(WEBP_USE_MIPS32)
#include "./mips_macro.h"
#include "src/dsp/mips_macro.h"
static const int kC1 = 20091 + (1 << 16);
static const int kC2 = 35468;

View File

@ -12,11 +12,11 @@
// Author(s): Djordje Pesut (djordje.pesut@imgtec.com)
// Jovan Zelincevic (jovan.zelincevic@imgtec.com)
#include "./dsp.h"
#include "src/dsp/dsp.h"
#if defined(WEBP_USE_MIPS_DSP_R2)
#include "./mips_macro.h"
#include "src/dsp/mips_macro.h"
static const int kC1 = 20091 + (1 << 16);
static const int kC2 = 35468;

View File

@ -12,11 +12,11 @@
// Author(s): Prashant Patil (prashant.patil@imgtec.com)
#include "./dsp.h"
#include "src/dsp/dsp.h"
#if defined(WEBP_USE_MSA)
#include "./msa_macro.h"
#include "src/dsp/msa_macro.h"
//------------------------------------------------------------------------------
// Transforms

File diff suppressed because it is too large Load Diff

View File

@ -12,23 +12,25 @@
// Author: somnath@google.com (Somnath Banerjee)
// cduvivier@google.com (Christian Duvivier)
#include "./dsp.h"
#include "src/dsp/dsp.h"
#if defined(WEBP_USE_SSE2)
// The 3-coeff sparse transform in SSE2 is not really faster than the plain-C
// one it seems => disable it by default. Uncomment the following to enable:
// #define USE_TRANSFORM_AC3
#if !defined(USE_TRANSFORM_AC3)
#define USE_TRANSFORM_AC3 0 // ALTERNATE_CODE
#endif
#include <emmintrin.h>
#include "./common_sse2.h"
#include "../dec/vp8i_dec.h"
#include "../utils/utils.h"
#include "src/dsp/common_sse2.h"
#include "src/dec/vp8i_dec.h"
#include "src/utils/utils.h"
//------------------------------------------------------------------------------
// Transforms (Paragraph 14.4)
static void Transform(const int16_t* in, uint8_t* dst, int do_two) {
static void Transform_SSE2(const int16_t* in, uint8_t* dst, int do_two) {
// This implementation makes use of 16-bit fixed point versions of two
// multiply constants:
// K1 = sqrt(2) * cos (pi/8) ~= 85627 / 2^16
@ -193,7 +195,7 @@ static void Transform(const int16_t* in, uint8_t* dst, int do_two) {
}
}
#if defined(USE_TRANSFORM_AC3)
#if (USE_TRANSFORM_AC3 == 1)
#define MUL(a, b) (((a) * (b)) >> 16)
static void TransformAC3(const int16_t* in, uint8_t* dst) {
static const int kC1 = 20091 + (1 << 16);
@ -248,7 +250,7 @@ static void TransformAC3(const int16_t* in, uint8_t* dst) {
_mm_subs_epu8((p), (q)))
// Shift each byte of "x" by 3 bits while preserving by the sign bit.
static WEBP_INLINE void SignedShift8b(__m128i* const x) {
static WEBP_INLINE void SignedShift8b_SSE2(__m128i* const x) {
const __m128i zero = _mm_setzero_si128();
const __m128i lo_0 = _mm_unpacklo_epi8(zero, *x);
const __m128i hi_0 = _mm_unpackhi_epi8(zero, *x);
@ -258,8 +260,8 @@ static WEBP_INLINE void SignedShift8b(__m128i* const x) {
}
#define FLIP_SIGN_BIT2(a, b) { \
a = _mm_xor_si128(a, sign_bit); \
b = _mm_xor_si128(b, sign_bit); \
(a) = _mm_xor_si128(a, sign_bit); \
(b) = _mm_xor_si128(b, sign_bit); \
}
#define FLIP_SIGN_BIT4(a, b, c, d) { \
@ -268,11 +270,11 @@ static WEBP_INLINE void SignedShift8b(__m128i* const x) {
}
// input/output is uint8_t
static WEBP_INLINE void GetNotHEV(const __m128i* const p1,
const __m128i* const p0,
const __m128i* const q0,
const __m128i* const q1,
int hev_thresh, __m128i* const not_hev) {
static WEBP_INLINE void GetNotHEV_SSE2(const __m128i* const p1,
const __m128i* const p0,
const __m128i* const q0,
const __m128i* const q1,
int hev_thresh, __m128i* const not_hev) {
const __m128i zero = _mm_setzero_si128();
const __m128i t_1 = MM_ABS(*p1, *p0);
const __m128i t_2 = MM_ABS(*q1, *q0);
@ -285,11 +287,11 @@ static WEBP_INLINE void GetNotHEV(const __m128i* const p1,
}
// input pixels are int8_t
static WEBP_INLINE void GetBaseDelta(const __m128i* const p1,
const __m128i* const p0,
const __m128i* const q0,
const __m128i* const q1,
__m128i* const delta) {
static WEBP_INLINE void GetBaseDelta_SSE2(const __m128i* const p1,
const __m128i* const p0,
const __m128i* const q0,
const __m128i* const q1,
__m128i* const delta) {
// beware of addition order, for saturation!
const __m128i p1_q1 = _mm_subs_epi8(*p1, *q1); // p1 - q1
const __m128i q0_p0 = _mm_subs_epi8(*q0, *p0); // q0 - p0
@ -300,15 +302,16 @@ static WEBP_INLINE void GetBaseDelta(const __m128i* const p1,
}
// input and output are int8_t
static WEBP_INLINE void DoSimpleFilter(__m128i* const p0, __m128i* const q0,
const __m128i* const fl) {
static WEBP_INLINE void DoSimpleFilter_SSE2(__m128i* const p0,
__m128i* const q0,
const __m128i* const fl) {
const __m128i k3 = _mm_set1_epi8(3);
const __m128i k4 = _mm_set1_epi8(4);
__m128i v3 = _mm_adds_epi8(*fl, k3);
__m128i v4 = _mm_adds_epi8(*fl, k4);
SignedShift8b(&v4); // v4 >> 3
SignedShift8b(&v3); // v3 >> 3
SignedShift8b_SSE2(&v4); // v4 >> 3
SignedShift8b_SSE2(&v3); // v3 >> 3
*q0 = _mm_subs_epi8(*q0, v4); // q0 -= v4
*p0 = _mm_adds_epi8(*p0, v3); // p0 += v3
}
@ -317,9 +320,9 @@ static WEBP_INLINE void DoSimpleFilter(__m128i* const p0, __m128i* const q0,
// Update operations:
// q = q - delta and p = p + delta; where delta = [(a_hi >> 7), (a_lo >> 7)]
// Pixels 'pi' and 'qi' are int8_t on input, uint8_t on output (sign flip).
static WEBP_INLINE void Update2Pixels(__m128i* const pi, __m128i* const qi,
const __m128i* const a0_lo,
const __m128i* const a0_hi) {
static WEBP_INLINE void Update2Pixels_SSE2(__m128i* const pi, __m128i* const qi,
const __m128i* const a0_lo,
const __m128i* const a0_hi) {
const __m128i a1_lo = _mm_srai_epi16(*a0_lo, 7);
const __m128i a1_hi = _mm_srai_epi16(*a0_hi, 7);
const __m128i delta = _mm_packs_epi16(a1_lo, a1_hi);
@ -330,11 +333,11 @@ static WEBP_INLINE void Update2Pixels(__m128i* const pi, __m128i* const qi,
}
// input pixels are uint8_t
static WEBP_INLINE void NeedsFilter(const __m128i* const p1,
const __m128i* const p0,
const __m128i* const q0,
const __m128i* const q1,
int thresh, __m128i* const mask) {
static WEBP_INLINE void NeedsFilter_SSE2(const __m128i* const p1,
const __m128i* const p0,
const __m128i* const q0,
const __m128i* const q1,
int thresh, __m128i* const mask) {
const __m128i m_thresh = _mm_set1_epi8(thresh);
const __m128i t1 = MM_ABS(*p1, *q1); // abs(p1 - q1)
const __m128i kFE = _mm_set1_epi8(0xFE);
@ -353,28 +356,29 @@ static WEBP_INLINE void NeedsFilter(const __m128i* const p1,
// Edge filtering functions
// Applies filter on 2 pixels (p0 and q0)
static WEBP_INLINE void DoFilter2(__m128i* const p1, __m128i* const p0,
__m128i* const q0, __m128i* const q1,
int thresh) {
static WEBP_INLINE void DoFilter2_SSE2(__m128i* const p1, __m128i* const p0,
__m128i* const q0, __m128i* const q1,
int thresh) {
__m128i a, mask;
const __m128i sign_bit = _mm_set1_epi8(0x80);
// convert p1/q1 to int8_t (for GetBaseDelta)
// convert p1/q1 to int8_t (for GetBaseDelta_SSE2)
const __m128i p1s = _mm_xor_si128(*p1, sign_bit);
const __m128i q1s = _mm_xor_si128(*q1, sign_bit);
NeedsFilter(p1, p0, q0, q1, thresh, &mask);
NeedsFilter_SSE2(p1, p0, q0, q1, thresh, &mask);
FLIP_SIGN_BIT2(*p0, *q0);
GetBaseDelta(&p1s, p0, q0, &q1s, &a);
GetBaseDelta_SSE2(&p1s, p0, q0, &q1s, &a);
a = _mm_and_si128(a, mask); // mask filter values we don't care about
DoSimpleFilter(p0, q0, &a);
DoSimpleFilter_SSE2(p0, q0, &a);
FLIP_SIGN_BIT2(*p0, *q0);
}
// Applies filter on 4 pixels (p1, p0, q0 and q1)
static WEBP_INLINE void DoFilter4(__m128i* const p1, __m128i* const p0,
__m128i* const q0, __m128i* const q1,
const __m128i* const mask, int hev_thresh) {
static WEBP_INLINE void DoFilter4_SSE2(__m128i* const p1, __m128i* const p0,
__m128i* const q0, __m128i* const q1,
const __m128i* const mask,
int hev_thresh) {
const __m128i zero = _mm_setzero_si128();
const __m128i sign_bit = _mm_set1_epi8(0x80);
const __m128i k64 = _mm_set1_epi8(64);
@ -384,7 +388,7 @@ static WEBP_INLINE void DoFilter4(__m128i* const p1, __m128i* const p0,
__m128i t1, t2, t3;
// compute hev mask
GetNotHEV(p1, p0, q0, q1, hev_thresh, &not_hev);
GetNotHEV_SSE2(p1, p0, q0, q1, hev_thresh, &not_hev);
// convert to signed values
FLIP_SIGN_BIT4(*p1, *p0, *q0, *q1);
@ -399,8 +403,8 @@ static WEBP_INLINE void DoFilter4(__m128i* const p1, __m128i* const p0,
t2 = _mm_adds_epi8(t1, k3); // 3 * (q0 - p0) + hev(p1 - q1) + 3
t3 = _mm_adds_epi8(t1, k4); // 3 * (q0 - p0) + hev(p1 - q1) + 4
SignedShift8b(&t2); // (3 * (q0 - p0) + hev(p1 - q1) + 3) >> 3
SignedShift8b(&t3); // (3 * (q0 - p0) + hev(p1 - q1) + 4) >> 3
SignedShift8b_SSE2(&t2); // (3 * (q0 - p0) + hev(p1 - q1) + 3) >> 3
SignedShift8b_SSE2(&t3); // (3 * (q0 - p0) + hev(p1 - q1) + 4) >> 3
*p0 = _mm_adds_epi8(*p0, t2); // p0 += t2
*q0 = _mm_subs_epi8(*q0, t3); // q0 -= t3
FLIP_SIGN_BIT2(*p0, *q0);
@ -417,25 +421,26 @@ static WEBP_INLINE void DoFilter4(__m128i* const p1, __m128i* const p0,
}
// Applies filter on 6 pixels (p2, p1, p0, q0, q1 and q2)
static WEBP_INLINE void DoFilter6(__m128i* const p2, __m128i* const p1,
__m128i* const p0, __m128i* const q0,
__m128i* const q1, __m128i* const q2,
const __m128i* const mask, int hev_thresh) {
static WEBP_INLINE void DoFilter6_SSE2(__m128i* const p2, __m128i* const p1,
__m128i* const p0, __m128i* const q0,
__m128i* const q1, __m128i* const q2,
const __m128i* const mask,
int hev_thresh) {
const __m128i zero = _mm_setzero_si128();
const __m128i sign_bit = _mm_set1_epi8(0x80);
__m128i a, not_hev;
// compute hev mask
GetNotHEV(p1, p0, q0, q1, hev_thresh, &not_hev);
GetNotHEV_SSE2(p1, p0, q0, q1, hev_thresh, &not_hev);
FLIP_SIGN_BIT4(*p1, *p0, *q0, *q1);
FLIP_SIGN_BIT2(*p2, *q2);
GetBaseDelta(p1, p0, q0, q1, &a);
GetBaseDelta_SSE2(p1, p0, q0, q1, &a);
{ // do simple filter on pixels with hev
const __m128i m = _mm_andnot_si128(not_hev, *mask);
const __m128i f = _mm_and_si128(a, m);
DoSimpleFilter(p0, q0, &f);
DoSimpleFilter_SSE2(p0, q0, &f);
}
{ // do strong filter on pixels with not hev
@ -460,15 +465,15 @@ static WEBP_INLINE void DoFilter6(__m128i* const p2, __m128i* const p1,
const __m128i a0_lo = _mm_add_epi16(a1_lo, f9_lo); // Filter * 27 + 63
const __m128i a0_hi = _mm_add_epi16(a1_hi, f9_hi); // Filter * 27 + 63
Update2Pixels(p2, q2, &a2_lo, &a2_hi);
Update2Pixels(p1, q1, &a1_lo, &a1_hi);
Update2Pixels(p0, q0, &a0_lo, &a0_hi);
Update2Pixels_SSE2(p2, q2, &a2_lo, &a2_hi);
Update2Pixels_SSE2(p1, q1, &a1_lo, &a1_hi);
Update2Pixels_SSE2(p0, q0, &a0_lo, &a0_hi);
}
}
// reads 8 rows across a vertical edge.
static WEBP_INLINE void Load8x4(const uint8_t* const b, int stride,
__m128i* const p, __m128i* const q) {
static WEBP_INLINE void Load8x4_SSE2(const uint8_t* const b, int stride,
__m128i* const p, __m128i* const q) {
// A0 = 63 62 61 60 23 22 21 20 43 42 41 40 03 02 01 00
// A1 = 73 72 71 70 33 32 31 30 53 52 51 50 13 12 11 10
const __m128i A0 = _mm_set_epi32(
@ -494,11 +499,11 @@ static WEBP_INLINE void Load8x4(const uint8_t* const b, int stride,
*q = _mm_unpackhi_epi32(C0, C1);
}
static WEBP_INLINE void Load16x4(const uint8_t* const r0,
const uint8_t* const r8,
int stride,
__m128i* const p1, __m128i* const p0,
__m128i* const q0, __m128i* const q1) {
static WEBP_INLINE void Load16x4_SSE2(const uint8_t* const r0,
const uint8_t* const r8,
int stride,
__m128i* const p1, __m128i* const p0,
__m128i* const q0, __m128i* const q1) {
// Assume the pixels around the edge (|) are numbered as follows
// 00 01 | 02 03
// 10 11 | 12 13
@ -514,8 +519,8 @@ static WEBP_INLINE void Load16x4(const uint8_t* const r0,
// q0 = 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02
// p0 = f1 e1 d1 c1 b1 a1 91 81 f0 e0 d0 c0 b0 a0 90 80
// q1 = f3 e3 d3 c3 b3 a3 93 83 f2 e2 d2 c2 b2 a2 92 82
Load8x4(r0, stride, p1, q0);
Load8x4(r8, stride, p0, q1);
Load8x4_SSE2(r0, stride, p1, q0);
Load8x4_SSE2(r8, stride, p0, q1);
{
// p1 = f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00
@ -531,7 +536,8 @@ static WEBP_INLINE void Load16x4(const uint8_t* const r0,
}
}
static WEBP_INLINE void Store4x4(__m128i* const x, uint8_t* dst, int stride) {
static WEBP_INLINE void Store4x4_SSE2(__m128i* const x,
uint8_t* dst, int stride) {
int i;
for (i = 0; i < 4; ++i, dst += stride) {
WebPUint32ToMem(dst, _mm_cvtsi128_si32(*x));
@ -540,12 +546,12 @@ static WEBP_INLINE void Store4x4(__m128i* const x, uint8_t* dst, int stride) {
}
// Transpose back and store
static WEBP_INLINE void Store16x4(const __m128i* const p1,
const __m128i* const p0,
const __m128i* const q0,
const __m128i* const q1,
uint8_t* r0, uint8_t* r8,
int stride) {
static WEBP_INLINE void Store16x4_SSE2(const __m128i* const p1,
const __m128i* const p0,
const __m128i* const q0,
const __m128i* const q1,
uint8_t* r0, uint8_t* r8,
int stride) {
__m128i t1, p1_s, p0_s, q0_s, q1_s;
// p0 = 71 70 61 60 51 50 41 40 31 30 21 20 11 10 01 00
@ -572,55 +578,55 @@ static WEBP_INLINE void Store16x4(const __m128i* const p1,
p1_s = _mm_unpacklo_epi16(t1, q1_s);
q1_s = _mm_unpackhi_epi16(t1, q1_s);
Store4x4(&p0_s, r0, stride);
Store4x4_SSE2(&p0_s, r0, stride);
r0 += 4 * stride;
Store4x4(&q0_s, r0, stride);
Store4x4_SSE2(&q0_s, r0, stride);
Store4x4(&p1_s, r8, stride);
Store4x4_SSE2(&p1_s, r8, stride);
r8 += 4 * stride;
Store4x4(&q1_s, r8, stride);
Store4x4_SSE2(&q1_s, r8, stride);
}
//------------------------------------------------------------------------------
// Simple In-loop filtering (Paragraph 15.2)
static void SimpleVFilter16(uint8_t* p, int stride, int thresh) {
static void SimpleVFilter16_SSE2(uint8_t* p, int stride, int thresh) {
// Load
__m128i p1 = _mm_loadu_si128((__m128i*)&p[-2 * stride]);
__m128i p0 = _mm_loadu_si128((__m128i*)&p[-stride]);
__m128i q0 = _mm_loadu_si128((__m128i*)&p[0]);
__m128i q1 = _mm_loadu_si128((__m128i*)&p[stride]);
DoFilter2(&p1, &p0, &q0, &q1, thresh);
DoFilter2_SSE2(&p1, &p0, &q0, &q1, thresh);
// Store
_mm_storeu_si128((__m128i*)&p[-stride], p0);
_mm_storeu_si128((__m128i*)&p[0], q0);
}
static void SimpleHFilter16(uint8_t* p, int stride, int thresh) {
static void SimpleHFilter16_SSE2(uint8_t* p, int stride, int thresh) {
__m128i p1, p0, q0, q1;
p -= 2; // beginning of p1
Load16x4(p, p + 8 * stride, stride, &p1, &p0, &q0, &q1);
DoFilter2(&p1, &p0, &q0, &q1, thresh);
Store16x4(&p1, &p0, &q0, &q1, p, p + 8 * stride, stride);
Load16x4_SSE2(p, p + 8 * stride, stride, &p1, &p0, &q0, &q1);
DoFilter2_SSE2(&p1, &p0, &q0, &q1, thresh);
Store16x4_SSE2(&p1, &p0, &q0, &q1, p, p + 8 * stride, stride);
}
static void SimpleVFilter16i(uint8_t* p, int stride, int thresh) {
static void SimpleVFilter16i_SSE2(uint8_t* p, int stride, int thresh) {
int k;
for (k = 3; k > 0; --k) {
p += 4 * stride;
SimpleVFilter16(p, stride, thresh);
SimpleVFilter16_SSE2(p, stride, thresh);
}
}
static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) {
static void SimpleHFilter16i_SSE2(uint8_t* p, int stride, int thresh) {
int k;
for (k = 3; k > 0; --k) {
p += 4;
SimpleHFilter16(p, stride, thresh);
SimpleHFilter16_SSE2(p, stride, thresh);
}
}
@ -628,60 +634,60 @@ static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) {
// Complex In-loop filtering (Paragraph 15.3)
#define MAX_DIFF1(p3, p2, p1, p0, m) do { \
m = MM_ABS(p1, p0); \
m = _mm_max_epu8(m, MM_ABS(p3, p2)); \
m = _mm_max_epu8(m, MM_ABS(p2, p1)); \
(m) = MM_ABS(p1, p0); \
(m) = _mm_max_epu8(m, MM_ABS(p3, p2)); \
(m) = _mm_max_epu8(m, MM_ABS(p2, p1)); \
} while (0)
#define MAX_DIFF2(p3, p2, p1, p0, m) do { \
m = _mm_max_epu8(m, MM_ABS(p1, p0)); \
m = _mm_max_epu8(m, MM_ABS(p3, p2)); \
m = _mm_max_epu8(m, MM_ABS(p2, p1)); \
(m) = _mm_max_epu8(m, MM_ABS(p1, p0)); \
(m) = _mm_max_epu8(m, MM_ABS(p3, p2)); \
(m) = _mm_max_epu8(m, MM_ABS(p2, p1)); \
} while (0)
#define LOAD_H_EDGES4(p, stride, e1, e2, e3, e4) { \
e1 = _mm_loadu_si128((__m128i*)&(p)[0 * stride]); \
e2 = _mm_loadu_si128((__m128i*)&(p)[1 * stride]); \
e3 = _mm_loadu_si128((__m128i*)&(p)[2 * stride]); \
e4 = _mm_loadu_si128((__m128i*)&(p)[3 * stride]); \
(e1) = _mm_loadu_si128((__m128i*)&(p)[0 * (stride)]); \
(e2) = _mm_loadu_si128((__m128i*)&(p)[1 * (stride)]); \
(e3) = _mm_loadu_si128((__m128i*)&(p)[2 * (stride)]); \
(e4) = _mm_loadu_si128((__m128i*)&(p)[3 * (stride)]); \
}
#define LOADUV_H_EDGE(p, u, v, stride) do { \
const __m128i U = _mm_loadl_epi64((__m128i*)&(u)[(stride)]); \
const __m128i V = _mm_loadl_epi64((__m128i*)&(v)[(stride)]); \
p = _mm_unpacklo_epi64(U, V); \
(p) = _mm_unpacklo_epi64(U, V); \
} while (0)
#define LOADUV_H_EDGES4(u, v, stride, e1, e2, e3, e4) { \
LOADUV_H_EDGE(e1, u, v, 0 * stride); \
LOADUV_H_EDGE(e2, u, v, 1 * stride); \
LOADUV_H_EDGE(e3, u, v, 2 * stride); \
LOADUV_H_EDGE(e4, u, v, 3 * stride); \
LOADUV_H_EDGE(e1, u, v, 0 * (stride)); \
LOADUV_H_EDGE(e2, u, v, 1 * (stride)); \
LOADUV_H_EDGE(e3, u, v, 2 * (stride)); \
LOADUV_H_EDGE(e4, u, v, 3 * (stride)); \
}
#define STOREUV(p, u, v, stride) { \
_mm_storel_epi64((__m128i*)&u[(stride)], p); \
p = _mm_srli_si128(p, 8); \
_mm_storel_epi64((__m128i*)&v[(stride)], p); \
_mm_storel_epi64((__m128i*)&(u)[(stride)], p); \
(p) = _mm_srli_si128(p, 8); \
_mm_storel_epi64((__m128i*)&(v)[(stride)], p); \
}
static WEBP_INLINE void ComplexMask(const __m128i* const p1,
const __m128i* const p0,
const __m128i* const q0,
const __m128i* const q1,
int thresh, int ithresh,
__m128i* const mask) {
static WEBP_INLINE void ComplexMask_SSE2(const __m128i* const p1,
const __m128i* const p0,
const __m128i* const q0,
const __m128i* const q1,
int thresh, int ithresh,
__m128i* const mask) {
const __m128i it = _mm_set1_epi8(ithresh);
const __m128i diff = _mm_subs_epu8(*mask, it);
const __m128i thresh_mask = _mm_cmpeq_epi8(diff, _mm_setzero_si128());
__m128i filter_mask;
NeedsFilter(p1, p0, q0, q1, thresh, &filter_mask);
NeedsFilter_SSE2(p1, p0, q0, q1, thresh, &filter_mask);
*mask = _mm_and_si128(thresh_mask, filter_mask);
}
// on macroblock edges
static void VFilter16(uint8_t* p, int stride,
int thresh, int ithresh, int hev_thresh) {
static void VFilter16_SSE2(uint8_t* p, int stride,
int thresh, int ithresh, int hev_thresh) {
__m128i t1;
__m128i mask;
__m128i p2, p1, p0, q0, q1, q2;
@ -694,8 +700,8 @@ static void VFilter16(uint8_t* p, int stride,
LOAD_H_EDGES4(p, stride, q0, q1, q2, t1);
MAX_DIFF2(t1, q2, q1, q0, mask);
ComplexMask(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
ComplexMask_SSE2(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
DoFilter6_SSE2(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
// Store
_mm_storeu_si128((__m128i*)&p[-3 * stride], p2);
@ -706,28 +712,28 @@ static void VFilter16(uint8_t* p, int stride,
_mm_storeu_si128((__m128i*)&p[+2 * stride], q2);
}
static void HFilter16(uint8_t* p, int stride,
int thresh, int ithresh, int hev_thresh) {
static void HFilter16_SSE2(uint8_t* p, int stride,
int thresh, int ithresh, int hev_thresh) {
__m128i mask;
__m128i p3, p2, p1, p0, q0, q1, q2, q3;
uint8_t* const b = p - 4;
Load16x4(b, b + 8 * stride, stride, &p3, &p2, &p1, &p0); // p3, p2, p1, p0
Load16x4_SSE2(b, b + 8 * stride, stride, &p3, &p2, &p1, &p0);
MAX_DIFF1(p3, p2, p1, p0, mask);
Load16x4(p, p + 8 * stride, stride, &q0, &q1, &q2, &q3); // q0, q1, q2, q3
Load16x4_SSE2(p, p + 8 * stride, stride, &q0, &q1, &q2, &q3);
MAX_DIFF2(q3, q2, q1, q0, mask);
ComplexMask(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
ComplexMask_SSE2(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
DoFilter6_SSE2(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
Store16x4(&p3, &p2, &p1, &p0, b, b + 8 * stride, stride);
Store16x4(&q0, &q1, &q2, &q3, p, p + 8 * stride, stride);
Store16x4_SSE2(&p3, &p2, &p1, &p0, b, b + 8 * stride, stride);
Store16x4_SSE2(&q0, &q1, &q2, &q3, p, p + 8 * stride, stride);
}
// on three inner edges
static void VFilter16i(uint8_t* p, int stride,
int thresh, int ithresh, int hev_thresh) {
static void VFilter16i_SSE2(uint8_t* p, int stride,
int thresh, int ithresh, int hev_thresh) {
int k;
__m128i p3, p2, p1, p0; // loop invariants
@ -744,8 +750,8 @@ static void VFilter16i(uint8_t* p, int stride,
// p3 and p2 are not just temporary variables here: they will be
// re-used for next span. And q2/q3 will become p1/p0 accordingly.
ComplexMask(&p1, &p0, &p3, &p2, thresh, ithresh, &mask);
DoFilter4(&p1, &p0, &p3, &p2, &mask, hev_thresh);
ComplexMask_SSE2(&p1, &p0, &p3, &p2, thresh, ithresh, &mask);
DoFilter4_SSE2(&p1, &p0, &p3, &p2, &mask, hev_thresh);
// Store
_mm_storeu_si128((__m128i*)&b[0 * stride], p1);
@ -759,12 +765,12 @@ static void VFilter16i(uint8_t* p, int stride,
}
}
static void HFilter16i(uint8_t* p, int stride,
int thresh, int ithresh, int hev_thresh) {
static void HFilter16i_SSE2(uint8_t* p, int stride,
int thresh, int ithresh, int hev_thresh) {
int k;
__m128i p3, p2, p1, p0; // loop invariants
Load16x4(p, p + 8 * stride, stride, &p3, &p2, &p1, &p0); // prologue
Load16x4_SSE2(p, p + 8 * stride, stride, &p3, &p2, &p1, &p0); // prologue
for (k = 3; k > 0; --k) {
__m128i mask, tmp1, tmp2;
@ -773,13 +779,13 @@ static void HFilter16i(uint8_t* p, int stride,
p += 4; // beginning of q0 (and next span)
MAX_DIFF1(p3, p2, p1, p0, mask); // compute partial mask
Load16x4(p, p + 8 * stride, stride, &p3, &p2, &tmp1, &tmp2);
Load16x4_SSE2(p, p + 8 * stride, stride, &p3, &p2, &tmp1, &tmp2);
MAX_DIFF2(p3, p2, tmp1, tmp2, mask);
ComplexMask(&p1, &p0, &p3, &p2, thresh, ithresh, &mask);
DoFilter4(&p1, &p0, &p3, &p2, &mask, hev_thresh);
ComplexMask_SSE2(&p1, &p0, &p3, &p2, thresh, ithresh, &mask);
DoFilter4_SSE2(&p1, &p0, &p3, &p2, &mask, hev_thresh);
Store16x4(&p1, &p0, &p3, &p2, b, b + 8 * stride, stride);
Store16x4_SSE2(&p1, &p0, &p3, &p2, b, b + 8 * stride, stride);
// rotate samples
p1 = tmp1;
@ -788,8 +794,8 @@ static void HFilter16i(uint8_t* p, int stride,
}
// 8-pixels wide variant, for chroma filtering
static void VFilter8(uint8_t* u, uint8_t* v, int stride,
int thresh, int ithresh, int hev_thresh) {
static void VFilter8_SSE2(uint8_t* u, uint8_t* v, int stride,
int thresh, int ithresh, int hev_thresh) {
__m128i mask;
__m128i t1, p2, p1, p0, q0, q1, q2;
@ -801,8 +807,8 @@ static void VFilter8(uint8_t* u, uint8_t* v, int stride,
LOADUV_H_EDGES4(u, v, stride, q0, q1, q2, t1);
MAX_DIFF2(t1, q2, q1, q0, mask);
ComplexMask(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
ComplexMask_SSE2(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
DoFilter6_SSE2(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
// Store
STOREUV(p2, u, v, -3 * stride);
@ -813,28 +819,28 @@ static void VFilter8(uint8_t* u, uint8_t* v, int stride,
STOREUV(q2, u, v, 2 * stride);
}
static void HFilter8(uint8_t* u, uint8_t* v, int stride,
int thresh, int ithresh, int hev_thresh) {
static void HFilter8_SSE2(uint8_t* u, uint8_t* v, int stride,
int thresh, int ithresh, int hev_thresh) {
__m128i mask;
__m128i p3, p2, p1, p0, q0, q1, q2, q3;
uint8_t* const tu = u - 4;
uint8_t* const tv = v - 4;
Load16x4(tu, tv, stride, &p3, &p2, &p1, &p0); // p3, p2, p1, p0
Load16x4_SSE2(tu, tv, stride, &p3, &p2, &p1, &p0);
MAX_DIFF1(p3, p2, p1, p0, mask);
Load16x4(u, v, stride, &q0, &q1, &q2, &q3); // q0, q1, q2, q3
Load16x4_SSE2(u, v, stride, &q0, &q1, &q2, &q3);
MAX_DIFF2(q3, q2, q1, q0, mask);
ComplexMask(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
ComplexMask_SSE2(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
DoFilter6_SSE2(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
Store16x4(&p3, &p2, &p1, &p0, tu, tv, stride);
Store16x4(&q0, &q1, &q2, &q3, u, v, stride);
Store16x4_SSE2(&p3, &p2, &p1, &p0, tu, tv, stride);
Store16x4_SSE2(&q0, &q1, &q2, &q3, u, v, stride);
}
static void VFilter8i(uint8_t* u, uint8_t* v, int stride,
int thresh, int ithresh, int hev_thresh) {
static void VFilter8i_SSE2(uint8_t* u, uint8_t* v, int stride,
int thresh, int ithresh, int hev_thresh) {
__m128i mask;
__m128i t1, t2, p1, p0, q0, q1;
@ -849,8 +855,8 @@ static void VFilter8i(uint8_t* u, uint8_t* v, int stride,
LOADUV_H_EDGES4(u, v, stride, q0, q1, t1, t2);
MAX_DIFF2(t2, t1, q1, q0, mask);
ComplexMask(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
DoFilter4(&p1, &p0, &q0, &q1, &mask, hev_thresh);
ComplexMask_SSE2(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
DoFilter4_SSE2(&p1, &p0, &q0, &q1, &mask, hev_thresh);
// Store
STOREUV(p1, u, v, -2 * stride);
@ -859,24 +865,24 @@ static void VFilter8i(uint8_t* u, uint8_t* v, int stride,
STOREUV(q1, u, v, 1 * stride);
}
static void HFilter8i(uint8_t* u, uint8_t* v, int stride,
int thresh, int ithresh, int hev_thresh) {
static void HFilter8i_SSE2(uint8_t* u, uint8_t* v, int stride,
int thresh, int ithresh, int hev_thresh) {
__m128i mask;
__m128i t1, t2, p1, p0, q0, q1;
Load16x4(u, v, stride, &t2, &t1, &p1, &p0); // p3, p2, p1, p0
Load16x4_SSE2(u, v, stride, &t2, &t1, &p1, &p0); // p3, p2, p1, p0
MAX_DIFF1(t2, t1, p1, p0, mask);
u += 4; // beginning of q0
v += 4;
Load16x4(u, v, stride, &q0, &q1, &t1, &t2); // q0, q1, q2, q3
Load16x4_SSE2(u, v, stride, &q0, &q1, &t1, &t2); // q0, q1, q2, q3
MAX_DIFF2(t2, t1, q1, q0, mask);
ComplexMask(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
DoFilter4(&p1, &p0, &q0, &q1, &mask, hev_thresh);
ComplexMask_SSE2(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
DoFilter4_SSE2(&p1, &p0, &q0, &q1, &mask, hev_thresh);
u -= 2; // beginning of p1
v -= 2;
Store16x4(&p1, &p0, &q0, &q1, u, v, stride);
Store16x4_SSE2(&p1, &p0, &q0, &q1, u, v, stride);
}
//------------------------------------------------------------------------------
@ -893,7 +899,7 @@ static void HFilter8i(uint8_t* u, uint8_t* v, int stride,
// where: AC = (a + b + 1) >> 1, BC = (b + c + 1) >> 1
// and ab = a ^ b, bc = b ^ c, lsb = (AC^BC)&1
static void VE4(uint8_t* dst) { // vertical
static void VE4_SSE2(uint8_t* dst) { // vertical
const __m128i one = _mm_set1_epi8(1);
const __m128i ABCDEFGH = _mm_loadl_epi64((__m128i*)(dst - BPS - 1));
const __m128i BCDEFGH0 = _mm_srli_si128(ABCDEFGH, 1);
@ -909,7 +915,7 @@ static void VE4(uint8_t* dst) { // vertical
}
}
static void LD4(uint8_t* dst) { // Down-Left
static void LD4_SSE2(uint8_t* dst) { // Down-Left
const __m128i one = _mm_set1_epi8(1);
const __m128i ABCDEFGH = _mm_loadl_epi64((__m128i*)(dst - BPS));
const __m128i BCDEFGH0 = _mm_srli_si128(ABCDEFGH, 1);
@ -925,7 +931,7 @@ static void LD4(uint8_t* dst) { // Down-Left
WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
}
static void VR4(uint8_t* dst) { // Vertical-Right
static void VR4_SSE2(uint8_t* dst) { // Vertical-Right
const __m128i one = _mm_set1_epi8(1);
const int I = dst[-1 + 0 * BPS];
const int J = dst[-1 + 1 * BPS];
@ -950,7 +956,7 @@ static void VR4(uint8_t* dst) { // Vertical-Right
DST(0, 3) = AVG3(K, J, I);
}
static void VL4(uint8_t* dst) { // Vertical-Left
static void VL4_SSE2(uint8_t* dst) { // Vertical-Left
const __m128i one = _mm_set1_epi8(1);
const __m128i ABCDEFGH = _mm_loadl_epi64((__m128i*)(dst - BPS));
const __m128i BCDEFGH_ = _mm_srli_si128(ABCDEFGH, 1);
@ -975,7 +981,7 @@ static void VL4(uint8_t* dst) { // Vertical-Left
DST(3, 3) = (extra_out >> 8) & 0xff;
}
static void RD4(uint8_t* dst) { // Down-right
static void RD4_SSE2(uint8_t* dst) { // Down-right
const __m128i one = _mm_set1_epi8(1);
const __m128i XABCD = _mm_loadl_epi64((__m128i*)(dst - BPS - 1));
const __m128i ____XABCD = _mm_slli_si128(XABCD, 4);
@ -1004,7 +1010,7 @@ static void RD4(uint8_t* dst) { // Down-right
//------------------------------------------------------------------------------
// Luma 16x16
static WEBP_INLINE void TrueMotion(uint8_t* dst, int size) {
static WEBP_INLINE void TrueMotion_SSE2(uint8_t* dst, int size) {
const uint8_t* top = dst - BPS;
const __m128i zero = _mm_setzero_si128();
int y;
@ -1041,11 +1047,11 @@ static WEBP_INLINE void TrueMotion(uint8_t* dst, int size) {
}
}
static void TM4(uint8_t* dst) { TrueMotion(dst, 4); }
static void TM8uv(uint8_t* dst) { TrueMotion(dst, 8); }
static void TM16(uint8_t* dst) { TrueMotion(dst, 16); }
static void TM4_SSE2(uint8_t* dst) { TrueMotion_SSE2(dst, 4); }
static void TM8uv_SSE2(uint8_t* dst) { TrueMotion_SSE2(dst, 8); }
static void TM16_SSE2(uint8_t* dst) { TrueMotion_SSE2(dst, 16); }
static void VE16(uint8_t* dst) {
static void VE16_SSE2(uint8_t* dst) {
const __m128i top = _mm_loadu_si128((const __m128i*)(dst - BPS));
int j;
for (j = 0; j < 16; ++j) {
@ -1053,7 +1059,7 @@ static void VE16(uint8_t* dst) {
}
}
static void HE16(uint8_t* dst) { // horizontal
static void HE16_SSE2(uint8_t* dst) { // horizontal
int j;
for (j = 16; j > 0; --j) {
const __m128i values = _mm_set1_epi8(dst[-1]);
@ -1062,7 +1068,7 @@ static void HE16(uint8_t* dst) { // horizontal
}
}
static WEBP_INLINE void Put16(uint8_t v, uint8_t* dst) {
static WEBP_INLINE void Put16_SSE2(uint8_t v, uint8_t* dst) {
int j;
const __m128i values = _mm_set1_epi8(v);
for (j = 0; j < 16; ++j) {
@ -1070,7 +1076,7 @@ static WEBP_INLINE void Put16(uint8_t v, uint8_t* dst) {
}
}
static void DC16(uint8_t* dst) { // DC
static void DC16_SSE2(uint8_t* dst) { // DC
const __m128i zero = _mm_setzero_si128();
const __m128i top = _mm_loadu_si128((const __m128i*)(dst - BPS));
const __m128i sad8x2 = _mm_sad_epu8(top, zero);
@ -1083,37 +1089,37 @@ static void DC16(uint8_t* dst) { // DC
}
{
const int DC = _mm_cvtsi128_si32(sum) + left + 16;
Put16(DC >> 5, dst);
Put16_SSE2(DC >> 5, dst);
}
}
static void DC16NoTop(uint8_t* dst) { // DC with top samples not available
static void DC16NoTop_SSE2(uint8_t* dst) { // DC with top samples unavailable
int DC = 8;
int j;
for (j = 0; j < 16; ++j) {
DC += dst[-1 + j * BPS];
}
Put16(DC >> 4, dst);
Put16_SSE2(DC >> 4, dst);
}
static void DC16NoLeft(uint8_t* dst) { // DC with left samples not available
static void DC16NoLeft_SSE2(uint8_t* dst) { // DC with left samples unavailable
const __m128i zero = _mm_setzero_si128();
const __m128i top = _mm_loadu_si128((const __m128i*)(dst - BPS));
const __m128i sad8x2 = _mm_sad_epu8(top, zero);
// sum the two sads: sad8x2[0:1] + sad8x2[8:9]
const __m128i sum = _mm_add_epi16(sad8x2, _mm_shuffle_epi32(sad8x2, 2));
const int DC = _mm_cvtsi128_si32(sum) + 8;
Put16(DC >> 4, dst);
Put16_SSE2(DC >> 4, dst);
}
static void DC16NoTopLeft(uint8_t* dst) { // DC with no top and left samples
Put16(0x80, dst);
static void DC16NoTopLeft_SSE2(uint8_t* dst) { // DC with no top & left samples
Put16_SSE2(0x80, dst);
}
//------------------------------------------------------------------------------
// Chroma
static void VE8uv(uint8_t* dst) { // vertical
static void VE8uv_SSE2(uint8_t* dst) { // vertical
int j;
const __m128i top = _mm_loadl_epi64((const __m128i*)(dst - BPS));
for (j = 0; j < 8; ++j) {
@ -1121,17 +1127,8 @@ static void VE8uv(uint8_t* dst) { // vertical
}
}
static void HE8uv(uint8_t* dst) { // horizontal
int j;
for (j = 0; j < 8; ++j) {
const __m128i values = _mm_set1_epi8(dst[-1]);
_mm_storel_epi64((__m128i*)dst, values);
dst += BPS;
}
}
// helper for chroma-DC predictions
static WEBP_INLINE void Put8x8uv(uint8_t v, uint8_t* dst) {
static WEBP_INLINE void Put8x8uv_SSE2(uint8_t v, uint8_t* dst) {
int j;
const __m128i values = _mm_set1_epi8(v);
for (j = 0; j < 8; ++j) {
@ -1139,7 +1136,7 @@ static WEBP_INLINE void Put8x8uv(uint8_t v, uint8_t* dst) {
}
}
static void DC8uv(uint8_t* dst) { // DC
static void DC8uv_SSE2(uint8_t* dst) { // DC
const __m128i zero = _mm_setzero_si128();
const __m128i top = _mm_loadl_epi64((const __m128i*)(dst - BPS));
const __m128i sum = _mm_sad_epu8(top, zero);
@ -1150,29 +1147,29 @@ static void DC8uv(uint8_t* dst) { // DC
}
{
const int DC = _mm_cvtsi128_si32(sum) + left + 8;
Put8x8uv(DC >> 4, dst);
Put8x8uv_SSE2(DC >> 4, dst);
}
}
static void DC8uvNoLeft(uint8_t* dst) { // DC with no left samples
static void DC8uvNoLeft_SSE2(uint8_t* dst) { // DC with no left samples
const __m128i zero = _mm_setzero_si128();
const __m128i top = _mm_loadl_epi64((const __m128i*)(dst - BPS));
const __m128i sum = _mm_sad_epu8(top, zero);
const int DC = _mm_cvtsi128_si32(sum) + 4;
Put8x8uv(DC >> 3, dst);
Put8x8uv_SSE2(DC >> 3, dst);
}
static void DC8uvNoTop(uint8_t* dst) { // DC with no top samples
static void DC8uvNoTop_SSE2(uint8_t* dst) { // DC with no top samples
int dc0 = 4;
int i;
for (i = 0; i < 8; ++i) {
dc0 += dst[-1 + i * BPS];
}
Put8x8uv(dc0 >> 3, dst);
Put8x8uv_SSE2(dc0 >> 3, dst);
}
static void DC8uvNoTopLeft(uint8_t* dst) { // DC with nothing
Put8x8uv(0x80, dst);
static void DC8uvNoTopLeft_SSE2(uint8_t* dst) { // DC with nothing
Put8x8uv_SSE2(0x80, dst);
}
//------------------------------------------------------------------------------
@ -1181,47 +1178,46 @@ static void DC8uvNoTopLeft(uint8_t* dst) { // DC with nothing
extern void VP8DspInitSSE2(void);
WEBP_TSAN_IGNORE_FUNCTION void VP8DspInitSSE2(void) {
VP8Transform = Transform;
#if defined(USE_TRANSFORM_AC3)
VP8TransformAC3 = TransformAC3;
VP8Transform = Transform_SSE2;
#if (USE_TRANSFORM_AC3 == 1)
VP8TransformAC3 = TransformAC3_SSE2;
#endif
VP8VFilter16 = VFilter16;
VP8HFilter16 = HFilter16;
VP8VFilter8 = VFilter8;
VP8HFilter8 = HFilter8;
VP8VFilter16i = VFilter16i;
VP8HFilter16i = HFilter16i;
VP8VFilter8i = VFilter8i;
VP8HFilter8i = HFilter8i;
VP8VFilter16 = VFilter16_SSE2;
VP8HFilter16 = HFilter16_SSE2;
VP8VFilter8 = VFilter8_SSE2;
VP8HFilter8 = HFilter8_SSE2;
VP8VFilter16i = VFilter16i_SSE2;
VP8HFilter16i = HFilter16i_SSE2;
VP8VFilter8i = VFilter8i_SSE2;
VP8HFilter8i = HFilter8i_SSE2;
VP8SimpleVFilter16 = SimpleVFilter16;
VP8SimpleHFilter16 = SimpleHFilter16;
VP8SimpleVFilter16i = SimpleVFilter16i;
VP8SimpleHFilter16i = SimpleHFilter16i;
VP8SimpleVFilter16 = SimpleVFilter16_SSE2;
VP8SimpleHFilter16 = SimpleHFilter16_SSE2;
VP8SimpleVFilter16i = SimpleVFilter16i_SSE2;
VP8SimpleHFilter16i = SimpleHFilter16i_SSE2;
VP8PredLuma4[1] = TM4;
VP8PredLuma4[2] = VE4;
VP8PredLuma4[4] = RD4;
VP8PredLuma4[5] = VR4;
VP8PredLuma4[6] = LD4;
VP8PredLuma4[7] = VL4;
VP8PredLuma4[1] = TM4_SSE2;
VP8PredLuma4[2] = VE4_SSE2;
VP8PredLuma4[4] = RD4_SSE2;
VP8PredLuma4[5] = VR4_SSE2;
VP8PredLuma4[6] = LD4_SSE2;
VP8PredLuma4[7] = VL4_SSE2;
VP8PredLuma16[0] = DC16;
VP8PredLuma16[1] = TM16;
VP8PredLuma16[2] = VE16;
VP8PredLuma16[3] = HE16;
VP8PredLuma16[4] = DC16NoTop;
VP8PredLuma16[5] = DC16NoLeft;
VP8PredLuma16[6] = DC16NoTopLeft;
VP8PredLuma16[0] = DC16_SSE2;
VP8PredLuma16[1] = TM16_SSE2;
VP8PredLuma16[2] = VE16_SSE2;
VP8PredLuma16[3] = HE16_SSE2;
VP8PredLuma16[4] = DC16NoTop_SSE2;
VP8PredLuma16[5] = DC16NoLeft_SSE2;
VP8PredLuma16[6] = DC16NoTopLeft_SSE2;
VP8PredChroma8[0] = DC8uv;
VP8PredChroma8[1] = TM8uv;
VP8PredChroma8[2] = VE8uv;
VP8PredChroma8[3] = HE8uv;
VP8PredChroma8[4] = DC8uvNoTop;
VP8PredChroma8[5] = DC8uvNoLeft;
VP8PredChroma8[6] = DC8uvNoTopLeft;
VP8PredChroma8[0] = DC8uv_SSE2;
VP8PredChroma8[1] = TM8uv_SSE2;
VP8PredChroma8[2] = VE8uv_SSE2;
VP8PredChroma8[4] = DC8uvNoTop_SSE2;
VP8PredChroma8[5] = DC8uvNoLeft_SSE2;
VP8PredChroma8[6] = DC8uvNoTopLeft_SSE2;
}
#else // !WEBP_USE_SSE2

View File

@ -11,15 +11,15 @@
//
// Author: Skal (pascal.massimino@gmail.com)
#include "./dsp.h"
#include "src/dsp/dsp.h"
#if defined(WEBP_USE_SSE41)
#include <smmintrin.h>
#include "../dec/vp8i_dec.h"
#include "../utils/utils.h"
#include "src/dec/vp8i_dec.h"
#include "src/utils/utils.h"
static void HE16(uint8_t* dst) { // horizontal
static void HE16_SSE41(uint8_t* dst) { // horizontal
int j;
const __m128i kShuffle3 = _mm_set1_epi8(3);
for (j = 16; j > 0; --j) {
@ -36,7 +36,7 @@ static void HE16(uint8_t* dst) { // horizontal
extern void VP8DspInitSSE41(void);
WEBP_TSAN_IGNORE_FUNCTION void VP8DspInitSSE41(void) {
VP8PredLuma16[3] = HE16;
VP8PredLuma16[3] = HE16_SSE41;
}
#else // !WEBP_USE_SSE41

File diff suppressed because it is too large Load Diff

View File

@ -15,10 +15,10 @@
#define WEBP_DSP_DSP_H_
#ifdef HAVE_CONFIG_H
#include "../webp/config.h"
#include "src/webp/config.h"
#endif
#include "../webp/types.h"
#include "src/webp/types.h"
#ifdef __cplusplus
extern "C" {
@ -51,9 +51,8 @@ extern "C" {
# define __has_builtin(x) 0
#endif
// For now, none of the optimizations below are available in emscripten.
// WebAssembly overrides native optimizations.
#if !(defined(EMSCRIPTEN) || defined(WEBP_USE_WASM))
// for now, none of the optimizations below are available in emscripten
#if !defined(EMSCRIPTEN)
#if defined(_MSC_VER) && _MSC_VER > 1310 && \
(defined(_M_X64) || defined(_M_IX86))
@ -105,7 +104,7 @@ extern "C" {
#define WEBP_USE_MIPS32
#if (__mips_isa_rev >= 2)
#define WEBP_USE_MIPS32_R2
#if defined(__mips_dspr2) || (__mips_dsp_rev >= 2)
#if defined(__mips_dspr2) || (defined(__mips_dsp_rev) && __mips_dsp_rev >= 2)
#define WEBP_USE_MIPS_DSP_R2
#endif
#endif
@ -117,6 +116,22 @@ extern "C" {
#endif /* EMSCRIPTEN */
#ifndef WEBP_DSP_OMIT_C_CODE
#define WEBP_DSP_OMIT_C_CODE 1
#endif
#if (defined(__aarch64__) || defined(__ARM_NEON__)) && WEBP_DSP_OMIT_C_CODE
#define WEBP_NEON_OMIT_C_CODE 1
#else
#define WEBP_NEON_OMIT_C_CODE 0
#endif
#if !(LOCAL_CLANG_PREREQ(3,8) || LOCAL_GCC_PREREQ(4,8) || defined(__aarch64__))
#define WEBP_NEON_WORK_AROUND_GCC 1
#else
#define WEBP_NEON_WORK_AROUND_GCC 0
#endif
// This macro prevents thread_sanitizer from reporting known concurrent writes.
#define WEBP_TSAN_IGNORE_FUNCTION
#if defined(__has_feature)
@ -146,6 +161,11 @@ extern "C" {
#endif
#endif
// Regularize the definition of WEBP_SWAP_16BIT_CSP (backward compatibility)
#if !defined(WEBP_SWAP_16BIT_CSP)
#define WEBP_SWAP_16BIT_CSP 0
#endif
typedef enum {
kSSE2,
kSSE3,
@ -156,12 +176,11 @@ typedef enum {
kNEON,
kMIPS32,
kMIPSdspR2,
kMSA,
kWASM
kMSA
} CPUFeature;
// returns true if the CPU supports the feature.
typedef int (*VP8CPUInfo)(CPUFeature feature);
WEBP_EXTERN(VP8CPUInfo) VP8GetCPUInfo;
WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
//------------------------------------------------------------------------------
// Init stub generator
@ -289,6 +308,7 @@ typedef double (*VP8SSIMGetClippedFunc)(const uint8_t* src1, int stride1,
int xo, int yo, // center position
int W, int H); // plane dimension
#if !defined(WEBP_REDUCE_SIZE)
// This version is called with the guarantee that you can load 8 bytes and
// 8 rows at offset src1 and src2
typedef double (*VP8SSIMGetFunc)(const uint8_t* src1, int stride1,
@ -296,10 +316,13 @@ typedef double (*VP8SSIMGetFunc)(const uint8_t* src1, int stride1,
extern VP8SSIMGetFunc VP8SSIMGet; // unclipped / unchecked
extern VP8SSIMGetClippedFunc VP8SSIMGetClipped; // with clipping
#endif
#if !defined(WEBP_DISABLE_STATS)
typedef uint32_t (*VP8AccumulateSSEFunc)(const uint8_t* src1,
const uint8_t* src2, int len);
extern VP8AccumulateSSEFunc VP8AccumulateSSE;
#endif
// must be called before using any of the above directly
void VP8SSIMDspInit(void);
@ -480,12 +503,12 @@ extern WebPRescalerExportRowFunc WebPRescalerExportRowExpand;
extern WebPRescalerExportRowFunc WebPRescalerExportRowShrink;
// Plain-C implementation, as fall-back.
extern void WebPRescalerImportRowExpandC(struct WebPRescaler* const wrk,
const uint8_t* src);
extern void WebPRescalerImportRowShrinkC(struct WebPRescaler* const wrk,
const uint8_t* src);
extern void WebPRescalerExportRowExpandC(struct WebPRescaler* const wrk);
extern void WebPRescalerExportRowShrinkC(struct WebPRescaler* const wrk);
extern void WebPRescalerImportRowExpand_C(struct WebPRescaler* const wrk,
const uint8_t* src);
extern void WebPRescalerImportRowShrink_C(struct WebPRescaler* const wrk,
const uint8_t* src);
extern void WebPRescalerExportRowExpand_C(struct WebPRescaler* const wrk);
extern void WebPRescalerExportRowShrink_C(struct WebPRescaler* const wrk);
// Main entry calls:
extern void WebPRescalerImportRow(struct WebPRescaler* const wrk,
@ -551,25 +574,22 @@ void WebPMultRows(uint8_t* ptr, int stride,
int width, int num_rows, int inverse);
// Plain-C versions, used as fallback by some implementations.
void WebPMultRowC(uint8_t* const ptr, const uint8_t* const alpha,
int width, int inverse);
void WebPMultARGBRowC(uint32_t* const ptr, int width, int inverse);
void WebPMultRow_C(uint8_t* const ptr, const uint8_t* const alpha,
int width, int inverse);
void WebPMultARGBRow_C(uint32_t* const ptr, int width, int inverse);
// RGB packing function. 'step' can be 3 or 4. r/g/b input is rgb or bgr order.
extern void (*WebPPackRGB)(const uint8_t* r, const uint8_t* g, const uint8_t* b,
int len, int step, uint32_t* out);
// This function returns true if src[i] contains a value different from 0xff.
extern int (*WebPHasAlpha8b)(const uint8_t* src, int length);
// This function returns true if src[4*i] contains a value different from 0xff.
extern int (*WebPHasAlpha32b)(const uint8_t* src, int length);
// To be called first before using the above.
void WebPInitAlphaProcessing(void);
// ARGB packing function: a/r/g/b input is rgba or bgra order.
extern void (*VP8PackARGB)(const uint8_t* a, const uint8_t* r,
const uint8_t* g, const uint8_t* b, int len,
uint32_t* out);
// RGB packing function. 'step' can be 3 or 4. r/g/b input is rgb or bgr order.
extern void (*VP8PackRGB)(const uint8_t* r, const uint8_t* g, const uint8_t* b,
int len, int step, uint32_t* out);
// To be called first before using the above.
void VP8EncDspARGBInit(void);
//------------------------------------------------------------------------------
// Filter functions

View File

@ -14,16 +14,18 @@
#include <assert.h>
#include <stdlib.h> // for abs()
#include "./dsp.h"
#include "../enc/vp8i_enc.h"
#include "src/dsp/dsp.h"
#include "src/enc/vp8i_enc.h"
static WEBP_INLINE uint8_t clip_8b(int v) {
return (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255;
}
#if !WEBP_NEON_OMIT_C_CODE
static WEBP_INLINE int clip_max(int v, int max) {
return (v > max) ? max : v;
}
#endif // !WEBP_NEON_OMIT_C_CODE
//------------------------------------------------------------------------------
// Compute susceptibility based on DCT-coeff histograms:
@ -56,9 +58,10 @@ void VP8SetHistogramData(const int distribution[MAX_COEFF_THRESH + 1],
histo->last_non_zero = last_non_zero;
}
static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
int start_block, int end_block,
VP8Histogram* const histo) {
#if !WEBP_NEON_OMIT_C_CODE
static void CollectHistogram_C(const uint8_t* ref, const uint8_t* pred,
int start_block, int end_block,
VP8Histogram* const histo) {
int j;
int distribution[MAX_COEFF_THRESH + 1] = { 0 };
for (j = start_block; j < end_block; ++j) {
@ -76,6 +79,7 @@ static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
}
VP8SetHistogramData(distribution, histo);
}
#endif // !WEBP_NEON_OMIT_C_CODE
//------------------------------------------------------------------------------
// run-time tables (~4k)
@ -100,6 +104,8 @@ static WEBP_TSAN_IGNORE_FUNCTION void InitTables(void) {
//------------------------------------------------------------------------------
// Transforms (Paragraph 14.4)
#if !WEBP_NEON_OMIT_C_CODE
#define STORE(x, y, v) \
dst[(x) + (y) * BPS] = clip_8b(ref[(x) + (y) * BPS] + ((v) >> 3))
@ -140,15 +146,15 @@ static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
}
}
static void ITransform(const uint8_t* ref, const int16_t* in, uint8_t* dst,
int do_two) {
static void ITransform_C(const uint8_t* ref, const int16_t* in, uint8_t* dst,
int do_two) {
ITransformOne(ref, in, dst);
if (do_two) {
ITransformOne(ref + 4, in + 16, dst + 4);
}
}
static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
static void FTransform_C(const uint8_t* src, const uint8_t* ref, int16_t* out) {
int i;
int tmp[16];
for (i = 0; i < 4; ++i, src += BPS, ref += BPS) {
@ -176,13 +182,16 @@ static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
out[12+ i] = ((a3 * 2217 - a2 * 5352 + 51000) >> 16);
}
}
#endif // !WEBP_NEON_OMIT_C_CODE
static void FTransform2(const uint8_t* src, const uint8_t* ref, int16_t* out) {
static void FTransform2_C(const uint8_t* src, const uint8_t* ref,
int16_t* out) {
VP8FTransform(src, ref, out);
VP8FTransform(src + 4, ref + 4, out + 16);
}
static void FTransformWHT(const int16_t* in, int16_t* out) {
#if !WEBP_NEON_OMIT_C_CODE
static void FTransformWHT_C(const int16_t* in, int16_t* out) {
// input is 12b signed
int32_t tmp[16];
int i;
@ -211,6 +220,7 @@ static void FTransformWHT(const int16_t* in, int16_t* out) {
out[12 + i] = b3 >> 1;
}
}
#endif // !WEBP_NEON_OMIT_C_CODE
#undef MUL
#undef STORE
@ -303,8 +313,8 @@ static WEBP_INLINE void DCMode(uint8_t* dst, const uint8_t* left,
//------------------------------------------------------------------------------
// Chroma 8x8 prediction (paragraph 12.2)
static void IntraChromaPreds(uint8_t* dst, const uint8_t* left,
const uint8_t* top) {
static void IntraChromaPreds_C(uint8_t* dst, const uint8_t* left,
const uint8_t* top) {
// U block
DCMode(C8DC8 + dst, left, top, 8, 8, 4);
VerticalPred(C8VE8 + dst, top, 8);
@ -323,8 +333,8 @@ static void IntraChromaPreds(uint8_t* dst, const uint8_t* left,
//------------------------------------------------------------------------------
// luma 16x16 prediction (paragraph 12.3)
static void Intra16Preds(uint8_t* dst,
const uint8_t* left, const uint8_t* top) {
static void Intra16Preds_C(uint8_t* dst,
const uint8_t* left, const uint8_t* top) {
DCMode(I16DC16 + dst, left, top, 16, 16, 5);
VerticalPred(I16VE16 + dst, top, 16);
HorizontalPred(I16HE16 + dst, left, 16);
@ -507,7 +517,7 @@ static void TM4(uint8_t* dst, const uint8_t* top) {
// Left samples are top[-5 .. -2], top_left is top[-1], top are
// located at top[0..3], and top right is top[4..7]
static void Intra4Preds(uint8_t* dst, const uint8_t* top) {
static void Intra4Preds_C(uint8_t* dst, const uint8_t* top) {
DC4(I4DC4 + dst, top);
TM4(I4TM4 + dst, top);
VE4(I4VE4 + dst, top);
@ -523,6 +533,7 @@ static void Intra4Preds(uint8_t* dst, const uint8_t* top) {
//------------------------------------------------------------------------------
// Metric
#if !WEBP_NEON_OMIT_C_CODE
static WEBP_INLINE int GetSSE(const uint8_t* a, const uint8_t* b,
int w, int h) {
int count = 0;
@ -538,20 +549,21 @@ static WEBP_INLINE int GetSSE(const uint8_t* a, const uint8_t* b,
return count;
}
static int SSE16x16(const uint8_t* a, const uint8_t* b) {
static int SSE16x16_C(const uint8_t* a, const uint8_t* b) {
return GetSSE(a, b, 16, 16);
}
static int SSE16x8(const uint8_t* a, const uint8_t* b) {
static int SSE16x8_C(const uint8_t* a, const uint8_t* b) {
return GetSSE(a, b, 16, 8);
}
static int SSE8x8(const uint8_t* a, const uint8_t* b) {
static int SSE8x8_C(const uint8_t* a, const uint8_t* b) {
return GetSSE(a, b, 8, 8);
}
static int SSE4x4(const uint8_t* a, const uint8_t* b) {
static int SSE4x4_C(const uint8_t* a, const uint8_t* b) {
return GetSSE(a, b, 4, 4);
}
#endif // !WEBP_NEON_OMIT_C_CODE
static void Mean16x4(const uint8_t* ref, uint32_t dc[4]) {
static void Mean16x4_C(const uint8_t* ref, uint32_t dc[4]) {
int k, x, y;
for (k = 0; k < 4; ++k) {
uint32_t avg = 0;
@ -571,6 +583,7 @@ static void Mean16x4(const uint8_t* ref, uint32_t dc[4]) {
// We try to match the spectral content (weighted) between source and
// reconstructed samples.
#if !WEBP_NEON_OMIT_C_CODE
// Hadamard transform
// Returns the weighted sum of the absolute value of transformed coefficients.
// w[] contains a row-major 4 by 4 symmetric matrix.
@ -608,24 +621,25 @@ static int TTransform(const uint8_t* in, const uint16_t* w) {
return sum;
}
static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
const uint16_t* const w) {
static int Disto4x4_C(const uint8_t* const a, const uint8_t* const b,
const uint16_t* const w) {
const int sum1 = TTransform(a, w);
const int sum2 = TTransform(b, w);
return abs(sum2 - sum1) >> 5;
}
static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
const uint16_t* const w) {
static int Disto16x16_C(const uint8_t* const a, const uint8_t* const b,
const uint16_t* const w) {
int D = 0;
int x, y;
for (y = 0; y < 16 * BPS; y += 4 * BPS) {
for (x = 0; x < 16; x += 4) {
D += Disto4x4(a + x + y, b + x + y, w);
D += Disto4x4_C(a + x + y, b + x + y, w);
}
}
return D;
}
#endif // !WEBP_NEON_OMIT_C_CODE
//------------------------------------------------------------------------------
// Quantization
@ -636,8 +650,8 @@ static const uint8_t kZigzag[16] = {
};
// Simple quantization
static int QuantizeBlock(int16_t in[16], int16_t out[16],
const VP8Matrix* const mtx) {
static int QuantizeBlock_C(int16_t in[16], int16_t out[16],
const VP8Matrix* const mtx) {
int last = -1;
int n;
for (n = 0; n < 16; ++n) {
@ -662,13 +676,15 @@ static int QuantizeBlock(int16_t in[16], int16_t out[16],
return (last >= 0);
}
static int Quantize2Blocks(int16_t in[32], int16_t out[32],
const VP8Matrix* const mtx) {
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
static int Quantize2Blocks_C(int16_t in[32], int16_t out[32],
const VP8Matrix* const mtx) {
int nz;
nz = VP8EncQuantizeBlock(in + 0 * 16, out + 0 * 16, mtx) << 0;
nz |= VP8EncQuantizeBlock(in + 1 * 16, out + 1 * 16, mtx) << 1;
return nz;
}
#endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
//------------------------------------------------------------------------------
// Block copy
@ -682,11 +698,11 @@ static WEBP_INLINE void Copy(const uint8_t* src, uint8_t* dst, int w, int h) {
}
}
static void Copy4x4(const uint8_t* src, uint8_t* dst) {
static void Copy4x4_C(const uint8_t* src, uint8_t* dst) {
Copy(src, dst, 4, 4);
}
static void Copy16x8(const uint8_t* src, uint8_t* dst) {
static void Copy16x8_C(const uint8_t* src, uint8_t* dst) {
Copy(src, dst, 16, 8);
}
@ -734,26 +750,32 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInit(void) {
InitTables();
// default C implementations
VP8CollectHistogram = CollectHistogram;
VP8ITransform = ITransform;
VP8FTransform = FTransform;
VP8FTransform2 = FTransform2;
VP8FTransformWHT = FTransformWHT;
VP8EncPredLuma4 = Intra4Preds;
VP8EncPredLuma16 = Intra16Preds;
VP8EncPredChroma8 = IntraChromaPreds;
VP8SSE16x16 = SSE16x16;
VP8SSE8x8 = SSE8x8;
VP8SSE16x8 = SSE16x8;
VP8SSE4x4 = SSE4x4;
VP8TDisto4x4 = Disto4x4;
VP8TDisto16x16 = Disto16x16;
VP8Mean16x4 = Mean16x4;
VP8EncQuantizeBlock = QuantizeBlock;
VP8EncQuantize2Blocks = Quantize2Blocks;
VP8EncQuantizeBlockWHT = QuantizeBlock;
VP8Copy4x4 = Copy4x4;
VP8Copy16x8 = Copy16x8;
#if !WEBP_NEON_OMIT_C_CODE
VP8ITransform = ITransform_C;
VP8FTransform = FTransform_C;
VP8FTransformWHT = FTransformWHT_C;
VP8TDisto4x4 = Disto4x4_C;
VP8TDisto16x16 = Disto16x16_C;
VP8CollectHistogram = CollectHistogram_C;
VP8SSE16x16 = SSE16x16_C;
VP8SSE16x8 = SSE16x8_C;
VP8SSE8x8 = SSE8x8_C;
VP8SSE4x4 = SSE4x4_C;
#endif
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
VP8EncQuantizeBlock = QuantizeBlock_C;
VP8EncQuantize2Blocks = Quantize2Blocks_C;
#endif
VP8FTransform2 = FTransform2_C;
VP8EncPredLuma4 = Intra4Preds_C;
VP8EncPredLuma16 = Intra16Preds_C;
VP8EncPredChroma8 = IntraChromaPreds_C;
VP8Mean16x4 = Mean16x4_C;
VP8EncQuantizeBlockWHT = QuantizeBlock_C;
VP8Copy4x4 = Copy4x4_C;
VP8Copy16x8 = Copy16x8_C;
// If defined, use CPUInfo() to overwrite some pointers with faster versions.
if (VP8GetCPUInfo != NULL) {
@ -772,11 +794,6 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInit(void) {
VP8EncDspInitAVX2();
}
#endif
#if defined(WEBP_USE_NEON)
if (VP8GetCPUInfo(kNEON)) {
VP8EncDspInitNEON();
}
#endif
#if defined(WEBP_USE_MIPS32)
if (VP8GetCPUInfo(kMIPS32)) {
VP8EncDspInitMIPS32();
@ -793,5 +810,34 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInit(void) {
}
#endif
}
#if defined(WEBP_USE_NEON)
if (WEBP_NEON_OMIT_C_CODE ||
(VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
VP8EncDspInitNEON();
}
#endif
assert(VP8ITransform != NULL);
assert(VP8FTransform != NULL);
assert(VP8FTransformWHT != NULL);
assert(VP8TDisto4x4 != NULL);
assert(VP8TDisto16x16 != NULL);
assert(VP8CollectHistogram != NULL);
assert(VP8SSE16x16 != NULL);
assert(VP8SSE16x8 != NULL);
assert(VP8SSE8x8 != NULL);
assert(VP8SSE4x4 != NULL);
assert(VP8EncQuantizeBlock != NULL);
assert(VP8EncQuantize2Blocks != NULL);
assert(VP8FTransform2 != NULL);
assert(VP8EncPredLuma4 != NULL);
assert(VP8EncPredLuma16 != NULL);
assert(VP8EncPredChroma8 != NULL);
assert(VP8Mean16x4 != NULL);
assert(VP8EncQuantizeBlockWHT != NULL);
assert(VP8Copy4x4 != NULL);
assert(VP8Copy16x8 != NULL);
enc_last_cpuinfo_used = VP8GetCPUInfo;
}

View File

@ -9,7 +9,7 @@
//
// AVX2 version of speed-critical encoding functions.
#include "./dsp.h"
#include "src/dsp/dsp.h"
#if defined(WEBP_USE_AVX2)

View File

@ -13,13 +13,13 @@
// Jovan Zelincevic (jovan.zelincevic@imgtec.com)
// Slobodan Prijic (slobodan.prijic@imgtec.com)
#include "./dsp.h"
#include "src/dsp/dsp.h"
#if defined(WEBP_USE_MIPS32)
#include "./mips_macro.h"
#include "../enc/vp8i_enc.h"
#include "../enc/cost_enc.h"
#include "src/dsp/mips_macro.h"
#include "src/enc/vp8i_enc.h"
#include "src/enc/cost_enc.h"
static const int kC1 = 20091 + (1 << 16);
static const int kC2 = 35468;
@ -113,8 +113,9 @@ static const int kC2 = 35468;
"sb %[" #TEMP12 "], 3+" XSTR(BPS) "*" #A "(%[temp16]) \n\t"
// Does one or two inverse transforms.
static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
uint8_t* dst) {
static WEBP_INLINE void ITransformOne_MIPS32(const uint8_t* ref,
const int16_t* in,
uint8_t* dst) {
int temp0, temp1, temp2, temp3, temp4, temp5, temp6;
int temp7, temp8, temp9, temp10, temp11, temp12, temp13;
int temp14, temp15, temp16, temp17, temp18, temp19, temp20;
@ -144,11 +145,11 @@ static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
);
}
static void ITransform(const uint8_t* ref, const int16_t* in,
uint8_t* dst, int do_two) {
ITransformOne(ref, in, dst);
static void ITransform_MIPS32(const uint8_t* ref, const int16_t* in,
uint8_t* dst, int do_two) {
ITransformOne_MIPS32(ref, in, dst);
if (do_two) {
ITransformOne(ref + 4, in + 16, dst + 4);
ITransformOne_MIPS32(ref + 4, in + 16, dst + 4);
}
}
@ -187,8 +188,8 @@ static void ITransform(const uint8_t* ref, const int16_t* in,
"sh %[temp5], " #J "(%[ppin]) \n\t" \
"sh %[level], " #N "(%[pout]) \n\t"
static int QuantizeBlock(int16_t in[16], int16_t out[16],
const VP8Matrix* const mtx) {
static int QuantizeBlock_MIPS32(int16_t in[16], int16_t out[16],
const VP8Matrix* const mtx) {
int temp0, temp1, temp2, temp3, temp4, temp5;
int sign, coeff, level, i;
int max_level = MAX_LEVEL;
@ -238,11 +239,11 @@ static int QuantizeBlock(int16_t in[16], int16_t out[16],
return 0;
}
static int Quantize2Blocks(int16_t in[32], int16_t out[32],
const VP8Matrix* const mtx) {
static int Quantize2Blocks_MIPS32(int16_t in[32], int16_t out[32],
const VP8Matrix* const mtx) {
int nz;
nz = QuantizeBlock(in + 0 * 16, out + 0 * 16, mtx) << 0;
nz |= QuantizeBlock(in + 1 * 16, out + 1 * 16, mtx) << 1;
nz = QuantizeBlock_MIPS32(in + 0 * 16, out + 0 * 16, mtx) << 0;
nz |= QuantizeBlock_MIPS32(in + 1 * 16, out + 1 * 16, mtx) << 1;
return nz;
}
@ -361,8 +362,8 @@ static int Quantize2Blocks(int16_t in[32], int16_t out[32],
"msub %[temp6], %[temp0] \n\t" \
"msub %[temp7], %[temp1] \n\t"
static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
const uint16_t* const w) {
static int Disto4x4_MIPS32(const uint8_t* const a, const uint8_t* const b,
const uint16_t* const w) {
int tmp[32];
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
@ -396,13 +397,13 @@ static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
#undef VERTICAL_PASS
#undef HORIZONTAL_PASS
static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
const uint16_t* const w) {
static int Disto16x16_MIPS32(const uint8_t* const a, const uint8_t* const b,
const uint16_t* const w) {
int D = 0;
int x, y;
for (y = 0; y < 16 * BPS; y += 4 * BPS) {
for (x = 0; x < 16; x += 4) {
D += Disto4x4(a + x + y, b + x + y, w);
D += Disto4x4_MIPS32(a + x + y, b + x + y, w);
}
}
return D;
@ -478,7 +479,8 @@ static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
"sh %[" #TEMP8 "], " #D "(%[temp20]) \n\t" \
"sh %[" #TEMP12 "], " #B "(%[temp20]) \n\t"
static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
static void FTransform_MIPS32(const uint8_t* src, const uint8_t* ref,
int16_t* out) {
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
int temp9, temp10, temp11, temp12, temp13, temp14, temp15, temp16;
int temp17, temp18, temp19, temp20;
@ -539,7 +541,7 @@ static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
GET_SSE_INNER(C, C + 1, C + 2, C + 3) \
GET_SSE_INNER(D, D + 1, D + 2, D + 3)
static int SSE16x16(const uint8_t* a, const uint8_t* b) {
static int SSE16x16_MIPS32(const uint8_t* a, const uint8_t* b) {
int count;
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
@ -573,7 +575,7 @@ static int SSE16x16(const uint8_t* a, const uint8_t* b) {
return count;
}
static int SSE16x8(const uint8_t* a, const uint8_t* b) {
static int SSE16x8_MIPS32(const uint8_t* a, const uint8_t* b) {
int count;
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
@ -599,7 +601,7 @@ static int SSE16x8(const uint8_t* a, const uint8_t* b) {
return count;
}
static int SSE8x8(const uint8_t* a, const uint8_t* b) {
static int SSE8x8_MIPS32(const uint8_t* a, const uint8_t* b) {
int count;
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
@ -621,7 +623,7 @@ static int SSE8x8(const uint8_t* a, const uint8_t* b) {
return count;
}
static int SSE4x4(const uint8_t* a, const uint8_t* b) {
static int SSE4x4_MIPS32(const uint8_t* a, const uint8_t* b) {
int count;
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
@ -651,17 +653,20 @@ static int SSE4x4(const uint8_t* a, const uint8_t* b) {
extern void VP8EncDspInitMIPS32(void);
WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInitMIPS32(void) {
VP8ITransform = ITransform;
VP8FTransform = FTransform;
VP8EncQuantizeBlock = QuantizeBlock;
VP8EncQuantize2Blocks = Quantize2Blocks;
VP8TDisto4x4 = Disto4x4;
VP8TDisto16x16 = Disto16x16;
VP8ITransform = ITransform_MIPS32;
VP8FTransform = FTransform_MIPS32;
VP8EncQuantizeBlock = QuantizeBlock_MIPS32;
VP8EncQuantize2Blocks = Quantize2Blocks_MIPS32;
VP8TDisto4x4 = Disto4x4_MIPS32;
VP8TDisto16x16 = Disto16x16_MIPS32;
#if !defined(WORK_AROUND_GCC)
VP8SSE16x16 = SSE16x16;
VP8SSE8x8 = SSE8x8;
VP8SSE16x8 = SSE16x8;
VP8SSE4x4 = SSE4x4;
VP8SSE16x16 = SSE16x16_MIPS32;
VP8SSE8x8 = SSE8x8_MIPS32;
VP8SSE16x8 = SSE16x8_MIPS32;
VP8SSE4x4 = SSE4x4_MIPS32;
#endif
}

View File

@ -12,13 +12,13 @@
// Author(s): Darko Laus (darko.laus@imgtec.com)
// Mirko Raus (mirko.raus@imgtec.com)
#include "./dsp.h"
#include "src/dsp/dsp.h"
#if defined(WEBP_USE_MIPS_DSP_R2)
#include "./mips_macro.h"
#include "../enc/cost_enc.h"
#include "../enc/vp8i_enc.h"
#include "src/dsp/mips_macro.h"
#include "src/enc/cost_enc.h"
#include "src/enc/vp8i_enc.h"
static const int kC1 = 20091 + (1 << 16);
static const int kC2 = 35468;
@ -141,7 +141,8 @@ static const int kC2 = 35468;
"sh %[" #TEMP8 "], " #D "(%[temp20]) \n\t" \
"sh %[" #TEMP12 "], " #B "(%[temp20]) \n\t"
static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
static void FTransform_MIPSdspR2(const uint8_t* src, const uint8_t* ref,
int16_t* out) {
const int c2217 = 2217;
const int c5352 = 5352;
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
@ -238,16 +239,16 @@ static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
);
}
static void ITransform(const uint8_t* ref, const int16_t* in, uint8_t* dst,
int do_two) {
static void ITransform_MIPSdspR2(const uint8_t* ref, const int16_t* in,
uint8_t* dst, int do_two) {
ITransformOne(ref, in, dst);
if (do_two) {
ITransformOne(ref + 4, in + 16, dst + 4);
}
}
static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
const uint16_t* const w) {
static int Disto4x4_MIPSdspR2(const uint8_t* const a, const uint8_t* const b,
const uint16_t* const w) {
int temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9;
int temp10, temp11, temp12, temp13, temp14, temp15, temp16, temp17;
@ -313,13 +314,14 @@ static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
return abs(temp3 - temp17) >> 5;
}
static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
const uint16_t* const w) {
static int Disto16x16_MIPSdspR2(const uint8_t* const a,
const uint8_t* const b,
const uint16_t* const w) {
int D = 0;
int x, y;
for (y = 0; y < 16 * BPS; y += 4 * BPS) {
for (x = 0; x < 16; x += 4) {
D += Disto4x4(a + x + y, b + x + y, w);
D += Disto4x4_MIPSdspR2(a + x + y, b + x + y, w);
}
}
return D;
@ -1011,8 +1013,8 @@ static void HU4(uint8_t* dst, const uint8_t* top) {
//------------------------------------------------------------------------------
// Chroma 8x8 prediction (paragraph 12.2)
static void IntraChromaPreds(uint8_t* dst, const uint8_t* left,
const uint8_t* top) {
static void IntraChromaPreds_MIPSdspR2(uint8_t* dst, const uint8_t* left,
const uint8_t* top) {
// U block
DCMode8(C8DC8 + dst, left, top);
VerticalPred8(C8VE8 + dst, top);
@ -1031,8 +1033,8 @@ static void IntraChromaPreds(uint8_t* dst, const uint8_t* left,
//------------------------------------------------------------------------------
// luma 16x16 prediction (paragraph 12.3)
static void Intra16Preds(uint8_t* dst,
const uint8_t* left, const uint8_t* top) {
static void Intra16Preds_MIPSdspR2(uint8_t* dst,
const uint8_t* left, const uint8_t* top) {
DCMode16(I16DC16 + dst, left, top);
VerticalPred16(I16VE16 + dst, top);
HorizontalPred16(I16HE16 + dst, left);
@ -1041,7 +1043,7 @@ static void Intra16Preds(uint8_t* dst,
// Left samples are top[-5 .. -2], top_left is top[-1], top are
// located at top[0..3], and top right is top[4..7]
static void Intra4Preds(uint8_t* dst, const uint8_t* top) {
static void Intra4Preds_MIPSdspR2(uint8_t* dst, const uint8_t* top) {
DC4(I4DC4 + dst, top);
TM4(I4TM4 + dst, top);
VE4(I4VE4 + dst, top);
@ -1077,7 +1079,7 @@ static void Intra4Preds(uint8_t* dst, const uint8_t* top) {
GET_SSE_INNER(C) \
GET_SSE_INNER(D)
static int SSE16x16(const uint8_t* a, const uint8_t* b) {
static int SSE16x16_MIPSdspR2(const uint8_t* a, const uint8_t* b) {
int count;
int temp0, temp1, temp2, temp3;
__asm__ volatile (
@ -1107,7 +1109,7 @@ static int SSE16x16(const uint8_t* a, const uint8_t* b) {
return count;
}
static int SSE16x8(const uint8_t* a, const uint8_t* b) {
static int SSE16x8_MIPSdspR2(const uint8_t* a, const uint8_t* b) {
int count;
int temp0, temp1, temp2, temp3;
__asm__ volatile (
@ -1129,7 +1131,7 @@ static int SSE16x8(const uint8_t* a, const uint8_t* b) {
return count;
}
static int SSE8x8(const uint8_t* a, const uint8_t* b) {
static int SSE8x8_MIPSdspR2(const uint8_t* a, const uint8_t* b) {
int count;
int temp0, temp1, temp2, temp3;
__asm__ volatile (
@ -1147,7 +1149,7 @@ static int SSE8x8(const uint8_t* a, const uint8_t* b) {
return count;
}
static int SSE4x4(const uint8_t* a, const uint8_t* b) {
static int SSE4x4_MIPSdspR2(const uint8_t* a, const uint8_t* b) {
int count;
int temp0, temp1, temp2, temp3;
__asm__ volatile (
@ -1270,8 +1272,8 @@ static int SSE4x4(const uint8_t* a, const uint8_t* b) {
"usw $0, " #J "(%[ppin]) \n\t" \
"3: \n\t"
static int QuantizeBlock(int16_t in[16], int16_t out[16],
const VP8Matrix* const mtx) {
static int QuantizeBlock_MIPSdspR2(int16_t in[16], int16_t out[16],
const VP8Matrix* const mtx) {
int temp0, temp1, temp2, temp3, temp4, temp5,temp6;
int sign, coeff, level;
int max_level = MAX_LEVEL;
@ -1311,11 +1313,11 @@ static int QuantizeBlock(int16_t in[16], int16_t out[16],
return (ret != 0);
}
static int Quantize2Blocks(int16_t in[32], int16_t out[32],
const VP8Matrix* const mtx) {
static int Quantize2Blocks_MIPSdspR2(int16_t in[32], int16_t out[32],
const VP8Matrix* const mtx) {
int nz;
nz = QuantizeBlock(in + 0 * 16, out + 0 * 16, mtx) << 0;
nz |= QuantizeBlock(in + 1 * 16, out + 1 * 16, mtx) << 1;
nz = QuantizeBlock_MIPSdspR2(in + 0 * 16, out + 0 * 16, mtx) << 0;
nz |= QuantizeBlock_MIPSdspR2(in + 1 * 16, out + 1 * 16, mtx) << 1;
return nz;
}
@ -1358,7 +1360,7 @@ static int Quantize2Blocks(int16_t in[32], int16_t out[32],
"usw %[" #TEMP4 "], " #C "(%[out]) \n\t" \
"usw %[" #TEMP6 "], " #D "(%[out]) \n\t"
static void FTransformWHT(const int16_t* in, int16_t* out) {
static void FTransformWHT_MIPSdspR2(const int16_t* in, int16_t* out) {
int temp0, temp1, temp2, temp3, temp4;
int temp5, temp6, temp7, temp8, temp9;
@ -1450,9 +1452,9 @@ static void FTransformWHT(const int16_t* in, int16_t* out) {
"addiu %[temp8], %[temp8], 1 \n\t" \
"sw %[temp8], 0(%[temp3]) \n\t"
static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
int start_block, int end_block,
VP8Histogram* const histo) {
static void CollectHistogram_MIPSdspR2(const uint8_t* ref, const uint8_t* pred,
int start_block, int end_block,
VP8Histogram* const histo) {
int j;
int distribution[MAX_COEFF_THRESH + 1] = { 0 };
const int max_coeff = (MAX_COEFF_THRESH << 16) + MAX_COEFF_THRESH;
@ -1484,23 +1486,28 @@ static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
extern void VP8EncDspInitMIPSdspR2(void);
WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInitMIPSdspR2(void) {
VP8FTransform = FTransform;
VP8ITransform = ITransform;
VP8TDisto4x4 = Disto4x4;
VP8TDisto16x16 = Disto16x16;
VP8EncPredLuma16 = Intra16Preds;
VP8EncPredChroma8 = IntraChromaPreds;
VP8EncPredLuma4 = Intra4Preds;
VP8FTransform = FTransform_MIPSdspR2;
VP8FTransformWHT = FTransformWHT_MIPSdspR2;
VP8ITransform = ITransform_MIPSdspR2;
VP8TDisto4x4 = Disto4x4_MIPSdspR2;
VP8TDisto16x16 = Disto16x16_MIPSdspR2;
VP8EncPredLuma16 = Intra16Preds_MIPSdspR2;
VP8EncPredChroma8 = IntraChromaPreds_MIPSdspR2;
VP8EncPredLuma4 = Intra4Preds_MIPSdspR2;
#if !defined(WORK_AROUND_GCC)
VP8SSE16x16 = SSE16x16;
VP8SSE8x8 = SSE8x8;
VP8SSE16x8 = SSE16x8;
VP8SSE4x4 = SSE4x4;
VP8SSE16x16 = SSE16x16_MIPSdspR2;
VP8SSE8x8 = SSE8x8_MIPSdspR2;
VP8SSE16x8 = SSE16x8_MIPSdspR2;
VP8SSE4x4 = SSE4x4_MIPSdspR2;
#endif
VP8EncQuantizeBlock = QuantizeBlock;
VP8EncQuantize2Blocks = Quantize2Blocks;
VP8FTransformWHT = FTransformWHT;
VP8CollectHistogram = CollectHistogram;
VP8EncQuantizeBlock = QuantizeBlock_MIPSdspR2;
VP8EncQuantize2Blocks = Quantize2Blocks_MIPSdspR2;
VP8CollectHistogram = CollectHistogram_MIPSdspR2;
}
#else // !WEBP_USE_MIPS_DSP_R2

View File

@ -11,13 +11,13 @@
//
// Author: Prashant Patil (prashant.patil@imgtec.com)
#include "./dsp.h"
#include "src/dsp/dsp.h"
#if defined(WEBP_USE_MSA)
#include <stdlib.h>
#include "./msa_macro.h"
#include "../enc/vp8i_enc.h"
#include "src/dsp/msa_macro.h"
#include "src/enc/vp8i_enc.h"
//------------------------------------------------------------------------------
// Transforms
@ -69,15 +69,16 @@ static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
ST4x4_UB(res0, res0, 3, 2, 1, 0, dst, BPS);
}
static void ITransform(const uint8_t* ref, const int16_t* in, uint8_t* dst,
int do_two) {
static void ITransform_MSA(const uint8_t* ref, const int16_t* in, uint8_t* dst,
int do_two) {
ITransformOne(ref, in, dst);
if (do_two) {
ITransformOne(ref + 4, in + 16, dst + 4);
}
}
static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
static void FTransform_MSA(const uint8_t* src, const uint8_t* ref,
int16_t* out) {
uint64_t out0, out1, out2, out3;
uint32_t in0, in1, in2, in3;
v4i32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
@ -130,7 +131,7 @@ static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
SD4(out0, out1, out2, out3, out, 8);
}
static void FTransformWHT(const int16_t* in, int16_t* out) {
static void FTransformWHT_MSA(const int16_t* in, int16_t* out) {
v8i16 in0 = { 0 };
v8i16 in1 = { 0 };
v8i16 tmp0, tmp1, tmp2, tmp3;
@ -167,7 +168,7 @@ static void FTransformWHT(const int16_t* in, int16_t* out) {
ST_SH2(out0, out1, out, 8);
}
static int TTransform(const uint8_t* in, const uint16_t* w) {
static int TTransform_MSA(const uint8_t* in, const uint16_t* w) {
int sum;
uint32_t in0_m, in1_m, in2_m, in3_m;
v16i8 src0 = { 0 };
@ -199,20 +200,20 @@ static int TTransform(const uint8_t* in, const uint16_t* w) {
return sum;
}
static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
const uint16_t* const w) {
const int sum1 = TTransform(a, w);
const int sum2 = TTransform(b, w);
static int Disto4x4_MSA(const uint8_t* const a, const uint8_t* const b,
const uint16_t* const w) {
const int sum1 = TTransform_MSA(a, w);
const int sum2 = TTransform_MSA(b, w);
return abs(sum2 - sum1) >> 5;
}
static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
const uint16_t* const w) {
static int Disto16x16_MSA(const uint8_t* const a, const uint8_t* const b,
const uint16_t* const w) {
int D = 0;
int x, y;
for (y = 0; y < 16 * BPS; y += 4 * BPS) {
for (x = 0; x < 16; x += 4) {
D += Disto4x4(a + x + y, b + x + y, w);
D += Disto4x4_MSA(a + x + y, b + x + y, w);
}
}
return D;
@ -221,9 +222,9 @@ static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
//------------------------------------------------------------------------------
// Histogram
static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
int start_block, int end_block,
VP8Histogram* const histo) {
static void CollectHistogram_MSA(const uint8_t* ref, const uint8_t* pred,
int start_block, int end_block,
VP8Histogram* const histo) {
int j;
int distribution[MAX_COEFF_THRESH + 1] = { 0 };
for (j = start_block; j < end_block; ++j) {
@ -430,7 +431,7 @@ static WEBP_INLINE void TM4(uint8_t* dst, const uint8_t* top) {
#undef AVG3
#undef AVG2
static void Intra4Preds(uint8_t* dst, const uint8_t* top) {
static void Intra4Preds_MSA(uint8_t* dst, const uint8_t* top) {
DC4(I4DC4 + dst, top);
TM4(I4TM4 + dst, top);
VE4(I4VE4 + dst, top);
@ -547,8 +548,8 @@ static WEBP_INLINE void DCMode16x16(uint8_t* dst, const uint8_t* left,
STORE16x16(out, dst);
}
static void Intra16Preds(uint8_t* dst,
const uint8_t* left, const uint8_t* top) {
static void Intra16Preds_MSA(uint8_t* dst,
const uint8_t* left, const uint8_t* top) {
DCMode16x16(I16DC16 + dst, left, top);
VerticalPred16x16(I16VE16 + dst, top);
HorizontalPred16x16(I16HE16 + dst, left);
@ -669,8 +670,8 @@ static WEBP_INLINE void DCMode8x8(uint8_t* dst, const uint8_t* left,
STORE8x8(out, dst);
}
static void IntraChromaPreds(uint8_t* dst, const uint8_t* left,
const uint8_t* top) {
static void IntraChromaPreds_MSA(uint8_t* dst, const uint8_t* left,
const uint8_t* top) {
// U block
DCMode8x8(C8DC8 + dst, left, top);
VerticalPred8x8(C8VE8 + dst, top);
@ -711,7 +712,7 @@ static void IntraChromaPreds(uint8_t* dst, const uint8_t* left,
DPADD_SH2_SW(tmp2, tmp3, tmp2, tmp3, out2, out3); \
} while (0)
static int SSE16x16(const uint8_t* a, const uint8_t* b) {
static int SSE16x16_MSA(const uint8_t* a, const uint8_t* b) {
uint32_t sum;
v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
v16u8 ref0, ref1, ref2, ref3, ref4, ref5, ref6, ref7;
@ -738,7 +739,7 @@ static int SSE16x16(const uint8_t* a, const uint8_t* b) {
return sum;
}
static int SSE16x8(const uint8_t* a, const uint8_t* b) {
static int SSE16x8_MSA(const uint8_t* a, const uint8_t* b) {
uint32_t sum;
v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
v16u8 ref0, ref1, ref2, ref3, ref4, ref5, ref6, ref7;
@ -757,7 +758,7 @@ static int SSE16x8(const uint8_t* a, const uint8_t* b) {
return sum;
}
static int SSE8x8(const uint8_t* a, const uint8_t* b) {
static int SSE8x8_MSA(const uint8_t* a, const uint8_t* b) {
uint32_t sum;
v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
v16u8 ref0, ref1, ref2, ref3, ref4, ref5, ref6, ref7;
@ -777,7 +778,7 @@ static int SSE8x8(const uint8_t* a, const uint8_t* b) {
return sum;
}
static int SSE4x4(const uint8_t* a, const uint8_t* b) {
static int SSE4x4_MSA(const uint8_t* a, const uint8_t* b) {
uint32_t sum = 0;
uint32_t src0, src1, src2, src3, ref0, ref1, ref2, ref3;
v16u8 src = { 0 }, ref = { 0 }, tmp0, tmp1;
@ -799,8 +800,8 @@ static int SSE4x4(const uint8_t* a, const uint8_t* b) {
//------------------------------------------------------------------------------
// Quantization
static int QuantizeBlock(int16_t in[16], int16_t out[16],
const VP8Matrix* const mtx) {
static int QuantizeBlock_MSA(int16_t in[16], int16_t out[16],
const VP8Matrix* const mtx) {
int sum;
v8i16 in0, in1, sh0, sh1, out0, out1;
v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, sign0, sign1;
@ -852,8 +853,8 @@ static int QuantizeBlock(int16_t in[16], int16_t out[16],
return (sum > 0);
}
static int Quantize2Blocks(int16_t in[32], int16_t out[32],
const VP8Matrix* const mtx) {
static int Quantize2Blocks_MSA(int16_t in[32], int16_t out[32],
const VP8Matrix* const mtx) {
int nz;
nz = VP8EncQuantizeBlock(in + 0 * 16, out + 0 * 16, mtx) << 0;
nz |= VP8EncQuantizeBlock(in + 1 * 16, out + 1 * 16, mtx) << 1;
@ -866,26 +867,26 @@ static int Quantize2Blocks(int16_t in[32], int16_t out[32],
extern void VP8EncDspInitMSA(void);
WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInitMSA(void) {
VP8ITransform = ITransform;
VP8FTransform = FTransform;
VP8FTransformWHT = FTransformWHT;
VP8ITransform = ITransform_MSA;
VP8FTransform = FTransform_MSA;
VP8FTransformWHT = FTransformWHT_MSA;
VP8TDisto4x4 = Disto4x4;
VP8TDisto16x16 = Disto16x16;
VP8CollectHistogram = CollectHistogram;
VP8TDisto4x4 = Disto4x4_MSA;
VP8TDisto16x16 = Disto16x16_MSA;
VP8CollectHistogram = CollectHistogram_MSA;
VP8EncPredLuma4 = Intra4Preds;
VP8EncPredLuma16 = Intra16Preds;
VP8EncPredChroma8 = IntraChromaPreds;
VP8EncPredLuma4 = Intra4Preds_MSA;
VP8EncPredLuma16 = Intra16Preds_MSA;
VP8EncPredChroma8 = IntraChromaPreds_MSA;
VP8SSE16x16 = SSE16x16;
VP8SSE16x8 = SSE16x8;
VP8SSE8x8 = SSE8x8;
VP8SSE4x4 = SSE4x4;
VP8SSE16x16 = SSE16x16_MSA;
VP8SSE16x8 = SSE16x8_MSA;
VP8SSE8x8 = SSE8x8_MSA;
VP8SSE4x4 = SSE4x4_MSA;
VP8EncQuantizeBlock = QuantizeBlock;
VP8EncQuantize2Blocks = Quantize2Blocks;
VP8EncQuantizeBlockWHT = QuantizeBlock;
VP8EncQuantizeBlock = QuantizeBlock_MSA;
VP8EncQuantize2Blocks = Quantize2Blocks_MSA;
VP8EncQuantizeBlockWHT = QuantizeBlock_MSA;
}
#else // !WEBP_USE_MSA

View File

@ -11,14 +11,14 @@
//
// adapted from libvpx (http://www.webmproject.org/code/)
#include "./dsp.h"
#include "src/dsp/dsp.h"
#if defined(WEBP_USE_NEON)
#include <assert.h>
#include "./neon.h"
#include "../enc/vp8i_enc.h"
#include "src/dsp/neon.h"
#include "src/enc/vp8i_enc.h"
//------------------------------------------------------------------------------
// Transforms (Paragraph 14.4)
@ -37,15 +37,15 @@ static const int16_t kC2 = 17734; // half of kC2, actually. See comment above.
#if defined(WEBP_USE_INTRINSICS)
// Treats 'v' as an uint8x8_t and zero extends to an int16x8_t.
static WEBP_INLINE int16x8_t ConvertU8ToS16(uint32x2_t v) {
static WEBP_INLINE int16x8_t ConvertU8ToS16_NEON(uint32x2_t v) {
return vreinterpretq_s16_u16(vmovl_u8(vreinterpret_u8_u32(v)));
}
// Performs unsigned 8b saturation on 'dst01' and 'dst23' storing the result
// to the corresponding rows of 'dst'.
static WEBP_INLINE void SaturateAndStore4x4(uint8_t* const dst,
const int16x8_t dst01,
const int16x8_t dst23) {
static WEBP_INLINE void SaturateAndStore4x4_NEON(uint8_t* const dst,
const int16x8_t dst01,
const int16x8_t dst23) {
// Unsigned saturate to 8b.
const uint8x8_t dst01_u8 = vqmovun_s16(dst01);
const uint8x8_t dst23_u8 = vqmovun_s16(dst23);
@ -57,8 +57,10 @@ static WEBP_INLINE void SaturateAndStore4x4(uint8_t* const dst,
vst1_lane_u32((uint32_t*)(dst + 3 * BPS), vreinterpret_u32_u8(dst23_u8), 1);
}
static WEBP_INLINE void Add4x4(const int16x8_t row01, const int16x8_t row23,
const uint8_t* const ref, uint8_t* const dst) {
static WEBP_INLINE void Add4x4_NEON(const int16x8_t row01,
const int16x8_t row23,
const uint8_t* const ref,
uint8_t* const dst) {
uint32x2_t dst01 = vdup_n_u32(0);
uint32x2_t dst23 = vdup_n_u32(0);
@ -70,19 +72,20 @@ static WEBP_INLINE void Add4x4(const int16x8_t row01, const int16x8_t row23,
{
// Convert to 16b.
const int16x8_t dst01_s16 = ConvertU8ToS16(dst01);
const int16x8_t dst23_s16 = ConvertU8ToS16(dst23);
const int16x8_t dst01_s16 = ConvertU8ToS16_NEON(dst01);
const int16x8_t dst23_s16 = ConvertU8ToS16_NEON(dst23);
// Descale with rounding.
const int16x8_t out01 = vrsraq_n_s16(dst01_s16, row01, 3);
const int16x8_t out23 = vrsraq_n_s16(dst23_s16, row23, 3);
// Add the inverse transform.
SaturateAndStore4x4(dst, out01, out23);
SaturateAndStore4x4_NEON(dst, out01, out23);
}
}
static WEBP_INLINE void Transpose8x2(const int16x8_t in0, const int16x8_t in1,
int16x8x2_t* const out) {
static WEBP_INLINE void Transpose8x2_NEON(const int16x8_t in0,
const int16x8_t in1,
int16x8x2_t* const out) {
// a0 a1 a2 a3 | b0 b1 b2 b3 => a0 b0 c0 d0 | a1 b1 c1 d1
// c0 c1 c2 c3 | d0 d1 d2 d3 a2 b2 c2 d2 | a3 b3 c3 d3
const int16x8x2_t tmp0 = vzipq_s16(in0, in1); // a0 c0 a1 c1 a2 c2 ...
@ -90,7 +93,7 @@ static WEBP_INLINE void Transpose8x2(const int16x8_t in0, const int16x8_t in1,
*out = vzipq_s16(tmp0.val[0], tmp0.val[1]);
}
static WEBP_INLINE void TransformPass(int16x8x2_t* const rows) {
static WEBP_INLINE void TransformPass_NEON(int16x8x2_t* const rows) {
// {rows} = in0 | in4
// in8 | in12
// B1 = in4 | in12
@ -113,22 +116,22 @@ static WEBP_INLINE void TransformPass(int16x8x2_t* const rows) {
const int16x8_t E0 = vqaddq_s16(D0, D1); // a+d | b+c
const int16x8_t E_tmp = vqsubq_s16(D0, D1); // a-d | b-c
const int16x8_t E1 = vcombine_s16(vget_high_s16(E_tmp), vget_low_s16(E_tmp));
Transpose8x2(E0, E1, rows);
Transpose8x2_NEON(E0, E1, rows);
}
static void ITransformOne(const uint8_t* ref,
const int16_t* in, uint8_t* dst) {
static void ITransformOne_NEON(const uint8_t* ref,
const int16_t* in, uint8_t* dst) {
int16x8x2_t rows;
INIT_VECTOR2(rows, vld1q_s16(in + 0), vld1q_s16(in + 8));
TransformPass(&rows);
TransformPass(&rows);
Add4x4(rows.val[0], rows.val[1], ref, dst);
TransformPass_NEON(&rows);
TransformPass_NEON(&rows);
Add4x4_NEON(rows.val[0], rows.val[1], ref, dst);
}
#else
static void ITransformOne(const uint8_t* ref,
const int16_t* in, uint8_t* dst) {
static void ITransformOne_NEON(const uint8_t* ref,
const int16_t* in, uint8_t* dst) {
const int kBPS = BPS;
const int16_t kC1C2[] = { kC1, kC2, 0, 0 };
@ -243,16 +246,16 @@ static void ITransformOne(const uint8_t* ref,
#endif // WEBP_USE_INTRINSICS
static void ITransform(const uint8_t* ref,
const int16_t* in, uint8_t* dst, int do_two) {
ITransformOne(ref, in, dst);
static void ITransform_NEON(const uint8_t* ref,
const int16_t* in, uint8_t* dst, int do_two) {
ITransformOne_NEON(ref, in, dst);
if (do_two) {
ITransformOne(ref + 4, in + 16, dst + 4);
ITransformOne_NEON(ref + 4, in + 16, dst + 4);
}
}
// Load all 4x4 pixels into a single uint8x16_t variable.
static uint8x16_t Load4x4(const uint8_t* src) {
static uint8x16_t Load4x4_NEON(const uint8_t* src) {
uint32x4_t out = vdupq_n_u32(0);
out = vld1q_lane_u32((const uint32_t*)(src + 0 * BPS), out, 0);
out = vld1q_lane_u32((const uint32_t*)(src + 1 * BPS), out, 1);
@ -265,10 +268,12 @@ static uint8x16_t Load4x4(const uint8_t* src) {
#if defined(WEBP_USE_INTRINSICS)
static WEBP_INLINE void Transpose4x4_S16(const int16x4_t A, const int16x4_t B,
const int16x4_t C, const int16x4_t D,
int16x8_t* const out01,
int16x8_t* const out32) {
static WEBP_INLINE void Transpose4x4_S16_NEON(const int16x4_t A,
const int16x4_t B,
const int16x4_t C,
const int16x4_t D,
int16x8_t* const out01,
int16x8_t* const out32) {
const int16x4x2_t AB = vtrn_s16(A, B);
const int16x4x2_t CD = vtrn_s16(C, D);
const int32x2x2_t tmp02 = vtrn_s32(vreinterpret_s32_s16(AB.val[0]),
@ -283,24 +288,24 @@ static WEBP_INLINE void Transpose4x4_S16(const int16x4_t A, const int16x4_t B,
vreinterpret_s64_s32(tmp02.val[1])));
}
static WEBP_INLINE int16x8_t DiffU8ToS16(const uint8x8_t a,
const uint8x8_t b) {
static WEBP_INLINE int16x8_t DiffU8ToS16_NEON(const uint8x8_t a,
const uint8x8_t b) {
return vreinterpretq_s16_u16(vsubl_u8(a, b));
}
static void FTransform(const uint8_t* src, const uint8_t* ref,
int16_t* out) {
static void FTransform_NEON(const uint8_t* src, const uint8_t* ref,
int16_t* out) {
int16x8_t d0d1, d3d2; // working 4x4 int16 variables
{
const uint8x16_t S0 = Load4x4(src);
const uint8x16_t R0 = Load4x4(ref);
const int16x8_t D0D1 = DiffU8ToS16(vget_low_u8(S0), vget_low_u8(R0));
const int16x8_t D2D3 = DiffU8ToS16(vget_high_u8(S0), vget_high_u8(R0));
const uint8x16_t S0 = Load4x4_NEON(src);
const uint8x16_t R0 = Load4x4_NEON(ref);
const int16x8_t D0D1 = DiffU8ToS16_NEON(vget_low_u8(S0), vget_low_u8(R0));
const int16x8_t D2D3 = DiffU8ToS16_NEON(vget_high_u8(S0), vget_high_u8(R0));
const int16x4_t D0 = vget_low_s16(D0D1);
const int16x4_t D1 = vget_high_s16(D0D1);
const int16x4_t D2 = vget_low_s16(D2D3);
const int16x4_t D3 = vget_high_s16(D2D3);
Transpose4x4_S16(D0, D1, D2, D3, &d0d1, &d3d2);
Transpose4x4_S16_NEON(D0, D1, D2, D3, &d0d1, &d3d2);
}
{ // 1rst pass
const int32x4_t kCst937 = vdupq_n_s32(937);
@ -318,7 +323,7 @@ static void FTransform(const uint8_t* src, const uint8_t* ref,
const int32x4_t a3_m_a2 = vmlsl_n_s16(a3_2217, vget_high_s16(a3a2), 5352);
const int16x4_t tmp1 = vshrn_n_s32(vaddq_s32(a2_p_a3, kCst1812), 9);
const int16x4_t tmp3 = vshrn_n_s32(vaddq_s32(a3_m_a2, kCst937), 9);
Transpose4x4_S16(tmp0, tmp1, tmp2, tmp3, &d0d1, &d3d2);
Transpose4x4_S16_NEON(tmp0, tmp1, tmp2, tmp3, &d0d1, &d3d2);
}
{ // 2nd pass
// the (1<<16) addition is for the replacement: a3!=0 <-> 1-(a3==0)
@ -358,8 +363,8 @@ static const int32_t kCoeff32[] = {
51000, 51000, 51000, 51000
};
static void FTransform(const uint8_t* src, const uint8_t* ref,
int16_t* out) {
static void FTransform_NEON(const uint8_t* src, const uint8_t* ref,
int16_t* out) {
const int kBPS = BPS;
const uint8_t* src_ptr = src;
const uint8_t* ref_ptr = ref;
@ -478,7 +483,7 @@ static void FTransform(const uint8_t* src, const uint8_t* ref,
src += stride; \
} while (0)
static void FTransformWHT(const int16_t* src, int16_t* out) {
static void FTransformWHT_NEON(const int16_t* src, int16_t* out) {
const int stride = 16;
const int16x4_t zero = vdup_n_s16(0);
int32x4x4_t tmp0;
@ -516,7 +521,7 @@ static void FTransformWHT(const int16_t* src, int16_t* out) {
tmp0.val[3] = vsubq_s32(a0, a1);
}
{
const int32x4x4_t tmp1 = Transpose4x4(tmp0);
const int32x4x4_t tmp1 = Transpose4x4_NEON(tmp0);
// a0 = tmp[0 + i] + tmp[ 8 + i]
// a1 = tmp[4 + i] + tmp[12 + i]
// a2 = tmp[4 + i] - tmp[12 + i]
@ -560,7 +565,7 @@ static void FTransformWHT(const int16_t* src, int16_t* out) {
// a 26ae, b 26ae
// a 37bf, b 37bf
//
static WEBP_INLINE int16x8x4_t DistoTranspose4x4S16(int16x8x4_t q4_in) {
static WEBP_INLINE int16x8x4_t DistoTranspose4x4S16_NEON(int16x8x4_t q4_in) {
const int16x8x2_t q2_tmp0 = vtrnq_s16(q4_in.val[0], q4_in.val[1]);
const int16x8x2_t q2_tmp1 = vtrnq_s16(q4_in.val[2], q4_in.val[3]);
const int32x4x2_t q2_tmp2 = vtrnq_s32(vreinterpretq_s32_s16(q2_tmp0.val[0]),
@ -574,7 +579,8 @@ static WEBP_INLINE int16x8x4_t DistoTranspose4x4S16(int16x8x4_t q4_in) {
return q4_in;
}
static WEBP_INLINE int16x8x4_t DistoHorizontalPass(const int16x8x4_t q4_in) {
static WEBP_INLINE int16x8x4_t DistoHorizontalPass_NEON(
const int16x8x4_t q4_in) {
// {a0, a1} = {in[0] + in[2], in[1] + in[3]}
// {a3, a2} = {in[0] - in[2], in[1] - in[3]}
const int16x8_t q_a0 = vaddq_s16(q4_in.val[0], q4_in.val[2]);
@ -593,7 +599,7 @@ static WEBP_INLINE int16x8x4_t DistoHorizontalPass(const int16x8x4_t q4_in) {
return q4_out;
}
static WEBP_INLINE int16x8x4_t DistoVerticalPass(const uint8x8x4_t q4_in) {
static WEBP_INLINE int16x8x4_t DistoVerticalPass_NEON(const uint8x8x4_t q4_in) {
const int16x8_t q_a0 = vreinterpretq_s16_u16(vaddl_u8(q4_in.val[0],
q4_in.val[2]));
const int16x8_t q_a1 = vreinterpretq_s16_u16(vaddl_u8(q4_in.val[1],
@ -610,7 +616,7 @@ static WEBP_INLINE int16x8x4_t DistoVerticalPass(const uint8x8x4_t q4_in) {
return q4_out;
}
static WEBP_INLINE int16x4x4_t DistoLoadW(const uint16_t* w) {
static WEBP_INLINE int16x4x4_t DistoLoadW_NEON(const uint16_t* w) {
const uint16x8_t q_w07 = vld1q_u16(&w[0]);
const uint16x8_t q_w8f = vld1q_u16(&w[8]);
int16x4x4_t d4_w;
@ -622,8 +628,8 @@ static WEBP_INLINE int16x4x4_t DistoLoadW(const uint16_t* w) {
return d4_w;
}
static WEBP_INLINE int32x2_t DistoSum(const int16x8x4_t q4_in,
const int16x4x4_t d4_w) {
static WEBP_INLINE int32x2_t DistoSum_NEON(const int16x8x4_t q4_in,
const int16x4x4_t d4_w) {
int32x2_t d_sum;
// sum += w[ 0] * abs(b0);
// sum += w[ 4] * abs(b1);
@ -652,8 +658,8 @@ static WEBP_INLINE int32x2_t DistoSum(const int16x8x4_t q4_in,
// Hadamard transform
// Returns the weighted sum of the absolute value of transformed coefficients.
// w[] contains a row-major 4 by 4 symmetric matrix.
static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
const uint16_t* const w) {
static int Disto4x4_NEON(const uint8_t* const a, const uint8_t* const b,
const uint16_t* const w) {
uint32x2_t d_in_ab_0123 = vdup_n_u32(0);
uint32x2_t d_in_ab_4567 = vdup_n_u32(0);
uint32x2_t d_in_ab_89ab = vdup_n_u32(0);
@ -679,12 +685,12 @@ static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
// Vertical pass first to avoid a transpose (vertical and horizontal passes
// are commutative because w/kWeightY is symmetric) and subsequent
// transpose.
const int16x8x4_t q4_v = DistoVerticalPass(d4_in);
const int16x4x4_t d4_w = DistoLoadW(w);
const int16x8x4_t q4_v = DistoVerticalPass_NEON(d4_in);
const int16x4x4_t d4_w = DistoLoadW_NEON(w);
// horizontal pass
const int16x8x4_t q4_t = DistoTranspose4x4S16(q4_v);
const int16x8x4_t q4_h = DistoHorizontalPass(q4_t);
int32x2_t d_sum = DistoSum(q4_h, d4_w);
const int16x8x4_t q4_t = DistoTranspose4x4S16_NEON(q4_v);
const int16x8x4_t q4_h = DistoHorizontalPass_NEON(q4_t);
int32x2_t d_sum = DistoSum_NEON(q4_h, d4_w);
// abs(sum2 - sum1) >> 5
d_sum = vabs_s32(d_sum);
@ -694,13 +700,13 @@ static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
}
#undef LOAD_LANE_32b
static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
const uint16_t* const w) {
static int Disto16x16_NEON(const uint8_t* const a, const uint8_t* const b,
const uint16_t* const w) {
int D = 0;
int x, y;
for (y = 0; y < 16 * BPS; y += 4 * BPS) {
for (x = 0; x < 16; x += 4) {
D += Disto4x4(a + x + y, b + x + y, w);
D += Disto4x4_NEON(a + x + y, b + x + y, w);
}
}
return D;
@ -708,15 +714,15 @@ static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
//------------------------------------------------------------------------------
static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
int start_block, int end_block,
VP8Histogram* const histo) {
static void CollectHistogram_NEON(const uint8_t* ref, const uint8_t* pred,
int start_block, int end_block,
VP8Histogram* const histo) {
const uint16x8_t max_coeff_thresh = vdupq_n_u16(MAX_COEFF_THRESH);
int j;
int distribution[MAX_COEFF_THRESH + 1] = { 0 };
for (j = start_block; j < end_block; ++j) {
int16_t out[16];
FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out);
FTransform_NEON(ref + VP8DspScan[j], pred + VP8DspScan[j], out);
{
int k;
const int16x8_t a0 = vld1q_s16(out + 0);
@ -740,9 +746,9 @@ static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
//------------------------------------------------------------------------------
static WEBP_INLINE void AccumulateSSE16(const uint8_t* const a,
const uint8_t* const b,
uint32x4_t* const sum) {
static WEBP_INLINE void AccumulateSSE16_NEON(const uint8_t* const a,
const uint8_t* const b,
uint32x4_t* const sum) {
const uint8x16_t a0 = vld1q_u8(a);
const uint8x16_t b0 = vld1q_u8(b);
const uint8x16_t abs_diff = vabdq_u8(a0, b0);
@ -757,7 +763,7 @@ static WEBP_INLINE void AccumulateSSE16(const uint8_t* const a,
}
// Horizontal sum of all four uint32_t values in 'sum'.
static int SumToInt(uint32x4_t sum) {
static int SumToInt_NEON(uint32x4_t sum) {
const uint64x2_t sum2 = vpaddlq_u32(sum);
const uint64_t sum3 = vgetq_lane_u64(sum2, 0) + vgetq_lane_u64(sum2, 1);
return (int)sum3;
@ -767,18 +773,18 @@ static int SSE16x16_NEON(const uint8_t* a, const uint8_t* b) {
uint32x4_t sum = vdupq_n_u32(0);
int y;
for (y = 0; y < 16; ++y) {
AccumulateSSE16(a + y * BPS, b + y * BPS, &sum);
AccumulateSSE16_NEON(a + y * BPS, b + y * BPS, &sum);
}
return SumToInt(sum);
return SumToInt_NEON(sum);
}
static int SSE16x8_NEON(const uint8_t* a, const uint8_t* b) {
uint32x4_t sum = vdupq_n_u32(0);
int y;
for (y = 0; y < 8; ++y) {
AccumulateSSE16(a + y * BPS, b + y * BPS, &sum);
AccumulateSSE16_NEON(a + y * BPS, b + y * BPS, &sum);
}
return SumToInt(sum);
return SumToInt_NEON(sum);
}
static int SSE8x8_NEON(const uint8_t* a, const uint8_t* b) {
@ -791,12 +797,12 @@ static int SSE8x8_NEON(const uint8_t* a, const uint8_t* b) {
const uint16x8_t prod = vmull_u8(abs_diff, abs_diff);
sum = vpadalq_u16(sum, prod);
}
return SumToInt(sum);
return SumToInt_NEON(sum);
}
static int SSE4x4_NEON(const uint8_t* a, const uint8_t* b) {
const uint8x16_t a0 = Load4x4(a);
const uint8x16_t b0 = Load4x4(b);
const uint8x16_t a0 = Load4x4_NEON(a);
const uint8x16_t b0 = Load4x4_NEON(b);
const uint8x16_t abs_diff = vabdq_u8(a0, b0);
const uint16x8_t prod1 = vmull_u8(vget_low_u8(abs_diff),
vget_low_u8(abs_diff));
@ -805,7 +811,7 @@ static int SSE4x4_NEON(const uint8_t* a, const uint8_t* b) {
/* pair-wise adds and widen */
const uint32x4_t sum1 = vpaddlq_u16(prod1);
const uint32x4_t sum2 = vpaddlq_u16(prod2);
return SumToInt(vaddq_u32(sum1, sum2));
return SumToInt_NEON(vaddq_u32(sum1, sum2));
}
//------------------------------------------------------------------------------
@ -813,8 +819,8 @@ static int SSE4x4_NEON(const uint8_t* a, const uint8_t* b) {
// Compilation with gcc-4.6.x is problematic for now.
#if !defined(WORK_AROUND_GCC)
static int16x8_t Quantize(int16_t* const in,
const VP8Matrix* const mtx, int offset) {
static int16x8_t Quantize_NEON(int16_t* const in,
const VP8Matrix* const mtx, int offset) {
const uint16x8_t sharp = vld1q_u16(&mtx->sharpen_[offset]);
const uint16x8_t q = vld1q_u16(&mtx->q_[offset]);
const uint16x8_t iq = vld1q_u16(&mtx->iq_[offset]);
@ -847,10 +853,10 @@ static const uint8_t kShuffles[4][8] = {
{ 14, 15, 22, 23, 28, 29, 30, 31 }
};
static int QuantizeBlock(int16_t in[16], int16_t out[16],
const VP8Matrix* const mtx) {
const int16x8_t out0 = Quantize(in, mtx, 0);
const int16x8_t out1 = Quantize(in, mtx, 8);
static int QuantizeBlock_NEON(int16_t in[16], int16_t out[16],
const VP8Matrix* const mtx) {
const int16x8_t out0 = Quantize_NEON(in, mtx, 0);
const int16x8_t out1 = Quantize_NEON(in, mtx, 8);
uint8x8x4_t shuffles;
// vtbl?_u8 are marked unavailable for iOS arm64 with Xcode < 6.3, use
// non-standard versions there.
@ -889,11 +895,11 @@ static int QuantizeBlock(int16_t in[16], int16_t out[16],
return 0;
}
static int Quantize2Blocks(int16_t in[32], int16_t out[32],
const VP8Matrix* const mtx) {
static int Quantize2Blocks_NEON(int16_t in[32], int16_t out[32],
const VP8Matrix* const mtx) {
int nz;
nz = QuantizeBlock(in + 0 * 16, out + 0 * 16, mtx) << 0;
nz |= QuantizeBlock(in + 1 * 16, out + 1 * 16, mtx) << 1;
nz = QuantizeBlock_NEON(in + 0 * 16, out + 0 * 16, mtx) << 0;
nz |= QuantizeBlock_NEON(in + 1 * 16, out + 1 * 16, mtx) << 1;
return nz;
}
@ -905,14 +911,14 @@ static int Quantize2Blocks(int16_t in[32], int16_t out[32],
extern void VP8EncDspInitNEON(void);
WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInitNEON(void) {
VP8ITransform = ITransform;
VP8FTransform = FTransform;
VP8ITransform = ITransform_NEON;
VP8FTransform = FTransform_NEON;
VP8FTransformWHT = FTransformWHT;
VP8FTransformWHT = FTransformWHT_NEON;
VP8TDisto4x4 = Disto4x4;
VP8TDisto16x16 = Disto16x16;
VP8CollectHistogram = CollectHistogram;
VP8TDisto4x4 = Disto4x4_NEON;
VP8TDisto16x16 = Disto16x16_NEON;
VP8CollectHistogram = CollectHistogram_NEON;
VP8SSE16x16 = SSE16x16_NEON;
VP8SSE16x8 = SSE16x8_NEON;
@ -920,8 +926,8 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInitNEON(void) {
VP8SSE4x4 = SSE4x4_NEON;
#if !defined(WORK_AROUND_GCC)
VP8EncQuantizeBlock = QuantizeBlock;
VP8EncQuantize2Blocks = Quantize2Blocks;
VP8EncQuantizeBlock = QuantizeBlock_NEON;
VP8EncQuantize2Blocks = Quantize2Blocks_NEON;
#endif
}

View File

@ -11,23 +11,23 @@
//
// Author: Christian Duvivier (cduvivier@google.com)
#include "./dsp.h"
#include "src/dsp/dsp.h"
#if defined(WEBP_USE_SSE2)
#include <assert.h>
#include <stdlib.h> // for abs()
#include <emmintrin.h>
#include "./common_sse2.h"
#include "../enc/cost_enc.h"
#include "../enc/vp8i_enc.h"
#include "src/dsp/common_sse2.h"
#include "src/enc/cost_enc.h"
#include "src/enc/vp8i_enc.h"
//------------------------------------------------------------------------------
// Transforms (Paragraph 14.4)
// Does one or two inverse transforms.
static void ITransform(const uint8_t* ref, const int16_t* in, uint8_t* dst,
int do_two) {
static void ITransform_SSE2(const uint8_t* ref, const int16_t* in, uint8_t* dst,
int do_two) {
// This implementation makes use of 16-bit fixed point versions of two
// multiply constants:
// K1 = sqrt(2) * cos (pi/8) ~= 85627 / 2^16
@ -193,10 +193,10 @@ static void ITransform(const uint8_t* ref, const int16_t* in, uint8_t* dst,
}
}
static void FTransformPass1(const __m128i* const in01,
const __m128i* const in23,
__m128i* const out01,
__m128i* const out32) {
static void FTransformPass1_SSE2(const __m128i* const in01,
const __m128i* const in23,
__m128i* const out01,
__m128i* const out32) {
const __m128i k937 = _mm_set1_epi32(937);
const __m128i k1812 = _mm_set1_epi32(1812);
@ -239,8 +239,9 @@ static void FTransformPass1(const __m128i* const in01,
*out32 = _mm_shuffle_epi32(v23, _MM_SHUFFLE(1, 0, 3, 2)); // 3 2 3 2 3 2..
}
static void FTransformPass2(const __m128i* const v01, const __m128i* const v32,
int16_t* out) {
static void FTransformPass2_SSE2(const __m128i* const v01,
const __m128i* const v32,
int16_t* out) {
const __m128i zero = _mm_setzero_si128();
const __m128i seven = _mm_set1_epi16(7);
const __m128i k5352_2217 = _mm_set_epi16(5352, 2217, 5352, 2217,
@ -291,7 +292,8 @@ static void FTransformPass2(const __m128i* const v01, const __m128i* const v32,
_mm_storeu_si128((__m128i*)&out[8], d2_f3);
}
static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
static void FTransform_SSE2(const uint8_t* src, const uint8_t* ref,
int16_t* out) {
const __m128i zero = _mm_setzero_si128();
// Load src.
const __m128i src0 = _mm_loadl_epi64((const __m128i*)&src[0 * BPS]);
@ -328,13 +330,14 @@ static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
__m128i v01, v32;
// First pass
FTransformPass1(&row01, &row23, &v01, &v32);
FTransformPass1_SSE2(&row01, &row23, &v01, &v32);
// Second pass
FTransformPass2(&v01, &v32, out);
FTransformPass2_SSE2(&v01, &v32, out);
}
static void FTransform2(const uint8_t* src, const uint8_t* ref, int16_t* out) {
static void FTransform2_SSE2(const uint8_t* src, const uint8_t* ref,
int16_t* out) {
const __m128i zero = _mm_setzero_si128();
// Load src and convert to 16b.
@ -374,15 +377,15 @@ static void FTransform2(const uint8_t* src, const uint8_t* ref, int16_t* out) {
__m128i v01h, v32h;
// First pass
FTransformPass1(&shuf01l, &shuf23l, &v01l, &v32l);
FTransformPass1(&shuf01h, &shuf23h, &v01h, &v32h);
FTransformPass1_SSE2(&shuf01l, &shuf23l, &v01l, &v32l);
FTransformPass1_SSE2(&shuf01h, &shuf23h, &v01h, &v32h);
// Second pass
FTransformPass2(&v01l, &v32l, out + 0);
FTransformPass2(&v01h, &v32h, out + 16);
FTransformPass2_SSE2(&v01l, &v32l, out + 0);
FTransformPass2_SSE2(&v01h, &v32h, out + 16);
}
static void FTransformWHTRow(const int16_t* const in, __m128i* const out) {
static void FTransformWHTRow_SSE2(const int16_t* const in, __m128i* const out) {
const __m128i kMult = _mm_set_epi16(-1, 1, -1, 1, 1, 1, 1, 1);
const __m128i src0 = _mm_loadl_epi64((__m128i*)&in[0 * 16]);
const __m128i src1 = _mm_loadl_epi64((__m128i*)&in[1 * 16]);
@ -398,14 +401,14 @@ static void FTransformWHTRow(const int16_t* const in, __m128i* const out) {
*out = _mm_madd_epi16(D, kMult);
}
static void FTransformWHT(const int16_t* in, int16_t* out) {
static void FTransformWHT_SSE2(const int16_t* in, int16_t* out) {
// Input is 12b signed.
__m128i row0, row1, row2, row3;
// Rows are 14b signed.
FTransformWHTRow(in + 0 * 64, &row0);
FTransformWHTRow(in + 1 * 64, &row1);
FTransformWHTRow(in + 2 * 64, &row2);
FTransformWHTRow(in + 3 * 64, &row3);
FTransformWHTRow_SSE2(in + 0 * 64, &row0);
FTransformWHTRow_SSE2(in + 1 * 64, &row1);
FTransformWHTRow_SSE2(in + 2 * 64, &row2);
FTransformWHTRow_SSE2(in + 3 * 64, &row3);
{
// The a* are 15b signed.
@ -431,9 +434,9 @@ static void FTransformWHT(const int16_t* in, int16_t* out) {
// Compute susceptibility based on DCT-coeff histograms:
// the higher, the "easier" the macroblock is to compress.
static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
int start_block, int end_block,
VP8Histogram* const histo) {
static void CollectHistogram_SSE2(const uint8_t* ref, const uint8_t* pred,
int start_block, int end_block,
VP8Histogram* const histo) {
const __m128i zero = _mm_setzero_si128();
const __m128i max_coeff_thresh = _mm_set1_epi16(MAX_COEFF_THRESH);
int j;
@ -442,7 +445,7 @@ static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
int16_t out[16];
int k;
FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out);
FTransform_SSE2(ref + VP8DspScan[j], pred + VP8DspScan[j], out);
// Convert coefficients to bin (within out[]).
{
@ -476,7 +479,7 @@ static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
// Intra predictions
// helper for chroma-DC predictions
static WEBP_INLINE void Put8x8uv(uint8_t v, uint8_t* dst) {
static WEBP_INLINE void Put8x8uv_SSE2(uint8_t v, uint8_t* dst) {
int j;
const __m128i values = _mm_set1_epi8(v);
for (j = 0; j < 8; ++j) {
@ -484,7 +487,7 @@ static WEBP_INLINE void Put8x8uv(uint8_t v, uint8_t* dst) {
}
}
static WEBP_INLINE void Put16(uint8_t v, uint8_t* dst) {
static WEBP_INLINE void Put16_SSE2(uint8_t v, uint8_t* dst) {
int j;
const __m128i values = _mm_set1_epi8(v);
for (j = 0; j < 16; ++j) {
@ -492,20 +495,20 @@ static WEBP_INLINE void Put16(uint8_t v, uint8_t* dst) {
}
}
static WEBP_INLINE void Fill(uint8_t* dst, int value, int size) {
static WEBP_INLINE void Fill_SSE2(uint8_t* dst, int value, int size) {
if (size == 4) {
int j;
for (j = 0; j < 4; ++j) {
memset(dst + j * BPS, value, 4);
}
} else if (size == 8) {
Put8x8uv(value, dst);
Put8x8uv_SSE2(value, dst);
} else {
Put16(value, dst);
Put16_SSE2(value, dst);
}
}
static WEBP_INLINE void VE8uv(uint8_t* dst, const uint8_t* top) {
static WEBP_INLINE void VE8uv_SSE2(uint8_t* dst, const uint8_t* top) {
int j;
const __m128i top_values = _mm_loadl_epi64((const __m128i*)top);
for (j = 0; j < 8; ++j) {
@ -513,7 +516,7 @@ static WEBP_INLINE void VE8uv(uint8_t* dst, const uint8_t* top) {
}
}
static WEBP_INLINE void VE16(uint8_t* dst, const uint8_t* top) {
static WEBP_INLINE void VE16_SSE2(uint8_t* dst, const uint8_t* top) {
const __m128i top_values = _mm_load_si128((const __m128i*)top);
int j;
for (j = 0; j < 16; ++j) {
@ -521,20 +524,20 @@ static WEBP_INLINE void VE16(uint8_t* dst, const uint8_t* top) {
}
}
static WEBP_INLINE void VerticalPred(uint8_t* dst,
const uint8_t* top, int size) {
static WEBP_INLINE void VerticalPred_SSE2(uint8_t* dst,
const uint8_t* top, int size) {
if (top != NULL) {
if (size == 8) {
VE8uv(dst, top);
VE8uv_SSE2(dst, top);
} else {
VE16(dst, top);
VE16_SSE2(dst, top);
}
} else {
Fill(dst, 127, size);
Fill_SSE2(dst, 127, size);
}
}
static WEBP_INLINE void HE8uv(uint8_t* dst, const uint8_t* left) {
static WEBP_INLINE void HE8uv_SSE2(uint8_t* dst, const uint8_t* left) {
int j;
for (j = 0; j < 8; ++j) {
const __m128i values = _mm_set1_epi8(left[j]);
@ -543,7 +546,7 @@ static WEBP_INLINE void HE8uv(uint8_t* dst, const uint8_t* left) {
}
}
static WEBP_INLINE void HE16(uint8_t* dst, const uint8_t* left) {
static WEBP_INLINE void HE16_SSE2(uint8_t* dst, const uint8_t* left) {
int j;
for (j = 0; j < 16; ++j) {
const __m128i values = _mm_set1_epi8(left[j]);
@ -552,21 +555,21 @@ static WEBP_INLINE void HE16(uint8_t* dst, const uint8_t* left) {
}
}
static WEBP_INLINE void HorizontalPred(uint8_t* dst,
const uint8_t* left, int size) {
static WEBP_INLINE void HorizontalPred_SSE2(uint8_t* dst,
const uint8_t* left, int size) {
if (left != NULL) {
if (size == 8) {
HE8uv(dst, left);
HE8uv_SSE2(dst, left);
} else {
HE16(dst, left);
HE16_SSE2(dst, left);
}
} else {
Fill(dst, 129, size);
Fill_SSE2(dst, 129, size);
}
}
static WEBP_INLINE void TM(uint8_t* dst, const uint8_t* left,
const uint8_t* top, int size) {
static WEBP_INLINE void TM_SSE2(uint8_t* dst, const uint8_t* left,
const uint8_t* top, int size) {
const __m128i zero = _mm_setzero_si128();
int y;
if (size == 8) {
@ -593,13 +596,13 @@ static WEBP_INLINE void TM(uint8_t* dst, const uint8_t* left,
}
}
static WEBP_INLINE void TrueMotion(uint8_t* dst, const uint8_t* left,
const uint8_t* top, int size) {
static WEBP_INLINE void TrueMotion_SSE2(uint8_t* dst, const uint8_t* left,
const uint8_t* top, int size) {
if (left != NULL) {
if (top != NULL) {
TM(dst, left, top, size);
TM_SSE2(dst, left, top, size);
} else {
HorizontalPred(dst, left, size);
HorizontalPred_SSE2(dst, left, size);
}
} else {
// true motion without left samples (hence: with default 129 value)
@ -607,90 +610,90 @@ static WEBP_INLINE void TrueMotion(uint8_t* dst, const uint8_t* left,
// Note that if top samples are not available, the default value is
// then 129, and not 127 as in the VerticalPred case.
if (top != NULL) {
VerticalPred(dst, top, size);
VerticalPred_SSE2(dst, top, size);
} else {
Fill(dst, 129, size);
Fill_SSE2(dst, 129, size);
}
}
}
static WEBP_INLINE void DC8uv(uint8_t* dst, const uint8_t* left,
const uint8_t* top) {
static WEBP_INLINE void DC8uv_SSE2(uint8_t* dst, const uint8_t* left,
const uint8_t* top) {
const __m128i top_values = _mm_loadl_epi64((const __m128i*)top);
const __m128i left_values = _mm_loadl_epi64((const __m128i*)left);
const __m128i combined = _mm_unpacklo_epi64(top_values, left_values);
const int DC = VP8HorizontalAdd8b(&combined) + 8;
Put8x8uv(DC >> 4, dst);
Put8x8uv_SSE2(DC >> 4, dst);
}
static WEBP_INLINE void DC8uvNoLeft(uint8_t* dst, const uint8_t* top) {
static WEBP_INLINE void DC8uvNoLeft_SSE2(uint8_t* dst, const uint8_t* top) {
const __m128i zero = _mm_setzero_si128();
const __m128i top_values = _mm_loadl_epi64((const __m128i*)top);
const __m128i sum = _mm_sad_epu8(top_values, zero);
const int DC = _mm_cvtsi128_si32(sum) + 4;
Put8x8uv(DC >> 3, dst);
Put8x8uv_SSE2(DC >> 3, dst);
}
static WEBP_INLINE void DC8uvNoTop(uint8_t* dst, const uint8_t* left) {
static WEBP_INLINE void DC8uvNoTop_SSE2(uint8_t* dst, const uint8_t* left) {
// 'left' is contiguous so we can reuse the top summation.
DC8uvNoLeft(dst, left);
DC8uvNoLeft_SSE2(dst, left);
}
static WEBP_INLINE void DC8uvNoTopLeft(uint8_t* dst) {
Put8x8uv(0x80, dst);
static WEBP_INLINE void DC8uvNoTopLeft_SSE2(uint8_t* dst) {
Put8x8uv_SSE2(0x80, dst);
}
static WEBP_INLINE void DC8uvMode(uint8_t* dst, const uint8_t* left,
const uint8_t* top) {
static WEBP_INLINE void DC8uvMode_SSE2(uint8_t* dst, const uint8_t* left,
const uint8_t* top) {
if (top != NULL) {
if (left != NULL) { // top and left present
DC8uv(dst, left, top);
DC8uv_SSE2(dst, left, top);
} else { // top, but no left
DC8uvNoLeft(dst, top);
DC8uvNoLeft_SSE2(dst, top);
}
} else if (left != NULL) { // left but no top
DC8uvNoTop(dst, left);
DC8uvNoTop_SSE2(dst, left);
} else { // no top, no left, nothing.
DC8uvNoTopLeft(dst);
DC8uvNoTopLeft_SSE2(dst);
}
}
static WEBP_INLINE void DC16(uint8_t* dst, const uint8_t* left,
const uint8_t* top) {
static WEBP_INLINE void DC16_SSE2(uint8_t* dst, const uint8_t* left,
const uint8_t* top) {
const __m128i top_row = _mm_load_si128((const __m128i*)top);
const __m128i left_row = _mm_load_si128((const __m128i*)left);
const int DC =
VP8HorizontalAdd8b(&top_row) + VP8HorizontalAdd8b(&left_row) + 16;
Put16(DC >> 5, dst);
Put16_SSE2(DC >> 5, dst);
}
static WEBP_INLINE void DC16NoLeft(uint8_t* dst, const uint8_t* top) {
static WEBP_INLINE void DC16NoLeft_SSE2(uint8_t* dst, const uint8_t* top) {
const __m128i top_row = _mm_load_si128((const __m128i*)top);
const int DC = VP8HorizontalAdd8b(&top_row) + 8;
Put16(DC >> 4, dst);
Put16_SSE2(DC >> 4, dst);
}
static WEBP_INLINE void DC16NoTop(uint8_t* dst, const uint8_t* left) {
static WEBP_INLINE void DC16NoTop_SSE2(uint8_t* dst, const uint8_t* left) {
// 'left' is contiguous so we can reuse the top summation.
DC16NoLeft(dst, left);
DC16NoLeft_SSE2(dst, left);
}
static WEBP_INLINE void DC16NoTopLeft(uint8_t* dst) {
Put16(0x80, dst);
static WEBP_INLINE void DC16NoTopLeft_SSE2(uint8_t* dst) {
Put16_SSE2(0x80, dst);
}
static WEBP_INLINE void DC16Mode(uint8_t* dst, const uint8_t* left,
const uint8_t* top) {
static WEBP_INLINE void DC16Mode_SSE2(uint8_t* dst, const uint8_t* left,
const uint8_t* top) {
if (top != NULL) {
if (left != NULL) { // top and left present
DC16(dst, left, top);
DC16_SSE2(dst, left, top);
} else { // top, but no left
DC16NoLeft(dst, top);
DC16NoLeft_SSE2(dst, top);
}
} else if (left != NULL) { // left but no top
DC16NoTop(dst, left);
DC16NoTop_SSE2(dst, left);
} else { // no top, no left, nothing.
DC16NoTopLeft(dst);
DC16NoTopLeft_SSE2(dst);
}
}
@ -709,7 +712,8 @@ static WEBP_INLINE void DC16Mode(uint8_t* dst, const uint8_t* left,
// where: AC = (a + b + 1) >> 1, BC = (b + c + 1) >> 1
// and ab = a ^ b, bc = b ^ c, lsb = (AC^BC)&1
static WEBP_INLINE void VE4(uint8_t* dst, const uint8_t* top) { // vertical
static WEBP_INLINE void VE4_SSE2(uint8_t* dst,
const uint8_t* top) { // vertical
const __m128i one = _mm_set1_epi8(1);
const __m128i ABCDEFGH = _mm_loadl_epi64((__m128i*)(top - 1));
const __m128i BCDEFGH0 = _mm_srli_si128(ABCDEFGH, 1);
@ -725,7 +729,8 @@ static WEBP_INLINE void VE4(uint8_t* dst, const uint8_t* top) { // vertical
}
}
static WEBP_INLINE void HE4(uint8_t* dst, const uint8_t* top) { // horizontal
static WEBP_INLINE void HE4_SSE2(uint8_t* dst,
const uint8_t* top) { // horizontal
const int X = top[-1];
const int I = top[-2];
const int J = top[-3];
@ -737,14 +742,15 @@ static WEBP_INLINE void HE4(uint8_t* dst, const uint8_t* top) { // horizontal
WebPUint32ToMem(dst + 3 * BPS, 0x01010101U * AVG3(K, L, L));
}
static WEBP_INLINE void DC4(uint8_t* dst, const uint8_t* top) {
static WEBP_INLINE void DC4_SSE2(uint8_t* dst, const uint8_t* top) {
uint32_t dc = 4;
int i;
for (i = 0; i < 4; ++i) dc += top[i] + top[-5 + i];
Fill(dst, dc >> 3, 4);
Fill_SSE2(dst, dc >> 3, 4);
}
static WEBP_INLINE void LD4(uint8_t* dst, const uint8_t* top) { // Down-Left
static WEBP_INLINE void LD4_SSE2(uint8_t* dst,
const uint8_t* top) { // Down-Left
const __m128i one = _mm_set1_epi8(1);
const __m128i ABCDEFGH = _mm_loadl_epi64((const __m128i*)top);
const __m128i BCDEFGH0 = _mm_srli_si128(ABCDEFGH, 1);
@ -760,8 +766,8 @@ static WEBP_INLINE void LD4(uint8_t* dst, const uint8_t* top) { // Down-Left
WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
}
static WEBP_INLINE void VR4(uint8_t* dst,
const uint8_t* top) { // Vertical-Right
static WEBP_INLINE void VR4_SSE2(uint8_t* dst,
const uint8_t* top) { // Vertical-Right
const __m128i one = _mm_set1_epi8(1);
const int I = top[-2];
const int J = top[-3];
@ -786,8 +792,8 @@ static WEBP_INLINE void VR4(uint8_t* dst,
DST(0, 3) = AVG3(K, J, I);
}
static WEBP_INLINE void VL4(uint8_t* dst,
const uint8_t* top) { // Vertical-Left
static WEBP_INLINE void VL4_SSE2(uint8_t* dst,
const uint8_t* top) { // Vertical-Left
const __m128i one = _mm_set1_epi8(1);
const __m128i ABCDEFGH = _mm_loadl_epi64((const __m128i*)top);
const __m128i BCDEFGH_ = _mm_srli_si128(ABCDEFGH, 1);
@ -812,7 +818,8 @@ static WEBP_INLINE void VL4(uint8_t* dst,
DST(3, 3) = (extra_out >> 8) & 0xff;
}
static WEBP_INLINE void RD4(uint8_t* dst, const uint8_t* top) { // Down-right
static WEBP_INLINE void RD4_SSE2(uint8_t* dst,
const uint8_t* top) { // Down-right
const __m128i one = _mm_set1_epi8(1);
const __m128i LKJIXABC = _mm_loadl_epi64((const __m128i*)(top - 5));
const __m128i LKJIXABCD = _mm_insert_epi16(LKJIXABC, top[3], 4);
@ -828,7 +835,7 @@ static WEBP_INLINE void RD4(uint8_t* dst, const uint8_t* top) { // Down-right
WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
}
static WEBP_INLINE void HU4(uint8_t* dst, const uint8_t* top) {
static WEBP_INLINE void HU4_SSE2(uint8_t* dst, const uint8_t* top) {
const int I = top[-2];
const int J = top[-3];
const int K = top[-4];
@ -843,7 +850,7 @@ static WEBP_INLINE void HU4(uint8_t* dst, const uint8_t* top) {
DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
}
static WEBP_INLINE void HD4(uint8_t* dst, const uint8_t* top) {
static WEBP_INLINE void HD4_SSE2(uint8_t* dst, const uint8_t* top) {
const int X = top[-1];
const int I = top[-2];
const int J = top[-3];
@ -866,7 +873,7 @@ static WEBP_INLINE void HD4(uint8_t* dst, const uint8_t* top) {
DST(1, 3) = AVG3(L, K, J);
}
static WEBP_INLINE void TM4(uint8_t* dst, const uint8_t* top) {
static WEBP_INLINE void TM4_SSE2(uint8_t* dst, const uint8_t* top) {
const __m128i zero = _mm_setzero_si128();
const __m128i top_values = _mm_cvtsi32_si128(WebPMemToUint32(top));
const __m128i top_base = _mm_unpacklo_epi8(top_values, zero);
@ -888,55 +895,56 @@ static WEBP_INLINE void TM4(uint8_t* dst, const uint8_t* top) {
// Left samples are top[-5 .. -2], top_left is top[-1], top are
// located at top[0..3], and top right is top[4..7]
static void Intra4Preds(uint8_t* dst, const uint8_t* top) {
DC4(I4DC4 + dst, top);
TM4(I4TM4 + dst, top);
VE4(I4VE4 + dst, top);
HE4(I4HE4 + dst, top);
RD4(I4RD4 + dst, top);
VR4(I4VR4 + dst, top);
LD4(I4LD4 + dst, top);
VL4(I4VL4 + dst, top);
HD4(I4HD4 + dst, top);
HU4(I4HU4 + dst, top);
static void Intra4Preds_SSE2(uint8_t* dst, const uint8_t* top) {
DC4_SSE2(I4DC4 + dst, top);
TM4_SSE2(I4TM4 + dst, top);
VE4_SSE2(I4VE4 + dst, top);
HE4_SSE2(I4HE4 + dst, top);
RD4_SSE2(I4RD4 + dst, top);
VR4_SSE2(I4VR4 + dst, top);
LD4_SSE2(I4LD4 + dst, top);
VL4_SSE2(I4VL4 + dst, top);
HD4_SSE2(I4HD4 + dst, top);
HU4_SSE2(I4HU4 + dst, top);
}
//------------------------------------------------------------------------------
// Chroma 8x8 prediction (paragraph 12.2)
static void IntraChromaPreds(uint8_t* dst, const uint8_t* left,
const uint8_t* top) {
static void IntraChromaPreds_SSE2(uint8_t* dst, const uint8_t* left,
const uint8_t* top) {
// U block
DC8uvMode(C8DC8 + dst, left, top);
VerticalPred(C8VE8 + dst, top, 8);
HorizontalPred(C8HE8 + dst, left, 8);
TrueMotion(C8TM8 + dst, left, top, 8);
DC8uvMode_SSE2(C8DC8 + dst, left, top);
VerticalPred_SSE2(C8VE8 + dst, top, 8);
HorizontalPred_SSE2(C8HE8 + dst, left, 8);
TrueMotion_SSE2(C8TM8 + dst, left, top, 8);
// V block
dst += 8;
if (top != NULL) top += 8;
if (left != NULL) left += 16;
DC8uvMode(C8DC8 + dst, left, top);
VerticalPred(C8VE8 + dst, top, 8);
HorizontalPred(C8HE8 + dst, left, 8);
TrueMotion(C8TM8 + dst, left, top, 8);
DC8uvMode_SSE2(C8DC8 + dst, left, top);
VerticalPred_SSE2(C8VE8 + dst, top, 8);
HorizontalPred_SSE2(C8HE8 + dst, left, 8);
TrueMotion_SSE2(C8TM8 + dst, left, top, 8);
}
//------------------------------------------------------------------------------
// luma 16x16 prediction (paragraph 12.3)
static void Intra16Preds(uint8_t* dst,
const uint8_t* left, const uint8_t* top) {
DC16Mode(I16DC16 + dst, left, top);
VerticalPred(I16VE16 + dst, top, 16);
HorizontalPred(I16HE16 + dst, left, 16);
TrueMotion(I16TM16 + dst, left, top, 16);
static void Intra16Preds_SSE2(uint8_t* dst,
const uint8_t* left, const uint8_t* top) {
DC16Mode_SSE2(I16DC16 + dst, left, top);
VerticalPred_SSE2(I16VE16 + dst, top, 16);
HorizontalPred_SSE2(I16HE16 + dst, left, 16);
TrueMotion_SSE2(I16TM16 + dst, left, top, 16);
}
//------------------------------------------------------------------------------
// Metric
static WEBP_INLINE void SubtractAndAccumulate(const __m128i a, const __m128i b,
__m128i* const sum) {
static WEBP_INLINE void SubtractAndAccumulate_SSE2(const __m128i a,
const __m128i b,
__m128i* const sum) {
// take abs(a-b) in 8b
const __m128i a_b = _mm_subs_epu8(a, b);
const __m128i b_a = _mm_subs_epu8(b, a);
@ -951,8 +959,8 @@ static WEBP_INLINE void SubtractAndAccumulate(const __m128i a, const __m128i b,
*sum = _mm_add_epi32(sum1, sum2);
}
static WEBP_INLINE int SSE_16xN(const uint8_t* a, const uint8_t* b,
int num_pairs) {
static WEBP_INLINE int SSE_16xN_SSE2(const uint8_t* a, const uint8_t* b,
int num_pairs) {
__m128i sum = _mm_setzero_si128();
int32_t tmp[4];
int i;
@ -963,8 +971,8 @@ static WEBP_INLINE int SSE_16xN(const uint8_t* a, const uint8_t* b,
const __m128i a1 = _mm_loadu_si128((const __m128i*)&a[BPS * 1]);
const __m128i b1 = _mm_loadu_si128((const __m128i*)&b[BPS * 1]);
__m128i sum1, sum2;
SubtractAndAccumulate(a0, b0, &sum1);
SubtractAndAccumulate(a1, b1, &sum2);
SubtractAndAccumulate_SSE2(a0, b0, &sum1);
SubtractAndAccumulate_SSE2(a1, b1, &sum2);
sum = _mm_add_epi32(sum, _mm_add_epi32(sum1, sum2));
a += 2 * BPS;
b += 2 * BPS;
@ -973,18 +981,18 @@ static WEBP_INLINE int SSE_16xN(const uint8_t* a, const uint8_t* b,
return (tmp[3] + tmp[2] + tmp[1] + tmp[0]);
}
static int SSE16x16(const uint8_t* a, const uint8_t* b) {
return SSE_16xN(a, b, 8);
static int SSE16x16_SSE2(const uint8_t* a, const uint8_t* b) {
return SSE_16xN_SSE2(a, b, 8);
}
static int SSE16x8(const uint8_t* a, const uint8_t* b) {
return SSE_16xN(a, b, 4);
static int SSE16x8_SSE2(const uint8_t* a, const uint8_t* b) {
return SSE_16xN_SSE2(a, b, 4);
}
#define LOAD_8x16b(ptr) \
_mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(ptr)), zero)
static int SSE8x8(const uint8_t* a, const uint8_t* b) {
static int SSE8x8_SSE2(const uint8_t* a, const uint8_t* b) {
const __m128i zero = _mm_setzero_si128();
int num_pairs = 4;
__m128i sum = zero;
@ -1011,7 +1019,7 @@ static int SSE8x8(const uint8_t* a, const uint8_t* b) {
}
#undef LOAD_8x16b
static int SSE4x4(const uint8_t* a, const uint8_t* b) {
static int SSE4x4_SSE2(const uint8_t* a, const uint8_t* b) {
const __m128i zero = _mm_setzero_si128();
// Load values. Note that we read 8 pixels instead of 4,
@ -1048,7 +1056,7 @@ static int SSE4x4(const uint8_t* a, const uint8_t* b) {
//------------------------------------------------------------------------------
static void Mean16x4(const uint8_t* ref, uint32_t dc[4]) {
static void Mean16x4_SSE2(const uint8_t* ref, uint32_t dc[4]) {
const __m128i mask = _mm_set1_epi16(0x00ff);
const __m128i a0 = _mm_loadu_si128((const __m128i*)&ref[BPS * 0]);
const __m128i a1 = _mm_loadu_si128((const __m128i*)&ref[BPS * 1]);
@ -1086,8 +1094,8 @@ static void Mean16x4(const uint8_t* ref, uint32_t dc[4]) {
// Hadamard transform
// Returns the weighted sum of the absolute value of transformed coefficients.
// w[] contains a row-major 4 by 4 symmetric matrix.
static int TTransform(const uint8_t* inA, const uint8_t* inB,
const uint16_t* const w) {
static int TTransform_SSE2(const uint8_t* inA, const uint8_t* inB,
const uint16_t* const w) {
int32_t sum[4];
__m128i tmp_0, tmp_1, tmp_2, tmp_3;
const __m128i zero = _mm_setzero_si128();
@ -1187,19 +1195,19 @@ static int TTransform(const uint8_t* inA, const uint8_t* inB,
return sum[0] + sum[1] + sum[2] + sum[3];
}
static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
const uint16_t* const w) {
const int diff_sum = TTransform(a, b, w);
static int Disto4x4_SSE2(const uint8_t* const a, const uint8_t* const b,
const uint16_t* const w) {
const int diff_sum = TTransform_SSE2(a, b, w);
return abs(diff_sum) >> 5;
}
static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
const uint16_t* const w) {
static int Disto16x16_SSE2(const uint8_t* const a, const uint8_t* const b,
const uint16_t* const w) {
int D = 0;
int x, y;
for (y = 0; y < 16 * BPS; y += 4 * BPS) {
for (x = 0; x < 16; x += 4) {
D += Disto4x4(a + x + y, b + x + y, w);
D += Disto4x4_SSE2(a + x + y, b + x + y, w);
}
}
return D;
@ -1209,9 +1217,9 @@ static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
// Quantization
//
static WEBP_INLINE int DoQuantizeBlock(int16_t in[16], int16_t out[16],
const uint16_t* const sharpen,
const VP8Matrix* const mtx) {
static WEBP_INLINE int DoQuantizeBlock_SSE2(int16_t in[16], int16_t out[16],
const uint16_t* const sharpen,
const VP8Matrix* const mtx) {
const __m128i max_coeff_2047 = _mm_set1_epi16(MAX_LEVEL);
const __m128i zero = _mm_setzero_si128();
__m128i coeff0, coeff8;
@ -1321,22 +1329,22 @@ static WEBP_INLINE int DoQuantizeBlock(int16_t in[16], int16_t out[16],
return (_mm_movemask_epi8(_mm_cmpeq_epi8(packed_out, zero)) != 0xffff);
}
static int QuantizeBlock(int16_t in[16], int16_t out[16],
const VP8Matrix* const mtx) {
return DoQuantizeBlock(in, out, &mtx->sharpen_[0], mtx);
static int QuantizeBlock_SSE2(int16_t in[16], int16_t out[16],
const VP8Matrix* const mtx) {
return DoQuantizeBlock_SSE2(in, out, &mtx->sharpen_[0], mtx);
}
static int QuantizeBlockWHT(int16_t in[16], int16_t out[16],
const VP8Matrix* const mtx) {
return DoQuantizeBlock(in, out, NULL, mtx);
static int QuantizeBlockWHT_SSE2(int16_t in[16], int16_t out[16],
const VP8Matrix* const mtx) {
return DoQuantizeBlock_SSE2(in, out, NULL, mtx);
}
static int Quantize2Blocks(int16_t in[32], int16_t out[32],
const VP8Matrix* const mtx) {
static int Quantize2Blocks_SSE2(int16_t in[32], int16_t out[32],
const VP8Matrix* const mtx) {
int nz;
const uint16_t* const sharpen = &mtx->sharpen_[0];
nz = DoQuantizeBlock(in + 0 * 16, out + 0 * 16, sharpen, mtx) << 0;
nz |= DoQuantizeBlock(in + 1 * 16, out + 1 * 16, sharpen, mtx) << 1;
nz = DoQuantizeBlock_SSE2(in + 0 * 16, out + 0 * 16, sharpen, mtx) << 0;
nz |= DoQuantizeBlock_SSE2(in + 1 * 16, out + 1 * 16, sharpen, mtx) << 1;
return nz;
}
@ -1346,24 +1354,24 @@ static int Quantize2Blocks(int16_t in[32], int16_t out[32],
extern void VP8EncDspInitSSE2(void);
WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInitSSE2(void) {
VP8CollectHistogram = CollectHistogram;
VP8EncPredLuma16 = Intra16Preds;
VP8EncPredChroma8 = IntraChromaPreds;
VP8EncPredLuma4 = Intra4Preds;
VP8EncQuantizeBlock = QuantizeBlock;
VP8EncQuantize2Blocks = Quantize2Blocks;
VP8EncQuantizeBlockWHT = QuantizeBlockWHT;
VP8ITransform = ITransform;
VP8FTransform = FTransform;
VP8FTransform2 = FTransform2;
VP8FTransformWHT = FTransformWHT;
VP8SSE16x16 = SSE16x16;
VP8SSE16x8 = SSE16x8;
VP8SSE8x8 = SSE8x8;
VP8SSE4x4 = SSE4x4;
VP8TDisto4x4 = Disto4x4;
VP8TDisto16x16 = Disto16x16;
VP8Mean16x4 = Mean16x4;
VP8CollectHistogram = CollectHistogram_SSE2;
VP8EncPredLuma16 = Intra16Preds_SSE2;
VP8EncPredChroma8 = IntraChromaPreds_SSE2;
VP8EncPredLuma4 = Intra4Preds_SSE2;
VP8EncQuantizeBlock = QuantizeBlock_SSE2;
VP8EncQuantize2Blocks = Quantize2Blocks_SSE2;
VP8EncQuantizeBlockWHT = QuantizeBlockWHT_SSE2;
VP8ITransform = ITransform_SSE2;
VP8FTransform = FTransform_SSE2;
VP8FTransform2 = FTransform2_SSE2;
VP8FTransformWHT = FTransformWHT_SSE2;
VP8SSE16x16 = SSE16x16_SSE2;
VP8SSE16x8 = SSE16x8_SSE2;
VP8SSE8x8 = SSE8x8_SSE2;
VP8SSE4x4 = SSE4x4_SSE2;
VP8TDisto4x4 = Disto4x4_SSE2;
VP8TDisto16x16 = Disto16x16_SSE2;
VP8Mean16x4 = Mean16x4_SSE2;
}
#else // !WEBP_USE_SSE2

View File

@ -11,21 +11,21 @@
//
// Author: Skal (pascal.massimino@gmail.com)
#include "./dsp.h"
#include "src/dsp/dsp.h"
#if defined(WEBP_USE_SSE41)
#include <smmintrin.h>
#include <stdlib.h> // for abs()
#include "./common_sse2.h"
#include "../enc/vp8i_enc.h"
#include "src/dsp/common_sse2.h"
#include "src/enc/vp8i_enc.h"
//------------------------------------------------------------------------------
// Compute susceptibility based on DCT-coeff histograms.
static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
int start_block, int end_block,
VP8Histogram* const histo) {
static void CollectHistogram_SSE41(const uint8_t* ref, const uint8_t* pred,
int start_block, int end_block,
VP8Histogram* const histo) {
const __m128i max_coeff_thresh = _mm_set1_epi16(MAX_COEFF_THRESH);
int j;
int distribution[MAX_COEFF_THRESH + 1] = { 0 };
@ -70,8 +70,8 @@ static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
// Hadamard transform
// Returns the weighted sum of the absolute value of transformed coefficients.
// w[] contains a row-major 4 by 4 symmetric matrix.
static int TTransform(const uint8_t* inA, const uint8_t* inB,
const uint16_t* const w) {
static int TTransform_SSE41(const uint8_t* inA, const uint8_t* inB,
const uint16_t* const w) {
int32_t sum[4];
__m128i tmp_0, tmp_1, tmp_2, tmp_3;
@ -168,19 +168,19 @@ static int TTransform(const uint8_t* inA, const uint8_t* inB,
return sum[0] + sum[1] + sum[2] + sum[3];
}
static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
const uint16_t* const w) {
const int diff_sum = TTransform(a, b, w);
static int Disto4x4_SSE41(const uint8_t* const a, const uint8_t* const b,
const uint16_t* const w) {
const int diff_sum = TTransform_SSE41(a, b, w);
return abs(diff_sum) >> 5;
}
static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
const uint16_t* const w) {
static int Disto16x16_SSE41(const uint8_t* const a, const uint8_t* const b,
const uint16_t* const w) {
int D = 0;
int x, y;
for (y = 0; y < 16 * BPS; y += 4 * BPS) {
for (x = 0; x < 16; x += 4) {
D += Disto4x4(a + x + y, b + x + y, w);
D += Disto4x4_SSE41(a + x + y, b + x + y, w);
}
}
return D;
@ -197,9 +197,9 @@ static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
2 * (D) + 1, 2 * (D) + 0, 2 * (C) + 1, 2 * (C) + 0, \
2 * (B) + 1, 2 * (B) + 0, 2 * (A) + 1, 2 * (A) + 0)
static WEBP_INLINE int DoQuantizeBlock(int16_t in[16], int16_t out[16],
const uint16_t* const sharpen,
const VP8Matrix* const mtx) {
static WEBP_INLINE int DoQuantizeBlock_SSE41(int16_t in[16], int16_t out[16],
const uint16_t* const sharpen,
const VP8Matrix* const mtx) {
const __m128i max_coeff_2047 = _mm_set1_epi16(MAX_LEVEL);
const __m128i zero = _mm_setzero_si128();
__m128i out0, out8;
@ -300,22 +300,22 @@ static WEBP_INLINE int DoQuantizeBlock(int16_t in[16], int16_t out[16],
#undef PSHUFB_CST
static int QuantizeBlock(int16_t in[16], int16_t out[16],
const VP8Matrix* const mtx) {
return DoQuantizeBlock(in, out, &mtx->sharpen_[0], mtx);
static int QuantizeBlock_SSE41(int16_t in[16], int16_t out[16],
const VP8Matrix* const mtx) {
return DoQuantizeBlock_SSE41(in, out, &mtx->sharpen_[0], mtx);
}
static int QuantizeBlockWHT(int16_t in[16], int16_t out[16],
const VP8Matrix* const mtx) {
return DoQuantizeBlock(in, out, NULL, mtx);
static int QuantizeBlockWHT_SSE41(int16_t in[16], int16_t out[16],
const VP8Matrix* const mtx) {
return DoQuantizeBlock_SSE41(in, out, NULL, mtx);
}
static int Quantize2Blocks(int16_t in[32], int16_t out[32],
const VP8Matrix* const mtx) {
static int Quantize2Blocks_SSE41(int16_t in[32], int16_t out[32],
const VP8Matrix* const mtx) {
int nz;
const uint16_t* const sharpen = &mtx->sharpen_[0];
nz = DoQuantizeBlock(in + 0 * 16, out + 0 * 16, sharpen, mtx) << 0;
nz |= DoQuantizeBlock(in + 1 * 16, out + 1 * 16, sharpen, mtx) << 1;
nz = DoQuantizeBlock_SSE41(in + 0 * 16, out + 0 * 16, sharpen, mtx) << 0;
nz |= DoQuantizeBlock_SSE41(in + 1 * 16, out + 1 * 16, sharpen, mtx) << 1;
return nz;
}
@ -324,12 +324,12 @@ static int Quantize2Blocks(int16_t in[32], int16_t out[32],
extern void VP8EncDspInitSSE41(void);
WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInitSSE41(void) {
VP8CollectHistogram = CollectHistogram;
VP8EncQuantizeBlock = QuantizeBlock;
VP8EncQuantize2Blocks = Quantize2Blocks;
VP8EncQuantizeBlockWHT = QuantizeBlockWHT;
VP8TDisto4x4 = Disto4x4;
VP8TDisto16x16 = Disto16x16;
VP8CollectHistogram = CollectHistogram_SSE41;
VP8EncQuantizeBlock = QuantizeBlock_SSE41;
VP8EncQuantize2Blocks = Quantize2Blocks_SSE41;
VP8EncQuantizeBlockWHT = QuantizeBlockWHT_SSE41;
VP8TDisto4x4 = Disto4x4_SSE41;
VP8TDisto16x16 = Disto16x16_SSE41;
}
#else // !WEBP_USE_SSE41

View File

@ -11,7 +11,7 @@
//
// Author: Urvang (urvang@google.com)
#include "./dsp.h"
#include "src/dsp/dsp.h"
#include <assert.h>
#include <stdlib.h>
#include <string.h>
@ -20,16 +20,17 @@
// Helpful macro.
# define SANITY_CHECK(in, out) \
assert(in != NULL); \
assert(out != NULL); \
assert((in) != NULL); \
assert((out) != NULL); \
assert(width > 0); \
assert(height > 0); \
assert(stride >= width); \
assert(row >= 0 && num_rows > 0 && row + num_rows <= height); \
(void)height; // Silence unused warning.
static WEBP_INLINE void PredictLine(const uint8_t* src, const uint8_t* pred,
uint8_t* dst, int length, int inverse) {
#if !WEBP_NEON_OMIT_C_CODE
static WEBP_INLINE void PredictLine_C(const uint8_t* src, const uint8_t* pred,
uint8_t* dst, int length, int inverse) {
int i;
if (inverse) {
for (i = 0; i < length; ++i) dst[i] = src[i] + pred[i];
@ -41,7 +42,44 @@ static WEBP_INLINE void PredictLine(const uint8_t* src, const uint8_t* pred,
//------------------------------------------------------------------------------
// Horizontal filter.
static WEBP_INLINE void DoHorizontalFilter(const uint8_t* in,
static WEBP_INLINE void DoHorizontalFilter_C(const uint8_t* in,
int width, int height, int stride,
int row, int num_rows,
int inverse, uint8_t* out) {
const uint8_t* preds;
const size_t start_offset = row * stride;
const int last_row = row + num_rows;
SANITY_CHECK(in, out);
in += start_offset;
out += start_offset;
preds = inverse ? out : in;
if (row == 0) {
// Leftmost pixel is the same as input for topmost scanline.
out[0] = in[0];
PredictLine_C(in + 1, preds, out + 1, width - 1, inverse);
row = 1;
preds += stride;
in += stride;
out += stride;
}
// Filter line-by-line.
while (row < last_row) {
// Leftmost pixel is predicted from above.
PredictLine_C(in, preds - stride, out, 1, inverse);
PredictLine_C(in + 1, preds, out + 1, width - 1, inverse);
++row;
preds += stride;
in += stride;
out += stride;
}
}
//------------------------------------------------------------------------------
// Vertical filter.
static WEBP_INLINE void DoVerticalFilter_C(const uint8_t* in,
int width, int height, int stride,
int row, int num_rows,
int inverse, uint8_t* out) {
@ -53,48 +91,11 @@ static WEBP_INLINE void DoHorizontalFilter(const uint8_t* in,
out += start_offset;
preds = inverse ? out : in;
if (row == 0) {
// Leftmost pixel is the same as input for topmost scanline.
out[0] = in[0];
PredictLine(in + 1, preds, out + 1, width - 1, inverse);
row = 1;
preds += stride;
in += stride;
out += stride;
}
// Filter line-by-line.
while (row < last_row) {
// Leftmost pixel is predicted from above.
PredictLine(in, preds - stride, out, 1, inverse);
PredictLine(in + 1, preds, out + 1, width - 1, inverse);
++row;
preds += stride;
in += stride;
out += stride;
}
}
//------------------------------------------------------------------------------
// Vertical filter.
static WEBP_INLINE void DoVerticalFilter(const uint8_t* in,
int width, int height, int stride,
int row, int num_rows,
int inverse, uint8_t* out) {
const uint8_t* preds;
const size_t start_offset = row * stride;
const int last_row = row + num_rows;
SANITY_CHECK(in, out);
in += start_offset;
out += start_offset;
preds = inverse ? out : in;
if (row == 0) {
// Very first top-left pixel is copied.
out[0] = in[0];
// Rest of top scan-line is left-predicted.
PredictLine(in + 1, preds, out + 1, width - 1, inverse);
PredictLine_C(in + 1, preds, out + 1, width - 1, inverse);
row = 1;
in += stride;
out += stride;
@ -105,26 +106,28 @@ static WEBP_INLINE void DoVerticalFilter(const uint8_t* in,
// Filter line-by-line.
while (row < last_row) {
PredictLine(in, preds, out, width, inverse);
PredictLine_C(in, preds, out, width, inverse);
++row;
preds += stride;
in += stride;
out += stride;
}
}
#endif // !WEBP_NEON_OMIT_C_CODE
//------------------------------------------------------------------------------
// Gradient filter.
static WEBP_INLINE int GradientPredictor(uint8_t a, uint8_t b, uint8_t c) {
static WEBP_INLINE int GradientPredictor_C(uint8_t a, uint8_t b, uint8_t c) {
const int g = a + b - c;
return ((g & ~0xff) == 0) ? g : (g < 0) ? 0 : 255; // clip to 8bit
}
static WEBP_INLINE void DoGradientFilter(const uint8_t* in,
int width, int height, int stride,
int row, int num_rows,
int inverse, uint8_t* out) {
#if !WEBP_NEON_OMIT_C_CODE
static WEBP_INLINE void DoGradientFilter_C(const uint8_t* in,
int width, int height, int stride,
int row, int num_rows,
int inverse, uint8_t* out) {
const uint8_t* preds;
const size_t start_offset = row * stride;
const int last_row = row + num_rows;
@ -136,7 +139,7 @@ static WEBP_INLINE void DoGradientFilter(const uint8_t* in,
// left prediction for top scan-line
if (row == 0) {
out[0] = in[0];
PredictLine(in + 1, preds, out + 1, width - 1, inverse);
PredictLine_C(in + 1, preds, out + 1, width - 1, inverse);
row = 1;
preds += stride;
in += stride;
@ -147,11 +150,11 @@ static WEBP_INLINE void DoGradientFilter(const uint8_t* in,
while (row < last_row) {
int w;
// leftmost pixel: predict from above.
PredictLine(in, preds - stride, out, 1, inverse);
PredictLine_C(in, preds - stride, out, 1, inverse);
for (w = 1; w < width; ++w) {
const int pred = GradientPredictor(preds[w - 1],
preds[w - stride],
preds[w - stride - 1]);
const int pred = GradientPredictor_C(preds[w - 1],
preds[w - stride],
preds[w - stride - 1]);
out[w] = in[w] + (inverse ? pred : -pred);
}
++row;
@ -160,32 +163,34 @@ static WEBP_INLINE void DoGradientFilter(const uint8_t* in,
out += stride;
}
}
#endif // !WEBP_NEON_OMIT_C_CODE
#undef SANITY_CHECK
//------------------------------------------------------------------------------
static void HorizontalFilter(const uint8_t* data, int width, int height,
#if !WEBP_NEON_OMIT_C_CODE
static void HorizontalFilter_C(const uint8_t* data, int width, int height,
int stride, uint8_t* filtered_data) {
DoHorizontalFilter_C(data, width, height, stride, 0, height, 0,
filtered_data);
}
static void VerticalFilter_C(const uint8_t* data, int width, int height,
int stride, uint8_t* filtered_data) {
DoHorizontalFilter(data, width, height, stride, 0, height, 0, filtered_data);
DoVerticalFilter_C(data, width, height, stride, 0, height, 0, filtered_data);
}
static void VerticalFilter(const uint8_t* data, int width, int height,
int stride, uint8_t* filtered_data) {
DoVerticalFilter(data, width, height, stride, 0, height, 0, filtered_data);
static void GradientFilter_C(const uint8_t* data, int width, int height,
int stride, uint8_t* filtered_data) {
DoGradientFilter_C(data, width, height, stride, 0, height, 0, filtered_data);
}
static void GradientFilter(const uint8_t* data, int width, int height,
int stride, uint8_t* filtered_data) {
DoGradientFilter(data, width, height, stride, 0, height, 0, filtered_data);
}
#endif // !WEBP_NEON_OMIT_C_CODE
//------------------------------------------------------------------------------
static void HorizontalUnfilter(const uint8_t* prev, const uint8_t* in,
uint8_t* out, int width) {
static void HorizontalUnfilter_C(const uint8_t* prev, const uint8_t* in,
uint8_t* out, int width) {
uint8_t pred = (prev == NULL) ? 0 : prev[0];
int i;
for (i = 0; i < width; ++i) {
@ -194,26 +199,28 @@ static void HorizontalUnfilter(const uint8_t* prev, const uint8_t* in,
}
}
static void VerticalUnfilter(const uint8_t* prev, const uint8_t* in,
uint8_t* out, int width) {
#if !WEBP_NEON_OMIT_C_CODE
static void VerticalUnfilter_C(const uint8_t* prev, const uint8_t* in,
uint8_t* out, int width) {
if (prev == NULL) {
HorizontalUnfilter(NULL, in, out, width);
HorizontalUnfilter_C(NULL, in, out, width);
} else {
int i;
for (i = 0; i < width; ++i) out[i] = prev[i] + in[i];
}
}
#endif // !WEBP_NEON_OMIT_C_CODE
static void GradientUnfilter(const uint8_t* prev, const uint8_t* in,
uint8_t* out, int width) {
static void GradientUnfilter_C(const uint8_t* prev, const uint8_t* in,
uint8_t* out, int width) {
if (prev == NULL) {
HorizontalUnfilter(NULL, in, out, width);
HorizontalUnfilter_C(NULL, in, out, width);
} else {
uint8_t top = prev[0], top_left = top, left = top;
int i;
for (i = 0; i < width; ++i) {
top = prev[i]; // need to read this first, in case prev==out
left = in[i] + GradientPredictor(left, top, top_left);
left = in[i] + GradientPredictor_C(left, top, top_left);
top_left = top;
out[i] = left;
}
@ -238,14 +245,18 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInit(void) {
if (filters_last_cpuinfo_used == VP8GetCPUInfo) return;
WebPUnfilters[WEBP_FILTER_NONE] = NULL;
WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter;
WebPUnfilters[WEBP_FILTER_VERTICAL] = VerticalUnfilter;
WebPUnfilters[WEBP_FILTER_GRADIENT] = GradientUnfilter;
#if !WEBP_NEON_OMIT_C_CODE
WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter_C;
WebPUnfilters[WEBP_FILTER_VERTICAL] = VerticalUnfilter_C;
#endif
WebPUnfilters[WEBP_FILTER_GRADIENT] = GradientUnfilter_C;
WebPFilters[WEBP_FILTER_NONE] = NULL;
WebPFilters[WEBP_FILTER_HORIZONTAL] = HorizontalFilter;
WebPFilters[WEBP_FILTER_VERTICAL] = VerticalFilter;
WebPFilters[WEBP_FILTER_GRADIENT] = GradientFilter;
#if !WEBP_NEON_OMIT_C_CODE
WebPFilters[WEBP_FILTER_HORIZONTAL] = HorizontalFilter_C;
WebPFilters[WEBP_FILTER_VERTICAL] = VerticalFilter_C;
WebPFilters[WEBP_FILTER_GRADIENT] = GradientFilter_C;
#endif
if (VP8GetCPUInfo != NULL) {
#if defined(WEBP_USE_SSE2)
@ -253,11 +264,6 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInit(void) {
VP8FiltersInitSSE2();
}
#endif
#if defined(WEBP_USE_NEON)
if (VP8GetCPUInfo(kNEON)) {
VP8FiltersInitNEON();
}
#endif
#if defined(WEBP_USE_MIPS_DSP_R2)
if (VP8GetCPUInfo(kMIPSdspR2)) {
VP8FiltersInitMIPSdspR2();
@ -269,5 +275,20 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInit(void) {
}
#endif
}
#if defined(WEBP_USE_NEON)
if (WEBP_NEON_OMIT_C_CODE ||
(VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
VP8FiltersInitNEON();
}
#endif
assert(WebPUnfilters[WEBP_FILTER_HORIZONTAL] != NULL);
assert(WebPUnfilters[WEBP_FILTER_VERTICAL] != NULL);
assert(WebPUnfilters[WEBP_FILTER_GRADIENT] != NULL);
assert(WebPFilters[WEBP_FILTER_HORIZONTAL] != NULL);
assert(WebPFilters[WEBP_FILTER_VERTICAL] != NULL);
assert(WebPFilters[WEBP_FILTER_GRADIENT] != NULL);
filters_last_cpuinfo_used = VP8GetCPUInfo;
}

View File

@ -12,11 +12,11 @@
// Author(s): Branimir Vasic (branimir.vasic@imgtec.com)
// Djordje Pesut (djordje.pesut@imgtec.com)
#include "./dsp.h"
#include "src/dsp/dsp.h"
#if defined(WEBP_USE_MIPS_DSP_R2)
#include "../dsp/dsp.h"
#include "src/dsp/dsp.h"
#include <assert.h>
#include <stdlib.h>
#include <string.h>
@ -101,8 +101,8 @@
); \
} while (0)
static WEBP_INLINE void PredictLine(const uint8_t* src, uint8_t* dst,
int length) {
static WEBP_INLINE void PredictLine_MIPSdspR2(const uint8_t* src, uint8_t* dst,
int length) {
DO_PREDICT_LINE(src, dst, length, 0);
}
@ -192,10 +192,11 @@ static WEBP_INLINE void PredictLine(const uint8_t* src, uint8_t* dst,
} \
} while (0)
static WEBP_INLINE void DoHorizontalFilter(const uint8_t* in,
int width, int height, int stride,
int row, int num_rows,
uint8_t* out) {
static WEBP_INLINE void DoHorizontalFilter_MIPSdspR2(const uint8_t* in,
int width, int height,
int stride,
int row, int num_rows,
uint8_t* out) {
const uint8_t* preds;
const size_t start_offset = row * stride;
const int last_row = row + num_rows;
@ -207,7 +208,7 @@ static WEBP_INLINE void DoHorizontalFilter(const uint8_t* in,
if (row == 0) {
// Leftmost pixel is the same as input for topmost scanline.
out[0] = in[0];
PredictLine(in + 1, out + 1, width - 1);
PredictLine_MIPSdspR2(in + 1, out + 1, width - 1);
row = 1;
preds += stride;
in += stride;
@ -219,9 +220,11 @@ static WEBP_INLINE void DoHorizontalFilter(const uint8_t* in,
}
#undef FILTER_LINE_BY_LINE
static void HorizontalFilter(const uint8_t* data, int width, int height,
int stride, uint8_t* filtered_data) {
DoHorizontalFilter(data, width, height, stride, 0, height, filtered_data);
static void HorizontalFilter_MIPSdspR2(const uint8_t* data,
int width, int height,
int stride, uint8_t* filtered_data) {
DoHorizontalFilter_MIPSdspR2(data, width, height, stride, 0, height,
filtered_data);
}
//------------------------------------------------------------------------------
@ -237,9 +240,11 @@ static void HorizontalFilter(const uint8_t* data, int width, int height,
} \
} while (0)
static WEBP_INLINE void DoVerticalFilter(const uint8_t* in,
int width, int height, int stride,
int row, int num_rows, uint8_t* out) {
static WEBP_INLINE void DoVerticalFilter_MIPSdspR2(const uint8_t* in,
int width, int height,
int stride,
int row, int num_rows,
uint8_t* out) {
const uint8_t* preds;
const size_t start_offset = row * stride;
const int last_row = row + num_rows;
@ -252,7 +257,7 @@ static WEBP_INLINE void DoVerticalFilter(const uint8_t* in,
// Very first top-left pixel is copied.
out[0] = in[0];
// Rest of top scan-line is left-predicted.
PredictLine(in + 1, out + 1, width - 1);
PredictLine_MIPSdspR2(in + 1, out + 1, width - 1);
row = 1;
in += stride;
out += stride;
@ -266,15 +271,16 @@ static WEBP_INLINE void DoVerticalFilter(const uint8_t* in,
}
#undef FILTER_LINE_BY_LINE
static void VerticalFilter(const uint8_t* data, int width, int height,
int stride, uint8_t* filtered_data) {
DoVerticalFilter(data, width, height, stride, 0, height, filtered_data);
static void VerticalFilter_MIPSdspR2(const uint8_t* data, int width, int height,
int stride, uint8_t* filtered_data) {
DoVerticalFilter_MIPSdspR2(data, width, height, stride, 0, height,
filtered_data);
}
//------------------------------------------------------------------------------
// Gradient filter.
static WEBP_INLINE int GradientPredictor(uint8_t a, uint8_t b, uint8_t c) {
static int GradientPredictor_MIPSdspR2(uint8_t a, uint8_t b, uint8_t c) {
int temp0;
__asm__ volatile (
"addu %[temp0], %[a], %[b] \n\t"
@ -293,9 +299,9 @@ static WEBP_INLINE int GradientPredictor(uint8_t a, uint8_t b, uint8_t c) {
int w; \
PREDICT_LINE_ONE_PASS(in, PREDS - stride, out); \
for (w = 1; w < width; ++w) { \
const int pred = GradientPredictor(PREDS[w - 1], \
PREDS[w - stride], \
PREDS[w - stride - 1]); \
const int pred = GradientPredictor_MIPSdspR2(PREDS[w - 1], \
PREDS[w - stride], \
PREDS[w - stride - 1]); \
out[w] = in[w] OPERATION pred; \
} \
++row; \
@ -304,9 +310,9 @@ static WEBP_INLINE int GradientPredictor(uint8_t a, uint8_t b, uint8_t c) {
} \
} while (0)
static WEBP_INLINE void DoGradientFilter(const uint8_t* in,
int width, int height, int stride,
int row, int num_rows, uint8_t* out) {
static void DoGradientFilter_MIPSdspR2(const uint8_t* in,
int width, int height, int stride,
int row, int num_rows, uint8_t* out) {
const uint8_t* preds;
const size_t start_offset = row * stride;
const int last_row = row + num_rows;
@ -318,7 +324,7 @@ static WEBP_INLINE void DoGradientFilter(const uint8_t* in,
// left prediction for top scan-line
if (row == 0) {
out[0] = in[0];
PredictLine(in + 1, out + 1, width - 1);
PredictLine_MIPSdspR2(in + 1, out + 1, width - 1);
row = 1;
preds += stride;
in += stride;
@ -330,38 +336,39 @@ static WEBP_INLINE void DoGradientFilter(const uint8_t* in,
}
#undef FILTER_LINE_BY_LINE
static void GradientFilter(const uint8_t* data, int width, int height,
int stride, uint8_t* filtered_data) {
DoGradientFilter(data, width, height, stride, 0, height, filtered_data);
static void GradientFilter_MIPSdspR2(const uint8_t* data, int width, int height,
int stride, uint8_t* filtered_data) {
DoGradientFilter_MIPSdspR2(data, width, height, stride, 0, height,
filtered_data);
}
//------------------------------------------------------------------------------
static void HorizontalUnfilter(const uint8_t* prev, const uint8_t* in,
uint8_t* out, int width) {
static void HorizontalUnfilter_MIPSdspR2(const uint8_t* prev, const uint8_t* in,
uint8_t* out, int width) {
out[0] = in[0] + (prev == NULL ? 0 : prev[0]);
DO_PREDICT_LINE(in + 1, out + 1, width - 1, 1);
}
static void VerticalUnfilter(const uint8_t* prev, const uint8_t* in,
uint8_t* out, int width) {
static void VerticalUnfilter_MIPSdspR2(const uint8_t* prev, const uint8_t* in,
uint8_t* out, int width) {
if (prev == NULL) {
HorizontalUnfilter(NULL, in, out, width);
HorizontalUnfilter_MIPSdspR2(NULL, in, out, width);
} else {
DO_PREDICT_LINE_VERTICAL(in, prev, out, width, 1);
}
}
static void GradientUnfilter(const uint8_t* prev, const uint8_t* in,
uint8_t* out, int width) {
static void GradientUnfilter_MIPSdspR2(const uint8_t* prev, const uint8_t* in,
uint8_t* out, int width) {
if (prev == NULL) {
HorizontalUnfilter(NULL, in, out, width);
HorizontalUnfilter_MIPSdspR2(NULL, in, out, width);
} else {
uint8_t top = prev[0], top_left = top, left = top;
int i;
for (i = 0; i < width; ++i) {
top = prev[i]; // need to read this first, in case prev==dst
left = in[i] + GradientPredictor(left, top, top_left);
left = in[i] + GradientPredictor_MIPSdspR2(left, top, top_left);
top_left = top;
out[i] = left;
}
@ -379,13 +386,13 @@ static void GradientUnfilter(const uint8_t* prev, const uint8_t* in,
extern void VP8FiltersInitMIPSdspR2(void);
WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInitMIPSdspR2(void) {
WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter;
WebPUnfilters[WEBP_FILTER_VERTICAL] = VerticalUnfilter;
WebPUnfilters[WEBP_FILTER_GRADIENT] = GradientUnfilter;
WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter_MIPSdspR2;
WebPUnfilters[WEBP_FILTER_VERTICAL] = VerticalUnfilter_MIPSdspR2;
WebPUnfilters[WEBP_FILTER_GRADIENT] = GradientUnfilter_MIPSdspR2;
WebPFilters[WEBP_FILTER_HORIZONTAL] = HorizontalFilter;
WebPFilters[WEBP_FILTER_VERTICAL] = VerticalFilter;
WebPFilters[WEBP_FILTER_GRADIENT] = GradientFilter;
WebPFilters[WEBP_FILTER_HORIZONTAL] = HorizontalFilter_MIPSdspR2;
WebPFilters[WEBP_FILTER_VERTICAL] = VerticalFilter_MIPSdspR2;
WebPFilters[WEBP_FILTER_GRADIENT] = GradientFilter_MIPSdspR2;
}
#else // !WEBP_USE_MIPS_DSP_R2

View File

@ -11,11 +11,11 @@
//
// Author: Prashant Patil (prashant.patil@imgtec.com)
#include "./dsp.h"
#include "src/dsp/dsp.h"
#if defined(WEBP_USE_MSA)
#include "./msa_macro.h"
#include "src/dsp/msa_macro.h"
#include <assert.h>
@ -66,8 +66,8 @@ static WEBP_INLINE void PredictLineInverse0(const uint8_t* src,
//------------------------------------------------------------------------------
// Horrizontal filter
static void HorizontalFilter(const uint8_t* data, int width, int height,
int stride, uint8_t* filtered_data) {
static void HorizontalFilter_MSA(const uint8_t* data, int width, int height,
int stride, uint8_t* filtered_data) {
const uint8_t* preds = data;
const uint8_t* in = data;
uint8_t* out = filtered_data;
@ -129,8 +129,8 @@ static WEBP_INLINE void PredictLineGradient(const uint8_t* pinput,
}
static void GradientFilter(const uint8_t* data, int width, int height,
int stride, uint8_t* filtered_data) {
static void GradientFilter_MSA(const uint8_t* data, int width, int height,
int stride, uint8_t* filtered_data) {
const uint8_t* in = data;
const uint8_t* preds = data;
uint8_t* out = filtered_data;
@ -157,8 +157,8 @@ static void GradientFilter(const uint8_t* data, int width, int height,
//------------------------------------------------------------------------------
// Vertical filter
static void VerticalFilter(const uint8_t* data, int width, int height,
int stride, uint8_t* filtered_data) {
static void VerticalFilter_MSA(const uint8_t* data, int width, int height,
int stride, uint8_t* filtered_data) {
const uint8_t* in = data;
const uint8_t* preds = data;
uint8_t* out = filtered_data;
@ -190,9 +190,9 @@ static void VerticalFilter(const uint8_t* data, int width, int height,
extern void VP8FiltersInitMSA(void);
WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInitMSA(void) {
WebPFilters[WEBP_FILTER_HORIZONTAL] = HorizontalFilter;
WebPFilters[WEBP_FILTER_VERTICAL] = VerticalFilter;
WebPFilters[WEBP_FILTER_GRADIENT] = GradientFilter;
WebPFilters[WEBP_FILTER_HORIZONTAL] = HorizontalFilter_MSA;
WebPFilters[WEBP_FILTER_VERTICAL] = VerticalFilter_MSA;
WebPFilters[WEBP_FILTER_GRADIENT] = GradientFilter_MSA;
}
#else // !WEBP_USE_MSA

View File

@ -11,12 +11,12 @@
//
// Author: Skal (pascal.massimino@gmail.com)
#include "./dsp.h"
#include "src/dsp/dsp.h"
#if defined(WEBP_USE_NEON)
#include <assert.h>
#include "./neon.h"
#include "src/dsp/neon.h"
//------------------------------------------------------------------------------
// Helpful macros.
@ -134,7 +134,7 @@ static WEBP_INLINE void DoVerticalFilter_NEON(const uint8_t* in,
}
static void VerticalFilter_NEON(const uint8_t* data, int width, int height,
int stride, uint8_t* filtered_data) {
int stride, uint8_t* filtered_data) {
DoVerticalFilter_NEON(data, width, height, stride, 0, height,
filtered_data);
}
@ -196,7 +196,7 @@ static WEBP_INLINE void DoGradientFilter_NEON(const uint8_t* in,
}
static void GradientFilter_NEON(const uint8_t* data, int width, int height,
int stride, uint8_t* filtered_data) {
int stride, uint8_t* filtered_data) {
DoGradientFilter_NEON(data, width, height, stride, 0, height,
filtered_data);
}
@ -251,9 +251,11 @@ static void VerticalUnfilter_NEON(const uint8_t* prev, const uint8_t* in,
// GradientUnfilter_NEON is correct but slower than the C-version,
// at least on ARM64. For armv7, it's a wash.
// So best is to disable it for now, but keep the idea around...
// #define USE_GRADIENT_UNFILTER
#if !defined(USE_GRADIENT_UNFILTER)
#define USE_GRADIENT_UNFILTER 0 // ALTERNATE_CODE
#endif
#if defined(USE_GRADIENT_UNFILTER)
#if (USE_GRADIENT_UNFILTER == 1)
#define GRAD_PROCESS_LANE(L) do { \
const uint8x8_t tmp1 = ROTATE_RIGHT_N(pred, 1); /* rotate predictor in */ \
const int16x8_t tmp2 = vaddq_s16(BC, U8_TO_S16(tmp1)); \
@ -292,7 +294,7 @@ static void GradientPredictInverse_NEON(const uint8_t* const in,
#undef GRAD_PROCESS_LANE
static void GradientUnfilter_NEON(const uint8_t* prev, const uint8_t* in,
uint8_t* out, int width) {
uint8_t* out, int width) {
if (prev == NULL) {
HorizontalUnfilter_NEON(NULL, in, out, width);
} else {
@ -311,7 +313,7 @@ extern void VP8FiltersInitNEON(void);
WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInitNEON(void) {
WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter_NEON;
WebPUnfilters[WEBP_FILTER_VERTICAL] = VerticalUnfilter_NEON;
#if defined(USE_GRADIENT_UNFILTER)
#if (USE_GRADIENT_UNFILTER == 1)
WebPUnfilters[WEBP_FILTER_GRADIENT] = GradientUnfilter_NEON;
#endif

View File

@ -11,7 +11,7 @@
//
// Author: Skal (pascal.massimino@gmail.com)
#include "./dsp.h"
#include "src/dsp/dsp.h"
#if defined(WEBP_USE_SSE2)
@ -24,16 +24,16 @@
// Helpful macro.
# define SANITY_CHECK(in, out) \
assert(in != NULL); \
assert(out != NULL); \
assert((in) != NULL); \
assert((out) != NULL); \
assert(width > 0); \
assert(height > 0); \
assert(stride >= width); \
assert(row >= 0 && num_rows > 0 && row + num_rows <= height); \
(void)height; // Silence unused warning.
static void PredictLineTop(const uint8_t* src, const uint8_t* pred,
uint8_t* dst, int length) {
static void PredictLineTop_SSE2(const uint8_t* src, const uint8_t* pred,
uint8_t* dst, int length) {
int i;
const int max_pos = length & ~31;
assert(length >= 0);
@ -51,7 +51,7 @@ static void PredictLineTop(const uint8_t* src, const uint8_t* pred,
}
// Special case for left-based prediction (when preds==dst-1 or preds==src-1).
static void PredictLineLeft(const uint8_t* src, uint8_t* dst, int length) {
static void PredictLineLeft_SSE2(const uint8_t* src, uint8_t* dst, int length) {
int i;
const int max_pos = length & ~31;
assert(length >= 0);
@ -71,10 +71,11 @@ static void PredictLineLeft(const uint8_t* src, uint8_t* dst, int length) {
//------------------------------------------------------------------------------
// Horizontal filter.
static WEBP_INLINE void DoHorizontalFilter(const uint8_t* in,
int width, int height, int stride,
int row, int num_rows,
uint8_t* out) {
static WEBP_INLINE void DoHorizontalFilter_SSE2(const uint8_t* in,
int width, int height,
int stride,
int row, int num_rows,
uint8_t* out) {
const size_t start_offset = row * stride;
const int last_row = row + num_rows;
SANITY_CHECK(in, out);
@ -84,7 +85,7 @@ static WEBP_INLINE void DoHorizontalFilter(const uint8_t* in,
if (row == 0) {
// Leftmost pixel is the same as input for topmost scanline.
out[0] = in[0];
PredictLineLeft(in + 1, out + 1, width - 1);
PredictLineLeft_SSE2(in + 1, out + 1, width - 1);
row = 1;
in += stride;
out += stride;
@ -94,7 +95,7 @@ static WEBP_INLINE void DoHorizontalFilter(const uint8_t* in,
while (row < last_row) {
// Leftmost pixel is predicted from above.
out[0] = in[0] - in[-stride];
PredictLineLeft(in + 1, out + 1, width - 1);
PredictLineLeft_SSE2(in + 1, out + 1, width - 1);
++row;
in += stride;
out += stride;
@ -104,9 +105,10 @@ static WEBP_INLINE void DoHorizontalFilter(const uint8_t* in,
//------------------------------------------------------------------------------
// Vertical filter.
static WEBP_INLINE void DoVerticalFilter(const uint8_t* in,
int width, int height, int stride,
int row, int num_rows, uint8_t* out) {
static WEBP_INLINE void DoVerticalFilter_SSE2(const uint8_t* in,
int width, int height, int stride,
int row, int num_rows,
uint8_t* out) {
const size_t start_offset = row * stride;
const int last_row = row + num_rows;
SANITY_CHECK(in, out);
@ -117,7 +119,7 @@ static WEBP_INLINE void DoVerticalFilter(const uint8_t* in,
// Very first top-left pixel is copied.
out[0] = in[0];
// Rest of top scan-line is left-predicted.
PredictLineLeft(in + 1, out + 1, width - 1);
PredictLineLeft_SSE2(in + 1, out + 1, width - 1);
row = 1;
in += stride;
out += stride;
@ -125,7 +127,7 @@ static WEBP_INLINE void DoVerticalFilter(const uint8_t* in,
// Filter line-by-line.
while (row < last_row) {
PredictLineTop(in, in - stride, out, width);
PredictLineTop_SSE2(in, in - stride, out, width);
++row;
in += stride;
out += stride;
@ -135,14 +137,14 @@ static WEBP_INLINE void DoVerticalFilter(const uint8_t* in,
//------------------------------------------------------------------------------
// Gradient filter.
static WEBP_INLINE int GradientPredictorC(uint8_t a, uint8_t b, uint8_t c) {
static WEBP_INLINE int GradientPredictor_SSE2(uint8_t a, uint8_t b, uint8_t c) {
const int g = a + b - c;
return ((g & ~0xff) == 0) ? g : (g < 0) ? 0 : 255; // clip to 8bit
}
static void GradientPredictDirect(const uint8_t* const row,
const uint8_t* const top,
uint8_t* const out, int length) {
static void GradientPredictDirect_SSE2(const uint8_t* const row,
const uint8_t* const top,
uint8_t* const out, int length) {
const int max_pos = length & ~7;
int i;
const __m128i zero = _mm_setzero_si128();
@ -161,14 +163,14 @@ static void GradientPredictDirect(const uint8_t* const row,
_mm_storel_epi64((__m128i*)(out + i), H);
}
for (; i < length; ++i) {
out[i] = row[i] - GradientPredictorC(row[i - 1], top[i], top[i - 1]);
out[i] = row[i] - GradientPredictor_SSE2(row[i - 1], top[i], top[i - 1]);
}
}
static WEBP_INLINE void DoGradientFilter(const uint8_t* in,
int width, int height, int stride,
int row, int num_rows,
uint8_t* out) {
static WEBP_INLINE void DoGradientFilter_SSE2(const uint8_t* in,
int width, int height, int stride,
int row, int num_rows,
uint8_t* out) {
const size_t start_offset = row * stride;
const int last_row = row + num_rows;
SANITY_CHECK(in, out);
@ -178,7 +180,7 @@ static WEBP_INLINE void DoGradientFilter(const uint8_t* in,
// left prediction for top scan-line
if (row == 0) {
out[0] = in[0];
PredictLineLeft(in + 1, out + 1, width - 1);
PredictLineLeft_SSE2(in + 1, out + 1, width - 1);
row = 1;
in += stride;
out += stride;
@ -187,7 +189,7 @@ static WEBP_INLINE void DoGradientFilter(const uint8_t* in,
// Filter line-by-line.
while (row < last_row) {
out[0] = in[0] - in[-stride];
GradientPredictDirect(in + 1, in + 1 - stride, out + 1, width - 1);
GradientPredictDirect_SSE2(in + 1, in + 1 - stride, out + 1, width - 1);
++row;
in += stride;
out += stride;
@ -198,26 +200,27 @@ static WEBP_INLINE void DoGradientFilter(const uint8_t* in,
//------------------------------------------------------------------------------
static void HorizontalFilter(const uint8_t* data, int width, int height,
int stride, uint8_t* filtered_data) {
DoHorizontalFilter(data, width, height, stride, 0, height, filtered_data);
static void HorizontalFilter_SSE2(const uint8_t* data, int width, int height,
int stride, uint8_t* filtered_data) {
DoHorizontalFilter_SSE2(data, width, height, stride, 0, height,
filtered_data);
}
static void VerticalFilter(const uint8_t* data, int width, int height,
int stride, uint8_t* filtered_data) {
DoVerticalFilter(data, width, height, stride, 0, height, filtered_data);
static void VerticalFilter_SSE2(const uint8_t* data, int width, int height,
int stride, uint8_t* filtered_data) {
DoVerticalFilter_SSE2(data, width, height, stride, 0, height, filtered_data);
}
static void GradientFilter(const uint8_t* data, int width, int height,
int stride, uint8_t* filtered_data) {
DoGradientFilter(data, width, height, stride, 0, height, filtered_data);
static void GradientFilter_SSE2(const uint8_t* data, int width, int height,
int stride, uint8_t* filtered_data) {
DoGradientFilter_SSE2(data, width, height, stride, 0, height, filtered_data);
}
//------------------------------------------------------------------------------
// Inverse transforms
static void HorizontalUnfilter(const uint8_t* prev, const uint8_t* in,
uint8_t* out, int width) {
static void HorizontalUnfilter_SSE2(const uint8_t* prev, const uint8_t* in,
uint8_t* out, int width) {
int i;
__m128i last;
out[0] = in[0] + (prev == NULL ? 0 : prev[0]);
@ -238,10 +241,10 @@ static void HorizontalUnfilter(const uint8_t* prev, const uint8_t* in,
for (; i < width; ++i) out[i] = in[i] + out[i - 1];
}
static void VerticalUnfilter(const uint8_t* prev, const uint8_t* in,
uint8_t* out, int width) {
static void VerticalUnfilter_SSE2(const uint8_t* prev, const uint8_t* in,
uint8_t* out, int width) {
if (prev == NULL) {
HorizontalUnfilter(NULL, in, out, width);
HorizontalUnfilter_SSE2(NULL, in, out, width);
} else {
int i;
const int max_pos = width & ~31;
@ -260,9 +263,9 @@ static void VerticalUnfilter(const uint8_t* prev, const uint8_t* in,
}
}
static void GradientPredictInverse(const uint8_t* const in,
const uint8_t* const top,
uint8_t* const row, int length) {
static void GradientPredictInverse_SSE2(const uint8_t* const in,
const uint8_t* const top,
uint8_t* const row, int length) {
if (length > 0) {
int i;
const int max_pos = length & ~7;
@ -293,18 +296,18 @@ static void GradientPredictInverse(const uint8_t* const in,
_mm_storel_epi64((__m128i*)&row[i], out);
}
for (; i < length; ++i) {
row[i] = in[i] + GradientPredictorC(row[i - 1], top[i], top[i - 1]);
row[i] = in[i] + GradientPredictor_SSE2(row[i - 1], top[i], top[i - 1]);
}
}
}
static void GradientUnfilter(const uint8_t* prev, const uint8_t* in,
uint8_t* out, int width) {
static void GradientUnfilter_SSE2(const uint8_t* prev, const uint8_t* in,
uint8_t* out, int width) {
if (prev == NULL) {
HorizontalUnfilter(NULL, in, out, width);
HorizontalUnfilter_SSE2(NULL, in, out, width);
} else {
out[0] = in[0] + prev[0]; // predict from above
GradientPredictInverse(in + 1, prev + 1, out + 1, width - 1);
GradientPredictInverse_SSE2(in + 1, prev + 1, out + 1, width - 1);
}
}
@ -314,13 +317,13 @@ static void GradientUnfilter(const uint8_t* prev, const uint8_t* in,
extern void VP8FiltersInitSSE2(void);
WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInitSSE2(void) {
WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter;
WebPUnfilters[WEBP_FILTER_VERTICAL] = VerticalUnfilter;
WebPUnfilters[WEBP_FILTER_GRADIENT] = GradientUnfilter;
WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter_SSE2;
WebPUnfilters[WEBP_FILTER_VERTICAL] = VerticalUnfilter_SSE2;
WebPUnfilters[WEBP_FILTER_GRADIENT] = GradientUnfilter_SSE2;
WebPFilters[WEBP_FILTER_HORIZONTAL] = HorizontalFilter;
WebPFilters[WEBP_FILTER_VERTICAL] = VerticalFilter;
WebPFilters[WEBP_FILTER_GRADIENT] = GradientFilter;
WebPFilters[WEBP_FILTER_HORIZONTAL] = HorizontalFilter_SSE2;
WebPFilters[WEBP_FILTER_VERTICAL] = VerticalFilter_SSE2;
WebPFilters[WEBP_FILTER_GRADIENT] = GradientFilter_SSE2;
}
#else // !WEBP_USE_SSE2

View File

@ -13,14 +13,15 @@
// Jyrki Alakuijala (jyrki@google.com)
// Urvang Joshi (urvang@google.com)
#include "./dsp.h"
#include "src/dsp/dsp.h"
#include <assert.h>
#include <math.h>
#include <stdlib.h>
#include "../dec/vp8li_dec.h"
#include "../utils/endian_inl_utils.h"
#include "./lossless.h"
#include "./lossless_common.h"
#include "src/dec/vp8li_dec.h"
#include "src/utils/endian_inl_utils.h"
#include "src/dsp/lossless.h"
#include "src/dsp/lossless_common.h"
#define MAX_DIFF_COST (1e30f)
@ -80,8 +81,9 @@ static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b;
}
// gcc-4.9 on ARM generates incorrect code in Select() when Sub3() is inlined.
#if defined(__arm__) && LOCAL_GCC_VERSION == 0x409
// gcc <= 4.9 on ARM generates incorrect code in Select() when Sub3() is
// inlined.
#if defined(__arm__) && LOCAL_GCC_VERSION <= 0x409
# define LOCAL_INLINE __attribute__ ((noinline))
#else
# define LOCAL_INLINE WEBP_INLINE
@ -107,69 +109,69 @@ static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
//------------------------------------------------------------------------------
// Predictors
static uint32_t Predictor0(uint32_t left, const uint32_t* const top) {
static uint32_t Predictor0_C(uint32_t left, const uint32_t* const top) {
(void)top;
(void)left;
return ARGB_BLACK;
}
static uint32_t Predictor1(uint32_t left, const uint32_t* const top) {
static uint32_t Predictor1_C(uint32_t left, const uint32_t* const top) {
(void)top;
return left;
}
static uint32_t Predictor2(uint32_t left, const uint32_t* const top) {
static uint32_t Predictor2_C(uint32_t left, const uint32_t* const top) {
(void)left;
return top[0];
}
static uint32_t Predictor3(uint32_t left, const uint32_t* const top) {
static uint32_t Predictor3_C(uint32_t left, const uint32_t* const top) {
(void)left;
return top[1];
}
static uint32_t Predictor4(uint32_t left, const uint32_t* const top) {
static uint32_t Predictor4_C(uint32_t left, const uint32_t* const top) {
(void)left;
return top[-1];
}
static uint32_t Predictor5(uint32_t left, const uint32_t* const top) {
static uint32_t Predictor5_C(uint32_t left, const uint32_t* const top) {
const uint32_t pred = Average3(left, top[0], top[1]);
return pred;
}
static uint32_t Predictor6(uint32_t left, const uint32_t* const top) {
static uint32_t Predictor6_C(uint32_t left, const uint32_t* const top) {
const uint32_t pred = Average2(left, top[-1]);
return pred;
}
static uint32_t Predictor7(uint32_t left, const uint32_t* const top) {
static uint32_t Predictor7_C(uint32_t left, const uint32_t* const top) {
const uint32_t pred = Average2(left, top[0]);
return pred;
}
static uint32_t Predictor8(uint32_t left, const uint32_t* const top) {
static uint32_t Predictor8_C(uint32_t left, const uint32_t* const top) {
const uint32_t pred = Average2(top[-1], top[0]);
(void)left;
return pred;
}
static uint32_t Predictor9(uint32_t left, const uint32_t* const top) {
static uint32_t Predictor9_C(uint32_t left, const uint32_t* const top) {
const uint32_t pred = Average2(top[0], top[1]);
(void)left;
return pred;
}
static uint32_t Predictor10(uint32_t left, const uint32_t* const top) {
static uint32_t Predictor10_C(uint32_t left, const uint32_t* const top) {
const uint32_t pred = Average4(left, top[-1], top[0], top[1]);
return pred;
}
static uint32_t Predictor11(uint32_t left, const uint32_t* const top) {
static uint32_t Predictor11_C(uint32_t left, const uint32_t* const top) {
const uint32_t pred = Select(top[0], left, top[-1]);
return pred;
}
static uint32_t Predictor12(uint32_t left, const uint32_t* const top) {
static uint32_t Predictor12_C(uint32_t left, const uint32_t* const top) {
const uint32_t pred = ClampedAddSubtractFull(left, top[0], top[-1]);
return pred;
}
static uint32_t Predictor13(uint32_t left, const uint32_t* const top) {
static uint32_t Predictor13_C(uint32_t left, const uint32_t* const top) {
const uint32_t pred = ClampedAddSubtractHalf(left, top[0], top[-1]);
return pred;
}
GENERATE_PREDICTOR_ADD(Predictor0, PredictorAdd0)
static void PredictorAdd1(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* out) {
GENERATE_PREDICTOR_ADD(Predictor0_C, PredictorAdd0_C)
static void PredictorAdd1_C(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* out) {
int i;
uint32_t left = out[-1];
for (i = 0; i < num_pixels; ++i) {
@ -177,29 +179,29 @@ static void PredictorAdd1(const uint32_t* in, const uint32_t* upper,
}
(void)upper;
}
GENERATE_PREDICTOR_ADD(Predictor2, PredictorAdd2)
GENERATE_PREDICTOR_ADD(Predictor3, PredictorAdd3)
GENERATE_PREDICTOR_ADD(Predictor4, PredictorAdd4)
GENERATE_PREDICTOR_ADD(Predictor5, PredictorAdd5)
GENERATE_PREDICTOR_ADD(Predictor6, PredictorAdd6)
GENERATE_PREDICTOR_ADD(Predictor7, PredictorAdd7)
GENERATE_PREDICTOR_ADD(Predictor8, PredictorAdd8)
GENERATE_PREDICTOR_ADD(Predictor9, PredictorAdd9)
GENERATE_PREDICTOR_ADD(Predictor10, PredictorAdd10)
GENERATE_PREDICTOR_ADD(Predictor11, PredictorAdd11)
GENERATE_PREDICTOR_ADD(Predictor12, PredictorAdd12)
GENERATE_PREDICTOR_ADD(Predictor13, PredictorAdd13)
GENERATE_PREDICTOR_ADD(Predictor2_C, PredictorAdd2_C)
GENERATE_PREDICTOR_ADD(Predictor3_C, PredictorAdd3_C)
GENERATE_PREDICTOR_ADD(Predictor4_C, PredictorAdd4_C)
GENERATE_PREDICTOR_ADD(Predictor5_C, PredictorAdd5_C)
GENERATE_PREDICTOR_ADD(Predictor6_C, PredictorAdd6_C)
GENERATE_PREDICTOR_ADD(Predictor7_C, PredictorAdd7_C)
GENERATE_PREDICTOR_ADD(Predictor8_C, PredictorAdd8_C)
GENERATE_PREDICTOR_ADD(Predictor9_C, PredictorAdd9_C)
GENERATE_PREDICTOR_ADD(Predictor10_C, PredictorAdd10_C)
GENERATE_PREDICTOR_ADD(Predictor11_C, PredictorAdd11_C)
GENERATE_PREDICTOR_ADD(Predictor12_C, PredictorAdd12_C)
GENERATE_PREDICTOR_ADD(Predictor13_C, PredictorAdd13_C)
//------------------------------------------------------------------------------
// Inverse prediction.
static void PredictorInverseTransform(const VP8LTransform* const transform,
int y_start, int y_end,
const uint32_t* in, uint32_t* out) {
static void PredictorInverseTransform_C(const VP8LTransform* const transform,
int y_start, int y_end,
const uint32_t* in, uint32_t* out) {
const int width = transform->xsize_;
if (y_start == 0) { // First Row follows the L (mode=1) mode.
PredictorAdd0(in, NULL, 1, out);
PredictorAdd1(in + 1, NULL, width - 1, out + 1);
PredictorAdd0_C(in, NULL, 1, out);
PredictorAdd1_C(in + 1, NULL, width - 1, out + 1);
in += width;
out += width;
++y_start;
@ -217,7 +219,7 @@ static void PredictorInverseTransform(const VP8LTransform* const transform,
const uint32_t* pred_mode_src = pred_mode_base;
int x = 1;
// First pixel follows the T (mode=2) mode.
PredictorAdd2(in, out - width, 1, out);
PredictorAdd2_C(in, out - width, 1, out);
// .. the rest:
while (x < width) {
const VP8LPredictorAddSubFunc pred_func =
@ -272,8 +274,8 @@ void VP8LTransformColorInverse_C(const VP8LMultipliers* const m,
const uint32_t argb = src[i];
const uint32_t green = argb >> 8;
const uint32_t red = argb >> 16;
int new_red = red;
int new_blue = argb;
int new_red = red & 0xff;
int new_blue = argb & 0xff;
new_red += ColorTransformDelta(m->green_to_red_, green);
new_red &= 0xff;
new_blue += ColorTransformDelta(m->green_to_blue_, green);
@ -284,9 +286,9 @@ void VP8LTransformColorInverse_C(const VP8LMultipliers* const m,
}
// Color space inverse transform.
static void ColorSpaceInverseTransform(const VP8LTransform* const transform,
int y_start, int y_end,
const uint32_t* src, uint32_t* dst) {
static void ColorSpaceInverseTransform_C(const VP8LTransform* const transform,
int y_start, int y_end,
const uint32_t* src, uint32_t* dst) {
const int width = transform->xsize_;
const int tile_width = 1 << transform->bits_;
const int mask = tile_width - 1;
@ -362,10 +364,10 @@ STATIC_DECL void FUNC_NAME(const VP8LTransform* const transform, \
} \
}
COLOR_INDEX_INVERSE(ColorIndexInverseTransform, MapARGB, static, uint32_t, 32b,
VP8GetARGBIndex, VP8GetARGBValue)
COLOR_INDEX_INVERSE(VP8LColorIndexInverseTransformAlpha, MapAlpha, , uint8_t,
8b, VP8GetAlphaIndex, VP8GetAlphaValue)
COLOR_INDEX_INVERSE(ColorIndexInverseTransform_C, MapARGB_C, static,
uint32_t, 32b, VP8GetARGBIndex, VP8GetARGBValue)
COLOR_INDEX_INVERSE(VP8LColorIndexInverseTransformAlpha, MapAlpha_C, ,
uint8_t, 8b, VP8GetAlphaIndex, VP8GetAlphaValue)
#undef COLOR_INDEX_INVERSE
@ -380,7 +382,7 @@ void VP8LInverseTransform(const VP8LTransform* const transform,
VP8LAddGreenToBlueAndRed(in, (row_end - row_start) * width, out);
break;
case PREDICTOR_TRANSFORM:
PredictorInverseTransform(transform, row_start, row_end, in, out);
PredictorInverseTransform_C(transform, row_start, row_end, in, out);
if (row_end != transform->ysize_) {
// The last predicted row in this iteration will be the top-pred row
// for the first row in next iteration.
@ -389,7 +391,7 @@ void VP8LInverseTransform(const VP8LTransform* const transform,
}
break;
case CROSS_COLOR_TRANSFORM:
ColorSpaceInverseTransform(transform, row_start, row_end, in, out);
ColorSpaceInverseTransform_C(transform, row_start, row_end, in, out);
break;
case COLOR_INDEXING_TRANSFORM:
if (in == out && transform->bits_ > 0) {
@ -403,9 +405,9 @@ void VP8LInverseTransform(const VP8LTransform* const transform,
VP8LSubSampleSize(transform->xsize_, transform->bits_);
uint32_t* const src = out + out_stride - in_stride;
memmove(src, out, in_stride * sizeof(*src));
ColorIndexInverseTransform(transform, row_start, row_end, src, out);
ColorIndexInverseTransform_C(transform, row_start, row_end, src, out);
} else {
ColorIndexInverseTransform(transform, row_start, row_end, in, out);
ColorIndexInverseTransform_C(transform, row_start, row_end, in, out);
}
break;
}
@ -452,7 +454,7 @@ void VP8LConvertBGRAToRGBA4444_C(const uint32_t* src,
const uint32_t argb = *src++;
const uint8_t rg = ((argb >> 16) & 0xf0) | ((argb >> 12) & 0xf);
const uint8_t ba = ((argb >> 0) & 0xf0) | ((argb >> 28) & 0xf);
#ifdef WEBP_SWAP_16BIT_CSP
#if (WEBP_SWAP_16BIT_CSP == 1)
*dst++ = ba;
*dst++ = rg;
#else
@ -469,7 +471,7 @@ void VP8LConvertBGRAToRGB565_C(const uint32_t* src,
const uint32_t argb = *src++;
const uint8_t rg = ((argb >> 16) & 0xf8) | ((argb >> 13) & 0x7);
const uint8_t gb = ((argb >> 5) & 0xe0) | ((argb >> 3) & 0x1f);
#ifdef WEBP_SWAP_16BIT_CSP
#if (WEBP_SWAP_16BIT_CSP == 1)
*dst++ = gb;
*dst++ = rg;
#else
@ -496,22 +498,7 @@ static void CopyOrSwap(const uint32_t* src, int num_pixels, uint8_t* dst,
const uint32_t* const src_end = src + num_pixels;
while (src < src_end) {
const uint32_t argb = *src++;
#if !defined(WORDS_BIGENDIAN)
#if !defined(WEBP_REFERENCE_IMPLEMENTATION)
WebPUint32ToMem(dst, BSwap32(argb));
#else // WEBP_REFERENCE_IMPLEMENTATION
dst[0] = (argb >> 24) & 0xff;
dst[1] = (argb >> 16) & 0xff;
dst[2] = (argb >> 8) & 0xff;
dst[3] = (argb >> 0) & 0xff;
#endif
#else // WORDS_BIGENDIAN
dst[0] = (argb >> 0) & 0xff;
dst[1] = (argb >> 8) & 0xff;
dst[2] = (argb >> 16) & 0xff;
dst[3] = (argb >> 24) & 0xff;
#endif
dst += sizeof(argb);
}
} else {
@ -593,23 +580,23 @@ extern void VP8LDspInitMSA(void);
static volatile VP8CPUInfo lossless_last_cpuinfo_used =
(VP8CPUInfo)&lossless_last_cpuinfo_used;
#define COPY_PREDICTOR_ARRAY(IN, OUT) do { \
(OUT)[0] = IN##0; \
(OUT)[1] = IN##1; \
(OUT)[2] = IN##2; \
(OUT)[3] = IN##3; \
(OUT)[4] = IN##4; \
(OUT)[5] = IN##5; \
(OUT)[6] = IN##6; \
(OUT)[7] = IN##7; \
(OUT)[8] = IN##8; \
(OUT)[9] = IN##9; \
(OUT)[10] = IN##10; \
(OUT)[11] = IN##11; \
(OUT)[12] = IN##12; \
(OUT)[13] = IN##13; \
(OUT)[14] = IN##0; /* <- padding security sentinels*/ \
(OUT)[15] = IN##0; \
#define COPY_PREDICTOR_ARRAY(IN, OUT) do { \
(OUT)[0] = IN##0_C; \
(OUT)[1] = IN##1_C; \
(OUT)[2] = IN##2_C; \
(OUT)[3] = IN##3_C; \
(OUT)[4] = IN##4_C; \
(OUT)[5] = IN##5_C; \
(OUT)[6] = IN##6_C; \
(OUT)[7] = IN##7_C; \
(OUT)[8] = IN##8_C; \
(OUT)[9] = IN##9_C; \
(OUT)[10] = IN##10_C; \
(OUT)[11] = IN##11_C; \
(OUT)[12] = IN##12_C; \
(OUT)[13] = IN##13_C; \
(OUT)[14] = IN##0_C; /* <- padding security sentinels*/ \
(OUT)[15] = IN##0_C; \
} while (0);
WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInit(void) {
@ -620,18 +607,21 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInit(void) {
COPY_PREDICTOR_ARRAY(PredictorAdd, VP8LPredictorsAdd)
COPY_PREDICTOR_ARRAY(PredictorAdd, VP8LPredictorsAdd_C)
#if !WEBP_NEON_OMIT_C_CODE
VP8LAddGreenToBlueAndRed = VP8LAddGreenToBlueAndRed_C;
VP8LTransformColorInverse = VP8LTransformColorInverse_C;
VP8LConvertBGRAToRGB = VP8LConvertBGRAToRGB_C;
VP8LConvertBGRAToRGBA = VP8LConvertBGRAToRGBA_C;
VP8LConvertBGRAToRGB = VP8LConvertBGRAToRGB_C;
VP8LConvertBGRAToBGR = VP8LConvertBGRAToBGR_C;
#endif
VP8LConvertBGRAToRGBA4444 = VP8LConvertBGRAToRGBA4444_C;
VP8LConvertBGRAToRGB565 = VP8LConvertBGRAToRGB565_C;
VP8LConvertBGRAToBGR = VP8LConvertBGRAToBGR_C;
VP8LMapColor32b = MapARGB;
VP8LMapColor8b = MapAlpha;
VP8LMapColor32b = MapARGB_C;
VP8LMapColor8b = MapAlpha_C;
// If defined, use CPUInfo() to overwrite some pointers with faster versions.
if (VP8GetCPUInfo != NULL) {
@ -640,11 +630,6 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInit(void) {
VP8LDspInitSSE2();
}
#endif
#if defined(WEBP_USE_NEON)
if (VP8GetCPUInfo(kNEON)) {
VP8LDspInitNEON();
}
#endif
#if defined(WEBP_USE_MIPS_DSP_R2)
if (VP8GetCPUInfo(kMIPSdspR2)) {
VP8LDspInitMIPSdspR2();
@ -656,6 +641,24 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInit(void) {
}
#endif
}
#if defined(WEBP_USE_NEON)
if (WEBP_NEON_OMIT_C_CODE ||
(VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
VP8LDspInitNEON();
}
#endif
assert(VP8LAddGreenToBlueAndRed != NULL);
assert(VP8LTransformColorInverse != NULL);
assert(VP8LConvertBGRAToRGBA != NULL);
assert(VP8LConvertBGRAToRGB != NULL);
assert(VP8LConvertBGRAToBGR != NULL);
assert(VP8LConvertBGRAToRGBA4444 != NULL);
assert(VP8LConvertBGRAToRGB565 != NULL);
assert(VP8LMapColor32b != NULL);
assert(VP8LMapColor8b != NULL);
lossless_last_cpuinfo_used = VP8GetCPUInfo;
}
#undef COPY_PREDICTOR_ARRAY

View File

@ -15,18 +15,18 @@
#ifndef WEBP_DSP_LOSSLESS_H_
#define WEBP_DSP_LOSSLESS_H_
#include "../webp/types.h"
#include "../webp/decode.h"
#include "src/webp/types.h"
#include "src/webp/decode.h"
#include "../enc/histogram_enc.h"
#include "../utils/utils.h"
#include "src/enc/histogram_enc.h"
#include "src/utils/utils.h"
#ifdef __cplusplus
extern "C" {
#endif
#ifdef WEBP_EXPERIMENTAL_FEATURES
#include "../enc/delta_palettization_enc.h"
#include "src/enc/delta_palettization_enc.h"
#endif // WEBP_EXPERIMENTAL_FEATURES
//------------------------------------------------------------------------------
@ -124,7 +124,7 @@ void VP8LDspInit(void);
typedef void (*VP8LProcessEncBlueAndRedFunc)(uint32_t* dst, int num_pixels);
extern VP8LProcessEncBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed;
typedef void (*VP8LTransformColorFunc)(const VP8LMultipliers* const m,
uint32_t* const dst, int num_pixels);
uint32_t* dst, int num_pixels);
extern VP8LTransformColorFunc VP8LTransformColor;
typedef void (*VP8LCollectColorBlueTransformsFunc)(
const uint32_t* argb, int stride,

View File

@ -16,9 +16,9 @@
#ifndef WEBP_DSP_LOSSLESS_COMMON_H_
#define WEBP_DSP_LOSSLESS_COMMON_H_
#include "../webp/types.h"
#include "src/webp/types.h"
#include "../utils/utils.h"
#include "src/utils/utils.h"
#ifdef __cplusplus
extern "C" {

Some files were not shown because too many files have changed in this diff Show More