Compare commits

..

64 Commits

Author SHA1 Message Date
46e18c0a25 vwebp: fix incorrect clipping w/NO_BLEND
when the previous frame does not specify dispose to background only the
current frame's rectangle should be cleared

related to bug #245

(cherry picked from commit 469ba2cdfd)

Change-Id: I2fc4f5be99057e0bf87d8fedec57b06859b070bd
2015-10-23 13:10:17 -07:00
fcfde90b9c update issue tracker url
code.google.com -> bugs.chromium.org

(cherry picked from commit 4b9186b2eb)

Change-Id: I0dc99a85c29657415401160df3c7dd0423f96457
2015-10-20 22:44:52 -07:00
8c3fb330e5 update AUTHORS
missed in 0.4.3

Change-Id: I2b488307dfec592264467847adcbc3de0a6c83f3
2015-10-19 15:41:24 -07:00
808d4a686e update NEWS
Change-Id: I301be57ed6d925164f3827a79c6215ab79f120c7
2015-10-19 15:41:24 -07:00
62864042c0 bump version to 0.4.4
libwebp{,decoder} - 0.4.4
libwebp libtool - 5.4.0
libwebpdecoder libtool - 1.4.0

mux/demux - 0.2.2 (unchanged)
libtool - 1.2.0 (unchanged)

Change-Id: I7d421dc47ad4d25a17450ce1b04562c5d58c596b
2015-10-19 15:41:23 -07:00
b8b314ab39 doc/webp-container-spec: update repo browser link
gerrit.chromium.org is deprecated, use chromium.googlesource.com.

(cherry picked from commit f0486968ba)

Change-Id: Iaa6d6d18798dbd8cce908988287387f5cb8e8e64
2015-10-19 15:41:23 -07:00
c3953e37c9 fix typo: constitutes -> constitute
(cherry picked from commit 5fe1fe37a5)

Change-Id: I5b20ef41f4a810e11a4499b46b5e7dc93247beed
2015-10-19 15:41:23 -07:00
cd377e291c Use __has_builtin to check clang support
Older versions of Xcode with clang reporting versions 4.[012] and 5.0
did not include support for __builtin_bswap16. Checking in this manner
avoids using brittle version checks.

Matches a change to libvpx:
https://chromium-review.googlesource.com/305573
to fix:
https://code.google.com/p/webm/issues/detail?id=1082

(cherry picked from commit d26d9def80)

Change-Id: I23ea466ee1b53b12cd3fb45f65a2186c8dda95a1
2015-10-19 15:41:22 -07:00
e2e89806f7 wicdec: fix alpha detection w/64bpp BGRA/RGBA
(cherry picked from commit badfcbaa1e)

Change-Id: Ia712cf736e490d482a52b63d8e2816d0b7035cd0
2015-10-19 15:41:22 -07:00
5c3fe77dd8 iosbuild: fix linking with Xcode 7 / iOS SDK 9
-fembed-bitcode is the default, a framework built without this flag will
fail to link against an application using it.

BUG=267

(cherry picked from commit db1321a6a2)

Change-Id: I83461cb058b1866ac99b3f0bdfa890933e88ed26
2015-10-19 15:41:22 -07:00
f9f5498b6c VP8LAllocateHistogramSet: align histogram[] entries
fixes issue #262: a SIGBUS when accessing a misaligned double in
VP8LHistogram

(cherry picked from commit cd82440ec7)

Change-Id: Ic78cc5366d7e43d892c375b6a69dce2379db931b
2015-10-19 15:41:21 -07:00
3026db2ee5 Loosen the buffer size checks for Y/U/V/A too.
(follow-up to 15ca5014)

(cherry picked from commit 017f8cccec)

Change-Id: Ia122e96f616bd6317c24b69c9534cb7919b8a4a4
2015-10-19 15:41:21 -07:00
d089362d07 loosen the padding check on buffer size
Strictly speaking, the last (or first) row doesn't require padding.

cf https://code.google.com/p/webp/issues/detail?id=258

(cherry picked from commit 15ca5014f1)

Change-Id: Ie9ec8eb776fec1f5cea4cf9e21e81901fd79bf33
2015-10-19 15:41:21 -07:00
53d22c5b3e dec_neon: add whitespace around stringizing operator
prevents unintentional side-effects (though unlikely in this case) with
future compilers, cf:
eebaf97 dsp/mips: add whitespace around stringizing operator

(cherry picked from commit d623a8706f)

Change-Id: I0537091fcc97b4f54d0a156c3c83a28c51456b17
2015-10-19 15:41:21 -07:00
8bcc4d4523 dsp/mips: add whitespace around stringizing operator
fixes compile with gcc 5.1
BUG=259

(cherry picked from commit eebaf97f5a)

Change-Id: Ideb39c6290ab8569b1b6cc835bea11c822d0286c
2015-10-19 15:41:20 -07:00
d49c44f450 Container spec: clarify ordering of ALPH chunk.
Reported by user: https://code.google.com/p/webp/issues/detail?id=255

(cherry picked from commit 585d93dbba)

Change-Id: I9c027ea828d5a367b317744fad7607a16ed52fa5
2015-10-19 15:41:20 -07:00
382de22c84 msvc: fix pointer type warning in BitsLog2Floor
_BitScanReverse() takes an unsigned long*
http://msdn.microsoft.com/en-us/library/fbxyd7zd.aspx

fixes:
C4057: 'function': 'unsigned long *' differs in indirection to slightly
different base types from 'uint32_t *'

fixes issue #253

(cherry picked from commit 0250dfcc19)

Change-Id: I0101ef7be18c7ed188b35e9b17e7f71290953786
2015-10-19 15:41:20 -07:00
84ecd9d85c FlattenSimilarBlocks should only be tried when blending is possible.
This is because, FlattenSimilarBlocks() replaces some opaque pixels by
transparent ones. This results in an equivalent output only if blending
is turned on for the current frame.

(cherry picked from commit 5cccdadf2e)

Change-Id: I05612c952fdbd4b3a6e0ac9f3a7d49822f0cfb9b
2015-10-19 15:41:19 -07:00
f55ebbba82 backport rescaler fix
backported from: 7df9389, 5ff0079

Change-Id: I11b4d97c3c483431528be9ccbd9895baac8c6a63
2015-10-19 15:41:13 -07:00
2ff633c938 fix mips2 build target
tested with mips1 and mips2; this should cover 3/4 as well.
fixes an ftbfs reported on the debian issue tracker:
https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=785000

(cherry picked from commit bf46d0acff)

Change-Id: I2458487c92bd638589fdfec5adb4f22102a5960c
2015-10-15 21:12:28 -07:00
326b5fb863 update ChangeLog
Change-Id: I794a5d58005bb0934ce0de06483567e8ed6bd8eb
2015-03-10 23:06:09 -07:00
a661e50bcb Disable NEON code on Native Client
The NEON assember in libwebp has not yet been ported
to Native Client. This changes disables it.
Related issue:
https://code.google.com/p/nativeclient/issues/detail?id=3205

(cherry picked from commit ac4f5784a0)

Change-Id: I200291db7aa79d40c1f10cff7622c9b8599e6886
2015-03-10 20:29:08 -07:00
fcd94e925a update ChangeLog
Change-Id: I60c273c650a305fe36564ccc5fb1c8d7ea18118f
2015-03-04 11:30:23 -08:00
569fe5789e update NEWS
Change-Id: Iade773fc3b9961fcd9b4112a3972cfc68e3670f2
2015-03-03 19:19:50 -08:00
bd852f5d81 bump version to 0.4.3
libwebp{,decoder} - 0.4.3
libwebp libtool - 5.3.0
libwebpdecoder libtool - 1.3.0

mux/demux - 0.2.2 (unchanged)
libtool - 1.2.0 (unchanged)

Change-Id: Ie8c35ffc20c1bfd782bdafd99da6c6b1373022c1
2015-03-03 19:05:40 -08:00
2d58b64f51 WebPPictureRescale: add a note about 0 width/height
(cherry picked from commit 0f773693bf)

Change-Id: I3890bb3fd32a148d7dd24c714546160c6c59d4ea
2015-03-03 17:53:49 -08:00
a0d8ca576f examples/Android.mk: add webpmux_example target
renamed from 'webpmux' to avoid name clash with the library module name

(cherry picked from commit 6cef0e4fa4)

Change-Id: I33bbdbdcb25a6f35bd85c9a0dbbb93b2428b05f3
2015-03-03 17:53:49 -08:00
34b1d29e3c Android.mk: add webpmux target
(cherry picked from commit 53c16ff047)

Change-Id: I60fc898fd804e23f08d760694192c5d04adcae91
2015-03-03 17:53:49 -08:00
75619881e6 Android.mk: add webpdemux target
(cherry picked from commit 21852a00a1)

Change-Id: I2fbbefbee59a96c52f5addcfc5bfe1216caad5cc
2015-03-03 17:53:48 -08:00
a98757650a Android.mk: add webpdecoder{,_static} targets
webpdecoder_static is reused to create libwebpdecoder.so and
libwebp.{a,so}

(cherry picked from commit 8697a3bcc8)

Change-Id: I940293cb755040c0ea45dc13f22624de8f355867
2015-03-03 17:53:48 -08:00
a6d4859725 Android.mk: split source lists per-directory
will allow reuse in future targets

(cherry picked from commit 4a67049113)

Conflicts:
	Android.mk

Change-Id: Iededc19d954226e62f2d2383a2b80f268d613647
2015-03-03 17:53:48 -08:00
77544d5f5b fix iOS arm64 build with Xcode 6.3
the standard vtbl functions are available there [1][2].
based on a patch from: aaroncrespo
fixes issue #243.

[1]
http://adcdownload.apple.com//Developer_Tools/Xcode_6.3_beta/Xcode_6.3_beta_Release_Notes.pdf
[2] Apple LLVM Compiler Version 6.1
- Xcode 6.3 updates the Apple LLVM compiler to version 6.1.0.
[...]
Support for the arm64 architecture has been significantly revised to
align with ARM's implementation, where the most visible impact is that a
few of the vector intrinsics have changed to match ARM's specifications.

(cherry picked from commit 602a00f93f)

Change-Id: I79a0016f44b9dbe36d0373f7f00a50ab3c2ca447
2015-03-03 17:53:47 -08:00
6dea15784d doc/webp-container-spec: note MSB order for chunk diagrams
addresses question in issue #241

(cherry picked from commit b510fbfe3b)

Change-Id: Iff6a172d5822f6ec8b9bc0951a1c9cd3f98c9251
2015-03-03 17:53:47 -08:00
f7cd57b23d doc/webp-container-spec: cosmetics
partially normalize indent, vertical whitespace and capitalization with
the copy used on developers.google.com/speed/webp

(cherry picked from commit e7d3df2314)

Change-Id: I8044418eeb9eaf5bd5c799675c74f6f845d503d6
2015-03-03 17:53:47 -08:00
1d6b250b07 vwebp: clear canvas at the beginning of each loop
this is in line with the recommendation in the spec, cf.,
5603947 webp-container-spec: clarify background clear on loop

(cherry picked from commit 1579de3cae)

Change-Id: Id3910395b05a1a1f2804be841b61f97bd4bac593
2015-03-03 17:53:46 -08:00
f97b3f86bf webp-container-spec: clarify background clear on loop
at the beginning of the loop there's an implicit clear of the entire
canvas to the background (or application defined) color. this avoids
adding the final composited frame to the first.

(cherry picked from commit 560394798f)

Change-Id: Ia3a52cf4482c6176334a5c9c99a0ddd07d1776e7
2015-03-03 17:53:46 -08:00
4ba83c1759 vwebp: remove unnecessary static Help() prototype
all uses occur after its declaration

(cherry picked from commit 2c906c407c)

Change-Id: I775642ce6d1dec3bc6da2fa0d5d87490992c7e6c
2015-03-03 17:53:46 -08:00
d34e8e3d18 vwebp/animation: display last frame on end-of-loop
previously the first frame would be redisplayed, which might be
unexpected if the final frame was meant to be a composite, for example.

(cherry picked from commit 0f017b56f3)

Change-Id: I4da795623c71501e2fa426e8fba8fb2ffcbab58a
2015-03-03 17:53:45 -08:00
bbbc524fb4 dec/vp8: clear 'dither_' on skipped blocks
DitherRow() only checks this value, not 'skip_' so previously it was
uninitialized for these blocks.

(cherry picked from commit 66935fb9ee)

Change-Id: I0f698b81854ee9d91edacb51c1e3bdab9cba96f2
2015-03-03 17:53:45 -08:00
0339fa26eb lossless_neon: enable subtract green for aarch64
similar to:
1ba61b0 enable NEON intrinsics in aarch64 builds

vtbl1_u8 is available everywhere but Xcode-based iOS arm64 builds, use
vtbl1q_u8 there.

performance varies based on the input, 1-3% on encode was observed

(cherry picked from commit 416e1cea9b)

Change-Id: Ifec35b37eb856acfcf69ed7f16fa078cd40b7034
2015-03-03 17:53:45 -08:00
5a0c2207f4 Regression fix for lossless decoding
Reported here: https://code.google.com/p/webp/issues/detail?id=239

At the beginning of method 'DecodeImageData', pixels up to
'dec->last_pixel_' are assumed to be already cached. So, at the end of
previous call to that method also, that assumption should hold true.

Hence, we should cache all pixels up to 'src' regardless of 'src_last'.

This affects lossless incremental decoding only, as that is when
src_last and src_end differ.
Note: alpha decoding is implicitly incremental, as alpha decoding of
only the rows 'y_end - y_start' happens during FinishRow() call. So, this bug
affects alpha decoding in non-incremental decoding flow as well.

This bug was introduced in: https://gerrit.chromium.org/gerrit/#/c/59716.

(cherry picked from commit 783a8cda24)

Change-Id: Ide6edfeb2609b02aff701e1bd9fd776da0a16be0
2015-03-03 17:53:44 -08:00
6e3a31d595 wicdec: (msvs) quiet some /analyze warnings
add additional return checks and asserts to avoid:
C6102: Using 'XXX' from failed function call ...

(cherry picked from commit 9b228b5416)

Change-Id: I51f5fa630324e0cd7b2d9fceefecb4f4021474b1
2015-03-03 17:53:44 -08:00
b49a578135 dwebp/WritePNG: mark png variables volatile
these are used on both sides of the setjmp().

(cherry picked from commit 7a191398ca)

Change-Id: I4a789bfb3a5d56946a22286c5a140008d90e1ba2
2015-03-03 17:53:43 -08:00
0a4391a196 dwebp: include setjmp.h w/WEBP_HAVE_PNG
setjmp() is used in WritePNG().

(cherry picked from commit 775dfad297)

Change-Id: Iadd836272fc7d368d635c891507ce2a08c4d3dec
2015-03-03 17:53:43 -08:00
90f1ec58a9 dwebp: correct sign in format strings
width / height are unsigned; fixes a warning with msvs /analyze:
C6340: Mismatch on sign: 'const unsigned int' passed as _Param_(4) when
some signed type is required in call to 'fprintf'.

(cherry picked from commit 47d26be760)

Change-Id: I5f1fad4c93745baf17d70178a5e66579ccd2b155
2015-03-03 17:53:43 -08:00
b61ce861f3 VP8LEncodeStream: add an assert
check enc->argb_ to quiet an msvs /analyze warning:
C6387: 'enc->argb_+y*width' could be '0':  this does not adhere to the
specification for the function 'memcpy'.

(cherry picked from commit f0e0677b87)

Change-Id: I87544e92ee0d3ea38942a475c30c6d552f9877b7
2015-03-03 17:53:42 -08:00
df1081bb82 dsp/cpu: (msvs) add include for __cpuidex
and only use it on x86 / x64 where it's available.
has the side-effect of quieting a msvs /analyze warning:
C6001: Using uninitialized memory 'cpu_info'.

(cherry picked from commit 0de5f33e31)

Change-Id: Iae51be3b22b2ee949cfc473eeea9fd9fb6b3c2cb
2015-03-03 17:53:42 -08:00
39aa055529 dsp/cpu: (msvs) avoid immintrin.h on _M_ARM
_xgetgv() isn't relevant there anyway

broken since:
279e661 Merge "dsp/cpu: add include for _xgetbv() w/MSVS"

(cherry picked from commit 4fbe9cf202)

Change-Id: Iaa7bc0c5be9c06bfffab39e194c64c09bf5b5a27
2015-03-03 17:53:42 -08:00
f814f429ca dsp/cpu: add include for _xgetbv() w/MSVS
explicitly add immintrin.h instead of transitively picking it up via
windows.h presumably. makes the code easier to move around.

(cherry picked from commit b6c0428e8c)

Change-Id: If70d5143ac94fc331da763ce034358858e460e06
2015-03-03 17:53:41 -08:00
8508ab99a7 cpu: fix AVX2 detection for gcc/clang targets
ecx needs to be set to 0; the visual studio builds were already doing
this.

https://software.intel.com/en-us/articles/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family

(cherry picked from commit d7c4b02a57)

Change-Id: I95efb115b4d50bbdb6b14fca2aa63d0a24974e55
2015-03-03 17:53:41 -08:00
5769623b6f fix handling of zero-sized partition #0 corner case
reported in https://code.google.com/p/webp/issues/detail?id=237

An empty partition #0 should be indicative of a bitstream error.
The previous code was correct, only an assert was triggered in debug mode.
But we might as well handle the case properly right away...

(cherry picked from commit 205c7f26af)

Change-Id: I4dc31a46191fa9e65659c9a5bf5de9605e93f2f5
2015-03-03 17:53:40 -08:00
b2e71a9080 make the 'last_cpuinfo_used' variable names unique
allows the sources to be #include'd in some hackish builds (don't do
that!)

(cherry picked from commit 67f601cd46)

Conflicts:
	src/dsp/alpha_processing.c
	src/dsp/argb.c
	src/dsp/dec.c
	src/dsp/enc.c
	src/dsp/lossless.c
	src/dsp/upsampling.c
	src/dsp/yuv.c

Change-Id: I0c7a43acbebd0e2d5068845e6daa8ce47361cd91
2015-03-02 18:43:41 -08:00
1273e84517 add -Wformat-nonliteral and -Wformat-security
can be useful, not sure they are a subset of the flags we
use already...

(cherry picked from commit 80d950d94e)

Change-Id: Iec742a99427a791d9527368302a1136df2ff96cd
2015-03-02 18:43:35 -08:00
3ae78eb757 multi-thread fix: lock each entry points with a static var
we compare the current VP8GetCPUInfo pointer to the last used.
This is less code overall and each implementation is still
testable separately (by just changing VP8GetCPUInfo, but not
a separate threads!)

(cherry picked from commit a437694a17)

Conflicts:
	src/dsp/alpha_processing.c
	src/dsp/argb.c
	src/dsp/dec.c
	src/dsp/enc.c
	src/dsp/lossless.c
	src/dsp/upsampling.c
	src/dsp/yuv.c

Change-Id: Ia13fa8ffc4561a884508f6ab71ed0d1b9f1ce59b
2015-03-02 18:43:31 -08:00
5c1eeda922 webp-container-spec: remove references to fragments
this portion of the format was never finalized

(cherry picked from commit a66e66c79d)

Change-Id: I80aa1b27457a0e52b047c7284df2f58b181ca5d8
2015-03-02 18:43:26 -08:00
c5ceea4899 enc_neon: fix building with non-Xcode clang (iOS)
check for __apple_build_version__ to distinguish the two; a version
check could work as Apple bumped Xcode's to 5.x/6.x, but it's unclear
how upstream will deal with their versioning as they go 3.6+, so avoid
it for now.

(cherry picked from commit a3946b8956)

Change-Id: I67cda67c4f68e262a92d805a63cc1496374be063
2015-03-02 18:43:20 -08:00
d0859d69de iosbuild: add x64_64 simulator support
based on the patch here:
https://github.com/pixelkind/webp-ios-build

(cherry picked from commit a96ccf8fde)

Change-Id: Iaa346b751e5f18e8cf13a8e5c4064b0c2a3f5f6c
2015-03-02 18:43:14 -08:00
046732ca65 WebPEncode: Support encoding same pic twice (even if modified)
This wasn't working for this specific scenario:
- Encode an RGBA 'pic' (with trivial alpha) using lossy encoding.
(so that pic->a == NULL after import happens).
- Modify the 'pic->argb' so that it has non-trivial alpha.
- Encode the same 'pic' again.
This used to fail to encode alpha data as pic->a == NULL.

(cherry picked from commit e4f4dddba3)

Change-Id: Ieaaa7bd09825c42f54fbd99e6781d98f0b19cc0c
2015-03-02 18:43:08 -08:00
4426f50179 webp/types.h: use inline for clang++/-std=c++11
at least clang 3.[45] in c++ mode with -std=c++11 define __STRICT_ANSI__
this change set WEBP_INLINE to inline for c++/non-strict-ansi/> c99

fixes crbug.com/428383

(cherry picked from commit 6638710b9e)

Change-Id: Ief2b934353c336a75865c73c90cc3dc5e4f83913
2015-03-02 18:43:02 -08:00
e297fc7171 gif2webp: Use the default hint instead of WEBP_HINT_GRAPH.
This is much faster and the compression is slightly better too.

(cherry picked from commit c94ed49efd)

Change-Id: Ibf0d10eea83bfabfcc44ee497074767462ff41b1
2015-03-02 18:42:54 -08:00
855fe4354b Makefile.vc: add a 'legacy' RTLIBCFG option
disables buffer security checks (/GS-) and any machine optimizations
(e.g., sse2)

fixes issue #228

(cherry picked from commit 34c20c06c8)

Change-Id: I81fa483dc1654199b2017626320383d2d63317dc
2015-03-02 18:42:48 -08:00
b7eb6d55c7 gif2webp: Support GIF_DISPOSE_RESTORE_PREVIOUS
Tweaked the gif2webp_util API to support this.

Requested in: https://code.google.com/p/webp/issues/detail?id=144

(cherry picked from commit 65e5eb8a62)

Change-Id: I0e8c4edc39227355cd8d3acc55795186e25d0c3a
2015-03-02 18:42:40 -08:00
5691bdd9da gif2webp: Handle frames with odd offsets + disposal to background.
Snapping odd offsets in GIF to even offsets in WebP was causing extra row/column
being disposed in such cases.

Code is rewritten to maintain previous and current canvas (it used to maintain
previous canvas and current frame earlier). And we recompute change rectangles
as those from GIF may no longer apply.
Also, this renders methods like ReduceTransparency() and ConvertToKeyFrame()
redundant, as internally maintained current canvas is always independent of
previous canvases.

Disposal method choice: we pick the disposal method that results in the smallest
change rectangle.

(cherry picked from commit e4c829efe9)

Conflicts:
	examples/gif2webp_util.c

Change-Id: Ic31186d98fe1a2a790a89d1571b17e3abd127e79
2015-03-02 18:42:36 -08:00
8301da1380 stopwatch.h: fix includes
WEBP_INLINE -> webp/types.h
memcpy -> string.h

(cherry picked from commit 54edbf65ff)

Change-Id: Iab2ea8b553dc98be75eede751de62ab0292d1f97
2015-03-02 18:42:28 -08:00
56 changed files with 1861 additions and 1060 deletions

View File

@ -16,6 +16,7 @@ Contributors:
- Pascal Massimino (pascal dot massimino at gmail dot com)
- Paweł Hajdan, Jr (phajdan dot jr at chromium dot org)
- Pierre Joye (pierre dot php at gmail dot com)
- Sam Clegg (sbc at chromium dot org)
- Scott LaVarnway (slavarnway at google dot com)
- Scott Talbot (s at chikachow dot org)
- Slobodan Prijic (slobodan dot prijic at imgtec dot com)

View File

@ -10,8 +10,6 @@ ifeq ($(APP_OPTIM),release)
endif
endif
include $(CLEAR_VARS)
ifneq ($(findstring armeabi-v7a, $(TARGET_ARCH_ABI)),)
# Setting LOCAL_ARM_NEON will enable -mfpu=neon which may cause illegal
# instructions to be generated for armv7a code. Instead target the neon code
@ -21,7 +19,7 @@ else
NEON := c
endif
LOCAL_SRC_FILES := \
dec_srcs := \
src/dec/alpha.c \
src/dec/buffer.c \
src/dec/frame.c \
@ -32,6 +30,11 @@ LOCAL_SRC_FILES := \
src/dec/vp8.c \
src/dec/vp8l.c \
src/dec/webp.c \
demux_srcs := \
src/demux/demux.c \
dsp_dec_srcs := \
src/dsp/alpha_processing.c \
src/dsp/alpha_processing_sse2.c \
src/dsp/cpu.c \
@ -40,11 +43,6 @@ LOCAL_SRC_FILES := \
src/dsp/dec_mips32.c \
src/dsp/dec_neon.$(NEON) \
src/dsp/dec_sse2.c \
src/dsp/enc.c \
src/dsp/enc_avx2.c \
src/dsp/enc_mips32.c \
src/dsp/enc_neon.$(NEON) \
src/dsp/enc_sse2.c \
src/dsp/lossless.c \
src/dsp/lossless_mips32.c \
src/dsp/lossless_neon.$(NEON) \
@ -55,6 +53,15 @@ LOCAL_SRC_FILES := \
src/dsp/yuv.c \
src/dsp/yuv_mips32.c \
src/dsp/yuv_sse2.c \
dsp_enc_srcs := \
src/dsp/enc.c \
src/dsp/enc_avx2.c \
src/dsp/enc_mips32.c \
src/dsp/enc_neon.$(NEON) \
src/dsp/enc_sse2.c \
enc_srcs := \
src/enc/alpha.c \
src/enc/analysis.c \
src/enc/backward_references.c \
@ -75,19 +82,38 @@ LOCAL_SRC_FILES := \
src/enc/tree.c \
src/enc/vp8l.c \
src/enc/webpenc.c \
mux_srcs := \
src/mux/muxedit.c \
src/mux/muxinternal.c \
src/mux/muxread.c \
utils_dec_srcs := \
src/utils/bit_reader.c \
src/utils/bit_writer.c \
src/utils/color_cache.c \
src/utils/filters.c \
src/utils/huffman.c \
src/utils/huffman_encode.c \
src/utils/quant_levels.c \
src/utils/quant_levels_dec.c \
src/utils/random.c \
src/utils/rescaler.c \
src/utils/thread.c \
src/utils/utils.c \
utils_enc_srcs := \
src/utils/bit_writer.c \
src/utils/huffman_encode.c \
src/utils/quant_levels.c \
################################################################################
# libwebpdecoder
include $(CLEAR_VARS)
LOCAL_SRC_FILES := \
$(dec_srcs) \
$(dsp_dec_srcs) \
$(utils_dec_srcs) \
LOCAL_CFLAGS := $(WEBP_CFLAGS)
LOCAL_C_INCLUDES += $(LOCAL_PATH)/src
@ -96,6 +122,38 @@ LOCAL_ARM_MODE := arm
LOCAL_STATIC_LIBRARIES := cpufeatures
LOCAL_MODULE := webpdecoder_static
include $(BUILD_STATIC_LIBRARY)
ifeq ($(ENABLE_SHARED),1)
include $(CLEAR_VARS)
LOCAL_WHOLE_STATIC_LIBRARIES := webpdecoder_static
LOCAL_MODULE := webpdecoder
include $(BUILD_SHARED_LIBRARY)
endif # ENABLE_SHARED=1
################################################################################
# libwebp
include $(CLEAR_VARS)
LOCAL_SRC_FILES := \
$(dsp_enc_srcs) \
$(enc_srcs) \
$(utils_enc_srcs) \
LOCAL_CFLAGS := $(WEBP_CFLAGS)
LOCAL_C_INCLUDES += $(LOCAL_PATH)/src
# prefer arm over thumb mode for performance gains
LOCAL_ARM_MODE := arm
LOCAL_WHOLE_STATIC_LIBRARIES := webpdecoder_static
LOCAL_MODULE := webp
ifeq ($(ENABLE_SHARED),1)
@ -104,6 +162,54 @@ else
include $(BUILD_STATIC_LIBRARY)
endif
################################################################################
# libwebpdemux
include $(CLEAR_VARS)
LOCAL_SRC_FILES := $(demux_srcs)
LOCAL_CFLAGS := $(WEBP_CFLAGS)
LOCAL_C_INCLUDES += $(LOCAL_PATH)/src
# prefer arm over thumb mode for performance gains
LOCAL_ARM_MODE := arm
LOCAL_MODULE := webpdemux
ifeq ($(ENABLE_SHARED),1)
LOCAL_SHARED_LIBRARIES := webp
include $(BUILD_SHARED_LIBRARY)
else
LOCAL_STATIC_LIBRARIES := webp
include $(BUILD_STATIC_LIBRARY)
endif
################################################################################
# libwebpmux
include $(CLEAR_VARS)
LOCAL_SRC_FILES := $(mux_srcs)
LOCAL_CFLAGS := $(WEBP_CFLAGS)
LOCAL_C_INCLUDES += $(LOCAL_PATH)/src
# prefer arm over thumb mode for performance gains
LOCAL_ARM_MODE := arm
LOCAL_MODULE := webpmux
ifeq ($(ENABLE_SHARED),1)
LOCAL_SHARED_LIBRARIES := webp
include $(BUILD_SHARED_LIBRARY)
else
LOCAL_STATIC_LIBRARIES := webp
include $(BUILD_STATIC_LIBRARY)
endif
################################################################################
include $(LOCAL_PATH)/examples/Android.mk
$(call import-module,android/cpufeatures)

View File

@ -1,3 +1,47 @@
a661e50 Disable NEON code on Native Client
fcd94e9 update ChangeLog (tag: v0.4.3-rc1)
569fe57 update NEWS
bd852f5 bump version to 0.4.3
2d58b64 WebPPictureRescale: add a note about 0 width/height
a0d8ca5 examples/Android.mk: add webpmux_example target
34b1d29 Android.mk: add webpmux target
7561988 Android.mk: add webpdemux target
a987576 Android.mk: add webpdecoder{,_static} targets
a6d4859 Android.mk: split source lists per-directory
77544d5 fix iOS arm64 build with Xcode 6.3
6dea157 doc/webp-container-spec: note MSB order for chunk diagrams
f7cd57b doc/webp-container-spec: cosmetics
1d6b250 vwebp: clear canvas at the beginning of each loop
f97b3f8 webp-container-spec: clarify background clear on loop
4ba83c1 vwebp: remove unnecessary static Help() prototype
d34e8e3 vwebp/animation: display last frame on end-of-loop
bbbc524 dec/vp8: clear 'dither_' on skipped blocks
0339fa2 lossless_neon: enable subtract green for aarch64
5a0c220 Regression fix for lossless decoding
6e3a31d wicdec: (msvs) quiet some /analyze warnings
b49a578 dwebp/WritePNG: mark png variables volatile
0a4391a dwebp: include setjmp.h w/WEBP_HAVE_PNG
90f1ec5 dwebp: correct sign in format strings
b61ce86 VP8LEncodeStream: add an assert
df1081b dsp/cpu: (msvs) add include for __cpuidex
39aa055 dsp/cpu: (msvs) avoid immintrin.h on _M_ARM
f814f42 dsp/cpu: add include for _xgetbv() w/MSVS
8508ab9 cpu: fix AVX2 detection for gcc/clang targets
5769623 fix handling of zero-sized partition #0 corner case
b2e71a9 make the 'last_cpuinfo_used' variable names unique
1273e84 add -Wformat-nonliteral and -Wformat-security
3ae78eb multi-thread fix: lock each entry points with a static var
5c1eeda webp-container-spec: remove references to fragments
c5ceea4 enc_neon: fix building with non-Xcode clang (iOS)
d0859d6 iosbuild: add x64_64 simulator support
046732c WebPEncode: Support encoding same pic twice (even if modified)
4426f50 webp/types.h: use inline for clang++/-std=c++11
e297fc7 gif2webp: Use the default hint instead of WEBP_HINT_GRAPH.
855fe43 Makefile.vc: add a 'legacy' RTLIBCFG option
b7eb6d5 gif2webp: Support GIF_DISPOSE_RESTORE_PREVIOUS
5691bdd gif2webp: Handle frames with odd offsets + disposal to background.
8301da1 stopwatch.h: fix includes
6a2209a update ChangeLog (tag: v0.4.2, origin/0.4.2, 0.4.2)
36cad6a bit_reader.h: cosmetics: fix a typo
e2ecae6 enc_mips32: workaround gcc-4.9 bug
243e68d update ChangeLog (tag: v0.4.2-rc2)
@ -74,7 +118,7 @@ c2fc52e restore encode API compatibility
793368e restore decode API compatibility
b8984f3 gif2webp: fix compile with giflib 5.1.0
222f9b1 gif2webp: simplify giflib version checking
d2cc61b Extend MakeARGB32() to accept Alpha channel. (master)
d2cc61b Extend MakeARGB32() to accept Alpha channel.
4595b62 Merge "use explicit size of kErrorMessages[] arrays"
157de01 Merge "Actuate memory stats for PRINT_MEMORY_INFO"
fbda2f4 JPEG decoder: delay conversion to YUV to WebPEncode() call

View File

@ -27,7 +27,7 @@ PLATFORM_LDFLAGS = /SAFESEH
NOLOGO = /nologo
CCNODBG = cl.exe $(NOLOGO) /O2 /DNDEBUG
CCDEBUG = cl.exe $(NOLOGO) /Od /Gm /Zi /D_DEBUG /RTC1
CFLAGS = /Isrc $(NOLOGO) /W3 /EHsc /c /GS
CFLAGS = /Isrc $(NOLOGO) /W3 /EHsc /c
CFLAGS = $(CFLAGS) /DWIN32 /D_CRT_SECURE_NO_WARNINGS /DWIN32_LEAN_AND_MEAN
CFLAGS = $(CFLAGS) /DHAVE_WINCODEC_H /DWEBP_USE_THREAD
LDFLAGS = /LARGEADDRESSAWARE /MANIFEST /NXCOMPAT /DYNAMICBASE
@ -54,6 +54,11 @@ AVX2_FLAGS = /arch:AVX2
!IF "$(RTLIBCFG)" == "static"
RTLIB = /MT
RTLIBD = /MTd
!ELSE IF "$(RTLIBCFG)" == "legacy"
RTLIBCFG = static
RTLIB = /MT
RTLIBD = /MTd
CFLAGS = $(CFLAGS) /GS- /arch:IA32
!ELSE
RTLIB = /MD
RTLIBD = /MDd
@ -139,6 +144,7 @@ CFGSET = TRUE
!MESSAGE - all - build (de)mux-based targets for CFG
!MESSAGE
!MESSAGE RTLIBCFG controls the runtime library linkage - 'static' or 'dynamic'.
!MESSAGE 'legacy' will produce a Windows 2000 compatible library.
!MESSAGE OBJDIR is the path where you like to build (obj, bins, etc.),
!MESSAGE defaults to ..\obj

14
NEWS
View File

@ -1,3 +1,17 @@
- 10/15/15: version 0.4.4
This is a binary compatible release.
* rescaling out-of-bounds read fix (issue #254)
* various build fixes and improvements (issues #253, #259, #262, #267, #268)
* container documentation update
* gif2webp transparency fix (issue #245)
- 3/3/15: version 0.4.3
This is a binary compatible release.
* Android / gcc / iOS / MSVS build fixes and improvements
* lossless decode fix (issue #239 -- since 0.4.0)
* documentation / vwebp updates for animation
* multi-threading fix (issue #234)
- 10/13/14: version 0.4.2
This is a binary compatible release.
* Android / gcc build fixes

View File

@ -17,7 +17,7 @@ or agree to the institution of patent litigation or any other patent
enforcement activity against any entity (including a cross-claim or
counterclaim in a lawsuit) alleging that any of these implementations of WebM
or any code incorporated within any of these implementations of WebM
constitutes direct or contributory patent infringement, or inducement of
constitute direct or contributory patent infringement, or inducement of
patent infringement, then any patent rights granted to you under this License
for these implementations of WebM shall terminate as of the date such
litigation is filed.

4
README
View File

@ -4,7 +4,7 @@
\__\__/\____/\_____/__/ ____ ___
/ _/ / \ \ / _ \/ _/
/ \_/ / / \ \ __/ \__
\____/____/\_____/_____/____/v0.4.2
\____/____/\_____/_____/____/v0.4.4
Description:
============
@ -596,7 +596,7 @@ Bugs:
=====
Please report all bugs to our issue tracker:
http://code.google.com/p/webp/issues
https://bugs.chromium.org/p/webp
Patches welcome! See this page to get started:
http://www.webmproject.org/code/contribute/submitting-patches/

View File

@ -175,7 +175,7 @@ Bugs:
=====
Please report all bugs to our issue tracker:
http://code.google.com/p/webp/issues
https://bugs.chromium.org/p/webp
Patches welcome! See this page to get started:
http://www.webmproject.org/code/contribute/submitting-patches/

View File

@ -1,5 +1,5 @@
AC_INIT([libwebp], [0.4.2],
[http://code.google.com/p/webp/issues],,
AC_INIT([libwebp], [0.4.4],
[https://bugs.chromium.org/p/webp],,
[http://developers.google.com/speed/webp])
AC_CANONICAL_HOST
AC_PREREQ([2.60])
@ -54,6 +54,7 @@ AC_DEFUN([TEST_AND_ADD_CFLAGS],
TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wall])
TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wdeclaration-after-statement])
TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wextra])
TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wformat-nonliteral])
TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wformat-security])
TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wmissing-declarations])
TEST_AND_ADD_CFLAGS([AM_CFLAGS], [-Wmissing-prototypes])

View File

@ -46,25 +46,16 @@ for:
* **Animation.** An image may have multiple frames with pauses between them,
making it an animation.
* **Image Fragmentation.** A single bitstream in WebP has an inherent
limitation for width or height of 2^14 pixels, and, when using VP8, a 512
KiB limit on the size of the first compressed partition. To support larger
images, the format supports images that are composed of multiple fragments,
each encoded as a separate bitstream. All fragments logically form a single
image: they have common metadata, color profile, etc. Image fragmentation
may also improve efficiency for larger images, e.g., grass can be encoded
differently than sky.
The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT",
"SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this
document are to be interpreted as described in [RFC 2119][].
Bit numbering in chunk diagrams starts at `0` for the most significant bit
('MSB 0') as described in [RFC 1166][].
**Note:** Out of the features mentioned above, lossy compression, lossless
compression, transparency, metadata, color profile and animation are finalized
and are to be considered stable. On the other hand, image fragmentation is
experimental as of now, and is open to discussion, feedback and comments.
The same is indicated using annotation "_status: experimental_" in the relevant
sections of this document.
and are to be considered stable.
Terminology & Basics
------------------------
@ -79,7 +70,7 @@ Below are additional terms used throughout this document:
_Reader/Writer_
: Code that reads WebP files is referred to as a _reader_, while code that
writes them is referred to as a _writer_.
writes them is referred to as a _writer_.
_uint16_
@ -101,10 +92,12 @@ _FourCC_
_1-based_
: An unsigned integer field storing values offset by `-1`. e.g., Such a field
would store value _25_ as _24_.
would store value _25_ as _24_.
RIFF file format
RIFF File Format
----------------
The WebP file format is based on the RIFF (resource interchange file format)
document format.
@ -144,7 +137,8 @@ _ChunkHeader('ABCD')_
chunks that apply to any RIFF file format, while FourCCs specific to a file
format are all lowercase. WebP does not follow this convention.
WebP file header
WebP File Header
----------------
0 1 2 3
@ -164,8 +158,8 @@ WebP file header
File Size: 32 bits (_uint32_)
: The size of the file in bytes starting at offset 8. The maximum value of
this field is 2^32 minus 10 bytes and thus the size of the whole file is at
most 4GiB minus 2 bytes.
this field is 2^32 minus 10 bytes and thus the size of the whole file is at
most 4GiB minus 2 bytes.
'WEBP': 32 bits
@ -177,7 +171,8 @@ the 'WEBP' FourCC. The file SHOULD NOT contain anything after it. As the size
of any chunk is even, the size given by the RIFF header is also even. The
contents of individual chunks will be described in the following sections.
Simple file format (lossy)
Simple File Format (Lossy)
--------------------------
This layout SHOULD be used if the image requires _lossy_ encoding and does not
@ -215,7 +210,8 @@ width and height. That is assumed to be the width and height of the canvas.
The VP8 specification describes how to decode the image into Y'CbCr
format. To convert to RGB, Rec. 601 SHOULD be used.
Simple file format (lossless)
Simple File Format (Lossless)
-----------------------------
**Note:** Older readers may not support files using the lossless format.
@ -253,7 +249,8 @@ The current specification of the VP8L bitstream can be found at
contains the VP8L image width and height. That is assumed to be the width
and height of the canvas.
Extended file format
Extended File Format
--------------------
**Note:** Older readers may not support files using the extended format.
@ -274,13 +271,15 @@ An extended format file consists of:
* An optional list of [unknown chunks](#unknown-chunks). _\[status: experimental\]_
For a _still image_, the _image data_ consists of a single frame, whereas for
an _animated image_, it consists of multiple frames. More details about frames
can be found in the [Animation](#animation) section.
For a _still image_, the _image data_ consists of a single frame, which is made
up of:
Moreover, each frame can be fragmented or non-fragmented, as will be described
in the [Extended WebP file header](#extended_header) section. More details about
fragments can be found in the [Fragments](#fragments) section.
* An optional [alpha subchunk](#alpha).
* A [bitstream subchunk](#bitstream-vp8vp8l).
For an _animated image_, the _image data_ consists of multiple frames. More
details about frames can be found in the [Animation](#animation) section.
All chunks SHOULD be placed in the same order as listed above. If a chunk
appears in the wrong place, the file is invalid, but readers MAY parse the
@ -302,7 +301,7 @@ Extended WebP file header:
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| ChunkHeader('VP8X') |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|Rsv|I|L|E|X|A|F| Reserved |
|Rsv|I|L|E|X|A|R| Reserved |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Canvas Width Minus One | ...
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
@ -320,7 +319,7 @@ ICC profile (I): 1 bit
Alpha (L): 1 bit
: Set if any of the frames of the image contain transparency information
("alpha").
("alpha").
EXIF metadata (E): 1 bit
@ -333,11 +332,11 @@ XMP metadata (X): 1 bit
Animation (A): 1 bit
: Set if this is an animated image. Data in 'ANIM' and 'ANMF' chunks should be
used to control the animation.
used to control the animation.
Image Fragmentation (F): 1 bit _\[status: experimental\]_
Reserved (R): 1 bit
: Set if any of the frames in the image are represented by fragments.
: SHOULD be `0`.
Reserved: 24 bits
@ -382,9 +381,9 @@ animation.
Background Color: 32 bits (_uint32_)
: The default background color of the canvas in \[Blue, Green, Red, Alpha\]
byte order. This color MAY be used to fill the unused space on the canvas around
the frames, as well as the transparent pixels of the first frame. Background
color is also used when disposal method is `1`.
byte order. This color MAY be used to fill the unused space on the canvas
around the frames, as well as the transparent pixels of the first frame.
Background color is also used when disposal method is `1`.
**Note**:
@ -394,6 +393,9 @@ color is also used when disposal method is `1`.
* Viewer applications SHOULD treat the background color value as a hint, and
are not required to use it.
* The canvas is cleared at the start of each loop. The background color MAY be
used to achieve this.
Loop Count: 16 bits (_uint16_)
: The number of times to loop the animation. `0` means infinitely.
@ -402,7 +404,6 @@ This chunk MUST appear if the _Animation_ flag in the VP8X chunk is set.
If the _Animation_ flag is not set and this chunk is present, it
SHOULD be ignored.
ANMF chunk:
For animated images, this chunk contains information about a _single_ frame.
@ -445,8 +446,8 @@ Frame Height Minus One: 24 bits (_uint24_)
Frame Duration: 24 bits (_uint24_)
: The time to wait before displaying the next frame, in 1 millisecond units.
In particular, frame duration of 0 is useful when one wants to update multiple
areas of the canvas at once during the animation.
In particular, frame duration of 0 is useful when one wants to update
multiple areas of the canvas at once during the animation.
Reserved: 6 bits
@ -454,28 +455,28 @@ Reserved: 6 bits
Blending method (B): 1 bit
: Indicates how transparent pixels of _the current frame_ are to be blended with
corresponding pixels of the previous canvas:
: Indicates how transparent pixels of _the current frame_ are to be blended
with corresponding pixels of the previous canvas:
* `0`: Use alpha blending. After disposing of the previous frame, render the
current frame on the canvas using [alpha-blending](#alpha-blending). If the
current frame does not have an alpha channel, assume alpha value of 255,
effectively replacing the rectangle.
* `0`: Use alpha blending. After disposing of the previous frame, render the
current frame on the canvas using [alpha-blending](#alpha-blending). If
the current frame does not have an alpha channel, assume alpha value of
255, effectively replacing the rectangle.
* `1`: Do not blend. After disposing of the previous frame, render the
current frame on the canvas by overwriting the rectangle covered by the
current frame.
* `1`: Do not blend. After disposing of the previous frame, render the
current frame on the canvas by overwriting the rectangle covered by the
current frame.
Disposal method (D): 1 bit
: Indicates how _the current frame_ is to be treated after it has been displayed
(before rendering the next frame) on the canvas:
: Indicates how _the current frame_ is to be treated after it has been
displayed (before rendering the next frame) on the canvas:
* `0`: Do not dispose. Leave the canvas as is.
* `0`: Do not dispose. Leave the canvas as is.
* `1`: Dispose to background color. Fill the _rectangle_ on the canvas covered
by the _current frame_ with background color specified in the
[ANIM chunk](#anim_chunk).
* `1`: Dispose to background color. Fill the _rectangle_ on the canvas
covered by the _current frame_ with background color specified in the
[ANIM chunk](#anim_chunk).
**Notes**:
@ -506,9 +507,7 @@ Disposal method (D): 1 bit
Frame Data: _Chunk Size_ - `16` bytes
: For a fragmented frame, it consists of multiple [fragment chunks](#fragments).
: For a non-fragmented frame, it consists of:
: Consists of:
* An optional [alpha subchunk](#alpha) for the frame.
@ -519,49 +518,6 @@ Frame Data: _Chunk Size_ - `16` bytes
**Note**: The 'ANMF' payload, _Frame Data_ above, consists of individual
_padded_ chunks as described by the [RIFF file format](#riff-file-format).
#### Fragments _\[status: experimental\]_
For images that are represented by fragments, this chunk contains data for
a single fragment. If the _Image Fragmentation Flag_ is not set, then this chunk
SHOULD NOT be present.
0 1 2 3
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| ChunkHeader('FRGM') |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Fragment X | ...
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
... Fragment Y | Fragment Data |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
Fragment X: 24 bits (_uint24_)
: The X coordinate of the upper left corner of the fragment is `Fragment X * 2`
Fragment Y: 24 bits (_uint24_)
: The Y coordinate of the upper left corner of the fragment is `Fragment Y * 2`
Fragment Data: _Chunk Size_ - `6` bytes
: It contains:
* An optional [alpha subchunk](#alpha) for the fragment.
* The [bitstream subchunk](#bitstream-vp8vp8l) for the fragment.
* An optional list of [unknown chunks](#unknown-chunks).
Note: The width and height of the fragment is obtained from the bitstream
subchunk.
The fragments of a frame SHOULD have the following properties:
* They collectively cover the whole frame.
* No pair of fragments have any overlapping region on the frame.
* No portion of any fragment should be located outside of the canvas.
#### Alpha
0 1 2 3
@ -579,20 +535,20 @@ Reserved (Rsv): 2 bits
Pre-processing (P): 2 bits
: These INFORMATIVE bits are used to signal the pre-processing that has
been performed during compression. The decoder can use this information to
e.g. dither the values or smooth the gradients prior to display.
been performed during compression. The decoder can use this information to
e.g. dither the values or smooth the gradients prior to display.
* `0`: no pre-processing
* `1`: level reduction
* `0`: no pre-processing
* `1`: level reduction
Filtering method (F): 2 bits
: The filtering method used:
* `0`: None.
* `1`: Horizontal filter.
* `2`: Vertical filter.
* `3`: Gradient filter.
* `0`: None.
* `1`: Horizontal filter.
* `2`: Vertical filter.
* `3`: Gradient filter.
For each pixel, filtering is performed using the following calculations.
Assume the alpha values surrounding the current `X` position are labeled as:
@ -636,15 +592,15 @@ Compression method (C): 2 bits
: The compression method used:
* `0`: No compression.
* `1`: Compressed using the WebP lossless format.
* `0`: No compression.
* `1`: Compressed using the WebP lossless format.
Alpha bitstream: _Chunk Size_ - `1` bytes
: Encoded alpha bitstream.
This optional chunk contains encoded alpha data for this frame/fragment. A
frame/fragment containing a 'VP8L' chunk SHOULD NOT contain this chunk.
This optional chunk contains encoded alpha data for this frame. A frame
containing a 'VP8L' chunk SHOULD NOT contain this chunk.
**Rationale**: The transparency information is already part of the 'VP8L'
chunk.
@ -675,15 +631,15 @@ compression method is '0') or compressed using the lossless format
#### Bitstream (VP8/VP8L)
This chunk contains compressed bitstream data for a single frame/fragment.
This chunk contains compressed bitstream data for a single frame.
A bitstream chunk may be either (i) a VP8 chunk, using "VP8 " (note the
significant fourth-character space) as its tag _or_ (ii) a VP8L chunk, using
"VP8L" as its tag.
The formats of VP8 and VP8L chunks are as described in sections
[Simple file format (lossy)](#simple-file-format-lossy)
and [Simple file format (lossless)](#simple-file-format-lossless) respectively.
[Simple File Format (Lossy)](#simple-file-format-lossy)
and [Simple File Format (Lossless)](#simple-file-format-lossless) respectively.
#### Color profile
@ -731,7 +687,6 @@ EXIF Metadata: _Chunk Size_ bytes
: image metadata in EXIF format.
XMP chunk:
0 1 2 3
@ -762,47 +717,17 @@ A file MAY contain unknown chunks:
* At the end of the file as described in [Extended WebP file
header](#extended_header) section.
* At the end of FRGM and ANMF chunks as described in [Fragments](#fragments)
and [Animation](#animation) sections.
* At the end of ANMF chunks as described in the
[Animation](#animation) section.
Readers SHOULD ignore these chunks. Writers SHOULD preserve them in their
original order (unless they specifically intend to modify these chunks).
### Assembling the Canvas from fragments/frames
### Assembling the Canvas from frames
Here we provide an overview of how a reader should assemble a canvas in case
of a fragmented-image and in case of an animated image. The notation
_VP8X.field_ means the field in the 'VP8X' chunk with the same description.
Displaying a _fragmented image_ canvas MUST be equivalent to the following
pseudocode: _\[status: experimental\]_
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
assert VP8X.flags.hasFragments
canvas ← new black image of size VP8X.canvasWidth x VP8X.canvasHeight.
frgm_params ← nil
for chunk in image_data:
assert chunk.tag is "FRGM"
frgm_params.fragmentX = Fragment X
frgm_params.fragmentY = Fragment Y
for subchunk in 'Fragment Data':
if subchunk.tag == "ALPH":
assert alpha subchunks not found in 'Fragment Data' earlier
frgm_params.alpha = alpha_data
else if subchunk.tag == "VP8 " OR subchunk.tag == "VP8L":
assert bitstream subchunks not found in 'Fragment Data' earlier
frgm_params.bitstream = bitstream_data
frgm_params.fragmentWidth = Width extracted from bitstream subchunk
frgm_params.fragmentHeight = Height extracted from bitstream subchunk
assert VP8X.canvasWidth >=
frgm_params.fragmentX + frgm_params.fragmentWidth
assert VP8X.canvasHeight >=
frgm_params.fragmentY + frgm_params.fragmentHeight
assert fragment has the properties mentioned in "Image Fragments" section.
render fragment with frame_params.alpha and frame_params.bitstream on canvas
with top-left corner in (frgm_params.fragmentX, frgm_params.fragmentY).
canvas contains the decoded canvas.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Here we provide an overview of how a reader should assemble a canvas in the
case of an animated image. The notation _VP8X.field_ means the field in the
'VP8X' chunk with the same description.
Displaying an _animated image_ canvas MUST be equivalent to the following
pseudocode:
@ -810,28 +735,25 @@ pseudocode:
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
assert VP8X.flags.hasAnimation
canvas ← new image of size VP8X.canvasWidth x VP8X.canvasHeight with
background color ANIM.background_color.
background color ANIM.background_color.
loop_count ← ANIM.loopCount
dispose_method ← ANIM.disposeMethod
if loop_count == 0:
loop_count = ∞
frame_params ← nil
for loop = 0, ..., loop_count - 1
assert next chunk in image_data is ANMF
frame_params.frameX = Frame X
frame_params.frameY = Frame Y
frame_params.frameWidth = Frame Width Minus One + 1
frame_params.frameHeight = Frame Height Minus One + 1
frame_params.frameDuration = Frame Duration
assert VP8X.canvasWidth >= frame_params.frameX + frame_params.frameWidth
assert VP8X.canvasHeight >= frame_params.frameY + frame_params.frameHeight
if VP8X.flags.hasFragments and first subchunk in 'Frame Data' is FRGM
// Fragmented frame.
frame_params.{bitstream,alpha} = canvas decoded from subchunks in
'Frame Data' as per the pseudocode for
_fragmented image_ above.
else
// Non-fragmented frame.
assert next chunk in image_data is ANMF
for loop = 0..loop_count - 1
clear canvas to ANIM.background_color or application defined color
until eof or non-ANMF chunk
frame_params.frameX = Frame X
frame_params.frameY = Frame Y
frame_params.frameWidth = Frame Width Minus One + 1
frame_params.frameHeight = Frame Height Minus One + 1
frame_params.frameDuration = Frame Duration
frame_right = frame_params.frameX + frame_params.frameWidth
frame_bottom = frame_params.frameY + frame_params.frameHeight
assert VP8X.canvasWidth >= frame_right
assert VP8X.canvasHeight >= frame_bottom
for subchunk in 'Frame Data':
if subchunk.tag == "ALPH":
assert alpha subchunks not found in 'Frame Data' earlier
@ -839,14 +761,15 @@ for loop = 0, ..., loop_count - 1
else if subchunk.tag == "VP8 " OR subchunk.tag == "VP8L":
assert bitstream subchunks not found in 'Frame Data' earlier
frame_params.bitstream = bitstream_data
render frame with frame_params.alpha and frame_params.bitstream on canvas
with top-left corner in (frame_params.frameX, frame_params.frameY), using
dispose method dispose_method.
Show the contents of the image for frame_params.frameDuration * 1ms.
canvas contains the decoded canvas.
render frame with frame_params.alpha and frame_params.bitstream on
canvas with top-left corner at (frame_params.frameX,
frame_params.frameY), using dispose method dispose_method.
canvas contains the decoded image.
Show the contents of the canvas for frame_params.frameDuration * 1ms.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Example file layouts
Example File Layouts
--------------------
A lossy encoded image with alpha may look as follows:
@ -878,17 +801,6 @@ RIFF/WEBP
+- XMP (metadata)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
A fragmented image may look as follows:
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
RIFF/WEBP
+- VP8X (descriptions of features used)
+- FRGM (fragment1 parameters + data)
+- FRGM (fragment2 parameters + data)
+- FRGM (fragment3 parameters + data)
+- FRGM (fragment4 parameters + data)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
An animated image with EXIF metadata may look as follows:
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@ -903,7 +815,8 @@ RIFF/WEBP
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
[vp8spec]: http://tools.ietf.org/html/rfc6386
[webpllspec]: https://gerrit.chromium.org/gerrit/gitweb?p=webm/libwebp.git;a=blob;f=doc/webp-lossless-bitstream-spec.txt;hb=master
[webpllspec]: https://chromium.googlesource.com/webm/libwebp/+/master/doc/webp-lossless-bitstream-spec.txt
[iccspec]: http://www.color.org/icc_specs2.xalter
[metadata]: http://www.metadataworkinggroup.org/pdf/mwg_guidance.pdf
[rfc 1166]: http://tools.ietf.org/html/rfc1166
[rfc 2119]: http://tools.ietf.org/html/rfc2119

View File

@ -1,5 +1,8 @@
LOCAL_PATH := $(call my-dir)
################################################################################
# libexample_util
include $(CLEAR_VARS)
LOCAL_SRC_FILES := \
@ -12,6 +15,9 @@ LOCAL_MODULE := example_util
include $(BUILD_STATIC_LIBRARY)
################################################################################
# cwebp
include $(CLEAR_VARS)
# Note: to enable jpeg/png encoding the sources from AOSP can be used with
@ -32,6 +38,9 @@ LOCAL_MODULE := cwebp
include $(BUILD_EXECUTABLE)
################################################################################
# dwebp
include $(CLEAR_VARS)
LOCAL_SRC_FILES := \
@ -44,3 +53,19 @@ LOCAL_STATIC_LIBRARIES := example_util webp
LOCAL_MODULE := dwebp
include $(BUILD_EXECUTABLE)
################################################################################
# webpmux
include $(CLEAR_VARS)
LOCAL_SRC_FILES := \
webpmux.c \
LOCAL_CFLAGS := $(WEBP_CFLAGS)
LOCAL_C_INCLUDES := $(LOCAL_PATH)/../src
LOCAL_STATIC_LIBRARIES := example_util webpmux webp
LOCAL_MODULE := webpmux_example
include $(BUILD_EXECUTABLE)

View File

@ -22,6 +22,7 @@
#ifdef WEBP_HAVE_PNG
#include <png.h>
#include <setjmp.h> // note: this must be included *after* png.h
#endif
#ifdef HAVE_WINCODEC_H
@ -192,8 +193,8 @@ static int WritePNG(FILE* out_file, const WebPDecBuffer* const buffer) {
uint8_t* const rgb = buffer->u.RGBA.rgba;
const int stride = buffer->u.RGBA.stride;
const int has_alpha = (buffer->colorspace == MODE_RGBA);
png_structp png;
png_infop info;
volatile png_structp png;
volatile png_infop info;
png_uint_32 y;
png = png_create_write_struct(PNG_LIBPNG_VER_STRING,
@ -203,11 +204,11 @@ static int WritePNG(FILE* out_file, const WebPDecBuffer* const buffer) {
}
info = png_create_info_struct(png);
if (info == NULL) {
png_destroy_write_struct(&png, NULL);
png_destroy_write_struct((png_structpp)&png, NULL);
return 0;
}
if (setjmp(png_jmpbuf(png))) {
png_destroy_write_struct(&png, &info);
png_destroy_write_struct((png_structpp)&png, (png_infopp)&info);
return 0;
}
png_init_io(png, out_file);
@ -221,7 +222,7 @@ static int WritePNG(FILE* out_file, const WebPDecBuffer* const buffer) {
png_write_rows(png, &row, 1);
}
png_write_end(png, info);
png_destroy_write_struct(&png, &info);
png_destroy_write_struct((png_structpp)&png, (png_infopp)&info);
return 1;
}
#else // !HAVE_WINCODEC_H && !WEBP_HAVE_PNG
@ -244,10 +245,10 @@ static int WritePPM(FILE* fout, const WebPDecBuffer* const buffer, int alpha) {
uint32_t y;
if (alpha) {
fprintf(fout, "P7\nWIDTH %d\nHEIGHT %d\nDEPTH 4\nMAXVAL 255\n"
fprintf(fout, "P7\nWIDTH %u\nHEIGHT %u\nDEPTH 4\nMAXVAL 255\n"
"TUPLTYPE RGB_ALPHA\nENDHDR\n", width, height);
} else {
fprintf(fout, "P6\n%d %d\n255\n", width, height);
fprintf(fout, "P6\n%u %u\n255\n", width, height);
}
for (y = 0; y < height; ++y) {
if (fwrite(rgb + y * stride, width, bytes_per_px, fout) != bytes_per_px) {
@ -404,7 +405,7 @@ static int WriteAlphaPlane(FILE* fout, const WebPDecBuffer* const buffer) {
const int a_stride = buffer->u.YUVA.a_stride;
uint32_t y;
assert(a != NULL);
fprintf(fout, "P5\n%d %d\n255\n", width, height);
fprintf(fout, "P5\n%u %u\n255\n", width, height);
for (y = 0; y < height; ++y) {
if (fwrite(a + y * a_stride, width, 1, fout) != 1) {
return 0;

View File

@ -46,18 +46,7 @@
//------------------------------------------------------------------------------
static int transparent_index; // Index of transparent color in the map.
static void ResetFrameInfo(WebPMuxFrameInfo* const info) {
WebPDataInit(&info->bitstream);
info->x_offset = 0;
info->y_offset = 0;
info->duration = 0;
info->id = WEBP_CHUNK_ANMF;
info->dispose_method = WEBP_MUX_DISPOSE_NONE;
info->blend_method = WEBP_MUX_BLEND;
transparent_index = -1; // Opaque frame by default.
}
static int transparent_index = -1; // Opaque frame by default.
static void SanitizeKeyFrameIntervals(size_t* const kmin_ptr,
size_t* const kmax_ptr) {
@ -270,7 +259,8 @@ int main(int argc, const char *argv[]) {
GifFileType* gif = NULL;
WebPConfig config;
WebPPicture frame;
WebPMuxFrameInfo info;
int duration = 0;
FrameDisposeMethod orig_dispose = FRAME_DISPOSE_NONE;
WebPMuxAnimParams anim = { WHITE_COLOR, 0 };
WebPFrameCache* cache = NULL;
@ -290,14 +280,11 @@ int main(int argc, const char *argv[]) {
size_t kmax = 0;
int allow_mixed = 0; // If true, each frame can be lossy or lossless.
ResetFrameInfo(&info);
if (!WebPConfigInit(&config) || !WebPPictureInit(&frame)) {
fprintf(stderr, "Error! Version mismatch!\n");
return -1;
}
config.lossless = 1; // Use lossless compression by default.
config.image_hint = WEBP_HINT_GRAPH; // always low-color
if (argc == 1) {
Help();
@ -499,7 +486,8 @@ int main(int argc, const char *argv[]) {
goto End;
}
if (!WebPFrameCacheAddFrame(cache, &config, &gif_rect, &frame, &info)) {
if (!WebPFrameCacheAddFrame(cache, &config, &gif_rect, orig_dispose,
duration, &frame)) {
fprintf(stderr, "Error! Cannot encode frame as WebP\n");
fprintf(stderr, "Error code: %d\n", frame.error_code);
}
@ -515,7 +503,9 @@ int main(int argc, const char *argv[]) {
// In GIF, graphic control extensions are optional for a frame, so we
// may not get one before reading the next frame. To handle this case,
// we reset frame properties to reasonable defaults for the next frame.
ResetFrameInfo(&info);
orig_dispose = FRAME_DISPOSE_NONE;
duration = 0;
transparent_index = -1; // Opaque frame by default.
break;
}
case EXTENSION_RECORD_TYPE: {
@ -533,20 +523,19 @@ int main(int argc, const char *argv[]) {
const int dispose = (flags >> GIF_DISPOSE_SHIFT) & GIF_DISPOSE_MASK;
const int delay = data[2] | (data[3] << 8); // In 10 ms units.
if (data[0] != 4) goto End;
info.duration = delay * 10; // Duration is in 1 ms units for WebP.
if (dispose == 3) {
static int warning_printed = 0;
if (!warning_printed) {
fprintf(stderr, "WARNING: GIF_DISPOSE_RESTORE unsupported.\n");
warning_printed = 1;
}
// failsafe. TODO(urvang): emulate the correct behaviour by
// recoding the whole frame.
info.dispose_method = WEBP_MUX_DISPOSE_BACKGROUND;
} else {
info.dispose_method =
(dispose == 2) ? WEBP_MUX_DISPOSE_BACKGROUND
: WEBP_MUX_DISPOSE_NONE;
duration = delay * 10; // Duration is in 1 ms units for WebP.
switch (dispose) {
case 3:
orig_dispose = FRAME_DISPOSE_RESTORE_PREVIOUS;
break;
case 2:
orig_dispose = FRAME_DISPOSE_BACKGROUND;
break;
case 1:
case 0:
default:
orig_dispose = FRAME_DISPOSE_NONE;
break;
}
transparent_index = (flags & GIF_TRANSPARENT_MASK) ? data[4] : -1;
break;

File diff suppressed because it is too large Load Diff

View File

@ -29,6 +29,13 @@ extern "C" {
struct WebPPicture;
// Includes all disposal methods, even the ones not supported by WebP bitstream.
typedef enum FrameDisposeMethod {
FRAME_DISPOSE_NONE,
FRAME_DISPOSE_BACKGROUND,
FRAME_DISPOSE_RESTORE_PREVIOUS
} FrameDisposeMethod;
typedef struct {
int x_offset, y_offset, width, height;
} WebPFrameRect;
@ -53,15 +60,15 @@ WebPFrameCache* WebPFrameCacheNew(int width, int height,
// Release all the frame data from 'cache' and free 'cache'.
void WebPFrameCacheDelete(WebPFrameCache* const cache);
// Given an image described by 'frame', 'info' and 'orig_rect', optimize it for
// WebP, encode it and add it to 'cache'. 'orig_rect' can be NULL.
// This takes care of frame disposal too, according to 'info->dispose_method'.
// Given an image described by 'frame', 'rect', 'dispose_method' and 'duration',
// optimize it for WebP, encode it and add it to 'cache'. 'rect' can be NULL.
// This takes care of frame disposal too, according to 'dispose_method'.
// Returns false in case of error (and sets frame->error_code accordingly).
int WebPFrameCacheAddFrame(WebPFrameCache* const cache,
const WebPConfig* const config,
const WebPFrameRect* const orig_rect,
WebPPicture* const frame,
WebPMuxFrameInfo* const info);
const WebPFrameRect* const rect,
FrameDisposeMethod dispose_method, int duration,
WebPPicture* const frame);
// Flush the *ready* frames from cache and add them to 'mux'. If 'verbose' is
// true, prints the information about these frames.

View File

@ -14,6 +14,8 @@
#ifndef WEBP_EXAMPLES_STOPWATCH_H_
#define WEBP_EXAMPLES_STOPWATCH_H_
#include "webp/types.h"
#if defined _WIN32 && !defined __GNUC__
#include <windows.h>
@ -37,6 +39,7 @@ static WEBP_INLINE double StopwatchReadAndReset(Stopwatch* watch) {
#else /* !_WIN32 */
#include <string.h> // memcpy
#include <sys/time.h>
typedef struct timeval Stopwatch;

View File

@ -42,8 +42,6 @@
#define snprintf _snprintf
#endif
static void Help(void);
// Unfortunate global variables. Gathered into a struct for comfort.
static struct {
int has_animation;
@ -82,6 +80,16 @@ static void ClearParams(void) {
kParams.dmux = NULL;
}
// Sets the previous frame to the dimensions of the canvas and has it dispose
// to background to cause the canvas to be cleared.
static void ClearPreviousFrame(void) {
WebPIterator* const prev = &kParams.prev_frame;
prev->width = kParams.canvas_width;
prev->height = kParams.canvas_height;
prev->x_offset = prev->y_offset = 0;
prev->dispose_method = WEBP_MUX_DISPOSE_BACKGROUND;
}
// -----------------------------------------------------------------------------
// Color profile handling
static int ApplyColorProfile(const WebPData* const profile,
@ -181,6 +189,8 @@ static void decode_callback(int what) {
if (WebPDemuxGetFrame(kParams.dmux, 1, curr)) {
--kParams.loop_count;
kParams.done = (kParams.loop_count == 0);
if (kParams.done) return;
ClearPreviousFrame();
} else {
kParams.decoding_error = 1;
kParams.done = 1;
@ -298,19 +308,24 @@ static void HandleDisplay(void) {
// they will be incorrect if the window is resized.
// glScissor() takes window coordinates (0,0 at bottom left).
int window_x, window_y;
int frame_w, frame_h;
if (prev->dispose_method == WEBP_MUX_DISPOSE_BACKGROUND) {
// Clear the previous frame rectangle.
window_x = prev->x_offset;
window_y = kParams.canvas_height - prev->y_offset - prev->height;
frame_w = prev->width;
frame_h = prev->height;
} else { // curr->blend_method == WEBP_MUX_NO_BLEND.
// We simulate no-blending behavior by first clearing the current frame
// rectangle (to a checker-board) and then alpha-blending against it.
window_x = curr->x_offset;
window_y = kParams.canvas_height - curr->y_offset - curr->height;
frame_w = curr->width;
frame_h = curr->height;
}
glEnable(GL_SCISSOR_TEST);
// Only update the requested area, not the whole canvas.
glScissor(window_x, window_y, prev->width, prev->height);
glScissor(window_x, window_y, frame_w, frame_h);
glClear(GL_COLOR_BUFFER_BIT); // use clear color
DrawCheckerBoard();
@ -395,7 +410,6 @@ int main(int argc, char *argv[]) {
int c;
WebPDecoderConfig* const config = &kParams.config;
WebPIterator* const curr = &kParams.curr_frame;
WebPIterator* const prev = &kParams.prev_frame;
if (!WebPInitDecoderConfig(config)) {
fprintf(stderr, "Library version mismatch!\n");
@ -486,10 +500,7 @@ int main(int argc, char *argv[]) {
printf("Canvas: %d x %d\n", kParams.canvas_width, kParams.canvas_height);
}
prev->width = kParams.canvas_width;
prev->height = kParams.canvas_height;
prev->x_offset = prev->y_offset = 0;
prev->dispose_method = WEBP_MUX_DISPOSE_BACKGROUND;
ClearPreviousFrame();
memset(&kParams.iccp, 0, sizeof(kParams.iccp));
kParams.has_color_profile =

View File

@ -15,6 +15,7 @@
#include "webp/config.h"
#endif
#include <assert.h>
#include <stdio.h>
#ifdef HAVE_WINCODEC_H
@ -72,6 +73,12 @@ WEBP_DEFINE_GUID(GUID_WICPixelFormat32bppBGRA_,
WEBP_DEFINE_GUID(GUID_WICPixelFormat32bppRGBA_,
0xf5c7ad2d, 0x6a8d, 0x43dd,
0xa7, 0xa8, 0xa2, 0x99, 0x35, 0x26, 0x1a, 0xe9);
WEBP_DEFINE_GUID(GUID_WICPixelFormat64bppBGRA_,
0x1562ff7c, 0xd352, 0x46f9,
0x97, 0x9e, 0x42, 0x97, 0x6b, 0x79, 0x22, 0x46);
WEBP_DEFINE_GUID(GUID_WICPixelFormat64bppRGBA_,
0x6fddc324, 0x4e03, 0x4bfe,
0xb1, 0x85, 0x3d, 0x77, 0x76, 0x8d, 0xc9, 0x16);
static HRESULT OpenInputStream(const char* filename, IStream** stream) {
HRESULT hr = S_OK;
@ -109,6 +116,7 @@ static HRESULT ExtractICCP(IWICImagingFactory* const factory,
IFS(IWICBitmapFrameDecode_GetColorContexts(frame,
count, color_contexts,
&num_color_contexts));
assert(FAILED(hr) || num_color_contexts <= count);
for (i = 0; SUCCEEDED(hr) && i < num_color_contexts; ++i) {
WICColorContextType type;
IFS(IWICColorContext_GetType(color_contexts[i], &type));
@ -116,7 +124,7 @@ static HRESULT ExtractICCP(IWICImagingFactory* const factory,
UINT size;
IFS(IWICColorContext_GetProfileBytes(color_contexts[i],
0, NULL, &size));
if (size > 0) {
if (SUCCEEDED(hr) && size > 0) {
iccp->bytes = (uint8_t*)malloc(size);
if (iccp->bytes == NULL) {
hr = E_OUTOFMEMORY;
@ -194,7 +202,11 @@ static int HasAlpha(IWICImagingFactory* const factory,
has_alpha = IsEqualGUID(MAKE_REFGUID(pixel_format),
MAKE_REFGUID(GUID_WICPixelFormat32bppRGBA_)) ||
IsEqualGUID(MAKE_REFGUID(pixel_format),
MAKE_REFGUID(GUID_WICPixelFormat32bppBGRA_));
MAKE_REFGUID(GUID_WICPixelFormat32bppBGRA_)) ||
IsEqualGUID(MAKE_REFGUID(pixel_format),
MAKE_REFGUID(GUID_WICPixelFormat64bppRGBA_)) ||
IsEqualGUID(MAKE_REFGUID(pixel_format),
MAKE_REFGUID(GUID_WICPixelFormat64bppBGRA_));
}
return has_alpha;
}
@ -261,7 +273,7 @@ int ReadPictureWithWIC(const char* const filename,
IFS(IWICBitmapFrameDecode_GetPixelFormat(frame, &src_pixel_format));
IFS(IWICBitmapDecoder_GetContainerFormat(decoder, &src_container_format));
if (keep_alpha) {
if (SUCCEEDED(hr) && keep_alpha) {
const GUID** guid;
for (guid = kAlphaContainers; *guid != NULL; ++guid) {
if (IsEqualGUID(MAKE_REFGUID(src_container_format),

View File

@ -26,7 +26,9 @@ readonly OLDPATH=${PATH}
# Add iPhoneOS-V6 to the list of platforms below if you need armv6 support.
# Note that iPhoneOS-V6 support is not available with the iOS6 SDK.
readonly PLATFORMS="iPhoneSimulator iPhoneOS-V7 iPhoneOS-V7s iPhoneOS-V7-arm64"
PLATFORMS="iPhoneSimulator iPhoneSimulator64"
PLATFORMS+=" iPhoneOS-V7 iPhoneOS-V7s iPhoneOS-V7-arm64"
readonly PLATFORMS
readonly SRCDIR=$(dirname $0)
readonly TOPDIR=$(pwd)
readonly BUILDDIR="${TOPDIR}/iosbuild"
@ -39,6 +41,8 @@ LIBLIST=''
if [[ -z "${SDK}" ]]; then
echo "iOS SDK not available"
exit 1
elif [[ ${SDK%%.*} -gt 8 ]]; then
EXTRA_CFLAGS="-fembed-bitcode"
elif [[ ${SDK} < 6.0 ]]; then
echo "You need iOS SDK version 6.0 or above"
exit 1
@ -78,6 +82,9 @@ for PLATFORM in ${PLATFORMS}; do
elif [[ "${PLATFORM}" == "iPhoneOS-V6" ]]; then
PLATFORM="iPhoneOS"
ARCH="armv6"
elif [[ "${PLATFORM}" == "iPhoneSimulator64" ]]; then
PLATFORM="iPhoneSimulator"
ARCH="x86_64"
else
ARCH="i386"
fi
@ -89,7 +96,7 @@ for PLATFORM in ${PLATFORMS}; do
SDKROOT="${PLATFORMSROOT}/"
SDKROOT+="${PLATFORM}.platform/Developer/SDKs/${PLATFORM}${SDK}.sdk/"
CFLAGS="-arch ${ARCH2:-${ARCH}} -pipe -isysroot ${SDKROOT} -O3 -DNDEBUG"
CFLAGS+=" -miphoneos-version-min=6.0"
CFLAGS+=" -miphoneos-version-min=6.0 ${EXTRA_CFLAGS}"
set -x
export PATH="${DEVROOT}/usr/bin:${OLDPATH}"

View File

@ -67,6 +67,8 @@ EXTRA_FLAGS += -Wmissing-prototypes
EXTRA_FLAGS += -Wmissing-declarations
EXTRA_FLAGS += -Wdeclaration-after-statement
EXTRA_FLAGS += -Wshadow
EXTRA_FLAGS += -Wformat-security -Wformat-nonliteral
# EXTRA_FLAGS += -Wvla
# AVX2-specific flags:

View File

@ -1,5 +1,5 @@
.\" Hey, EMACS: -*- nroff -*-
.TH CWEBP 1 "Oct 13, 2014"
.TH CWEBP 1 "October 19, 2015"
.SH NAME
cwebp \- compress an image file to a WebP file
.SH SYNOPSIS
@ -259,7 +259,7 @@ Only print brief information (output file size and PSNR) for testing purpose.
.SH BUGS
Please report all bugs to our issue tracker:
http://code.google.com/p/webp/issues
https://bugs.chromium.org/p/webp
.br
Patches welcome! See this page to get started:
http://www.webmproject.org/code/contribute/submitting-patches/

View File

@ -1,5 +1,5 @@
.\" Hey, EMACS: -*- nroff -*-
.TH DWEBP 1 "July 22, 2014"
.TH DWEBP 1 "October 19, 2015"
.SH NAME
dwebp \- decompress a WebP file to an image file
.SH SYNOPSIS
@ -106,7 +106,7 @@ Disable all assembly optimizations.
.SH BUGS
Please report all bugs to our issue tracker:
http://code.google.com/p/webp/issues
https://bugs.chromium.org/p/webp
.br
Patches welcome! See this page to get started:
http://www.webmproject.org/code/contribute/submitting-patches/

View File

@ -1,5 +1,5 @@
.\" Hey, EMACS: -*- nroff -*-
.TH GIF2WEBP 1 "March 7, 2014"
.TH GIF2WEBP 1 "October 19, 2015"
.SH NAME
gif2webp \- Convert a GIF image to WebP
.SH SYNOPSIS
@ -111,7 +111,7 @@ Do not print anything.
.SH BUGS
Please report all bugs to our issue tracker:
http://code.google.com/p/webp/issues
https://bugs.chromium.org/p/webp
.br
Patches welcome! See this page to get started:
http://www.webmproject.org/code/contribute/submitting-patches/

View File

@ -1,5 +1,5 @@
.\" Hey, EMACS: -*- nroff -*-
.TH VWEBP 1 "July 23, 2014"
.TH VWEBP 1 "October 19, 2015"
.SH NAME
vwebp \- decompress a WebP file and display it in a window
.SH SYNOPSIS
@ -65,7 +65,7 @@ Quit.
.SH BUGS
Please report all bugs to our issue tracker:
http://code.google.com/p/webp/issues
https://bugs.chromium.org/p/webp
.br
Patches welcome! See this page to get started:
http://www.webmproject.org/code/contribute/submitting-patches/

View File

@ -1,5 +1,5 @@
.\" Hey, EMACS: -*- nroff -*-
.TH WEBPMUX 1 "August 28, 2014"
.TH WEBPMUX 1 "October 19, 2015"
.SH NAME
webpmux \- create animated WebP files from non\-animated WebP images, extract
frames from animated WebP images, and manage XMP/EXIF metadata and ICC profile.
@ -129,7 +129,7 @@ The nature of EXIF, XMP and ICC data is not checked and is assumed to be valid.
.SH BUGS
Please report all bugs to our issue tracker:
http://code.google.com/p/webp/issues
https://bugs.chromium.org/p/webp
.br
Patches welcome! See this page to get started:
http://www.webmproject.org/code/contribute/submitting\-patches/

View File

@ -35,7 +35,7 @@ libwebp_la_LIBADD += utils/libwebputils.la
# other than the ones listed on the command line, i.e., after linking, it will
# not have unresolved symbols. Some platforms (Windows among them) require all
# symbols in shared libraries to be resolved at library creation.
libwebp_la_LDFLAGS = -no-undefined -version-info 5:2:0
libwebp_la_LDFLAGS = -no-undefined -version-info 5:4:0
libwebpincludedir = $(includedir)/webp
pkgconfig_DATA = libwebp.pc
@ -47,7 +47,7 @@ if BUILD_LIBWEBPDECODER
libwebpdecoder_la_LIBADD += dsp/libwebpdspdecode.la
libwebpdecoder_la_LIBADD += utils/libwebputilsdecode.la
libwebpdecoder_la_LDFLAGS = -no-undefined -version-info 1:2:0
libwebpdecoder_la_LDFLAGS = -no-undefined -version-info 1:4:0
pkgconfig_DATA += libwebpdecoder.pc
endif

View File

@ -33,6 +33,11 @@ static int IsValidColorspace(int webp_csp_mode) {
return (webp_csp_mode >= MODE_RGB && webp_csp_mode < MODE_LAST);
}
// strictly speaking, the very last (or first, if flipped) row
// doesn't require padding.
#define MIN_BUFFER_SIZE(WIDTH, HEIGHT, STRIDE) \
(uint64_t)(STRIDE) * ((HEIGHT) - 1) + (WIDTH)
static VP8StatusCode CheckDecBuffer(const WebPDecBuffer* const buffer) {
int ok = 1;
const WEBP_CSP_MODE mode = buffer->colorspace;
@ -42,20 +47,22 @@ static VP8StatusCode CheckDecBuffer(const WebPDecBuffer* const buffer) {
ok = 0;
} else if (!WebPIsRGBMode(mode)) { // YUV checks
const WebPYUVABuffer* const buf = &buffer->u.YUVA;
const int uv_width = (width + 1) / 2;
const int uv_height = (height + 1) / 2;
const int y_stride = abs(buf->y_stride);
const int u_stride = abs(buf->u_stride);
const int v_stride = abs(buf->v_stride);
const int a_stride = abs(buf->a_stride);
const uint64_t y_size = (uint64_t)y_stride * height;
const uint64_t u_size = (uint64_t)u_stride * ((height + 1) / 2);
const uint64_t v_size = (uint64_t)v_stride * ((height + 1) / 2);
const uint64_t a_size = (uint64_t)a_stride * height;
const uint64_t y_size = MIN_BUFFER_SIZE(width, height, y_stride);
const uint64_t u_size = MIN_BUFFER_SIZE(uv_width, uv_height, u_stride);
const uint64_t v_size = MIN_BUFFER_SIZE(uv_width, uv_height, v_stride);
const uint64_t a_size = MIN_BUFFER_SIZE(width, height, a_stride);
ok &= (y_size <= buf->y_size);
ok &= (u_size <= buf->u_size);
ok &= (v_size <= buf->v_size);
ok &= (y_stride >= width);
ok &= (u_stride >= (width + 1) / 2);
ok &= (v_stride >= (width + 1) / 2);
ok &= (u_stride >= uv_width);
ok &= (v_stride >= uv_width);
ok &= (buf->y != NULL);
ok &= (buf->u != NULL);
ok &= (buf->v != NULL);
@ -67,13 +74,14 @@ static VP8StatusCode CheckDecBuffer(const WebPDecBuffer* const buffer) {
} else { // RGB checks
const WebPRGBABuffer* const buf = &buffer->u.RGBA;
const int stride = abs(buf->stride);
const uint64_t size = (uint64_t)stride * height;
const uint64_t size = MIN_BUFFER_SIZE(width, height, stride);
ok &= (size <= buf->size);
ok &= (stride >= width * kModeBpp[mode]);
ok &= (buf->rgba != NULL);
}
return ok ? VP8_STATUS_OK : VP8_STATUS_INVALID_PARAM;
}
#undef MIN_BUFFER_SIZE
static VP8StatusCode AllocateBuffer(WebPDecBuffer* const buffer) {
const int w = buffer->width;

View File

@ -357,30 +357,33 @@ static VP8StatusCode DecodeVP8FrameHeader(WebPIDecoder* const idec) {
}
// Partition #0
static int CopyParts0Data(WebPIDecoder* const idec) {
static VP8StatusCode CopyParts0Data(WebPIDecoder* const idec) {
VP8Decoder* const dec = (VP8Decoder*)idec->dec_;
VP8BitReader* const br = &dec->br_;
const size_t psize = br->buf_end_ - br->buf_;
const size_t part_size = br->buf_end_ - br->buf_;
MemBuffer* const mem = &idec->mem_;
assert(!idec->is_lossless_);
assert(mem->part0_buf_ == NULL);
assert(psize > 0);
assert(psize <= mem->part0_size_); // Format limit: no need for runtime check
// the following is a format limitation, no need for runtime check:
assert(part_size <= mem->part0_size_);
if (part_size == 0) { // can't have zero-size partition #0
return VP8_STATUS_BITSTREAM_ERROR;
}
if (mem->mode_ == MEM_MODE_APPEND) {
// We copy and grab ownership of the partition #0 data.
uint8_t* const part0_buf = (uint8_t*)WebPSafeMalloc(1ULL, psize);
uint8_t* const part0_buf = (uint8_t*)WebPSafeMalloc(1ULL, part_size);
if (part0_buf == NULL) {
return 0;
return VP8_STATUS_OUT_OF_MEMORY;
}
memcpy(part0_buf, br->buf_, psize);
memcpy(part0_buf, br->buf_, part_size);
mem->part0_buf_ = part0_buf;
br->buf_ = part0_buf;
br->buf_end_ = part0_buf + psize;
br->buf_end_ = part0_buf + part_size;
} else {
// Else: just keep pointers to the partition #0's data in dec_->br_.
}
mem->start_ += psize;
return 1;
mem->start_ += part_size;
return VP8_STATUS_OK;
}
static VP8StatusCode DecodePartition0(WebPIDecoder* const idec) {
@ -414,8 +417,10 @@ static VP8StatusCode DecodePartition0(WebPIDecoder* const idec) {
dec->mt_method_ = VP8GetThreadMethod(params->options, NULL,
io->width, io->height);
VP8InitDithering(params->options, dec);
if (!CopyParts0Data(idec)) {
return IDecError(idec, VP8_STATUS_OUT_OF_MEMORY);
dec->status_ = CopyParts0Data(idec);
if (dec->status_ != VP8_STATUS_OK) {
return IDecError(idec, dec->status_);
}
// Finish setting up the decoding parameters. Will call io->setup().

View File

@ -322,37 +322,31 @@ static int InitYUVRescaler(const VP8Io* const io, WebPDecParams* const p) {
const size_t work_size = 2 * out_width; // scratch memory for luma rescaler
const size_t uv_work_size = 2 * uv_out_width; // and for each u/v ones
size_t tmp_size;
int32_t* work;
rescaler_t* work;
tmp_size = (work_size + 2 * uv_work_size) * sizeof(*work);
if (has_alpha) {
tmp_size += work_size * sizeof(*work);
}
p->memory = WebPSafeCalloc(1ULL, tmp_size);
p->memory = WebPSafeMalloc(1ULL, tmp_size);
if (p->memory == NULL) {
return 0; // memory error
}
work = (int32_t*)p->memory;
work = (rescaler_t*)p->memory;
WebPRescalerInit(&p->scaler_y, io->mb_w, io->mb_h,
buf->y, out_width, out_height, buf->y_stride, 1,
io->mb_w, out_width, io->mb_h, out_height,
work);
WebPRescalerInit(&p->scaler_u, uv_in_width, uv_in_height,
buf->u, uv_out_width, uv_out_height, buf->u_stride, 1,
uv_in_width, uv_out_width,
uv_in_height, uv_out_height,
work + work_size);
WebPRescalerInit(&p->scaler_v, uv_in_width, uv_in_height,
buf->v, uv_out_width, uv_out_height, buf->v_stride, 1,
uv_in_width, uv_out_width,
uv_in_height, uv_out_height,
work + work_size + uv_work_size);
p->emit = EmitRescaledYUV;
if (has_alpha) {
WebPRescalerInit(&p->scaler_a, io->mb_w, io->mb_h,
buf->a, out_width, out_height, buf->a_stride, 1,
io->mb_w, out_width, io->mb_h, out_height,
work + work_size + 2 * uv_work_size);
p->emit_alpha = EmitRescaledAlphaYUV;
WebPInitAlphaProcessing();
@ -375,9 +369,9 @@ static int ExportRGB(WebPDecParams* const p, int y_pos) {
WebPRescalerHasPendingOutput(&p->scaler_u)) {
assert(p->last_y + y_pos + num_lines_out < p->output->height);
assert(p->scaler_u.y_accum == p->scaler_v.y_accum);
WebPRescalerExportRow(&p->scaler_y, 0);
WebPRescalerExportRow(&p->scaler_u, 0);
WebPRescalerExportRow(&p->scaler_v, 0);
WebPRescalerExportRow(&p->scaler_y);
WebPRescalerExportRow(&p->scaler_u);
WebPRescalerExportRow(&p->scaler_v);
convert(p->scaler_y.dst, p->scaler_u.dst, p->scaler_v.dst,
dst, p->scaler_y.dst_width);
dst += buf->stride;
@ -425,7 +419,7 @@ static int ExportAlpha(WebPDecParams* const p, int y_pos) {
while (WebPRescalerHasPendingOutput(&p->scaler_a)) {
int i;
assert(p->last_y + y_pos + num_lines_out < p->output->height);
WebPRescalerExportRow(&p->scaler_a, 0);
WebPRescalerExportRow(&p->scaler_a);
for (i = 0; i < width; ++i) {
const uint32_t alpha_value = p->scaler_a.dst[i];
dst[4 * i] = alpha_value;
@ -458,7 +452,7 @@ static int ExportAlphaRGBA4444(WebPDecParams* const p, int y_pos) {
while (WebPRescalerHasPendingOutput(&p->scaler_a)) {
int i;
assert(p->last_y + y_pos + num_lines_out < p->output->height);
WebPRescalerExportRow(&p->scaler_a, 0);
WebPRescalerExportRow(&p->scaler_a);
for (i = 0; i < width; ++i) {
// Fill in the alpha value (converted to 4 bits).
const uint32_t alpha_value = p->scaler_a.dst[i] >> 4;
@ -495,7 +489,7 @@ static int InitRGBRescaler(const VP8Io* const io, WebPDecParams* const p) {
const int uv_in_width = (io->mb_w + 1) >> 1;
const int uv_in_height = (io->mb_h + 1) >> 1;
const size_t work_size = 2 * out_width; // scratch memory for one rescaler
int32_t* work; // rescalers work area
rescaler_t* work; // rescalers work area
uint8_t* tmp; // tmp storage for scaled YUV444 samples before RGB conversion
size_t tmp_size1, tmp_size2, total_size;
@ -506,30 +500,26 @@ static int InitRGBRescaler(const VP8Io* const io, WebPDecParams* const p) {
tmp_size2 += out_width;
}
total_size = tmp_size1 * sizeof(*work) + tmp_size2 * sizeof(*tmp);
p->memory = WebPSafeCalloc(1ULL, total_size);
p->memory = WebPSafeMalloc(1ULL, total_size);
if (p->memory == NULL) {
return 0; // memory error
}
work = (int32_t*)p->memory;
work = (rescaler_t*)p->memory;
tmp = (uint8_t*)(work + tmp_size1);
WebPRescalerInit(&p->scaler_y, io->mb_w, io->mb_h,
tmp + 0 * out_width, out_width, out_height, 0, 1,
io->mb_w, out_width, io->mb_h, out_height,
work + 0 * work_size);
WebPRescalerInit(&p->scaler_u, uv_in_width, uv_in_height,
tmp + 1 * out_width, out_width, out_height, 0, 1,
io->mb_w, 2 * out_width, io->mb_h, 2 * out_height,
work + 1 * work_size);
WebPRescalerInit(&p->scaler_v, uv_in_width, uv_in_height,
tmp + 2 * out_width, out_width, out_height, 0, 1,
io->mb_w, 2 * out_width, io->mb_h, 2 * out_height,
work + 2 * work_size);
p->emit = EmitRescaledRGB;
if (has_alpha) {
WebPRescalerInit(&p->scaler_a, io->mb_w, io->mb_h,
tmp + 3 * out_width, out_width, out_height, 0, 1,
io->mb_w, out_width, io->mb_h, out_height,
work + 3 * work_size);
p->emit_alpha = EmitRescaledAlphaRGB;
if (p->output->colorspace == MODE_RGBA_4444 ||

View File

@ -562,6 +562,7 @@ int VP8DecodeMB(VP8Decoder* const dec, VP8BitReader* const token_br) {
}
block->non_zero_y_ = 0;
block->non_zero_uv_ = 0;
block->dither_ = 0;
}
if (dec->filter_type_ > 0) { // store filter info

View File

@ -31,7 +31,7 @@ extern "C" {
// version numbers
#define DEC_MAJ_VERSION 0
#define DEC_MIN_VERSION 4
#define DEC_REV_VERSION 2
#define DEC_REV_VERSION 4
// intra prediction modes
enum { B_DC_PRED = 0, // 4x4 modes

View File

@ -390,13 +390,13 @@ static int AllocateAndInitRescaler(VP8LDecoder* const dec, VP8Io* const io) {
const int in_height = io->mb_h;
const int out_height = io->scaled_height;
const uint64_t work_size = 2 * num_channels * (uint64_t)out_width;
int32_t* work; // Rescaler work area.
const uint64_t scaled_data_size = num_channels * (uint64_t)out_width;
rescaler_t* work; // Rescaler work area.
const uint64_t scaled_data_size = (uint64_t)out_width;
uint32_t* scaled_data; // Temporary storage for scaled BGRA data.
const uint64_t memory_size = sizeof(*dec->rescaler) +
work_size * sizeof(*work) +
scaled_data_size * sizeof(*scaled_data);
uint8_t* memory = (uint8_t*)WebPSafeCalloc(memory_size, sizeof(*memory));
uint8_t* memory = (uint8_t*)WebPSafeMalloc(memory_size, sizeof(*memory));
if (memory == NULL) {
dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
return 0;
@ -406,13 +406,12 @@ static int AllocateAndInitRescaler(VP8LDecoder* const dec, VP8Io* const io) {
dec->rescaler = (WebPRescaler*)memory;
memory += sizeof(*dec->rescaler);
work = (int32_t*)memory;
work = (rescaler_t*)memory;
memory += work_size * sizeof(*work);
scaled_data = (uint32_t*)memory;
WebPRescalerInit(dec->rescaler, in_width, in_height, (uint8_t*)scaled_data,
out_width, out_height, 0, num_channels,
in_width, out_width, in_height, out_height, work);
out_width, out_height, 0, num_channels, work);
return 1;
}
@ -427,7 +426,7 @@ static int Export(WebPRescaler* const rescaler, WEBP_CSP_MODE colorspace,
int num_lines_out = 0;
while (WebPRescalerHasPendingOutput(rescaler)) {
uint8_t* const dst = rgba + num_lines_out * rgba_stride;
WebPRescalerExportRow(rescaler, 0);
WebPRescalerExportRow(rescaler);
WebPMultARGBRow(src, dst_width, 1);
VP8LConvertFromBGRA(src, dst_width, colorspace, dst);
++num_lines_out;
@ -545,7 +544,7 @@ static int ExportYUVA(const VP8LDecoder* const dec, int y_pos) {
const int dst_width = rescaler->dst_width;
int num_lines_out = 0;
while (WebPRescalerHasPendingOutput(rescaler)) {
WebPRescalerExportRow(rescaler, 0);
WebPRescalerExportRow(rescaler);
WebPMultARGBRow(src, dst_width, 1);
ConvertToYUVA(src, dst_width, y_pos, dec->output_);
++y_pos;
@ -900,7 +899,7 @@ static int DecodeImageData(VP8LDecoder* const dec, uint32_t* const data,
process_func(dec, row);
}
}
if (src < src_last) {
if (src < src_end) {
if (col & mask) htree_group = GetHtreeGroupForPos(hdr, col, row);
if (color_cache != NULL) {
while (last_cached < src) {

View File

@ -311,7 +311,12 @@ int (*WebPExtractAlpha)(const uint8_t*, int, int, int, uint8_t*, int);
extern void WebPInitAlphaProcessingSSE2(void);
static volatile VP8CPUInfo alpha_processing_last_cpuinfo_used =
(VP8CPUInfo)&alpha_processing_last_cpuinfo_used;
void WebPInitAlphaProcessing(void) {
if (alpha_processing_last_cpuinfo_used == VP8GetCPUInfo) return;
WebPMultARGBRow = MultARGBRow;
WebPMultRow = MultRow;
WebPApplyAlphaMultiply = ApplyAlphaMultiply;
@ -326,4 +331,5 @@ void WebPInitAlphaProcessing(void) {
}
#endif
}
alpha_processing_last_cpuinfo_used = VP8GetCPUInfo;
}

View File

@ -29,16 +29,18 @@ static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
"cpuid\n"
"xchg %%edi, %%ebx\n"
: "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
: "a"(info_type));
: "a"(info_type), "c"(0));
}
#elif defined(__i386__) || defined(__x86_64__)
static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
__asm__ volatile (
"cpuid\n"
: "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
: "a"(info_type));
: "a"(info_type), "c"(0));
}
#elif defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 150030729 // >= VS2008 SP1
#elif (defined(_M_X64) || defined(_M_IX86)) && \
defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 150030729 // >= VS2008 SP1
#include <intrin.h>
#define GetCPUInfo(info, type) __cpuidex(info, type, 0) // set ecx=0
#elif defined(WEBP_MSC_SSE2)
#define GetCPUInfo __cpuid
@ -55,7 +57,9 @@ static WEBP_INLINE uint64_t xgetbv(void) {
: "=a"(eax), "=d"(edx) : "c" (ecx));
return ((uint64_t)edx << 32) | eax;
}
#elif defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 160040219 // >= VS2010 SP1
#elif (defined(_M_X64) || defined(_M_IX86)) && \
defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 160040219 // >= VS2010 SP1
#include <immintrin.h>
#define xgetbv() _xgetbv(0)
#elif defined(_MSC_VER) && defined(_M_IX86)
static WEBP_INLINE uint64_t xgetbv(void) {

View File

@ -688,7 +688,12 @@ extern void VP8DspInitSSE2(void);
extern void VP8DspInitNEON(void);
extern void VP8DspInitMIPS32(void);
static volatile VP8CPUInfo dec_last_cpuinfo_used =
(VP8CPUInfo)&dec_last_cpuinfo_used;
void VP8DspInit(void) {
if (dec_last_cpuinfo_used == VP8GetCPUInfo) return;
VP8InitClipTables();
VP8TransformWHT = TransformWHT;
@ -727,5 +732,5 @@ void VP8DspInit(void) {
}
#endif
}
dec_last_cpuinfo_used = VP8GetCPUInfo;
}

View File

@ -24,24 +24,24 @@
// Load/Store vertical edge
#define LOAD8x4(c1, c2, c3, c4, b1, b2, stride) \
"vld4.8 {" #c1"[0], " #c2"[0], " #c3"[0], " #c4"[0]}," #b1 "," #stride"\n" \
"vld4.8 {" #c1"[1], " #c2"[1], " #c3"[1], " #c4"[1]}," #b2 "," #stride"\n" \
"vld4.8 {" #c1"[2], " #c2"[2], " #c3"[2], " #c4"[2]}," #b1 "," #stride"\n" \
"vld4.8 {" #c1"[3], " #c2"[3], " #c3"[3], " #c4"[3]}," #b2 "," #stride"\n" \
"vld4.8 {" #c1"[4], " #c2"[4], " #c3"[4], " #c4"[4]}," #b1 "," #stride"\n" \
"vld4.8 {" #c1"[5], " #c2"[5], " #c3"[5], " #c4"[5]}," #b2 "," #stride"\n" \
"vld4.8 {" #c1"[6], " #c2"[6], " #c3"[6], " #c4"[6]}," #b1 "," #stride"\n" \
"vld4.8 {" #c1"[7], " #c2"[7], " #c3"[7], " #c4"[7]}," #b2 "," #stride"\n"
"vld4.8 {" #c1 "[0]," #c2 "[0]," #c3 "[0]," #c4 "[0]}," #b1 "," #stride "\n" \
"vld4.8 {" #c1 "[1]," #c2 "[1]," #c3 "[1]," #c4 "[1]}," #b2 "," #stride "\n" \
"vld4.8 {" #c1 "[2]," #c2 "[2]," #c3 "[2]," #c4 "[2]}," #b1 "," #stride "\n" \
"vld4.8 {" #c1 "[3]," #c2 "[3]," #c3 "[3]," #c4 "[3]}," #b2 "," #stride "\n" \
"vld4.8 {" #c1 "[4]," #c2 "[4]," #c3 "[4]," #c4 "[4]}," #b1 "," #stride "\n" \
"vld4.8 {" #c1 "[5]," #c2 "[5]," #c3 "[5]," #c4 "[5]}," #b2 "," #stride "\n" \
"vld4.8 {" #c1 "[6]," #c2 "[6]," #c3 "[6]," #c4 "[6]}," #b1 "," #stride "\n" \
"vld4.8 {" #c1 "[7]," #c2 "[7]," #c3 "[7]," #c4 "[7]}," #b2 "," #stride "\n"
#define STORE8x2(c1, c2, p, stride) \
"vst2.8 {" #c1"[0], " #c2"[0]}," #p "," #stride " \n" \
"vst2.8 {" #c1"[1], " #c2"[1]}," #p "," #stride " \n" \
"vst2.8 {" #c1"[2], " #c2"[2]}," #p "," #stride " \n" \
"vst2.8 {" #c1"[3], " #c2"[3]}," #p "," #stride " \n" \
"vst2.8 {" #c1"[4], " #c2"[4]}," #p "," #stride " \n" \
"vst2.8 {" #c1"[5], " #c2"[5]}," #p "," #stride " \n" \
"vst2.8 {" #c1"[6], " #c2"[6]}," #p "," #stride " \n" \
"vst2.8 {" #c1"[7], " #c2"[7]}," #p "," #stride " \n"
"vst2.8 {" #c1 "[0], " #c2 "[0]}," #p "," #stride " \n" \
"vst2.8 {" #c1 "[1], " #c2 "[1]}," #p "," #stride " \n" \
"vst2.8 {" #c1 "[2], " #c2 "[2]}," #p "," #stride " \n" \
"vst2.8 {" #c1 "[3], " #c2 "[3]}," #p "," #stride " \n" \
"vst2.8 {" #c1 "[4], " #c2 "[4]}," #p "," #stride " \n" \
"vst2.8 {" #c1 "[5], " #c2 "[5]}," #p "," #stride " \n" \
"vst2.8 {" #c1 "[6], " #c2 "[6]}," #p "," #stride " \n" \
"vst2.8 {" #c1 "[7], " #c2 "[7]}," #p "," #stride " \n"
#if !defined(WORK_AROUND_GCC)

View File

@ -36,14 +36,9 @@ extern "C" {
# define LOCAL_GCC_PREREQ(maj, min) 0
#endif
#ifdef __clang__
# define LOCAL_CLANG_VERSION ((__clang_major__ << 8) | __clang_minor__)
# define LOCAL_CLANG_PREREQ(maj, min) \
(LOCAL_CLANG_VERSION >= (((maj) << 8) | (min)))
#else
# define LOCAL_CLANG_VERSION 0
# define LOCAL_CLANG_PREREQ(maj, min) 0
#endif // __clang__
#ifndef __has_builtin
# define __has_builtin(x) 0
#endif
#if defined(_MSC_VER) && _MSC_VER > 1310 && \
(defined(_M_X64) || defined(_M_IX86))
@ -66,11 +61,15 @@ extern "C" {
#define WEBP_ANDROID_NEON // Android targets that might support NEON
#endif
#if defined(__ARM_NEON__) || defined(WEBP_ANDROID_NEON) || defined(__aarch64__)
// The intrinsics currently cause compiler errors with arm-nacl-gcc and the
// inline assembly would need to be modified for use with Native Client.
#if (defined(__ARM_NEON__) || defined(WEBP_ANDROID_NEON) || \
defined(__aarch64__)) && !defined(__native_client__)
#define WEBP_USE_NEON
#endif
#if defined(__mips__) && !defined(__mips64) && (__mips_isa_rev < 6)
#if defined(__mips__) && !defined(__mips64) && \
defined(__mips_isa_rev) && (__mips_isa_rev >= 1) && (__mips_isa_rev < 6)
#define WEBP_USE_MIPS32
#if (__mips_isa_rev >= 2)
#define WEBP_USE_MIPS32_R2

View File

@ -692,7 +692,12 @@ extern void VP8EncDspInitAVX2(void);
extern void VP8EncDspInitNEON(void);
extern void VP8EncDspInitMIPS32(void);
static volatile VP8CPUInfo enc_last_cpuinfo_used =
(VP8CPUInfo)&enc_last_cpuinfo_used;
void VP8EncDspInit(void) {
if (enc_last_cpuinfo_used == VP8GetCPUInfo) return;
VP8DspInit(); // common inverse transforms
InitTables();
@ -737,5 +742,6 @@ void VP8EncDspInit(void) {
}
#endif
}
enc_last_cpuinfo_used = VP8GetCPUInfo;
}

View File

@ -34,26 +34,26 @@ static const int kC2 = 35468;
// TEMP0..TEMP3 - registers for corresponding tmp elements
// TEMP4..TEMP5 - temporary registers
#define VERTICAL_PASS(A, B, C, D, TEMP4, TEMP0, TEMP1, TEMP2, TEMP3) \
"lh %[temp16], "#A"(%[temp20]) \n\t" \
"lh %[temp18], "#B"(%[temp20]) \n\t" \
"lh %[temp17], "#C"(%[temp20]) \n\t" \
"lh %[temp19], "#D"(%[temp20]) \n\t" \
"addu %["#TEMP4"], %[temp16], %[temp18] \n\t" \
"subu %[temp16], %[temp16], %[temp18] \n\t" \
"mul %["#TEMP0"], %[temp17], %[kC2] \n\t" \
"mul %[temp18], %[temp19], %[kC1] \n\t" \
"mul %[temp17], %[temp17], %[kC1] \n\t" \
"mul %[temp19], %[temp19], %[kC2] \n\t" \
"sra %["#TEMP0"], %["#TEMP0"], 16 \n\n" \
"sra %[temp18], %[temp18], 16 \n\n" \
"sra %[temp17], %[temp17], 16 \n\n" \
"sra %[temp19], %[temp19], 16 \n\n" \
"subu %["#TEMP2"], %["#TEMP0"], %[temp18] \n\t" \
"addu %["#TEMP3"], %[temp17], %[temp19] \n\t" \
"addu %["#TEMP0"], %["#TEMP4"], %["#TEMP3"] \n\t" \
"addu %["#TEMP1"], %[temp16], %["#TEMP2"] \n\t" \
"subu %["#TEMP2"], %[temp16], %["#TEMP2"] \n\t" \
"subu %["#TEMP3"], %["#TEMP4"], %["#TEMP3"] \n\t"
"lh %[temp16], " #A "(%[temp20]) \n\t" \
"lh %[temp18], " #B "(%[temp20]) \n\t" \
"lh %[temp17], " #C "(%[temp20]) \n\t" \
"lh %[temp19], " #D "(%[temp20]) \n\t" \
"addu %[" #TEMP4 "], %[temp16], %[temp18] \n\t" \
"subu %[temp16], %[temp16], %[temp18] \n\t" \
"mul %[" #TEMP0 "], %[temp17], %[kC2] \n\t" \
"mul %[temp18], %[temp19], %[kC1] \n\t" \
"mul %[temp17], %[temp17], %[kC1] \n\t" \
"mul %[temp19], %[temp19], %[kC2] \n\t" \
"sra %[" #TEMP0 "], %[" #TEMP0 "], 16 \n\n" \
"sra %[temp18], %[temp18], 16 \n\n" \
"sra %[temp17], %[temp17], 16 \n\n" \
"sra %[temp19], %[temp19], 16 \n\n" \
"subu %[" #TEMP2 "], %[" #TEMP0 "], %[temp18] \n\t" \
"addu %[" #TEMP3 "], %[temp17], %[temp19] \n\t" \
"addu %[" #TEMP0 "], %[" #TEMP4 "], %[" #TEMP3 "] \n\t" \
"addu %[" #TEMP1 "], %[temp16], %[" #TEMP2 "] \n\t" \
"subu %[" #TEMP2 "], %[temp16], %[" #TEMP2 "] \n\t" \
"subu %[" #TEMP3 "], %[" #TEMP4 "], %[" #TEMP3 "] \n\t"
// macro for one horizontal pass in ITransformOne
// MUL and STORE macros inlined
@ -61,59 +61,59 @@ static const int kC2 = 35468;
// temp0..temp15 holds tmp[0]..tmp[15]
// A..D - offsets in bytes to load from ref and store to dst buffer
// TEMP0, TEMP4, TEMP8 and TEMP12 - registers for corresponding tmp elements
#define HORIZONTAL_PASS(A, B, C, D, TEMP0, TEMP4, TEMP8, TEMP12) \
"addiu %["#TEMP0"], %["#TEMP0"], 4 \n\t" \
"addu %[temp16], %["#TEMP0"], %["#TEMP8"] \n\t" \
"subu %[temp17], %["#TEMP0"], %["#TEMP8"] \n\t" \
"mul %["#TEMP0"], %["#TEMP4"], %[kC2] \n\t" \
"mul %["#TEMP8"], %["#TEMP12"], %[kC1] \n\t" \
"mul %["#TEMP4"], %["#TEMP4"], %[kC1] \n\t" \
"mul %["#TEMP12"], %["#TEMP12"], %[kC2] \n\t" \
"sra %["#TEMP0"], %["#TEMP0"], 16 \n\t" \
"sra %["#TEMP8"], %["#TEMP8"], 16 \n\t" \
"sra %["#TEMP4"], %["#TEMP4"], 16 \n\t" \
"sra %["#TEMP12"], %["#TEMP12"], 16 \n\t" \
"subu %[temp18], %["#TEMP0"], %["#TEMP8"] \n\t" \
"addu %[temp19], %["#TEMP4"], %["#TEMP12"] \n\t" \
"addu %["#TEMP0"], %[temp16], %[temp19] \n\t" \
"addu %["#TEMP4"], %[temp17], %[temp18] \n\t" \
"subu %["#TEMP8"], %[temp17], %[temp18] \n\t" \
"subu %["#TEMP12"], %[temp16], %[temp19] \n\t" \
"lw %[temp20], 0(%[args]) \n\t" \
"sra %["#TEMP0"], %["#TEMP0"], 3 \n\t" \
"sra %["#TEMP4"], %["#TEMP4"], 3 \n\t" \
"sra %["#TEMP8"], %["#TEMP8"], 3 \n\t" \
"sra %["#TEMP12"], %["#TEMP12"], 3 \n\t" \
"lbu %[temp16], "#A"(%[temp20]) \n\t" \
"lbu %[temp17], "#B"(%[temp20]) \n\t" \
"lbu %[temp18], "#C"(%[temp20]) \n\t" \
"lbu %[temp19], "#D"(%[temp20]) \n\t" \
"addu %["#TEMP0"], %[temp16], %["#TEMP0"] \n\t" \
"addu %["#TEMP4"], %[temp17], %["#TEMP4"] \n\t" \
"addu %["#TEMP8"], %[temp18], %["#TEMP8"] \n\t" \
"addu %["#TEMP12"], %[temp19], %["#TEMP12"] \n\t" \
"slt %[temp16], %["#TEMP0"], $zero \n\t" \
"slt %[temp17], %["#TEMP4"], $zero \n\t" \
"slt %[temp18], %["#TEMP8"], $zero \n\t" \
"slt %[temp19], %["#TEMP12"], $zero \n\t" \
"movn %["#TEMP0"], $zero, %[temp16] \n\t" \
"movn %["#TEMP4"], $zero, %[temp17] \n\t" \
"movn %["#TEMP8"], $zero, %[temp18] \n\t" \
"movn %["#TEMP12"], $zero, %[temp19] \n\t" \
"addiu %[temp20], $zero, 255 \n\t" \
"slt %[temp16], %["#TEMP0"], %[temp20] \n\t" \
"slt %[temp17], %["#TEMP4"], %[temp20] \n\t" \
"slt %[temp18], %["#TEMP8"], %[temp20] \n\t" \
"slt %[temp19], %["#TEMP12"], %[temp20] \n\t" \
"movz %["#TEMP0"], %[temp20], %[temp16] \n\t" \
"movz %["#TEMP4"], %[temp20], %[temp17] \n\t" \
"lw %[temp16], 8(%[args]) \n\t" \
"movz %["#TEMP8"], %[temp20], %[temp18] \n\t" \
"movz %["#TEMP12"], %[temp20], %[temp19] \n\t" \
"sb %["#TEMP0"], "#A"(%[temp16]) \n\t" \
"sb %["#TEMP4"], "#B"(%[temp16]) \n\t" \
"sb %["#TEMP8"], "#C"(%[temp16]) \n\t" \
"sb %["#TEMP12"], "#D"(%[temp16]) \n\t"
#define HORIZONTAL_PASS(A, B, C, D, TEMP0, TEMP4, TEMP8, TEMP12) \
"addiu %[" #TEMP0 "], %[" #TEMP0 "], 4 \n\t" \
"addu %[temp16], %[" #TEMP0 "], %[" #TEMP8 "] \n\t" \
"subu %[temp17], %[" #TEMP0 "], %[" #TEMP8 "] \n\t" \
"mul %[" #TEMP0 "], %[" #TEMP4 "], %[kC2] \n\t" \
"mul %[" #TEMP8 "], %[" #TEMP12 "], %[kC1] \n\t" \
"mul %[" #TEMP4 "], %[" #TEMP4 "], %[kC1] \n\t" \
"mul %[" #TEMP12 "], %[" #TEMP12 "], %[kC2] \n\t" \
"sra %[" #TEMP0 "], %[" #TEMP0 "], 16 \n\t" \
"sra %[" #TEMP8 "], %[" #TEMP8 "], 16 \n\t" \
"sra %[" #TEMP4 "], %[" #TEMP4 "], 16 \n\t" \
"sra %[" #TEMP12 "], %[" #TEMP12 "], 16 \n\t" \
"subu %[temp18], %[" #TEMP0 "], %[" #TEMP8 "] \n\t" \
"addu %[temp19], %[" #TEMP4 "], %[" #TEMP12 "] \n\t" \
"addu %[" #TEMP0 "], %[temp16], %[temp19] \n\t" \
"addu %[" #TEMP4 "], %[temp17], %[temp18] \n\t" \
"subu %[" #TEMP8 "], %[temp17], %[temp18] \n\t" \
"subu %[" #TEMP12 "], %[temp16], %[temp19] \n\t" \
"lw %[temp20], 0(%[args]) \n\t" \
"sra %[" #TEMP0 "], %[" #TEMP0 "], 3 \n\t" \
"sra %[" #TEMP4 "], %[" #TEMP4 "], 3 \n\t" \
"sra %[" #TEMP8 "], %[" #TEMP8 "], 3 \n\t" \
"sra %[" #TEMP12 "], %[" #TEMP12 "], 3 \n\t" \
"lbu %[temp16], " #A "(%[temp20]) \n\t" \
"lbu %[temp17], " #B "(%[temp20]) \n\t" \
"lbu %[temp18], " #C "(%[temp20]) \n\t" \
"lbu %[temp19], " #D "(%[temp20]) \n\t" \
"addu %[" #TEMP0 "], %[temp16], %[" #TEMP0 "] \n\t" \
"addu %[" #TEMP4 "], %[temp17], %[" #TEMP4 "] \n\t" \
"addu %[" #TEMP8 "], %[temp18], %[" #TEMP8 "] \n\t" \
"addu %[" #TEMP12 "], %[temp19], %[" #TEMP12 "] \n\t" \
"slt %[temp16], %[" #TEMP0 "], $zero \n\t" \
"slt %[temp17], %[" #TEMP4 "], $zero \n\t" \
"slt %[temp18], %[" #TEMP8 "], $zero \n\t" \
"slt %[temp19], %[" #TEMP12 "], $zero \n\t" \
"movn %[" #TEMP0 "], $zero, %[temp16] \n\t" \
"movn %[" #TEMP4 "], $zero, %[temp17] \n\t" \
"movn %[" #TEMP8 "], $zero, %[temp18] \n\t" \
"movn %[" #TEMP12 "], $zero, %[temp19] \n\t" \
"addiu %[temp20], $zero, 255 \n\t" \
"slt %[temp16], %[" #TEMP0 "], %[temp20] \n\t" \
"slt %[temp17], %[" #TEMP4 "], %[temp20] \n\t" \
"slt %[temp18], %[" #TEMP8 "], %[temp20] \n\t" \
"slt %[temp19], %[" #TEMP12 "], %[temp20] \n\t" \
"movz %[" #TEMP0 "], %[temp20], %[temp16] \n\t" \
"movz %[" #TEMP4 "], %[temp20], %[temp17] \n\t" \
"lw %[temp16], 8(%[args]) \n\t" \
"movz %[" #TEMP8 "], %[temp20], %[temp18] \n\t" \
"movz %[" #TEMP12 "], %[temp20], %[temp19] \n\t" \
"sb %[" #TEMP0 "], " #A "(%[temp16]) \n\t" \
"sb %[" #TEMP4 "], " #B "(%[temp16]) \n\t" \
"sb %[" #TEMP8 "], " #C "(%[temp16]) \n\t" \
"sb %[" #TEMP12 "], " #D "(%[temp16]) \n\t"
// Does one or two inverse transforms.
static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
@ -164,9 +164,9 @@ static void ITransform(const uint8_t* ref, const int16_t* in,
// K - offset in bytes (kZigzag[n] * 4)
// N - offset in bytes (n * 2)
#define QUANTIZE_ONE(J, K, N) \
"lh %[temp0], "#J"(%[ppin]) \n\t" \
"lhu %[temp1], "#J"(%[ppsharpen]) \n\t" \
"lw %[temp2], "#K"(%[ppzthresh]) \n\t" \
"lh %[temp0], " #J "(%[ppin]) \n\t" \
"lhu %[temp1], " #J "(%[ppsharpen]) \n\t" \
"lw %[temp2], " #K "(%[ppzthresh]) \n\t" \
"sra %[sign], %[temp0], 15 \n\t" \
"xor %[coeff], %[temp0], %[sign] \n\t" \
"subu %[coeff], %[coeff], %[sign] \n\t" \
@ -175,9 +175,9 @@ static void ITransform(const uint8_t* ref, const int16_t* in,
"addiu %[temp5], $zero, 0 \n\t" \
"addiu %[level], $zero, 0 \n\t" \
"beqz %[temp4], 2f \n\t" \
"lhu %[temp1], "#J"(%[ppiq]) \n\t" \
"lw %[temp2], "#K"(%[ppbias]) \n\t" \
"lhu %[temp3], "#J"(%[ppq]) \n\t" \
"lhu %[temp1], " #J "(%[ppiq]) \n\t" \
"lw %[temp2], " #K "(%[ppbias]) \n\t" \
"lhu %[temp3], " #J "(%[ppq]) \n\t" \
"mul %[level], %[coeff], %[temp1] \n\t" \
"addu %[level], %[level], %[temp2] \n\t" \
"sra %[level], %[level], 17 \n\t" \
@ -187,8 +187,8 @@ static void ITransform(const uint8_t* ref, const int16_t* in,
"subu %[level], %[level], %[sign] \n\t" \
"mul %[temp5], %[level], %[temp3] \n\t" \
"2: \n\t" \
"sh %[temp5], "#J"(%[ppin]) \n\t" \
"sh %[level], "#N"(%[pout]) \n\t"
"sh %[temp5], " #J "(%[ppin]) \n\t" \
"sh %[level], " #N "(%[pout]) \n\t"
static int QuantizeBlock(int16_t in[16], int16_t out[16],
const VP8Matrix* const mtx) {
@ -249,14 +249,14 @@ static int QuantizeBlock(int16_t in[16], int16_t out[16],
// E..H - offsets in bytes to store first results to tmp buffer
// E1..H1 - offsets in bytes to store second results to tmp buffer
#define HORIZONTAL_PASS(A, B, C, D, E, F, G, H, E1, F1, G1, H1) \
"lbu %[temp0], "#A"(%[a]) \n\t" \
"lbu %[temp1], "#B"(%[a]) \n\t" \
"lbu %[temp2], "#C"(%[a]) \n\t" \
"lbu %[temp3], "#D"(%[a]) \n\t" \
"lbu %[temp4], "#A"(%[b]) \n\t" \
"lbu %[temp5], "#B"(%[b]) \n\t" \
"lbu %[temp6], "#C"(%[b]) \n\t" \
"lbu %[temp7], "#D"(%[b]) \n\t" \
"lbu %[temp0], " #A "(%[a]) \n\t" \
"lbu %[temp1], " #B "(%[a]) \n\t" \
"lbu %[temp2], " #C "(%[a]) \n\t" \
"lbu %[temp3], " #D "(%[a]) \n\t" \
"lbu %[temp4], " #A "(%[b]) \n\t" \
"lbu %[temp5], " #B "(%[b]) \n\t" \
"lbu %[temp6], " #C "(%[b]) \n\t" \
"lbu %[temp7], " #D "(%[b]) \n\t" \
"addu %[temp8], %[temp0], %[temp2] \n\t" \
"subu %[temp0], %[temp0], %[temp2] \n\t" \
"addu %[temp2], %[temp1], %[temp3] \n\t" \
@ -273,14 +273,14 @@ static int QuantizeBlock(int16_t in[16], int16_t out[16],
"subu %[temp3], %[temp3], %[temp6] \n\t" \
"addu %[temp6], %[temp4], %[temp5] \n\t" \
"subu %[temp4], %[temp4], %[temp5] \n\t" \
"sw %[temp7], "#E"(%[tmp]) \n\t" \
"sw %[temp2], "#H"(%[tmp]) \n\t" \
"sw %[temp8], "#F"(%[tmp]) \n\t" \
"sw %[temp0], "#G"(%[tmp]) \n\t" \
"sw %[temp1], "#E1"(%[tmp]) \n\t" \
"sw %[temp3], "#H1"(%[tmp]) \n\t" \
"sw %[temp6], "#F1"(%[tmp]) \n\t" \
"sw %[temp4], "#G1"(%[tmp]) \n\t"
"sw %[temp7], " #E "(%[tmp]) \n\t" \
"sw %[temp2], " #H "(%[tmp]) \n\t" \
"sw %[temp8], " #F "(%[tmp]) \n\t" \
"sw %[temp0], " #G "(%[tmp]) \n\t" \
"sw %[temp1], " #E1 "(%[tmp]) \n\t" \
"sw %[temp3], " #H1 "(%[tmp]) \n\t" \
"sw %[temp6], " #F1 "(%[tmp]) \n\t" \
"sw %[temp4], " #G1 "(%[tmp]) \n\t"
// macro for one vertical pass in Disto4x4 (TTransform)
// two calls of function TTransform are merged into single one
@ -295,10 +295,10 @@ static int QuantizeBlock(int16_t in[16], int16_t out[16],
// A1..D1 - offsets in bytes to load second results from tmp buffer
// E..H - offsets in bytes to load from w buffer
#define VERTICAL_PASS(A, B, C, D, A1, B1, C1, D1, E, F, G, H) \
"lw %[temp0], "#A1"(%[tmp]) \n\t" \
"lw %[temp1], "#C1"(%[tmp]) \n\t" \
"lw %[temp2], "#B1"(%[tmp]) \n\t" \
"lw %[temp3], "#D1"(%[tmp]) \n\t" \
"lw %[temp0], " #A1 "(%[tmp]) \n\t" \
"lw %[temp1], " #C1 "(%[tmp]) \n\t" \
"lw %[temp2], " #B1 "(%[tmp]) \n\t" \
"lw %[temp3], " #D1 "(%[tmp]) \n\t" \
"addu %[temp8], %[temp0], %[temp1] \n\t" \
"subu %[temp0], %[temp0], %[temp1] \n\t" \
"addu %[temp1], %[temp2], %[temp3] \n\t" \
@ -319,18 +319,18 @@ static int QuantizeBlock(int16_t in[16], int16_t out[16],
"subu %[temp1], %[temp1], %[temp5] \n\t" \
"subu %[temp0], %[temp0], %[temp6] \n\t" \
"subu %[temp8], %[temp8], %[temp7] \n\t" \
"lhu %[temp4], "#E"(%[w]) \n\t" \
"lhu %[temp5], "#F"(%[w]) \n\t" \
"lhu %[temp6], "#G"(%[w]) \n\t" \
"lhu %[temp7], "#H"(%[w]) \n\t" \
"lhu %[temp4], " #E "(%[w]) \n\t" \
"lhu %[temp5], " #F "(%[w]) \n\t" \
"lhu %[temp6], " #G "(%[w]) \n\t" \
"lhu %[temp7], " #H "(%[w]) \n\t" \
"madd %[temp4], %[temp3] \n\t" \
"madd %[temp5], %[temp1] \n\t" \
"madd %[temp6], %[temp0] \n\t" \
"madd %[temp7], %[temp8] \n\t" \
"lw %[temp0], "#A"(%[tmp]) \n\t" \
"lw %[temp1], "#C"(%[tmp]) \n\t" \
"lw %[temp2], "#B"(%[tmp]) \n\t" \
"lw %[temp3], "#D"(%[tmp]) \n\t" \
"lw %[temp0], " #A "(%[tmp]) \n\t" \
"lw %[temp1], " #C "(%[tmp]) \n\t" \
"lw %[temp2], " #B "(%[tmp]) \n\t" \
"lw %[temp3], " #D "(%[tmp]) \n\t" \
"addu %[temp8], %[temp0], %[temp1] \n\t" \
"subu %[temp0], %[temp0], %[temp1] \n\t" \
"addu %[temp1], %[temp2], %[temp3] \n\t" \
@ -407,71 +407,71 @@ static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
// temp0..temp15 holds tmp[0]..tmp[15]
// A..D - offsets in bytes to load from src and ref buffers
// TEMP0..TEMP3 - registers for corresponding tmp elements
#define HORIZONTAL_PASS(A, B, C, D, TEMP0, TEMP1, TEMP2, TEMP3) \
"lw %["#TEMP1"], 0(%[args]) \n\t" \
"lw %["#TEMP2"], 4(%[args]) \n\t" \
"lbu %[temp16], "#A"(%["#TEMP1"]) \n\t" \
"lbu %[temp17], "#A"(%["#TEMP2"]) \n\t" \
"lbu %[temp18], "#B"(%["#TEMP1"]) \n\t" \
"lbu %[temp19], "#B"(%["#TEMP2"]) \n\t" \
"subu %[temp20], %[temp16], %[temp17] \n\t" \
"lbu %[temp16], "#C"(%["#TEMP1"]) \n\t" \
"lbu %[temp17], "#C"(%["#TEMP2"]) \n\t" \
"subu %["#TEMP0"], %[temp18], %[temp19] \n\t" \
"lbu %[temp18], "#D"(%["#TEMP1"]) \n\t" \
"lbu %[temp19], "#D"(%["#TEMP2"]) \n\t" \
"subu %["#TEMP1"], %[temp16], %[temp17] \n\t" \
"subu %["#TEMP2"], %[temp18], %[temp19] \n\t" \
"addu %["#TEMP3"], %[temp20], %["#TEMP2"] \n\t" \
"subu %["#TEMP2"], %[temp20], %["#TEMP2"] \n\t" \
"addu %[temp20], %["#TEMP0"], %["#TEMP1"] \n\t" \
"subu %["#TEMP0"], %["#TEMP0"], %["#TEMP1"] \n\t" \
"mul %[temp16], %["#TEMP2"], %[c5352] \n\t" \
"mul %[temp17], %["#TEMP2"], %[c2217] \n\t" \
"mul %[temp18], %["#TEMP0"], %[c5352] \n\t" \
"mul %[temp19], %["#TEMP0"], %[c2217] \n\t" \
"addu %["#TEMP1"], %["#TEMP3"], %[temp20] \n\t" \
"subu %[temp20], %["#TEMP3"], %[temp20] \n\t" \
"sll %["#TEMP0"], %["#TEMP1"], 3 \n\t" \
"sll %["#TEMP2"], %[temp20], 3 \n\t" \
"addiu %[temp16], %[temp16], 1812 \n\t" \
"addiu %[temp17], %[temp17], 937 \n\t" \
"addu %[temp16], %[temp16], %[temp19] \n\t" \
"subu %[temp17], %[temp17], %[temp18] \n\t" \
"sra %["#TEMP1"], %[temp16], 9 \n\t" \
"sra %["#TEMP3"], %[temp17], 9 \n\t"
#define HORIZONTAL_PASS(A, B, C, D, TEMP0, TEMP1, TEMP2, TEMP3) \
"lw %[" #TEMP1 "], 0(%[args]) \n\t" \
"lw %[" #TEMP2 "], 4(%[args]) \n\t" \
"lbu %[temp16], " #A "(%[" #TEMP1 "]) \n\t" \
"lbu %[temp17], " #A "(%[" #TEMP2 "]) \n\t" \
"lbu %[temp18], " #B "(%[" #TEMP1 "]) \n\t" \
"lbu %[temp19], " #B "(%[" #TEMP2 "]) \n\t" \
"subu %[temp20], %[temp16], %[temp17] \n\t" \
"lbu %[temp16], " #C "(%[" #TEMP1 "]) \n\t" \
"lbu %[temp17], " #C "(%[" #TEMP2 "]) \n\t" \
"subu %[" #TEMP0 "], %[temp18], %[temp19] \n\t" \
"lbu %[temp18], " #D "(%[" #TEMP1 "]) \n\t" \
"lbu %[temp19], " #D "(%[" #TEMP2 "]) \n\t" \
"subu %[" #TEMP1 "], %[temp16], %[temp17] \n\t" \
"subu %[" #TEMP2 "], %[temp18], %[temp19] \n\t" \
"addu %[" #TEMP3 "], %[temp20], %[" #TEMP2 "] \n\t" \
"subu %[" #TEMP2 "], %[temp20], %[" #TEMP2 "] \n\t" \
"addu %[temp20], %[" #TEMP0 "], %[" #TEMP1 "] \n\t" \
"subu %[" #TEMP0 "], %[" #TEMP0 "], %[" #TEMP1 "] \n\t" \
"mul %[temp16], %[" #TEMP2 "], %[c5352] \n\t" \
"mul %[temp17], %[" #TEMP2 "], %[c2217] \n\t" \
"mul %[temp18], %[" #TEMP0 "], %[c5352] \n\t" \
"mul %[temp19], %[" #TEMP0 "], %[c2217] \n\t" \
"addu %[" #TEMP1 "], %[" #TEMP3 "], %[temp20] \n\t" \
"subu %[temp20], %[" #TEMP3 "], %[temp20] \n\t" \
"sll %[" #TEMP0 "], %[" #TEMP1 "], 3 \n\t" \
"sll %[" #TEMP2 "], %[temp20], 3 \n\t" \
"addiu %[temp16], %[temp16], 1812 \n\t" \
"addiu %[temp17], %[temp17], 937 \n\t" \
"addu %[temp16], %[temp16], %[temp19] \n\t" \
"subu %[temp17], %[temp17], %[temp18] \n\t" \
"sra %[" #TEMP1 "], %[temp16], 9 \n\t" \
"sra %[" #TEMP3 "], %[temp17], 9 \n\t"
// macro for one vertical pass in FTransform
// temp0..temp15 holds tmp[0]..tmp[15]
// A..D - offsets in bytes to store to out buffer
// TEMP0, TEMP4, TEMP8 and TEMP12 - registers for corresponding tmp elements
#define VERTICAL_PASS(A, B, C, D, TEMP0, TEMP4, TEMP8, TEMP12) \
"addu %[temp16], %["#TEMP0"], %["#TEMP12"] \n\t" \
"subu %[temp19], %["#TEMP0"], %["#TEMP12"] \n\t" \
"addu %[temp17], %["#TEMP4"], %["#TEMP8"] \n\t" \
"subu %[temp18], %["#TEMP4"], %["#TEMP8"] \n\t" \
"mul %["#TEMP8"], %[temp19], %[c2217] \n\t" \
"mul %["#TEMP12"], %[temp18], %[c2217] \n\t" \
"mul %["#TEMP4"], %[temp19], %[c5352] \n\t" \
"mul %[temp18], %[temp18], %[c5352] \n\t" \
"addiu %[temp16], %[temp16], 7 \n\t" \
"addu %["#TEMP0"], %[temp16], %[temp17] \n\t" \
"sra %["#TEMP0"], %["#TEMP0"], 4 \n\t" \
"addu %["#TEMP12"], %["#TEMP12"], %["#TEMP4"] \n\t" \
"subu %["#TEMP4"], %[temp16], %[temp17] \n\t" \
"sra %["#TEMP4"], %["#TEMP4"], 4 \n\t" \
"addiu %["#TEMP8"], %["#TEMP8"], 30000 \n\t" \
"addiu %["#TEMP12"], %["#TEMP12"], 12000 \n\t" \
"addiu %["#TEMP8"], %["#TEMP8"], 21000 \n\t" \
"subu %["#TEMP8"], %["#TEMP8"], %[temp18] \n\t" \
"sra %["#TEMP12"], %["#TEMP12"], 16 \n\t" \
"sra %["#TEMP8"], %["#TEMP8"], 16 \n\t" \
"addiu %[temp16], %["#TEMP12"], 1 \n\t" \
"movn %["#TEMP12"], %[temp16], %[temp19] \n\t" \
"sh %["#TEMP0"], "#A"(%[temp20]) \n\t" \
"sh %["#TEMP4"], "#C"(%[temp20]) \n\t" \
"sh %["#TEMP8"], "#D"(%[temp20]) \n\t" \
"sh %["#TEMP12"], "#B"(%[temp20]) \n\t"
#define VERTICAL_PASS(A, B, C, D, TEMP0, TEMP4, TEMP8, TEMP12) \
"addu %[temp16], %[" #TEMP0 "], %[" #TEMP12 "] \n\t" \
"subu %[temp19], %[" #TEMP0 "], %[" #TEMP12 "] \n\t" \
"addu %[temp17], %[" #TEMP4 "], %[" #TEMP8 "] \n\t" \
"subu %[temp18], %[" #TEMP4 "], %[" #TEMP8 "] \n\t" \
"mul %[" #TEMP8 "], %[temp19], %[c2217] \n\t" \
"mul %[" #TEMP12 "], %[temp18], %[c2217] \n\t" \
"mul %[" #TEMP4 "], %[temp19], %[c5352] \n\t" \
"mul %[temp18], %[temp18], %[c5352] \n\t" \
"addiu %[temp16], %[temp16], 7 \n\t" \
"addu %[" #TEMP0 "], %[temp16], %[temp17] \n\t" \
"sra %[" #TEMP0 "], %[" #TEMP0 "], 4 \n\t" \
"addu %[" #TEMP12 "], %[" #TEMP12 "], %[" #TEMP4 "] \n\t" \
"subu %[" #TEMP4 "], %[temp16], %[temp17] \n\t" \
"sra %[" #TEMP4 "], %[" #TEMP4 "], 4 \n\t" \
"addiu %[" #TEMP8 "], %[" #TEMP8 "], 30000 \n\t" \
"addiu %[" #TEMP12 "], %[" #TEMP12 "], 12000 \n\t" \
"addiu %[" #TEMP8 "], %[" #TEMP8 "], 21000 \n\t" \
"subu %[" #TEMP8 "], %[" #TEMP8 "], %[temp18] \n\t" \
"sra %[" #TEMP12 "], %[" #TEMP12 "], 16 \n\t" \
"sra %[" #TEMP8 "], %[" #TEMP8 "], 16 \n\t" \
"addiu %[temp16], %[" #TEMP12 "], 1 \n\t" \
"movn %[" #TEMP12 "], %[temp16], %[temp19] \n\t" \
"sh %[" #TEMP0 "], " #A "(%[temp20]) \n\t" \
"sh %[" #TEMP4 "], " #C "(%[temp20]) \n\t" \
"sh %[" #TEMP8 "], " #D "(%[temp20]) \n\t" \
"sh %[" #TEMP12 "], " #B "(%[temp20]) \n\t"
static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
@ -622,14 +622,14 @@ int VP8GetResidualCostMIPS32(int ctx0, const VP8Residual* const res) {
}
#define GET_SSE_INNER(A, B, C, D) \
"lbu %[temp0], "#A"(%[a]) \n\t" \
"lbu %[temp1], "#A"(%[b]) \n\t" \
"lbu %[temp2], "#B"(%[a]) \n\t" \
"lbu %[temp3], "#B"(%[b]) \n\t" \
"lbu %[temp4], "#C"(%[a]) \n\t" \
"lbu %[temp5], "#C"(%[b]) \n\t" \
"lbu %[temp6], "#D"(%[a]) \n\t" \
"lbu %[temp7], "#D"(%[b]) \n\t" \
"lbu %[temp0], " #A "(%[a]) \n\t" \
"lbu %[temp1], " #A "(%[b]) \n\t" \
"lbu %[temp2], " #B "(%[a]) \n\t" \
"lbu %[temp3], " #B "(%[b]) \n\t" \
"lbu %[temp4], " #C "(%[a]) \n\t" \
"lbu %[temp5], " #C "(%[b]) \n\t" \
"lbu %[temp6], " #D "(%[a]) \n\t" \
"lbu %[temp7], " #D "(%[b]) \n\t" \
"subu %[temp0], %[temp0], %[temp1] \n\t" \
"subu %[temp2], %[temp2], %[temp3] \n\t" \
"subu %[temp4], %[temp4], %[temp5] \n\t" \

View File

@ -1012,8 +1012,10 @@ static int QuantizeBlock(int16_t in[16], int16_t out[16],
const int16x8_t out0 = Quantize(in, mtx, 0);
const int16x8_t out1 = Quantize(in, mtx, 8);
uint8x8x4_t shuffles;
// vtbl4_u8 is marked unavailable for iOS arm64, use wider versions there.
#if defined(__APPLE__) && defined(__aarch64__)
// vtbl?_u8 are marked unavailable for iOS arm64 with Xcode < 6.3, use
// non-standard versions there.
#if defined(__APPLE__) && defined(__aarch64__) && \
defined(__apple_build_version__) && (__apple_build_version__< 6020037)
uint8x16x2_t all_out;
INIT_VECTOR2(all_out, vreinterpretq_u8_s16(out0), vreinterpretq_u8_s16(out1));
INIT_VECTOR4(shuffles,

View File

@ -1590,7 +1590,12 @@ extern void VP8LDspInitSSE2(void);
extern void VP8LDspInitNEON(void);
extern void VP8LDspInitMIPS32(void);
static volatile VP8CPUInfo lossless_last_cpuinfo_used =
(VP8CPUInfo)&lossless_last_cpuinfo_used;
void VP8LDspInit(void) {
if (lossless_last_cpuinfo_used == VP8GetCPUInfo) return;
memcpy(VP8LPredictors, kPredictorsC, sizeof(VP8LPredictors));
VP8LSubtractGreenFromBlueAndRed = VP8LSubtractGreenFromBlueAndRed_C;
@ -1634,6 +1639,7 @@ void VP8LDspInit(void) {
}
#endif
}
lossless_last_cpuinfo_used = VP8GetCPUInfo;
}
//------------------------------------------------------------------------------

View File

@ -285,28 +285,28 @@ static VP8LStreaks HuffmanCostCombinedCount(const uint32_t* X,
// literal_ and successive histograms could be unaligned
// so we must use ulw and usw
#define ADD_TO_OUT(A, B, C, D, E, P0, P1, P2) \
"ulw %[temp0], "#A"(%["#P0"]) \n\t" \
"ulw %[temp1], "#B"(%["#P0"]) \n\t" \
"ulw %[temp2], "#C"(%["#P0"]) \n\t" \
"ulw %[temp3], "#D"(%["#P0"]) \n\t" \
"ulw %[temp4], "#A"(%["#P1"]) \n\t" \
"ulw %[temp5], "#B"(%["#P1"]) \n\t" \
"ulw %[temp6], "#C"(%["#P1"]) \n\t" \
"ulw %[temp7], "#D"(%["#P1"]) \n\t" \
"ulw %[temp0], " #A "(%[" #P0 "]) \n\t" \
"ulw %[temp1], " #B "(%[" #P0 "]) \n\t" \
"ulw %[temp2], " #C "(%[" #P0 "]) \n\t" \
"ulw %[temp3], " #D "(%[" #P0 "]) \n\t" \
"ulw %[temp4], " #A "(%[" #P1 "]) \n\t" \
"ulw %[temp5], " #B "(%[" #P1 "]) \n\t" \
"ulw %[temp6], " #C "(%[" #P1 "]) \n\t" \
"ulw %[temp7], " #D "(%[" #P1 "]) \n\t" \
"addu %[temp4], %[temp4], %[temp0] \n\t" \
"addu %[temp5], %[temp5], %[temp1] \n\t" \
"addu %[temp6], %[temp6], %[temp2] \n\t" \
"addu %[temp7], %[temp7], %[temp3] \n\t" \
"addiu %["#P0"], %["#P0"], 16 \n\t" \
".if "#E" == 1 \n\t" \
"addiu %["#P1"], %["#P1"], 16 \n\t" \
"addiu %[" #P0 "], %[" #P0 "], 16 \n\t" \
".if " #E " == 1 \n\t" \
"addiu %[" #P1 "], %[" #P1 "], 16 \n\t" \
".endif \n\t" \
"usw %[temp4], "#A"(%["#P2"]) \n\t" \
"usw %[temp5], "#B"(%["#P2"]) \n\t" \
"usw %[temp6], "#C"(%["#P2"]) \n\t" \
"usw %[temp7], "#D"(%["#P2"]) \n\t" \
"addiu %["#P2"], %["#P2"], 16 \n\t" \
"bne %["#P0"], %[LoopEnd], 1b \n\t" \
"usw %[temp4], " #A "(%[" #P2 "]) \n\t" \
"usw %[temp5], " #B "(%[" #P2 "]) \n\t" \
"usw %[temp6], " #C "(%[" #P2 "]) \n\t" \
"usw %[temp7], " #D "(%[" #P2 "]) \n\t" \
"addiu %[" #P2 "], %[" #P2 "], 16 \n\t" \
"bne %[" #P0 "], %[LoopEnd], 1b \n\t" \
".set pop \n\t" \
#define ASM_END_COMMON_0 \

View File

@ -259,20 +259,45 @@ static uint32_t Predictor13(uint32_t left, const uint32_t* const top) {
//------------------------------------------------------------------------------
// Subtract-Green Transform
// vtbl? are unavailable in iOS/arm64 builds.
#if !defined(__aarch64__)
// vtbl?_u8 are marked unavailable for iOS arm64 with Xcode < 6.3, use
// non-standard versions there.
#if defined(__APPLE__) && defined(__aarch64__) && \
defined(__apple_build_version__) && (__apple_build_version__< 6020037)
#define USE_VTBLQ
#endif
// 255 = byte will be zero'd
#ifdef USE_VTBLQ
// 255 = byte will be zeroed
static const uint8_t kGreenShuffle[16] = {
1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255
};
static WEBP_INLINE uint8x16_t DoGreenShuffle(const uint8x16_t argb,
const uint8x16_t shuffle) {
return vcombine_u8(vtbl1q_u8(argb, vget_low_u8(shuffle)),
vtbl1q_u8(argb, vget_high_u8(shuffle)));
}
#else // !USE_VTBLQ
// 255 = byte will be zeroed
static const uint8_t kGreenShuffle[8] = { 1, 255, 1, 255, 5, 255, 5, 255 };
static WEBP_INLINE uint8x16_t DoGreenShuffle(const uint8x16_t argb,
const uint8x8_t shuffle) {
return vcombine_u8(vtbl1_u8(vget_low_u8(argb), shuffle),
vtbl1_u8(vget_high_u8(argb), shuffle));
}
#endif // USE_VTBLQ
static void SubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixels) {
const uint32_t* const end = argb_data + (num_pixels & ~3);
#ifdef USE_VTBLQ
const uint8x16_t shuffle = vld1q_u8(kGreenShuffle);
#else
const uint8x8_t shuffle = vld1_u8(kGreenShuffle);
#endif
for (; argb_data < end; argb_data += 4) {
const uint8x16_t argb = vld1q_u8((uint8_t*)argb_data);
const uint8x16_t greens =
vcombine_u8(vtbl1_u8(vget_low_u8(argb), shuffle),
vtbl1_u8(vget_high_u8(argb), shuffle));
const uint8x16_t greens = DoGreenShuffle(argb, shuffle);
vst1q_u8((uint8_t*)argb_data, vsubq_u8(argb, greens));
}
// fallthrough and finish off with plain-C
@ -281,19 +306,21 @@ static void SubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixels) {
static void AddGreenToBlueAndRed(uint32_t* argb_data, int num_pixels) {
const uint32_t* const end = argb_data + (num_pixels & ~3);
#ifdef USE_VTBLQ
const uint8x16_t shuffle = vld1q_u8(kGreenShuffle);
#else
const uint8x8_t shuffle = vld1_u8(kGreenShuffle);
#endif
for (; argb_data < end; argb_data += 4) {
const uint8x16_t argb = vld1q_u8((uint8_t*)argb_data);
const uint8x16_t greens =
vcombine_u8(vtbl1_u8(vget_low_u8(argb), shuffle),
vtbl1_u8(vget_high_u8(argb), shuffle));
const uint8x16_t greens = DoGreenShuffle(argb, shuffle);
vst1q_u8((uint8_t*)argb_data, vaddq_u8(argb, greens));
}
// fallthrough and finish off with plain-C
VP8LAddGreenToBlueAndRed_C(argb_data, num_pixels & 3);
}
#endif // !__aarch64__
#undef USE_VTBLQ
#endif // USE_INTRINSICS
@ -320,11 +347,9 @@ void VP8LDspInitNEON(void) {
VP8LPredictors[12] = Predictor12;
VP8LPredictors[13] = Predictor13;
#if !defined(__aarch64__)
VP8LSubtractGreenFromBlueAndRed = SubtractGreenFromBlueAndRed;
VP8LAddGreenToBlueAndRed = AddGreenToBlueAndRed;
#endif
#endif
#endif // WEBP_USE_NEON
}

View File

@ -189,7 +189,12 @@ const WebPYUV444Converter WebPYUV444Converters[MODE_LAST] = {
extern void WebPInitUpsamplersSSE2(void);
extern void WebPInitUpsamplersNEON(void);
static volatile VP8CPUInfo upsampling_last_cpuinfo_used2 =
(VP8CPUInfo)&upsampling_last_cpuinfo_used2;
void WebPInitUpsamplers(void) {
if (upsampling_last_cpuinfo_used2 == VP8GetCPUInfo) return;
#ifdef FANCY_UPSAMPLING
WebPUpsamplers[MODE_RGB] = UpsampleRgbLinePair;
WebPUpsamplers[MODE_RGBA] = UpsampleRgbaLinePair;
@ -217,6 +222,7 @@ void WebPInitUpsamplers(void) {
#endif
}
#endif // FANCY_UPSAMPLING
upsampling_last_cpuinfo_used2 = VP8GetCPUInfo;
}
//------------------------------------------------------------------------------

View File

@ -123,7 +123,12 @@ WebPSamplerRowFunc WebPSamplers[MODE_LAST];
extern void WebPInitSamplersSSE2(void);
extern void WebPInitSamplersMIPS32(void);
static volatile VP8CPUInfo yuv_last_cpuinfo_used =
(VP8CPUInfo)&yuv_last_cpuinfo_used;
void WebPInitSamplers(void) {
if (yuv_last_cpuinfo_used == VP8GetCPUInfo) return;
WebPSamplers[MODE_RGB] = YuvToRgbRow;
WebPSamplers[MODE_RGBA] = YuvToRgbaRow;
WebPSamplers[MODE_BGR] = YuvToBgrRow;
@ -149,6 +154,7 @@ void WebPInitSamplers(void) {
}
#endif // WEBP_USE_MIPS32
}
yuv_last_cpuinfo_used = VP8GetCPUInfo;
}
//-----------------------------------------------------------------------------

View File

@ -20,6 +20,9 @@
#include "../dsp/lossless.h"
#include "../utils/utils.h"
#define ALIGN_CST 15
#define DO_ALIGN(PTR) ((uintptr_t)((PTR) + ALIGN_CST) & ~ALIGN_CST)
#define MAX_COST 1.e38
// Number of partitions for the three dominant (literal, red and blue) symbol
@ -101,9 +104,9 @@ VP8LHistogram* VP8LAllocateHistogram(int cache_bits) {
VP8LHistogramSet* VP8LAllocateHistogramSet(int size, int cache_bits) {
int i;
VP8LHistogramSet* set;
const size_t total_size = sizeof(*set)
+ sizeof(*set->histograms) * size
+ (size_t)VP8LGetHistogramSize(cache_bits) * size;
const int histo_size = VP8LGetHistogramSize(cache_bits);
const size_t total_size =
sizeof(*set) + size * (sizeof(*set->histograms) + histo_size + ALIGN_CST);
uint8_t* memory = (uint8_t*)WebPSafeMalloc(total_size, sizeof(*memory));
if (memory == NULL) return NULL;
@ -114,12 +117,12 @@ VP8LHistogramSet* VP8LAllocateHistogramSet(int size, int cache_bits) {
set->max_size = size;
set->size = size;
for (i = 0; i < size; ++i) {
memory = (uint8_t*)DO_ALIGN(memory);
set->histograms[i] = (VP8LHistogram*)memory;
// literal_ won't necessary be aligned.
set->histograms[i]->literal_ = (uint32_t*)(memory + sizeof(VP8LHistogram));
VP8LHistogramInit(set->histograms[i], cache_bits);
// There's no padding/alignment between successive histograms.
memory += VP8LGetHistogramSize(cache_bits);
memory += histo_size;
}
return set;
}

View File

@ -175,17 +175,13 @@ static void RescalePlane(const uint8_t* src,
int src_width, int src_height, int src_stride,
uint8_t* dst,
int dst_width, int dst_height, int dst_stride,
int32_t* const work,
rescaler_t* const work,
int num_channels) {
WebPRescaler rescaler;
int y = 0;
WebPRescalerInit(&rescaler, src_width, src_height,
dst, dst_width, dst_height, dst_stride,
num_channels,
src_width, dst_width,
src_height, dst_height,
work);
memset(work, 0, 2 * dst_width * num_channels * sizeof(*work));
num_channels, work);
while (y < src_height) {
y += WebPRescalerImport(&rescaler, src_height - y,
src + y * src_stride, src_stride);
@ -209,7 +205,7 @@ static void AlphaMultiplyY(WebPPicture* const pic, int inverse) {
int WebPPictureRescale(WebPPicture* pic, int width, int height) {
WebPPicture tmp;
int prev_width, prev_height;
int32_t* work;
rescaler_t* work;
if (pic == NULL) return 0;
prev_width = pic->width;
@ -231,7 +227,7 @@ int WebPPictureRescale(WebPPicture* pic, int width, int height) {
if (!WebPPictureAlloc(&tmp)) return 0;
if (!pic->use_argb) {
work = (int32_t*)WebPSafeMalloc(2ULL * width, sizeof(*work));
work = (rescaler_t*)WebPSafeMalloc(2ULL * width, sizeof(*work));
if (work == NULL) {
WebPPictureFree(&tmp);
return 0;
@ -259,7 +255,7 @@ int WebPPictureRescale(WebPPicture* pic, int width, int height) {
tmp.v,
HALVE(width), HALVE(height), tmp.uv_stride, work, 1);
} else {
work = (int32_t*)WebPSafeMalloc(2ULL * width * 4, sizeof(*work));
work = (rescaler_t*)WebPSafeMalloc(2ULL * width * 4, sizeof(*work));
if (work == NULL) {
WebPPictureFree(&tmp);
return 0;

View File

@ -30,7 +30,7 @@ extern "C" {
// version numbers
#define ENC_MAJ_VERSION 0
#define ENC_MIN_VERSION 4
#define ENC_REV_VERSION 2
#define ENC_REV_VERSION 4
// intra prediction modes
enum { B_DC_PRED = 0, // 4x4 modes

View File

@ -1081,6 +1081,7 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config,
int y;
err = AllocateTransformBuffer(enc, width, height);
if (err != VP8_ENC_OK) goto Error;
assert(enc->argb_ != NULL);
for (y = 0; y < height; ++y) {
memcpy(enc->argb_ + y * width,
picture->argb + y * picture->argb_stride,

View File

@ -326,7 +326,7 @@ int WebPEncode(const WebPConfig* config, WebPPicture* pic) {
if (!config->lossless) {
VP8Encoder* enc = NULL;
if (pic->y == NULL || pic->u == NULL || pic->v == NULL) {
if (pic->use_argb || pic->y == NULL || pic->u == NULL || pic->v == NULL) {
// Make sure we have YUVA samples.
if (config->preprocessing & 4) {
#if WEBP_ENCODER_ABI_VERSION > 0x0204

View File

@ -35,15 +35,15 @@
#endif
#if !defined(HAVE_CONFIG_H)
// clang-3.3 and gcc-4.3 have builtin functions for swap32/swap64
#if LOCAL_GCC_PREREQ(4,3) || LOCAL_CLANG_PREREQ(3,3)
#define HAVE_BUILTIN_BSWAP32
#define HAVE_BUILTIN_BSWAP64
#endif
// clang-3.3 and gcc-4.8 have a builtin function for swap16
#if LOCAL_GCC_PREREQ(4,8) || LOCAL_CLANG_PREREQ(3,3)
#if LOCAL_GCC_PREREQ(4,8) || __has_builtin(__builtin_bswap16)
#define HAVE_BUILTIN_BSWAP16
#endif
#if LOCAL_GCC_PREREQ(4,3) || __has_builtin(__builtin_bswap32)
#define HAVE_BUILTIN_BSWAP32
#endif
#if LOCAL_GCC_PREREQ(4,3) || __has_builtin(__builtin_bswap64)
#define HAVE_BUILTIN_BSWAP64
#endif
#endif // !HAVE_CONFIG_H
static WEBP_INLINE uint16_t BSwap16(uint16_t x) {

View File

@ -13,77 +13,192 @@
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include "./rescaler.h"
#include "../dsp/dsp.h"
//------------------------------------------------------------------------------
// Implementations of critical functions ImportRow / ExportRow
void (*WebPRescalerImportRow)(WebPRescaler* const wrk,
const uint8_t* const src, int channel) = NULL;
void (*WebPRescalerExportRow)(WebPRescaler* const wrk, int x_out) = NULL;
// Import a row of data and save its contribution in the rescaler.
// 'channel' denotes the channel number to be imported. 'Expand' corresponds to
// the wrk->x_expand case. Otherwise, 'Shrink' is to be used.
typedef void (*WebPRescalerImportRowFunc)(WebPRescaler* const wrk,
const uint8_t* src);
static WebPRescalerImportRowFunc WebPRescalerImportRowExpand;
static WebPRescalerImportRowFunc WebPRescalerImportRowShrink;
#define RFIX 30
#define MULT_FIX(x, y) (((int64_t)(x) * (y) + (1 << (RFIX - 1))) >> RFIX)
// Export one row (starting at x_out position) from rescaler.
// 'Expand' corresponds to the wrk->y_expand case.
// Otherwise 'Shrink' is to be used
typedef void (*WebPRescalerExportRowFunc)(WebPRescaler* const wrk);
static WebPRescalerExportRowFunc WebPRescalerExportRowExpand;
static WebPRescalerExportRowFunc WebPRescalerExportRowShrink;
static void ImportRowC(WebPRescaler* const wrk,
const uint8_t* const src, int channel) {
#define WEBP_RESCALER_RFIX 32 // fixed-point precision for multiplies
#define WEBP_RESCALER_ONE (1ull << WEBP_RESCALER_RFIX)
#define WEBP_RESCALER_FRAC(x, y) \
((uint32_t)(((uint64_t)(x) << WEBP_RESCALER_RFIX) / (y)))
#define ROUNDER (WEBP_RESCALER_ONE >> 1)
#define MULT_FIX(x, y) (((uint64_t)(x) * (y) + ROUNDER) >> WEBP_RESCALER_RFIX)
static void ImportRowExpandC(WebPRescaler* const wrk, const uint8_t* src) {
const int x_stride = wrk->num_channels;
const int x_out_max = wrk->dst_width * wrk->num_channels;
int x_in = channel;
int x_out;
int accum = 0;
if (!wrk->x_expand) {
int sum = 0;
for (x_out = channel; x_out < x_out_max; x_out += x_stride) {
accum += wrk->x_add;
for (; accum > 0; accum -= wrk->x_sub) {
sum += src[x_in];
x_in += x_stride;
}
{ // Emit next horizontal pixel.
const int32_t base = src[x_in];
const int32_t frac = base * (-accum);
x_in += x_stride;
wrk->frow[x_out] = (sum + base) * wrk->x_sub - frac;
// fresh fractional start for next pixel
sum = (int)MULT_FIX(frac, wrk->fx_scale);
}
}
} else { // simple bilinear interpolation
int left = src[channel], right = src[channel];
for (x_out = channel; x_out < x_out_max; x_out += x_stride) {
int channel;
assert(!WebPRescalerInputDone(wrk));
assert(wrk->x_expand);
for (channel = 0; channel < x_stride; ++channel) {
int x_in = channel;
int x_out = channel;
// simple bilinear interpolation
int accum = wrk->x_add;
int left = src[x_in];
int right = (wrk->src_width > 1) ? src[x_in + x_stride] : left;
x_in += x_stride;
while (1) {
wrk->frow[x_out] = right * wrk->x_add + (left - right) * accum;
x_out += x_stride;
if (x_out >= x_out_max) break;
accum -= wrk->x_sub;
if (accum < 0) {
left = right;
x_in += x_stride;
assert(x_in < wrk->src_width * x_stride);
right = src[x_in];
accum += wrk->x_add;
}
wrk->frow[x_out] = right * wrk->x_add + (left - right) * accum;
accum -= wrk->x_sub;
}
}
// Accumulate the contribution of the new row.
for (x_out = channel; x_out < x_out_max; x_out += x_stride) {
wrk->irow[x_out] += wrk->frow[x_out];
assert(wrk->x_sub == 0 /* <- special case for src_width=1 */ || accum == 0);
}
}
static void ExportRowC(WebPRescaler* const wrk, int x_out) {
if (wrk->y_accum <= 0) {
uint8_t* const dst = wrk->dst;
int32_t* const irow = wrk->irow;
const int32_t* const frow = wrk->frow;
const int yscale = wrk->fy_scale * (-wrk->y_accum);
const int x_out_max = wrk->dst_width * wrk->num_channels;
for (; x_out < x_out_max; ++x_out) {
const int frac = (int)MULT_FIX(frow[x_out], yscale);
static void ImportRowShrinkC(WebPRescaler* const wrk, const uint8_t* src) {
const int x_stride = wrk->num_channels;
const int x_out_max = wrk->dst_width * wrk->num_channels;
int channel;
assert(!WebPRescalerInputDone(wrk));
assert(!wrk->x_expand);
for (channel = 0; channel < x_stride; ++channel) {
int x_in = channel;
int x_out = channel;
uint32_t sum = 0;
int accum = 0;
while (x_out < x_out_max) {
uint32_t base = 0;
accum += wrk->x_add;
while (accum > 0) {
accum -= wrk->x_sub;
assert(x_in < wrk->src_width * x_stride);
base = src[x_in];
sum += base;
x_in += x_stride;
}
{ // Emit next horizontal pixel.
const rescaler_t frac = base * (-accum);
wrk->frow[x_out] = sum * wrk->x_sub - frac;
// fresh fractional start for next pixel
sum = (int)MULT_FIX(frac, wrk->fx_scale);
}
x_out += x_stride;
}
assert(accum == 0);
}
}
//------------------------------------------------------------------------------
// Row export
static void ExportRowExpandC(WebPRescaler* const wrk) {
int x_out;
uint8_t* const dst = wrk->dst;
rescaler_t* const irow = wrk->irow;
const int x_out_max = wrk->dst_width * wrk->num_channels;
const rescaler_t* const frow = wrk->frow;
assert(!WebPRescalerOutputDone(wrk));
assert(wrk->y_accum <= 0);
assert(wrk->y_expand);
assert(wrk->y_sub != 0);
if (wrk->y_accum == 0) {
for (x_out = 0; x_out < x_out_max; ++x_out) {
const uint32_t J = frow[x_out];
const int v = (int)MULT_FIX(J, wrk->fy_scale);
assert(v >= 0 && v <= 255);
dst[x_out] = v;
}
} else {
const uint32_t B = WEBP_RESCALER_FRAC(-wrk->y_accum, wrk->y_sub);
const uint32_t A = (uint32_t)(WEBP_RESCALER_ONE - B);
for (x_out = 0; x_out < x_out_max; ++x_out) {
const uint64_t I = (uint64_t)A * frow[x_out]
+ (uint64_t)B * irow[x_out];
const uint32_t J = (uint32_t)((I + ROUNDER) >> WEBP_RESCALER_RFIX);
const int v = (int)MULT_FIX(J, wrk->fy_scale);
assert(v >= 0 && v <= 255);
dst[x_out] = v;
}
}
}
static void ExportRowShrinkC(WebPRescaler* const wrk) {
int x_out;
uint8_t* const dst = wrk->dst;
rescaler_t* const irow = wrk->irow;
const int x_out_max = wrk->dst_width * wrk->num_channels;
const rescaler_t* const frow = wrk->frow;
const uint32_t yscale = wrk->fy_scale * (-wrk->y_accum);
assert(!WebPRescalerOutputDone(wrk));
assert(wrk->y_accum <= 0);
assert(!wrk->y_expand);
if (yscale) {
for (x_out = 0; x_out < x_out_max; ++x_out) {
const uint32_t frac = (uint32_t)MULT_FIX(frow[x_out], yscale);
const int v = (int)MULT_FIX(irow[x_out] - frac, wrk->fxy_scale);
dst[x_out] = (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255;
assert(v >= 0 && v <= 255);
dst[x_out] = v;
irow[x_out] = frac; // new fractional start
}
} else {
for (x_out = 0; x_out < x_out_max; ++x_out) {
const int v = (int)MULT_FIX(irow[x_out], wrk->fxy_scale);
assert(v >= 0 && v <= 255);
dst[x_out] = v;
irow[x_out] = 0;
}
}
}
//------------------------------------------------------------------------------
// Main entry calls
void WebPRescalerImportRow(WebPRescaler* const wrk, const uint8_t* src) {
assert(!WebPRescalerInputDone(wrk));
if (!wrk->x_expand) {
WebPRescalerImportRowShrink(wrk, src);
} else {
WebPRescalerImportRowExpand(wrk, src);
}
}
void WebPRescalerExportRow(WebPRescaler* const wrk) {
if (wrk->y_accum <= 0) {
assert(!WebPRescalerOutputDone(wrk));
if (wrk->y_expand) {
WebPRescalerExportRowExpand(wrk);
} else if (wrk->fxy_scale) {
WebPRescalerExportRowShrink(wrk);
} else { // very special case for src = dst = 1x1
int i;
assert(wrk->src_width == 1 && wrk->dst_width <= 2);
assert(wrk->src_height == 1 && wrk->dst_height == 1);
for (i = 0; i < wrk->num_channels * wrk->dst_width; ++i) {
wrk->dst[i] = wrk->irow[i];
wrk->irow[i] = 0;
}
}
wrk->y_accum += wrk->y_add;
wrk->dst += wrk->dst_stride;
++wrk->dst_y;
}
}
@ -92,23 +207,25 @@ static void ExportRowC(WebPRescaler* const wrk, int x_out) {
#if defined(WEBP_USE_MIPS32)
static void ImportRowMIPS(WebPRescaler* const wrk,
const uint8_t* const src, int channel) {
static void ImportRowShrinkMIPS(WebPRescaler* const wrk, const uint8_t* src) {
const int x_stride = wrk->num_channels;
const int x_out_max = wrk->dst_width * wrk->num_channels;
const int fx_scale = wrk->fx_scale;
const int x_add = wrk->x_add;
const int x_sub = wrk->x_sub;
int* frow = wrk->frow + channel;
int* irow = wrk->irow + channel;
const uint8_t* src1 = src + channel;
int temp1, temp2, temp3;
int base, frac, sum;
int accum, accum1;
const int x_stride1 = x_stride << 2;
int loop_c = x_out_max - channel;
int channel;
assert(!wrk->x_expand);
assert(!WebPRescalerInputDone(wrk));
for (channel = 0; channel < x_stride; ++channel) {
const uint8_t* src1 = src + channel;
rescaler_t* frow = wrk->frow + channel;
int temp1, temp2, temp3;
int base, frac, sum;
int accum, accum1;
int loop_c = x_out_max - channel;
if (!wrk->x_expand) {
__asm__ volatile (
"li %[temp1], 0x8000 \n\t"
"li %[temp2], 0x10000 \n\t"
@ -116,179 +233,295 @@ static void ImportRowMIPS(WebPRescaler* const wrk,
"li %[accum], 0 \n\t"
"1: \n\t"
"addu %[accum], %[accum], %[x_add] \n\t"
"li %[base], 0 \n\t"
"blez %[accum], 3f \n\t"
"2: \n\t"
"lbu %[temp3], 0(%[src1]) \n\t"
"lbu %[base], 0(%[src1]) \n\t"
"subu %[accum], %[accum], %[x_sub] \n\t"
"addu %[src1], %[src1], %[x_stride] \n\t"
"addu %[sum], %[sum], %[temp3] \n\t"
"addu %[sum], %[sum], %[base] \n\t"
"bgtz %[accum], 2b \n\t"
"3: \n\t"
"lbu %[base], 0(%[src1]) \n\t"
"addu %[src1], %[src1], %[x_stride] \n\t"
"negu %[accum1], %[accum] \n\t"
"mul %[frac], %[base], %[accum1] \n\t"
"addu %[temp3], %[sum], %[base] \n\t"
"mul %[temp3], %[temp3], %[x_sub] \n\t"
"lw %[base], 0(%[irow]) \n\t"
"mul %[temp3], %[sum], %[x_sub] \n\t"
"subu %[loop_c], %[loop_c], %[x_stride] \n\t"
"sll %[accum1], %[frac], 2 \n\t"
"mult %[temp1], %[temp2] \n\t"
"madd %[accum1], %[fx_scale] \n\t"
"maddu %[frac], %[fx_scale] \n\t"
"mfhi %[sum] \n\t"
"subu %[temp3], %[temp3], %[frac] \n\t"
"sw %[temp3], 0(%[frow]) \n\t"
"add %[base], %[base], %[temp3] \n\t"
"sw %[base], 0(%[irow]) \n\t"
"addu %[irow], %[irow], %[x_stride1] \n\t"
"addu %[frow], %[frow], %[x_stride1] \n\t"
"bgtz %[loop_c], 1b \n\t"
: [accum]"=&r"(accum), [src1]"+r"(src1), [temp3]"=&r"(temp3),
[sum]"=&r"(sum), [base]"=&r"(base), [frac]"=&r"(frac),
[frow]"+r"(frow), [accum1]"=&r"(accum1),
[temp2]"=&r"(temp2), [temp1]"=&r"(temp1)
: [x_stride]"r"(x_stride), [fx_scale]"r"(fx_scale),
[x_sub]"r"(x_sub), [x_add]"r"(x_add),
[loop_c]"r"(loop_c), [x_stride1]"r"(x_stride1)
: "memory", "hi", "lo"
);
assert(accum == 0);
}
}
: [accum] "=&r" (accum), [src1] "+r" (src1), [temp3] "=&r" (temp3),
[sum] "=&r" (sum), [base] "=&r" (base), [frac] "=&r" (frac),
[frow] "+r" (frow), [irow] "+r" (irow), [accum1] "=&r" (accum1),
[temp2] "=&r" (temp2), [temp1] "=&r" (temp1)
: [x_stride] "r" (x_stride), [fx_scale] "r" (fx_scale),
[x_sub] "r" (x_sub), [x_add] "r" (x_add),
[loop_c] "r" (loop_c), [x_stride1] "r" (x_stride1)
static void ImportRowExpandMIPS(WebPRescaler* const wrk, const uint8_t* src) {
const int x_stride = wrk->num_channels;
const int x_out_max = wrk->dst_width * wrk->num_channels;
const int x_add = wrk->x_add;
const int x_sub = wrk->x_sub;
const int src_width = wrk->src_width;
const int x_stride1 = x_stride << 2;
int channel;
assert(wrk->x_expand);
assert(!WebPRescalerInputDone(wrk));
for (channel = 0; channel < x_stride; ++channel) {
const uint8_t* src1 = src + channel;
rescaler_t* frow = wrk->frow + channel;
int temp1, temp2, temp3, temp4;
int frac;
int accum;
int x_out = channel;
__asm__ volatile (
"addiu %[temp3], %[src_width], -1 \n\t"
"lbu %[temp2], 0(%[src1]) \n\t"
"addu %[src1], %[src1], %[x_stride] \n\t"
"bgtz %[temp3], 0f \n\t"
"addiu %[temp1], %[temp2], 0 \n\t"
"b 3f \n\t"
"0: \n\t"
"lbu %[temp1], 0(%[src1]) \n\t"
"3: \n\t"
"addiu %[accum], %[x_add], 0 \n\t"
"1: \n\t"
"subu %[temp3], %[temp2], %[temp1] \n\t"
"mul %[temp3], %[temp3], %[accum] \n\t"
"mul %[temp4], %[temp1], %[x_add] \n\t"
"addu %[temp3], %[temp4], %[temp3] \n\t"
"sw %[temp3], 0(%[frow]) \n\t"
"addu %[frow], %[frow], %[x_stride1] \n\t"
"addu %[x_out], %[x_out], %[x_stride] \n\t"
"subu %[temp3], %[x_out], %[x_out_max] \n\t"
"bgez %[temp3], 2f \n\t"
"subu %[accum], %[accum], %[x_sub] \n\t"
"bgez %[accum], 4f \n\t"
"addiu %[temp2], %[temp1], 0 \n\t"
"addu %[src1], %[src1], %[x_stride] \n\t"
"lbu %[temp1], 0(%[src1]) \n\t"
"addu %[accum], %[accum], %[x_add] \n\t"
"4: \n\t"
"b 1b \n\t"
"2: \n\t"
: [src1]"+r"(src1), [accum]"=&r"(accum), [temp1]"=&r"(temp1),
[temp2]"=&r"(temp2), [temp3]"=&r"(temp3), [temp4]"=&r"(temp4),
[x_out]"+r"(x_out), [frac]"=&r"(frac), [frow]"+r"(frow)
: [x_stride]"r"(x_stride), [x_add]"r"(x_add), [x_sub]"r"(x_sub),
[x_stride1]"r"(x_stride1), [src_width]"r"(src_width),
[x_out_max]"r"(x_out_max)
: "memory", "hi", "lo"
);
assert(wrk->x_sub == 0 /* <- special case for src_width=1 */ || accum == 0);
}
}
//------------------------------------------------------------------------------
// Row export
static void ExportRowExpandMIPS(WebPRescaler* const wrk) {
uint8_t* dst = wrk->dst;
rescaler_t* irow = wrk->irow;
const int x_out_max = wrk->dst_width * wrk->num_channels;
const rescaler_t* frow = wrk->frow;
int temp0, temp1, temp3, temp4, temp5, loop_end;
const int temp2 = (int)wrk->fy_scale;
const int temp6 = x_out_max << 2;
assert(!WebPRescalerOutputDone(wrk));
assert(wrk->y_accum <= 0);
assert(wrk->y_expand);
assert(wrk->y_sub != 0);
if (wrk->y_accum == 0) {
__asm__ volatile (
"li %[temp3], 0x10000 \n\t"
"li %[temp4], 0x8000 \n\t"
"addu %[loop_end], %[frow], %[temp6] \n\t"
"1: \n\t"
"lw %[temp0], 0(%[frow]) \n\t"
"addiu %[dst], %[dst], 1 \n\t"
"addiu %[frow], %[frow], 4 \n\t"
"mult %[temp3], %[temp4] \n\t"
"maddu %[temp0], %[temp2] \n\t"
"mfhi %[temp5] \n\t"
"sb %[temp5], -1(%[dst]) \n\t"
"bne %[frow], %[loop_end], 1b \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
[temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow),
[dst]"+r"(dst), [loop_end]"=&r"(loop_end)
: [temp2]"r"(temp2), [temp6]"r"(temp6)
: "memory", "hi", "lo"
);
} else {
const uint32_t B = WEBP_RESCALER_FRAC(-wrk->y_accum, wrk->y_sub);
const uint32_t A = (uint32_t)(WEBP_RESCALER_ONE - B);
__asm__ volatile (
"li %[temp3], 0x10000 \n\t"
"li %[temp4], 0x8000 \n\t"
"addu %[loop_end], %[frow], %[temp6] \n\t"
"1: \n\t"
"lw %[temp0], 0(%[frow]) \n\t"
"lw %[temp1], 0(%[irow]) \n\t"
"addiu %[dst], %[dst], 1 \n\t"
"mult %[temp3], %[temp4] \n\t"
"maddu %[A], %[temp0] \n\t"
"maddu %[B], %[temp1] \n\t"
"addiu %[frow], %[frow], 4 \n\t"
"addiu %[irow], %[irow], 4 \n\t"
"mfhi %[temp5] \n\t"
"mult %[temp3], %[temp4] \n\t"
"maddu %[temp5], %[temp2] \n\t"
"mfhi %[temp5] \n\t"
"sb %[temp5], -1(%[dst]) \n\t"
"bne %[frow], %[loop_end], 1b \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
[temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow),
[irow]"+r"(irow), [dst]"+r"(dst), [loop_end]"=&r"(loop_end)
: [temp2]"r"(temp2), [temp6]"r"(temp6), [A]"r"(A), [B]"r"(B)
: "memory", "hi", "lo"
);
}
}
static void ExportRowShrinkMIPS(WebPRescaler* const wrk) {
const int x_out_max = wrk->dst_width * wrk->num_channels;
uint8_t* dst = wrk->dst;
rescaler_t* irow = wrk->irow;
const rescaler_t* frow = wrk->frow;
const int yscale = wrk->fy_scale * (-wrk->y_accum);
int temp0, temp1, temp3, temp4, temp5, loop_end;
const int temp2 = (int)wrk->fxy_scale;
const int temp6 = x_out_max << 2;
assert(!WebPRescalerOutputDone(wrk));
assert(wrk->y_accum <= 0);
assert(!wrk->y_expand);
assert(wrk->fxy_scale != 0);
if (yscale) {
__asm__ volatile (
"li %[temp3], 0x10000 \n\t"
"li %[temp4], 0x8000 \n\t"
"addu %[loop_end], %[frow], %[temp6] \n\t"
"1: \n\t"
"lw %[temp0], 0(%[frow]) \n\t"
"mult %[temp3], %[temp4] \n\t"
"addiu %[frow], %[frow], 4 \n\t"
"maddu %[temp0], %[yscale] \n\t"
"mfhi %[temp1] \n\t"
"lw %[temp0], 0(%[irow]) \n\t"
"addiu %[dst], %[dst], 1 \n\t"
"addiu %[irow], %[irow], 4 \n\t"
"subu %[temp0], %[temp0], %[temp1] \n\t"
"mult %[temp3], %[temp4] \n\t"
"maddu %[temp0], %[temp2] \n\t"
"mfhi %[temp5] \n\t"
"sw %[temp1], -4(%[irow]) \n\t"
"sb %[temp5], -1(%[dst]) \n\t"
"bne %[frow], %[loop_end], 1b \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
[temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow),
[irow]"+r"(irow), [dst]"+r"(dst), [loop_end]"=&r"(loop_end)
: [temp2]"r"(temp2), [yscale]"r"(yscale), [temp6]"r"(temp6)
: "memory", "hi", "lo"
);
} else {
__asm__ volatile (
"lbu %[temp1], 0(%[src1]) \n\t"
"move %[temp2], %[temp1] \n\t"
"li %[accum], 0 \n\t"
"1: \n\t"
"bgez %[accum], 2f \n\t"
"move %[temp2], %[temp1] \n\t"
"addu %[src1], %[x_stride] \n\t"
"lbu %[temp1], 0(%[src1]) \n\t"
"addu %[accum], %[x_add] \n\t"
"2: \n\t"
"subu %[temp3], %[temp2], %[temp1] \n\t"
"mul %[temp3], %[temp3], %[accum] \n\t"
"mul %[base], %[temp1], %[x_add] \n\t"
"subu %[accum], %[accum], %[x_sub] \n\t"
"lw %[frac], 0(%[irow]) \n\t"
"subu %[loop_c], %[loop_c], %[x_stride] \n\t"
"addu %[temp3], %[base], %[temp3] \n\t"
"sw %[temp3], 0(%[frow]) \n\t"
"addu %[frow], %[x_stride1] \n\t"
"addu %[frac], %[temp3] \n\t"
"sw %[frac], 0(%[irow]) \n\t"
"addu %[irow], %[x_stride1] \n\t"
"bgtz %[loop_c], 1b \n\t"
: [src1] "+r" (src1), [accum] "=&r" (accum), [temp1] "=&r" (temp1),
[temp2] "=&r" (temp2), [temp3] "=&r" (temp3), [base] "=&r" (base),
[frac] "=&r" (frac), [frow] "+r" (frow), [irow] "+r" (irow)
: [x_stride] "r" (x_stride), [x_add] "r" (x_add), [x_sub] "r" (x_sub),
[x_stride1] "r" (x_stride1), [loop_c] "r" (loop_c)
"li %[temp3], 0x10000 \n\t"
"li %[temp4], 0x8000 \n\t"
"addu %[loop_end], %[irow], %[temp6] \n\t"
"1: \n\t"
"lw %[temp0], 0(%[irow]) \n\t"
"addiu %[dst], %[dst], 1 \n\t"
"addiu %[irow], %[irow], 4 \n\t"
"mult %[temp3], %[temp4] \n\t"
"maddu %[temp0], %[temp2] \n\t"
"mfhi %[temp5] \n\t"
"sw $zero, -4(%[irow]) \n\t"
"sb %[temp5], -1(%[dst]) \n\t"
"bne %[irow], %[loop_end], 1b \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
[temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [irow]"+r"(irow),
[dst]"+r"(dst), [loop_end]"=&r"(loop_end)
: [temp2]"r"(temp2), [temp6]"r"(temp6)
: "memory", "hi", "lo"
);
}
}
static void ExportRowMIPS(WebPRescaler* const wrk, int x_out) {
if (wrk->y_accum <= 0) {
uint8_t* const dst = wrk->dst;
int32_t* const irow = wrk->irow;
const int32_t* const frow = wrk->frow;
const int yscale = wrk->fy_scale * (-wrk->y_accum);
const int x_out_max = wrk->dst_width * wrk->num_channels;
// if wrk->fxy_scale can fit into 32 bits use optimized code,
// otherwise use C code
if ((wrk->fxy_scale >> 32) == 0) {
int temp0, temp1, temp3, temp4, temp5, temp6, temp7, loop_end;
const int temp2 = (int)(wrk->fxy_scale);
const int temp8 = x_out_max << 2;
uint8_t* dst_t = (uint8_t*)dst;
int32_t* irow_t = (int32_t*)irow;
const int32_t* frow_t = (const int32_t*)frow;
__asm__ volatile(
"addiu %[temp6], $zero, -256 \n\t"
"addiu %[temp7], $zero, 255 \n\t"
"li %[temp3], 0x10000 \n\t"
"li %[temp4], 0x8000 \n\t"
"addu %[loop_end], %[frow_t], %[temp8] \n\t"
"1: \n\t"
"lw %[temp0], 0(%[frow_t]) \n\t"
"mult %[temp3], %[temp4] \n\t"
"addiu %[frow_t], %[frow_t], 4 \n\t"
"sll %[temp0], %[temp0], 2 \n\t"
"madd %[temp0], %[yscale] \n\t"
"mfhi %[temp1] \n\t"
"lw %[temp0], 0(%[irow_t]) \n\t"
"addiu %[dst_t], %[dst_t], 1 \n\t"
"addiu %[irow_t], %[irow_t], 4 \n\t"
"subu %[temp0], %[temp0], %[temp1] \n\t"
"mult %[temp3], %[temp4] \n\t"
"sll %[temp0], %[temp0], 2 \n\t"
"madd %[temp0], %[temp2] \n\t"
"mfhi %[temp5] \n\t"
"sw %[temp1], -4(%[irow_t]) \n\t"
"and %[temp0], %[temp5], %[temp6] \n\t"
"slti %[temp1], %[temp5], 0 \n\t"
"beqz %[temp0], 2f \n\t"
"xor %[temp5], %[temp5], %[temp5] \n\t"
"movz %[temp5], %[temp7], %[temp1] \n\t"
"2: \n\t"
"sb %[temp5], -1(%[dst_t]) \n\t"
"bne %[frow_t], %[loop_end], 1b \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
[temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp6]"=&r"(temp6),
[temp7]"=&r"(temp7), [frow_t]"+r"(frow_t), [irow_t]"+r"(irow_t),
[dst_t]"+r"(dst_t), [loop_end]"=&r"(loop_end)
: [temp2]"r"(temp2), [yscale]"r"(yscale), [temp8]"r"(temp8)
: "memory", "hi", "lo"
);
wrk->y_accum += wrk->y_add;
wrk->dst += wrk->dst_stride;
} else {
ExportRowC(wrk, x_out);
}
}
}
#endif // WEBP_USE_MIPS32
//------------------------------------------------------------------------------
void WebPRescalerInit(WebPRescaler* const wrk, int src_width, int src_height,
uint8_t* const dst, int dst_width, int dst_height,
int dst_stride, int num_channels, int x_add, int x_sub,
int y_add, int y_sub, int32_t* const work) {
uint8_t* const dst,
int dst_width, int dst_height, int dst_stride,
int num_channels, rescaler_t* const work) {
const int x_add = src_width, x_sub = dst_width;
const int y_add = src_height, y_sub = dst_height;
wrk->x_expand = (src_width < dst_width);
wrk->y_expand = (src_height < dst_height);
wrk->src_width = src_width;
wrk->src_height = src_height;
wrk->dst_width = dst_width;
wrk->dst_height = dst_height;
wrk->src_y = 0;
wrk->dst_y = 0;
wrk->dst = dst;
wrk->dst_stride = dst_stride;
wrk->num_channels = num_channels;
// for 'x_expand', we use bilinear interpolation
wrk->x_add = wrk->x_expand ? (x_sub - 1) : x_add - x_sub;
wrk->x_add = wrk->x_expand ? (x_sub - 1) : x_add;
wrk->x_sub = wrk->x_expand ? (x_add - 1) : x_sub;
wrk->y_accum = y_add;
wrk->y_add = y_add;
wrk->y_sub = y_sub;
wrk->fx_scale = (1 << RFIX) / x_sub;
wrk->fy_scale = (1 << RFIX) / y_sub;
wrk->fxy_scale = wrk->x_expand ?
((int64_t)dst_height << RFIX) / (x_sub * src_height) :
((int64_t)dst_height << RFIX) / (x_add * src_height);
if (!wrk->x_expand) { // fx_scale is not used otherwise
wrk->fx_scale = WEBP_RESCALER_FRAC(1, wrk->x_sub);
}
// vertical scaling parameters
wrk->y_add = wrk->y_expand ? y_add - 1 : y_add;
wrk->y_sub = wrk->y_expand ? y_sub - 1 : y_sub;
wrk->y_accum = wrk->y_expand ? wrk->y_sub : wrk->y_add;
if (!wrk->y_expand) {
// this is WEBP_RESCALER_FRAC(dst_height, x_add * y_add) without the cast.
const uint64_t ratio =
(uint64_t)dst_height * WEBP_RESCALER_ONE / (wrk->x_add * wrk->y_add);
if (ratio != (uint32_t)ratio) {
// We can't represent the ratio with the current fixed-point precision.
// => We special-case fxy_scale = 0, in WebPRescalerExportRow().
wrk->fxy_scale = 0;
} else {
wrk->fxy_scale = (uint32_t)ratio;
}
wrk->fy_scale = WEBP_RESCALER_FRAC(1, wrk->y_sub);
} else {
wrk->fy_scale = WEBP_RESCALER_FRAC(1, wrk->x_add);
// wrk->fxy_scale is unused here.
}
wrk->irow = work;
wrk->frow = work + num_channels * dst_width;
memset(work, 0, 2 * dst_width * num_channels * sizeof(*work));
if (WebPRescalerImportRow == NULL) {
WebPRescalerImportRow = ImportRowC;
WebPRescalerExportRow = ExportRowC;
if (WebPRescalerImportRowExpand == NULL) {
WebPRescalerImportRowExpand = ImportRowExpandC;
WebPRescalerImportRowShrink = ImportRowShrinkC;
WebPRescalerExportRowExpand = ExportRowExpandC;
WebPRescalerExportRowShrink = ExportRowShrinkC;
if (VP8GetCPUInfo != NULL) {
#if defined(WEBP_USE_MIPS32)
if (VP8GetCPUInfo(kMIPS32)) {
WebPRescalerImportRow = ImportRowMIPS;
WebPRescalerExportRow = ExportRowMIPS;
WebPRescalerImportRowExpand = ImportRowExpandMIPS;
WebPRescalerImportRowShrink = ImportRowShrinkMIPS;
WebPRescalerExportRowExpand = ExportRowExpandMIPS;
WebPRescalerExportRowShrink = ExportRowShrinkMIPS;
}
#endif
}
@ -296,7 +529,10 @@ void WebPRescalerInit(WebPRescaler* const wrk, int src_width, int src_height,
}
#undef MULT_FIX
#undef RFIX
#undef WEBP_RESCALER_RFIX
#undef WEBP_RESCALER_ONE
#undef WEBP_RESCALER_FRAC
#undef ROUNDER
//------------------------------------------------------------------------------
// all-in-one calls
@ -309,11 +545,20 @@ int WebPRescaleNeededLines(const WebPRescaler* const wrk, int max_num_lines) {
int WebPRescalerImport(WebPRescaler* const wrk, int num_lines,
const uint8_t* src, int src_stride) {
int total_imported = 0;
while (total_imported < num_lines && wrk->y_accum > 0) {
int channel;
for (channel = 0; channel < wrk->num_channels; ++channel) {
WebPRescalerImportRow(wrk, src, channel);
while (total_imported < num_lines && !WebPRescalerHasPendingOutput(wrk)) {
if (wrk->y_expand) {
rescaler_t* const tmp = wrk->irow;
wrk->irow = wrk->frow;
wrk->frow = tmp;
}
WebPRescalerImportRow(wrk, src);
if (!wrk->y_expand) { // Accumulate the contribution of the new row.
int x;
for (x = 0; x < wrk->num_channels * wrk->dst_width; ++x) {
wrk->irow[x] += wrk->frow[x];
}
}
++wrk->src_y;
src += src_stride;
++total_imported;
wrk->y_accum -= wrk->y_sub;
@ -324,7 +569,7 @@ int WebPRescalerImport(WebPRescaler* const wrk, int num_lines,
int WebPRescalerExport(WebPRescaler* const rescaler) {
int total_exported = 0;
while (WebPRescalerHasPendingOutput(rescaler)) {
WebPRescalerExportRow(rescaler, 0);
WebPRescalerExportRow(rescaler);
++total_exported;
}
return total_exported;

View File

@ -21,20 +21,23 @@ extern "C" {
#include "../webp/types.h"
// Structure used for on-the-fly rescaling
typedef uint32_t rescaler_t; // type for side-buffer
typedef struct {
int x_expand; // true if we're expanding in the x direction
int y_expand; // true if we're expanding in the y direction
int num_channels; // bytes to jump between pixels
int fy_scale, fx_scale; // fixed-point scaling factor
int64_t fxy_scale; // ''
// we need hpel-precise add/sub increments, for the downsampled U/V planes.
uint32_t fx_scale; // fixed-point scaling factors
uint32_t fy_scale; // ''
uint32_t fxy_scale; // ''
int y_accum; // vertical accumulator
int y_add, y_sub; // vertical increments (add ~= src, sub ~= dst)
int x_add, x_sub; // horizontal increments (add ~= src, sub ~= dst)
int y_add, y_sub; // vertical increments
int x_add, x_sub; // horizontal increments
int src_width, src_height; // source dimensions
int dst_width, dst_height; // destination dimensions
int src_y, dst_y; // row counters for input and output
uint8_t* dst;
int dst_stride;
int32_t* irow, *frow; // work buffer
rescaler_t* irow, *frow; // work buffer
} WebPRescaler;
// Initialize a rescaler given scratch area 'work' and dimensions of src & dst.
@ -43,9 +46,7 @@ void WebPRescalerInit(WebPRescaler* const rescaler,
uint8_t* const dst,
int dst_width, int dst_height, int dst_stride,
int num_channels,
int x_add, int x_sub,
int y_add, int y_sub,
int32_t* const work);
rescaler_t* const work);
// Returns the number of input lines needed next to produce one output line,
// considering that the maximum available input lines are 'max_num_lines'.
@ -57,21 +58,29 @@ int WebPRescaleNeededLines(const WebPRescaler* const rescaler,
int WebPRescalerImport(WebPRescaler* const rescaler, int num_rows,
const uint8_t* src, int src_stride);
// Import a row of data and save its contribution in the rescaler.
// 'channel' denotes the channel number to be imported.
extern void (*WebPRescalerImportRow)(WebPRescaler* const wrk,
const uint8_t* const src, int channel);
// Export one row (starting at x_out position) from rescaler.
extern void (*WebPRescalerExportRow)(WebPRescaler* const wrk, int x_out);
// Return true if there is pending output rows ready.
static WEBP_INLINE
int WebPRescalerHasPendingOutput(const WebPRescaler* const rescaler) {
return (rescaler->y_accum <= 0);
}
// Export as many rows as possible. Return the numbers of rows written.
int WebPRescalerExport(WebPRescaler* const rescaler);
void WebPRescalerImportRow(WebPRescaler* const wrk,
const uint8_t* src);
// Export one row (starting at x_out position) from rescaler.
void WebPRescalerExportRow(WebPRescaler* const wrk);
// Return true if input is finished
static WEBP_INLINE
int WebPRescalerInputDone(const WebPRescaler* const rescaler) {
return (rescaler->src_y >= rescaler->src_height);
}
// Return true if output is finished
static WEBP_INLINE
int WebPRescalerOutputDone(const WebPRescaler* const rescaler) {
return (rescaler->dst_y >= rescaler->dst_height);
}
// Return true if there are pending output rows ready.
static WEBP_INLINE
int WebPRescalerHasPendingOutput(const WebPRescaler* const rescaler) {
return !WebPRescalerOutputDone(rescaler) && (rescaler->y_accum <= 0);
}
//------------------------------------------------------------------------------

View File

@ -90,7 +90,7 @@ static WEBP_INLINE int BitsLog2Floor(uint32_t n) {
#pragma intrinsic(_BitScanReverse)
static WEBP_INLINE int BitsLog2Floor(uint32_t n) {
uint32_t first_set_bit;
unsigned long first_set_bit;
_BitScanReverse(&first_set_bit, n);
return first_set_bit;
}

View File

@ -419,7 +419,9 @@ WEBP_EXTERN(int) WebPPictureView(const WebPPicture* src,
WEBP_EXTERN(int) WebPPictureIsView(const WebPPicture* picture);
// Rescale a picture to new dimension width x height.
// Now gamma correction is applied.
// If either 'width' or 'height' (but not both) is 0 the corresponding
// dimension will be calculated preserving the aspect ratio.
// No gamma correction is applied.
// Returns false in case of error (invalid parameter or insufficient memory).
WEBP_EXTERN(int) WebPPictureRescale(WebPPicture* pic, int width, int height);

View File

@ -18,10 +18,11 @@
#ifndef _MSC_VER
#include <inttypes.h>
#ifdef __STRICT_ANSI__
#define WEBP_INLINE
#else /* __STRICT_ANSI__ */
#if defined(__cplusplus) || !defined(__STRICT_ANSI__) || \
(defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L)
#define WEBP_INLINE inline
#else
#define WEBP_INLINE
#endif
#else
typedef signed char int8_t;