Compare commits

...

21 Commits

Author SHA1 Message Date
1525784d5b update ChangeLog
Change-Id: I839ad7871f0bbe7a8a3a900b3176d2754eef0f6e
2015-10-23 13:20:40 -07:00
46e18c0a25 vwebp: fix incorrect clipping w/NO_BLEND
when the previous frame does not specify dispose to background only the
current frame's rectangle should be cleared

related to bug #245

(cherry picked from commit 469ba2cdfd)

Change-Id: I2fc4f5be99057e0bf87d8fedec57b06859b070bd
2015-10-23 13:10:17 -07:00
fcfde90b9c update issue tracker url
code.google.com -> bugs.chromium.org

(cherry picked from commit 4b9186b2eb)

Change-Id: I0dc99a85c29657415401160df3c7dd0423f96457
2015-10-20 22:44:52 -07:00
8c3fb330e5 update AUTHORS
missed in 0.4.3

Change-Id: I2b488307dfec592264467847adcbc3de0a6c83f3
2015-10-19 15:41:24 -07:00
808d4a686e update NEWS
Change-Id: I301be57ed6d925164f3827a79c6215ab79f120c7
2015-10-19 15:41:24 -07:00
62864042c0 bump version to 0.4.4
libwebp{,decoder} - 0.4.4
libwebp libtool - 5.4.0
libwebpdecoder libtool - 1.4.0

mux/demux - 0.2.2 (unchanged)
libtool - 1.2.0 (unchanged)

Change-Id: I7d421dc47ad4d25a17450ce1b04562c5d58c596b
2015-10-19 15:41:23 -07:00
b8b314ab39 doc/webp-container-spec: update repo browser link
gerrit.chromium.org is deprecated, use chromium.googlesource.com.

(cherry picked from commit f0486968ba)

Change-Id: Iaa6d6d18798dbd8cce908988287387f5cb8e8e64
2015-10-19 15:41:23 -07:00
c3953e37c9 fix typo: constitutes -> constitute
(cherry picked from commit 5fe1fe37a5)

Change-Id: I5b20ef41f4a810e11a4499b46b5e7dc93247beed
2015-10-19 15:41:23 -07:00
cd377e291c Use __has_builtin to check clang support
Older versions of Xcode with clang reporting versions 4.[012] and 5.0
did not include support for __builtin_bswap16. Checking in this manner
avoids using brittle version checks.

Matches a change to libvpx:
https://chromium-review.googlesource.com/305573
to fix:
https://code.google.com/p/webm/issues/detail?id=1082

(cherry picked from commit d26d9def80)

Change-Id: I23ea466ee1b53b12cd3fb45f65a2186c8dda95a1
2015-10-19 15:41:22 -07:00
e2e89806f7 wicdec: fix alpha detection w/64bpp BGRA/RGBA
(cherry picked from commit badfcbaa1e)

Change-Id: Ia712cf736e490d482a52b63d8e2816d0b7035cd0
2015-10-19 15:41:22 -07:00
5c3fe77dd8 iosbuild: fix linking with Xcode 7 / iOS SDK 9
-fembed-bitcode is the default, a framework built without this flag will
fail to link against an application using it.

BUG=267

(cherry picked from commit db1321a6a2)

Change-Id: I83461cb058b1866ac99b3f0bdfa890933e88ed26
2015-10-19 15:41:22 -07:00
f9f5498b6c VP8LAllocateHistogramSet: align histogram[] entries
fixes issue #262: a SIGBUS when accessing a misaligned double in
VP8LHistogram

(cherry picked from commit cd82440ec7)

Change-Id: Ic78cc5366d7e43d892c375b6a69dce2379db931b
2015-10-19 15:41:21 -07:00
3026db2ee5 Loosen the buffer size checks for Y/U/V/A too.
(follow-up to 15ca5014)

(cherry picked from commit 017f8cccec)

Change-Id: Ia122e96f616bd6317c24b69c9534cb7919b8a4a4
2015-10-19 15:41:21 -07:00
d089362d07 loosen the padding check on buffer size
Strictly speaking, the last (or first) row doesn't require padding.

cf https://code.google.com/p/webp/issues/detail?id=258

(cherry picked from commit 15ca5014f1)

Change-Id: Ie9ec8eb776fec1f5cea4cf9e21e81901fd79bf33
2015-10-19 15:41:21 -07:00
53d22c5b3e dec_neon: add whitespace around stringizing operator
prevents unintentional side-effects (though unlikely in this case) with
future compilers, cf:
eebaf97 dsp/mips: add whitespace around stringizing operator

(cherry picked from commit d623a8706f)

Change-Id: I0537091fcc97b4f54d0a156c3c83a28c51456b17
2015-10-19 15:41:21 -07:00
8bcc4d4523 dsp/mips: add whitespace around stringizing operator
fixes compile with gcc 5.1
BUG=259

(cherry picked from commit eebaf97f5a)

Change-Id: Ideb39c6290ab8569b1b6cc835bea11c822d0286c
2015-10-19 15:41:20 -07:00
d49c44f450 Container spec: clarify ordering of ALPH chunk.
Reported by user: https://code.google.com/p/webp/issues/detail?id=255

(cherry picked from commit 585d93dbba)

Change-Id: I9c027ea828d5a367b317744fad7607a16ed52fa5
2015-10-19 15:41:20 -07:00
382de22c84 msvc: fix pointer type warning in BitsLog2Floor
_BitScanReverse() takes an unsigned long*
http://msdn.microsoft.com/en-us/library/fbxyd7zd.aspx

fixes:
C4057: 'function': 'unsigned long *' differs in indirection to slightly
different base types from 'uint32_t *'

fixes issue #253

(cherry picked from commit 0250dfcc19)

Change-Id: I0101ef7be18c7ed188b35e9b17e7f71290953786
2015-10-19 15:41:20 -07:00
84ecd9d85c FlattenSimilarBlocks should only be tried when blending is possible.
This is because, FlattenSimilarBlocks() replaces some opaque pixels by
transparent ones. This results in an equivalent output only if blending
is turned on for the current frame.

(cherry picked from commit 5cccdadf2e)

Change-Id: I05612c952fdbd4b3a6e0ac9f3a7d49822f0cfb9b
2015-10-19 15:41:19 -07:00
f55ebbba82 backport rescaler fix
backported from: 7df9389, 5ff0079

Change-Id: I11b4d97c3c483431528be9ccbd9895baac8c6a63
2015-10-19 15:41:13 -07:00
2ff633c938 fix mips2 build target
tested with mips1 and mips2; this should cover 3/4 as well.
fixes an ftbfs reported on the debian issue tracker:
https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=785000

(cherry picked from commit bf46d0acff)

Change-Id: I2458487c92bd638589fdfec5adb4f22102a5960c
2015-10-15 21:12:28 -07:00
33 changed files with 817 additions and 521 deletions

View File

@ -16,6 +16,7 @@ Contributors:
- Pascal Massimino (pascal dot massimino at gmail dot com)
- Paweł Hajdan, Jr (phajdan dot jr at chromium dot org)
- Pierre Joye (pierre dot php at gmail dot com)
- Sam Clegg (sbc at chromium dot org)
- Scott LaVarnway (slavarnway at google dot com)
- Scott Talbot (s at chikachow dot org)
- Slobodan Prijic (slobodan dot prijic at imgtec dot com)

View File

@ -1,3 +1,24 @@
46e18c0 vwebp: fix incorrect clipping w/NO_BLEND
fcfde90 update issue tracker url
8c3fb33 update AUTHORS
808d4a6 update NEWS
6286404 bump version to 0.4.4
b8b314a doc/webp-container-spec: update repo browser link
c3953e3 fix typo: constitutes -> constitute
cd377e2 Use __has_builtin to check clang support
e2e8980 wicdec: fix alpha detection w/64bpp BGRA/RGBA
5c3fe77 iosbuild: fix linking with Xcode 7 / iOS SDK 9
f9f5498 VP8LAllocateHistogramSet: align histogram[] entries
3026db2 Loosen the buffer size checks for Y/U/V/A too.
d089362 loosen the padding check on buffer size
53d22c5 dec_neon: add whitespace around stringizing operator
8bcc4d4 dsp/mips: add whitespace around stringizing operator
d49c44f Container spec: clarify ordering of ALPH chunk.
382de22 msvc: fix pointer type warning in BitsLog2Floor
84ecd9d FlattenSimilarBlocks should only be tried when blending is possible.
f55ebbb backport rescaler fix
2ff633c fix mips2 build target
326b5fb update ChangeLog (tag: v0.4.3, origin/0.4.3, 0.4.3)
a661e50 Disable NEON code on Native Client
fcd94e9 update ChangeLog (tag: v0.4.3-rc1)
569fe57 update NEWS

7
NEWS
View File

@ -1,3 +1,10 @@
- 10/15/15: version 0.4.4
This is a binary compatible release.
* rescaling out-of-bounds read fix (issue #254)
* various build fixes and improvements (issues #253, #259, #262, #267, #268)
* container documentation update
* gif2webp transparency fix (issue #245)
- 3/3/15: version 0.4.3
This is a binary compatible release.
* Android / gcc / iOS / MSVS build fixes and improvements

View File

@ -17,7 +17,7 @@ or agree to the institution of patent litigation or any other patent
enforcement activity against any entity (including a cross-claim or
counterclaim in a lawsuit) alleging that any of these implementations of WebM
or any code incorporated within any of these implementations of WebM
constitutes direct or contributory patent infringement, or inducement of
constitute direct or contributory patent infringement, or inducement of
patent infringement, then any patent rights granted to you under this License
for these implementations of WebM shall terminate as of the date such
litigation is filed.

4
README
View File

@ -4,7 +4,7 @@
\__\__/\____/\_____/__/ ____ ___
/ _/ / \ \ / _ \/ _/
/ \_/ / / \ \ __/ \__
\____/____/\_____/_____/____/v0.4.3
\____/____/\_____/_____/____/v0.4.4
Description:
============
@ -596,7 +596,7 @@ Bugs:
=====
Please report all bugs to our issue tracker:
http://code.google.com/p/webp/issues
https://bugs.chromium.org/p/webp
Patches welcome! See this page to get started:
http://www.webmproject.org/code/contribute/submitting-patches/

View File

@ -175,7 +175,7 @@ Bugs:
=====
Please report all bugs to our issue tracker:
http://code.google.com/p/webp/issues
https://bugs.chromium.org/p/webp
Patches welcome! See this page to get started:
http://www.webmproject.org/code/contribute/submitting-patches/

View File

@ -1,5 +1,5 @@
AC_INIT([libwebp], [0.4.3],
[http://code.google.com/p/webp/issues],,
AC_INIT([libwebp], [0.4.4],
[https://bugs.chromium.org/p/webp],,
[http://developers.google.com/speed/webp])
AC_CANONICAL_HOST
AC_PREREQ([2.60])

View File

@ -271,9 +271,15 @@ An extended format file consists of:
* An optional list of [unknown chunks](#unknown-chunks). _\[status: experimental\]_
For a _still image_, the _image data_ consists of a single frame, whereas for
an _animated image_, it consists of multiple frames. More details about frames
can be found in the [Animation](#animation) section.
For a _still image_, the _image data_ consists of a single frame, which is made
up of:
* An optional [alpha subchunk](#alpha).
* A [bitstream subchunk](#bitstream-vp8vp8l).
For an _animated image_, the _image data_ consists of multiple frames. More
details about frames can be found in the [Animation](#animation) section.
All chunks SHOULD be placed in the same order as listed above. If a chunk
appears in the wrong place, the file is invalid, but readers MAY parse the
@ -809,7 +815,7 @@ RIFF/WEBP
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
[vp8spec]: http://tools.ietf.org/html/rfc6386
[webpllspec]: https://gerrit.chromium.org/gerrit/gitweb?p=webm/libwebp.git;a=blob;f=doc/webp-lossless-bitstream-spec.txt;hb=master
[webpllspec]: https://chromium.googlesource.com/webm/libwebp/+/master/doc/webp-lossless-bitstream-spec.txt
[iccspec]: http://www.color.org/icc_specs2.xalter
[metadata]: http://www.metadataworkinggroup.org/pdf/mwg_guidance.pdf
[rfc 1166]: http://tools.ietf.org/html/rfc1166

View File

@ -671,11 +671,9 @@ static WebPEncodingError GenerateCandidates(
}
}
if (candidate_lossy->evaluate) {
if (!is_key_frame) {
// For lossy compression of a frame, it's better to:
// * Replace transparent pixels of 'curr' with actual RGB values,
// whenever possible, and
// * Replace similar blocks of pixels by a transparent block.
if (use_blending) {
// For lossy compression of a frame, it's better to replace similar blocks
// of pixels by a transparent block.
if (!curr_canvas_saved) { // save if not already done so.
CopyPixels(curr_canvas, curr_canvas_tmp);
}
@ -684,7 +682,7 @@ static WebPEncodingError GenerateCandidates(
error_code = EncodeCandidate(sub_frame, rect, config_lossy, use_blending,
duration, candidate_lossy);
if (error_code != VP8_ENC_OK) return error_code;
if (!is_key_frame) {
if (use_blending) {
CopyPixels(curr_canvas_tmp, curr_canvas); // restore
}
}

View File

@ -308,19 +308,24 @@ static void HandleDisplay(void) {
// they will be incorrect if the window is resized.
// glScissor() takes window coordinates (0,0 at bottom left).
int window_x, window_y;
int frame_w, frame_h;
if (prev->dispose_method == WEBP_MUX_DISPOSE_BACKGROUND) {
// Clear the previous frame rectangle.
window_x = prev->x_offset;
window_y = kParams.canvas_height - prev->y_offset - prev->height;
frame_w = prev->width;
frame_h = prev->height;
} else { // curr->blend_method == WEBP_MUX_NO_BLEND.
// We simulate no-blending behavior by first clearing the current frame
// rectangle (to a checker-board) and then alpha-blending against it.
window_x = curr->x_offset;
window_y = kParams.canvas_height - curr->y_offset - curr->height;
frame_w = curr->width;
frame_h = curr->height;
}
glEnable(GL_SCISSOR_TEST);
// Only update the requested area, not the whole canvas.
glScissor(window_x, window_y, prev->width, prev->height);
glScissor(window_x, window_y, frame_w, frame_h);
glClear(GL_COLOR_BUFFER_BIT); // use clear color
DrawCheckerBoard();

View File

@ -73,6 +73,12 @@ WEBP_DEFINE_GUID(GUID_WICPixelFormat32bppBGRA_,
WEBP_DEFINE_GUID(GUID_WICPixelFormat32bppRGBA_,
0xf5c7ad2d, 0x6a8d, 0x43dd,
0xa7, 0xa8, 0xa2, 0x99, 0x35, 0x26, 0x1a, 0xe9);
WEBP_DEFINE_GUID(GUID_WICPixelFormat64bppBGRA_,
0x1562ff7c, 0xd352, 0x46f9,
0x97, 0x9e, 0x42, 0x97, 0x6b, 0x79, 0x22, 0x46);
WEBP_DEFINE_GUID(GUID_WICPixelFormat64bppRGBA_,
0x6fddc324, 0x4e03, 0x4bfe,
0xb1, 0x85, 0x3d, 0x77, 0x76, 0x8d, 0xc9, 0x16);
static HRESULT OpenInputStream(const char* filename, IStream** stream) {
HRESULT hr = S_OK;
@ -196,7 +202,11 @@ static int HasAlpha(IWICImagingFactory* const factory,
has_alpha = IsEqualGUID(MAKE_REFGUID(pixel_format),
MAKE_REFGUID(GUID_WICPixelFormat32bppRGBA_)) ||
IsEqualGUID(MAKE_REFGUID(pixel_format),
MAKE_REFGUID(GUID_WICPixelFormat32bppBGRA_));
MAKE_REFGUID(GUID_WICPixelFormat32bppBGRA_)) ||
IsEqualGUID(MAKE_REFGUID(pixel_format),
MAKE_REFGUID(GUID_WICPixelFormat64bppRGBA_)) ||
IsEqualGUID(MAKE_REFGUID(pixel_format),
MAKE_REFGUID(GUID_WICPixelFormat64bppBGRA_));
}
return has_alpha;
}

View File

@ -41,6 +41,8 @@ LIBLIST=''
if [[ -z "${SDK}" ]]; then
echo "iOS SDK not available"
exit 1
elif [[ ${SDK%%.*} -gt 8 ]]; then
EXTRA_CFLAGS="-fembed-bitcode"
elif [[ ${SDK} < 6.0 ]]; then
echo "You need iOS SDK version 6.0 or above"
exit 1
@ -94,7 +96,7 @@ for PLATFORM in ${PLATFORMS}; do
SDKROOT="${PLATFORMSROOT}/"
SDKROOT+="${PLATFORM}.platform/Developer/SDKs/${PLATFORM}${SDK}.sdk/"
CFLAGS="-arch ${ARCH2:-${ARCH}} -pipe -isysroot ${SDKROOT} -O3 -DNDEBUG"
CFLAGS+=" -miphoneos-version-min=6.0"
CFLAGS+=" -miphoneos-version-min=6.0 ${EXTRA_CFLAGS}"
set -x
export PATH="${DEVROOT}/usr/bin:${OLDPATH}"

View File

@ -1,5 +1,5 @@
.\" Hey, EMACS: -*- nroff -*-
.TH CWEBP 1 "Oct 13, 2014"
.TH CWEBP 1 "October 19, 2015"
.SH NAME
cwebp \- compress an image file to a WebP file
.SH SYNOPSIS
@ -259,7 +259,7 @@ Only print brief information (output file size and PSNR) for testing purpose.
.SH BUGS
Please report all bugs to our issue tracker:
http://code.google.com/p/webp/issues
https://bugs.chromium.org/p/webp
.br
Patches welcome! See this page to get started:
http://www.webmproject.org/code/contribute/submitting-patches/

View File

@ -1,5 +1,5 @@
.\" Hey, EMACS: -*- nroff -*-
.TH DWEBP 1 "July 22, 2014"
.TH DWEBP 1 "October 19, 2015"
.SH NAME
dwebp \- decompress a WebP file to an image file
.SH SYNOPSIS
@ -106,7 +106,7 @@ Disable all assembly optimizations.
.SH BUGS
Please report all bugs to our issue tracker:
http://code.google.com/p/webp/issues
https://bugs.chromium.org/p/webp
.br
Patches welcome! See this page to get started:
http://www.webmproject.org/code/contribute/submitting-patches/

View File

@ -1,5 +1,5 @@
.\" Hey, EMACS: -*- nroff -*-
.TH GIF2WEBP 1 "March 7, 2014"
.TH GIF2WEBP 1 "October 19, 2015"
.SH NAME
gif2webp \- Convert a GIF image to WebP
.SH SYNOPSIS
@ -111,7 +111,7 @@ Do not print anything.
.SH BUGS
Please report all bugs to our issue tracker:
http://code.google.com/p/webp/issues
https://bugs.chromium.org/p/webp
.br
Patches welcome! See this page to get started:
http://www.webmproject.org/code/contribute/submitting-patches/

View File

@ -1,5 +1,5 @@
.\" Hey, EMACS: -*- nroff -*-
.TH VWEBP 1 "July 23, 2014"
.TH VWEBP 1 "October 19, 2015"
.SH NAME
vwebp \- decompress a WebP file and display it in a window
.SH SYNOPSIS
@ -65,7 +65,7 @@ Quit.
.SH BUGS
Please report all bugs to our issue tracker:
http://code.google.com/p/webp/issues
https://bugs.chromium.org/p/webp
.br
Patches welcome! See this page to get started:
http://www.webmproject.org/code/contribute/submitting-patches/

View File

@ -1,5 +1,5 @@
.\" Hey, EMACS: -*- nroff -*-
.TH WEBPMUX 1 "August 28, 2014"
.TH WEBPMUX 1 "October 19, 2015"
.SH NAME
webpmux \- create animated WebP files from non\-animated WebP images, extract
frames from animated WebP images, and manage XMP/EXIF metadata and ICC profile.
@ -129,7 +129,7 @@ The nature of EXIF, XMP and ICC data is not checked and is assumed to be valid.
.SH BUGS
Please report all bugs to our issue tracker:
http://code.google.com/p/webp/issues
https://bugs.chromium.org/p/webp
.br
Patches welcome! See this page to get started:
http://www.webmproject.org/code/contribute/submitting\-patches/

View File

@ -35,7 +35,7 @@ libwebp_la_LIBADD += utils/libwebputils.la
# other than the ones listed on the command line, i.e., after linking, it will
# not have unresolved symbols. Some platforms (Windows among them) require all
# symbols in shared libraries to be resolved at library creation.
libwebp_la_LDFLAGS = -no-undefined -version-info 5:3:0
libwebp_la_LDFLAGS = -no-undefined -version-info 5:4:0
libwebpincludedir = $(includedir)/webp
pkgconfig_DATA = libwebp.pc
@ -47,7 +47,7 @@ if BUILD_LIBWEBPDECODER
libwebpdecoder_la_LIBADD += dsp/libwebpdspdecode.la
libwebpdecoder_la_LIBADD += utils/libwebputilsdecode.la
libwebpdecoder_la_LDFLAGS = -no-undefined -version-info 1:3:0
libwebpdecoder_la_LDFLAGS = -no-undefined -version-info 1:4:0
pkgconfig_DATA += libwebpdecoder.pc
endif

View File

@ -33,6 +33,11 @@ static int IsValidColorspace(int webp_csp_mode) {
return (webp_csp_mode >= MODE_RGB && webp_csp_mode < MODE_LAST);
}
// strictly speaking, the very last (or first, if flipped) row
// doesn't require padding.
#define MIN_BUFFER_SIZE(WIDTH, HEIGHT, STRIDE) \
(uint64_t)(STRIDE) * ((HEIGHT) - 1) + (WIDTH)
static VP8StatusCode CheckDecBuffer(const WebPDecBuffer* const buffer) {
int ok = 1;
const WEBP_CSP_MODE mode = buffer->colorspace;
@ -42,20 +47,22 @@ static VP8StatusCode CheckDecBuffer(const WebPDecBuffer* const buffer) {
ok = 0;
} else if (!WebPIsRGBMode(mode)) { // YUV checks
const WebPYUVABuffer* const buf = &buffer->u.YUVA;
const int uv_width = (width + 1) / 2;
const int uv_height = (height + 1) / 2;
const int y_stride = abs(buf->y_stride);
const int u_stride = abs(buf->u_stride);
const int v_stride = abs(buf->v_stride);
const int a_stride = abs(buf->a_stride);
const uint64_t y_size = (uint64_t)y_stride * height;
const uint64_t u_size = (uint64_t)u_stride * ((height + 1) / 2);
const uint64_t v_size = (uint64_t)v_stride * ((height + 1) / 2);
const uint64_t a_size = (uint64_t)a_stride * height;
const uint64_t y_size = MIN_BUFFER_SIZE(width, height, y_stride);
const uint64_t u_size = MIN_BUFFER_SIZE(uv_width, uv_height, u_stride);
const uint64_t v_size = MIN_BUFFER_SIZE(uv_width, uv_height, v_stride);
const uint64_t a_size = MIN_BUFFER_SIZE(width, height, a_stride);
ok &= (y_size <= buf->y_size);
ok &= (u_size <= buf->u_size);
ok &= (v_size <= buf->v_size);
ok &= (y_stride >= width);
ok &= (u_stride >= (width + 1) / 2);
ok &= (v_stride >= (width + 1) / 2);
ok &= (u_stride >= uv_width);
ok &= (v_stride >= uv_width);
ok &= (buf->y != NULL);
ok &= (buf->u != NULL);
ok &= (buf->v != NULL);
@ -67,13 +74,14 @@ static VP8StatusCode CheckDecBuffer(const WebPDecBuffer* const buffer) {
} else { // RGB checks
const WebPRGBABuffer* const buf = &buffer->u.RGBA;
const int stride = abs(buf->stride);
const uint64_t size = (uint64_t)stride * height;
const uint64_t size = MIN_BUFFER_SIZE(width, height, stride);
ok &= (size <= buf->size);
ok &= (stride >= width * kModeBpp[mode]);
ok &= (buf->rgba != NULL);
}
return ok ? VP8_STATUS_OK : VP8_STATUS_INVALID_PARAM;
}
#undef MIN_BUFFER_SIZE
static VP8StatusCode AllocateBuffer(WebPDecBuffer* const buffer) {
const int w = buffer->width;

View File

@ -322,37 +322,31 @@ static int InitYUVRescaler(const VP8Io* const io, WebPDecParams* const p) {
const size_t work_size = 2 * out_width; // scratch memory for luma rescaler
const size_t uv_work_size = 2 * uv_out_width; // and for each u/v ones
size_t tmp_size;
int32_t* work;
rescaler_t* work;
tmp_size = (work_size + 2 * uv_work_size) * sizeof(*work);
if (has_alpha) {
tmp_size += work_size * sizeof(*work);
}
p->memory = WebPSafeCalloc(1ULL, tmp_size);
p->memory = WebPSafeMalloc(1ULL, tmp_size);
if (p->memory == NULL) {
return 0; // memory error
}
work = (int32_t*)p->memory;
work = (rescaler_t*)p->memory;
WebPRescalerInit(&p->scaler_y, io->mb_w, io->mb_h,
buf->y, out_width, out_height, buf->y_stride, 1,
io->mb_w, out_width, io->mb_h, out_height,
work);
WebPRescalerInit(&p->scaler_u, uv_in_width, uv_in_height,
buf->u, uv_out_width, uv_out_height, buf->u_stride, 1,
uv_in_width, uv_out_width,
uv_in_height, uv_out_height,
work + work_size);
WebPRescalerInit(&p->scaler_v, uv_in_width, uv_in_height,
buf->v, uv_out_width, uv_out_height, buf->v_stride, 1,
uv_in_width, uv_out_width,
uv_in_height, uv_out_height,
work + work_size + uv_work_size);
p->emit = EmitRescaledYUV;
if (has_alpha) {
WebPRescalerInit(&p->scaler_a, io->mb_w, io->mb_h,
buf->a, out_width, out_height, buf->a_stride, 1,
io->mb_w, out_width, io->mb_h, out_height,
work + work_size + 2 * uv_work_size);
p->emit_alpha = EmitRescaledAlphaYUV;
WebPInitAlphaProcessing();
@ -375,9 +369,9 @@ static int ExportRGB(WebPDecParams* const p, int y_pos) {
WebPRescalerHasPendingOutput(&p->scaler_u)) {
assert(p->last_y + y_pos + num_lines_out < p->output->height);
assert(p->scaler_u.y_accum == p->scaler_v.y_accum);
WebPRescalerExportRow(&p->scaler_y, 0);
WebPRescalerExportRow(&p->scaler_u, 0);
WebPRescalerExportRow(&p->scaler_v, 0);
WebPRescalerExportRow(&p->scaler_y);
WebPRescalerExportRow(&p->scaler_u);
WebPRescalerExportRow(&p->scaler_v);
convert(p->scaler_y.dst, p->scaler_u.dst, p->scaler_v.dst,
dst, p->scaler_y.dst_width);
dst += buf->stride;
@ -425,7 +419,7 @@ static int ExportAlpha(WebPDecParams* const p, int y_pos) {
while (WebPRescalerHasPendingOutput(&p->scaler_a)) {
int i;
assert(p->last_y + y_pos + num_lines_out < p->output->height);
WebPRescalerExportRow(&p->scaler_a, 0);
WebPRescalerExportRow(&p->scaler_a);
for (i = 0; i < width; ++i) {
const uint32_t alpha_value = p->scaler_a.dst[i];
dst[4 * i] = alpha_value;
@ -458,7 +452,7 @@ static int ExportAlphaRGBA4444(WebPDecParams* const p, int y_pos) {
while (WebPRescalerHasPendingOutput(&p->scaler_a)) {
int i;
assert(p->last_y + y_pos + num_lines_out < p->output->height);
WebPRescalerExportRow(&p->scaler_a, 0);
WebPRescalerExportRow(&p->scaler_a);
for (i = 0; i < width; ++i) {
// Fill in the alpha value (converted to 4 bits).
const uint32_t alpha_value = p->scaler_a.dst[i] >> 4;
@ -495,7 +489,7 @@ static int InitRGBRescaler(const VP8Io* const io, WebPDecParams* const p) {
const int uv_in_width = (io->mb_w + 1) >> 1;
const int uv_in_height = (io->mb_h + 1) >> 1;
const size_t work_size = 2 * out_width; // scratch memory for one rescaler
int32_t* work; // rescalers work area
rescaler_t* work; // rescalers work area
uint8_t* tmp; // tmp storage for scaled YUV444 samples before RGB conversion
size_t tmp_size1, tmp_size2, total_size;
@ -506,30 +500,26 @@ static int InitRGBRescaler(const VP8Io* const io, WebPDecParams* const p) {
tmp_size2 += out_width;
}
total_size = tmp_size1 * sizeof(*work) + tmp_size2 * sizeof(*tmp);
p->memory = WebPSafeCalloc(1ULL, total_size);
p->memory = WebPSafeMalloc(1ULL, total_size);
if (p->memory == NULL) {
return 0; // memory error
}
work = (int32_t*)p->memory;
work = (rescaler_t*)p->memory;
tmp = (uint8_t*)(work + tmp_size1);
WebPRescalerInit(&p->scaler_y, io->mb_w, io->mb_h,
tmp + 0 * out_width, out_width, out_height, 0, 1,
io->mb_w, out_width, io->mb_h, out_height,
work + 0 * work_size);
WebPRescalerInit(&p->scaler_u, uv_in_width, uv_in_height,
tmp + 1 * out_width, out_width, out_height, 0, 1,
io->mb_w, 2 * out_width, io->mb_h, 2 * out_height,
work + 1 * work_size);
WebPRescalerInit(&p->scaler_v, uv_in_width, uv_in_height,
tmp + 2 * out_width, out_width, out_height, 0, 1,
io->mb_w, 2 * out_width, io->mb_h, 2 * out_height,
work + 2 * work_size);
p->emit = EmitRescaledRGB;
if (has_alpha) {
WebPRescalerInit(&p->scaler_a, io->mb_w, io->mb_h,
tmp + 3 * out_width, out_width, out_height, 0, 1,
io->mb_w, out_width, io->mb_h, out_height,
work + 3 * work_size);
p->emit_alpha = EmitRescaledAlphaRGB;
if (p->output->colorspace == MODE_RGBA_4444 ||

View File

@ -31,7 +31,7 @@ extern "C" {
// version numbers
#define DEC_MAJ_VERSION 0
#define DEC_MIN_VERSION 4
#define DEC_REV_VERSION 3
#define DEC_REV_VERSION 4
// intra prediction modes
enum { B_DC_PRED = 0, // 4x4 modes

View File

@ -390,13 +390,13 @@ static int AllocateAndInitRescaler(VP8LDecoder* const dec, VP8Io* const io) {
const int in_height = io->mb_h;
const int out_height = io->scaled_height;
const uint64_t work_size = 2 * num_channels * (uint64_t)out_width;
int32_t* work; // Rescaler work area.
const uint64_t scaled_data_size = num_channels * (uint64_t)out_width;
rescaler_t* work; // Rescaler work area.
const uint64_t scaled_data_size = (uint64_t)out_width;
uint32_t* scaled_data; // Temporary storage for scaled BGRA data.
const uint64_t memory_size = sizeof(*dec->rescaler) +
work_size * sizeof(*work) +
scaled_data_size * sizeof(*scaled_data);
uint8_t* memory = (uint8_t*)WebPSafeCalloc(memory_size, sizeof(*memory));
uint8_t* memory = (uint8_t*)WebPSafeMalloc(memory_size, sizeof(*memory));
if (memory == NULL) {
dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
return 0;
@ -406,13 +406,12 @@ static int AllocateAndInitRescaler(VP8LDecoder* const dec, VP8Io* const io) {
dec->rescaler = (WebPRescaler*)memory;
memory += sizeof(*dec->rescaler);
work = (int32_t*)memory;
work = (rescaler_t*)memory;
memory += work_size * sizeof(*work);
scaled_data = (uint32_t*)memory;
WebPRescalerInit(dec->rescaler, in_width, in_height, (uint8_t*)scaled_data,
out_width, out_height, 0, num_channels,
in_width, out_width, in_height, out_height, work);
out_width, out_height, 0, num_channels, work);
return 1;
}
@ -427,7 +426,7 @@ static int Export(WebPRescaler* const rescaler, WEBP_CSP_MODE colorspace,
int num_lines_out = 0;
while (WebPRescalerHasPendingOutput(rescaler)) {
uint8_t* const dst = rgba + num_lines_out * rgba_stride;
WebPRescalerExportRow(rescaler, 0);
WebPRescalerExportRow(rescaler);
WebPMultARGBRow(src, dst_width, 1);
VP8LConvertFromBGRA(src, dst_width, colorspace, dst);
++num_lines_out;
@ -545,7 +544,7 @@ static int ExportYUVA(const VP8LDecoder* const dec, int y_pos) {
const int dst_width = rescaler->dst_width;
int num_lines_out = 0;
while (WebPRescalerHasPendingOutput(rescaler)) {
WebPRescalerExportRow(rescaler, 0);
WebPRescalerExportRow(rescaler);
WebPMultARGBRow(src, dst_width, 1);
ConvertToYUVA(src, dst_width, y_pos, dec->output_);
++y_pos;

View File

@ -24,24 +24,24 @@
// Load/Store vertical edge
#define LOAD8x4(c1, c2, c3, c4, b1, b2, stride) \
"vld4.8 {" #c1"[0], " #c2"[0], " #c3"[0], " #c4"[0]}," #b1 "," #stride"\n" \
"vld4.8 {" #c1"[1], " #c2"[1], " #c3"[1], " #c4"[1]}," #b2 "," #stride"\n" \
"vld4.8 {" #c1"[2], " #c2"[2], " #c3"[2], " #c4"[2]}," #b1 "," #stride"\n" \
"vld4.8 {" #c1"[3], " #c2"[3], " #c3"[3], " #c4"[3]}," #b2 "," #stride"\n" \
"vld4.8 {" #c1"[4], " #c2"[4], " #c3"[4], " #c4"[4]}," #b1 "," #stride"\n" \
"vld4.8 {" #c1"[5], " #c2"[5], " #c3"[5], " #c4"[5]}," #b2 "," #stride"\n" \
"vld4.8 {" #c1"[6], " #c2"[6], " #c3"[6], " #c4"[6]}," #b1 "," #stride"\n" \
"vld4.8 {" #c1"[7], " #c2"[7], " #c3"[7], " #c4"[7]}," #b2 "," #stride"\n"
"vld4.8 {" #c1 "[0]," #c2 "[0]," #c3 "[0]," #c4 "[0]}," #b1 "," #stride "\n" \
"vld4.8 {" #c1 "[1]," #c2 "[1]," #c3 "[1]," #c4 "[1]}," #b2 "," #stride "\n" \
"vld4.8 {" #c1 "[2]," #c2 "[2]," #c3 "[2]," #c4 "[2]}," #b1 "," #stride "\n" \
"vld4.8 {" #c1 "[3]," #c2 "[3]," #c3 "[3]," #c4 "[3]}," #b2 "," #stride "\n" \
"vld4.8 {" #c1 "[4]," #c2 "[4]," #c3 "[4]," #c4 "[4]}," #b1 "," #stride "\n" \
"vld4.8 {" #c1 "[5]," #c2 "[5]," #c3 "[5]," #c4 "[5]}," #b2 "," #stride "\n" \
"vld4.8 {" #c1 "[6]," #c2 "[6]," #c3 "[6]," #c4 "[6]}," #b1 "," #stride "\n" \
"vld4.8 {" #c1 "[7]," #c2 "[7]," #c3 "[7]," #c4 "[7]}," #b2 "," #stride "\n"
#define STORE8x2(c1, c2, p, stride) \
"vst2.8 {" #c1"[0], " #c2"[0]}," #p "," #stride " \n" \
"vst2.8 {" #c1"[1], " #c2"[1]}," #p "," #stride " \n" \
"vst2.8 {" #c1"[2], " #c2"[2]}," #p "," #stride " \n" \
"vst2.8 {" #c1"[3], " #c2"[3]}," #p "," #stride " \n" \
"vst2.8 {" #c1"[4], " #c2"[4]}," #p "," #stride " \n" \
"vst2.8 {" #c1"[5], " #c2"[5]}," #p "," #stride " \n" \
"vst2.8 {" #c1"[6], " #c2"[6]}," #p "," #stride " \n" \
"vst2.8 {" #c1"[7], " #c2"[7]}," #p "," #stride " \n"
"vst2.8 {" #c1 "[0], " #c2 "[0]}," #p "," #stride " \n" \
"vst2.8 {" #c1 "[1], " #c2 "[1]}," #p "," #stride " \n" \
"vst2.8 {" #c1 "[2], " #c2 "[2]}," #p "," #stride " \n" \
"vst2.8 {" #c1 "[3], " #c2 "[3]}," #p "," #stride " \n" \
"vst2.8 {" #c1 "[4], " #c2 "[4]}," #p "," #stride " \n" \
"vst2.8 {" #c1 "[5], " #c2 "[5]}," #p "," #stride " \n" \
"vst2.8 {" #c1 "[6], " #c2 "[6]}," #p "," #stride " \n" \
"vst2.8 {" #c1 "[7], " #c2 "[7]}," #p "," #stride " \n"
#if !defined(WORK_AROUND_GCC)

View File

@ -36,14 +36,9 @@ extern "C" {
# define LOCAL_GCC_PREREQ(maj, min) 0
#endif
#ifdef __clang__
# define LOCAL_CLANG_VERSION ((__clang_major__ << 8) | __clang_minor__)
# define LOCAL_CLANG_PREREQ(maj, min) \
(LOCAL_CLANG_VERSION >= (((maj) << 8) | (min)))
#else
# define LOCAL_CLANG_VERSION 0
# define LOCAL_CLANG_PREREQ(maj, min) 0
#endif // __clang__
#ifndef __has_builtin
# define __has_builtin(x) 0
#endif
#if defined(_MSC_VER) && _MSC_VER > 1310 && \
(defined(_M_X64) || defined(_M_IX86))
@ -73,7 +68,8 @@ extern "C" {
#define WEBP_USE_NEON
#endif
#if defined(__mips__) && !defined(__mips64) && (__mips_isa_rev < 6)
#if defined(__mips__) && !defined(__mips64) && \
defined(__mips_isa_rev) && (__mips_isa_rev >= 1) && (__mips_isa_rev < 6)
#define WEBP_USE_MIPS32
#if (__mips_isa_rev >= 2)
#define WEBP_USE_MIPS32_R2

View File

@ -34,26 +34,26 @@ static const int kC2 = 35468;
// TEMP0..TEMP3 - registers for corresponding tmp elements
// TEMP4..TEMP5 - temporary registers
#define VERTICAL_PASS(A, B, C, D, TEMP4, TEMP0, TEMP1, TEMP2, TEMP3) \
"lh %[temp16], "#A"(%[temp20]) \n\t" \
"lh %[temp18], "#B"(%[temp20]) \n\t" \
"lh %[temp17], "#C"(%[temp20]) \n\t" \
"lh %[temp19], "#D"(%[temp20]) \n\t" \
"addu %["#TEMP4"], %[temp16], %[temp18] \n\t" \
"subu %[temp16], %[temp16], %[temp18] \n\t" \
"mul %["#TEMP0"], %[temp17], %[kC2] \n\t" \
"mul %[temp18], %[temp19], %[kC1] \n\t" \
"mul %[temp17], %[temp17], %[kC1] \n\t" \
"mul %[temp19], %[temp19], %[kC2] \n\t" \
"sra %["#TEMP0"], %["#TEMP0"], 16 \n\n" \
"sra %[temp18], %[temp18], 16 \n\n" \
"sra %[temp17], %[temp17], 16 \n\n" \
"sra %[temp19], %[temp19], 16 \n\n" \
"subu %["#TEMP2"], %["#TEMP0"], %[temp18] \n\t" \
"addu %["#TEMP3"], %[temp17], %[temp19] \n\t" \
"addu %["#TEMP0"], %["#TEMP4"], %["#TEMP3"] \n\t" \
"addu %["#TEMP1"], %[temp16], %["#TEMP2"] \n\t" \
"subu %["#TEMP2"], %[temp16], %["#TEMP2"] \n\t" \
"subu %["#TEMP3"], %["#TEMP4"], %["#TEMP3"] \n\t"
"lh %[temp16], " #A "(%[temp20]) \n\t" \
"lh %[temp18], " #B "(%[temp20]) \n\t" \
"lh %[temp17], " #C "(%[temp20]) \n\t" \
"lh %[temp19], " #D "(%[temp20]) \n\t" \
"addu %[" #TEMP4 "], %[temp16], %[temp18] \n\t" \
"subu %[temp16], %[temp16], %[temp18] \n\t" \
"mul %[" #TEMP0 "], %[temp17], %[kC2] \n\t" \
"mul %[temp18], %[temp19], %[kC1] \n\t" \
"mul %[temp17], %[temp17], %[kC1] \n\t" \
"mul %[temp19], %[temp19], %[kC2] \n\t" \
"sra %[" #TEMP0 "], %[" #TEMP0 "], 16 \n\n" \
"sra %[temp18], %[temp18], 16 \n\n" \
"sra %[temp17], %[temp17], 16 \n\n" \
"sra %[temp19], %[temp19], 16 \n\n" \
"subu %[" #TEMP2 "], %[" #TEMP0 "], %[temp18] \n\t" \
"addu %[" #TEMP3 "], %[temp17], %[temp19] \n\t" \
"addu %[" #TEMP0 "], %[" #TEMP4 "], %[" #TEMP3 "] \n\t" \
"addu %[" #TEMP1 "], %[temp16], %[" #TEMP2 "] \n\t" \
"subu %[" #TEMP2 "], %[temp16], %[" #TEMP2 "] \n\t" \
"subu %[" #TEMP3 "], %[" #TEMP4 "], %[" #TEMP3 "] \n\t"
// macro for one horizontal pass in ITransformOne
// MUL and STORE macros inlined
@ -61,59 +61,59 @@ static const int kC2 = 35468;
// temp0..temp15 holds tmp[0]..tmp[15]
// A..D - offsets in bytes to load from ref and store to dst buffer
// TEMP0, TEMP4, TEMP8 and TEMP12 - registers for corresponding tmp elements
#define HORIZONTAL_PASS(A, B, C, D, TEMP0, TEMP4, TEMP8, TEMP12) \
"addiu %["#TEMP0"], %["#TEMP0"], 4 \n\t" \
"addu %[temp16], %["#TEMP0"], %["#TEMP8"] \n\t" \
"subu %[temp17], %["#TEMP0"], %["#TEMP8"] \n\t" \
"mul %["#TEMP0"], %["#TEMP4"], %[kC2] \n\t" \
"mul %["#TEMP8"], %["#TEMP12"], %[kC1] \n\t" \
"mul %["#TEMP4"], %["#TEMP4"], %[kC1] \n\t" \
"mul %["#TEMP12"], %["#TEMP12"], %[kC2] \n\t" \
"sra %["#TEMP0"], %["#TEMP0"], 16 \n\t" \
"sra %["#TEMP8"], %["#TEMP8"], 16 \n\t" \
"sra %["#TEMP4"], %["#TEMP4"], 16 \n\t" \
"sra %["#TEMP12"], %["#TEMP12"], 16 \n\t" \
"subu %[temp18], %["#TEMP0"], %["#TEMP8"] \n\t" \
"addu %[temp19], %["#TEMP4"], %["#TEMP12"] \n\t" \
"addu %["#TEMP0"], %[temp16], %[temp19] \n\t" \
"addu %["#TEMP4"], %[temp17], %[temp18] \n\t" \
"subu %["#TEMP8"], %[temp17], %[temp18] \n\t" \
"subu %["#TEMP12"], %[temp16], %[temp19] \n\t" \
"lw %[temp20], 0(%[args]) \n\t" \
"sra %["#TEMP0"], %["#TEMP0"], 3 \n\t" \
"sra %["#TEMP4"], %["#TEMP4"], 3 \n\t" \
"sra %["#TEMP8"], %["#TEMP8"], 3 \n\t" \
"sra %["#TEMP12"], %["#TEMP12"], 3 \n\t" \
"lbu %[temp16], "#A"(%[temp20]) \n\t" \
"lbu %[temp17], "#B"(%[temp20]) \n\t" \
"lbu %[temp18], "#C"(%[temp20]) \n\t" \
"lbu %[temp19], "#D"(%[temp20]) \n\t" \
"addu %["#TEMP0"], %[temp16], %["#TEMP0"] \n\t" \
"addu %["#TEMP4"], %[temp17], %["#TEMP4"] \n\t" \
"addu %["#TEMP8"], %[temp18], %["#TEMP8"] \n\t" \
"addu %["#TEMP12"], %[temp19], %["#TEMP12"] \n\t" \
"slt %[temp16], %["#TEMP0"], $zero \n\t" \
"slt %[temp17], %["#TEMP4"], $zero \n\t" \
"slt %[temp18], %["#TEMP8"], $zero \n\t" \
"slt %[temp19], %["#TEMP12"], $zero \n\t" \
"movn %["#TEMP0"], $zero, %[temp16] \n\t" \
"movn %["#TEMP4"], $zero, %[temp17] \n\t" \
"movn %["#TEMP8"], $zero, %[temp18] \n\t" \
"movn %["#TEMP12"], $zero, %[temp19] \n\t" \
"addiu %[temp20], $zero, 255 \n\t" \
"slt %[temp16], %["#TEMP0"], %[temp20] \n\t" \
"slt %[temp17], %["#TEMP4"], %[temp20] \n\t" \
"slt %[temp18], %["#TEMP8"], %[temp20] \n\t" \
"slt %[temp19], %["#TEMP12"], %[temp20] \n\t" \
"movz %["#TEMP0"], %[temp20], %[temp16] \n\t" \
"movz %["#TEMP4"], %[temp20], %[temp17] \n\t" \
"lw %[temp16], 8(%[args]) \n\t" \
"movz %["#TEMP8"], %[temp20], %[temp18] \n\t" \
"movz %["#TEMP12"], %[temp20], %[temp19] \n\t" \
"sb %["#TEMP0"], "#A"(%[temp16]) \n\t" \
"sb %["#TEMP4"], "#B"(%[temp16]) \n\t" \
"sb %["#TEMP8"], "#C"(%[temp16]) \n\t" \
"sb %["#TEMP12"], "#D"(%[temp16]) \n\t"
#define HORIZONTAL_PASS(A, B, C, D, TEMP0, TEMP4, TEMP8, TEMP12) \
"addiu %[" #TEMP0 "], %[" #TEMP0 "], 4 \n\t" \
"addu %[temp16], %[" #TEMP0 "], %[" #TEMP8 "] \n\t" \
"subu %[temp17], %[" #TEMP0 "], %[" #TEMP8 "] \n\t" \
"mul %[" #TEMP0 "], %[" #TEMP4 "], %[kC2] \n\t" \
"mul %[" #TEMP8 "], %[" #TEMP12 "], %[kC1] \n\t" \
"mul %[" #TEMP4 "], %[" #TEMP4 "], %[kC1] \n\t" \
"mul %[" #TEMP12 "], %[" #TEMP12 "], %[kC2] \n\t" \
"sra %[" #TEMP0 "], %[" #TEMP0 "], 16 \n\t" \
"sra %[" #TEMP8 "], %[" #TEMP8 "], 16 \n\t" \
"sra %[" #TEMP4 "], %[" #TEMP4 "], 16 \n\t" \
"sra %[" #TEMP12 "], %[" #TEMP12 "], 16 \n\t" \
"subu %[temp18], %[" #TEMP0 "], %[" #TEMP8 "] \n\t" \
"addu %[temp19], %[" #TEMP4 "], %[" #TEMP12 "] \n\t" \
"addu %[" #TEMP0 "], %[temp16], %[temp19] \n\t" \
"addu %[" #TEMP4 "], %[temp17], %[temp18] \n\t" \
"subu %[" #TEMP8 "], %[temp17], %[temp18] \n\t" \
"subu %[" #TEMP12 "], %[temp16], %[temp19] \n\t" \
"lw %[temp20], 0(%[args]) \n\t" \
"sra %[" #TEMP0 "], %[" #TEMP0 "], 3 \n\t" \
"sra %[" #TEMP4 "], %[" #TEMP4 "], 3 \n\t" \
"sra %[" #TEMP8 "], %[" #TEMP8 "], 3 \n\t" \
"sra %[" #TEMP12 "], %[" #TEMP12 "], 3 \n\t" \
"lbu %[temp16], " #A "(%[temp20]) \n\t" \
"lbu %[temp17], " #B "(%[temp20]) \n\t" \
"lbu %[temp18], " #C "(%[temp20]) \n\t" \
"lbu %[temp19], " #D "(%[temp20]) \n\t" \
"addu %[" #TEMP0 "], %[temp16], %[" #TEMP0 "] \n\t" \
"addu %[" #TEMP4 "], %[temp17], %[" #TEMP4 "] \n\t" \
"addu %[" #TEMP8 "], %[temp18], %[" #TEMP8 "] \n\t" \
"addu %[" #TEMP12 "], %[temp19], %[" #TEMP12 "] \n\t" \
"slt %[temp16], %[" #TEMP0 "], $zero \n\t" \
"slt %[temp17], %[" #TEMP4 "], $zero \n\t" \
"slt %[temp18], %[" #TEMP8 "], $zero \n\t" \
"slt %[temp19], %[" #TEMP12 "], $zero \n\t" \
"movn %[" #TEMP0 "], $zero, %[temp16] \n\t" \
"movn %[" #TEMP4 "], $zero, %[temp17] \n\t" \
"movn %[" #TEMP8 "], $zero, %[temp18] \n\t" \
"movn %[" #TEMP12 "], $zero, %[temp19] \n\t" \
"addiu %[temp20], $zero, 255 \n\t" \
"slt %[temp16], %[" #TEMP0 "], %[temp20] \n\t" \
"slt %[temp17], %[" #TEMP4 "], %[temp20] \n\t" \
"slt %[temp18], %[" #TEMP8 "], %[temp20] \n\t" \
"slt %[temp19], %[" #TEMP12 "], %[temp20] \n\t" \
"movz %[" #TEMP0 "], %[temp20], %[temp16] \n\t" \
"movz %[" #TEMP4 "], %[temp20], %[temp17] \n\t" \
"lw %[temp16], 8(%[args]) \n\t" \
"movz %[" #TEMP8 "], %[temp20], %[temp18] \n\t" \
"movz %[" #TEMP12 "], %[temp20], %[temp19] \n\t" \
"sb %[" #TEMP0 "], " #A "(%[temp16]) \n\t" \
"sb %[" #TEMP4 "], " #B "(%[temp16]) \n\t" \
"sb %[" #TEMP8 "], " #C "(%[temp16]) \n\t" \
"sb %[" #TEMP12 "], " #D "(%[temp16]) \n\t"
// Does one or two inverse transforms.
static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
@ -164,9 +164,9 @@ static void ITransform(const uint8_t* ref, const int16_t* in,
// K - offset in bytes (kZigzag[n] * 4)
// N - offset in bytes (n * 2)
#define QUANTIZE_ONE(J, K, N) \
"lh %[temp0], "#J"(%[ppin]) \n\t" \
"lhu %[temp1], "#J"(%[ppsharpen]) \n\t" \
"lw %[temp2], "#K"(%[ppzthresh]) \n\t" \
"lh %[temp0], " #J "(%[ppin]) \n\t" \
"lhu %[temp1], " #J "(%[ppsharpen]) \n\t" \
"lw %[temp2], " #K "(%[ppzthresh]) \n\t" \
"sra %[sign], %[temp0], 15 \n\t" \
"xor %[coeff], %[temp0], %[sign] \n\t" \
"subu %[coeff], %[coeff], %[sign] \n\t" \
@ -175,9 +175,9 @@ static void ITransform(const uint8_t* ref, const int16_t* in,
"addiu %[temp5], $zero, 0 \n\t" \
"addiu %[level], $zero, 0 \n\t" \
"beqz %[temp4], 2f \n\t" \
"lhu %[temp1], "#J"(%[ppiq]) \n\t" \
"lw %[temp2], "#K"(%[ppbias]) \n\t" \
"lhu %[temp3], "#J"(%[ppq]) \n\t" \
"lhu %[temp1], " #J "(%[ppiq]) \n\t" \
"lw %[temp2], " #K "(%[ppbias]) \n\t" \
"lhu %[temp3], " #J "(%[ppq]) \n\t" \
"mul %[level], %[coeff], %[temp1] \n\t" \
"addu %[level], %[level], %[temp2] \n\t" \
"sra %[level], %[level], 17 \n\t" \
@ -187,8 +187,8 @@ static void ITransform(const uint8_t* ref, const int16_t* in,
"subu %[level], %[level], %[sign] \n\t" \
"mul %[temp5], %[level], %[temp3] \n\t" \
"2: \n\t" \
"sh %[temp5], "#J"(%[ppin]) \n\t" \
"sh %[level], "#N"(%[pout]) \n\t"
"sh %[temp5], " #J "(%[ppin]) \n\t" \
"sh %[level], " #N "(%[pout]) \n\t"
static int QuantizeBlock(int16_t in[16], int16_t out[16],
const VP8Matrix* const mtx) {
@ -249,14 +249,14 @@ static int QuantizeBlock(int16_t in[16], int16_t out[16],
// E..H - offsets in bytes to store first results to tmp buffer
// E1..H1 - offsets in bytes to store second results to tmp buffer
#define HORIZONTAL_PASS(A, B, C, D, E, F, G, H, E1, F1, G1, H1) \
"lbu %[temp0], "#A"(%[a]) \n\t" \
"lbu %[temp1], "#B"(%[a]) \n\t" \
"lbu %[temp2], "#C"(%[a]) \n\t" \
"lbu %[temp3], "#D"(%[a]) \n\t" \
"lbu %[temp4], "#A"(%[b]) \n\t" \
"lbu %[temp5], "#B"(%[b]) \n\t" \
"lbu %[temp6], "#C"(%[b]) \n\t" \
"lbu %[temp7], "#D"(%[b]) \n\t" \
"lbu %[temp0], " #A "(%[a]) \n\t" \
"lbu %[temp1], " #B "(%[a]) \n\t" \
"lbu %[temp2], " #C "(%[a]) \n\t" \
"lbu %[temp3], " #D "(%[a]) \n\t" \
"lbu %[temp4], " #A "(%[b]) \n\t" \
"lbu %[temp5], " #B "(%[b]) \n\t" \
"lbu %[temp6], " #C "(%[b]) \n\t" \
"lbu %[temp7], " #D "(%[b]) \n\t" \
"addu %[temp8], %[temp0], %[temp2] \n\t" \
"subu %[temp0], %[temp0], %[temp2] \n\t" \
"addu %[temp2], %[temp1], %[temp3] \n\t" \
@ -273,14 +273,14 @@ static int QuantizeBlock(int16_t in[16], int16_t out[16],
"subu %[temp3], %[temp3], %[temp6] \n\t" \
"addu %[temp6], %[temp4], %[temp5] \n\t" \
"subu %[temp4], %[temp4], %[temp5] \n\t" \
"sw %[temp7], "#E"(%[tmp]) \n\t" \
"sw %[temp2], "#H"(%[tmp]) \n\t" \
"sw %[temp8], "#F"(%[tmp]) \n\t" \
"sw %[temp0], "#G"(%[tmp]) \n\t" \
"sw %[temp1], "#E1"(%[tmp]) \n\t" \
"sw %[temp3], "#H1"(%[tmp]) \n\t" \
"sw %[temp6], "#F1"(%[tmp]) \n\t" \
"sw %[temp4], "#G1"(%[tmp]) \n\t"
"sw %[temp7], " #E "(%[tmp]) \n\t" \
"sw %[temp2], " #H "(%[tmp]) \n\t" \
"sw %[temp8], " #F "(%[tmp]) \n\t" \
"sw %[temp0], " #G "(%[tmp]) \n\t" \
"sw %[temp1], " #E1 "(%[tmp]) \n\t" \
"sw %[temp3], " #H1 "(%[tmp]) \n\t" \
"sw %[temp6], " #F1 "(%[tmp]) \n\t" \
"sw %[temp4], " #G1 "(%[tmp]) \n\t"
// macro for one vertical pass in Disto4x4 (TTransform)
// two calls of function TTransform are merged into single one
@ -295,10 +295,10 @@ static int QuantizeBlock(int16_t in[16], int16_t out[16],
// A1..D1 - offsets in bytes to load second results from tmp buffer
// E..H - offsets in bytes to load from w buffer
#define VERTICAL_PASS(A, B, C, D, A1, B1, C1, D1, E, F, G, H) \
"lw %[temp0], "#A1"(%[tmp]) \n\t" \
"lw %[temp1], "#C1"(%[tmp]) \n\t" \
"lw %[temp2], "#B1"(%[tmp]) \n\t" \
"lw %[temp3], "#D1"(%[tmp]) \n\t" \
"lw %[temp0], " #A1 "(%[tmp]) \n\t" \
"lw %[temp1], " #C1 "(%[tmp]) \n\t" \
"lw %[temp2], " #B1 "(%[tmp]) \n\t" \
"lw %[temp3], " #D1 "(%[tmp]) \n\t" \
"addu %[temp8], %[temp0], %[temp1] \n\t" \
"subu %[temp0], %[temp0], %[temp1] \n\t" \
"addu %[temp1], %[temp2], %[temp3] \n\t" \
@ -319,18 +319,18 @@ static int QuantizeBlock(int16_t in[16], int16_t out[16],
"subu %[temp1], %[temp1], %[temp5] \n\t" \
"subu %[temp0], %[temp0], %[temp6] \n\t" \
"subu %[temp8], %[temp8], %[temp7] \n\t" \
"lhu %[temp4], "#E"(%[w]) \n\t" \
"lhu %[temp5], "#F"(%[w]) \n\t" \
"lhu %[temp6], "#G"(%[w]) \n\t" \
"lhu %[temp7], "#H"(%[w]) \n\t" \
"lhu %[temp4], " #E "(%[w]) \n\t" \
"lhu %[temp5], " #F "(%[w]) \n\t" \
"lhu %[temp6], " #G "(%[w]) \n\t" \
"lhu %[temp7], " #H "(%[w]) \n\t" \
"madd %[temp4], %[temp3] \n\t" \
"madd %[temp5], %[temp1] \n\t" \
"madd %[temp6], %[temp0] \n\t" \
"madd %[temp7], %[temp8] \n\t" \
"lw %[temp0], "#A"(%[tmp]) \n\t" \
"lw %[temp1], "#C"(%[tmp]) \n\t" \
"lw %[temp2], "#B"(%[tmp]) \n\t" \
"lw %[temp3], "#D"(%[tmp]) \n\t" \
"lw %[temp0], " #A "(%[tmp]) \n\t" \
"lw %[temp1], " #C "(%[tmp]) \n\t" \
"lw %[temp2], " #B "(%[tmp]) \n\t" \
"lw %[temp3], " #D "(%[tmp]) \n\t" \
"addu %[temp8], %[temp0], %[temp1] \n\t" \
"subu %[temp0], %[temp0], %[temp1] \n\t" \
"addu %[temp1], %[temp2], %[temp3] \n\t" \
@ -407,71 +407,71 @@ static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
// temp0..temp15 holds tmp[0]..tmp[15]
// A..D - offsets in bytes to load from src and ref buffers
// TEMP0..TEMP3 - registers for corresponding tmp elements
#define HORIZONTAL_PASS(A, B, C, D, TEMP0, TEMP1, TEMP2, TEMP3) \
"lw %["#TEMP1"], 0(%[args]) \n\t" \
"lw %["#TEMP2"], 4(%[args]) \n\t" \
"lbu %[temp16], "#A"(%["#TEMP1"]) \n\t" \
"lbu %[temp17], "#A"(%["#TEMP2"]) \n\t" \
"lbu %[temp18], "#B"(%["#TEMP1"]) \n\t" \
"lbu %[temp19], "#B"(%["#TEMP2"]) \n\t" \
"subu %[temp20], %[temp16], %[temp17] \n\t" \
"lbu %[temp16], "#C"(%["#TEMP1"]) \n\t" \
"lbu %[temp17], "#C"(%["#TEMP2"]) \n\t" \
"subu %["#TEMP0"], %[temp18], %[temp19] \n\t" \
"lbu %[temp18], "#D"(%["#TEMP1"]) \n\t" \
"lbu %[temp19], "#D"(%["#TEMP2"]) \n\t" \
"subu %["#TEMP1"], %[temp16], %[temp17] \n\t" \
"subu %["#TEMP2"], %[temp18], %[temp19] \n\t" \
"addu %["#TEMP3"], %[temp20], %["#TEMP2"] \n\t" \
"subu %["#TEMP2"], %[temp20], %["#TEMP2"] \n\t" \
"addu %[temp20], %["#TEMP0"], %["#TEMP1"] \n\t" \
"subu %["#TEMP0"], %["#TEMP0"], %["#TEMP1"] \n\t" \
"mul %[temp16], %["#TEMP2"], %[c5352] \n\t" \
"mul %[temp17], %["#TEMP2"], %[c2217] \n\t" \
"mul %[temp18], %["#TEMP0"], %[c5352] \n\t" \
"mul %[temp19], %["#TEMP0"], %[c2217] \n\t" \
"addu %["#TEMP1"], %["#TEMP3"], %[temp20] \n\t" \
"subu %[temp20], %["#TEMP3"], %[temp20] \n\t" \
"sll %["#TEMP0"], %["#TEMP1"], 3 \n\t" \
"sll %["#TEMP2"], %[temp20], 3 \n\t" \
"addiu %[temp16], %[temp16], 1812 \n\t" \
"addiu %[temp17], %[temp17], 937 \n\t" \
"addu %[temp16], %[temp16], %[temp19] \n\t" \
"subu %[temp17], %[temp17], %[temp18] \n\t" \
"sra %["#TEMP1"], %[temp16], 9 \n\t" \
"sra %["#TEMP3"], %[temp17], 9 \n\t"
#define HORIZONTAL_PASS(A, B, C, D, TEMP0, TEMP1, TEMP2, TEMP3) \
"lw %[" #TEMP1 "], 0(%[args]) \n\t" \
"lw %[" #TEMP2 "], 4(%[args]) \n\t" \
"lbu %[temp16], " #A "(%[" #TEMP1 "]) \n\t" \
"lbu %[temp17], " #A "(%[" #TEMP2 "]) \n\t" \
"lbu %[temp18], " #B "(%[" #TEMP1 "]) \n\t" \
"lbu %[temp19], " #B "(%[" #TEMP2 "]) \n\t" \
"subu %[temp20], %[temp16], %[temp17] \n\t" \
"lbu %[temp16], " #C "(%[" #TEMP1 "]) \n\t" \
"lbu %[temp17], " #C "(%[" #TEMP2 "]) \n\t" \
"subu %[" #TEMP0 "], %[temp18], %[temp19] \n\t" \
"lbu %[temp18], " #D "(%[" #TEMP1 "]) \n\t" \
"lbu %[temp19], " #D "(%[" #TEMP2 "]) \n\t" \
"subu %[" #TEMP1 "], %[temp16], %[temp17] \n\t" \
"subu %[" #TEMP2 "], %[temp18], %[temp19] \n\t" \
"addu %[" #TEMP3 "], %[temp20], %[" #TEMP2 "] \n\t" \
"subu %[" #TEMP2 "], %[temp20], %[" #TEMP2 "] \n\t" \
"addu %[temp20], %[" #TEMP0 "], %[" #TEMP1 "] \n\t" \
"subu %[" #TEMP0 "], %[" #TEMP0 "], %[" #TEMP1 "] \n\t" \
"mul %[temp16], %[" #TEMP2 "], %[c5352] \n\t" \
"mul %[temp17], %[" #TEMP2 "], %[c2217] \n\t" \
"mul %[temp18], %[" #TEMP0 "], %[c5352] \n\t" \
"mul %[temp19], %[" #TEMP0 "], %[c2217] \n\t" \
"addu %[" #TEMP1 "], %[" #TEMP3 "], %[temp20] \n\t" \
"subu %[temp20], %[" #TEMP3 "], %[temp20] \n\t" \
"sll %[" #TEMP0 "], %[" #TEMP1 "], 3 \n\t" \
"sll %[" #TEMP2 "], %[temp20], 3 \n\t" \
"addiu %[temp16], %[temp16], 1812 \n\t" \
"addiu %[temp17], %[temp17], 937 \n\t" \
"addu %[temp16], %[temp16], %[temp19] \n\t" \
"subu %[temp17], %[temp17], %[temp18] \n\t" \
"sra %[" #TEMP1 "], %[temp16], 9 \n\t" \
"sra %[" #TEMP3 "], %[temp17], 9 \n\t"
// macro for one vertical pass in FTransform
// temp0..temp15 holds tmp[0]..tmp[15]
// A..D - offsets in bytes to store to out buffer
// TEMP0, TEMP4, TEMP8 and TEMP12 - registers for corresponding tmp elements
#define VERTICAL_PASS(A, B, C, D, TEMP0, TEMP4, TEMP8, TEMP12) \
"addu %[temp16], %["#TEMP0"], %["#TEMP12"] \n\t" \
"subu %[temp19], %["#TEMP0"], %["#TEMP12"] \n\t" \
"addu %[temp17], %["#TEMP4"], %["#TEMP8"] \n\t" \
"subu %[temp18], %["#TEMP4"], %["#TEMP8"] \n\t" \
"mul %["#TEMP8"], %[temp19], %[c2217] \n\t" \
"mul %["#TEMP12"], %[temp18], %[c2217] \n\t" \
"mul %["#TEMP4"], %[temp19], %[c5352] \n\t" \
"mul %[temp18], %[temp18], %[c5352] \n\t" \
"addiu %[temp16], %[temp16], 7 \n\t" \
"addu %["#TEMP0"], %[temp16], %[temp17] \n\t" \
"sra %["#TEMP0"], %["#TEMP0"], 4 \n\t" \
"addu %["#TEMP12"], %["#TEMP12"], %["#TEMP4"] \n\t" \
"subu %["#TEMP4"], %[temp16], %[temp17] \n\t" \
"sra %["#TEMP4"], %["#TEMP4"], 4 \n\t" \
"addiu %["#TEMP8"], %["#TEMP8"], 30000 \n\t" \
"addiu %["#TEMP12"], %["#TEMP12"], 12000 \n\t" \
"addiu %["#TEMP8"], %["#TEMP8"], 21000 \n\t" \
"subu %["#TEMP8"], %["#TEMP8"], %[temp18] \n\t" \
"sra %["#TEMP12"], %["#TEMP12"], 16 \n\t" \
"sra %["#TEMP8"], %["#TEMP8"], 16 \n\t" \
"addiu %[temp16], %["#TEMP12"], 1 \n\t" \
"movn %["#TEMP12"], %[temp16], %[temp19] \n\t" \
"sh %["#TEMP0"], "#A"(%[temp20]) \n\t" \
"sh %["#TEMP4"], "#C"(%[temp20]) \n\t" \
"sh %["#TEMP8"], "#D"(%[temp20]) \n\t" \
"sh %["#TEMP12"], "#B"(%[temp20]) \n\t"
#define VERTICAL_PASS(A, B, C, D, TEMP0, TEMP4, TEMP8, TEMP12) \
"addu %[temp16], %[" #TEMP0 "], %[" #TEMP12 "] \n\t" \
"subu %[temp19], %[" #TEMP0 "], %[" #TEMP12 "] \n\t" \
"addu %[temp17], %[" #TEMP4 "], %[" #TEMP8 "] \n\t" \
"subu %[temp18], %[" #TEMP4 "], %[" #TEMP8 "] \n\t" \
"mul %[" #TEMP8 "], %[temp19], %[c2217] \n\t" \
"mul %[" #TEMP12 "], %[temp18], %[c2217] \n\t" \
"mul %[" #TEMP4 "], %[temp19], %[c5352] \n\t" \
"mul %[temp18], %[temp18], %[c5352] \n\t" \
"addiu %[temp16], %[temp16], 7 \n\t" \
"addu %[" #TEMP0 "], %[temp16], %[temp17] \n\t" \
"sra %[" #TEMP0 "], %[" #TEMP0 "], 4 \n\t" \
"addu %[" #TEMP12 "], %[" #TEMP12 "], %[" #TEMP4 "] \n\t" \
"subu %[" #TEMP4 "], %[temp16], %[temp17] \n\t" \
"sra %[" #TEMP4 "], %[" #TEMP4 "], 4 \n\t" \
"addiu %[" #TEMP8 "], %[" #TEMP8 "], 30000 \n\t" \
"addiu %[" #TEMP12 "], %[" #TEMP12 "], 12000 \n\t" \
"addiu %[" #TEMP8 "], %[" #TEMP8 "], 21000 \n\t" \
"subu %[" #TEMP8 "], %[" #TEMP8 "], %[temp18] \n\t" \
"sra %[" #TEMP12 "], %[" #TEMP12 "], 16 \n\t" \
"sra %[" #TEMP8 "], %[" #TEMP8 "], 16 \n\t" \
"addiu %[temp16], %[" #TEMP12 "], 1 \n\t" \
"movn %[" #TEMP12 "], %[temp16], %[temp19] \n\t" \
"sh %[" #TEMP0 "], " #A "(%[temp20]) \n\t" \
"sh %[" #TEMP4 "], " #C "(%[temp20]) \n\t" \
"sh %[" #TEMP8 "], " #D "(%[temp20]) \n\t" \
"sh %[" #TEMP12 "], " #B "(%[temp20]) \n\t"
static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
@ -622,14 +622,14 @@ int VP8GetResidualCostMIPS32(int ctx0, const VP8Residual* const res) {
}
#define GET_SSE_INNER(A, B, C, D) \
"lbu %[temp0], "#A"(%[a]) \n\t" \
"lbu %[temp1], "#A"(%[b]) \n\t" \
"lbu %[temp2], "#B"(%[a]) \n\t" \
"lbu %[temp3], "#B"(%[b]) \n\t" \
"lbu %[temp4], "#C"(%[a]) \n\t" \
"lbu %[temp5], "#C"(%[b]) \n\t" \
"lbu %[temp6], "#D"(%[a]) \n\t" \
"lbu %[temp7], "#D"(%[b]) \n\t" \
"lbu %[temp0], " #A "(%[a]) \n\t" \
"lbu %[temp1], " #A "(%[b]) \n\t" \
"lbu %[temp2], " #B "(%[a]) \n\t" \
"lbu %[temp3], " #B "(%[b]) \n\t" \
"lbu %[temp4], " #C "(%[a]) \n\t" \
"lbu %[temp5], " #C "(%[b]) \n\t" \
"lbu %[temp6], " #D "(%[a]) \n\t" \
"lbu %[temp7], " #D "(%[b]) \n\t" \
"subu %[temp0], %[temp0], %[temp1] \n\t" \
"subu %[temp2], %[temp2], %[temp3] \n\t" \
"subu %[temp4], %[temp4], %[temp5] \n\t" \

View File

@ -285,28 +285,28 @@ static VP8LStreaks HuffmanCostCombinedCount(const uint32_t* X,
// literal_ and successive histograms could be unaligned
// so we must use ulw and usw
#define ADD_TO_OUT(A, B, C, D, E, P0, P1, P2) \
"ulw %[temp0], "#A"(%["#P0"]) \n\t" \
"ulw %[temp1], "#B"(%["#P0"]) \n\t" \
"ulw %[temp2], "#C"(%["#P0"]) \n\t" \
"ulw %[temp3], "#D"(%["#P0"]) \n\t" \
"ulw %[temp4], "#A"(%["#P1"]) \n\t" \
"ulw %[temp5], "#B"(%["#P1"]) \n\t" \
"ulw %[temp6], "#C"(%["#P1"]) \n\t" \
"ulw %[temp7], "#D"(%["#P1"]) \n\t" \
"ulw %[temp0], " #A "(%[" #P0 "]) \n\t" \
"ulw %[temp1], " #B "(%[" #P0 "]) \n\t" \
"ulw %[temp2], " #C "(%[" #P0 "]) \n\t" \
"ulw %[temp3], " #D "(%[" #P0 "]) \n\t" \
"ulw %[temp4], " #A "(%[" #P1 "]) \n\t" \
"ulw %[temp5], " #B "(%[" #P1 "]) \n\t" \
"ulw %[temp6], " #C "(%[" #P1 "]) \n\t" \
"ulw %[temp7], " #D "(%[" #P1 "]) \n\t" \
"addu %[temp4], %[temp4], %[temp0] \n\t" \
"addu %[temp5], %[temp5], %[temp1] \n\t" \
"addu %[temp6], %[temp6], %[temp2] \n\t" \
"addu %[temp7], %[temp7], %[temp3] \n\t" \
"addiu %["#P0"], %["#P0"], 16 \n\t" \
".if "#E" == 1 \n\t" \
"addiu %["#P1"], %["#P1"], 16 \n\t" \
"addiu %[" #P0 "], %[" #P0 "], 16 \n\t" \
".if " #E " == 1 \n\t" \
"addiu %[" #P1 "], %[" #P1 "], 16 \n\t" \
".endif \n\t" \
"usw %[temp4], "#A"(%["#P2"]) \n\t" \
"usw %[temp5], "#B"(%["#P2"]) \n\t" \
"usw %[temp6], "#C"(%["#P2"]) \n\t" \
"usw %[temp7], "#D"(%["#P2"]) \n\t" \
"addiu %["#P2"], %["#P2"], 16 \n\t" \
"bne %["#P0"], %[LoopEnd], 1b \n\t" \
"usw %[temp4], " #A "(%[" #P2 "]) \n\t" \
"usw %[temp5], " #B "(%[" #P2 "]) \n\t" \
"usw %[temp6], " #C "(%[" #P2 "]) \n\t" \
"usw %[temp7], " #D "(%[" #P2 "]) \n\t" \
"addiu %[" #P2 "], %[" #P2 "], 16 \n\t" \
"bne %[" #P0 "], %[LoopEnd], 1b \n\t" \
".set pop \n\t" \
#define ASM_END_COMMON_0 \

View File

@ -20,6 +20,9 @@
#include "../dsp/lossless.h"
#include "../utils/utils.h"
#define ALIGN_CST 15
#define DO_ALIGN(PTR) ((uintptr_t)((PTR) + ALIGN_CST) & ~ALIGN_CST)
#define MAX_COST 1.e38
// Number of partitions for the three dominant (literal, red and blue) symbol
@ -101,9 +104,9 @@ VP8LHistogram* VP8LAllocateHistogram(int cache_bits) {
VP8LHistogramSet* VP8LAllocateHistogramSet(int size, int cache_bits) {
int i;
VP8LHistogramSet* set;
const size_t total_size = sizeof(*set)
+ sizeof(*set->histograms) * size
+ (size_t)VP8LGetHistogramSize(cache_bits) * size;
const int histo_size = VP8LGetHistogramSize(cache_bits);
const size_t total_size =
sizeof(*set) + size * (sizeof(*set->histograms) + histo_size + ALIGN_CST);
uint8_t* memory = (uint8_t*)WebPSafeMalloc(total_size, sizeof(*memory));
if (memory == NULL) return NULL;
@ -114,12 +117,12 @@ VP8LHistogramSet* VP8LAllocateHistogramSet(int size, int cache_bits) {
set->max_size = size;
set->size = size;
for (i = 0; i < size; ++i) {
memory = (uint8_t*)DO_ALIGN(memory);
set->histograms[i] = (VP8LHistogram*)memory;
// literal_ won't necessary be aligned.
set->histograms[i]->literal_ = (uint32_t*)(memory + sizeof(VP8LHistogram));
VP8LHistogramInit(set->histograms[i], cache_bits);
// There's no padding/alignment between successive histograms.
memory += VP8LGetHistogramSize(cache_bits);
memory += histo_size;
}
return set;
}

View File

@ -175,17 +175,13 @@ static void RescalePlane(const uint8_t* src,
int src_width, int src_height, int src_stride,
uint8_t* dst,
int dst_width, int dst_height, int dst_stride,
int32_t* const work,
rescaler_t* const work,
int num_channels) {
WebPRescaler rescaler;
int y = 0;
WebPRescalerInit(&rescaler, src_width, src_height,
dst, dst_width, dst_height, dst_stride,
num_channels,
src_width, dst_width,
src_height, dst_height,
work);
memset(work, 0, 2 * dst_width * num_channels * sizeof(*work));
num_channels, work);
while (y < src_height) {
y += WebPRescalerImport(&rescaler, src_height - y,
src + y * src_stride, src_stride);
@ -209,7 +205,7 @@ static void AlphaMultiplyY(WebPPicture* const pic, int inverse) {
int WebPPictureRescale(WebPPicture* pic, int width, int height) {
WebPPicture tmp;
int prev_width, prev_height;
int32_t* work;
rescaler_t* work;
if (pic == NULL) return 0;
prev_width = pic->width;
@ -231,7 +227,7 @@ int WebPPictureRescale(WebPPicture* pic, int width, int height) {
if (!WebPPictureAlloc(&tmp)) return 0;
if (!pic->use_argb) {
work = (int32_t*)WebPSafeMalloc(2ULL * width, sizeof(*work));
work = (rescaler_t*)WebPSafeMalloc(2ULL * width, sizeof(*work));
if (work == NULL) {
WebPPictureFree(&tmp);
return 0;
@ -259,7 +255,7 @@ int WebPPictureRescale(WebPPicture* pic, int width, int height) {
tmp.v,
HALVE(width), HALVE(height), tmp.uv_stride, work, 1);
} else {
work = (int32_t*)WebPSafeMalloc(2ULL * width * 4, sizeof(*work));
work = (rescaler_t*)WebPSafeMalloc(2ULL * width * 4, sizeof(*work));
if (work == NULL) {
WebPPictureFree(&tmp);
return 0;

View File

@ -30,7 +30,7 @@ extern "C" {
// version numbers
#define ENC_MAJ_VERSION 0
#define ENC_MIN_VERSION 4
#define ENC_REV_VERSION 3
#define ENC_REV_VERSION 4
// intra prediction modes
enum { B_DC_PRED = 0, // 4x4 modes

View File

@ -35,15 +35,15 @@
#endif
#if !defined(HAVE_CONFIG_H)
// clang-3.3 and gcc-4.3 have builtin functions for swap32/swap64
#if LOCAL_GCC_PREREQ(4,3) || LOCAL_CLANG_PREREQ(3,3)
#define HAVE_BUILTIN_BSWAP32
#define HAVE_BUILTIN_BSWAP64
#endif
// clang-3.3 and gcc-4.8 have a builtin function for swap16
#if LOCAL_GCC_PREREQ(4,8) || LOCAL_CLANG_PREREQ(3,3)
#if LOCAL_GCC_PREREQ(4,8) || __has_builtin(__builtin_bswap16)
#define HAVE_BUILTIN_BSWAP16
#endif
#if LOCAL_GCC_PREREQ(4,3) || __has_builtin(__builtin_bswap32)
#define HAVE_BUILTIN_BSWAP32
#endif
#if LOCAL_GCC_PREREQ(4,3) || __has_builtin(__builtin_bswap64)
#define HAVE_BUILTIN_BSWAP64
#endif
#endif // !HAVE_CONFIG_H
static WEBP_INLINE uint16_t BSwap16(uint16_t x) {

View File

@ -13,77 +13,192 @@
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include "./rescaler.h"
#include "../dsp/dsp.h"
//------------------------------------------------------------------------------
// Implementations of critical functions ImportRow / ExportRow
void (*WebPRescalerImportRow)(WebPRescaler* const wrk,
const uint8_t* const src, int channel) = NULL;
void (*WebPRescalerExportRow)(WebPRescaler* const wrk, int x_out) = NULL;
// Import a row of data and save its contribution in the rescaler.
// 'channel' denotes the channel number to be imported. 'Expand' corresponds to
// the wrk->x_expand case. Otherwise, 'Shrink' is to be used.
typedef void (*WebPRescalerImportRowFunc)(WebPRescaler* const wrk,
const uint8_t* src);
static WebPRescalerImportRowFunc WebPRescalerImportRowExpand;
static WebPRescalerImportRowFunc WebPRescalerImportRowShrink;
#define RFIX 30
#define MULT_FIX(x, y) (((int64_t)(x) * (y) + (1 << (RFIX - 1))) >> RFIX)
// Export one row (starting at x_out position) from rescaler.
// 'Expand' corresponds to the wrk->y_expand case.
// Otherwise 'Shrink' is to be used
typedef void (*WebPRescalerExportRowFunc)(WebPRescaler* const wrk);
static WebPRescalerExportRowFunc WebPRescalerExportRowExpand;
static WebPRescalerExportRowFunc WebPRescalerExportRowShrink;
static void ImportRowC(WebPRescaler* const wrk,
const uint8_t* const src, int channel) {
#define WEBP_RESCALER_RFIX 32 // fixed-point precision for multiplies
#define WEBP_RESCALER_ONE (1ull << WEBP_RESCALER_RFIX)
#define WEBP_RESCALER_FRAC(x, y) \
((uint32_t)(((uint64_t)(x) << WEBP_RESCALER_RFIX) / (y)))
#define ROUNDER (WEBP_RESCALER_ONE >> 1)
#define MULT_FIX(x, y) (((uint64_t)(x) * (y) + ROUNDER) >> WEBP_RESCALER_RFIX)
static void ImportRowExpandC(WebPRescaler* const wrk, const uint8_t* src) {
const int x_stride = wrk->num_channels;
const int x_out_max = wrk->dst_width * wrk->num_channels;
int x_in = channel;
int x_out;
int accum = 0;
if (!wrk->x_expand) {
int sum = 0;
for (x_out = channel; x_out < x_out_max; x_out += x_stride) {
accum += wrk->x_add;
for (; accum > 0; accum -= wrk->x_sub) {
sum += src[x_in];
x_in += x_stride;
}
{ // Emit next horizontal pixel.
const int32_t base = src[x_in];
const int32_t frac = base * (-accum);
x_in += x_stride;
wrk->frow[x_out] = (sum + base) * wrk->x_sub - frac;
// fresh fractional start for next pixel
sum = (int)MULT_FIX(frac, wrk->fx_scale);
}
}
} else { // simple bilinear interpolation
int left = src[channel], right = src[channel];
for (x_out = channel; x_out < x_out_max; x_out += x_stride) {
int channel;
assert(!WebPRescalerInputDone(wrk));
assert(wrk->x_expand);
for (channel = 0; channel < x_stride; ++channel) {
int x_in = channel;
int x_out = channel;
// simple bilinear interpolation
int accum = wrk->x_add;
int left = src[x_in];
int right = (wrk->src_width > 1) ? src[x_in + x_stride] : left;
x_in += x_stride;
while (1) {
wrk->frow[x_out] = right * wrk->x_add + (left - right) * accum;
x_out += x_stride;
if (x_out >= x_out_max) break;
accum -= wrk->x_sub;
if (accum < 0) {
left = right;
x_in += x_stride;
assert(x_in < wrk->src_width * x_stride);
right = src[x_in];
accum += wrk->x_add;
}
wrk->frow[x_out] = right * wrk->x_add + (left - right) * accum;
accum -= wrk->x_sub;
}
}
// Accumulate the contribution of the new row.
for (x_out = channel; x_out < x_out_max; x_out += x_stride) {
wrk->irow[x_out] += wrk->frow[x_out];
assert(wrk->x_sub == 0 /* <- special case for src_width=1 */ || accum == 0);
}
}
static void ExportRowC(WebPRescaler* const wrk, int x_out) {
if (wrk->y_accum <= 0) {
uint8_t* const dst = wrk->dst;
int32_t* const irow = wrk->irow;
const int32_t* const frow = wrk->frow;
const int yscale = wrk->fy_scale * (-wrk->y_accum);
const int x_out_max = wrk->dst_width * wrk->num_channels;
for (; x_out < x_out_max; ++x_out) {
const int frac = (int)MULT_FIX(frow[x_out], yscale);
static void ImportRowShrinkC(WebPRescaler* const wrk, const uint8_t* src) {
const int x_stride = wrk->num_channels;
const int x_out_max = wrk->dst_width * wrk->num_channels;
int channel;
assert(!WebPRescalerInputDone(wrk));
assert(!wrk->x_expand);
for (channel = 0; channel < x_stride; ++channel) {
int x_in = channel;
int x_out = channel;
uint32_t sum = 0;
int accum = 0;
while (x_out < x_out_max) {
uint32_t base = 0;
accum += wrk->x_add;
while (accum > 0) {
accum -= wrk->x_sub;
assert(x_in < wrk->src_width * x_stride);
base = src[x_in];
sum += base;
x_in += x_stride;
}
{ // Emit next horizontal pixel.
const rescaler_t frac = base * (-accum);
wrk->frow[x_out] = sum * wrk->x_sub - frac;
// fresh fractional start for next pixel
sum = (int)MULT_FIX(frac, wrk->fx_scale);
}
x_out += x_stride;
}
assert(accum == 0);
}
}
//------------------------------------------------------------------------------
// Row export
static void ExportRowExpandC(WebPRescaler* const wrk) {
int x_out;
uint8_t* const dst = wrk->dst;
rescaler_t* const irow = wrk->irow;
const int x_out_max = wrk->dst_width * wrk->num_channels;
const rescaler_t* const frow = wrk->frow;
assert(!WebPRescalerOutputDone(wrk));
assert(wrk->y_accum <= 0);
assert(wrk->y_expand);
assert(wrk->y_sub != 0);
if (wrk->y_accum == 0) {
for (x_out = 0; x_out < x_out_max; ++x_out) {
const uint32_t J = frow[x_out];
const int v = (int)MULT_FIX(J, wrk->fy_scale);
assert(v >= 0 && v <= 255);
dst[x_out] = v;
}
} else {
const uint32_t B = WEBP_RESCALER_FRAC(-wrk->y_accum, wrk->y_sub);
const uint32_t A = (uint32_t)(WEBP_RESCALER_ONE - B);
for (x_out = 0; x_out < x_out_max; ++x_out) {
const uint64_t I = (uint64_t)A * frow[x_out]
+ (uint64_t)B * irow[x_out];
const uint32_t J = (uint32_t)((I + ROUNDER) >> WEBP_RESCALER_RFIX);
const int v = (int)MULT_FIX(J, wrk->fy_scale);
assert(v >= 0 && v <= 255);
dst[x_out] = v;
}
}
}
static void ExportRowShrinkC(WebPRescaler* const wrk) {
int x_out;
uint8_t* const dst = wrk->dst;
rescaler_t* const irow = wrk->irow;
const int x_out_max = wrk->dst_width * wrk->num_channels;
const rescaler_t* const frow = wrk->frow;
const uint32_t yscale = wrk->fy_scale * (-wrk->y_accum);
assert(!WebPRescalerOutputDone(wrk));
assert(wrk->y_accum <= 0);
assert(!wrk->y_expand);
if (yscale) {
for (x_out = 0; x_out < x_out_max; ++x_out) {
const uint32_t frac = (uint32_t)MULT_FIX(frow[x_out], yscale);
const int v = (int)MULT_FIX(irow[x_out] - frac, wrk->fxy_scale);
dst[x_out] = (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255;
assert(v >= 0 && v <= 255);
dst[x_out] = v;
irow[x_out] = frac; // new fractional start
}
} else {
for (x_out = 0; x_out < x_out_max; ++x_out) {
const int v = (int)MULT_FIX(irow[x_out], wrk->fxy_scale);
assert(v >= 0 && v <= 255);
dst[x_out] = v;
irow[x_out] = 0;
}
}
}
//------------------------------------------------------------------------------
// Main entry calls
void WebPRescalerImportRow(WebPRescaler* const wrk, const uint8_t* src) {
assert(!WebPRescalerInputDone(wrk));
if (!wrk->x_expand) {
WebPRescalerImportRowShrink(wrk, src);
} else {
WebPRescalerImportRowExpand(wrk, src);
}
}
void WebPRescalerExportRow(WebPRescaler* const wrk) {
if (wrk->y_accum <= 0) {
assert(!WebPRescalerOutputDone(wrk));
if (wrk->y_expand) {
WebPRescalerExportRowExpand(wrk);
} else if (wrk->fxy_scale) {
WebPRescalerExportRowShrink(wrk);
} else { // very special case for src = dst = 1x1
int i;
assert(wrk->src_width == 1 && wrk->dst_width <= 2);
assert(wrk->src_height == 1 && wrk->dst_height == 1);
for (i = 0; i < wrk->num_channels * wrk->dst_width; ++i) {
wrk->dst[i] = wrk->irow[i];
wrk->irow[i] = 0;
}
}
wrk->y_accum += wrk->y_add;
wrk->dst += wrk->dst_stride;
++wrk->dst_y;
}
}
@ -92,23 +207,25 @@ static void ExportRowC(WebPRescaler* const wrk, int x_out) {
#if defined(WEBP_USE_MIPS32)
static void ImportRowMIPS(WebPRescaler* const wrk,
const uint8_t* const src, int channel) {
static void ImportRowShrinkMIPS(WebPRescaler* const wrk, const uint8_t* src) {
const int x_stride = wrk->num_channels;
const int x_out_max = wrk->dst_width * wrk->num_channels;
const int fx_scale = wrk->fx_scale;
const int x_add = wrk->x_add;
const int x_sub = wrk->x_sub;
int* frow = wrk->frow + channel;
int* irow = wrk->irow + channel;
const uint8_t* src1 = src + channel;
int temp1, temp2, temp3;
int base, frac, sum;
int accum, accum1;
const int x_stride1 = x_stride << 2;
int loop_c = x_out_max - channel;
int channel;
assert(!wrk->x_expand);
assert(!WebPRescalerInputDone(wrk));
for (channel = 0; channel < x_stride; ++channel) {
const uint8_t* src1 = src + channel;
rescaler_t* frow = wrk->frow + channel;
int temp1, temp2, temp3;
int base, frac, sum;
int accum, accum1;
int loop_c = x_out_max - channel;
if (!wrk->x_expand) {
__asm__ volatile (
"li %[temp1], 0x8000 \n\t"
"li %[temp2], 0x10000 \n\t"
@ -116,179 +233,295 @@ static void ImportRowMIPS(WebPRescaler* const wrk,
"li %[accum], 0 \n\t"
"1: \n\t"
"addu %[accum], %[accum], %[x_add] \n\t"
"li %[base], 0 \n\t"
"blez %[accum], 3f \n\t"
"2: \n\t"
"lbu %[temp3], 0(%[src1]) \n\t"
"lbu %[base], 0(%[src1]) \n\t"
"subu %[accum], %[accum], %[x_sub] \n\t"
"addu %[src1], %[src1], %[x_stride] \n\t"
"addu %[sum], %[sum], %[temp3] \n\t"
"addu %[sum], %[sum], %[base] \n\t"
"bgtz %[accum], 2b \n\t"
"3: \n\t"
"lbu %[base], 0(%[src1]) \n\t"
"addu %[src1], %[src1], %[x_stride] \n\t"
"negu %[accum1], %[accum] \n\t"
"mul %[frac], %[base], %[accum1] \n\t"
"addu %[temp3], %[sum], %[base] \n\t"
"mul %[temp3], %[temp3], %[x_sub] \n\t"
"lw %[base], 0(%[irow]) \n\t"
"mul %[temp3], %[sum], %[x_sub] \n\t"
"subu %[loop_c], %[loop_c], %[x_stride] \n\t"
"sll %[accum1], %[frac], 2 \n\t"
"mult %[temp1], %[temp2] \n\t"
"madd %[accum1], %[fx_scale] \n\t"
"maddu %[frac], %[fx_scale] \n\t"
"mfhi %[sum] \n\t"
"subu %[temp3], %[temp3], %[frac] \n\t"
"sw %[temp3], 0(%[frow]) \n\t"
"add %[base], %[base], %[temp3] \n\t"
"sw %[base], 0(%[irow]) \n\t"
"addu %[irow], %[irow], %[x_stride1] \n\t"
"addu %[frow], %[frow], %[x_stride1] \n\t"
"bgtz %[loop_c], 1b \n\t"
: [accum]"=&r"(accum), [src1]"+r"(src1), [temp3]"=&r"(temp3),
[sum]"=&r"(sum), [base]"=&r"(base), [frac]"=&r"(frac),
[frow]"+r"(frow), [accum1]"=&r"(accum1),
[temp2]"=&r"(temp2), [temp1]"=&r"(temp1)
: [x_stride]"r"(x_stride), [fx_scale]"r"(fx_scale),
[x_sub]"r"(x_sub), [x_add]"r"(x_add),
[loop_c]"r"(loop_c), [x_stride1]"r"(x_stride1)
: "memory", "hi", "lo"
);
assert(accum == 0);
}
}
: [accum] "=&r" (accum), [src1] "+r" (src1), [temp3] "=&r" (temp3),
[sum] "=&r" (sum), [base] "=&r" (base), [frac] "=&r" (frac),
[frow] "+r" (frow), [irow] "+r" (irow), [accum1] "=&r" (accum1),
[temp2] "=&r" (temp2), [temp1] "=&r" (temp1)
: [x_stride] "r" (x_stride), [fx_scale] "r" (fx_scale),
[x_sub] "r" (x_sub), [x_add] "r" (x_add),
[loop_c] "r" (loop_c), [x_stride1] "r" (x_stride1)
static void ImportRowExpandMIPS(WebPRescaler* const wrk, const uint8_t* src) {
const int x_stride = wrk->num_channels;
const int x_out_max = wrk->dst_width * wrk->num_channels;
const int x_add = wrk->x_add;
const int x_sub = wrk->x_sub;
const int src_width = wrk->src_width;
const int x_stride1 = x_stride << 2;
int channel;
assert(wrk->x_expand);
assert(!WebPRescalerInputDone(wrk));
for (channel = 0; channel < x_stride; ++channel) {
const uint8_t* src1 = src + channel;
rescaler_t* frow = wrk->frow + channel;
int temp1, temp2, temp3, temp4;
int frac;
int accum;
int x_out = channel;
__asm__ volatile (
"addiu %[temp3], %[src_width], -1 \n\t"
"lbu %[temp2], 0(%[src1]) \n\t"
"addu %[src1], %[src1], %[x_stride] \n\t"
"bgtz %[temp3], 0f \n\t"
"addiu %[temp1], %[temp2], 0 \n\t"
"b 3f \n\t"
"0: \n\t"
"lbu %[temp1], 0(%[src1]) \n\t"
"3: \n\t"
"addiu %[accum], %[x_add], 0 \n\t"
"1: \n\t"
"subu %[temp3], %[temp2], %[temp1] \n\t"
"mul %[temp3], %[temp3], %[accum] \n\t"
"mul %[temp4], %[temp1], %[x_add] \n\t"
"addu %[temp3], %[temp4], %[temp3] \n\t"
"sw %[temp3], 0(%[frow]) \n\t"
"addu %[frow], %[frow], %[x_stride1] \n\t"
"addu %[x_out], %[x_out], %[x_stride] \n\t"
"subu %[temp3], %[x_out], %[x_out_max] \n\t"
"bgez %[temp3], 2f \n\t"
"subu %[accum], %[accum], %[x_sub] \n\t"
"bgez %[accum], 4f \n\t"
"addiu %[temp2], %[temp1], 0 \n\t"
"addu %[src1], %[src1], %[x_stride] \n\t"
"lbu %[temp1], 0(%[src1]) \n\t"
"addu %[accum], %[accum], %[x_add] \n\t"
"4: \n\t"
"b 1b \n\t"
"2: \n\t"
: [src1]"+r"(src1), [accum]"=&r"(accum), [temp1]"=&r"(temp1),
[temp2]"=&r"(temp2), [temp3]"=&r"(temp3), [temp4]"=&r"(temp4),
[x_out]"+r"(x_out), [frac]"=&r"(frac), [frow]"+r"(frow)
: [x_stride]"r"(x_stride), [x_add]"r"(x_add), [x_sub]"r"(x_sub),
[x_stride1]"r"(x_stride1), [src_width]"r"(src_width),
[x_out_max]"r"(x_out_max)
: "memory", "hi", "lo"
);
assert(wrk->x_sub == 0 /* <- special case for src_width=1 */ || accum == 0);
}
}
//------------------------------------------------------------------------------
// Row export
static void ExportRowExpandMIPS(WebPRescaler* const wrk) {
uint8_t* dst = wrk->dst;
rescaler_t* irow = wrk->irow;
const int x_out_max = wrk->dst_width * wrk->num_channels;
const rescaler_t* frow = wrk->frow;
int temp0, temp1, temp3, temp4, temp5, loop_end;
const int temp2 = (int)wrk->fy_scale;
const int temp6 = x_out_max << 2;
assert(!WebPRescalerOutputDone(wrk));
assert(wrk->y_accum <= 0);
assert(wrk->y_expand);
assert(wrk->y_sub != 0);
if (wrk->y_accum == 0) {
__asm__ volatile (
"li %[temp3], 0x10000 \n\t"
"li %[temp4], 0x8000 \n\t"
"addu %[loop_end], %[frow], %[temp6] \n\t"
"1: \n\t"
"lw %[temp0], 0(%[frow]) \n\t"
"addiu %[dst], %[dst], 1 \n\t"
"addiu %[frow], %[frow], 4 \n\t"
"mult %[temp3], %[temp4] \n\t"
"maddu %[temp0], %[temp2] \n\t"
"mfhi %[temp5] \n\t"
"sb %[temp5], -1(%[dst]) \n\t"
"bne %[frow], %[loop_end], 1b \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
[temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow),
[dst]"+r"(dst), [loop_end]"=&r"(loop_end)
: [temp2]"r"(temp2), [temp6]"r"(temp6)
: "memory", "hi", "lo"
);
} else {
const uint32_t B = WEBP_RESCALER_FRAC(-wrk->y_accum, wrk->y_sub);
const uint32_t A = (uint32_t)(WEBP_RESCALER_ONE - B);
__asm__ volatile (
"li %[temp3], 0x10000 \n\t"
"li %[temp4], 0x8000 \n\t"
"addu %[loop_end], %[frow], %[temp6] \n\t"
"1: \n\t"
"lw %[temp0], 0(%[frow]) \n\t"
"lw %[temp1], 0(%[irow]) \n\t"
"addiu %[dst], %[dst], 1 \n\t"
"mult %[temp3], %[temp4] \n\t"
"maddu %[A], %[temp0] \n\t"
"maddu %[B], %[temp1] \n\t"
"addiu %[frow], %[frow], 4 \n\t"
"addiu %[irow], %[irow], 4 \n\t"
"mfhi %[temp5] \n\t"
"mult %[temp3], %[temp4] \n\t"
"maddu %[temp5], %[temp2] \n\t"
"mfhi %[temp5] \n\t"
"sb %[temp5], -1(%[dst]) \n\t"
"bne %[frow], %[loop_end], 1b \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
[temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow),
[irow]"+r"(irow), [dst]"+r"(dst), [loop_end]"=&r"(loop_end)
: [temp2]"r"(temp2), [temp6]"r"(temp6), [A]"r"(A), [B]"r"(B)
: "memory", "hi", "lo"
);
}
}
static void ExportRowShrinkMIPS(WebPRescaler* const wrk) {
const int x_out_max = wrk->dst_width * wrk->num_channels;
uint8_t* dst = wrk->dst;
rescaler_t* irow = wrk->irow;
const rescaler_t* frow = wrk->frow;
const int yscale = wrk->fy_scale * (-wrk->y_accum);
int temp0, temp1, temp3, temp4, temp5, loop_end;
const int temp2 = (int)wrk->fxy_scale;
const int temp6 = x_out_max << 2;
assert(!WebPRescalerOutputDone(wrk));
assert(wrk->y_accum <= 0);
assert(!wrk->y_expand);
assert(wrk->fxy_scale != 0);
if (yscale) {
__asm__ volatile (
"li %[temp3], 0x10000 \n\t"
"li %[temp4], 0x8000 \n\t"
"addu %[loop_end], %[frow], %[temp6] \n\t"
"1: \n\t"
"lw %[temp0], 0(%[frow]) \n\t"
"mult %[temp3], %[temp4] \n\t"
"addiu %[frow], %[frow], 4 \n\t"
"maddu %[temp0], %[yscale] \n\t"
"mfhi %[temp1] \n\t"
"lw %[temp0], 0(%[irow]) \n\t"
"addiu %[dst], %[dst], 1 \n\t"
"addiu %[irow], %[irow], 4 \n\t"
"subu %[temp0], %[temp0], %[temp1] \n\t"
"mult %[temp3], %[temp4] \n\t"
"maddu %[temp0], %[temp2] \n\t"
"mfhi %[temp5] \n\t"
"sw %[temp1], -4(%[irow]) \n\t"
"sb %[temp5], -1(%[dst]) \n\t"
"bne %[frow], %[loop_end], 1b \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
[temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow),
[irow]"+r"(irow), [dst]"+r"(dst), [loop_end]"=&r"(loop_end)
: [temp2]"r"(temp2), [yscale]"r"(yscale), [temp6]"r"(temp6)
: "memory", "hi", "lo"
);
} else {
__asm__ volatile (
"lbu %[temp1], 0(%[src1]) \n\t"
"move %[temp2], %[temp1] \n\t"
"li %[accum], 0 \n\t"
"1: \n\t"
"bgez %[accum], 2f \n\t"
"move %[temp2], %[temp1] \n\t"
"addu %[src1], %[x_stride] \n\t"
"lbu %[temp1], 0(%[src1]) \n\t"
"addu %[accum], %[x_add] \n\t"
"2: \n\t"
"subu %[temp3], %[temp2], %[temp1] \n\t"
"mul %[temp3], %[temp3], %[accum] \n\t"
"mul %[base], %[temp1], %[x_add] \n\t"
"subu %[accum], %[accum], %[x_sub] \n\t"
"lw %[frac], 0(%[irow]) \n\t"
"subu %[loop_c], %[loop_c], %[x_stride] \n\t"
"addu %[temp3], %[base], %[temp3] \n\t"
"sw %[temp3], 0(%[frow]) \n\t"
"addu %[frow], %[x_stride1] \n\t"
"addu %[frac], %[temp3] \n\t"
"sw %[frac], 0(%[irow]) \n\t"
"addu %[irow], %[x_stride1] \n\t"
"bgtz %[loop_c], 1b \n\t"
: [src1] "+r" (src1), [accum] "=&r" (accum), [temp1] "=&r" (temp1),
[temp2] "=&r" (temp2), [temp3] "=&r" (temp3), [base] "=&r" (base),
[frac] "=&r" (frac), [frow] "+r" (frow), [irow] "+r" (irow)
: [x_stride] "r" (x_stride), [x_add] "r" (x_add), [x_sub] "r" (x_sub),
[x_stride1] "r" (x_stride1), [loop_c] "r" (loop_c)
"li %[temp3], 0x10000 \n\t"
"li %[temp4], 0x8000 \n\t"
"addu %[loop_end], %[irow], %[temp6] \n\t"
"1: \n\t"
"lw %[temp0], 0(%[irow]) \n\t"
"addiu %[dst], %[dst], 1 \n\t"
"addiu %[irow], %[irow], 4 \n\t"
"mult %[temp3], %[temp4] \n\t"
"maddu %[temp0], %[temp2] \n\t"
"mfhi %[temp5] \n\t"
"sw $zero, -4(%[irow]) \n\t"
"sb %[temp5], -1(%[dst]) \n\t"
"bne %[irow], %[loop_end], 1b \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
[temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [irow]"+r"(irow),
[dst]"+r"(dst), [loop_end]"=&r"(loop_end)
: [temp2]"r"(temp2), [temp6]"r"(temp6)
: "memory", "hi", "lo"
);
}
}
static void ExportRowMIPS(WebPRescaler* const wrk, int x_out) {
if (wrk->y_accum <= 0) {
uint8_t* const dst = wrk->dst;
int32_t* const irow = wrk->irow;
const int32_t* const frow = wrk->frow;
const int yscale = wrk->fy_scale * (-wrk->y_accum);
const int x_out_max = wrk->dst_width * wrk->num_channels;
// if wrk->fxy_scale can fit into 32 bits use optimized code,
// otherwise use C code
if ((wrk->fxy_scale >> 32) == 0) {
int temp0, temp1, temp3, temp4, temp5, temp6, temp7, loop_end;
const int temp2 = (int)(wrk->fxy_scale);
const int temp8 = x_out_max << 2;
uint8_t* dst_t = (uint8_t*)dst;
int32_t* irow_t = (int32_t*)irow;
const int32_t* frow_t = (const int32_t*)frow;
__asm__ volatile(
"addiu %[temp6], $zero, -256 \n\t"
"addiu %[temp7], $zero, 255 \n\t"
"li %[temp3], 0x10000 \n\t"
"li %[temp4], 0x8000 \n\t"
"addu %[loop_end], %[frow_t], %[temp8] \n\t"
"1: \n\t"
"lw %[temp0], 0(%[frow_t]) \n\t"
"mult %[temp3], %[temp4] \n\t"
"addiu %[frow_t], %[frow_t], 4 \n\t"
"sll %[temp0], %[temp0], 2 \n\t"
"madd %[temp0], %[yscale] \n\t"
"mfhi %[temp1] \n\t"
"lw %[temp0], 0(%[irow_t]) \n\t"
"addiu %[dst_t], %[dst_t], 1 \n\t"
"addiu %[irow_t], %[irow_t], 4 \n\t"
"subu %[temp0], %[temp0], %[temp1] \n\t"
"mult %[temp3], %[temp4] \n\t"
"sll %[temp0], %[temp0], 2 \n\t"
"madd %[temp0], %[temp2] \n\t"
"mfhi %[temp5] \n\t"
"sw %[temp1], -4(%[irow_t]) \n\t"
"and %[temp0], %[temp5], %[temp6] \n\t"
"slti %[temp1], %[temp5], 0 \n\t"
"beqz %[temp0], 2f \n\t"
"xor %[temp5], %[temp5], %[temp5] \n\t"
"movz %[temp5], %[temp7], %[temp1] \n\t"
"2: \n\t"
"sb %[temp5], -1(%[dst_t]) \n\t"
"bne %[frow_t], %[loop_end], 1b \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
[temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp6]"=&r"(temp6),
[temp7]"=&r"(temp7), [frow_t]"+r"(frow_t), [irow_t]"+r"(irow_t),
[dst_t]"+r"(dst_t), [loop_end]"=&r"(loop_end)
: [temp2]"r"(temp2), [yscale]"r"(yscale), [temp8]"r"(temp8)
: "memory", "hi", "lo"
);
wrk->y_accum += wrk->y_add;
wrk->dst += wrk->dst_stride;
} else {
ExportRowC(wrk, x_out);
}
}
}
#endif // WEBP_USE_MIPS32
//------------------------------------------------------------------------------
void WebPRescalerInit(WebPRescaler* const wrk, int src_width, int src_height,
uint8_t* const dst, int dst_width, int dst_height,
int dst_stride, int num_channels, int x_add, int x_sub,
int y_add, int y_sub, int32_t* const work) {
uint8_t* const dst,
int dst_width, int dst_height, int dst_stride,
int num_channels, rescaler_t* const work) {
const int x_add = src_width, x_sub = dst_width;
const int y_add = src_height, y_sub = dst_height;
wrk->x_expand = (src_width < dst_width);
wrk->y_expand = (src_height < dst_height);
wrk->src_width = src_width;
wrk->src_height = src_height;
wrk->dst_width = dst_width;
wrk->dst_height = dst_height;
wrk->src_y = 0;
wrk->dst_y = 0;
wrk->dst = dst;
wrk->dst_stride = dst_stride;
wrk->num_channels = num_channels;
// for 'x_expand', we use bilinear interpolation
wrk->x_add = wrk->x_expand ? (x_sub - 1) : x_add - x_sub;
wrk->x_add = wrk->x_expand ? (x_sub - 1) : x_add;
wrk->x_sub = wrk->x_expand ? (x_add - 1) : x_sub;
wrk->y_accum = y_add;
wrk->y_add = y_add;
wrk->y_sub = y_sub;
wrk->fx_scale = (1 << RFIX) / x_sub;
wrk->fy_scale = (1 << RFIX) / y_sub;
wrk->fxy_scale = wrk->x_expand ?
((int64_t)dst_height << RFIX) / (x_sub * src_height) :
((int64_t)dst_height << RFIX) / (x_add * src_height);
if (!wrk->x_expand) { // fx_scale is not used otherwise
wrk->fx_scale = WEBP_RESCALER_FRAC(1, wrk->x_sub);
}
// vertical scaling parameters
wrk->y_add = wrk->y_expand ? y_add - 1 : y_add;
wrk->y_sub = wrk->y_expand ? y_sub - 1 : y_sub;
wrk->y_accum = wrk->y_expand ? wrk->y_sub : wrk->y_add;
if (!wrk->y_expand) {
// this is WEBP_RESCALER_FRAC(dst_height, x_add * y_add) without the cast.
const uint64_t ratio =
(uint64_t)dst_height * WEBP_RESCALER_ONE / (wrk->x_add * wrk->y_add);
if (ratio != (uint32_t)ratio) {
// We can't represent the ratio with the current fixed-point precision.
// => We special-case fxy_scale = 0, in WebPRescalerExportRow().
wrk->fxy_scale = 0;
} else {
wrk->fxy_scale = (uint32_t)ratio;
}
wrk->fy_scale = WEBP_RESCALER_FRAC(1, wrk->y_sub);
} else {
wrk->fy_scale = WEBP_RESCALER_FRAC(1, wrk->x_add);
// wrk->fxy_scale is unused here.
}
wrk->irow = work;
wrk->frow = work + num_channels * dst_width;
memset(work, 0, 2 * dst_width * num_channels * sizeof(*work));
if (WebPRescalerImportRow == NULL) {
WebPRescalerImportRow = ImportRowC;
WebPRescalerExportRow = ExportRowC;
if (WebPRescalerImportRowExpand == NULL) {
WebPRescalerImportRowExpand = ImportRowExpandC;
WebPRescalerImportRowShrink = ImportRowShrinkC;
WebPRescalerExportRowExpand = ExportRowExpandC;
WebPRescalerExportRowShrink = ExportRowShrinkC;
if (VP8GetCPUInfo != NULL) {
#if defined(WEBP_USE_MIPS32)
if (VP8GetCPUInfo(kMIPS32)) {
WebPRescalerImportRow = ImportRowMIPS;
WebPRescalerExportRow = ExportRowMIPS;
WebPRescalerImportRowExpand = ImportRowExpandMIPS;
WebPRescalerImportRowShrink = ImportRowShrinkMIPS;
WebPRescalerExportRowExpand = ExportRowExpandMIPS;
WebPRescalerExportRowShrink = ExportRowShrinkMIPS;
}
#endif
}
@ -296,7 +529,10 @@ void WebPRescalerInit(WebPRescaler* const wrk, int src_width, int src_height,
}
#undef MULT_FIX
#undef RFIX
#undef WEBP_RESCALER_RFIX
#undef WEBP_RESCALER_ONE
#undef WEBP_RESCALER_FRAC
#undef ROUNDER
//------------------------------------------------------------------------------
// all-in-one calls
@ -309,11 +545,20 @@ int WebPRescaleNeededLines(const WebPRescaler* const wrk, int max_num_lines) {
int WebPRescalerImport(WebPRescaler* const wrk, int num_lines,
const uint8_t* src, int src_stride) {
int total_imported = 0;
while (total_imported < num_lines && wrk->y_accum > 0) {
int channel;
for (channel = 0; channel < wrk->num_channels; ++channel) {
WebPRescalerImportRow(wrk, src, channel);
while (total_imported < num_lines && !WebPRescalerHasPendingOutput(wrk)) {
if (wrk->y_expand) {
rescaler_t* const tmp = wrk->irow;
wrk->irow = wrk->frow;
wrk->frow = tmp;
}
WebPRescalerImportRow(wrk, src);
if (!wrk->y_expand) { // Accumulate the contribution of the new row.
int x;
for (x = 0; x < wrk->num_channels * wrk->dst_width; ++x) {
wrk->irow[x] += wrk->frow[x];
}
}
++wrk->src_y;
src += src_stride;
++total_imported;
wrk->y_accum -= wrk->y_sub;
@ -324,7 +569,7 @@ int WebPRescalerImport(WebPRescaler* const wrk, int num_lines,
int WebPRescalerExport(WebPRescaler* const rescaler) {
int total_exported = 0;
while (WebPRescalerHasPendingOutput(rescaler)) {
WebPRescalerExportRow(rescaler, 0);
WebPRescalerExportRow(rescaler);
++total_exported;
}
return total_exported;

View File

@ -21,20 +21,23 @@ extern "C" {
#include "../webp/types.h"
// Structure used for on-the-fly rescaling
typedef uint32_t rescaler_t; // type for side-buffer
typedef struct {
int x_expand; // true if we're expanding in the x direction
int y_expand; // true if we're expanding in the y direction
int num_channels; // bytes to jump between pixels
int fy_scale, fx_scale; // fixed-point scaling factor
int64_t fxy_scale; // ''
// we need hpel-precise add/sub increments, for the downsampled U/V planes.
uint32_t fx_scale; // fixed-point scaling factors
uint32_t fy_scale; // ''
uint32_t fxy_scale; // ''
int y_accum; // vertical accumulator
int y_add, y_sub; // vertical increments (add ~= src, sub ~= dst)
int x_add, x_sub; // horizontal increments (add ~= src, sub ~= dst)
int y_add, y_sub; // vertical increments
int x_add, x_sub; // horizontal increments
int src_width, src_height; // source dimensions
int dst_width, dst_height; // destination dimensions
int src_y, dst_y; // row counters for input and output
uint8_t* dst;
int dst_stride;
int32_t* irow, *frow; // work buffer
rescaler_t* irow, *frow; // work buffer
} WebPRescaler;
// Initialize a rescaler given scratch area 'work' and dimensions of src & dst.
@ -43,9 +46,7 @@ void WebPRescalerInit(WebPRescaler* const rescaler,
uint8_t* const dst,
int dst_width, int dst_height, int dst_stride,
int num_channels,
int x_add, int x_sub,
int y_add, int y_sub,
int32_t* const work);
rescaler_t* const work);
// Returns the number of input lines needed next to produce one output line,
// considering that the maximum available input lines are 'max_num_lines'.
@ -57,21 +58,29 @@ int WebPRescaleNeededLines(const WebPRescaler* const rescaler,
int WebPRescalerImport(WebPRescaler* const rescaler, int num_rows,
const uint8_t* src, int src_stride);
// Import a row of data and save its contribution in the rescaler.
// 'channel' denotes the channel number to be imported.
extern void (*WebPRescalerImportRow)(WebPRescaler* const wrk,
const uint8_t* const src, int channel);
// Export one row (starting at x_out position) from rescaler.
extern void (*WebPRescalerExportRow)(WebPRescaler* const wrk, int x_out);
// Return true if there is pending output rows ready.
static WEBP_INLINE
int WebPRescalerHasPendingOutput(const WebPRescaler* const rescaler) {
return (rescaler->y_accum <= 0);
}
// Export as many rows as possible. Return the numbers of rows written.
int WebPRescalerExport(WebPRescaler* const rescaler);
void WebPRescalerImportRow(WebPRescaler* const wrk,
const uint8_t* src);
// Export one row (starting at x_out position) from rescaler.
void WebPRescalerExportRow(WebPRescaler* const wrk);
// Return true if input is finished
static WEBP_INLINE
int WebPRescalerInputDone(const WebPRescaler* const rescaler) {
return (rescaler->src_y >= rescaler->src_height);
}
// Return true if output is finished
static WEBP_INLINE
int WebPRescalerOutputDone(const WebPRescaler* const rescaler) {
return (rescaler->dst_y >= rescaler->dst_height);
}
// Return true if there are pending output rows ready.
static WEBP_INLINE
int WebPRescalerHasPendingOutput(const WebPRescaler* const rescaler) {
return !WebPRescalerOutputDone(rescaler) && (rescaler->y_accum <= 0);
}
//------------------------------------------------------------------------------

View File

@ -90,7 +90,7 @@ static WEBP_INLINE int BitsLog2Floor(uint32_t n) {
#pragma intrinsic(_BitScanReverse)
static WEBP_INLINE int BitsLog2Floor(uint32_t n) {
uint32_t first_set_bit;
unsigned long first_set_bit;
_BitScanReverse(&first_set_bit, n);
return first_set_bit;
}