mirror of
https://github.com/webmproject/libwebp.git
synced 2025-10-26 16:13:41 +01:00
introduce WEBP_REFERENCE_IMPLEMENTATION compile option
This flag will make the code use no uint64, no asm, and no fancy
trick, but instead aim at being as simple and straightforward as
possible.
Main use is to help emscripten generate proper JS code.
More code needs to be simplified later.
Also: tune the BITS values to be 24 and make use of WEBP_RIGHT_JUSTIFY
Here are the typical timing for decoding a large image:
ARM7-a:
dwebp_justify_32_neon Time to decode picture: 3.280s
dwebp_justify_24_neon Time to decode picture: 2.640s
dwebp_justify_16_neon Time to decode picture: 2.723s
dwebp_justify_8_neon Time to decode picture: 2.802s
dwebp_justify_32 Time to decode picture: 4.264s
dwebp_justify_24 Time to decode picture: 3.696s
dwebp_justify_16 Time to decode picture: 3.779s
dwebp_justify_8 Time to decode picture: 3.834s
dwebp_32_neon Time to decode picture: 4.010s
dwebp_24_neon Time to decode picture: 2.725s
dwebp_16_neon Time to decode picture: 2.852s
dwebp_8_neon Time to decode picture: 2.778s
dwebp_32 Time to decode picture: 4.587s
dwebp_24 Time to decode picture: 3.800s
dwebp_16 Time to decode picture: 3.902s
dwebp_8 Time to decode picture: 3.815s
REFERENCE (HEAD) Time to decode picture: 3.818s
x86_64:
dwebp_justify_32 Time to decode picture: 0.473s
dwebp_justify_24 Time to decode picture: 0.434s
dwebp_justify_16 Time to decode picture: 0.450s
dwebp_justify_8 Time to decode picture: 0.467s
dwebp_32 Time to decode picture: 0.474s
dwebp_24 Time to decode picture: 0.468s
dwebp_16 Time to decode picture: 0.468s
dwebp_8 Time to decode picture: 0.481s
REFERENCE (HEAD) Time to decode picture: 0.436s
i386:
dwebp_justify_32 Time to decode picture: 0.723s
dwebp_justify_24 Time to decode picture: 0.618s
dwebp_justify_16 Time to decode picture: 0.626s
dwebp_justify_8 Time to decode picture: 0.651s
dwebp_32 Time to decode picture: 0.744s
dwebp_24 Time to decode picture: 0.627s
dwebp_16 Time to decode picture: 0.642s
dwebp_8 Time to decode picture: 0.642s
Change-Id: Ie56c7235733a24f94fbfc2e4351aae36ec39c225
This commit is contained in:
@@ -426,11 +426,16 @@ static void HE8uv(uint8_t *dst) { // horizontal
|
||||
}
|
||||
|
||||
// helper for chroma-DC predictions
|
||||
static WEBP_INLINE void Put8x8uv(uint64_t v, uint8_t* dst) {
|
||||
static WEBP_INLINE void Put8x8uv(uint8_t value, uint8_t* dst) {
|
||||
int j;
|
||||
#ifndef WEBP_REFERENCE_IMPLEMENTATION
|
||||
const uint64_t v = (uint64_t)value * 0x0101010101010101ULL;
|
||||
for (j = 0; j < 8; ++j) {
|
||||
*(uint64_t*)(dst + j * BPS) = v;
|
||||
}
|
||||
#else
|
||||
for (j = 0; j < 8; ++j) memset(dst + j * BPS, value, 8);
|
||||
#endif
|
||||
}
|
||||
|
||||
static void DC8uv(uint8_t *dst) { // DC
|
||||
@@ -439,7 +444,7 @@ static void DC8uv(uint8_t *dst) { // DC
|
||||
for (i = 0; i < 8; ++i) {
|
||||
dc0 += dst[i - BPS] + dst[-1 + i * BPS];
|
||||
}
|
||||
Put8x8uv((uint64_t)((dc0 >> 4) * 0x0101010101010101ULL), dst);
|
||||
Put8x8uv(dc0 >> 4, dst);
|
||||
}
|
||||
|
||||
static void DC8uvNoLeft(uint8_t *dst) { // DC with no left samples
|
||||
@@ -448,7 +453,7 @@ static void DC8uvNoLeft(uint8_t *dst) { // DC with no left samples
|
||||
for (i = 0; i < 8; ++i) {
|
||||
dc0 += dst[i - BPS];
|
||||
}
|
||||
Put8x8uv((uint64_t)((dc0 >> 3) * 0x0101010101010101ULL), dst);
|
||||
Put8x8uv(dc0 >> 3, dst);
|
||||
}
|
||||
|
||||
static void DC8uvNoTop(uint8_t *dst) { // DC with no top samples
|
||||
@@ -457,11 +462,11 @@ static void DC8uvNoTop(uint8_t *dst) { // DC with no top samples
|
||||
for (i = 0; i < 8; ++i) {
|
||||
dc0 += dst[-1 + i * BPS];
|
||||
}
|
||||
Put8x8uv((uint64_t)((dc0 >> 3) * 0x0101010101010101ULL), dst);
|
||||
Put8x8uv(dc0 >> 3, dst);
|
||||
}
|
||||
|
||||
static void DC8uvNoTopLeft(uint8_t *dst) { // DC with nothing
|
||||
Put8x8uv(0x8080808080808080ULL, dst);
|
||||
Put8x8uv(0x80, dst);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
@@ -1079,20 +1079,27 @@ static void CopyOrSwap(const uint32_t* src, int num_pixels, uint8_t* dst,
|
||||
const uint32_t* const src_end = src + num_pixels;
|
||||
while (src < src_end) {
|
||||
uint32_t argb = *src++;
|
||||
|
||||
#if !defined(WEBP_REFERENCE_IMPLEMENTATION)
|
||||
#if !defined(__BIG_ENDIAN__) && (defined(__i386__) || defined(__x86_64__))
|
||||
__asm__ volatile("bswap %0" : "=r"(argb) : "0"(argb));
|
||||
*(uint32_t*)dst = argb;
|
||||
dst += sizeof(argb);
|
||||
#elif !defined(__BIG_ENDIAN__) && defined(_MSC_VER)
|
||||
argb = _byteswap_ulong(argb);
|
||||
*(uint32_t*)dst = argb;
|
||||
dst += sizeof(argb);
|
||||
#else
|
||||
*dst++ = (argb >> 24) & 0xff;
|
||||
*dst++ = (argb >> 16) & 0xff;
|
||||
*dst++ = (argb >> 8) & 0xff;
|
||||
*dst++ = (argb >> 0) & 0xff;
|
||||
dst[0] = (argb >> 24) & 0xff;
|
||||
dst[1] = (argb >> 16) & 0xff;
|
||||
dst[2] = (argb >> 8) & 0xff;
|
||||
dst[3] = (argb >> 0) & 0xff;
|
||||
#endif
|
||||
#else // WEBP_REFERENCE_IMPLEMENTATION
|
||||
dst[0] = (argb >> 24) & 0xff;
|
||||
dst[1] = (argb >> 16) & 0xff;
|
||||
dst[2] = (argb >> 8) & 0xff;
|
||||
dst[3] = (argb >> 0) & 0xff;
|
||||
#endif
|
||||
dst += sizeof(argb);
|
||||
}
|
||||
} else {
|
||||
memcpy(dst, src, num_pixels * sizeof(*src));
|
||||
|
||||
@@ -56,20 +56,29 @@ extern "C" {
|
||||
// -> we're back to height active 'value_' bits (marked 'v') and BITS cached
|
||||
// bits (marked 'B')
|
||||
//
|
||||
// The right-justify strategy tends to use less shifts, so let's use it:
|
||||
|
||||
#define USE_RIGHT_JUSTIFY
|
||||
// The right-justify strategy tends to use less shifts and is often faster.
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// BITS can be either 32, 24, 16 or 8.
|
||||
// Pick values that fit natural register size.
|
||||
|
||||
#if !defined(WEBP_REFERENCE_IMPLEMENTATION)
|
||||
|
||||
#define USE_RIGHT_JUSTIFY
|
||||
|
||||
#if defined(__i386__) || defined(_M_IX86) // x86 32bit
|
||||
#define BITS 16
|
||||
#elif defined(__arm__) || defined(_M_ARM) // ARM
|
||||
#define BITS 8
|
||||
#define BITS 24
|
||||
#else // reasonable default
|
||||
#define BITS 32
|
||||
#define BITS 24
|
||||
#endif
|
||||
|
||||
#else // reference choices
|
||||
|
||||
#define USE_RIGHT_JUSTIFY
|
||||
#define BITS 8
|
||||
|
||||
#endif
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
Reference in New Issue
Block a user