mirror of
https://github.com/webmproject/libwebp.git
synced 2025-01-27 23:22:56 +01:00
introduce a generic GetCoeffs() function pointer
We can now switch at run-time between the standard GetCoeffs() critical function, which uses a fast variant of VP8GetBit(), and an alternate one. Some platforms have slow instructions that make the standard VP8GetBit() slow. GetCoeffs() is the right level of branching at which to switch to GetCoeffsAlt(), which avoids these slow instructions in those infrequent cases. The next patch will upgrade VP8GetBit() to use clz, once this one is proven to be neutral speed-wise. Change-Id: Ia6cef5de9de6131574d2202bbc0bea8559c9b693
This commit is contained in:
parent
db013a8d5c
commit
8074b89eb3
@ -26,6 +26,16 @@ int WebPGetDecoderVersion(void) {
|
||||
return (DEC_MAJ_VERSION << 16) | (DEC_MIN_VERSION << 8) | DEC_REV_VERSION;
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Signature and pointer-to-function for GetCoeffs() variants below.
|
||||
|
||||
// Common prototype of the coefficient-decoding routines (GetCoeffsFast() and
// GetCoeffsAlt()), so that either one can be called through a single pointer.
typedef int (*GetCoeffsFunc)(VP8BitReader* const br,
|
||||
const VP8BandProbas* const prob[],
|
||||
int ctx, const quant_t dq, int n, int16_t* out);
|
||||
// Active implementation. Starts out as NULL and is assigned once by
// InitGetCoeffs(), which VP8New() calls.
// NOTE(review): 'volatile' by itself does not synchronize concurrent first
// calls; InitGetCoeffs() carries WEBP_TSAN_IGNORE_FUNCTION, so the race looks
// intentional/benign -- confirm.
static volatile GetCoeffsFunc GetCoeffs = NULL;
|
||||
|
||||
// Forward declaration: the definition comes after the GetCoeffs variants it
// chooses between.
static void InitGetCoeffs(void);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// VP8Decoder
|
||||
|
||||
@ -51,6 +61,7 @@ VP8Decoder* VP8New(void) {
|
||||
WebPGetWorkerInterface()->Init(&dec->worker_);
|
||||
dec->ready_ = 0;
|
||||
dec->num_parts_minus_one_ = 0;
|
||||
InitGetCoeffs();
|
||||
}
|
||||
return dec;
|
||||
}
|
||||
@ -422,8 +433,9 @@ static int GetLargeValue(VP8BitReader* const br, const uint8_t* const p) {
|
||||
}
|
||||
|
||||
// Returns the position of the last non-zero coeff plus one
|
||||
static int GetCoeffs(VP8BitReader* const br, const VP8BandProbas* const prob[],
|
||||
int ctx, const quant_t dq, int n, int16_t* out) {
|
||||
static int GetCoeffsFast(VP8BitReader* const br,
|
||||
const VP8BandProbas* const prob[],
|
||||
int ctx, const quant_t dq, int n, int16_t* out) {
|
||||
const uint8_t* p = prob[n]->probas_[ctx];
|
||||
for (; n < 16; ++n) {
|
||||
if (!VP8GetBit(br, p[0])) {
|
||||
@ -449,6 +461,46 @@ static int GetCoeffs(VP8BitReader* const br, const VP8BandProbas* const prob[],
|
||||
return 16;
|
||||
}
|
||||
|
||||
// This version of GetCoeffs() uses VP8GetBitAlt() which is an alternate version
|
||||
// of VP8GetBit() targeting specific platforms.
|
||||
// Decodes the coefficients of one block starting at position 'n', reading the
// token bits with VP8GetBitAlt().
//   br:   bit reader the bits are pulled from.
//   prob: per-position probability sets. The code below dereferences up to
//         prob[16] (via prob[++n] and prob[n + 1]), so the caller presumably
//         supplies at least 17 entries -- verify against caller.
//   ctx:  context used to select the initial probabilities at position 'n'.
//   dq:   scaling factors: dq[0] is applied at position 0, dq[1] elsewhere.
//   n:    position of the first coefficient to decode.
//   out:  destination; each non-zero coefficient is written at kZigzag[n].
// Returns the position at which decoding stopped, or 16 if the end of the
// block was reached.
// NOTE(review): GetLargeValue() and VP8GetSigned() are shared helpers whose
// bodies are not visible here; check whether they read bits with VP8GetBit()
// rather than VP8GetBitAlt().
static int GetCoeffsAlt(VP8BitReader* const br,
|
||||
const VP8BandProbas* const prob[],
|
||||
int ctx, const quant_t dq, int n, int16_t* out) {
|
||||
// Start from the probabilities of position 'n' for the incoming context.
const uint8_t* p = prob[n]->probas_[ctx];
|
||||
for (; n < 16; ++n) {
|
||||
// p[0]: is there any further non-zero coefficient from position n onward?
if (!VP8GetBitAlt(br, p[0])) {
|
||||
return n; // previous coeff was last non-zero coeff
|
||||
}
|
||||
while (!VP8GetBitAlt(br, p[1])) { // sequence of zero coeffs
|
||||
// After a zero, move to the next position and use its context 0.
// prob[n] is fetched before the n == 16 test, hence the prob[16] access.
p = prob[++n]->probas_[0];
|
||||
if (n == 16) return 16;
|
||||
}
|
||||
{ // non zero coeff
|
||||
// Probability sets of the next position; index 1 or 2 is chosen below
// depending on whether the decoded magnitude is 1 or larger.
const VP8ProbaArray* const p_ctx = &prob[n + 1]->probas_[0];
|
||||
int v;
|
||||
// p[2]: magnitude is exactly 1 (bit == 0) or needs GetLargeValue().
if (!VP8GetBitAlt(br, p[2])) {
|
||||
v = 1;
|
||||
p = p_ctx[1];
|
||||
} else {
|
||||
v = GetLargeValue(br, p);
|
||||
p = p_ctx[2];
|
||||
}
|
||||
// Apply the sign, scale by dq[0] (n == 0) or dq[1] (n > 0), and store at
// index kZigzag[n].
out[kZigzag[n]] = VP8GetSigned(br, v) * dq[n > 0];
|
||||
}
|
||||
}
|
||||
return 16;
|
||||
}
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION static void InitGetCoeffs(void) {
|
||||
if (GetCoeffs == NULL) {
|
||||
if (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kSlowSSSE3)) {
|
||||
GetCoeffs = GetCoeffsAlt;
|
||||
} else {
|
||||
GetCoeffs = GetCoeffsFast;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE uint32_t NzCodeBits(uint32_t nz_coeffs, int nz, int dc_nz) {
|
||||
nz_coeffs <<= 2;
|
||||
nz_coeffs |= (nz > 3) ? 3 : (nz > 1) ? 2 : dc_nz;
|
||||
|
@ -25,6 +25,7 @@
|
||||
#include "../dsp/dsp.h"
|
||||
#include "./bit_reader.h"
|
||||
#include "./endian_inl.h"
|
||||
#include "./utils.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
@ -163,6 +164,37 @@ int VP8GetSigned(VP8BitReader* const br, int v) {
|
||||
}
|
||||
}
|
||||
|
||||
// Alternate single-bit reader: decodes one bit from 'br' given 'prob'.
// The current range is cut at split = (range * prob) >> 8; the function
// returns 1 when the pending value lies above the split and 0 otherwise.
// Updates br->range_, and possibly br->value_ and br->bits_ (more bytes are
// loaded first through VP8LoadNewBytes() when br->bits_ is negative).
static WEBP_INLINE int VP8GetBitAlt(VP8BitReader* const br, int prob) {
|
||||
// Don't move this declaration! It makes a big speed difference to store
|
||||
// 'range' *before* calling VP8LoadNewBytes(), even if this function doesn't
|
||||
// alter br->range_ value.
|
||||
range_t range = br->range_;
|
||||
// Refill only when br->bits_ has gone negative.
if (br->bits_ < 0) {
|
||||
VP8LoadNewBytes(br);
|
||||
}
|
||||
{
|
||||
const int pos = br->bits_;
|
||||
const range_t split = (range * prob) >> 8;
|
||||
// Part of br->value_ that is compared against the split.
const range_t value = (range_t)(br->value_ >> pos);
|
||||
int bit; // Don't use 'const int bit = (value > split);', it's slower.
|
||||
if (value > split) {
|
||||
// Bit is 1: keep the part above the split and remove it from the value.
range -= split + 1;
|
||||
br->value_ -= (bit_t)(split + 1) << pos;
|
||||
bit = 1;
|
||||
} else {
|
||||
// Bit is 0: the new range is the part up to the split.
range = split;
|
||||
bit = 0;
|
||||
}
|
||||
// When the range is 0x7e or below, take the shift amount and the
// replacement range from the lookup tables and lower br->bits_ by the shift.
if (range <= (range_t)0x7e) {
|
||||
const int shift = kVP8Log2Range[range];
|
||||
range = kVP8NewRange[range];
|
||||
br->bits_ -= shift;
|
||||
}
|
||||
br->range_ = range;
|
||||
return bit;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
Loading…
x
Reference in New Issue
Block a user