diff --git a/src/dec/vp8.c b/src/dec/vp8.c index 61682540..9b54389f 100644 --- a/src/dec/vp8.c +++ b/src/dec/vp8.c @@ -26,6 +26,16 @@ int WebPGetDecoderVersion(void) { return (DEC_MAJ_VERSION << 16) | (DEC_MIN_VERSION << 8) | DEC_REV_VERSION; } +//------------------------------------------------------------------------------ +// Signature and pointer-to-function for GetCoeffs() variants below. + +typedef int (*GetCoeffsFunc)(VP8BitReader* const br, + const VP8BandProbas* const prob[], + int ctx, const quant_t dq, int n, int16_t* out); +static volatile GetCoeffsFunc GetCoeffs = NULL; + +static void InitGetCoeffs(void); + //------------------------------------------------------------------------------ // VP8Decoder @@ -51,6 +61,7 @@ VP8Decoder* VP8New(void) { WebPGetWorkerInterface()->Init(&dec->worker_); dec->ready_ = 0; dec->num_parts_minus_one_ = 0; + InitGetCoeffs(); } return dec; } @@ -422,8 +433,9 @@ static int GetLargeValue(VP8BitReader* const br, const uint8_t* const p) { } // Returns the position of the last non-zero coeff plus one -static int GetCoeffs(VP8BitReader* const br, const VP8BandProbas* const prob[], - int ctx, const quant_t dq, int n, int16_t* out) { +static int GetCoeffsFast(VP8BitReader* const br, + const VP8BandProbas* const prob[], + int ctx, const quant_t dq, int n, int16_t* out) { const uint8_t* p = prob[n]->probas_[ctx]; for (; n < 16; ++n) { if (!VP8GetBit(br, p[0])) { @@ -449,6 +461,46 @@ static int GetCoeffs(VP8BitReader* const br, const VP8BandProbas* const prob[], return 16; } +// This version of GetCoeffs() uses VP8GetBitAlt() which is an alternate version +// of VP8GetBitAlt() targeting specific platforms. +static int GetCoeffsAlt(VP8BitReader* const br, + const VP8BandProbas* const prob[], + int ctx, const quant_t dq, int n, int16_t* out) { + const uint8_t* p = prob[n]->probas_[ctx]; + for (; n < 16; ++n) { + if (!VP8GetBitAlt(br, p[0])) { + return n; // previous coeff was last non-zero coeff + } + while (!VP8GetBitAlt(br, p[1])) { // sequence of zero coeffs + p = prob[++n]->probas_[0]; + if (n == 16) return 16; + } + { // non zero coeff + const VP8ProbaArray* const p_ctx = &prob[n + 1]->probas_[0]; + int v; + if (!VP8GetBitAlt(br, p[2])) { + v = 1; + p = p_ctx[1]; + } else { + v = GetLargeValue(br, p); + p = p_ctx[2]; + } + out[kZigzag[n]] = VP8GetSigned(br, v) * dq[n > 0]; + } + } + return 16; +} + +WEBP_TSAN_IGNORE_FUNCTION static void InitGetCoeffs(void) { + if (GetCoeffs == NULL) { + if (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kSlowSSSE3)) { + GetCoeffs = GetCoeffsAlt; + } else { + GetCoeffs = GetCoeffsFast; + } + } +} + static WEBP_INLINE uint32_t NzCodeBits(uint32_t nz_coeffs, int nz, int dc_nz) { nz_coeffs <<= 2; nz_coeffs |= (nz > 3) ? 3 : (nz > 1) ? 2 : dc_nz; diff --git a/src/utils/bit_reader_inl.h b/src/utils/bit_reader_inl.h index 003f6a88..db8aca9a 100644 --- a/src/utils/bit_reader_inl.h +++ b/src/utils/bit_reader_inl.h @@ -25,6 +25,7 @@ #include "../dsp/dsp.h" #include "./bit_reader.h" #include "./endian_inl.h" +#include "./utils.h" #ifdef __cplusplus extern "C" { @@ -163,6 +164,37 @@ int VP8GetSigned(VP8BitReader* const br, int v) { } } +static WEBP_INLINE int VP8GetBitAlt(VP8BitReader* const br, int prob) { + // Don't move this declaration! It makes a big speed difference to store + // 'range' *before* calling VP8LoadNewBytes(), even if this function doesn't + // alter br->range_ value. + range_t range = br->range_; + if (br->bits_ < 0) { + VP8LoadNewBytes(br); + } + { + const int pos = br->bits_; + const range_t split = (range * prob) >> 8; + const range_t value = (range_t)(br->value_ >> pos); + int bit; // Don't use 'const int bit = (value > split);", it's slower. + if (value > split) { + range -= split + 1; + br->value_ -= (bit_t)(split + 1) << pos; + bit = 1; + } else { + range = split; + bit = 0; + } + if (range <= (range_t)0x7e) { + const int shift = kVP8Log2Range[range]; + range = kVP8NewRange[range]; + br->bits_ -= shift; + } + br->range_ = range; + return bit; + } +} + #ifdef __cplusplus } // extern "C" #endif