introduce a generic GetCoeffs() function pointer

We can switch at run-time between the standard GetCoeffs() critical
function, which uses a fast variant of VP8GetBit(), and GetCoeffsAlt().
Some platforms have slow instructions that make the standard
VP8GetBit() slow. GetCoeffs() is the right level of branching to
switch to GetCoeffsAlt(), which avoids these slow instructions in some
infrequent cases.

The next patch will upgrade VP8GetBit() to use clz, once this one
is proven to be speed-neutral.

Change-Id: Ia6cef5de9de6131574d2202bbc0bea8559c9b693
This commit is contained in:
Pascal Massimino
2017-01-13 16:35:42 +01:00
parent db013a8d5c
commit 8074b89eb3
2 changed files with 86 additions and 2 deletions

View File

@ -25,6 +25,7 @@
#include "../dsp/dsp.h"
#include "./bit_reader.h"
#include "./endian_inl.h"
#include "./utils.h"
#ifdef __cplusplus
extern "C" {
@ -163,6 +164,37 @@ int VP8GetSigned(VP8BitReader* const br, int v) {
}
}
// Decodes a single bit from the bit reader 'br' and returns it (0 or 1).
// This variant renormalizes 'range' through the kVP8Log2Range[] /
// kVP8NewRange[] lookup tables.
//   br:   bit reader; its range_, value_ and bits_ fields are read and
//         updated by this call.
//   prob: weight used to split the current range; the split point is
//         (range * prob) >> 8, so 'prob' acts as a fraction of 256
//         (presumably in [0, 255] -- confirm against callers).
// Returns 1 if the current value lies above the split point, 0 otherwise.
static WEBP_INLINE int VP8GetBitAlt(VP8BitReader* const br, int prob) {
// Don't move this declaration! It makes a big speed difference to store
// 'range' *before* calling VP8LoadNewBytes(), even if this function doesn't
// alter br->range_ value.
range_t range = br->range_;
// A negative bits_ triggers a call to VP8LoadNewBytes() before decoding
// (presumably refilling value_/bits_ from the input -- defined elsewhere).
if (br->bits_ < 0) {
VP8LoadNewBytes(br);
}
{
// 'pos' is the shift that aligns br->value_ with 'range' for the comparison.
const int pos = br->bits_;
const range_t split = (range * prob) >> 8;
const range_t value = (range_t)(br->value_ >> pos);
int bit; // Don't use 'const int bit = (value > split);', it's slower.
if (value > split) {
// Upper part of the range: remove the lower (split + 1) portion from both
// 'range' and the pending value, and report a 1.
range -= split + 1;
br->value_ -= (bit_t)(split + 1) << pos;
bit = 1;
} else {
// Lower part of the range: 'split' becomes the new range, report a 0.
range = split;
bit = 0;
}
// Renormalize when the range has become small (<= 0x7e): the tables give
// the number of bits consumed and the rescaled range.
if (range <= (range_t)0x7e) {
const int shift = kVP8Log2Range[range];
range = kVP8NewRange[range];
br->bits_ -= shift;
}
// Write the locally-held range back to the reader.
br->range_ = range;
return bit;
}
}
#ifdef __cplusplus
} // extern "C"
#endif