Use an 8-bit table for the C version of clz()

30% faster on x86, 5% faster on N5.

New generic function: WebPLog2FloorC()
This function is called as fallback for BitsLog2Floor() when there's
no clz() available.

Change-Id: Ica15c6092112e514c0e200fab89c434de48d4b19
This commit is contained in:
Pascal Massimino 2017-01-13 15:36:26 +01:00
parent 8fda56126e
commit fcd4784dcd
2 changed files with 38 additions and 16 deletions

View File

@ -305,3 +305,26 @@ int WebPGetColorPalette(const WebPPicture* const pic, uint32_t* const palette) {
#undef COLOR_HASH_RIGHT_SHIFT #undef COLOR_HASH_RIGHT_SHIFT
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
#if defined(WEBP_NEED_LOG_TABLE_8BIT)
// Lookup table mapping every 8-bit value i to the index of its highest set
// bit, i.e. floor(log2(i)), which equals 31 ^ clz(i) for i > 0.
// Entry 0 is stored as 0. Each row below holds 16 consecutive entries.
// Only compiled when WEBP_NEED_LOG_TABLE_8BIT is defined.
const uint8_t WebPLogTable8bit[256] = { // 31 ^ clz(i)
0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,  // i =   0..15
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,  // i =  16..31
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,  // i =  32..47
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,  // i =  48..63
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,  // i =  64..79
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,  // i =  80..95
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,  // i =  96..111
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,  // i = 112..127
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,  // i = 128..143
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,  // i = 144..159
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,  // i = 160..175
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,  // i = 176..191
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,  // i = 192..207
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,  // i = 208..223
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,  // i = 224..239
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7   // i = 240..255
};
#endif  // WEBP_NEED_LOG_TABLE_8BIT
//------------------------------------------------------------------------------

View File

@ -107,6 +107,19 @@ static WEBP_INLINE void PutLE32(uint8_t* const data, uint32_t val) {
PutLE16(data + 2, (int)(val >> 16)); PutLE16(data + 2, (int)(val >> 16));
} }
// Portable C fallback computing (int)floor(log2(n)), i.e. 31 ^ clz(n), for
// builds where no clz() intrinsic is available.
// Whole bytes are shifted out of 'n' (adding 8 to the result each time) until
// the remainder fits in 8 bits; that remainder is then resolved with a single
// lookup in WebPLogTable8bit[].
#define WEBP_NEED_LOG_TABLE_8BIT
extern const uint8_t WebPLogTable8bit[256];
static WEBP_INLINE int WebPLog2FloorC(uint32_t n) {
  int high_bits = 0;
  for (; n > 0xff; n >>= 8) {
    high_bits += 8;
  }
  return high_bits + WebPLogTable8bit[n];
}
// Returns (int)floor(log2(n)). n must be > 0. // Returns (int)floor(log2(n)). n must be > 0.
// use GNU builtins where available. // use GNU builtins where available.
#if defined(__GNUC__) && \ #if defined(__GNUC__) && \
@ -124,22 +137,8 @@ static WEBP_INLINE int BitsLog2Floor(uint32_t n) {
_BitScanReverse(&first_set_bit, n); _BitScanReverse(&first_set_bit, n);
return first_set_bit; return first_set_bit;
} }
#else #else // default: use the C-version.
static WEBP_INLINE int BitsLog2Floor(uint32_t n) { static WEBP_INLINE int BitsLog2Floor(uint32_t n) { return WebPLog2FloorC(n); }
int log = 0;
uint32_t value = n;
int i;
for (i = 4; i >= 0; --i) {
const int shift = (1 << i);
const uint32_t x = value >> shift;
if (x != 0) {
value = x;
log += shift;
}
}
return log;
}
#endif #endif
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------