mirror of
https://github.com/webmproject/libwebp.git
synced 2025-01-15 17:18:23 +01:00
VP8LFillBitWindow: enable fast path for 32-bit builds
Also reduce the load size from 64 to 32 bits, as the top 32 bits are shifted away in the operation. The change is neutral speed-wise on x86_64, as is the change in load size on x86, but it gives a slight improvement on 32-bit arm. x86 is improved ~13%, 32-bit arm ~3.7%; aarch64 is untested but will likely benefit as well. Change-Id: Ibcb02a70f46f2651105d7ab571afe352673bef48
This commit is contained in:
parent
4f7f52b2a1
commit
6422e683af
@ -109,6 +109,13 @@ int32_t VP8GetSignedValue(VP8BitReader* const br, int bits) {
|
|||||||
#define WBITS 32 // Minimum number of bytes needed after VP8LFillBitWindow.
|
#define WBITS 32 // Minimum number of bytes needed after VP8LFillBitWindow.
|
||||||
#define LOG8_WBITS 4 // Number of bytes needed to store WBITS bits.
|
#define LOG8_WBITS 4 // Number of bytes needed to store WBITS bits.
|
||||||
|
|
||||||
|
#if !defined(WEBP_FORCE_ALIGNED) && \
|
||||||
|
(defined(__arm__) || defined(_M_ARM) || defined(__aarch64__) || \
|
||||||
|
defined(__i386__) || defined(_M_IX86) || \
|
||||||
|
defined(__x86_64__) || defined(_M_X64))
|
||||||
|
#define VP8L_USE_UNALIGNED_LOAD
|
||||||
|
#endif
|
||||||
|
|
||||||
static const uint32_t kBitMask[VP8L_MAX_NUM_BIT_READ + 1] = {
|
static const uint32_t kBitMask[VP8L_MAX_NUM_BIT_READ + 1] = {
|
||||||
0,
|
0,
|
||||||
0x000001, 0x000003, 0x000007, 0x00000f,
|
0x000001, 0x000003, 0x000007, 0x00000f,
|
||||||
@ -176,17 +183,16 @@ static void ShiftBytes(VP8LBitReader* const br) {
|
|||||||
|
|
||||||
void VP8LFillBitWindow(VP8LBitReader* const br) {
|
void VP8LFillBitWindow(VP8LBitReader* const br) {
|
||||||
if (br->bit_pos_ >= WBITS) {
|
if (br->bit_pos_ >= WBITS) {
|
||||||
// TODO(jzern): 1) this might be of benefit in 32-bit builds too, along with
|
// TODO(jzern): given the fixed read size it may be possible to force
|
||||||
// reducing the load size.
|
|
||||||
// 2) given the fixed read size it may be possible to force
|
|
||||||
// alignment in this block.
|
// alignment in this block.
|
||||||
#if !defined(WEBP_FORCE_ALIGNED) && (defined(__x86_64__) || defined(_M_X64))
|
#if defined(VP8L_USE_UNALIGNED_LOAD)
|
||||||
if (br->pos_ + sizeof(br->val_) < br->len_) {
|
if (br->pos_ + sizeof(br->val_) < br->len_) {
|
||||||
br->val_ >>= WBITS;
|
br->val_ >>= WBITS;
|
||||||
br->bit_pos_ -= WBITS;
|
br->bit_pos_ -= WBITS;
|
||||||
// The expression below needs a little-endian arch to work correctly.
|
// The expression below needs a little-endian arch to work correctly.
|
||||||
// This gives a large speedup for decoding speed.
|
// This gives a large speedup for decoding speed.
|
||||||
br->val_ |= *(const vp8l_val_t*)(br->buf_ + br->pos_) << (LBITS - WBITS);
|
br->val_ |= (vp8l_val_t)*(const uint32_t*)(br->buf_ + br->pos_) <<
|
||||||
|
(LBITS - WBITS);
|
||||||
br->pos_ += LOG8_WBITS;
|
br->pos_ += LOG8_WBITS;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user