mirror of
https://github.com/webmproject/libwebp.git
synced 2024-12-28 14:38:21 +01:00
Remove ReadOneBit() and ReadSymbolUnsafe()
Simplify and re-organize the VP8L bit-reader functions (e.g.: the 40-bit look-ahead code wasn't helping much). Speed-up with LBITS=64, on arm7-a: => before: ./dwebp_justify_24_neon -v bryce_ll.webp Time to decode picture: 11.393s File bryce_ll.webp can be decoded (dimensions: 11158 x 2156). ... => after (LBITS=64): Time to decode picture: 9.953s. Making the VP8L bit-reader work in 32-bit mode is going to be harder (because we need to be able to read two symbols at a time, each with max length 15 bits). Change-Id: I89746fb103b87b5e2fd40a3208a6fbc584b88297
This commit is contained in:
parent
b7490f8553
commit
1667bded67
@ -149,30 +149,22 @@ static WEBP_INLINE int PlaneCodeToDistance(int xsize, int plane_code) {
|
|||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
// Decodes the next Huffman code from bit-stream.
|
// Decodes the next Huffman code from bit-stream.
|
||||||
// FillBitWindow(br) needs to be called at minimum every second call
|
// FillBitWindow(br) needs to be called at minimum every second call
|
||||||
// to ReadSymbolUnsafe.
|
// to ReadSymbol, in order to pre-fetch enough bits.
|
||||||
static int ReadSymbolUnsafe(const HuffmanTree* tree, VP8LBitReader* const br) {
|
|
||||||
const HuffmanTreeNode* node = tree->root_;
|
|
||||||
assert(node != NULL);
|
|
||||||
while (!HuffmanTreeNodeIsLeaf(node)) {
|
|
||||||
node = HuffmanTreeNextNode(node, VP8LReadOneBitUnsafe(br));
|
|
||||||
}
|
|
||||||
return node->symbol_;
|
|
||||||
}
|
|
||||||
|
|
||||||
static WEBP_INLINE int ReadSymbol(const HuffmanTree* tree,
|
static WEBP_INLINE int ReadSymbol(const HuffmanTree* tree,
|
||||||
VP8LBitReader* const br) {
|
VP8LBitReader* const br) {
|
||||||
const int read_safe = (br->pos_ + 8 > br->len_);
|
|
||||||
if (!read_safe) {
|
|
||||||
return ReadSymbolUnsafe(tree, br);
|
|
||||||
} else {
|
|
||||||
const HuffmanTreeNode* node = tree->root_;
|
const HuffmanTreeNode* node = tree->root_;
|
||||||
|
int num_bits = 0;
|
||||||
|
uint32_t bits;
|
||||||
|
bits = VP8LPrefetchBits(br);
|
||||||
assert(node != NULL);
|
assert(node != NULL);
|
||||||
while (!HuffmanTreeNodeIsLeaf(node)) {
|
while (!HuffmanTreeNodeIsLeaf(node)) {
|
||||||
node = HuffmanTreeNextNode(node, VP8LReadOneBit(br));
|
node = HuffmanTreeNextNode(node, bits & 1);
|
||||||
|
bits >>= 1;
|
||||||
|
++num_bits;
|
||||||
}
|
}
|
||||||
|
VP8LDiscardBits(br, num_bits);
|
||||||
return node->symbol_;
|
return node->symbol_;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
static int ReadHuffmanCodeLengths(
|
static int ReadHuffmanCodeLengths(
|
||||||
VP8LDecoder* const dec, const int* const code_length_code_lengths,
|
VP8LDecoder* const dec, const int* const code_length_code_lengths,
|
||||||
|
@ -113,6 +113,10 @@ int32_t VP8GetSignedValue(VP8BitReader* const br, int bits) {
|
|||||||
|
|
||||||
#define MAX_NUM_BIT_READ 25
|
#define MAX_NUM_BIT_READ 25
|
||||||
|
|
||||||
|
#define LBITS 64 // Number of bits prefetched.
|
||||||
|
#define WBITS 32 // Minimum number of bits needed after VP8LFillBitWindow.
|
||||||
|
#define LOG8_WBITS 4 // Number of bytes needed to store WBITS bits.
|
||||||
|
|
||||||
static const uint32_t kBitMask[MAX_NUM_BIT_READ] = {
|
static const uint32_t kBitMask[MAX_NUM_BIT_READ] = {
|
||||||
0, 1, 3, 7, 15, 31, 63, 127, 255, 511, 1023, 2047, 4095, 8191, 16383, 32767,
|
0, 1, 3, 7, 15, 31, 63, 127, 255, 511, 1023, 2047, 4095, 8191, 16383, 32767,
|
||||||
65535, 131071, 262143, 524287, 1048575, 2097151, 4194303, 8388607, 16777215
|
65535, 131071, 262143, 524287, 1048575, 2097151, 4194303, 8388607, 16777215
|
||||||
@ -134,7 +138,7 @@ void VP8LInitBitReader(VP8LBitReader* const br,
|
|||||||
br->eos_ = 0;
|
br->eos_ = 0;
|
||||||
br->error_ = 0;
|
br->error_ = 0;
|
||||||
for (i = 0; i < sizeof(br->val_) && i < br->len_; ++i) {
|
for (i = 0; i < sizeof(br->val_) && i < br->len_; ++i) {
|
||||||
br->val_ |= ((uint64_t)br->buf_[br->pos_]) << (8 * i);
|
br->val_ |= ((vp8l_val_t)br->buf_[br->pos_]) << (8 * i);
|
||||||
++br->pos_;
|
++br->pos_;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -149,91 +153,56 @@ void VP8LBitReaderSetBuffer(VP8LBitReader* const br,
|
|||||||
br->len_ = len;
|
br->len_ = len;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If not at EOS, reload up to LBITS byte-by-byte
|
||||||
static void ShiftBytes(VP8LBitReader* const br) {
|
static void ShiftBytes(VP8LBitReader* const br) {
|
||||||
while (br->bit_pos_ >= 8 && br->pos_ < br->len_) {
|
while (br->bit_pos_ >= 8 && br->pos_ < br->len_) {
|
||||||
br->val_ >>= 8;
|
br->val_ >>= 8;
|
||||||
br->val_ |= ((uint64_t)br->buf_[br->pos_]) << 56;
|
br->val_ |= ((vp8l_val_t)br->buf_[br->pos_]) << (LBITS - 8);
|
||||||
++br->pos_;
|
++br->pos_;
|
||||||
br->bit_pos_ -= 8;
|
br->bit_pos_ -= 8;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void VP8LFillBitWindow(VP8LBitReader* const br) {
|
void VP8LFillBitWindow(VP8LBitReader* const br) {
|
||||||
if (br->bit_pos_ >= 32) {
|
if (br->bit_pos_ >= WBITS) {
|
||||||
#if defined(__x86_64__) || defined(_M_X64)
|
#if (defined(__x86_64__) || defined(_M_X64))
|
||||||
if (br->pos_ + 8 < br->len_) {
|
if (br->pos_ + sizeof(br->val_) < br->len_) {
|
||||||
br->val_ >>= 32;
|
br->val_ >>= WBITS;
|
||||||
|
br->bit_pos_ -= WBITS;
|
||||||
// The expression below needs a little-endian arch to work correctly.
|
// The expression below needs a little-endian arch to work correctly.
|
||||||
// This gives a large speedup for decoding speed.
|
// This gives a large speedup for decoding speed.
|
||||||
br->val_ |= *(const uint64_t *)(br->buf_ + br->pos_) << 32;
|
br->val_ |= *(const vp8l_val_t*)(br->buf_ + br->pos_) << (LBITS - WBITS);
|
||||||
br->pos_ += 4;
|
br->pos_ += LOG8_WBITS;
|
||||||
br->bit_pos_ -= 32;
|
return;
|
||||||
} else {
|
|
||||||
// Slow path.
|
|
||||||
ShiftBytes(br);
|
|
||||||
}
|
}
|
||||||
#else
|
|
||||||
// Always the slow path.
|
|
||||||
ShiftBytes(br);
|
|
||||||
#endif
|
#endif
|
||||||
}
|
ShiftBytes(br); // Slow path.
|
||||||
if (br->pos_ == br->len_ && br->bit_pos_ == 64) {
|
if (br->pos_ == br->len_ && br->bit_pos_ == LBITS) {
|
||||||
br->eos_ = 1;
|
br->eos_ = 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t VP8LReadOneBit(VP8LBitReader* const br) {
|
|
||||||
const uint32_t val = (uint32_t)((br->val_ >> br->bit_pos_) & 1);
|
|
||||||
// Flag an error at end_of_stream.
|
|
||||||
if (!br->eos_) {
|
|
||||||
++br->bit_pos_;
|
|
||||||
if (br->bit_pos_ >= 32) {
|
|
||||||
ShiftBytes(br);
|
|
||||||
}
|
|
||||||
// After this last bit is read, check if eos needs to be flagged.
|
|
||||||
if (br->pos_ == br->len_ && br->bit_pos_ == 64) {
|
|
||||||
br->eos_ = 1;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
br->error_ = 1;
|
|
||||||
}
|
|
||||||
return val;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t VP8LReadBits(VP8LBitReader* const br, int n_bits) {
|
uint32_t VP8LReadBits(VP8LBitReader* const br, int n_bits) {
|
||||||
uint32_t val = 0;
|
|
||||||
assert(n_bits >= 0);
|
assert(n_bits >= 0);
|
||||||
// Flag an error if end_of_stream or n_bits is more than allowed limit.
|
// Flag an error if end_of_stream or n_bits is more than allowed limit.
|
||||||
if (!br->eos_ && n_bits < MAX_NUM_BIT_READ) {
|
if (!br->eos_ && n_bits < MAX_NUM_BIT_READ) {
|
||||||
|
const uint32_t val =
|
||||||
|
(uint32_t)(br->val_ >> br->bit_pos_) & kBitMask[n_bits];
|
||||||
|
const int new_bits = br->bit_pos_ + n_bits;
|
||||||
|
br->bit_pos_ = new_bits;
|
||||||
// If this read is going to cross the read buffer, set the eos flag.
|
// If this read is going to cross the read buffer, set the eos flag.
|
||||||
if (br->pos_ == br->len_) {
|
if (br->pos_ == br->len_) {
|
||||||
if ((br->bit_pos_ + n_bits) >= 64) {
|
if (new_bits >= LBITS) {
|
||||||
br->eos_ = 1;
|
br->eos_ = 1;
|
||||||
if ((br->bit_pos_ + n_bits) > 64) return val;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
val = (uint32_t)((br->val_ >> br->bit_pos_) & kBitMask[n_bits]);
|
|
||||||
br->bit_pos_ += n_bits;
|
|
||||||
if (br->bit_pos_ >= 40) {
|
|
||||||
if (br->pos_ + 5 < br->len_) {
|
|
||||||
br->val_ >>= 40;
|
|
||||||
br->val_ |=
|
|
||||||
(((uint64_t)br->buf_[br->pos_ + 0]) << 24) |
|
|
||||||
(((uint64_t)br->buf_[br->pos_ + 1]) << 32) |
|
|
||||||
(((uint64_t)br->buf_[br->pos_ + 2]) << 40) |
|
|
||||||
(((uint64_t)br->buf_[br->pos_ + 3]) << 48) |
|
|
||||||
(((uint64_t)br->buf_[br->pos_ + 4]) << 56);
|
|
||||||
br->pos_ += 5;
|
|
||||||
br->bit_pos_ -= 40;
|
|
||||||
}
|
|
||||||
if (br->bit_pos_ >= 8) {
|
|
||||||
ShiftBytes(br);
|
ShiftBytes(br);
|
||||||
}
|
return val;
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
br->error_ = 1;
|
br->error_ = 1;
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
return val;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
|
@ -258,14 +258,16 @@ static WEBP_INLINE int VP8GetSigned(VP8BitReader* const br, int v) {
|
|||||||
// -----------------------------------------------------------------------------
|
// -----------------------------------------------------------------------------
|
||||||
// Bitreader for lossless format
|
// Bitreader for lossless format
|
||||||
|
|
||||||
|
typedef uint64_t vp8l_val_t; // right now, this bit-reader can only use 64bit.
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
uint64_t val_;
|
vp8l_val_t val_; // pre-fetched bits
|
||||||
const uint8_t* buf_;
|
const uint8_t* buf_; // input byte buffer
|
||||||
size_t len_;
|
size_t len_; // buffer length
|
||||||
size_t pos_;
|
size_t pos_; // byte position in buf_
|
||||||
int bit_pos_;
|
int bit_pos_; // current bit-reading position in val_
|
||||||
int eos_;
|
int eos_; // bitstream is finished
|
||||||
int error_;
|
int error_; // an error occurred (buffer overflow attempt...)
|
||||||
} VP8LBitReader;
|
} VP8LBitReader;
|
||||||
|
|
||||||
void VP8LInitBitReader(VP8LBitReader* const br,
|
void VP8LInitBitReader(VP8LBitReader* const br,
|
||||||
@ -281,17 +283,14 @@ void VP8LBitReaderSetBuffer(VP8LBitReader* const br,
|
|||||||
// Flags eos if this read attempt is going to cross the read buffer.
|
// Flags eos if this read attempt is going to cross the read buffer.
|
||||||
uint32_t VP8LReadBits(VP8LBitReader* const br, int n_bits);
|
uint32_t VP8LReadBits(VP8LBitReader* const br, int n_bits);
|
||||||
|
|
||||||
// Reads one bit from Read Buffer. Flags an error in case end_of_stream.
|
// Return the prefetched bits, so they can be looked up.
|
||||||
// Flags eos after reading last bit from the buffer.
|
static WEBP_INLINE uint32_t VP8LPrefetchBits(VP8LBitReader* const br) {
|
||||||
uint32_t VP8LReadOneBit(VP8LBitReader* const br);
|
return (uint32_t)(br->val_ >> br->bit_pos_);
|
||||||
|
}
|
||||||
|
|
||||||
// VP8LReadOneBitUnsafe is faster than VP8LReadOneBit, but it can be called only
|
// Discard 'num_bits' bits from the cache.
|
||||||
// 32 times after the last VP8LFillBitWindow. Any subsequent calls
|
static WEBP_INLINE void VP8LDiscardBits(VP8LBitReader* const br, int num_bits) {
|
||||||
// (without VP8LFillBitWindow) will return invalid data.
|
br->bit_pos_ += num_bits;
|
||||||
static WEBP_INLINE uint32_t VP8LReadOneBitUnsafe(VP8LBitReader* const br) {
|
|
||||||
const uint32_t val = (uint32_t)((br->val_ >> br->bit_pos_) & 1);
|
|
||||||
++br->bit_pos_;
|
|
||||||
return val;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Advances the Read buffer by 4 bytes to make room for reading next 32 bits.
|
// Advances the Read buffer by 4 bytes to make room for reading next 32 bits.
|
||||||
|
Loading…
Reference in New Issue
Block a user