From 1667bded67811d5502eebca9e7cefe7c31e4809a Mon Sep 17 00:00:00 2001 From: skal Date: Wed, 20 Feb 2013 00:13:23 +0100 Subject: [PATCH] Remove ReadOneBit() and ReadSymbolUnsafe() Simplify and re-organize the VP8L bit-reader functions (e.g.: the 40-bit look-ahead code wasn't helping much) Speed-up with LBITS=64, on arm7-a: => before: ./dwebp_justify_24_neon -v bryce_ll.webp Time to decode picture: 11.393s File bryce_ll.webp can be decoded (dimensions: 11158 x 2156). ... => after (LBITS=64): Time to decode picture: 9.953s making the VP8L bit-reader in 32 bit mode is going to be harder (because we need to be able to read two symbols at a time, each with max length 15 bits) Change-Id: I89746fb103b87b5e2fd40a3208a6fbc584b88297 --- src/dec/vp8l.c | 32 +++++++---------- src/utils/bit_reader.c | 81 +++++++++++++----------------------------- src/utils/bit_reader.h | 33 +++++++++-------- 3 files changed, 53 insertions(+), 93 deletions(-) diff --git a/src/dec/vp8l.c b/src/dec/vp8l.c index a1c8d3a9..0603e074 100644 --- a/src/dec/vp8l.c +++ b/src/dec/vp8l.c @@ -149,29 +149,21 @@ static WEBP_INLINE int PlaneCodeToDistance(int xsize, int plane_code) { //------------------------------------------------------------------------------ // Decodes the next Huffman code from bit-stream. // FillBitWindow(br) needs to be called at minimum every second call -// to ReadSymbolUnsafe. -static int ReadSymbolUnsafe(const HuffmanTree* tree, VP8LBitReader* const br) { - const HuffmanTreeNode* node = tree->root_; - assert(node != NULL); - while (!HuffmanTreeNodeIsLeaf(node)) { - node = HuffmanTreeNextNode(node, VP8LReadOneBitUnsafe(br)); - } - return node->symbol_; -} - +// to ReadSymbol, in order to pre-fetch enough bits. 
static WEBP_INLINE int ReadSymbol(const HuffmanTree* tree, VP8LBitReader* const br) { - const int read_safe = (br->pos_ + 8 > br->len_); - if (!read_safe) { - return ReadSymbolUnsafe(tree, br); - } else { - const HuffmanTreeNode* node = tree->root_; - assert(node != NULL); - while (!HuffmanTreeNodeIsLeaf(node)) { - node = HuffmanTreeNextNode(node, VP8LReadOneBit(br)); - } - return node->symbol_; + const HuffmanTreeNode* node = tree->root_; + int num_bits = 0; + uint32_t bits; + bits = VP8LPrefetchBits(br); + assert(node != NULL); + while (!HuffmanTreeNodeIsLeaf(node)) { + node = HuffmanTreeNextNode(node, bits & 1); + bits >>= 1; + ++num_bits; } + VP8LDiscardBits(br, num_bits); + return node->symbol_; } static int ReadHuffmanCodeLengths( diff --git a/src/utils/bit_reader.c b/src/utils/bit_reader.c index 5f79bbd9..d6cfd864 100644 --- a/src/utils/bit_reader.c +++ b/src/utils/bit_reader.c @@ -113,6 +113,10 @@ int32_t VP8GetSignedValue(VP8BitReader* const br, int bits) { #define MAX_NUM_BIT_READ 25 +#define LBITS 64 // Number of bits prefetched. +#define WBITS 32 // Minimum number of bits needed after VP8LFillBitWindow. +#define LOG8_WBITS 4 // Number of bytes needed to store WBITS bits. 
+ static const uint32_t kBitMask[MAX_NUM_BIT_READ] = { 0, 1, 3, 7, 15, 31, 63, 127, 255, 511, 1023, 2047, 4095, 8191, 16383, 32767, 65535, 131071, 262143, 524287, 1048575, 2097151, 4194303, 8388607, 16777215 @@ -134,7 +138,7 @@ void VP8LInitBitReader(VP8LBitReader* const br, br->eos_ = 0; br->error_ = 0; for (i = 0; i < sizeof(br->val_) && i < br->len_; ++i) { - br->val_ |= ((uint64_t)br->buf_[br->pos_]) << (8 * i); + br->val_ |= ((vp8l_val_t)br->buf_[br->pos_]) << (8 * i); ++br->pos_; } } @@ -149,91 +153,56 @@ void VP8LBitReaderSetBuffer(VP8LBitReader* const br, br->len_ = len; } +// If not at EOS, reload up to LBITS byte-by-byte static void ShiftBytes(VP8LBitReader* const br) { while (br->bit_pos_ >= 8 && br->pos_ < br->len_) { br->val_ >>= 8; - br->val_ |= ((uint64_t)br->buf_[br->pos_]) << 56; + br->val_ |= ((vp8l_val_t)br->buf_[br->pos_]) << (LBITS - 8); ++br->pos_; br->bit_pos_ -= 8; } } void VP8LFillBitWindow(VP8LBitReader* const br) { - if (br->bit_pos_ >= 32) { -#if defined(__x86_64__) || defined(_M_X64) - if (br->pos_ + 8 < br->len_) { - br->val_ >>= 32; + if (br->bit_pos_ >= WBITS) { +#if (defined(__x86_64__) || defined(_M_X64)) + if (br->pos_ + sizeof(br->val_) < br->len_) { + br->val_ >>= WBITS; + br->bit_pos_ -= WBITS; // The expression below needs a little-endian arch to work correctly. // This gives a large speedup for decoding speed. - br->val_ |= *(const uint64_t *)(br->buf_ + br->pos_) << 32; - br->pos_ += 4; - br->bit_pos_ -= 32; - } else { - // Slow path. - ShiftBytes(br); + br->val_ |= *(const vp8l_val_t*)(br->buf_ + br->pos_) << (LBITS - WBITS); + br->pos_ += LOG8_WBITS; + return; } -#else - // Always the slow path. - ShiftBytes(br); #endif - } - if (br->pos_ == br->len_ && br->bit_pos_ == 64) { - br->eos_ = 1; - } -} - -uint32_t VP8LReadOneBit(VP8LBitReader* const br) { - const uint32_t val = (uint32_t)((br->val_ >> br->bit_pos_) & 1); - // Flag an error at end_of_stream. 
- if (!br->eos_) { - ++br->bit_pos_; - if (br->bit_pos_ >= 32) { - ShiftBytes(br); - } - // After this last bit is read, check if eos needs to be flagged. - if (br->pos_ == br->len_ && br->bit_pos_ == 64) { + ShiftBytes(br); // Slow path. + if (br->pos_ == br->len_ && br->bit_pos_ == LBITS) { br->eos_ = 1; } - } else { - br->error_ = 1; } - return val; } uint32_t VP8LReadBits(VP8LBitReader* const br, int n_bits) { - uint32_t val = 0; assert(n_bits >= 0); // Flag an error if end_of_stream or n_bits is more than allowed limit. if (!br->eos_ && n_bits < MAX_NUM_BIT_READ) { + const uint32_t val = + (uint32_t)(br->val_ >> br->bit_pos_) & kBitMask[n_bits]; + const int new_bits = br->bit_pos_ + n_bits; + br->bit_pos_ = new_bits; // If this read is going to cross the read buffer, set the eos flag. if (br->pos_ == br->len_) { - if ((br->bit_pos_ + n_bits) >= 64) { + if (new_bits >= LBITS) { br->eos_ = 1; - if ((br->bit_pos_ + n_bits) > 64) return val; - } - } - val = (uint32_t)((br->val_ >> br->bit_pos_) & kBitMask[n_bits]); - br->bit_pos_ += n_bits; - if (br->bit_pos_ >= 40) { - if (br->pos_ + 5 < br->len_) { - br->val_ >>= 40; - br->val_ |= - (((uint64_t)br->buf_[br->pos_ + 0]) << 24) | - (((uint64_t)br->buf_[br->pos_ + 1]) << 32) | - (((uint64_t)br->buf_[br->pos_ + 2]) << 40) | - (((uint64_t)br->buf_[br->pos_ + 3]) << 48) | - (((uint64_t)br->buf_[br->pos_ + 4]) << 56); - br->pos_ += 5; - br->bit_pos_ -= 40; - } - if (br->bit_pos_ >= 8) { - ShiftBytes(br); } } + ShiftBytes(br); + return val; } else { br->error_ = 1; + return 0; } - return val; } //------------------------------------------------------------------------------ diff --git a/src/utils/bit_reader.h b/src/utils/bit_reader.h index f1ded6f3..3b6ca663 100644 --- a/src/utils/bit_reader.h +++ b/src/utils/bit_reader.h @@ -258,14 +258,16 @@ static WEBP_INLINE int VP8GetSigned(VP8BitReader* const br, int v) { // ----------------------------------------------------------------------------- // Bitreader for lossless 
format +typedef uint64_t vp8l_val_t; // right now, this bit-reader can only use 64bit. + typedef struct { - uint64_t val_; - const uint8_t* buf_; - size_t len_; - size_t pos_; - int bit_pos_; - int eos_; - int error_; + vp8l_val_t val_; // pre-fetched bits + const uint8_t* buf_; // input byte buffer + size_t len_; // buffer length + size_t pos_; // byte position in buf_ + int bit_pos_; // current bit-reading position in val_ + int eos_; // bitstream is finished + int error_; // an error occurred (buffer overflow attempt...) } VP8LBitReader; void VP8LInitBitReader(VP8LBitReader* const br, @@ -281,17 +283,14 @@ void VP8LBitReaderSetBuffer(VP8LBitReader* const br, // Flags eos if this read attempt is going to cross the read buffer. uint32_t VP8LReadBits(VP8LBitReader* const br, int n_bits); -// Reads one bit from Read Buffer. Flags an error in case end_of_stream. -// Flags eos after reading last bit from the buffer. -uint32_t VP8LReadOneBit(VP8LBitReader* const br); +// Return the prefetched bits, so they can be looked up. +static WEBP_INLINE uint32_t VP8LPrefetchBits(VP8LBitReader* const br) { + return (uint32_t)(br->val_ >> br->bit_pos_); +} -// VP8LReadOneBitUnsafe is faster than VP8LReadOneBit, but it can be called only -// 32 times after the last VP8LFillBitWindow. Any subsequent calls -// (without VP8LFillBitWindow) will return invalid data. -static WEBP_INLINE uint32_t VP8LReadOneBitUnsafe(VP8LBitReader* const br) { - const uint32_t val = (uint32_t)((br->val_ >> br->bit_pos_) & 1); - ++br->bit_pos_; - return val; +// Discard 'num_bits' bits from the cache. +static WEBP_INLINE void VP8LDiscardBits(VP8LBitReader* const br, int num_bits) { + br->bit_pos_ += num_bits; } // Advances the Read buffer by 4 bytes to make room for reading next 32 bits.