mirror of
				https://github.com/webmproject/libwebp.git
				synced 2025-10-31 02:15:42 +01:00 
			
		
		
		
	VP8LBitWriter: use a bit-accumulator
* simplify the endian logic
* remove the need for memset()
* write 16 or 32 bits at a time (likely aligned)

Makes the code a bit faster on ARM (~1%)

Change-Id: I650bc5654e8d0b0454318b7a78206b301c5f6c2c
This commit is contained in:
		| @@ -194,19 +194,54 @@ void VP8BitWriterWipeOut(VP8BitWriter* const bw) { | ||||
| //------------------------------------------------------------------------------ | ||||
| // VP8LBitWriter | ||||
|  | ||||
| // This is the minimum number of bytes by which the memory buffer is guaranteed | ||||
| // to grow when extra space is needed. | ||||
| #define MIN_EXTRA_SIZE  (32768ULL) | ||||
|  | ||||
| #define VP8L_WRITER_BYTES ((int)sizeof(vp8l_wtype_t)) | ||||
| #define VP8L_WRITER_BITS (VP8L_WRITER_BYTES * 8) | ||||
|  | ||||
| //  endian-specific htoleXX() definition | ||||
| // TODO(skal): move this to config.h, and collect all the endian-related code | ||||
| // in a proper .h file | ||||
| #if defined(_WIN32) | ||||
| #if !defined(_M_PPC) | ||||
| #define htole32(x) (x) | ||||
| #define htole16(x) (x) | ||||
| #else     // PPC is BIG_ENDIAN | ||||
| #include <stdlib.h> | ||||
| #define htole32(x) (_byteswap_ulong((unsigned long)(x))) | ||||
| #define htole16(x) (_byteswap_ushort((unsigned short)(x))) | ||||
| #endif    // _M_PPC | ||||
| #elif defined(__OpenBSD__) || defined(__NetBSD__) || defined(__FreeBSD__) || \ | ||||
|       defined(__DragonFly__) | ||||
| #include <sys/endian.h> | ||||
| #elif defined(__APPLE__) | ||||
| #include <libkern/OSByteOrder.h> | ||||
| #define htole32 OSSwapHostToLittleInt32 | ||||
| #define htole16 OSSwapHostToLittleInt16 | ||||
| #elif defined(__native_client__) && !defined(__GLIBC__) | ||||
| // NaCl without glibc is assumed to be little-endian | ||||
| #define htole32(x) (x) | ||||
| #define htole16(x) (x) | ||||
| #else     // pretty much all linux and/or glibc | ||||
| #include <endian.h> | ||||
| #endif | ||||
|  | ||||
| // Returns 1 on success. | ||||
| static int VP8LBitWriterResize(VP8LBitWriter* const bw, size_t extra_size) { | ||||
|   uint8_t* allocated_buf; | ||||
|   size_t allocated_size; | ||||
|   const size_t current_size = VP8LBitWriterNumBytes(bw); | ||||
|   const size_t max_bytes = bw->end_ - bw->buf_; | ||||
|   const size_t current_size = bw->cur_ - bw->buf_; | ||||
|   const uint64_t size_required_64b = (uint64_t)current_size + extra_size; | ||||
|   const size_t size_required = (size_t)size_required_64b; | ||||
|   if (size_required != size_required_64b) { | ||||
|     bw->error_ = 1; | ||||
|     return 0; | ||||
|   } | ||||
|   if (bw->max_bytes_ > 0 && size_required <= bw->max_bytes_) return 1; | ||||
|   allocated_size = (3 * bw->max_bytes_) >> 1; | ||||
|   if (max_bytes > 0 && size_required <= max_bytes) return 1; | ||||
|   allocated_size = (3 * max_bytes) >> 1; | ||||
|   if (allocated_size < size_required) allocated_size = size_required; | ||||
|   // make allocated size multiple of 1k | ||||
|   allocated_size = (((allocated_size >> 10) + 1) << 10); | ||||
| @@ -215,11 +250,13 @@ static int VP8LBitWriterResize(VP8LBitWriter* const bw, size_t extra_size) { | ||||
|     bw->error_ = 1; | ||||
|     return 0; | ||||
|   } | ||||
|   memcpy(allocated_buf, bw->buf_, current_size); | ||||
|   if (current_size > 0) { | ||||
|     memcpy(allocated_buf, bw->buf_, current_size); | ||||
|   } | ||||
|   free(bw->buf_); | ||||
|   bw->buf_ = allocated_buf; | ||||
|   bw->max_bytes_ = allocated_size; | ||||
|   memset(allocated_buf + current_size, 0, allocated_size - current_size); | ||||
|   bw->cur_ = bw->buf_ + current_size; | ||||
|   bw->end_ = bw->buf_ + allocated_size; | ||||
|   return 1; | ||||
| } | ||||
|  | ||||
| @@ -236,46 +273,37 @@ void VP8LBitWriterDestroy(VP8LBitWriter* const bw) { | ||||
| } | ||||
|  | ||||
| void VP8LWriteBits(VP8LBitWriter* const bw, int n_bits, uint32_t bits) { | ||||
|   if (n_bits < 1) return; | ||||
| #if !defined(__BIG_ENDIAN__) | ||||
|   // Technically, this branch of the code can write up to 25 bits at a time, | ||||
|   // but in prefix encoding, the maximum number of bits written is 18 at a time. | ||||
|   { | ||||
|     uint8_t* const p = &bw->buf_[bw->bit_pos_ >> 3]; | ||||
|     uint32_t v = *(const uint32_t*)p; | ||||
|     v |= bits << (bw->bit_pos_ & 7); | ||||
|     *(uint32_t*)p = v; | ||||
|     bw->bit_pos_ += n_bits; | ||||
|   } | ||||
| #else  // BIG_ENDIAN | ||||
|   { | ||||
|     uint8_t* p = &bw->buf_[bw->bit_pos_ >> 3]; | ||||
|     const int bits_reserved_in_first_byte = bw->bit_pos_ & 7; | ||||
|     const int bits_left_to_write = n_bits - 8 + bits_reserved_in_first_byte; | ||||
|     // implicit & 0xff is assumed for uint8_t arithmetic | ||||
|     *p++ |= bits << bits_reserved_in_first_byte; | ||||
|     bits >>= 8 - bits_reserved_in_first_byte; | ||||
|     if (bits_left_to_write >= 1) { | ||||
|       *p++ = bits; | ||||
|       bits >>= 8; | ||||
|       if (bits_left_to_write >= 9) { | ||||
|         *p++ = bits; | ||||
|         bits >>= 8; | ||||
|   if (n_bits <= 0) return; | ||||
|   bw->bits_ |= (vp8l_atype_t)bits << bw->used_; | ||||
|   bw->used_ += n_bits; | ||||
|   if (bw->used_ > VP8L_WRITER_BITS) { | ||||
|     if (bw->cur_ + VP8L_WRITER_BYTES > bw->end_) { | ||||
|       const uint64_t extra_size = (bw->end_ - bw->buf_) + MIN_EXTRA_SIZE; | ||||
|       if (extra_size != (size_t)extra_size || | ||||
|           !VP8LBitWriterResize(bw, (size_t)extra_size)) { | ||||
|         bw->cur_ = bw->buf_; | ||||
|         bw->error_ = 1; | ||||
|         return; | ||||
|       } | ||||
|     } | ||||
|     assert(n_bits <= 25); | ||||
|     *p = bits; | ||||
|     bw->bit_pos_ += n_bits; | ||||
|     *(vp8l_wtype_t*)bw->cur_ = (vp8l_wtype_t)WSWAP((vp8l_wtype_t)bw->bits_); | ||||
|     bw->cur_ += VP8L_WRITER_BYTES; | ||||
|     bw->bits_ >>= VP8L_WRITER_BITS; | ||||
|     bw->used_ -= VP8L_WRITER_BITS; | ||||
|   } | ||||
| #endif | ||||
|   if ((bw->bit_pos_ >> 3) > (bw->max_bytes_ - 8)) { | ||||
|     const uint64_t extra_size = 32768ULL + bw->max_bytes_; | ||||
|     if (extra_size != (size_t)extra_size || | ||||
|         !VP8LBitWriterResize(bw, (size_t)extra_size)) { | ||||
|       bw->bit_pos_ = 0; | ||||
|       bw->error_ = 1; | ||||
| } | ||||
|  | ||||
| uint8_t* VP8LBitWriterFinish(VP8LBitWriter* const bw) { | ||||
|   // flush leftover bits | ||||
|   if (VP8LBitWriterResize(bw, (bw->used_ + 7) >> 3)) { | ||||
|     while (bw->used_ > 0) { | ||||
|       *bw->cur_++ = bw->bits_;   // & 0xff is implied here | ||||
|       bw->bits_ >>= 8; | ||||
|       bw->used_ -= 8; | ||||
|     } | ||||
|     bw->used_ = 0; | ||||
|   } | ||||
|   return bw->buf_; | ||||
| } | ||||
|  | ||||
| //------------------------------------------------------------------------------ | ||||
|   | ||||
| @@ -68,51 +68,45 @@ static WEBP_INLINE size_t VP8BitWriterSize(const VP8BitWriter* const bw) { | ||||
|  | ||||
| //------------------------------------------------------------------------------ | ||||
| // VP8LBitWriter | ||||
| // TODO(vikasa): VP8LBitWriter is copied as-is from lossless code. There's scope | ||||
| // for re-using VP8BitWriter. Will evaluate once the basic lossless encoder is | ||||
| // implemented. | ||||
|  | ||||
| #if defined(__x86_64__) || defined(_M_X64)   // 64bit | ||||
| typedef uint64_t vp8l_atype_t;   // accumulator type | ||||
| typedef uint32_t vp8l_wtype_t;   // writing type | ||||
| #define WSWAP htole32 | ||||
| #else | ||||
| typedef uint32_t vp8l_atype_t; | ||||
| typedef uint16_t vp8l_wtype_t; | ||||
| #define WSWAP htole16 | ||||
| #endif | ||||
|  | ||||
| typedef struct { | ||||
|   uint8_t* buf_; | ||||
|   size_t bit_pos_; | ||||
|   size_t max_bytes_; | ||||
|   vp8l_atype_t bits_;   // bit accumulator | ||||
|   int          used_;   // number of bits used in accumulator | ||||
|   uint8_t*     buf_;    // start of buffer | ||||
|   uint8_t*     cur_;    // current write position | ||||
|   uint8_t*     end_;    // end of buffer | ||||
|  | ||||
|   // After all bits are written, the caller must observe the state of | ||||
|   // error_. A value of 1 indicates that a memory allocation failure | ||||
|   // has happened during bit writing. A value of 0 indicates successful | ||||
|   // After all bits are written (VP8LBitWriterFinish()), the caller must observe | ||||
|   // the state of error_. A value of 1 indicates that a memory allocation | ||||
|   // failure has happened during bit writing. A value of 0 indicates successful | ||||
|   // writing of bits. | ||||
|   int error_; | ||||
| } VP8LBitWriter; | ||||
|  | ||||
| static WEBP_INLINE size_t VP8LBitWriterNumBytes(VP8LBitWriter* const bw) { | ||||
|   return (bw->bit_pos_ + 7) >> 3; | ||||
|   return (bw->cur_ - bw->buf_) + ((bw->used_ + 7) >> 3); | ||||
| } | ||||
|  | ||||
| static WEBP_INLINE uint8_t* VP8LBitWriterFinish(VP8LBitWriter* const bw) { | ||||
|   return bw->buf_; | ||||
| } | ||||
| uint8_t* VP8LBitWriterFinish(VP8LBitWriter* const bw); | ||||
|  | ||||
| // Returns 0 in case of memory allocation error. | ||||
| int VP8LBitWriterInit(VP8LBitWriter* const bw, size_t expected_size); | ||||
|  | ||||
| void VP8LBitWriterDestroy(VP8LBitWriter* const bw); | ||||
|  | ||||
| // This function writes bits into bytes in increasing addresses, and within | ||||
| // a byte least-significant-bit first. | ||||
| // | ||||
| // The function can write up to 16 bits in one go with WriteBits | ||||
| // Example: let's assume that 3 bits (Rs below) have been written already: | ||||
| // | ||||
| // BYTE-0     BYTE+1       BYTE+2 | ||||
| // | ||||
| // 0000 0RRR    0000 0000    0000 0000 | ||||
| // | ||||
| // Now, we could write 5 or fewer bits in MSB by just shifting by 3 | ||||
| // and OR'ing to BYTE-0. | ||||
| // | ||||
| // For n bits, we take the last 5 bits, OR that with high bits in BYTE-0, | ||||
| // and locate the rest in BYTE+1 and BYTE+2. | ||||
| // | ||||
| // This function writes bits into bytes in increasing addresses (little endian), | ||||
| // and within a byte least-significant-bit first. | ||||
| // The function can write up to 8*sizeof(vp8l_wtype_t) bits in one go. | ||||
| // VP8LBitWriter's error_ flag is set in case of a memory allocation error. | ||||
| void VP8LWriteBits(VP8LBitWriter* const bw, int n_bits, uint32_t bits); | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user