lossless: bit writer optimization

valgrind --tool=callgrind shows a 9% speedup: 1021201984 ticks before vs. 927917709 ticks after. Encoding ~/alpi/1.png with "-q 0 -m 0 -lossless": 22.040 MP/s before, 24.796 MP/s after. Change-Id: Iaab928167b3e20fb0d9401c6f8317a26c5a610b4
2025-07-21 08:21:11 +02:00 · 2015-06-29 14:21:10 +00:00
parent d97b9ff755
commit f3a7a5bf76
2 changed files with 39 additions and 5 deletions
--- a/src/utils/bit_writer.c
+++ b/src/utils/bit_writer.c
@ -250,12 +250,28 @@ void VP8LBitWriterWipeOut(VP8LBitWriter* const bw) {
  }
 }

-void VP8LPutBits(VP8LBitWriter* const bw, uint32_t bits, int n_bits) {
+void VP8LPutBitsFlushBits(VP8LBitWriter* const bw) {
+  // If fewer than VP8L_WRITER_BYTES bytes remain before end_, grow the buffer first.
+  if (bw->cur_ + VP8L_WRITER_BYTES > bw->end_) {
+    const uint64_t extra_size = (bw->end_ - bw->buf_) + MIN_EXTRA_SIZE;  // current buffer span plus MIN_EXTRA_SIZE
+    if (extra_size != (size_t)extra_size ||  // value would not survive the cast to size_t
+        !VP8LBitWriterResize(bw, (size_t)extra_size)) {
+      bw->cur_ = bw->buf_;  // failure: rewind the write position to the buffer start...
+      bw->error_ = 1;       // ...and flag the error; bits_ and used_ are left untouched
+      return;
+    }
+  }
+  *(vp8l_wtype_t*)bw->cur_ = (vp8l_wtype_t)WSWAP((vp8l_wtype_t)bw->bits_);  // store the low word of bits_ (WSWAP presumably fixes byte order -- confirm)
+  bw->cur_ += VP8L_WRITER_BYTES;
+  bw->bits_ >>= VP8L_WRITER_BITS;  // drop the bits that were just written
+  bw->used_ -= VP8L_WRITER_BITS;
+}
+
+void VP8LPutBitsInternal(VP8LBitWriter* const bw, uint32_t bits, int n_bits) {
  assert(n_bits <= 32);
  // That's the max we can handle:
-  assert(bw->used_ + n_bits <= 2 * VP8L_WRITER_MAX_BITS);
+  assert(sizeof(vp8l_wtype_t) == 2);
  if (n_bits > 0) {
-    // Local field copy.
    vp8l_atype_t lbits = bw->bits_;
    int used = bw->used_;
    // Special case of overflow handling for 32bit accumulator (2-steps flush).
@ -286,7 +302,6 @@ void VP8LPutBits(VP8LBitWriter* const bw, uint32_t bits, int n_bits) {
      lbits >>= VP8L_WRITER_BITS;
      used -= VP8L_WRITER_BITS;
    }
-    // Eventually, insert new bits.
    bw->bits_ = lbits | ((vp8l_atype_t)bits << used);
    bw->used_ = used + n_bits;
  }
--- a/src/utils/bit_writer.h
+++ b/src/utils/bit_writer.h
@ -104,12 +104,31 @@ uint8_t* VP8LBitWriterFinish(VP8LBitWriter* const bw);
 // Release any pending memory and zeroes the object.
 void VP8LBitWriterWipeOut(VP8LBitWriter* const bw);

+// Internal helper for VP8LPutBits: flushes 32 pending bits out of the writer state.
+void VP8LPutBitsFlushBits(VP8LBitWriter* const bw);
+
+// PutBits internal function used in the 16 bit vp8l_wtype_t case.
+void VP8LPutBitsInternal(VP8LBitWriter* const bw, uint32_t bits, int n_bits);
+
 // This function writes bits into bytes in increasing addresses (little endian),
 // and within a byte least-significant-bit first.
 // This function can write up to 32 bits in one go, but VP8LBitReader can only
 // read 24 bits max (VP8L_MAX_NUM_BIT_READ).
 // VP8LBitWriter's error_ flag is set in case of  memory allocation error.
-void VP8LPutBits(VP8LBitWriter* const bw, uint32_t bits, int n_bits);
+static WEBP_INLINE void VP8LPutBits(VP8LBitWriter* const bw,
+                                    uint32_t bits, int n_bits) {
+  if (sizeof(vp8l_wtype_t) == 4) {  // constant condition: inline path for a 4-byte write type
+    if (n_bits > 0) {  // nothing is written when n_bits <= 0
+      if (bw->used_ >= 32) {  // 32 or more bits pending: flush before appending
+        VP8LPutBitsFlushBits(bw);
+      }
+      bw->bits_ |= (vp8l_atype_t)bits << bw->used_;  // append above the pending bits (assumes vp8l_atype_t is 64 bits wide -- confirm)
+      bw->used_ += n_bits;
+    }
+  } else {
+    VP8LPutBitsInternal(bw, bits, n_bits);  // other write-type sizes take the out-of-line path
+  }
+}

 //------------------------------------------------------------------------------