1-2% faster quantization in SSE2

C-version is a bit faster too (sub-1% faster on ARM) Change-Id: I077262042f1d0937aba1ecf15174f2c51bf6cd97
2025-07-12 22:14:29 +02:00 · 2014-02-13 15:55:30 -08:00
parent b2fbc36c26
commit 0235d5e44b
4 changed files with 64 additions and 62 deletions
--- a/src/enc/quant.c
+++ b/src/enc/quant.c
@ -592,13 +592,13 @@ static int TrellisQuantizeBlock(const VP8EncIterator* const it,
  // traverse trellis.
  for (n = first; n <= last; ++n) {
    const int j  = kZigzag[n];
-    const int Q  = mtx->q_[j];
-    const int iQ = mtx->iq_[j];
-    const int B = BIAS(0x00);     // neutral bias
+    const uint32_t Q  = mtx->q_[j];
+    const uint32_t iQ = mtx->iq_[j];
+    const uint32_t B = BIAS(0x00);     // neutral bias
    // note: it's important to take sign of the _original_ coeff,
    // so we don't have to consider level < 0 afterward.
    const int sign = (in[j] < 0);
-    const int coeff0 = (sign ? -in[j] : in[j]) + mtx->sharpen_[j];
+    const uint32_t coeff0 = (sign ? -in[j] : in[j]) + mtx->sharpen_[j];
    int level0 = QUANTDIV(coeff0, iQ, B);
    if (level0 > MAX_LEVEL) level0 = MAX_LEVEL;

--- a/src/enc/vp8enci.h
+++ b/src/enc/vp8enci.h
@ -166,8 +166,8 @@ typedef int64_t score_t;     // type used for scores, rate, distortion
 #define BIAS(b)  ((b) << (QFIX - 8))
 // Fun fact: this is the _only_ line where we're actually being lossy and
 // discarding bits.
-static WEBP_INLINE int QUANTDIV(int n, int iQ, int B) {
-  return (n * iQ + B) >> QFIX;
+static WEBP_INLINE int QUANTDIV(uint32_t n, uint32_t iQ, uint32_t B) {
+  return (int)((n * iQ + B) >> QFIX);
 }

 // size of histogram used by CollectHistogram.
@ -236,8 +236,8 @@ typedef struct {
 typedef struct VP8Matrix {
  uint16_t q_[16];        // quantizer steps
  uint16_t iq_[16];       // reciprocals, fixed point.
-  uint16_t bias_[16];     // rounding bias
-  uint16_t zthresh_[16];  // value under which a coefficient is zeroed
+  uint32_t bias_[16];     // rounding bias
+  uint32_t zthresh_[16];  // value below which a coefficient is zeroed
  uint16_t sharpen_[16];  // frequency boosters for slight sharpening
 } VP8Matrix;