introduce VP8EncQuantize2Blocks to quantize two blocks at a time

No speed diff for now. We might reorder better the instructions later, to speed things up. Change-Id: I1949525a0b329c7fd861b8dbea7db4b23d37709c
2026-04-09 14:22:31 +02:00 · 2014-08-25 13:16:14 -07:00
parent 0b21c30b1a
commit 73d361dd5f
6 changed files with 50 additions and 6 deletions
--- a/src/dsp/dsp.h
+++ b/src/dsp/dsp.h
@@ -117,7 +117,12 @@ extern VP8BlockCopy VP8Copy4x4;
 struct VP8Matrix;   // forward declaration
 typedef int (*VP8QuantizeBlock)(int16_t in[16], int16_t out[16],
                                const struct VP8Matrix* const mtx);
+// Same as VP8QuantizeBlock, but quantizes two consecutive blocks.
+typedef int (*VP8Quantize2Blocks)(int16_t in[32], int16_t out[32],
+                                  const struct VP8Matrix* const mtx);
+
 extern VP8QuantizeBlock VP8EncQuantizeBlock;
+extern VP8Quantize2Blocks VP8EncQuantize2Blocks;

 // specific to 2nd transform:
 typedef int (*VP8QuantizeBlockWHT)(int16_t in[16], int16_t out[16],
--- a/src/dsp/enc.c
+++ b/src/dsp/enc.c
@@ -625,6 +625,14 @@ static int QuantizeBlock(int16_t in[16], int16_t out[16],
  return (last >= 0);
 }

+static int Quantize2Blocks(int16_t in[32], int16_t out[32],
+                           const VP8Matrix* const mtx) {
+  int nz;
+  nz  = VP8EncQuantizeBlock(in + 0 * 16, out + 0 * 16, mtx) << 0;
+  nz |= VP8EncQuantizeBlock(in + 1 * 16, out + 1 * 16, mtx) << 1;
+  return nz;
+}
+
 static int QuantizeBlockWHT(int16_t in[16], int16_t out[16],
                            const VP8Matrix* const mtx) {
  int n, last = -1;
@@ -684,6 +692,7 @@ VP8Metric VP8SSE4x4;
 VP8WMetric VP8TDisto4x4;
 VP8WMetric VP8TDisto16x16;
 VP8QuantizeBlock VP8EncQuantizeBlock;
+VP8Quantize2Blocks VP8EncQuantize2Blocks;
 VP8QuantizeBlockWHT VP8EncQuantizeBlockWHT;
 VP8BlockCopy VP8Copy4x4;

@@ -711,6 +720,7 @@ void VP8EncDspInit(void) {
  VP8TDisto4x4 = Disto4x4;
  VP8TDisto16x16 = Disto16x16;
  VP8EncQuantizeBlock = QuantizeBlock;
+  VP8EncQuantize2Blocks = Quantize2Blocks;
  VP8EncQuantizeBlockWHT = QuantizeBlockWHT;
  VP8Copy4x4 = Copy4x4;

--- a/src/dsp/enc_mips32.c
+++ b/src/dsp/enc_mips32.c
@@ -237,6 +237,14 @@ static int QuantizeBlock(int16_t in[16], int16_t out[16],
  return 0;
 }

+static int Quantize2Blocks(int16_t in[32], int16_t out[32],
+                           const VP8Matrix* const mtx) {
+  int nz;
+  nz  = QuantizeBlock(in + 0 * 16, out + 0 * 16, mtx) << 0;
+  nz |= QuantizeBlock(in + 1 * 16, out + 1 * 16, mtx) << 1;
+  return nz;
+}
+
 #undef QUANTIZE_ONE

 // macro for one horizontal pass in Disto4x4 (TTransform)
@@ -756,6 +764,7 @@ void VP8EncDspInitMIPS32(void) {
 #if defined(WEBP_USE_MIPS32)
  VP8ITransform = ITransform;
  VP8EncQuantizeBlock = QuantizeBlock;
+  VP8EncQuantize2Blocks = Quantize2Blocks;
  VP8TDisto4x4 = Disto4x4;
  VP8TDisto16x16 = Disto16x16;
  VP8FTransform = FTransform;
--- a/src/dsp/enc_neon.c
+++ b/src/dsp/enc_neon.c
@@ -1047,6 +1047,14 @@ static int QuantizeBlock(int16_t in[16], int16_t out[16],
  return 0;
 }

+static int Quantize2Blocks(int16_t in[32], int16_t out[32],
+                           const VP8Matrix* const mtx) {
+  int nz;
+  nz  = QuantizeBlock(in + 0 * 16, out + 0 * 16, mtx) << 0;
+  nz |= QuantizeBlock(in + 1 * 16, out + 1 * 16, mtx) << 1;
+  return nz;
+}
+
 #endif   // !WORK_AROUND_GCC

 #endif   // WEBP_USE_NEON
@@ -1072,6 +1080,7 @@ void VP8EncDspInitNEON(void) {
  VP8SSE4x4 = SSE4x4;
 #if !defined(WORK_AROUND_GCC)
  VP8EncQuantizeBlock = QuantizeBlock;
+  VP8EncQuantize2Blocks = Quantize2Blocks;
 #endif
 #endif   // WEBP_USE_NEON
 }
--- a/src/dsp/enc_sse2.c
+++ b/src/dsp/enc_sse2.c
@@ -929,6 +929,15 @@ static int QuantizeBlockWHT(int16_t in[16], int16_t out[16],
  return DoQuantizeBlock(in, out, NULL, mtx);
 }

+static int Quantize2Blocks(int16_t in[32], int16_t out[32],
+                           const VP8Matrix* const mtx) {
+  int nz;
+  const uint16_t* const sharpen = &mtx->sharpen_[0];
+  nz  = DoQuantizeBlock(in + 0 * 16, out + 0 * 16, sharpen, mtx) << 0;
+  nz |= DoQuantizeBlock(in + 1 * 16, out + 1 * 16, sharpen, mtx) << 1;
+  return nz;
+}
+
 // Forward declaration.
 void VP8SetResidualCoeffsSSE2(const int16_t* const coeffs,
                              VP8Residual* const res);
@@ -967,6 +976,7 @@ void VP8EncDspInitSSE2(void) {
 #if defined(WEBP_USE_SSE2)
  VP8CollectHistogram = CollectHistogram;
  VP8EncQuantizeBlock = QuantizeBlock;
+  VP8EncQuantize2Blocks = Quantize2Blocks;
  VP8EncQuantizeBlockWHT = QuantizeBlockWHT;
  VP8ITransform = ITransform;
  VP8FTransform = FTransform;
--- a/src/enc/quant.c
+++ b/src/enc/quant.c
@@ -746,12 +746,13 @@ static int ReconstructIntra16(VP8EncIterator* const it,
      }
    }
  } else {
-    for (n = 0; n < 16; ++n) {
+     for (n = 0; n < 16; n += 2) {
      // Zero-out the first coeff, so that: a) nz is correct below, and
      // b) finding 'last' non-zero coeffs in SetResidualCoeffs() is simplified.
-      tmp[n][0] = 0;
-      nz |= VP8EncQuantizeBlock(tmp[n], rd->y_ac_levels[n], &dqm->y1_) << n;
-      assert(rd->y_ac_levels[n][0] == 0);
+      tmp[n][0] = tmp[n + 1][0] = 0;
+      nz |= VP8EncQuantize2Blocks(tmp[n], rd->y_ac_levels[n], &dqm->y1_) << n;
+      assert(rd->y_ac_levels[n + 0][0] == 0);
+      assert(rd->y_ac_levels[n + 1][0] == 0);
    }
  }

@@ -816,8 +817,8 @@ static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd,
      }
    }
  } else {
-    for (n = 0; n < 8; ++n) {
-      nz |= VP8EncQuantizeBlock(tmp[n], rd->uv_levels[n], &dqm->uv_) << n;
+    for (n = 0; n < 8; n += 2) {
+      nz |= VP8EncQuantize2Blocks(tmp[n], rd->uv_levels[n], &dqm->uv_) << n;
    }
  }