new segmentation algorithm

Fixes the 'blocky sky problem' (a saturation problem: when luma was flat, chroma noise took over, resulting in random segment ids being assigned, when just using a common uniform segment would have been better). + side clean-up and readability/experimentability MACRO'ization + added '-map 7' option Change-Id: I35982a9e43c0fecbfdd7b05e4813e8ba8c121d71
2025-06-06 06:04:20 +02:00 · 2012-09-03 19:40:52 +02:00 · 2012-09-03 19:40:52 +02:00 · 5725cabac0
commit 5725cabac0
parent 2cf1f81590
6 changed files with 135 additions and 95 deletions
--- a/src/dsp/dsp.h
+++ b/src/dsp/dsp.h
@ -49,8 +49,6 @@ extern VP8CPUInfo VP8GetCPUInfo;
 //------------------------------------------------------------------------------
 // Encoding

-int VP8GetAlpha(const int histo[]);
-
 // Transforms
 // VP8Idct: Does one of two inverse transforms. If do_two is set, the transforms
 //          will be done for (ref, in, dst) and (ref + 4, in + 16, dst + 4).
@ -85,10 +83,11 @@ typedef int (*VP8QuantizeBlock)(int16_t in[16], int16_t out[16],
                                int n, const struct VP8Matrix* const mtx);
 extern VP8QuantizeBlock VP8EncQuantizeBlock;

-// Compute susceptibility based on DCT-coeff histograms:
-// the higher, the "easier" the macroblock is to compress.
-typedef int (*VP8CHisto)(const uint8_t* ref, const uint8_t* pred,
-                         int start_block, int end_block);
+// Collect histogram for susceptibility calculation and accumulate in histo[].
+struct VP8Histogram;
+typedef void (*VP8CHisto)(const uint8_t* ref, const uint8_t* pred,
+                          int start_block, int end_block,
+                          struct VP8Histogram* const histo);
 extern const int VP8DspScan[16 + 4 + 4];
 extern VP8CHisto VP8CollectHistogram;

--- a/src/dsp/enc.c
+++ b/src/dsp/enc.c
@ -17,31 +17,18 @@
 extern "C" {
 #endif

+static WEBP_INLINE uint8_t clip_8b(int v) {
+  return (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255;
+}
+
+static WEBP_INLINE int clip_max(int v, int max) {
+  return (v > max) ? max : v;
+}
+
 //------------------------------------------------------------------------------
 // Compute susceptibility based on DCT-coeff histograms:
 // the higher, the "easier" the macroblock is to compress.

-static int ClipAlpha(int alpha) {
-  return alpha < 0 ? 0 : alpha > 255 ? 255 : alpha;
-}
-
-int VP8GetAlpha(const int histo[MAX_COEFF_THRESH + 1]) {
-  int num = 0, den = 0, val = 0;
-  int k;
-  int alpha;
-  // note: changing this loop to avoid the numerous "k + 1" slows things down.
-  for (k = 0; k < MAX_COEFF_THRESH; ++k) {
-    if (histo[k + 1]) {
-      val += histo[k + 1];
-      num += val * (k + 1);
-      den += (k + 1) * (k + 1);
-    }
-  }
-  // we scale the value to a usable [0..255] range
-  alpha = den ? 10 * num / den - 5 : 0;
-  return ClipAlpha(alpha);
-}
-
 const int VP8DspScan[16 + 4 + 4] = {
  // Luma
  0 +  0 * BPS,  4 +  0 * BPS, 8 +  0 * BPS, 12 +  0 * BPS,
@ -53,27 +40,23 @@ const int VP8DspScan[16 + 4 + 4] = {
  8 + 0 * BPS,  12 + 0 * BPS, 8 + 4 * BPS, 12 + 4 * BPS     // V
 };

-static int CollectHistogram(const uint8_t* ref, const uint8_t* pred,
-                            int start_block, int end_block) {
-  int histo[MAX_COEFF_THRESH + 1] = { 0 };
-  int16_t out[16];
-  int j, k;
+static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
+                             int start_block, int end_block,
+                             VP8Histogram* const histo) {
+  int j;
  for (j = start_block; j < end_block; ++j) {
+    int k;
+    int16_t out[16];
+
    VP8FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out);

-    // Convert coefficients to bin (within out[]).
+    // Convert coefficients to bin.
    for (k = 0; k < 16; ++k) {
-      const int v = abs(out[k]) >> 2;
-      out[k] = (v > MAX_COEFF_THRESH) ? MAX_COEFF_THRESH : v;
-    }
-
-    // Use bin to update histogram.
-    for (k = 0; k < 16; ++k) {
-      histo[out[k]]++;
+      const int v = abs(out[k]) >> 3;  // TODO(skal): add rounding?
+      const int clipped_value = clip_max(v, MAX_COEFF_THRESH);
+      histo->distribution[clipped_value]++;
    }
  }
-
-  return VP8GetAlpha(histo);
 }

 //------------------------------------------------------------------------------
@ -89,15 +72,12 @@ static void InitTables(void) {
  if (!tables_ok) {
    int i;
    for (i = -255; i <= 255 + 255; ++i) {
-      clip1[255 + i] = (i < 0) ? 0 : (i > 255) ? 255 : i;
+      clip1[255 + i] = clip_8b(i);
    }
    tables_ok = 1;
  }
 }

-static WEBP_INLINE uint8_t clip_8b(int v) {
-  return (!(v & ~0xff)) ? v : v < 0 ? 0 : 255;
-}

 //------------------------------------------------------------------------------
 // Transforms (Paragraph 14.4)
--- a/src/dsp/enc_sse2.c
+++ b/src/dsp/enc_sse2.c
@ -25,13 +25,15 @@ extern "C" {
 // Compute susceptibility based on DCT-coeff histograms:
 // the higher, the "easier" the macroblock is to compress.

-static int CollectHistogramSSE2(const uint8_t* ref, const uint8_t* pred,
-                                int start_block, int end_block) {
-  int histo[MAX_COEFF_THRESH + 1] = { 0 };
-  int16_t out[16];
-  int j, k;
+static void CollectHistogramSSE2(const uint8_t* ref, const uint8_t* pred,
+                                 int start_block, int end_block,
+                                 VP8Histogram* const histo) {
  const __m128i max_coeff_thresh = _mm_set1_epi16(MAX_COEFF_THRESH);
+  int j;
  for (j = start_block; j < end_block; ++j) {
+    int16_t out[16];
+    int k;
+
    VP8FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out);

    // Convert coefficients to bin (within out[]).
@ -47,9 +49,9 @@ static int CollectHistogramSSE2(const uint8_t* ref, const uint8_t* pred,
      const __m128i xor1 = _mm_xor_si128(out1, sign1);
      const __m128i abs0 = _mm_sub_epi16(xor0, sign0);
      const __m128i abs1 = _mm_sub_epi16(xor1, sign1);
-      // v = abs(out) >> 2
-      const __m128i v0 = _mm_srai_epi16(abs0, 2);
-      const __m128i v1 = _mm_srai_epi16(abs1, 2);
+      // v = abs(out) >> 3
+      const __m128i v0 = _mm_srai_epi16(abs0, 3);
+      const __m128i v1 = _mm_srai_epi16(abs1, 3);
      // bin = min(v, MAX_COEFF_THRESH)
      const __m128i bin0 = _mm_min_epi16(v0, max_coeff_thresh);
      const __m128i bin1 = _mm_min_epi16(v1, max_coeff_thresh);
@ -58,13 +60,11 @@ static int CollectHistogramSSE2(const uint8_t* ref, const uint8_t* pred,
      _mm_storeu_si128((__m128i*)&out[8], bin1);
    }

-    // Use bin to update histogram.
+    // Use the bins (already stored in out[]) to update the histogram.
    for (k = 0; k < 16; ++k) {
-      histo[out[k]]++;
+      histo->distribution[out[k]]++;
    }
  }
-
-  return VP8GetAlpha(histo);
 }

 //------------------------------------------------------------------------------
--- a/src/enc/analysis.c
+++ b/src/enc/analysis.c
@ -23,10 +23,6 @@ extern "C" {

 #define MAX_ITERS_K_MEANS  6

-static int ClipAlpha(int alpha) {
-  return alpha < 0 ? 0 : alpha > 255 ? 255 : alpha;
-}
-
 //------------------------------------------------------------------------------
 // Smooth the segment map by replacing isolated block by the majority of its
 // neighbours.
@ -115,7 +111,7 @@ static void SetSegmentProbas(VP8Encoder* const enc) {
 }

 static WEBP_INLINE int clip(int v, int m, int M) {
-  return v < m ? m : v > M ? M : v;
+  return (v < m) ? m : (v > M) ? M : v;
 }

 static void SetSegmentAlphas(VP8Encoder* const enc,
@ -141,23 +137,64 @@ static void SetSegmentAlphas(VP8Encoder* const enc,
  }
 }

+//------------------------------------------------------------------------------
+// Compute susceptibility based on DCT-coeff histograms:
+// the higher, the "easier" the macroblock is to compress.
+
+#define MAX_ALPHA 255                // 8b of precision for susceptibilities.
+#define ALPHA_SCALE (2 * MAX_ALPHA)  // scaling factor for alpha.
+#define DEFAULT_ALPHA (-1)
+#define IS_BETTER_ALPHA(alpha, best_alpha) ((alpha) > (best_alpha))
+
+static int FinalAlphaValue(int alpha) {
+  alpha = MAX_ALPHA - alpha;
+  return clip(alpha, 0, MAX_ALPHA);
+}
+
+static int GetAlpha(const VP8Histogram* const histo) {
+  int max_value = 0, last_non_zero = 1;
+  int k;
+  int alpha;
+  for (k = 0; k <= MAX_COEFF_THRESH; ++k) {
+    const int value = histo->distribution[k];
+    if (value > 0) {
+      if (value > max_value) max_value = value;
+      last_non_zero = k;
+    }
+  }
+  // 'alpha' will later be clipped to [0..MAX_ALPHA] range, clamping outer
+  // values which happen to be mostly noise. This leaves the maximum precision
+  // for handling the useful small values which contribute most.
+  alpha = (max_value > 1) ? ALPHA_SCALE * last_non_zero / max_value : 0;
+  return alpha;
+}
+
+static void MergeHistograms(const VP8Histogram* const in,
+                            VP8Histogram* const out) {
+  int i;
+  for (i = 0; i <= MAX_COEFF_THRESH; ++i) {
+    out->distribution[i] += in->distribution[i];
+  }
+}
+
 //------------------------------------------------------------------------------
 // Simplified k-Means, to assign Nb segments based on alpha-histogram

-static void AssignSegments(VP8Encoder* const enc, const int alphas[256]) {
+static void AssignSegments(VP8Encoder* const enc,
+                           const int alphas[MAX_ALPHA + 1]) {
  const int nb = enc->segment_hdr_.num_segments_;
  int centers[NUM_MB_SEGMENTS];
  int weighted_average = 0;
-  int map[256];
+  int map[MAX_ALPHA + 1];
  int a, n, k;
-  int min_a = 0, max_a = 255, range_a;
+  int min_a = 0, max_a = MAX_ALPHA, range_a;
  // 'int' type is ok for histo, and won't overflow
  int accum[NUM_MB_SEGMENTS], dist_accum[NUM_MB_SEGMENTS];

  // bracket the input
-  for (n = 0; n < 256 && alphas[n] == 0; ++n) {}
+  for (n = 0; n <= MAX_ALPHA && alphas[n] == 0; ++n) {}
  min_a = n;
-  for (n = 255; n > min_a && alphas[n] == 0; --n) {}
+  for (n = MAX_ALPHA; n > min_a && alphas[n] == 0; --n) {}
  max_a = n;
  range_a = max_a - min_a;

@ -210,7 +247,7 @@ static void AssignSegments(VP8Encoder* const enc, const int alphas[256]) {
    VP8MBInfo* const mb = &enc->mb_info_[n];
    const int alpha = mb->alpha_;
    mb->segment_ = map[alpha];
-    mb->alpha_ = centers[map[alpha]];     // just for the record.
+    mb->alpha_ = centers[map[alpha]];  // for the record.
  }

  if (nb > 1) {
@ -236,15 +273,19 @@ static void AssignSegments(VP8Encoder* const enc, const int alphas[256]) {
 static int MBAnalyzeBestIntra16Mode(VP8EncIterator* const it) {
  const int max_mode = (it->enc_->method_ >= 3) ? MAX_INTRA16_MODE : 4;
  int mode;
-  int best_alpha = -1;
+  int best_alpha = DEFAULT_ALPHA;
  int best_mode = 0;

  VP8MakeLuma16Preds(it);
  for (mode = 0; mode < max_mode; ++mode) {
-    const int alpha = VP8CollectHistogram(it->yuv_in_ + Y_OFF,
-                                          it->yuv_p_ + VP8I16ModeOffsets[mode],
-                                          0, 16);
-    if (alpha > best_alpha) {
+    VP8Histogram histo = { { 0 } };
+    int alpha;
+
+    VP8CollectHistogram(it->yuv_in_ + Y_OFF,
+                        it->yuv_p_ + VP8I16ModeOffsets[mode],
+                        0, 16, &histo);
+    alpha = GetAlpha(&histo);
+    if (IS_BETTER_ALPHA(alpha, best_alpha)) {
      best_alpha = alpha;
      best_mode = mode;
    }
@ -257,45 +298,58 @@ static int MBAnalyzeBestIntra4Mode(VP8EncIterator* const it,
                                   int best_alpha) {
  uint8_t modes[16];
  const int max_mode = (it->enc_->method_ >= 3) ? MAX_INTRA4_MODE : NUM_BMODES;
-  int i4_alpha = 0;
+  int i4_alpha;
+  VP8Histogram total_histo = { { 0 } };
+  int cur_histo = 0;
+
  VP8IteratorStartI4(it);
  do {
    int mode;
-    int best_mode_alpha = -1;
+    int best_mode_alpha = DEFAULT_ALPHA;
+    VP8Histogram histos[2];
    const uint8_t* const src = it->yuv_in_ + Y_OFF + VP8Scan[it->i4_];

    VP8MakeIntra4Preds(it);
    for (mode = 0; mode < max_mode; ++mode) {
-      const int alpha = VP8CollectHistogram(src,
-                                            it->yuv_p_ + VP8I4ModeOffsets[mode],
-                                            0, 1);
-      if (alpha > best_mode_alpha) {
+      int alpha;
+
+      memset(&histos[cur_histo], 0, sizeof(histos[cur_histo]));
+      VP8CollectHistogram(src, it->yuv_p_ + VP8I4ModeOffsets[mode],
+                          0, 1, &histos[cur_histo]);
+      alpha = GetAlpha(&histos[cur_histo]);
+      if (IS_BETTER_ALPHA(alpha, best_mode_alpha)) {
        best_mode_alpha = alpha;
        modes[it->i4_] = mode;
+        cur_histo ^= 1;   // keep track of best histo so far.
      }
    }
-    i4_alpha += best_mode_alpha;
+    // accumulate best histogram
+    MergeHistograms(&histos[cur_histo ^ 1], &total_histo);
    // Note: we reuse the original samples for predictors
  } while (VP8IteratorRotateI4(it, it->yuv_in_ + Y_OFF));

-  if (i4_alpha > best_alpha) {
+  i4_alpha = GetAlpha(&total_histo);
+  if (IS_BETTER_ALPHA(i4_alpha, best_alpha)) {
    VP8SetIntra4Mode(it, modes);
-    best_alpha = ClipAlpha(i4_alpha);
+    best_alpha = i4_alpha;
  }
  return best_alpha;
 }

 static int MBAnalyzeBestUVMode(VP8EncIterator* const it) {
-  int best_alpha = -1;
+  int best_alpha = DEFAULT_ALPHA;
  int best_mode = 0;
  const int max_mode = (it->enc_->method_ >= 3) ? MAX_UV_MODE : 4;
  int mode;
  VP8MakeChroma8Preds(it);
  for (mode = 0; mode < max_mode; ++mode) {
-    const int alpha = VP8CollectHistogram(it->yuv_in_ + U_OFF,
-                                          it->yuv_p_ + VP8UVModeOffsets[mode],
-                                          16, 16 + 4 + 4);
-    if (alpha > best_alpha) {
+    VP8Histogram histo = { { 0 } };
+    int alpha;
+    VP8CollectHistogram(it->yuv_in_ + U_OFF,
+                        it->yuv_p_ + VP8UVModeOffsets[mode],
+                        16, 16 + 4 + 4, &histo);
+    alpha = GetAlpha(&histo);
+    if (IS_BETTER_ALPHA(alpha, best_alpha)) {
      best_alpha = alpha;
      best_mode = mode;
    }
@ -305,7 +359,7 @@ static int MBAnalyzeBestUVMode(VP8EncIterator* const it) {
 }

 static void MBAnalyze(VP8EncIterator* const it,
-                      int alphas[256], int* const uv_alpha) {
+                      int alphas[MAX_ALPHA + 1], int* const uv_alpha) {
  const VP8Encoder* const enc = it->enc_;
  int best_alpha, best_uv_alpha;

@ -324,10 +378,11 @@ static void MBAnalyze(VP8EncIterator* const it,
  best_uv_alpha = MBAnalyzeBestUVMode(it);

  // Final susceptibility mix
-  best_alpha = (best_alpha + best_uv_alpha + 1) / 2;
+  best_alpha = (3 * best_alpha + best_uv_alpha + 2) >> 2;
+  best_alpha = FinalAlphaValue(best_alpha);
  alphas[best_alpha]++;
  *uv_alpha += best_uv_alpha;
-  it->mb_->alpha_ = best_alpha;   // Informative only.
+  it->mb_->alpha_ = best_alpha;   // for later remapping.
 }

 //------------------------------------------------------------------------------
@ -342,7 +397,7 @@ static void MBAnalyze(VP8EncIterator* const it,

 int VP8EncAnalyze(VP8Encoder* const enc) {
  int ok = 1;
-  int alphas[256] = { 0 };
+  int alphas[MAX_ALPHA + 1] = { 0 };
  VP8EncIterator it;

  VP8IteratorInit(enc, &it);
--- a/src/enc/frame.c
+++ b/src/enc/frame.c
@ -736,6 +736,7 @@ static void StoreSideInfo(const VP8EncIterator* const it) {
        const int b = (int)((it->luma_bits_ + it->uv_bits_ + 7) >> 3);
        *info = (b > 255) ? 255 : b; break;
      }
+      case 7: *info = mb->alpha_; break;
      default: *info = 0; break;
    };
  }
--- a/src/enc/vp8enci.h
+++ b/src/enc/vp8enci.h
@ -29,9 +29,6 @@ extern "C" {
 #define ENC_MIN_VERSION 2
 #define ENC_REV_VERSION 0

-// size of histogram used by CollectHistogram.
-#define MAX_COEFF_THRESH   64
-
 // intra prediction modes
 enum { B_DC_PRED = 0,   // 4x4 modes
       B_TM_PRED = 1,
@ -162,6 +159,14 @@ static WEBP_INLINE int QUANTDIV(int n, int iQ, int B) {
 }
 extern const uint8_t VP8Zigzag[16];

+// size of histogram used by CollectHistogram.
+#define MAX_COEFF_THRESH   31
+typedef struct VP8Histogram VP8Histogram;
+struct VP8Histogram {
+  // TODO(skal): we only need to store the max_value and last_non_zero actually.
+  int distribution[MAX_COEFF_THRESH + 1];
+};
+
 //------------------------------------------------------------------------------
 // Headers