From bad775715a7d01f80e9634a14ac3ce9a0f7136d9 Mon Sep 17 00:00:00 2001
From: Pascal Massimino
Date: Wed, 10 Dec 2014 10:44:57 +0100
Subject: [PATCH] simplify the Histogram struct, to only store max_value and
 last_nz

we don't need to store the whole distribution in order to compute the alpha

Later, we can incorporate the max_value / last_non_zero bookkeeping
in SSE2 directly.

Change-Id: I748ccea4ac17965d7afcab91845ef01be3aa3e15
---
 src/dsp/dsp.h      | 17 +++++++++++++----
 src/dsp/enc.c      | 20 +++++++++++++++++++-
 src/dsp/enc_neon.c |  4 +++-
 src/dsp/enc_sse2.c |  4 +++-
 src/enc/analysis.c | 39 +++++++++++++++++++++------------------
 src/enc/vp8enci.h  |  8 --------
 6 files changed, 59 insertions(+), 33 deletions(-)

diff --git a/src/dsp/dsp.h b/src/dsp/dsp.h
index 4ea304b7..d35235ed 100644
--- a/src/dsp/dsp.h
+++ b/src/dsp/dsp.h
@@ -151,13 +151,22 @@ typedef int (*VP8QuantizeBlockWHT)(int16_t in[16], int16_t out[16],
                                    const struct VP8Matrix* const mtx);
 extern VP8QuantizeBlockWHT VP8EncQuantizeBlockWHT;
 
-// Collect histogram for susceptibility calculation and accumulate in histo[].
-struct VP8Histogram;
+extern const int VP8DspScan[16 + 4 + 4];
+
+// Collect histogram for susceptibility calculation.
+#define MAX_COEFF_THRESH 31 // size of histogram used by CollectHistogram.
+typedef struct {
+  // We only need to store max_value and last_non_zero, not the distribution.
+  int max_value;
+  int last_non_zero;
+} VP8Histogram;
 typedef void (*VP8CHisto)(const uint8_t* ref, const uint8_t* pred,
                           int start_block, int end_block,
-                          struct VP8Histogram* const histo);
-extern const int VP8DspScan[16 + 4 + 4];
+                          VP8Histogram* const histo);
 extern VP8CHisto VP8CollectHistogram;
+// General-purpose util function to help VP8CollectHistogram().
+void VP8LSetHistogramData(const int distribution[MAX_COEFF_THRESH + 1],
+                          VP8Histogram* const histo);
 
 // must be called before using any of the above
 WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInit(void);
diff --git a/src/dsp/enc.c b/src/dsp/enc.c
index 2b85537c..c22986ae 100644
--- a/src/dsp/enc.c
+++ b/src/dsp/enc.c
@@ -40,10 +40,27 @@ const int VP8DspScan[16 + 4 + 4] = {
   8 + 0 * BPS, 12 + 0 * BPS, 8 + 4 * BPS, 12 + 4 * BPS // V
 };
 
+// general-purpose util function
+void VP8LSetHistogramData(const int distribution[MAX_COEFF_THRESH + 1],
+                          VP8Histogram* const histo) {
+  int max_value = 0, last_non_zero = 1;
+  int k;
+  for (k = 0; k <= MAX_COEFF_THRESH; ++k) {
+    const int value = distribution[k];
+    if (value > 0) {
+      if (value > max_value) max_value = value;
+      last_non_zero = k;
+    }
+  }
+  histo->max_value = max_value;
+  histo->last_non_zero = last_non_zero;
+}
+
 static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
                              int start_block, int end_block,
                              VP8Histogram* const histo) {
   int j;
+  int distribution[MAX_COEFF_THRESH + 1] = { 0 };
   for (j = start_block; j < end_block; ++j) {
     int k;
     int16_t out[16];
@@ -54,9 +71,10 @@ static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
     for (k = 0; k < 16; ++k) {
       const int v = abs(out[k]) >> 3; // TODO(skal): add rounding?
       const int clipped_value = clip_max(v, MAX_COEFF_THRESH);
-      histo->distribution[clipped_value]++;
+      ++distribution[clipped_value];
     }
   }
+  VP8LSetHistogramData(distribution, histo);
 }
 
 //------------------------------------------------------------------------------
diff --git a/src/dsp/enc_neon.c b/src/dsp/enc_neon.c
index 2e942d16..68e49455 100644
--- a/src/dsp/enc_neon.c
+++ b/src/dsp/enc_neon.c
@@ -727,6 +727,7 @@ static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
                              VP8Histogram* const histo) {
   const uint16x8_t max_coeff_thresh = vdupq_n_u16(MAX_COEFF_THRESH);
   int j;
+  int distribution[MAX_COEFF_THRESH + 1] = { 0 };
   for (j = start_block; j < end_block; ++j) {
     int16_t out[16];
     FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out);
@@ -744,10 +745,11 @@ static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
       vst1q_s16(out + 8, vreinterpretq_s16_u16(b3));
       // Convert coefficients to bin.
       for (k = 0; k < 16; ++k) {
-        histo->distribution[out[k]]++;
+        ++distribution[out[k]];
       }
     }
   }
+  VP8LSetHistogramData(distribution, histo);
 }
 
 //------------------------------------------------------------------------------
diff --git a/src/dsp/enc_sse2.c b/src/dsp/enc_sse2.c
index 1ce0efd7..571bd158 100644
--- a/src/dsp/enc_sse2.c
+++ b/src/dsp/enc_sse2.c
@@ -59,6 +59,7 @@ static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
                              VP8Histogram* const histo) {
   const __m128i max_coeff_thresh = _mm_set1_epi16(MAX_COEFF_THRESH);
   int j;
+  int distribution[MAX_COEFF_THRESH + 1] = { 0 };
   for (j = start_block; j < end_block; ++j) {
     int16_t out[16];
     int k;
@@ -91,9 +92,10 @@ static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
 
     // Convert coefficients to bin.
     for (k = 0; k < 16; ++k) {
-      histo->distribution[out[k]]++;
+      ++distribution[out[k]];
     }
   }
+  VP8LSetHistogramData(distribution, histo);
 }
 
 //------------------------------------------------------------------------------
diff --git a/src/enc/analysis.c b/src/enc/analysis.c
index e019465b..b0404ffd 100644
--- a/src/enc/analysis.c
+++ b/src/enc/analysis.c
@@ -111,28 +111,28 @@ static int FinalAlphaValue(int alpha) {
 }
 
 static int GetAlpha(const VP8Histogram* const histo) {
-  int max_value = 0, last_non_zero = 1;
-  int k;
-  int alpha;
-  for (k = 0; k <= MAX_COEFF_THRESH; ++k) {
-    const int value = histo->distribution[k];
-    if (value > 0) {
-      if (value > max_value) max_value = value;
-      last_non_zero = k;
-    }
-  }
   // 'alpha' will later be clipped to [0..MAX_ALPHA] range, clamping outer
   // values which happen to be mostly noise. This leaves the maximum precision
   // for handling the useful small values which contribute most.
-  alpha = (max_value > 1) ? ALPHA_SCALE * last_non_zero / max_value : 0;
+  const int max_value = histo->max_value;
+  const int last_non_zero = histo->last_non_zero;
+  const int alpha =
+      (max_value > 1) ? ALPHA_SCALE * last_non_zero / max_value : 0;
   return alpha;
 }
 
+static void InitHistogram(VP8Histogram* const histo) {
+  histo->max_value = 0;
+  histo->last_non_zero = 1;
+}
+
 static void MergeHistograms(const VP8Histogram* const in,
                             VP8Histogram* const out) {
-  int i;
-  for (i = 0; i <= MAX_COEFF_THRESH; ++i) {
-    out->distribution[i] += in->distribution[i];
+  if (in->max_value > out->max_value) {
+    out->max_value = in->max_value;
+  }
+  if (in->last_non_zero > out->last_non_zero) {
+    out->last_non_zero = in->last_non_zero;
   }
 }
 
@@ -245,9 +245,10 @@ static int MBAnalyzeBestIntra16Mode(VP8EncIterator* const it) {
 
   VP8MakeLuma16Preds(it);
   for (mode = 0; mode < max_mode; ++mode) {
-    VP8Histogram histo = { { 0 } };
+    VP8Histogram histo;
     int alpha;
 
+    InitHistogram(&histo);
     VP8CollectHistogram(it->yuv_in_ + Y_OFF,
                         it->yuv_p_ + VP8I16ModeOffsets[mode],
                         0, 16, &histo);
@@ -266,8 +267,9 @@ static int MBAnalyzeBestIntra4Mode(VP8EncIterator* const it,
   uint8_t modes[16];
   const int max_mode = MAX_INTRA4_MODE;
   int i4_alpha;
-  VP8Histogram total_histo = { { 0 } };
+  VP8Histogram total_histo;
   int cur_histo = 0;
+  InitHistogram(&total_histo);
 
   VP8IteratorStartI4(it);
   do {
@@ -280,7 +282,7 @@ static int MBAnalyzeBestIntra4Mode(VP8EncIterator* const it,
     for (mode = 0; mode < max_mode; ++mode) {
       int alpha;
 
-      memset(&histos[cur_histo], 0, sizeof(histos[cur_histo]));
+      InitHistogram(&histos[cur_histo]);
       VP8CollectHistogram(src, it->yuv_p_ + VP8I4ModeOffsets[mode],
                           0, 1, &histos[cur_histo]);
       alpha = GetAlpha(&histos[cur_histo]);
@@ -311,8 +313,9 @@ static int MBAnalyzeBestUVMode(VP8EncIterator* const it) {
 
   VP8MakeChroma8Preds(it);
   for (mode = 0; mode < max_mode; ++mode) {
-    VP8Histogram histo = { { 0 } };
+    VP8Histogram histo;
     int alpha;
+    InitHistogram(&histo);
     VP8CollectHistogram(it->yuv_in_ + U_OFF,
                         it->yuv_p_ + VP8UVModeOffsets[mode],
                         16, 16 + 4 + 4, &histo);
diff --git a/src/enc/vp8enci.h b/src/enc/vp8enci.h
index cd06113e..c96a250a 100644
--- a/src/enc/vp8enci.h
+++ b/src/enc/vp8enci.h
@@ -141,14 +141,6 @@ static WEBP_INLINE int QUANTDIV(uint32_t n, uint32_t iQ, uint32_t B) {
   return (int)((n * iQ + B) >> QFIX);
 }
 
-// size of histogram used by CollectHistogram.
-#define MAX_COEFF_THRESH 31
-typedef struct VP8Histogram VP8Histogram;
-struct VP8Histogram {
-  // TODO(skal): we only need to store the max_value and last_non_zero actually.
-  int distribution[MAX_COEFF_THRESH + 1];
-};
-
 // Uncomment the following to remove token-buffer code:
 // #define DISABLE_TOKEN_BUFFER
 