diff --git a/src/dsp/dsp.h b/src/dsp/dsp.h index 24d0c188..a797193e 100644 --- a/src/dsp/dsp.h +++ b/src/dsp/dsp.h @@ -49,8 +49,6 @@ extern VP8CPUInfo VP8GetCPUInfo; //------------------------------------------------------------------------------ // Encoding -int VP8GetAlpha(const int histo[]); - // Transforms // VP8Idct: Does one of two inverse transforms. If do_two is set, the transforms // will be done for (ref, in, dst) and (ref + 4, in + 16, dst + 4). @@ -85,10 +83,11 @@ typedef int (*VP8QuantizeBlock)(int16_t in[16], int16_t out[16], int n, const struct VP8Matrix* const mtx); extern VP8QuantizeBlock VP8EncQuantizeBlock; -// Compute susceptibility based on DCT-coeff histograms: -// the higher, the "easier" the macroblock is to compress. -typedef int (*VP8CHisto)(const uint8_t* ref, const uint8_t* pred, - int start_block, int end_block); +// Collect histogram for susceptibility calculation and accumulate in histo[]. +struct VP8Histogram; +typedef void (*VP8CHisto)(const uint8_t* ref, const uint8_t* pred, + int start_block, int end_block, + struct VP8Histogram* const histo); extern const int VP8DspScan[16 + 4 + 4]; extern VP8CHisto VP8CollectHistogram; diff --git a/src/dsp/enc.c b/src/dsp/enc.c index 02234564..1bac3bf4 100644 --- a/src/dsp/enc.c +++ b/src/dsp/enc.c @@ -17,31 +17,18 @@ extern "C" { #endif +static WEBP_INLINE uint8_t clip_8b(int v) { + return (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255; +} + +static WEBP_INLINE int clip_max(int v, int max) { + return (v > max) ? max : v; +} + //------------------------------------------------------------------------------ // Compute susceptibility based on DCT-coeff histograms: // the higher, the "easier" the macroblock is to compress. -static int ClipAlpha(int alpha) { - return alpha < 0 ? 0 : alpha > 255 ? 255 : alpha; -} - -int VP8GetAlpha(const int histo[MAX_COEFF_THRESH + 1]) { - int num = 0, den = 0, val = 0; - int k; - int alpha; - // note: changing this loop to avoid the numerous "k + 1" slows things down. 
- for (k = 0; k < MAX_COEFF_THRESH; ++k) { - if (histo[k + 1]) { - val += histo[k + 1]; - num += val * (k + 1); - den += (k + 1) * (k + 1); - } - } - // we scale the value to a usable [0..255] range - alpha = den ? 10 * num / den - 5 : 0; - return ClipAlpha(alpha); -} - const int VP8DspScan[16 + 4 + 4] = { // Luma 0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS, @@ -53,27 +40,23 @@ const int VP8DspScan[16 + 4 + 4] = { 8 + 0 * BPS, 12 + 0 * BPS, 8 + 4 * BPS, 12 + 4 * BPS // V }; -static int CollectHistogram(const uint8_t* ref, const uint8_t* pred, - int start_block, int end_block) { - int histo[MAX_COEFF_THRESH + 1] = { 0 }; - int16_t out[16]; - int j, k; +static void CollectHistogram(const uint8_t* ref, const uint8_t* pred, + int start_block, int end_block, + VP8Histogram* const histo) { + int j; for (j = start_block; j < end_block; ++j) { + int k; + int16_t out[16]; + VP8FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out); - // Convert coefficients to bin (within out[]). + // Convert coefficients to bin and accumulate into the histogram. for (k = 0; k < 16; ++k) { - const int v = abs(out[k]) >> 2; - out[k] = (v > MAX_COEFF_THRESH) ? MAX_COEFF_THRESH : v; - } - - // Use bin to update histogram. - for (k = 0; k < 16; ++k) { - histo[out[k]]++; + const int v = abs(out[k]) >> 3; // TODO(skal): add rounding? + const int clipped_value = clip_max(v, MAX_COEFF_THRESH); + histo->distribution[clipped_value]++; } } - - return VP8GetAlpha(histo); } //------------------------------------------------------------------------------ @@ -89,15 +72,12 @@ static void InitTables(void) { if (!tables_ok) { int i; for (i = -255; i <= 255 + 255; ++i) { - clip1[255 + i] = (i < 0) ? 0 : (i > 255) ? 255 : i; + clip1[255 + i] = clip_8b(i); } tables_ok = 1; } } -static WEBP_INLINE uint8_t clip_8b(int v) { - return (!(v & ~0xff)) ? v : v < 0 ? 
0 : 255; -} //------------------------------------------------------------------------------ // Transforms (Paragraph 14.4) diff --git a/src/dsp/enc_sse2.c b/src/dsp/enc_sse2.c index 0986e82e..fc3b68fd 100644 --- a/src/dsp/enc_sse2.c +++ b/src/dsp/enc_sse2.c @@ -25,13 +25,15 @@ extern "C" { // Compute susceptibility based on DCT-coeff histograms: // the higher, the "easier" the macroblock is to compress. -static int CollectHistogramSSE2(const uint8_t* ref, const uint8_t* pred, - int start_block, int end_block) { - int histo[MAX_COEFF_THRESH + 1] = { 0 }; - int16_t out[16]; - int j, k; +static void CollectHistogramSSE2(const uint8_t* ref, const uint8_t* pred, + int start_block, int end_block, + VP8Histogram* const histo) { const __m128i max_coeff_thresh = _mm_set1_epi16(MAX_COEFF_THRESH); + int j; for (j = start_block; j < end_block; ++j) { + int16_t out[16]; + int k; + VP8FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out); // Convert coefficients to bin (within out[]). @@ -47,9 +49,9 @@ static int CollectHistogramSSE2(const uint8_t* ref, const uint8_t* pred, const __m128i xor1 = _mm_xor_si128(out1, sign1); const __m128i abs0 = _mm_sub_epi16(xor0, sign0); const __m128i abs1 = _mm_sub_epi16(xor1, sign1); - // v = abs(out) >> 2 - const __m128i v0 = _mm_srai_epi16(abs0, 2); - const __m128i v1 = _mm_srai_epi16(abs1, 2); + // v = abs(out) >> 3 + const __m128i v0 = _mm_srai_epi16(abs0, 3); + const __m128i v1 = _mm_srai_epi16(abs1, 3); // bin = min(v, MAX_COEFF_THRESH) const __m128i bin0 = _mm_min_epi16(v0, max_coeff_thresh); const __m128i bin1 = _mm_min_epi16(v1, max_coeff_thresh); @@ -58,13 +60,11 @@ static int CollectHistogramSSE2(const uint8_t* ref, const uint8_t* pred, _mm_storeu_si128((__m128i*)&out[8], bin1); } - // Use bin to update histogram. + // Accumulate the bins (stored in out[]) into the histogram. 
for (k = 0; k < 16; ++k) { - histo[out[k]]++; + histo->distribution[out[k]]++; } } - - return VP8GetAlpha(histo); } //------------------------------------------------------------------------------ diff --git a/src/enc/analysis.c b/src/enc/analysis.c index 22cfb492..a32fffcd 100644 --- a/src/enc/analysis.c +++ b/src/enc/analysis.c @@ -23,10 +23,6 @@ extern "C" { #define MAX_ITERS_K_MEANS 6 -static int ClipAlpha(int alpha) { - return alpha < 0 ? 0 : alpha > 255 ? 255 : alpha; -} - //------------------------------------------------------------------------------ // Smooth the segment map by replacing isolated block by the majority of its // neighbours. @@ -115,7 +111,7 @@ static void SetSegmentProbas(VP8Encoder* const enc) { } static WEBP_INLINE int clip(int v, int m, int M) { - return v < m ? m : v > M ? M : v; + return (v < m) ? m : (v > M) ? M : v; } static void SetSegmentAlphas(VP8Encoder* const enc, @@ -141,23 +137,64 @@ static void SetSegmentAlphas(VP8Encoder* const enc, } } +//------------------------------------------------------------------------------ +// Compute susceptibility based on DCT-coeff histograms: +// the higher, the "easier" the macroblock is to compress. + +#define MAX_ALPHA 255 // 8b of precision for susceptibilities. +#define ALPHA_SCALE (2 * MAX_ALPHA) // scaling factor for alpha. +#define DEFAULT_ALPHA (-1) +#define IS_BETTER_ALPHA(alpha, best_alpha) ((alpha) > (best_alpha)) + +static int FinalAlphaValue(int alpha) { + alpha = MAX_ALPHA - alpha; + return clip(alpha, 0, MAX_ALPHA); +} + +static int GetAlpha(const VP8Histogram* const histo) { + int max_value = 0, last_non_zero = 1; + int k; + int alpha; + for (k = 0; k <= MAX_COEFF_THRESH; ++k) { + const int value = histo->distribution[k]; + if (value > 0) { + if (value > max_value) max_value = value; + last_non_zero = k; + } + } + // 'alpha' will later be clipped to [0..MAX_ALPHA] range, clamping outer + // values which happen to be mostly noise. 
This leaves the maximum precision + // for handling the useful small values which contribute most. + alpha = (max_value > 1) ? ALPHA_SCALE * last_non_zero / max_value : 0; + return alpha; +} + +static void MergeHistograms(const VP8Histogram* const in, + VP8Histogram* const out) { + int i; + for (i = 0; i <= MAX_COEFF_THRESH; ++i) { + out->distribution[i] += in->distribution[i]; + } +} + //------------------------------------------------------------------------------ // Simplified k-Means, to assign Nb segments based on alpha-histogram -static void AssignSegments(VP8Encoder* const enc, const int alphas[256]) { +static void AssignSegments(VP8Encoder* const enc, + const int alphas[MAX_ALPHA + 1]) { const int nb = enc->segment_hdr_.num_segments_; int centers[NUM_MB_SEGMENTS]; int weighted_average = 0; - int map[256]; + int map[MAX_ALPHA + 1]; int a, n, k; - int min_a = 0, max_a = 255, range_a; + int min_a = 0, max_a = MAX_ALPHA, range_a; // 'int' type is ok for histo, and won't overflow int accum[NUM_MB_SEGMENTS], dist_accum[NUM_MB_SEGMENTS]; // bracket the input - for (n = 0; n < 256 && alphas[n] == 0; ++n) {} + for (n = 0; n <= MAX_ALPHA && alphas[n] == 0; ++n) {} min_a = n; - for (n = 255; n > min_a && alphas[n] == 0; --n) {} + for (n = MAX_ALPHA; n > min_a && alphas[n] == 0; --n) {} max_a = n; range_a = max_a - min_a; @@ -210,7 +247,7 @@ static void AssignSegments(VP8Encoder* const enc, const int alphas[256]) { VP8MBInfo* const mb = &enc->mb_info_[n]; const int alpha = mb->alpha_; mb->segment_ = map[alpha]; - mb->alpha_ = centers[map[alpha]]; // just for the record. + mb->alpha_ = centers[map[alpha]]; // for the record. } if (nb > 1) { @@ -236,15 +273,19 @@ static void AssignSegments(VP8Encoder* const enc, const int alphas[256]) { static int MBAnalyzeBestIntra16Mode(VP8EncIterator* const it) { const int max_mode = (it->enc_->method_ >= 3) ? 
MAX_INTRA16_MODE : 4; int mode; - int best_alpha = -1; + int best_alpha = DEFAULT_ALPHA; int best_mode = 0; VP8MakeLuma16Preds(it); for (mode = 0; mode < max_mode; ++mode) { - const int alpha = VP8CollectHistogram(it->yuv_in_ + Y_OFF, - it->yuv_p_ + VP8I16ModeOffsets[mode], - 0, 16); - if (alpha > best_alpha) { + VP8Histogram histo = { { 0 } }; + int alpha; + + VP8CollectHistogram(it->yuv_in_ + Y_OFF, + it->yuv_p_ + VP8I16ModeOffsets[mode], + 0, 16, &histo); + alpha = GetAlpha(&histo); + if (IS_BETTER_ALPHA(alpha, best_alpha)) { best_alpha = alpha; best_mode = mode; } @@ -257,45 +298,58 @@ static int MBAnalyzeBestIntra4Mode(VP8EncIterator* const it, int best_alpha) { uint8_t modes[16]; const int max_mode = (it->enc_->method_ >= 3) ? MAX_INTRA4_MODE : NUM_BMODES; - int i4_alpha = 0; + int i4_alpha; + VP8Histogram total_histo = { { 0 } }; + int cur_histo = 0; + VP8IteratorStartI4(it); do { int mode; - int best_mode_alpha = -1; + int best_mode_alpha = DEFAULT_ALPHA; + VP8Histogram histos[2]; const uint8_t* const src = it->yuv_in_ + Y_OFF + VP8Scan[it->i4_]; VP8MakeIntra4Preds(it); for (mode = 0; mode < max_mode; ++mode) { - const int alpha = VP8CollectHistogram(src, - it->yuv_p_ + VP8I4ModeOffsets[mode], - 0, 1); - if (alpha > best_mode_alpha) { + int alpha; + + memset(&histos[cur_histo], 0, sizeof(histos[cur_histo])); + VP8CollectHistogram(src, it->yuv_p_ + VP8I4ModeOffsets[mode], + 0, 1, &histos[cur_histo]); + alpha = GetAlpha(&histos[cur_histo]); + if (IS_BETTER_ALPHA(alpha, best_mode_alpha)) { best_mode_alpha = alpha; modes[it->i4_] = mode; + cur_histo ^= 1; // keep track of best histo so far. 
} } - i4_alpha += best_mode_alpha; + // accumulate best histogram + MergeHistograms(&histos[cur_histo ^ 1], &total_histo); // Note: we reuse the original samples for predictors } while (VP8IteratorRotateI4(it, it->yuv_in_ + Y_OFF)); - if (i4_alpha > best_alpha) { + i4_alpha = GetAlpha(&total_histo); + if (IS_BETTER_ALPHA(i4_alpha, best_alpha)) { VP8SetIntra4Mode(it, modes); - best_alpha = ClipAlpha(i4_alpha); + best_alpha = i4_alpha; } return best_alpha; } static int MBAnalyzeBestUVMode(VP8EncIterator* const it) { - int best_alpha = -1; + int best_alpha = DEFAULT_ALPHA; int best_mode = 0; const int max_mode = (it->enc_->method_ >= 3) ? MAX_UV_MODE : 4; int mode; VP8MakeChroma8Preds(it); for (mode = 0; mode < max_mode; ++mode) { - const int alpha = VP8CollectHistogram(it->yuv_in_ + U_OFF, - it->yuv_p_ + VP8UVModeOffsets[mode], - 16, 16 + 4 + 4); - if (alpha > best_alpha) { + VP8Histogram histo = { { 0 } }; + int alpha; + VP8CollectHistogram(it->yuv_in_ + U_OFF, + it->yuv_p_ + VP8UVModeOffsets[mode], + 16, 16 + 4 + 4, &histo); + alpha = GetAlpha(&histo); + if (IS_BETTER_ALPHA(alpha, best_alpha)) { best_alpha = alpha; best_mode = mode; } @@ -305,7 +359,7 @@ static int MBAnalyzeBestUVMode(VP8EncIterator* const it) { } static void MBAnalyze(VP8EncIterator* const it, - int alphas[256], int* const uv_alpha) { + int alphas[MAX_ALPHA + 1], int* const uv_alpha) { const VP8Encoder* const enc = it->enc_; int best_alpha, best_uv_alpha; @@ -324,10 +378,11 @@ static void MBAnalyze(VP8EncIterator* const it, best_uv_alpha = MBAnalyzeBestUVMode(it); // Final susceptibility mix - best_alpha = (best_alpha + best_uv_alpha + 1) / 2; + best_alpha = (3 * best_alpha + best_uv_alpha + 2) >> 2; + best_alpha = FinalAlphaValue(best_alpha); alphas[best_alpha]++; *uv_alpha += best_uv_alpha; - it->mb_->alpha_ = best_alpha; // Informative only. + it->mb_->alpha_ = best_alpha; // for later remapping. 
} //------------------------------------------------------------------------------ @@ -342,7 +397,7 @@ static void MBAnalyze(VP8EncIterator* const it, int VP8EncAnalyze(VP8Encoder* const enc) { int ok = 1; - int alphas[256] = { 0 }; + int alphas[MAX_ALPHA + 1] = { 0 }; VP8EncIterator it; VP8IteratorInit(enc, &it); diff --git a/src/enc/frame.c b/src/enc/frame.c index bdd36006..262d84ec 100644 --- a/src/enc/frame.c +++ b/src/enc/frame.c @@ -736,6 +736,7 @@ static void StoreSideInfo(const VP8EncIterator* const it) { const int b = (int)((it->luma_bits_ + it->uv_bits_ + 7) >> 3); *info = (b > 255) ? 255 : b; break; } + case 7: *info = mb->alpha_; break; default: *info = 0; break; }; } diff --git a/src/enc/vp8enci.h b/src/enc/vp8enci.h index a77778c0..8a8d90e5 100644 --- a/src/enc/vp8enci.h +++ b/src/enc/vp8enci.h @@ -29,9 +29,6 @@ extern "C" { #define ENC_MIN_VERSION 2 #define ENC_REV_VERSION 0 -// size of histogram used by CollectHistogram. -#define MAX_COEFF_THRESH 64 - // intra prediction modes enum { B_DC_PRED = 0, // 4x4 modes B_TM_PRED = 1, @@ -162,6 +159,14 @@ static WEBP_INLINE int QUANTDIV(int n, int iQ, int B) { } extern const uint8_t VP8Zigzag[16]; +// size of histogram used by CollectHistogram. +#define MAX_COEFF_THRESH 31 +typedef struct VP8Histogram VP8Histogram; +struct VP8Histogram { + // TODO(skal): we only need to store the max_value and last_non_zero actually. + int distribution[MAX_COEFF_THRESH + 1]; +}; + //------------------------------------------------------------------------------ // Headers