From 14d27a46be7ec589a1be5e3a20731e3492a49278 Mon Sep 17 00:00:00 2001 From: Pascal Massimino Date: Thu, 10 Dec 2015 08:04:04 +0100 Subject: [PATCH] improve method #2 by merging DistoRefine() and SimpleQuantize() it's now a single function, that reconstructs the intra4x4 block during the scan The I4_PENALTY had to be adjusted. Overall, result is better quality-wise (esp. at q < 50), and a tad faster too. method #0, #1 and #3+ are unchanged Change-Id: If262aeb552397860b3dd532df8df6b1357779222 --- src/enc/quant.c | 79 +++++++++++++++++++++++++++++++------------------ 1 file changed, 51 insertions(+), 28 deletions(-) diff --git a/src/enc/quant.c b/src/enc/quant.c index 002c326b..9e7f076f 100644 --- a/src/enc/quant.c +++ b/src/enc/quant.c @@ -30,7 +30,7 @@ #define SNS_TO_DQ 0.9 // Scaling constant between the sns value and the QP // power-law modulation. Must be strictly less than 1. -#define I4_PENALTY 4000 // Rate-penalty for quick i4/i16 decision +#define I4_PENALTY 14000 // Rate-penalty for quick i4/i16 decision // number of non-zero coeffs below which we consider the block very flat // (and apply a penalty to complex predictions) @@ -1100,12 +1100,18 @@ static void SimpleQuantize(VP8EncIterator* const it, VP8ModeScore* const rd) { } // Refine intra16/intra4 sub-modes based on distortion only (not rate). -static void DistoRefine(VP8EncIterator* const it, int try_both_i4_i16) { - const int is_i16 = (it->mb_->type_ == 1); - score_t best_score = MAX_COST; +static void RefineUsingDistortion(VP8EncIterator* const it, int try_both_modes, + VP8ModeScore* const rd) { - if (try_both_i4_i16 || is_i16) { + score_t best_score = MAX_COST; + score_t score_i4 = (score_t)I4_PENALTY; + int16_t tmp_levels[16][16]; + uint8_t modes_i4[16]; + int nz = 0; int mode; + int is_i16 = try_both_modes || (it->mb_->type_ == 1); + + if (is_i16) { // First, evaluate Intra16 distortion int best_mode = -1; for (mode = 0; mode < NUM_PRED_MODES; ++mode) { const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode]; @@ -1117,39 +1123,56 @@ static void DistoRefine(VP8EncIterator* const it, int try_both_i4_i16) { } } VP8SetIntra16Mode(it, best_mode); + // we'll reconstruct later, if i16 mode actually gets selected } - if (try_both_i4_i16 || !is_i16) { - uint8_t modes_i4[16]; + + // Next, evaluate Intra4 + if (try_both_modes || !is_i16) { // We don't evaluate the rate here, but just account for it through a // constant penalty (i4 mode usually needs more bits compared to i16). - score_t score_i4 = (score_t)I4_PENALTY; - + is_i16 = 0; VP8IteratorStartI4(it); do { - int mode; - int best_sub_mode = -1; - score_t best_sub_score = MAX_COST; + int best_i4_mode = -1; + score_t best_i4_score = MAX_COST; const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC + VP8Scan[it->i4_]; - // TODO(skal): we don't really need the prediction pixels here, - // but just the distortion against 'src'. VP8MakeIntra4Preds(it); for (mode = 0; mode < NUM_BMODES; ++mode) { const uint8_t* const ref = it->yuv_p_ + VP8I4ModeOffsets[mode]; const score_t score = VP8SSE4x4(src, ref); - if (score < best_sub_score) { - best_sub_mode = mode; - best_sub_score = score; + if (score < best_i4_score) { + best_i4_mode = mode; + best_i4_score = score; } } - modes_i4[it->i4_] = best_sub_mode; - score_i4 += best_sub_score; - if (score_i4 >= best_score) break; - } while (VP8IteratorRotateI4(it, it->yuv_in_ + Y_OFF_ENC)); - if (score_i4 < best_score) { - VP8SetIntra4Mode(it, modes_i4); + modes_i4[it->i4_] = best_i4_mode; + score_i4 += best_i4_score; + if (score_i4 >= best_score) { + // Intra4 won't be better than Intra16. Bail out and pick Intra16. + is_i16 = 1; + break; + } else { // reconstruct partial block inside yuv_out2_ buffer + uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF_ENC + VP8Scan[it->i4_]; + nz |= ReconstructIntra4(it, tmp_levels[it->i4_], + src, tmp_dst, best_i4_mode) << it->i4_; + } + } while (VP8IteratorRotateI4(it, it->yuv_out2_ + Y_OFF_ENC)); } + + // Final reconstruction, depending on which mode is selected. + if (!is_i16) { + VP8SetIntra4Mode(it, modes_i4); + memcpy(rd->y_ac_levels, tmp_levels, sizeof(tmp_levels)); + SwapOut(it); + best_score = score_i4; + } else { + nz = ReconstructIntra16(it, rd, it->yuv_out_ + Y_OFF_ENC, it->preds_[0]); } + // ... and UV! + nz |= ReconstructUV(it, rd, it->yuv_out_ + U_OFF_ENC, it->mb_->uv_mode_); + rd->nz = nz; + rd->score = best_score; } //------------------------------------------------------------------------------ @@ -1179,13 +1202,13 @@ int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd, SimpleQuantize(it, rd); } } else { - // For method == 2, pick the best intra4/intra16 based on SSE (~tad slower). - // For method <= 1, we refine intra4 or intra16 (but don't re-examine mode). - DistoRefine(it, (method >= 2)); - SimpleQuantize(it, rd); + // At this point we have heuristically decided intra16 / intra4. + // For method >= 2, pick the best intra4/intra16 based on SSE (~tad slower). + // For method <= 1, we don't re-examine the decision but just go ahead with + // quantization/reconstruction. + RefineUsingDistortion(it, (method >= 2), rd); } is_skipped = (rd->nz == 0); VP8SetSkip(it, is_skipped); return is_skipped; } -