diff --git a/src/enc/frame_enc.c b/src/enc/frame_enc.c index 2b0dc664..15a81ae9 100644 --- a/src/enc/frame_enc.c +++ b/src/enc/frame_enc.c @@ -871,4 +871,3 @@ int VP8EncTokenLoop(VP8Encoder* const enc) { #endif // DISABLE_TOKEN_BUFFER //------------------------------------------------------------------------------ - diff --git a/src/enc/iterator_enc.c b/src/enc/iterator_enc.c index cfacfd24..7c47d512 100644 --- a/src/enc/iterator_enc.c +++ b/src/enc/iterator_enc.c @@ -26,6 +26,9 @@ static void InitLeft(VP8EncIterator* const it) { memset(it->u_left_, 129, 8); memset(it->v_left_, 129, 8); it->left_nz_[8] = 0; + if (it->top_derr_ != NULL) { + memset(&it->left_derr_, 0, sizeof(it->left_derr_)); + } } static void InitTop(VP8EncIterator* const it) { @@ -33,6 +36,9 @@ static void InitTop(VP8EncIterator* const it) { const size_t top_size = enc->mb_w_ * 16; memset(enc->y_top_, 127, 2 * top_size); memset(enc->nz_, 0, enc->mb_w_ * sizeof(*enc->nz_)); + if (enc->top_derr_ != NULL) { + memset(enc->top_derr_, 0, enc->mb_w_ * sizeof(*enc->top_derr_)); + } } void VP8IteratorSetRow(VP8EncIterator* const it, int y) { @@ -76,6 +82,7 @@ void VP8IteratorInit(VP8Encoder* const enc, VP8EncIterator* const it) { it->y_left_ = (uint8_t*)WEBP_ALIGN(it->yuv_left_mem_ + 1); it->u_left_ = it->y_left_ + 16 + 16; it->v_left_ = it->u_left_ + 16; + it->top_derr_ = enc->top_derr_; VP8IteratorReset(it); } @@ -450,4 +457,3 @@ int VP8IteratorRotateI4(VP8EncIterator* const it, } //------------------------------------------------------------------------------ - diff --git a/src/enc/quant_enc.c b/src/enc/quant_enc.c index 3b1a3129..67288a74 100644 --- a/src/enc/quant_enc.c +++ b/src/enc/quant_enc.c @@ -826,6 +826,80 @@ static int ReconstructIntra4(VP8EncIterator* const it, return nz; } +//------------------------------------------------------------------------------ +// DC-error diffusion + +// Diffusion weights. 
We under-correct a bit (3/4th of the error is actually +// diffused) to avoid 'rainbow' chessboard pattern of blocks at q~=0. +#define C1 2 // weight, in units of 1/(1 << DSHIFT), of the error sent to the 4x4 block below +#define C2 1 // weight, in units of 1/(1 << DSHIFT), of the error sent to the 4x4 block on the right +#define DSHIFT 2 // errors are right-shifted by DSHIFT bits, so (C1 + C2) / 4 = 3/4th of the error is diffused + +// Quantize as usual, but also compute and return the quantization error. On return, *v holds the signed value QUANTDIV(|v|, iq_[0], bias_[0]) * mtx->q_[0], or 0 if |v| does not exceed mtx->zthresh_[0]. +// The returned error is already scaled down (right-shifted by DSHIFT bits). NOTE(review): '>>' on a negative int is implementation-defined in C. +static int QuantizeSingle(int16_t* const v, const VP8Matrix* const mtx) { + int V = *v; + const int sign = (V < 0); + if (sign) V = -V; + if (V > (int)mtx->zthresh_[0]) { + const int qV = QUANTDIV(V, mtx->iq_[0], mtx->bias_[0]) * mtx->q_[0]; + const int err = (V - qV); + *v = sign ? -qV : qV; + return (sign ? -err : err) >> DSHIFT; + } + *v = 0; + return (sign ? -V : V) >> DSHIFT; +} + +static void CorrectDCValues(const VP8EncIterator* const it, + const VP8Matrix* const mtx, + int16_t tmp[][16], VP8ModeScore* const rd) { // for each of the two channels (ch = 0, 1): adds the weighted top/left errors to coefficient 0 of the four blocks tmp[4 * ch + 0..3], quantizing each one in turn with QuantizeSingle() + // | top[0] | top[1] + // --------+--------+--------- + // left[0] | tmp[0] tmp[1] <-> err0 err1 + // left[1] | tmp[2] tmp[3] err2 err3 + // + // Final errors {err1,err2,err3} are preserved (in rd->derr[ch][0..2]) and later restored + // as top[]/left[] on the next block. 
+ int ch; + for (ch = 0; ch <= 1; ++ch) { + const int16_t* const top = it->top_derr_[it->x_][ch]; + const int16_t* const left = it->left_derr_[ch]; + int16_t (* const c)[16] = &tmp[ch * 4]; + int err0, err1, err2, err3; + c[0][0] += C1 * top[0] + C2 * left[0]; + err0 = QuantizeSingle(&c[0][0], mtx); + c[1][0] += C1 * top[1] + C2 * err0; + err1 = QuantizeSingle(&c[1][0], mtx); + c[2][0] += C1 * err0 + C2 * left[1]; + err2 = QuantizeSingle(&c[2][0], mtx); + c[3][0] += C1 * err1 + C2 * err2; + err3 = QuantizeSingle(&c[3][0], mtx); + rd->derr[ch][0] = err1; + rd->derr[ch][1] = err2; + rd->derr[ch][2] = err3; + } +} + +static void StoreDiffusionErrors(VP8EncIterator* const it, + const VP8ModeScore* const rd) { + int ch; + for (ch = 0; ch <= 1; ++ch) { + int16_t* const top = it->top_derr_[it->x_][ch]; + int16_t* const left = it->left_derr_[ch]; + left[0] = rd->derr[ch][0]; // restore err1 (top-right block) as the left error of the top row + left[1] = rd->derr[ch][2]; // ... err3 (bottom-right block) as the left error of the bottom row + top[0] = rd->derr[ch][1]; // ... err2 (bottom-left block) as the top error of the left column + top[1] = rd->derr[ch][2]; // ... err3 (bottom-right block) as the top error of the right column. 
+ } +} + +#undef C1 +#undef C2 +#undef DSHIFT + +//------------------------------------------------------------------------------ + static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd, uint8_t* const yuv_out, int mode) { const VP8Encoder* const enc = it->enc_; @@ -839,6 +913,8 @@ static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd, for (n = 0; n < 8; n += 2) { VP8FTransform2(src + VP8ScanUV[n], ref + VP8ScanUV[n], tmp[n]); } + if (it->top_derr_ != NULL) CorrectDCValues(it, &dqm->uv_, tmp, rd); + if (DO_TRELLIS_UV && it->do_trellis_) { int ch, x, y; for (ch = 0, n = 0; ch <= 2; ch += 2) { @@ -1101,6 +1177,9 @@ static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) { CopyScore(&rd_best, &rd_uv); rd->mode_uv = mode; memcpy(rd->uv_levels, rd_uv.uv_levels, sizeof(rd->uv_levels)); + if (it->top_derr_ != NULL) { + memcpy(rd->derr, rd_uv.derr, sizeof(rd_uv.derr)); + } SwapPtr(&dst, &tmp_dst); } } @@ -1109,6 +1188,9 @@ static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) { if (dst != dst0) { // copy 16x8 block if needed VP8Copy16x8(dst, dst0); } + if (it->top_derr_ != NULL) { // store diffusion errors for next block + StoreDiffusionErrors(it, rd); + } } //------------------------------------------------------------------------------ diff --git a/src/enc/vp8i_enc.h b/src/enc/vp8i_enc.h index 3463491e..d2fce941 100644 --- a/src/enc/vp8i_enc.h +++ b/src/enc/vp8i_enc.h @@ -120,6 +120,9 @@ static WEBP_INLINE int QUANTDIV(uint32_t n, uint32_t iQ, uint32_t B) { // Uncomment the following to remove token-buffer code: // #define DISABLE_TOKEN_BUFFER +// quality at or below which error-diffusion is enabled +#define ERROR_DIFFUSION_QUALITY 30 + //------------------------------------------------------------------------------ // Headers @@ -201,6 +204,8 @@ typedef struct { score_t i4_penalty_; // penalty for using Intra4 } VP8SegmentInfo; +typedef int16_t DError[2 /* u/v */][2 /* two entries, for a top or a left edge */]; + // Handy 
transient struct to accumulate score and info during RD-optimization // and mode evaluation. typedef struct { @@ -213,6 +218,7 @@ typedef struct { uint8_t modes_i4[16]; // mode numbers for intra4 predictions int mode_uv; // mode number of chroma prediction uint32_t nz; // non-zero blocks + int16_t derr[2][3]; // DC diffusion errors for U/V for blocks #1/2/3 } VP8ModeScore; // Iterator structure to iterate through macroblocks, pointing to the @@ -242,6 +248,9 @@ typedef struct { int count_down0_; // starting counter value (for progress) int percent0_; // saved initial progress percent + DError left_derr_; // left error diffusion (u/v) + DError *top_derr_; // top diffusion error - NULL if disabled + uint8_t* y_left_; // left luma samples (addressable from index -1 to 15). uint8_t* u_left_; // left u samples (addressable from index -1 to 7) uint8_t* v_left_; // left v samples (addressable from index -1 to 7) @@ -401,6 +410,7 @@ struct VP8Encoder { uint8_t* uv_top_; // top u/v samples. // U and V are packed into 16 bytes (8 U + 8 V) LFStats* lf_stats_; // autofilter stats (if NULL, autofilter is off) + DError* top_derr_; // diffusion error (NULL if disabled) }; //------------------------------------------------------------------------------ diff --git a/src/enc/webp_enc.c b/src/enc/webp_enc.c index 283cda8e..3fc895f9 100644 --- a/src/enc/webp_enc.c +++ b/src/enc/webp_enc.c @@ -159,12 +159,15 @@ static VP8Encoder* InitVP8Encoder(const WebPConfig* const config, + WEBP_ALIGN_CST; // align all const size_t lf_stats_size = config->autofilter ? sizeof(*enc->lf_stats_) + WEBP_ALIGN_CST : 0; + const size_t top_derr_size = (config->quality <= ERROR_DIFFUSION_QUALITY) ? 
+ mb_w * sizeof(*enc->top_derr_) : 0; uint8_t* mem; const uint64_t size = (uint64_t)sizeof(*enc) // main struct + WEBP_ALIGN_CST // cache alignment + info_size // modes info + preds_size // prediction modes + samples_size // top/left samples + + top_derr_size // top diffusion error + nz_size // coeff context bits + lf_stats_size; // autofilter stats @@ -175,11 +178,12 @@ static VP8Encoder* InitVP8Encoder(const WebPConfig* const config, " info: %ld\n" " preds: %ld\n" " top samples: %ld\n" + " top diffusion: %ld\n" " non-zero: %ld\n" " lf-stats: %ld\n" " total: %ld\n", sizeof(*enc) + WEBP_ALIGN_CST, info_size, - preds_size, samples_size, nz_size, lf_stats_size, size); + preds_size, samples_size, top_derr_size, nz_size, lf_stats_size, size); printf("Transient object sizes:\n" " VP8EncIterator: %ld\n" " VP8ModeScore: %ld\n" @@ -219,6 +223,8 @@ static VP8Encoder* InitVP8Encoder(const WebPConfig* const config, enc->y_top_ = mem; enc->uv_top_ = enc->y_top_ + top_stride; mem += 2 * top_stride; + enc->top_derr_ = top_derr_size ? (DError*)mem : NULL; + mem += top_derr_size; assert(mem <= (uint8_t*)enc + size); enc->config_ = config;