use DC error diffusion for U/V at low-quality

This fixes some color smearing due to heavy quantization.
This is only enabled for q <= 30 (cf ERROR_DIFFUSION_QUALITY)

Change-Id: I07e83a4d38461357a32c9e214f7eadc6db73baa9
This commit is contained in:
Pascal Massimino 2017-12-11 05:07:13 -08:00
parent 1c59020b93
commit 96bf07c560
5 changed files with 106 additions and 3 deletions

View File

@ -871,4 +871,3 @@ int VP8EncTokenLoop(VP8Encoder* const enc) {
#endif // DISABLE_TOKEN_BUFFER #endif // DISABLE_TOKEN_BUFFER
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------

View File

@ -26,6 +26,9 @@ static void InitLeft(VP8EncIterator* const it) {
memset(it->u_left_, 129, 8); memset(it->u_left_, 129, 8);
memset(it->v_left_, 129, 8); memset(it->v_left_, 129, 8);
it->left_nz_[8] = 0; it->left_nz_[8] = 0;
if (it->top_derr_ != NULL) {
memset(&it->left_derr_, 0, sizeof(it->left_derr_));
}
} }
static void InitTop(VP8EncIterator* const it) { static void InitTop(VP8EncIterator* const it) {
@ -33,6 +36,9 @@ static void InitTop(VP8EncIterator* const it) {
const size_t top_size = enc->mb_w_ * 16; const size_t top_size = enc->mb_w_ * 16;
memset(enc->y_top_, 127, 2 * top_size); memset(enc->y_top_, 127, 2 * top_size);
memset(enc->nz_, 0, enc->mb_w_ * sizeof(*enc->nz_)); memset(enc->nz_, 0, enc->mb_w_ * sizeof(*enc->nz_));
if (enc->top_derr_ != NULL) {
memset(enc->top_derr_, 0, enc->mb_w_ * sizeof(*enc->top_derr_));
}
} }
void VP8IteratorSetRow(VP8EncIterator* const it, int y) { void VP8IteratorSetRow(VP8EncIterator* const it, int y) {
@ -76,6 +82,7 @@ void VP8IteratorInit(VP8Encoder* const enc, VP8EncIterator* const it) {
it->y_left_ = (uint8_t*)WEBP_ALIGN(it->yuv_left_mem_ + 1); it->y_left_ = (uint8_t*)WEBP_ALIGN(it->yuv_left_mem_ + 1);
it->u_left_ = it->y_left_ + 16 + 16; it->u_left_ = it->y_left_ + 16 + 16;
it->v_left_ = it->u_left_ + 16; it->v_left_ = it->u_left_ + 16;
it->top_derr_ = enc->top_derr_;
VP8IteratorReset(it); VP8IteratorReset(it);
} }
@ -450,4 +457,3 @@ int VP8IteratorRotateI4(VP8EncIterator* const it,
} }
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------

View File

@ -826,6 +826,80 @@ static int ReconstructIntra4(VP8EncIterator* const it,
return nz; return nz;
} }
//------------------------------------------------------------------------------
// DC-error diffusion
// Diffusion weights. We under-correct a bit (3/4th of the error is actually
// diffused) to avoid 'rainbow' chessboard pattern of blocks at q~=0.
// C1 and C2 are integer weights applied to errors that were already
// right-shifted by DSHIFT, i.e. the effective fractions are C1/4 and C2/4.
#define C1 2 // weight of the error sent to the 4x4 block below
#define C2 1 // weight of the error sent to the 4x4 block on the right
#define DSHIFT 2 // right-shift applied to each quantization error
// Quantizes the single coefficient '*v' in place, using entry #0 of 'mtx'
// (zero-threshold, inverse quantizer, bias and quantizer step).
// Returns the signed quantization error (original value minus the value
// written back), right-shifted by DSHIFT bits.
static int QuantizeSingle(int16_t* const v, const VP8Matrix* const mtx) {
  const int coeff = *v;
  const int is_neg = (coeff < 0);
  const int mag = is_neg ? -coeff : coeff;
  // Magnitude after quantization; remains 0 when 'mag' does not exceed the
  // zero-threshold, in which case the whole magnitude becomes the error.
  int level = 0;
  int abs_err;
  if (mag > (int)mtx->zthresh_[0]) {
    level = QUANTDIV(mag, mtx->iq_[0], mtx->bias_[0]) * mtx->q_[0];
  }
  abs_err = mag - level;
  *v = is_neg ? -level : level;
  // The sign is re-applied before the shift, as the error is a signed quantity.
  return (is_neg ? -abs_err : abs_err) >> DSHIFT;
}
// Adds the diffused DC errors coming from the top and left neighbours to the
// DC coefficients of the four 4x4 blocks of each chroma channel, quantizes
// these DC values in place and records the resulting errors in 'rd->derr'.
// Reads it->top_derr_[it->x_], so it must only be called when it->top_derr_
// is not NULL.
//
// Layout, for one channel (dcN is the DC coefficient of tmp[ch * 4 + N]):
//
//           | top[0]  top[1]
//   --------+----------------
//   left[0] |  dc0     dc1       -> errors  e0  e1
//   left[1] |  dc2     dc3                  e2  e3
//
// e0 is consumed locally; {e1, e2, e3} are kept in rd->derr[ch][0..2] so that
// they can later be stored as top[] / left[] for the next blocks.
static void CorrectDCValues(const VP8EncIterator* const it,
                            const VP8Matrix* const mtx,
                            int16_t tmp[][16], VP8ModeScore* const rd) {
  int ch;
  for (ch = 0; ch < 2; ++ch) {
    const int16_t* const top = it->top_derr_[it->x_][ch];
    const int16_t* const left = it->left_derr_[ch];
    // DC coefficients (index 0) of the four blocks of this channel.
    int16_t* const dc0 = &tmp[ch * 4 + 0][0];
    int16_t* const dc1 = &tmp[ch * 4 + 1][0];
    int16_t* const dc2 = &tmp[ch * 4 + 2][0];
    int16_t* const dc3 = &tmp[ch * 4 + 3][0];
    int16_t* const saved = rd->derr[ch];
    int e0, e1, e2, e3;

    // The order matters: each block needs the error of the block above it
    // and of the block on its left.
    *dc0 += C1 * top[0] + C2 * left[0];
    e0 = QuantizeSingle(dc0, mtx);

    *dc1 += C1 * top[1] + C2 * e0;
    e1 = QuantizeSingle(dc1, mtx);

    *dc2 += C1 * e0 + C2 * left[1];
    e2 = QuantizeSingle(dc2, mtx);

    *dc3 += C1 * e1 + C2 * e2;
    e3 = QuantizeSingle(dc3, mtx);

    saved[0] = e1;
    saved[1] = e2;
    saved[2] = e3;
  }
}
// Copies the errors saved in 'rd->derr' (err1, err2, err3, in that order) to
// the iterator's left and top diffusion slots for the current column, for
// both chroma channels. Writes it->top_derr_[it->x_], so it must only be
// called when it->top_derr_ is not NULL.
static void StoreDiffusionErrors(VP8EncIterator* const it,
                                 const VP8ModeScore* const rd) {
  int ch;
  for (ch = 0; ch < 2; ++ch) {
    const int16_t* const saved = rd->derr[ch];   // {err1, err2, err3}
    int16_t* const top_out = it->top_derr_[it->x_][ch];
    int16_t* const left_out = it->left_derr_[ch];
    // Right column of the 2x2 group -> 'left' errors.
    left_out[0] = saved[0];   // err1
    left_out[1] = saved[2];   // err3
    // Bottom row of the 2x2 group -> 'top' errors.
    top_out[0] = saved[1];    // err2
    top_out[1] = saved[2];    // err3
  }
}
#undef C1
#undef C2
#undef DSHIFT
//------------------------------------------------------------------------------
static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd, static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd,
uint8_t* const yuv_out, int mode) { uint8_t* const yuv_out, int mode) {
const VP8Encoder* const enc = it->enc_; const VP8Encoder* const enc = it->enc_;
@ -839,6 +913,8 @@ static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd,
for (n = 0; n < 8; n += 2) { for (n = 0; n < 8; n += 2) {
VP8FTransform2(src + VP8ScanUV[n], ref + VP8ScanUV[n], tmp[n]); VP8FTransform2(src + VP8ScanUV[n], ref + VP8ScanUV[n], tmp[n]);
} }
if (it->top_derr_ != NULL) CorrectDCValues(it, &dqm->uv_, tmp, rd);
if (DO_TRELLIS_UV && it->do_trellis_) { if (DO_TRELLIS_UV && it->do_trellis_) {
int ch, x, y; int ch, x, y;
for (ch = 0, n = 0; ch <= 2; ch += 2) { for (ch = 0, n = 0; ch <= 2; ch += 2) {
@ -1101,6 +1177,9 @@ static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) {
CopyScore(&rd_best, &rd_uv); CopyScore(&rd_best, &rd_uv);
rd->mode_uv = mode; rd->mode_uv = mode;
memcpy(rd->uv_levels, rd_uv.uv_levels, sizeof(rd->uv_levels)); memcpy(rd->uv_levels, rd_uv.uv_levels, sizeof(rd->uv_levels));
if (it->top_derr_ != NULL) {
memcpy(rd->derr, rd_uv.derr, sizeof(rd_uv.derr));
}
SwapPtr(&dst, &tmp_dst); SwapPtr(&dst, &tmp_dst);
} }
} }
@ -1109,6 +1188,9 @@ static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) {
if (dst != dst0) { // copy 16x8 block if needed if (dst != dst0) { // copy 16x8 block if needed
VP8Copy16x8(dst, dst0); VP8Copy16x8(dst, dst0);
} }
if (it->top_derr_ != NULL) { // store diffusion errors for next block
StoreDiffusionErrors(it, rd);
}
} }
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------

View File

@ -120,6 +120,9 @@ static WEBP_INLINE int QUANTDIV(uint32_t n, uint32_t iQ, uint32_t B) {
// Uncomment the following to remove token-buffer code: // Uncomment the following to remove token-buffer code:
// #define DISABLE_TOKEN_BUFFER // #define DISABLE_TOKEN_BUFFER
// quality at or below which error-diffusion is enabled (the check performed
// is 'quality <= ERROR_DIFFUSION_QUALITY')
#define ERROR_DIFFUSION_QUALITY 30
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
// Headers // Headers
@ -201,6 +204,8 @@ typedef struct {
score_t i4_penalty_; // penalty for using Intra4 score_t i4_penalty_; // penalty for using Intra4
} VP8SegmentInfo; } VP8SegmentInfo;
// DC diffusion errors kept along one edge (top or left) for chroma:
// indexed as [channel (u/v)][position along that edge (0 or 1)].
typedef int16_t DError[2 /* u/v */][2 /* top or left */];
// Handy transient struct to accumulate score and info during RD-optimization // Handy transient struct to accumulate score and info during RD-optimization
// and mode evaluation. // and mode evaluation.
typedef struct { typedef struct {
@ -213,6 +218,7 @@ typedef struct {
uint8_t modes_i4[16]; // mode numbers for intra4 predictions uint8_t modes_i4[16]; // mode numbers for intra4 predictions
int mode_uv; // mode number of chroma prediction int mode_uv; // mode number of chroma prediction
uint32_t nz; // non-zero blocks uint32_t nz; // non-zero blocks
int16_t derr[2][3]; // DC diffusion errors for U/V for blocks #1/2/3
} VP8ModeScore; } VP8ModeScore;
// Iterator structure to iterate through macroblocks, pointing to the // Iterator structure to iterate through macroblocks, pointing to the
@ -242,6 +248,9 @@ typedef struct {
int count_down0_; // starting counter value (for progress) int count_down0_; // starting counter value (for progress)
int percent0_; // saved initial progress percent int percent0_; // saved initial progress percent
DError left_derr_; // left error diffusion (u/v)
DError *top_derr_; // top diffusion error - NULL if disabled
uint8_t* y_left_; // left luma samples (addressable from index -1 to 15). uint8_t* y_left_; // left luma samples (addressable from index -1 to 15).
uint8_t* u_left_; // left u samples (addressable from index -1 to 7) uint8_t* u_left_; // left u samples (addressable from index -1 to 7)
uint8_t* v_left_; // left v samples (addressable from index -1 to 7) uint8_t* v_left_; // left v samples (addressable from index -1 to 7)
@ -401,6 +410,7 @@ struct VP8Encoder {
uint8_t* uv_top_; // top u/v samples. uint8_t* uv_top_; // top u/v samples.
// U and V are packed into 16 bytes (8 U + 8 V) // U and V are packed into 16 bytes (8 U + 8 V)
LFStats* lf_stats_; // autofilter stats (if NULL, autofilter is off) LFStats* lf_stats_; // autofilter stats (if NULL, autofilter is off)
DError* top_derr_; // diffusion error (NULL if disabled)
}; };
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------

View File

@ -159,12 +159,15 @@ static VP8Encoder* InitVP8Encoder(const WebPConfig* const config,
+ WEBP_ALIGN_CST; // align all + WEBP_ALIGN_CST; // align all
const size_t lf_stats_size = const size_t lf_stats_size =
config->autofilter ? sizeof(*enc->lf_stats_) + WEBP_ALIGN_CST : 0; config->autofilter ? sizeof(*enc->lf_stats_) + WEBP_ALIGN_CST : 0;
const size_t top_derr_size = (config->quality <= ERROR_DIFFUSION_QUALITY) ?
mb_w * sizeof(*enc->top_derr_) : 0;
uint8_t* mem; uint8_t* mem;
const uint64_t size = (uint64_t)sizeof(*enc) // main struct const uint64_t size = (uint64_t)sizeof(*enc) // main struct
+ WEBP_ALIGN_CST // cache alignment + WEBP_ALIGN_CST // cache alignment
+ info_size // modes info + info_size // modes info
+ preds_size // prediction modes + preds_size // prediction modes
+ samples_size // top/left samples + samples_size // top/left samples
+ top_derr_size // top diffusion error
+ nz_size // coeff context bits + nz_size // coeff context bits
+ lf_stats_size; // autofilter stats + lf_stats_size; // autofilter stats
@ -175,11 +178,12 @@ static VP8Encoder* InitVP8Encoder(const WebPConfig* const config,
" info: %ld\n" " info: %ld\n"
" preds: %ld\n" " preds: %ld\n"
" top samples: %ld\n" " top samples: %ld\n"
" top diffusion: %ld\n"
" non-zero: %ld\n" " non-zero: %ld\n"
" lf-stats: %ld\n" " lf-stats: %ld\n"
" total: %ld\n", " total: %ld\n",
sizeof(*enc) + WEBP_ALIGN_CST, info_size, sizeof(*enc) + WEBP_ALIGN_CST, info_size,
preds_size, samples_size, nz_size, lf_stats_size, size); preds_size, samples_size, top_derr_size, nz_size, lf_stats_size, size);
printf("Transient object sizes:\n" printf("Transient object sizes:\n"
" VP8EncIterator: %ld\n" " VP8EncIterator: %ld\n"
" VP8ModeScore: %ld\n" " VP8ModeScore: %ld\n"
@ -219,6 +223,8 @@ static VP8Encoder* InitVP8Encoder(const WebPConfig* const config,
enc->y_top_ = mem; enc->y_top_ = mem;
enc->uv_top_ = enc->y_top_ + top_stride; enc->uv_top_ = enc->y_top_ + top_stride;
mem += 2 * top_stride; mem += 2 * top_stride;
enc->top_derr_ = top_derr_size ? (DError*)mem : NULL;
mem += top_derr_size;
assert(mem <= (uint8_t*)enc + size); assert(mem <= (uint8_t*)enc + size);
enc->config_ = config; enc->config_ = config;