add a -dither dithering option to the decoder

Even at high quality setting, the U/V quantizer step is limited
to 4 which can lead to banding on gradient.
This option allows to selectively apply some randomness to
potentially flattened-out U/V blocks and attenuate the banding.

This option is off by default in 'dwebp', but set to -dither 50
by default in 'vwebp'.

Note: depending on the number of blocks selectively dithered,
we can have up to a 10% slow-down in decoding speed it seems.

Change-Id: Icc2446007f33ddacb60b3a80a9e63f2d5ad162de
This commit is contained in:
skal 2013-11-26 22:59:02 +01:00 committed by Pascal Massimino
parent e812401299
commit cbdd3e6e53
11 changed files with 138 additions and 11 deletions

View File

@ -555,6 +555,8 @@ static void Help(void) {
" -version .... print version number and exit.\n"
" -nofancy ..... don't use the fancy YUV420 upscaler.\n"
" -nofilter .... disable in-loop filtering.\n"
" -nodither .... disable dithering.\n"
" -dither <d> .. dithering strength (in 0..100)\n"
" -mt .......... use multi-threading\n"
" -crop <x> <y> <w> <h> ... crop output with the given rectangle\n"
" -scale <w> <h> .......... scale the output (*after* any cropping)\n"
@ -625,6 +627,10 @@ int main(int argc, const char *argv[]) {
format = YUV;
} else if (!strcmp(argv[c], "-mt")) {
config.options.use_threads = 1;
} else if (!strcmp(argv[c], "-nodither")) {
config.options.dithering_strength = 0;
} else if (!strcmp(argv[c], "-dither") && c < argc - 1) {
config.options.dithering_strength = strtol(argv[++c], NULL, 0);
} else if (!strcmp(argv[c], "-crop") && c < argc - 4) {
config.options.use_cropping = 1;
config.options.crop_left = strtol(argv[++c], NULL, 0);
@ -719,7 +725,7 @@ int main(int argc, const char *argv[]) {
if (!incremental) {
status = WebPDecode(data, data_size, &config);
} else {
WebPIDecoder* const idec = WebPINewDecoder(output_buffer);
WebPIDecoder* const idec = WebPIDecode(data, data_size, &config);
if (idec == NULL) {
fprintf(stderr, "Failed during WebPINewDecoder().\n");
status = VP8_STATUS_OUT_OF_MEMORY;

View File

@ -376,6 +376,7 @@ static void Help(void) {
" -noicc ....... don't use the icc profile if present.\n"
" -nofancy ..... don't use the fancy YUV420 upscaler.\n"
" -nofilter .... disable in-loop filtering.\n"
" -dither <int> dithering strength (0..100). Default=50.\n"
" -mt .......... use multi-threading.\n"
" -info ........ print info.\n"
" -h ....... this help message.\n"
@ -397,6 +398,7 @@ int main(int argc, char *argv[]) {
fprintf(stderr, "Library version mismatch!\n");
return -1;
}
config->options.dithering_strength = 50;
kParams.use_color_profile = 1;
for (c = 1; c < argc; ++c) {
@ -409,6 +411,8 @@ int main(int argc, char *argv[]) {
config->options.no_fancy_upsampling = 1;
} else if (!strcmp(argv[c], "-nofilter")) {
config->options.bypass_filtering = 1;
} else if (!strcmp(argv[c], "-dither") && c + 1 < argc) {
config->options.dithering_strength = strtol(argv[++c], NULL, 0);
} else if (!strcmp(argv[c], "-info")) {
kParams.print_info = 1;
} else if (!strcmp(argv[c], "-version")) {

View File

@ -1,5 +1,5 @@
.\" Hey, EMACS: -*- nroff -*-
.TH DWEBP 1 "May 10, 2013"
.TH DWEBP 1 "November 26, 2013"
.SH NAME
dwebp \- decompress a WebP file to an image file
.SH SYNOPSIS
@ -55,7 +55,15 @@ edges (especially the red ones), but should be faster.
.B \-nofilter
Don't use the in-loop filtering process even if it is required by
the bitstream. This may produce visible blocks on the non-compliant output,
but will make the decoding faster.
but it will make the decoding faster.
.TP
.B \-dither " strength
Specify a dithering \fBstrength\fP between 0 and 100. Dithering is a
post-processing effect applied to chroma components in lossy compression.
It helps by smoothing gradients and avoiding banding artifacts.
.TP
.B \-nodither
Disable all dithering (default).
.TP
.B \-mt
Use multi-threading for decoding, if possible.

View File

@ -148,6 +148,82 @@ static void PrecomputeFilterStrengths(VP8Decoder* const dec) {
}
}
//------------------------------------------------------------------------------
// Dithering
#define DITHER_AMP_TAB_SIZE 12
static const int kQuantToDitherAmp[DITHER_AMP_TAB_SIZE] = {
// roughly, it's dqm->uv_mat_[1]
8, 7, 6, 4, 4, 2, 2, 2, 1, 1, 1, 1
};
void VP8InitDithering(const WebPDecoderOptions* const options,
VP8Decoder* const dec) {
assert(dec != NULL);
if (options != NULL) {
const int d = options->dithering_strength;
const int max_amp = (1 << VP8_RANDOM_DITHER_FIX) - 1;
const int f = (d < 0) ? 0 : (d > 100) ? max_amp : (d * max_amp / 100);
if (f > 0) {
int s;
int all_amp = 0;
for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
VP8QuantMatrix* const dqm = &dec->dqm_[s];
if (dqm->uv_quant_ < DITHER_AMP_TAB_SIZE) {
// TODO(skal): should we specially dither more for uv_quant_ < 0?
const int idx = (dqm->uv_quant_ < 0) ? 0 : dqm->uv_quant_;
dqm->dither_ = (f * kQuantToDitherAmp[idx]) >> 3;
}
all_amp |= dqm->dither_;
}
if (all_amp != 0) {
VP8InitRandom(&dec->dithering_rg_, 1.0f);
dec->dither_ = 1;
}
}
}
}
// minimal amp that will provide a non-zero dithering effect
#define MIN_DITHER_AMP 4
#define DITHER_DESCALE 4
#define DITHER_DESCALE_ROUNDER (1 << (DITHER_DESCALE - 1))
#define DITHER_AMP_BITS 8
#define DITHER_AMP_CENTER (1 << DITHER_AMP_BITS)
static void Dither8x8(VP8Random* const rg, uint8_t* dst, int bps, int amp) {
int i, j;
for (j = 0; j < 8; ++j) {
for (i = 0; i < 8; ++i) {
// TODO: could be made faster with SSE2
const int bits =
VP8RandomBits2(rg, DITHER_AMP_BITS + 1, amp) - DITHER_AMP_CENTER;
// Convert to range: [-2,2] for dither=50, [-4,4] for dither=100
const int delta = (bits + DITHER_DESCALE_ROUNDER) >> DITHER_DESCALE;
const int v = (int)dst[i] + delta;
dst[i] = (v < 0) ? 0 : (v > 255) ? 255u : (uint8_t)v;
}
dst += bps;
}
}
static void DitherRow(VP8Decoder* const dec) {
int mb_x;
assert(dec->dither_);
for (mb_x = dec->tl_mb_x_; mb_x < dec->br_mb_x_; ++mb_x) {
const VP8ThreadContext* const ctx = &dec->thread_ctx_;
const VP8MBData* const data = ctx->mb_data_ + mb_x;
const int cache_id = ctx->id_;
const int uv_bps = dec->cache_uv_stride_;
if (data->dither_ >= MIN_DITHER_AMP) {
uint8_t* const u_dst = dec->cache_u_ + cache_id * 8 * uv_bps + mb_x * 8;
uint8_t* const v_dst = dec->cache_v_ + cache_id * 8 * uv_bps + mb_x * 8;
Dither8x8(&dec->dithering_rg_, u_dst, uv_bps, data->dither_);
Dither8x8(&dec->dithering_rg_, v_dst, uv_bps, data->dither_);
}
}
}
//------------------------------------------------------------------------------
// This function is called after a row of macroblocks is finished decoding.
// It also takes into account the following restrictions:
@ -186,6 +262,10 @@ static int FinishRow(VP8Decoder* const dec, VP8Io* const io) {
FilterRow(dec);
}
if (dec->dither_) {
DitherRow(dec);
}
if (io->put != NULL) {
int y_start = MACROBLOCK_VPOS(mb_y);
int y_end = MACROBLOCK_VPOS(mb_y + 1);

View File

@ -423,6 +423,7 @@ static VP8StatusCode DecodePartition0(WebPIDecoder* const idec) {
// This change must be done before calling VP8InitFrame()
dec->mt_method_ = VP8GetThreadMethod(params->options, NULL,
io->width, io->height);
VP8InitDithering(params->options, dec);
if (!CopyParts0Data(idec)) {
return IDecError(idec, VP8_STATUS_OUT_OF_MEMORY);
}

View File

@ -104,6 +104,8 @@ void VP8ParseQuant(VP8Decoder* const dec) {
m->uv_mat_[0] = kDcTable[clip(q + dquv_dc, 117)];
m->uv_mat_[1] = kAcTable[clip(q + dquv_ac, 127)];
m->uv_quant_ = q + dquv_ac; // for dithering strength evaluation
}
}
}

View File

@ -561,6 +561,12 @@ static int ParseResiduals(VP8Decoder* const dec,
block->non_zero_y_ = non_zero_y;
block->non_zero_uv_ = non_zero_uv;
// We look at the mode-code of each block and check if some blocks have less
// than three non-zero coeffs (code < 2). This is to avoid dithering flat and
// empty blocks.
block->dither_ = (non_zero_uv & 0xaaaa) ? 0 : q->dither_;
return !(non_zero_y | non_zero_uv); // will be used for further optimization
}

View File

@ -17,6 +17,7 @@
#include <string.h> // for memcpy()
#include "./vp8li.h"
#include "../utils/bit_reader.h"
#include "../utils/random.h"
#include "../utils/thread.h"
#include "../dsp/dsp.h"
@ -173,6 +174,9 @@ typedef struct { // Top/Left Contexts used for syntax-parsing
typedef int quant_t[2]; // [DC / AC]. Can be 'uint16_t[2]' too (~slower).
typedef struct {
quant_t y1_mat_, y2_mat_, uv_mat_;
int uv_quant_; // U/V quantizer value
int dither_; // dithering amplitude (0 = off, max=255)
} VP8QuantMatrix;
// Data needed to reconstruct a macroblock
@ -190,6 +194,7 @@ typedef struct {
// This allows to call specialized transform functions.
uint32_t non_zero_y_;
uint32_t non_zero_uv_;
uint8_t dither_; // local dithering strength (deduced from non_zero_*)
} VP8MBData;
// Persistent information needed by the parallel processing
@ -244,6 +249,10 @@ struct VP8Decoder {
// per-partition boolean decoders.
VP8BitReader parts_[MAX_NUM_PARTITIONS];
// Dithering strength, deduced from decoding options
int dither_; // whether to use dithering or not
VP8Random dithering_rg_; // random generator for dithering
// dequantization (one set of DC/AC dequant factor per segment)
VP8QuantMatrix dqm_[NUM_MB_SEGMENTS];
@ -324,7 +333,10 @@ int VP8ExitCritical(VP8Decoder* const dec, VP8Io* const io);
int VP8GetThreadMethod(const WebPDecoderOptions* const options,
const WebPHeaderStructure* const headers,
int width, int height);
// Process the last decoded row (filtering + output)
// Initialize dithering post-process if needed.
void VP8InitDithering(const WebPDecoderOptions* const options,
VP8Decoder* const dec);
// Process the last decoded row (filtering + output).
int VP8ProcessRow(VP8Decoder* const dec, VP8Io* const io);
// To be called at the start of a new scanline, to initialize predictors.
void VP8InitScanline(VP8Decoder* const dec);

View File

@ -474,6 +474,7 @@ static VP8StatusCode DecodeInto(const uint8_t* const data, size_t data_size,
// This change must be done before calling VP8Decode()
dec->mt_method_ = VP8GetThreadMethod(params->options, &headers,
io.width, io.height);
VP8InitDithering(params->options, dec);
if (!VP8Decode(dec, &io)) {
status = dec->status_;
}

View File

@ -34,8 +34,10 @@ typedef struct {
void VP8InitRandom(VP8Random* const rg, float dithering);
// Returns a centered pseudo-random number with 'num_bits' amplitude.
// (uses D.Knuth's Difference-based random generator)
static WEBP_INLINE int VP8RandomBits(VP8Random* const rg, int num_bits) {
// (uses D.Knuth's Difference-based random generator).
// 'amp' is in VP8_RANDOM_DITHER_FIX fixed-point precision.
static WEBP_INLINE int VP8RandomBits2(VP8Random* const rg, int num_bits,
int amp) {
int diff;
assert(num_bits + VP8_RANDOM_DITHER_FIX <= 31);
diff = rg->tab_[rg->index1_] - rg->tab_[rg->index2_];
@ -44,11 +46,15 @@ static WEBP_INLINE int VP8RandomBits(VP8Random* const rg, int num_bits) {
if (++rg->index1_ == VP8_RANDOM_TABLE_SIZE) rg->index1_ = 0;
if (++rg->index2_ == VP8_RANDOM_TABLE_SIZE) rg->index2_ = 0;
diff = (diff << 1) >> (32 - num_bits); // sign-extend, 0-center
diff = (diff * rg->amp_) >> VP8_RANDOM_DITHER_FIX; // restrict range
diff = (diff * amp) >> VP8_RANDOM_DITHER_FIX; // restrict range
diff += 1 << (num_bits - 1); // shift back to 0.5-center
return diff;
}
static WEBP_INLINE int VP8RandomBits(VP8Random* const rg, int num_bits) {
return VP8RandomBits2(rg, num_bits, rg->amp_);
}
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
#endif

View File

@ -20,7 +20,7 @@
extern "C" {
#endif
#define WEBP_DECODER_ABI_VERSION 0x0202 // MAJOR(8b) + MINOR(8b)
#define WEBP_DECODER_ABI_VERSION 0x0203 // MAJOR(8b) + MINOR(8b)
// Note: forward declaring enumerations is not allowed in (strict) C and C++,
// the types are left here for reference.
@ -441,11 +441,12 @@ struct WebPDecoderOptions {
int use_scaling; // if true, scaling is applied _afterward_
int scaled_width, scaled_height; // final resolution
int use_threads; // if true, use multi-threaded decoding
int dithering_strength; // dithering strength (0=Off, 100=full)
// Unused for now:
int force_rotation; // forced rotation (to be applied _last_)
int no_enhancement; // if true, discard enhancement layer
uint32_t pad[6]; // padding for later use
uint32_t pad[5]; // padding for later use
};
// Main object storing the configuration for advanced decoding.