fancy chroma upscaling

When FANCY_UPSCALING is defined, use a smoothing filter for upscaling
the U/V chroma fields. The filter used is a separable t[1 3 3 1] x [1 3 3 1]
filter. It can be easily changed in macros MIX_*.

The upscaling code resides in the thin shell between the user and core
decoding (in webp.c), and not in the core decoder. As such, this smoothing
process can still be offloaded to the GPU in the future and is not an integral
part of the decoding process.

Coincidentally: changed the way data is transferred to the user. For profile 2 (no
filtering), it used to be on a per-block basis. Now, for all profiles, we
emit rows of pixels (between 8 and 24 in height) when they are ready.
This makes the upscaling code much easier.

Will update the test vectors MD5 sums soon (as they'll be broken
after this change)

Change-Id: I2640ff12596cb8b843a4a376d7347447d9b9f778
This commit is contained in:
Pascal Massimino 2010-11-03 14:27:51 -07:00
parent 5a936a0a21
commit 6a37a2aaa9
5 changed files with 294 additions and 119 deletions

View File

@ -31,8 +31,7 @@ int VP8InitFrame(VP8Decoder* const dec, VP8Io* io) {
const int info_size = (mb_w + 1) * sizeof(VP8MB);
const int yuv_size = YUV_SIZE * sizeof(*dec->yuv_b_);
const int coeffs_size = 384 * sizeof(*dec->coeffs_);
const int cache_height = (dec->filter_type_ == 0) ? 0 :
(16 + kFilterExtraRows[dec->filter_type_]) * 3 / 2;
const int cache_height = (16 + kFilterExtraRows[dec->filter_type_]) * 3 / 2;
const int cache_size = top_size * cache_height;
const int needed = intra_pred_mode_size
+ top_size + info_size
@ -74,14 +73,10 @@ int VP8InitFrame(VP8Decoder* const dec, VP8Io* io) {
dec->cache_y_stride_ = 16 * mb_w;
dec->cache_uv_stride_ = 8 * mb_w;
if (dec->filter_type_ == 0) {
dec->cache_y_ = NULL;
dec->cache_u_ = NULL;
dec->cache_v_ = NULL;
} else {
{
const int extra_rows = kFilterExtraRows[dec->filter_type_];
const int extra_y = extra_rows * dec->cache_y_stride_;
const int extra_uv =(extra_rows / 2) * dec->cache_uv_stride_;
const int extra_uv = (extra_rows / 2) * dec->cache_uv_stride_;
dec->cache_y_ = ((uint8_t*)mem) + extra_y;
dec->cache_u_ = dec->cache_y_ + 16 * dec->cache_y_stride_ + extra_uv;
dec->cache_v_ = dec->cache_u_ + 8 * dec->cache_uv_stride_ + extra_uv;
@ -97,22 +92,13 @@ int VP8InitFrame(VP8Decoder* const dec, VP8Io* io) {
// prepare 'io'
io->width = dec->pic_hdr_.width_;
io->height = dec->pic_hdr_.height_;
io->mb_x = 0;
io->mb_y = 0;
if (dec->filter_type_ == 0) {
io->y = dec->yuv_b_ + Y_OFF;
io->u = dec->yuv_b_ + U_OFF;
io->v = dec->yuv_b_ + V_OFF;
io->y_stride = BPS;
io->uv_stride = BPS;
} else {
io->y = dec->cache_y_;
io->u = dec->cache_u_;
io->v = dec->cache_v_;
io->y_stride = dec->cache_y_stride_;
io->uv_stride = dec->cache_uv_stride_;
io->mb_w = io->width;
}
io->y = dec->cache_y_;
io->u = dec->cache_u_;
io->v = dec->cache_v_;
io->y_stride = dec->cache_y_stride_;
io->uv_stride = dec->cache_uv_stride_;
io->fancy_upscaling = 0; // default
// Init critical function pointers and look-up tables.
VP8DspInitTables();
@ -177,32 +163,34 @@ static void DoFilter(VP8Decoder* const dec, int mb_x, int mb_y) {
}
}
void VP8StoreBlock(VP8Decoder* const dec) {
VP8MB* const info = dec->mb_info_ + dec->mb_x_;
int level = dec->filter_levels_[dec->segment_];
if (dec->filter_hdr_.use_lf_delta_) {
// TODO(skal): only CURRENT is handled for now.
level += dec->filter_hdr_.ref_lf_delta_[0];
if (dec->is_i4x4_) {
level += dec->filter_hdr_.mode_lf_delta_[0];
void VP8StoreBlock(VP8Decoder* const dec, VP8Io* const io) {
if (dec->filter_type_ > 0) {
VP8MB* const info = dec->mb_info_ + dec->mb_x_;
int level = dec->filter_levels_[dec->segment_];
if (dec->filter_hdr_.use_lf_delta_) {
// TODO(skal): only CURRENT is handled for now.
level += dec->filter_hdr_.ref_lf_delta_[0];
if (dec->is_i4x4_) {
level += dec->filter_hdr_.mode_lf_delta_[0];
}
}
}
level = (level < 0) ? 0 : (level > 63) ? 63 : level;
info->f_level_ = level;
level = (level < 0) ? 0 : (level > 63) ? 63 : level;
info->f_level_ = level;
if (dec->filter_hdr_.sharpness_ > 0) {
if (dec->filter_hdr_.sharpness_ > 4) {
level >>= 2;
} else {
level >>= 1;
if (dec->filter_hdr_.sharpness_ > 0) {
if (dec->filter_hdr_.sharpness_ > 4) {
level >>= 2;
} else {
level >>= 1;
}
if (level > 9 - dec->filter_hdr_.sharpness_) {
level = 9 - dec->filter_hdr_.sharpness_;
}
}
if (level > 9 - dec->filter_hdr_.sharpness_) {
level = 9 - dec->filter_hdr_.sharpness_;
}
}
info->f_ilevel_ = (level < 1) ? 1 : level;
info->f_inner_ = (!info->skip_ || dec->is_i4x4_);
info->f_ilevel_ = (level < 1) ? 1 : level;
info->f_inner_ = (!info->skip_ || dec->is_i4x4_);
}
{
// Transfer samples to row cache
int y;
@ -222,7 +210,7 @@ void VP8StoreBlock(VP8Decoder* const dec) {
}
}
void VP8FilterRow(VP8Decoder* const dec, VP8Io* io) {
void VP8FinishRow(VP8Decoder* const dec, VP8Io* io) {
const int extra_y_rows = kFilterExtraRows[dec->filter_type_];
const int ysize = extra_y_rows * dec->cache_y_stride_;
const int uvsize = (extra_y_rows / 2) * dec->cache_uv_stride_;
@ -231,9 +219,11 @@ void VP8FilterRow(VP8Decoder* const dec, VP8Io* io) {
uint8_t* const ydst = dec->cache_y_ - ysize;
uint8_t* const udst = dec->cache_u_ - uvsize;
uint8_t* const vdst = dec->cache_v_ - uvsize;
int mb_x;
for (mb_x = 0; mb_x < dec->mb_w_; ++mb_x) {
DoFilter(dec, mb_x, dec->mb_y_);
if (dec->filter_type_ > 0) {
int mb_x;
for (mb_x = 0; mb_x < dec->mb_w_; ++mb_x) {
DoFilter(dec, mb_x, dec->mb_y_);
}
}
if (io->put) {
int y_start = dec->mb_y_ * 16;

View File

@ -492,18 +492,6 @@ static int ParseResiduals(VP8Decoder* const dec,
//-----------------------------------------------------------------------------
// Main loop
static void SendBlock(VP8Decoder* const dec, VP8Io* io) {
if (io->put) {
io->mb_x = dec->mb_x_ * 16;
io->mb_y = dec->mb_y_ * 16;
io->mb_w = io->width - io->mb_x;
io->mb_h = io->height - io->mb_y;
if (io->mb_w > 16) io->mb_w = 16;
if (io->mb_h > 16) io->mb_h = 16;
io->put(io);
}
}
static int ParseFrame(VP8Decoder* const dec, VP8Io* io) {
int ok = 1;
VP8BitReader* const br = &dec->br_;
@ -548,19 +536,13 @@ static int ParseFrame(VP8Decoder* const dec, VP8Io* io) {
}
VP8ReconstructBlock(dec);
// Store filter params
if (dec->filter_type_ > 0) {
VP8StoreBlock(dec);
} else { // We're done. Send block to user at once.
SendBlock(dec, io);
}
// Store data and save block's filtering params
VP8StoreBlock(dec, io);
}
if (!ok) {
break;
}
if (dec->filter_type_ > 0) { // filter a row
VP8FilterRow(dec, io);
}
VP8FinishRow(dec, io);
if (dec->br_.eof_ || token_br->eof_) {
ok = 0;
break;
@ -596,17 +578,23 @@ int VP8Decode(VP8Decoder* const dec, VP8Io* const io) {
return VP8SetError(dec, 3, "Allocation failed");
}
// set-up
if (io->setup) io->setup(io);
// Main decoding loop
if (!ParseFrame(dec, io)) {
if (io->setup && !io->setup(io)) {
VP8Clear(dec);
return VP8SetError(dec, 3, "Frame decoding failed");
return VP8SetError(dec, 3, "Frame setup failed");
}
// tear-down
if (io->teardown) io->teardown(io);
// Main decoding loop
{
const int ret = ParseFrame(dec, io);
if (io->teardown) {
io->teardown(io);
}
if (!ret) {
VP8Clear(dec);
return VP8SetError(dec, 3, "Frame decoding failed");
}
}
dec->ready_ = 0;
return 1;

View File

@ -263,9 +263,10 @@ void VP8ParseQuant(VP8Decoder* const dec);
int VP8InitFrame(VP8Decoder* const dec, VP8Io* io);
// Predict a block and add residual
void VP8ReconstructBlock(VP8Decoder* const dec);
// Filtering
void VP8StoreBlock(VP8Decoder* const dec);
void VP8FilterRow(VP8Decoder* const dec, VP8Io* io);
// Store a block, along with filtering params
void VP8StoreBlock(VP8Decoder* const dec, VP8Io* io);
// Finalize and transmit a complete row
void VP8FinishRow(VP8Decoder* const dec, VP8Io* io);
// in dsp.c
typedef void (*VP8Idct)(const int16_t* coeffs, uint8_t* dst);

View File

@ -17,6 +17,8 @@
extern "C" {
#endif
#define FANCY_UPSCALING // undefined to remove fancy upscaling support
//-----------------------------------------------------------------------------
// RIFF layout is:
// Offset tag
@ -60,14 +62,111 @@ static uint32_t CheckRIFFHeader(const uint8_t** data_ptr,
}
//-----------------------------------------------------------------------------
// Fancy upscaling
// Output format selector used by the conversion driver (CustomPut):
// MODE_YUV copies the decoded y/u/v rows as-is, the four other modes convert
// to packed pixels (3 bytes per pixel for RGB/BGR, 4 for RGBA/BGRA).
typedef enum { MODE_RGB = 0, MODE_RGBA = 1,
MODE_BGR = 2, MODE_BGRA = 3,
MODE_YUV = 4 } CSP_MODE;
#ifdef FANCY_UPSCALING
// Given samples laid out in a square as:
// [a b]
// [c d]
// we interpolate u/v as:
// ([9*a + 3*b + 3*c +   d      3*a + 9*b +   c + 3*d] + [8 8]) / 16
// ([3*a +   b + 9*c + 3*d        a + 3*b + 3*c + 9*d] + [8 8]) / 16
// Both mixers take the four packed samples of a 2x2 square and return the
// weighted sum (weights 9, 3, 3, 1) divided by 16. MIX_ODD gives the weight 9
// to 'a', MIX_EVEN gives it to 'c'. The constant 0x00080008u adds the
// rounding term 8 to each 16-bit half of the packed value at once.
#define MIX_ODD(a, b, c, d) \
((9 * (a) + 3 * ((b) + (c)) + (d) + 0x00080008u) >> 4)
#define MIX_EVEN(a, b, c, d) \
((9 * (c) + 3 * ((d) + (a)) + (b) + 0x00080008u) >> 4)
// We process u and v together stashed into 32bit (16bit each).
// Note that we could store the pair (3*t_uv + uv, t_uv + 3*uv)
// instead of (t_uv, uv), into a 64bit variable. Doing so, we could
// simplify the MIXing a bit and save two multiplies. TODO(skal).
// LOAD_UV puts 'u' in the low 16 bits and 'v' in the high 16 bits; after
// mixing, users read u back with '& 0xff' and v with '>> 16'.
#define LOAD_UV(u,v) ((u) | ((v) << 16))
// Macro festival, so we can define all of the rgb/bgr/rgba/bgra cases
// for odd and even lines.
//
// UPSCALE_FUNC(FUNC_NAME, MIX, FUNC, XSTEP) defines a static function
// FUNC_NAME that converts one row of 'len' luma samples into 'len' output
// pixels:
//   cur_y        : luma row to convert
//   cur_u, cur_v : current chroma rows
//   top_u, top_v : chroma rows above the current ones
//   len          : number of pixels in the row (nothing is done if <= 0)
//   dst          : output row; pixel i is written at dst + i * XSTEP
// MIX is the 2x2 chroma interpolator (MIX_ODD or MIX_EVEN), applied to u and v
// packed together by LOAD_UV. FUNC(y, u, v, dst) writes one converted pixel.
// The first pixel, and the last one when 'len' is even, have no horizontal
// neighbour: their chroma sample is replicated horizontally instead.
//
// Note: the last line of the macro deliberately carries no trailing
// backslash, so that whatever follows the definition is never spliced into it.
#define UPSCALE_FUNC(FUNC_NAME, MIX, FUNC, XSTEP)                              \
static void FUNC_NAME(const uint8_t* cur_y,                                    \
                      const uint8_t* cur_u, const uint8_t* cur_v,              \
                      const uint8_t* top_u, const uint8_t* top_v,              \
                      int len, uint8_t* dst) {                                 \
  int x;                                                                       \
  uint32_t tl_uv, l_uv, edge_uv;                                               \
  if (len <= 0) return;   /* empty row: avoid touching index -1 below */       \
  tl_uv = LOAD_UV(top_u[0], top_v[0]);   /* top-left sample */                 \
  l_uv = LOAD_UV(cur_u[0], cur_v[0]);    /* left sample */                     \
  edge_uv = MIX(tl_uv, tl_uv, l_uv, l_uv);                                     \
  FUNC(cur_y[0], edge_uv & 0xff, (edge_uv >> 16), dst);                        \
  len -= 1;   /* first pixel is done. */                                       \
  for (x = 1; x <= (len >> 1); ++x) {                                          \
    const uint32_t t_uv = LOAD_UV(top_u[x], top_v[x]);   /* top sample */      \
    const uint32_t uv = LOAD_UV(cur_u[x], cur_v[x]);     /* sample */          \
    const uint32_t uv0 = MIX(tl_uv, t_uv, l_uv, uv);                           \
    const uint32_t uv1 = MIX(t_uv, tl_uv, uv, l_uv);                           \
    FUNC(cur_y[2*x-1], uv0 & 0xff, (uv0 >> 16), dst + (2*x-1) * (XSTEP));      \
    FUNC(cur_y[2*x  ], uv1 & 0xff, (uv1 >> 16), dst + (2*x  ) * (XSTEP));      \
    tl_uv = t_uv;                                                              \
    l_uv = uv;                                                                 \
  }                                                                            \
  if (len & 1) {   /* one trailing pixel left (original length was even) */    \
    edge_uv = MIX(tl_uv, tl_uv, l_uv, l_uv);                                   \
    FUNC(cur_y[len], edge_uv & 0xff, (edge_uv >> 16), dst + len * (XSTEP));    \
  }                                                                            \
}
// All variants implemented: one row converter per line parity (the Even/Odd
// functions use MIX_EVEN/MIX_ODD respectively) and per output format. The
// last argument is the byte step between two consecutive output pixels.
UPSCALE_FUNC(UpscaleEvenRgb, MIX_EVEN, VP8YuvToRgb, 3)
UPSCALE_FUNC(UpscaleOddRgb, MIX_ODD, VP8YuvToRgb, 3)
UPSCALE_FUNC(UpscaleEvenBgr, MIX_EVEN, VP8YuvToBgr, 3)
UPSCALE_FUNC(UpscaleOddBgr, MIX_ODD, VP8YuvToBgr, 3)
UPSCALE_FUNC(UpscaleEvenRgba, MIX_EVEN, VP8YuvToRgba, 4)
UPSCALE_FUNC(UpscaleOddRgba, MIX_ODD, VP8YuvToRgba, 4)
UPSCALE_FUNC(UpscaleEvenBgra, MIX_EVEN, VP8YuvToBgra, 4)
UPSCALE_FUNC(UpscaleOddBgra, MIX_ODD, VP8YuvToBgra, 4)
// Main driver function: converts one row by dispatching to the row converter
// that matches the requested output format and the parity of the line.
//   odd  : non-zero selects the Odd converters, zero the Even ones
//   mode : MODE_RGB, MODE_BGR or MODE_RGBA; any other value is handled by
//          the BGRA converters
// All other arguments are forwarded unchanged to the selected converter.
static inline void UpscaleLine(const uint8_t* cur_y,
                               const uint8_t* cur_u, const uint8_t* cur_v,
                               const uint8_t* top_u, const uint8_t* top_v,
                               int len, uint8_t* dst, int odd, CSP_MODE mode) {
  switch (mode) {
    case MODE_RGB:
      if (odd) {
        UpscaleOddRgb(cur_y, cur_u, cur_v, top_u, top_v, len, dst);
      } else {
        UpscaleEvenRgb(cur_y, cur_u, cur_v, top_u, top_v, len, dst);
      }
      break;
    case MODE_BGR:
      if (odd) {
        UpscaleOddBgr(cur_y, cur_u, cur_v, top_u, top_v, len, dst);
      } else {
        UpscaleEvenBgr(cur_y, cur_u, cur_v, top_u, top_v, len, dst);
      }
      break;
    case MODE_RGBA:
      if (odd) {
        UpscaleOddRgba(cur_y, cur_u, cur_v, top_u, top_v, len, dst);
      } else {
        UpscaleEvenRgba(cur_y, cur_u, cur_v, top_u, top_v, len, dst);
      }
      break;
    default:   // MODE_BGRA, and the fallback for every remaining value
      if (odd) {
        UpscaleOddBgra(cur_y, cur_u, cur_v, top_u, top_v, len, dst);
      } else {
        UpscaleEvenBgra(cur_y, cur_u, cur_v, top_u, top_v, len, dst);
      }
      break;
  }
}
#undef LOAD_UV
#undef UPSCALE_FUNC
#undef MIX_ODD
#undef MIX_EVEN
#endif // FANCY_UPSCALING
//-----------------------------------------------------------------------------
// Main conversion driver.
typedef struct {
uint8_t* output; // rgb(a) or luma
uint8_t *u, *v;
uint8_t *top_y, *top_u, *top_v;
int stride; // rgb(a) stride or luma stride
int u_stride;
int v_stride;
@ -76,52 +175,139 @@ typedef struct {
static void CustomPut(const VP8Io* io) {
Params *p = (Params*)io->opaque;
const int mb_w = io->mb_w;
const int w = io->width;
const int mb_h = io->mb_h;
int j;
const int uv_w = (w + 1) / 2;
assert(!(io->mb_y & 1));
if (p->mode == MODE_YUV) {
uint8_t* const y_dst = p->output + io->mb_x + io->mb_y * p->stride;
uint8_t* u_dst;
uint8_t* v_dst;
int uv_w;
uint8_t* const y_dst = p->output + io->mb_y * p->stride;
uint8_t* const u_dst = p->u + (io->mb_y >> 1) * p->u_stride;
uint8_t* const v_dst = p->v + (io->mb_y >> 1) * p->v_stride;
int j;
for (j = 0; j < mb_h; ++j) {
memcpy(y_dst + j * p->stride, io->y + j * io->y_stride, mb_w);
memcpy(y_dst + j * p->stride, io->y + j * io->y_stride, w);
}
u_dst = p->u + (io->mb_x / 2) + (io->mb_y / 2) * p->u_stride;
v_dst = p->v + (io->mb_x / 2) + (io->mb_y / 2) * p->v_stride;
uv_w = (mb_w + 1) / 2;
for (j = 0; j < (mb_h + 1) / 2; ++j) {
memcpy(u_dst + j * p->u_stride, io->u + j * io->uv_stride, uv_w);
memcpy(v_dst + j * p->v_stride, io->v + j * io->uv_stride, uv_w);
}
} else {
const int psize = (p->mode == MODE_RGB || p->mode == MODE_BGR) ? 3 : 4;
uint8_t* dst = p->output + psize * io->mb_x + io->mb_y * p->stride;
int i;
uint8_t* dst = p->output + io->mb_y * p->stride;
if (io->fancy_upscaling) {
#ifdef FANCY_UPSCALING
const uint8_t* cur_y;
const uint8_t* cur_u = io->u;
const uint8_t* cur_v = io->v;
const uint8_t* top_u = p->top_u;
const uint8_t* top_v = p->top_v;
int y = io->mb_y;
int y_end = io->mb_y + io->mb_h - 1;
if (y > 0) {
// If mid-fly, we need to finish the previous line.
cur_y = p->top_y;
dst -= p->stride;
y -= 1;
} else {
// else we "replicate" the u/v sample of the first line
top_u = cur_u;
top_v = cur_v;
// and start with the top line
cur_y = io->y;
}
if (y_end >= io->height - 1) {
// for the very last rows, we can process them right now
y_end = io->height;
} else {
// we won't process the very last line this time,
// waiting for the next call instead.
}
for (j = 0; j < mb_h; ++j) {
const uint8_t* y_src = io->y + j * io->y_stride;
for (i = 0; i < mb_w; ++i) {
const int y = y_src[i];
const int u = io->u[(j / 2) * io->uv_stride + (i / 2)];
const int v = io->v[(j / 2) * io->uv_stride + (i / 2)];
if (p->mode == MODE_RGB) {
VP8YuvToRgb(y, u, v, dst + i * 3);
} else if (p->mode == MODE_BGR) {
VP8YuvToBgr(y, u, v, dst + i * 3);
} else if (p->mode == MODE_RGBA) {
VP8YuvToRgba(y, u, v, dst + i * 4);
// Loop over each output row.
for (; y < y_end; ++y) {
if (y & 1) { // odd lines
UpscaleLine(cur_y, cur_u, cur_v, top_u, top_v, w, dst, 1, p->mode);
} else { // even lines
UpscaleLine(cur_y, cur_u, cur_v, top_u, top_v, w, dst, 0, p->mode);
top_u = cur_u;
top_v = cur_v;
if (y < io->height - 2) {
cur_u += io->uv_stride;
cur_v += io->uv_stride;
}
}
dst += p->stride;
if (cur_y == p->top_y) {
cur_y = io->y;
} else {
VP8YuvToBgra(y, u, v, dst + i * 4);
cur_y += io->y_stride;
}
}
dst += p->stride;
// Save the unfinished samples for next call (if we're not done yet).
if (y < io->height - 1) {
memcpy(p->top_y, cur_y, w * sizeof(*p->top_y));
memcpy(p->top_u, top_u, uv_w * sizeof(*p->top_u));
memcpy(p->top_v, top_v, uv_w * sizeof(*p->top_v));
}
#else
assert(0); // shouldn't happen.
#endif
} else {
// Point-sampling U/V upscaler.
// Could be implemented with special MIX functions, too.
int j;
for (j = 0; j < mb_h; ++j) {
const uint8_t* y_src = io->y + j * io->y_stride;
int i;
for (i = 0; i < w; ++i) {
const int y = y_src[i];
const int u = io->u[(j / 2) * io->uv_stride + (i / 2)];
const int v = io->v[(j / 2) * io->uv_stride + (i / 2)];
if (p->mode == MODE_RGB) {
VP8YuvToRgb(y, u, v, dst + i * 3);
} else if (p->mode == MODE_BGR) {
VP8YuvToBgr(y, u, v, dst + i * 3);
} else if (p->mode == MODE_RGBA) {
VP8YuvToRgba(y, u, v, dst + i * 4);
} else {
VP8YuvToBgra(y, u, v, dst + i * 4);
}
}
dst += p->stride;
}
}
}
}
//-----------------------------------------------------------------------------
// setup() hook. When FANCY_UPSCALING is compiled in and the output is not
// MODE_YUV, allocates a single scratch buffer holding one luma row followed
// by one u row and one v row (used by CustomPut to carry samples over from one
// call to the next), and turns io->fancy_upscaling on.
// Returns 1 on success, 0 if the scratch buffer could not be allocated.
static int CustomSetup(VP8Io* io) {
#ifdef FANCY_UPSCALING
  Params* const params = (Params*)io->opaque;
  params->top_y = NULL;
  params->top_u = NULL;
  params->top_v = NULL;
  if (params->mode == MODE_YUV) {
    return 1;   // no scratch rows allocated for YUV output
  }
  {
    const int uv_width = (io->width + 1) >> 1;
    uint8_t* const rows = (uint8_t*)malloc(io->width + 2 * uv_width);
    if (rows == NULL) {
      return 0;   // memory error.
    }
    // Carve the three rows out of the single allocation.
    params->top_y = rows;
    params->top_u = rows + io->width;
    params->top_v = params->top_u + uv_width;
    io->fancy_upscaling = 1;   // activate fancy upscaling
  }
#endif
  return 1;
}
// teardown() hook: releases the scratch rows allocated by CustomSetup(), if
// any, and resets the three row pointers. top_u and top_v point inside the
// top_y allocation, so only top_y is freed.
static void CustomTeardown(const VP8Io* io) {
#ifdef FANCY_UPSCALING
  Params* const params = (Params*)io->opaque;
  if (params->top_y == NULL) {
    return;   // nothing was allocated
  }
  free(params->top_y);
  params->top_y = NULL;
  params->top_u = NULL;
  params->top_v = NULL;
#endif
}
//-----------------------------------------------------------------------------
// "Into" variants
@ -145,6 +331,8 @@ static uint8_t* DecodeInto(CSP_MODE mode,
params->mode = mode;
io.opaque = params;
io.put = CustomPut;
io.setup = CustomSetup;
io.teardown = CustomTeardown;
if (!VP8GetHeaders(dec, &io)) {
VP8Delete(dec);

View File

@ -40,28 +40,36 @@ extern "C" {
typedef struct VP8Io VP8Io;
struct VP8Io {
// set by VP8GetHeaders()
int width, height; // picture dimensions, in pixels
int width, height; // picture dimensions, in pixels
// set before calling put()
int mb_x, mb_y; // position of the current sample (in pixels)
int mb_w, mb_h; // size of the current sample (usually 16x16)
const uint8_t *y, *u, *v; // samples to copy
int y_stride; // stride for luma
int uv_stride; // stride for chroma
int mb_y; // position of the current rows (in pixels)
int mb_h; // number of rows in the sample
const uint8_t *y, *u, *v; // rows to copy (in yuv420 format)
int y_stride; // row stride for luma
int uv_stride; // row stride for chroma
void* opaque; // user data
// called when fresh samples are available (1 block of 16x16 pixels)
// called when fresh samples are available. Currently, samples are in
// YUV420 format, and can be up to width x 24 in size (depending on the
// in-loop filtering level, e.g.).
void (*put)(const VP8Io* io);
// called just before starting to decode the blocks
void (*setup)(const VP8Io* io);
// called just before starting to decode the blocks.
// Should return 0 in case of error.
int (*setup)(VP8Io* io);
// called just after block decoding is finished
// called just after block decoding is finished (or when an error occurred).
void (*teardown)(const VP8Io* io);
// this is a recommendation for the user-side yuv->rgb converter. This flag
// is set when calling setup() hook and can be overwritten by it. It then
// can be taken into consideration during the put() method.
int fancy_upscaling;
// Input buffer.
uint32_t data_size;
uint32_t data_size;
const uint8_t* data;
};