2012-01-06 14:49:06 -08:00
|
|
|
// Copyright 2010 Google Inc. All Rights Reserved.
|
2010-09-30 09:34:38 -04:00
|
|
|
//
|
|
|
|
// This code is licensed under the same terms as WebM:
|
|
|
|
// Software License Agreement: http://www.webmproject.org/license/software/
|
|
|
|
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
|
|
|
|
// -----------------------------------------------------------------------------
|
|
|
|
//
|
|
|
|
// Frame-reconstruction function. Memory allocation.
|
|
|
|
//
|
|
|
|
// Author: Skal (pascal.massimino@gmail.com)
|
|
|
|
|
|
|
|
#include <stdlib.h>
|
2011-05-02 17:19:00 -07:00
|
|
|
#include "./vp8i.h"
|
2012-08-01 12:06:04 -07:00
|
|
|
#include "../utils/utils.h"
|
2010-09-30 09:34:38 -04:00
|
|
|
|
|
|
|
#if defined(__cplusplus) || defined(c_plusplus)
|
|
|
|
extern "C" {
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#define ALIGN_MASK (32 - 1)
|
|
|
|
|
2011-07-22 13:09:10 -07:00
|
|
|
//------------------------------------------------------------------------------
|
2012-01-27 17:39:47 -08:00
|
|
|
// Filtering
|
2010-09-30 09:34:38 -04:00
|
|
|
|
2011-06-09 12:08:08 -07:00
|
|
|
// kFilterExtraRows[] = How many extra lines are needed on the MB boundary
|
|
|
|
// for caching, given a filtering level.
|
|
|
|
// Simple filter: up to 2 luma samples are read and 1 is written.
|
|
|
|
// Complex filter: up to 4 luma samples are read and 3 are written. Same for
|
|
|
|
// U/V, so it's 8 samples total (because of the 2x upsampling).
|
|
|
|
// Indexed by dec->filter_type_ (0 = filtering disabled, 1 = simple filter,
// 2 = complex filter).
static const uint8_t kFilterExtraRows[3] = { 0, 2, 8 };
|
2010-09-30 09:34:38 -04:00
|
|
|
|
2011-11-04 19:44:57 -07:00
|
|
|
// Maps a filtering level to the 'hev_thresh' value handed to the complex
// filter functions. Key frames yield a value in [0..2], other frames a value
// in [0..3]; levels below 15 always yield 0.
static WEBP_INLINE int hev_thresh_from_level(int level, int keyframe) {
  if (level < 15) {
    return 0;
  }
  if (keyframe) {
    return (level >= 40) ? 2 : 1;
  }
  if (level >= 40) {
    return 3;
  }
  return (level >= 20) ? 2 : 1;
}
|
|
|
|
|
2011-06-20 00:45:15 -07:00
|
|
|
static void DoFilter(const VP8Decoder* const dec, int mb_x, int mb_y) {
|
2011-07-22 13:09:10 -07:00
|
|
|
const VP8ThreadContext* const ctx = &dec->thread_ctx_;
|
2010-09-30 09:34:38 -04:00
|
|
|
const int y_bps = dec->cache_y_stride_;
|
2011-07-22 13:09:10 -07:00
|
|
|
VP8FInfo* const f_info = ctx->f_info_ + mb_x;
|
|
|
|
uint8_t* const y_dst = dec->cache_y_ + ctx->id_ * 16 * y_bps + mb_x * 16;
|
|
|
|
const int level = f_info->f_level_;
|
|
|
|
const int ilevel = f_info->f_ilevel_;
|
2010-09-30 09:34:38 -04:00
|
|
|
const int limit = 2 * level + ilevel;
|
2011-03-08 18:47:08 -08:00
|
|
|
if (level == 0) {
|
|
|
|
return;
|
|
|
|
}
|
2010-09-30 09:34:38 -04:00
|
|
|
if (dec->filter_type_ == 1) { // simple
|
|
|
|
if (mb_x > 0) {
|
|
|
|
VP8SimpleHFilter16(y_dst, y_bps, limit + 4);
|
|
|
|
}
|
2011-07-22 13:09:10 -07:00
|
|
|
if (f_info->f_inner_) {
|
2010-09-30 09:34:38 -04:00
|
|
|
VP8SimpleHFilter16i(y_dst, y_bps, limit);
|
|
|
|
}
|
|
|
|
if (mb_y > 0) {
|
|
|
|
VP8SimpleVFilter16(y_dst, y_bps, limit + 4);
|
|
|
|
}
|
2011-07-22 13:09:10 -07:00
|
|
|
if (f_info->f_inner_) {
|
2010-09-30 09:34:38 -04:00
|
|
|
VP8SimpleVFilter16i(y_dst, y_bps, limit);
|
|
|
|
}
|
|
|
|
} else { // complex
|
|
|
|
const int uv_bps = dec->cache_uv_stride_;
|
2011-07-22 13:09:10 -07:00
|
|
|
uint8_t* const u_dst = dec->cache_u_ + ctx->id_ * 8 * uv_bps + mb_x * 8;
|
|
|
|
uint8_t* const v_dst = dec->cache_v_ + ctx->id_ * 8 * uv_bps + mb_x * 8;
|
2010-09-30 09:34:38 -04:00
|
|
|
const int hev_thresh =
|
|
|
|
hev_thresh_from_level(level, dec->frm_hdr_.key_frame_);
|
|
|
|
if (mb_x > 0) {
|
|
|
|
VP8HFilter16(y_dst, y_bps, limit + 4, ilevel, hev_thresh);
|
|
|
|
VP8HFilter8(u_dst, v_dst, uv_bps, limit + 4, ilevel, hev_thresh);
|
|
|
|
}
|
2011-07-22 13:09:10 -07:00
|
|
|
if (f_info->f_inner_) {
|
2010-09-30 09:34:38 -04:00
|
|
|
VP8HFilter16i(y_dst, y_bps, limit, ilevel, hev_thresh);
|
|
|
|
VP8HFilter8i(u_dst, v_dst, uv_bps, limit, ilevel, hev_thresh);
|
|
|
|
}
|
|
|
|
if (mb_y > 0) {
|
|
|
|
VP8VFilter16(y_dst, y_bps, limit + 4, ilevel, hev_thresh);
|
|
|
|
VP8VFilter8(u_dst, v_dst, uv_bps, limit + 4, ilevel, hev_thresh);
|
|
|
|
}
|
2011-07-22 13:09:10 -07:00
|
|
|
if (f_info->f_inner_) {
|
2010-09-30 09:34:38 -04:00
|
|
|
VP8VFilter16i(y_dst, y_bps, limit, ilevel, hev_thresh);
|
|
|
|
VP8VFilter8i(u_dst, v_dst, uv_bps, limit, ilevel, hev_thresh);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-07-22 13:09:10 -07:00
|
|
|
// Filter the decoded macroblock row (if needed)
|
|
|
|
static void FilterRow(const VP8Decoder* const dec) {
|
2011-06-20 00:45:15 -07:00
|
|
|
int mb_x;
|
2011-07-22 13:09:10 -07:00
|
|
|
const int mb_y = dec->thread_ctx_.mb_y_;
|
|
|
|
assert(dec->thread_ctx_.filter_row_);
|
2011-06-20 00:45:15 -07:00
|
|
|
for (mb_x = dec->tl_mb_x_; mb_x < dec->br_mb_x_; ++mb_x) {
|
2011-07-22 13:09:10 -07:00
|
|
|
DoFilter(dec, mb_x, mb_y);
|
2011-06-20 00:45:15 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
//------------------------------------------------------------------------------
|
2012-12-14 10:22:54 -08:00
|
|
|
// Precompute the filtering strength for each segment and each i4x4/i16x16 mode.
|
2011-06-20 00:45:15 -07:00
|
|
|
|
2012-12-14 10:22:54 -08:00
|
|
|
static void PrecomputeFilterStrengths(VP8Decoder* const dec) {
|
2010-11-03 14:27:51 -07:00
|
|
|
if (dec->filter_type_ > 0) {
|
2012-12-14 10:22:54 -08:00
|
|
|
int s;
|
|
|
|
const VP8FilterHeader* const hdr = &dec->filter_hdr_;
|
|
|
|
for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
|
|
|
|
int i4x4;
|
|
|
|
// First, compute the initial level
|
|
|
|
int base_level;
|
|
|
|
if (dec->segment_hdr_.use_segment_) {
|
|
|
|
base_level = dec->segment_hdr_.filter_strength_[s];
|
|
|
|
if (!dec->segment_hdr_.absolute_delta_) {
|
|
|
|
base_level += hdr->level_;
|
|
|
|
}
|
2010-11-03 14:27:51 -07:00
|
|
|
} else {
|
2012-12-14 10:22:54 -08:00
|
|
|
base_level = hdr->level_;
|
2010-11-03 14:27:51 -07:00
|
|
|
}
|
2012-12-14 10:22:54 -08:00
|
|
|
for (i4x4 = 0; i4x4 <= 1; ++i4x4) {
|
|
|
|
VP8FInfo* const info = &dec->fstrengths_[s][i4x4];
|
|
|
|
int level = base_level;
|
|
|
|
if (hdr->use_lf_delta_) {
|
|
|
|
// TODO(skal): only CURRENT is handled for now.
|
|
|
|
level += hdr->ref_lf_delta_[0];
|
|
|
|
if (i4x4) {
|
|
|
|
level += hdr->mode_lf_delta_[0];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
level = (level < 0) ? 0 : (level > 63) ? 63 : level;
|
|
|
|
info->f_level_ = level;
|
|
|
|
|
|
|
|
if (hdr->sharpness_ > 0) {
|
|
|
|
if (hdr->sharpness_ > 4) {
|
|
|
|
level >>= 2;
|
|
|
|
} else {
|
|
|
|
level >>= 1;
|
|
|
|
}
|
|
|
|
if (level > 9 - hdr->sharpness_) {
|
|
|
|
level = 9 - hdr->sharpness_;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
info->f_ilevel_ = (level < 1) ? 1 : level;
|
|
|
|
info->f_inner_ = 0;
|
2010-11-03 14:27:51 -07:00
|
|
|
}
|
|
|
|
}
|
2010-09-30 09:34:38 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-06-20 00:45:15 -07:00
|
|
|
//------------------------------------------------------------------------------
|
|
|
|
// This function is called after a row of macroblocks is finished decoding.
|
|
|
|
// It also takes into account the following restrictions:
|
|
|
|
// * In case of in-loop filtering, we must hold off sending some of the bottom
|
|
|
|
// pixels as they are yet unfiltered. They will be when the next macroblock
|
|
|
|
// row is decoded. Meanwhile, we must preserve them by rotating them in the
|
|
|
|
// cache area. This doesn't hold for the very bottom row of the uncropped
|
|
|
|
// picture of course.
|
|
|
|
// * we must clip the remaining pixels against the cropping area. The VP8Io
|
|
|
|
// struct must have the following fields set correctly before calling put():
|
|
|
|
|
|
|
|
#define MACROBLOCK_VPOS(mb_y) ((mb_y) * 16) // vertical position of a MB
|
|
|
|
|
2011-07-22 13:09:10 -07:00
|
|
|
// Finalize and transmit a complete row. Return false in case of user-abort.
|
2012-01-27 17:39:47 -08:00
|
|
|
static int FinishRow(VP8Decoder* const dec, VP8Io* const io) {
|
2011-07-22 13:09:10 -07:00
|
|
|
int ok = 1;
|
|
|
|
const VP8ThreadContext* const ctx = &dec->thread_ctx_;
|
2010-10-31 09:27:06 -07:00
|
|
|
const int extra_y_rows = kFilterExtraRows[dec->filter_type_];
|
|
|
|
const int ysize = extra_y_rows * dec->cache_y_stride_;
|
|
|
|
const int uvsize = (extra_y_rows / 2) * dec->cache_uv_stride_;
|
2011-07-22 13:09:10 -07:00
|
|
|
const int y_offset = ctx->id_ * 16 * dec->cache_y_stride_;
|
|
|
|
const int uv_offset = ctx->id_ * 8 * dec->cache_uv_stride_;
|
|
|
|
uint8_t* const ydst = dec->cache_y_ - ysize + y_offset;
|
|
|
|
uint8_t* const udst = dec->cache_u_ - uvsize + uv_offset;
|
|
|
|
uint8_t* const vdst = dec->cache_v_ - uvsize + uv_offset;
|
|
|
|
const int first_row = (ctx->mb_y_ == 0);
|
|
|
|
const int last_row = (ctx->mb_y_ >= dec->br_mb_y_ - 1);
|
|
|
|
int y_start = MACROBLOCK_VPOS(ctx->mb_y_);
|
|
|
|
int y_end = MACROBLOCK_VPOS(ctx->mb_y_ + 1);
|
|
|
|
|
|
|
|
if (ctx->filter_row_) {
|
|
|
|
FilterRow(dec);
|
|
|
|
}
|
|
|
|
|
2013-04-11 00:57:31 -07:00
|
|
|
if (io->put != NULL) {
|
2010-10-31 09:27:06 -07:00
|
|
|
if (!first_row) {
|
|
|
|
y_start -= extra_y_rows;
|
|
|
|
io->y = ydst;
|
|
|
|
io->u = udst;
|
|
|
|
io->v = vdst;
|
|
|
|
} else {
|
2011-07-22 13:09:10 -07:00
|
|
|
io->y = dec->cache_y_ + y_offset;
|
|
|
|
io->u = dec->cache_u_ + uv_offset;
|
|
|
|
io->v = dec->cache_v_ + uv_offset;
|
2010-09-30 09:34:38 -04:00
|
|
|
}
|
2011-06-20 00:45:15 -07:00
|
|
|
|
2010-10-31 09:27:06 -07:00
|
|
|
if (!last_row) {
|
|
|
|
y_end -= extra_y_rows;
|
|
|
|
}
|
2011-06-20 00:45:15 -07:00
|
|
|
if (y_end > io->crop_bottom) {
|
|
|
|
y_end = io->crop_bottom; // make sure we don't overflow on last row.
|
2010-09-30 09:34:38 -04:00
|
|
|
}
|
EXPERIMENTAL: add support for alpha channel
This is a (minor) bitstream change: if the 'color_space' bit is set to '1'
(which is normally an undefined/invalid behaviour), we add extra data at the
end of partition #0 (so-called 'extensions')
Namely, we add the size of the extension data as 3 bytes (little-endian),
followed by a set of bits telling which extensions we're incorporating.
The data then _preceeds_ this trailing tags.
This is all experimental, and you'll need to have
'#define WEBP_EXPERIMENTAL_FEATURES' in webp/types.h to enable this code
(at your own risk! :))
Still, this hack produces almost-valid WebP file for decoders that don't
check this color_space bit. In particular, previous 'dwebp' (and for instance
Chrome) will recognize this files and decode them, but without the alpha
of course. Other decoder will just see random extra stuff at the end of
partition #0.
To experiment with the alpha-channel, you need to compile on Unix platform
and use PNGs for input/output.
If 'alpha.png' is a source with alpha channel, then you can try (on Unix):
cwebp alpha.png -o alpha.webp
dwebp alpha.webp -o test.png
cwebp now has a '-noalpha' flag to ignore any alpha information from the
source, if present.
More hacking and experimenting welcome!
Change-Id: I3c7b1fd8411c9e7a9f77690e898479ad85c52f3e
2011-04-25 16:58:04 -07:00
|
|
|
io->a = NULL;
|
2012-06-04 07:40:32 -07:00
|
|
|
if (dec->alpha_data_ != NULL && y_start < y_end) {
|
|
|
|
// TODO(skal): several things to correct here:
|
|
|
|
// * testing presence of alpha with dec->alpha_data_ is not a good idea
|
|
|
|
// * we're actually decompressing the full plane only once. It should be
|
|
|
|
// more obvious from signature.
|
|
|
|
// * we could free alpha_data_ right after this call, but we don't own.
|
EXPERIMENTAL: add support for alpha channel
This is a (minor) bitstream change: if the 'color_space' bit is set to '1'
(which is normally an undefined/invalid behaviour), we add extra data at the
end of partition #0 (so-called 'extensions')
Namely, we add the size of the extension data as 3 bytes (little-endian),
followed by a set of bits telling which extensions we're incorporating.
The data then _preceeds_ this trailing tags.
This is all experimental, and you'll need to have
'#define WEBP_EXPERIMENTAL_FEATURES' in webp/types.h to enable this code
(at your own risk! :))
Still, this hack produces almost-valid WebP file for decoders that don't
check this color_space bit. In particular, previous 'dwebp' (and for instance
Chrome) will recognize this files and decode them, but without the alpha
of course. Other decoder will just see random extra stuff at the end of
partition #0.
To experiment with the alpha-channel, you need to compile on Unix platform
and use PNGs for input/output.
If 'alpha.png' is a source with alpha channel, then you can try (on Unix):
cwebp alpha.png -o alpha.webp
dwebp alpha.webp -o test.png
cwebp now has a '-noalpha' flag to ignore any alpha information from the
source, if present.
More hacking and experimenting welcome!
Change-Id: I3c7b1fd8411c9e7a9f77690e898479ad85c52f3e
2011-04-25 16:58:04 -07:00
|
|
|
io->a = VP8DecompressAlphaRows(dec, y_start, y_end - y_start);
|
|
|
|
if (io->a == NULL) {
|
|
|
|
return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR,
|
2011-04-28 15:52:06 -07:00
|
|
|
"Could not decode alpha data.");
|
EXPERIMENTAL: add support for alpha channel
This is a (minor) bitstream change: if the 'color_space' bit is set to '1'
(which is normally an undefined/invalid behaviour), we add extra data at the
end of partition #0 (so-called 'extensions')
Namely, we add the size of the extension data as 3 bytes (little-endian),
followed by a set of bits telling which extensions we're incorporating.
The data then _preceeds_ this trailing tags.
This is all experimental, and you'll need to have
'#define WEBP_EXPERIMENTAL_FEATURES' in webp/types.h to enable this code
(at your own risk! :))
Still, this hack produces almost-valid WebP file for decoders that don't
check this color_space bit. In particular, previous 'dwebp' (and for instance
Chrome) will recognize this files and decode them, but without the alpha
of course. Other decoder will just see random extra stuff at the end of
partition #0.
To experiment with the alpha-channel, you need to compile on Unix platform
and use PNGs for input/output.
If 'alpha.png' is a source with alpha channel, then you can try (on Unix):
cwebp alpha.png -o alpha.webp
dwebp alpha.webp -o test.png
cwebp now has a '-noalpha' flag to ignore any alpha information from the
source, if present.
More hacking and experimenting welcome!
Change-Id: I3c7b1fd8411c9e7a9f77690e898479ad85c52f3e
2011-04-25 16:58:04 -07:00
|
|
|
}
|
|
|
|
}
|
2011-06-20 00:45:15 -07:00
|
|
|
if (y_start < io->crop_top) {
|
|
|
|
const int delta_y = io->crop_top - y_start;
|
|
|
|
y_start = io->crop_top;
|
|
|
|
assert(!(delta_y & 1));
|
|
|
|
io->y += dec->cache_y_stride_ * delta_y;
|
|
|
|
io->u += dec->cache_uv_stride_ * (delta_y >> 1);
|
|
|
|
io->v += dec->cache_uv_stride_ * (delta_y >> 1);
|
2012-06-04 07:40:32 -07:00
|
|
|
if (io->a != NULL) {
|
2011-06-20 00:45:15 -07:00
|
|
|
io->a += io->width * delta_y;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (y_start < y_end) {
|
|
|
|
io->y += io->crop_left;
|
|
|
|
io->u += io->crop_left >> 1;
|
|
|
|
io->v += io->crop_left >> 1;
|
2012-06-04 07:40:32 -07:00
|
|
|
if (io->a != NULL) {
|
2011-06-20 00:45:15 -07:00
|
|
|
io->a += io->crop_left;
|
|
|
|
}
|
|
|
|
io->mb_y = y_start - io->crop_top;
|
|
|
|
io->mb_w = io->crop_right - io->crop_left;
|
|
|
|
io->mb_h = y_end - y_start;
|
2011-07-22 13:09:10 -07:00
|
|
|
ok = io->put(io);
|
2011-02-16 14:33:16 -08:00
|
|
|
}
|
2010-10-31 09:27:06 -07:00
|
|
|
}
|
2011-07-22 13:09:10 -07:00
|
|
|
// rotate top samples if needed
|
|
|
|
if (ctx->id_ + 1 == dec->num_caches_) {
|
|
|
|
if (!last_row) {
|
|
|
|
memcpy(dec->cache_y_ - ysize, ydst + 16 * dec->cache_y_stride_, ysize);
|
|
|
|
memcpy(dec->cache_u_ - uvsize, udst + 8 * dec->cache_uv_stride_, uvsize);
|
|
|
|
memcpy(dec->cache_v_ - uvsize, vdst + 8 * dec->cache_uv_stride_, uvsize);
|
|
|
|
}
|
2010-09-30 09:34:38 -04:00
|
|
|
}
|
2011-07-22 13:09:10 -07:00
|
|
|
|
|
|
|
return ok;
|
2010-09-30 09:34:38 -04:00
|
|
|
}
|
|
|
|
|
2011-06-20 00:45:15 -07:00
|
|
|
#undef MACROBLOCK_VPOS
|
|
|
|
|
2011-07-22 13:09:10 -07:00
|
|
|
//------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
// Processes the macroblock row that was just decoded (dec->mb_y_).
// Without threads, the row is finished synchronously with FinishRow().
// With threads, the previous worker job is synchronized first, then the thread
// context is refreshed and a new job is launched for this row.
// Returns false if finishing the row or synchronizing the worker failed.
int VP8ProcessRow(VP8Decoder* const dec, VP8Io* const io) {
  VP8ThreadContext* const ctx = &dec->thread_ctx_;
  int ok = 1;

  if (!dec->use_threads_) {
    // ctx->id_ and ctx->f_info_ are already set
    ctx->mb_y_ = dec->mb_y_;
    ctx->filter_row_ = dec->filter_row_;
    ok = FinishRow(dec, io);
    return ok;
  }

  {
    WebPWorker* const worker = &dec->worker_;
    // Finish previous job *before* updating context
    ok &= WebPWorkerSync(worker);
    assert(worker->status_ == OK);
    if (!ok) {
      return ok;
    }

    // spawn a new deblocking/output job
    ctx->io_ = *io;
    ctx->id_ = dec->cache_id_;
    ctx->mb_y_ = dec->mb_y_;
    ctx->filter_row_ = dec->filter_row_;
    if (ctx->filter_row_) {
      // just swap filter info: the job keeps the strengths of this row while
      // the decoder writes the next ones into the other buffer.
      VP8FInfo* const swapped = ctx->f_info_;
      ctx->f_info_ = dec->f_info_;
      dec->f_info_ = swapped;
    }
    WebPWorkerLaunch(worker);

    // Move on to the next cache slot, wrapping around.
    ++dec->cache_id_;
    if (dec->cache_id_ == dec->num_caches_) {
      dec->cache_id_ = 0;
    }
  }
  return ok;
}
|
|
|
|
|
2011-06-20 00:45:15 -07:00
|
|
|
//------------------------------------------------------------------------------
|
|
|
|
// Finish setting up the decoding parameter once user's setup() is called.
|
|
|
|
|
2011-07-22 13:09:10 -07:00
|
|
|
VP8StatusCode VP8EnterCritical(VP8Decoder* const dec, VP8Io* const io) {
|
2011-06-20 00:45:15 -07:00
|
|
|
// Call setup() first. This may trigger additional decoding features on 'io'.
|
2011-07-22 13:09:10 -07:00
|
|
|
// Note: Afterward, we must call teardown() not matter what.
|
2013-04-11 00:57:31 -07:00
|
|
|
if (io->setup != NULL && !io->setup(io)) {
|
2011-06-20 00:45:15 -07:00
|
|
|
VP8SetError(dec, VP8_STATUS_USER_ABORT, "Frame setup failed");
|
|
|
|
return dec->status_;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Disable filtering per user request
|
|
|
|
if (io->bypass_filtering) {
|
|
|
|
dec->filter_type_ = 0;
|
|
|
|
}
|
|
|
|
// TODO(skal): filter type / strength / sharpness forcing
|
|
|
|
|
|
|
|
// Define the area where we can skip in-loop filtering, in case of cropping.
|
|
|
|
//
|
|
|
|
// 'Simple' filter reads two luma samples outside of the macroblock and
|
|
|
|
// and filters one. It doesn't filter the chroma samples. Hence, we can
|
|
|
|
// avoid doing the in-loop filtering before crop_top/crop_left position.
|
|
|
|
// For the 'Complex' filter, 3 samples are read and up to 3 are filtered.
|
|
|
|
// Means: there's a dependency chain that goes all the way up to the
|
|
|
|
// top-left corner of the picture (MB #0). We must filter all the previous
|
|
|
|
// macroblocks.
|
|
|
|
// TODO(skal): add an 'approximate_decoding' option, that won't produce
|
|
|
|
// a 1:1 bit-exactness for complex filtering?
|
|
|
|
{
|
|
|
|
const int extra_pixels = kFilterExtraRows[dec->filter_type_];
|
|
|
|
if (dec->filter_type_ == 2) {
|
|
|
|
// For complex filter, we need to preserve the dependency chain.
|
|
|
|
dec->tl_mb_x_ = 0;
|
|
|
|
dec->tl_mb_y_ = 0;
|
|
|
|
} else {
|
|
|
|
// For simple filter, we can filter only the cropped region.
|
2011-12-05 08:37:55 -08:00
|
|
|
// We include 'extra_pixels' on the other side of the boundary, since
|
|
|
|
// vertical or horizontal filtering of the previous macroblock can
|
|
|
|
// modify some abutting pixels.
|
|
|
|
dec->tl_mb_x_ = (io->crop_left - extra_pixels) >> 4;
|
|
|
|
dec->tl_mb_y_ = (io->crop_top - extra_pixels) >> 4;
|
|
|
|
if (dec->tl_mb_x_ < 0) dec->tl_mb_x_ = 0;
|
|
|
|
if (dec->tl_mb_y_ < 0) dec->tl_mb_y_ = 0;
|
2011-06-20 00:45:15 -07:00
|
|
|
}
|
|
|
|
// We need some 'extra' pixels on the right/bottom.
|
|
|
|
dec->br_mb_y_ = (io->crop_bottom + 15 + extra_pixels) >> 4;
|
|
|
|
dec->br_mb_x_ = (io->crop_right + 15 + extra_pixels) >> 4;
|
|
|
|
if (dec->br_mb_x_ > dec->mb_w_) {
|
|
|
|
dec->br_mb_x_ = dec->mb_w_;
|
|
|
|
}
|
|
|
|
if (dec->br_mb_y_ > dec->mb_h_) {
|
|
|
|
dec->br_mb_y_ = dec->mb_h_;
|
|
|
|
}
|
|
|
|
}
|
2012-12-14 10:22:54 -08:00
|
|
|
PrecomputeFilterStrengths(dec);
|
2011-06-20 00:45:15 -07:00
|
|
|
return VP8_STATUS_OK;
|
|
|
|
}
|
|
|
|
|
2011-07-22 13:09:10 -07:00
|
|
|
// Ends the decoding of a frame: waits for the pending worker job (threaded
// mode only), then calls io->teardown() if set.
// Returns the result of the worker synchronization, or 1 without threads.
int VP8ExitCritical(VP8Decoder* const dec, VP8Io* const io) {
  const int ok = dec->use_threads_ ? WebPWorkerSync(&dec->worker_) : 1;

  if (io->teardown != NULL) {
    io->teardown(io);
  }
  return ok;
}
|
|
|
|
|
2012-01-27 17:39:47 -08:00
|
|
|
//------------------------------------------------------------------------------
|
|
|
|
// For multi-threaded decoding we need to use 3 rows of 16 pixels as delay line.
|
|
|
|
//
|
|
|
|
// Reason is: the deblocking filter cannot deblock the bottom horizontal edges
|
|
|
|
// immediately, and needs to wait for first few rows of the next macroblock to
|
|
|
|
// be decoded. Hence, deblocking is lagging behind by 4 or 8 pixels (depending
|
|
|
|
// on strength).
|
|
|
|
// With two threads, the vertical positions of the rows being decoded are:
|
|
|
|
// Decode: [ 0..15][16..31][32..47][48..63][64..79][...
|
|
|
|
// Deblock: [ 0..11][12..27][28..43][44..59][...
|
|
|
|
// If we use two threads and two caches of 16 pixels, the sequence would be:
|
|
|
|
// Decode: [ 0..15][16..31][ 0..15!!][16..31][ 0..15][...
|
|
|
|
// Deblock: [ 0..11][12..27!!][-4..11][12..27][...
|
|
|
|
// The problem occurs during row [12..15!!] that both the decoding and
|
|
|
|
// deblocking threads are writing simultaneously.
|
|
|
|
// With 3 cache lines, one gets a safe write pattern:
|
|
|
|
// Decode: [ 0..15][16..31][32..47][ 0..15][16..31][32..47][0..
|
|
|
|
// Deblock: [ 0..11][12..27][28..43][-4..11][12..27][28...
|
|
|
|
// Note that multi-threaded output _without_ deblocking can make use of two
|
|
|
|
// cache lines of 16 pixels only, since there's no lagging behind. The decoding
|
|
|
|
// and output process have non-concurrent writing:
|
|
|
|
// Decode: [ 0..15][16..31][ 0..15][16..31][...
|
|
|
|
// io->put: [ 0..15][16..31][ 0..15][...
|
|
|
|
|
|
|
|
#define MT_CACHE_LINES 3
|
|
|
|
#define ST_CACHE_LINES 1 // 1 cache row only for single-threaded case
|
|
|
|
|
|
|
|
// Initialize multi/single-thread worker
|
|
|
|
static int InitThreadContext(VP8Decoder* const dec) {
|
|
|
|
dec->cache_id_ = 0;
|
|
|
|
if (dec->use_threads_) {
|
|
|
|
WebPWorker* const worker = &dec->worker_;
|
|
|
|
if (!WebPWorkerReset(worker)) {
|
|
|
|
return VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY,
|
|
|
|
"thread initialization failed.");
|
|
|
|
}
|
|
|
|
worker->data1 = dec;
|
|
|
|
worker->data2 = (void*)&dec->thread_ctx_.io_;
|
|
|
|
worker->hook = (WebPWorkerHook)FinishRow;
|
|
|
|
dec->num_caches_ =
|
|
|
|
(dec->filter_type_ > 0) ? MT_CACHE_LINES : MT_CACHE_LINES - 1;
|
|
|
|
} else {
|
|
|
|
dec->num_caches_ = ST_CACHE_LINES;
|
|
|
|
}
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
#undef MT_CACHE_LINES
|
|
|
|
#undef ST_CACHE_LINES
|
|
|
|
|
|
|
|
//------------------------------------------------------------------------------
|
|
|
|
// Memory setup
|
|
|
|
|
|
|
|
// Sizes and carves up the single work buffer dec->mem_ used to decode a frame.
// The buffer is only re-allocated when the required size exceeds the current
// dec->mem_size_. Layout, in order:
//   intra prediction modes | top y/u/v samples | macroblock infos |
//   filter infos | (alignment padding) | yuv_b_ work area | coefficients |
//   y/u/v cache rows | alpha plane
// dec->num_caches_ must be set before calling.
// Returns 1 on success. On failure, records VP8_STATUS_OUT_OF_MEMORY with
// VP8SetError() and returns its result.
static int AllocateMemory(VP8Decoder* const dec) {
  const int num_caches = dec->num_caches_;
  const int mb_w = dec->mb_w_;
  // Note: we use 'size_t' when there's no overflow risk, uint64_t otherwise.
  const size_t intra_pred_mode_size = 4 * mb_w * sizeof(uint8_t);
  const size_t top_size = (16 + 8 + 8) * mb_w;
  const size_t mb_info_size = (mb_w + 1) * sizeof(VP8MB);
  // Threaded decoding needs two rows of filter info (see the swap performed
  // in VP8ProcessRow()).
  const size_t f_info_size =
      (dec->filter_type_ > 0) ?
          mb_w * (dec->use_threads_ ? 2 : 1) * sizeof(VP8FInfo)
        : 0;
  const size_t yuv_size = YUV_SIZE * sizeof(*dec->yuv_b_);
  const size_t coeffs_size = 384 * sizeof(*dec->coeffs_);
  const size_t cache_height = (16 * num_caches
                            + kFilterExtraRows[dec->filter_type_]) * 3 / 2;
  const size_t cache_size = top_size * cache_height;
  // alpha_size is the only one that scales as width x height.
  const uint64_t alpha_size = (dec->alpha_data_ != NULL) ?
      (uint64_t)dec->pic_hdr_.width_ * dec->pic_hdr_.height_ : 0ULL;
  const uint64_t needed = (uint64_t)intra_pred_mode_size
                        + top_size + mb_info_size + f_info_size
                        + yuv_size + coeffs_size
                        + cache_size + alpha_size + ALIGN_MASK;
  uint8_t* mem;

  if (needed != (size_t)needed) {
    // The total doesn't fit in a size_t. Record the failure like the
    // allocation error path below, instead of silently returning 0.
    return VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY,
                       "frame memory size overflows size_t.");
  }
  if (needed > dec->mem_size_) {
    free(dec->mem_);
    dec->mem_size_ = 0;
    dec->mem_ = WebPSafeMalloc(needed, sizeof(uint8_t));
    if (dec->mem_ == NULL) {
      return VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY,
                         "no memory during frame initialization.");
    }
    // down-cast is ok, thanks to WebPSafeMalloc() above.
    dec->mem_size_ = (size_t)needed;
  }

  mem = (uint8_t*)dec->mem_;
  dec->intra_t_ = (uint8_t*)mem;
  mem += intra_pred_mode_size;

  dec->y_t_ = (uint8_t*)mem;
  mem += 16 * mb_w;
  dec->u_t_ = (uint8_t*)mem;
  mem += 8 * mb_w;
  dec->v_t_ = (uint8_t*)mem;
  mem += 8 * mb_w;

  // mb_info_[-1] is a valid entry: the array holds mb_w + 1 elements.
  dec->mb_info_ = ((VP8MB*)mem) + 1;
  mem += mb_info_size;

  dec->f_info_ = f_info_size ? (VP8FInfo*)mem : NULL;
  mem += f_info_size;
  dec->thread_ctx_.id_ = 0;
  dec->thread_ctx_.f_info_ = dec->f_info_;
  if (dec->use_threads_) {
    // secondary cache line. The deblocking process need to make use of the
    // filtering strength from previous macroblock row, while the new ones
    // are being decoded in parallel. We'll just swap the pointers.
    dec->thread_ctx_.f_info_ += mb_w;
  }

  // Round 'mem' up to the next multiple of ALIGN_MASK + 1 bytes.
  mem = (uint8_t*)((uintptr_t)(mem + ALIGN_MASK) & ~ALIGN_MASK);
  assert((yuv_size & ALIGN_MASK) == 0);
  dec->yuv_b_ = (uint8_t*)mem;
  mem += yuv_size;

  dec->coeffs_ = (int16_t*)mem;
  mem += coeffs_size;

  dec->cache_y_stride_ = 16 * mb_w;
  dec->cache_uv_stride_ = 8 * mb_w;
  {
    // Each plane is preceded by room for the extra rows needed by the filter.
    const int extra_rows = kFilterExtraRows[dec->filter_type_];
    const int extra_y = extra_rows * dec->cache_y_stride_;
    const int extra_uv = (extra_rows / 2) * dec->cache_uv_stride_;
    dec->cache_y_ = ((uint8_t*)mem) + extra_y;
    dec->cache_u_ = dec->cache_y_
                  + 16 * num_caches * dec->cache_y_stride_ + extra_uv;
    dec->cache_v_ = dec->cache_u_
                  + 8 * num_caches * dec->cache_uv_stride_ + extra_uv;
    dec->cache_id_ = 0;
  }
  mem += cache_size;

  // alpha plane
  dec->alpha_plane_ = alpha_size ? (uint8_t*)mem : NULL;
  mem += alpha_size;
  assert(mem <= (uint8_t*)dec->mem_ + dec->mem_size_);

  // note: left-info is initialized once for all.
  memset(dec->mb_info_ - 1, 0, mb_info_size);

  // initialize top
  memset(dec->intra_t_, B_DC_PRED, intra_pred_mode_size);

  return 1;
}
|
|
|
|
|
|
|
|
// Prepares 'io' for the first row: planes and strides point at the decoder's
// cache, the vertical position is reset and the alpha pointer is cleared.
static void InitIo(VP8Decoder* const dec, VP8Io* io) {
  // position
  io->mb_y = 0;
  // luma
  io->y = dec->cache_y_;
  io->y_stride = dec->cache_y_stride_;
  // chroma
  io->u = dec->cache_u_;
  io->v = dec->cache_v_;
  io->uv_stride = dec->cache_uv_stride_;
  // alpha
  io->a = NULL;
}
|
|
|
|
|
|
|
|
// Sets up everything needed before decoding a frame: thread context, work
// memory, 'io' fields and the DSP function pointers.
// Returns 0 if the thread context or the memory could not be set up.
int VP8InitFrame(VP8Decoder* const dec, VP8Io* io) {
  // InitThreadContext() must be called first: it sets dec->num_caches_,
  // which AllocateMemory() reads.
  if (!InitThreadContext(dec) || !AllocateMemory(dec)) {
    return 0;
  }
  InitIo(dec, io);
  VP8DspInit();  // Init critical function pointers and look-up tables.
  return 1;
}
|
|
|
|
|
2011-06-20 00:45:15 -07:00
|
|
|
//------------------------------------------------------------------------------
|
2010-09-30 09:34:38 -04:00
|
|
|
// Main reconstruction function.
|
|
|
|
|
|
|
|
static const int kScan[16] = {
|
|
|
|
0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS,
|
|
|
|
0 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS,
|
|
|
|
0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS,
|
|
|
|
0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS
|
|
|
|
};
|
|
|
|
|
2013-04-24 14:33:44 +02:00
|
|
|
static WEBP_INLINE int CheckMode(int mb_x, int mb_y, int mode) {
|
2010-09-30 09:34:38 -04:00
|
|
|
if (mode == B_DC_PRED) {
|
2013-04-24 14:33:44 +02:00
|
|
|
if (mb_x == 0) {
|
|
|
|
return (mb_y == 0) ? B_DC_PRED_NOTOPLEFT : B_DC_PRED_NOLEFT;
|
2010-09-30 09:34:38 -04:00
|
|
|
} else {
|
2013-04-24 14:33:44 +02:00
|
|
|
return (mb_y == 0) ? B_DC_PRED_NOTOP : B_DC_PRED;
|
2010-09-30 09:34:38 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return mode;
|
|
|
|
}
|
|
|
|
|
2011-11-04 19:44:57 -07:00
|
|
|
// Copies four bytes from 'src' to 'dst'. The two areas must not overlap.
// memcpy() is used rather than a load/store through a casted uint32_t
// pointer, which would break the aliasing rules and assume 4-byte alignment.
static WEBP_INLINE void Copy32b(uint8_t* dst, const uint8_t* src) {
  memcpy(dst, src, sizeof(uint32_t));
}
|
|
|
|
|
|
|
|
// Reconstructs the current macroblock (dec->mb_x_, dec->mb_y_) in the yuv_b_
// work area, then copies the result to the row cache. Steps:
//   1. bring in the left samples (from the previous macroblock, or 129),
//   2. bring in the top samples (from the saved top row, or 127),
//   3. predict and add the residuals, for luma then chroma,
//   4. save the bottom samples as top samples for the next macroblock row,
//   5. copy the reconstructed samples into the cache slot dec->cache_id_.
void VP8ReconstructBlock(VP8Decoder* const dec) {
  int j;
  const int mb_x = dec->mb_x_;
  const int mb_y = dec->mb_y_;
  uint8_t* const y_dst = dec->yuv_b_ + Y_OFF;
  uint8_t* const u_dst = dec->yuv_b_ + U_OFF;
  uint8_t* const v_dst = dec->yuv_b_ + V_OFF;

  // Rotate in the left samples from previously decoded block. We move four
  // pixels at a time for alignment reason, and because of in-loop filter.
  if (mb_x > 0) {
    for (j = -1; j < 16; ++j) {
      uint8_t* const row = y_dst + j * BPS;
      Copy32b(&row[-4], &row[12]);
    }
    for (j = -1; j < 8; ++j) {
      uint8_t* const u_row = u_dst + j * BPS;
      uint8_t* const v_row = v_dst + j * BPS;
      Copy32b(&u_row[-4], &u_row[4]);
      Copy32b(&v_row[-4], &v_row[4]);
    }
  } else {
    for (j = 0; j < 16; ++j) {
      y_dst[j * BPS - 1] = 129;
    }
    for (j = 0; j < 8; ++j) {
      u_dst[j * BPS - 1] = 129;
      v_dst[j * BPS - 1] = 129;
    }
    // Init top-left sample on left column too
    if (mb_y > 0) {
      y_dst[-1 - BPS] = u_dst[-1 - BPS] = v_dst[-1 - BPS] = 129;
    }
  }

  {
    // bring top samples into the cache
    uint8_t* const top_y = dec->y_t_ + mb_x * 16;
    uint8_t* const top_u = dec->u_t_ + mb_x * 8;
    uint8_t* const top_v = dec->v_t_ + mb_x * 8;
    const int16_t* coeffs = dec->coeffs_;
    int n;

    if (mb_y > 0) {
      memcpy(y_dst - BPS, top_y, 16);
      memcpy(u_dst - BPS, top_u, 8);
      memcpy(v_dst - BPS, top_v, 8);
    } else if (mb_x == 0) {
      // we only need to do this init once at block (0,0).
      // Afterward, it remains valid for the whole topmost row.
      memset(y_dst - BPS - 1, 127, 16 + 4 + 1);
      memset(u_dst - BPS - 1, 127, 8 + 1);
      memset(v_dst - BPS - 1, 127, 8 + 1);
    }

    // predict and add residuals

    if (dec->is_i4x4_) {   // 4x4
      uint32_t* const top_right = (uint32_t*)(y_dst - BPS + 16);

      if (mb_y > 0) {
        if (mb_x >= dec->mb_w_ - 1) {    // on rightmost border
          top_right[0] = top_y[15] * 0x01010101u;
        } else {
          memcpy(top_right, top_y + 16, sizeof(*top_right));
        }
      }
      // replicate the top-right pixels below
      // (top_right is a uint32_t pointer: index BPS is four sample rows down)
      top_right[BPS] = top_right[2 * BPS] = top_right[3 * BPS] = top_right[0];

      // predict and add residues for all 4x4 blocks in turn.
      for (n = 0; n < 16; n++) {
        uint8_t* const dst = y_dst + kScan[n];
        const int16_t* const block_coeffs = coeffs + n * 16;
        VP8PredLuma4[dec->imodes_[n]](dst);
        if (dec->non_zero_ac_ & (1 << n)) {
          VP8Transform(block_coeffs, dst, 0);
        } else if (dec->non_zero_ & (1 << n)) {  // only DC is present
          VP8TransformDC(block_coeffs, dst);
        }
      }
    } else {    // 16x16
      const int pred_func = CheckMode(mb_x, mb_y, dec->imodes_[0]);
      VP8PredLuma16[pred_func](y_dst);
      if (dec->non_zero_) {
        for (n = 0; n < 16; n++) {
          uint8_t* const dst = y_dst + kScan[n];
          const int16_t* const block_coeffs = coeffs + n * 16;
          if (dec->non_zero_ac_ & (1 << n)) {
            VP8Transform(block_coeffs, dst, 0);
          } else if (dec->non_zero_ & (1 << n)) {  // only DC is present
            VP8TransformDC(block_coeffs, dst);
          }
        }
      }
    }

    {
      // Chroma
      const int pred_func = CheckMode(mb_x, mb_y, dec->uvmode_);
      VP8PredChroma8[pred_func](u_dst);
      VP8PredChroma8[pred_func](v_dst);

      if (dec->non_zero_ & 0x0f0000) {   // chroma-U
        const int16_t* const u_coeffs = dec->coeffs_ + 16 * 16;
        if (dec->non_zero_ac_ & 0x0f0000) {
          VP8TransformUV(u_coeffs, u_dst);
        } else {
          VP8TransformDCUV(u_coeffs, u_dst);
        }
      }
      if (dec->non_zero_ & 0xf00000) {   // chroma-V
        const int16_t* const v_coeffs = dec->coeffs_ + 20 * 16;
        if (dec->non_zero_ac_ & 0xf00000) {
          VP8TransformUV(v_coeffs, v_dst);
        } else {
          VP8TransformDCUV(v_coeffs, v_dst);
        }
      }

      // stash away top samples for next block
      if (mb_y < dec->mb_h_ - 1) {
        memcpy(top_y, y_dst + 15 * BPS, 16);
        memcpy(top_u, u_dst +  7 * BPS,  8);
        memcpy(top_v, v_dst +  7 * BPS,  8);
      }
    }
  }

  // Transfer reconstructed samples from yuv_b_ cache to final destination.
  {
    const int y_stride = dec->cache_y_stride_;
    const int uv_stride = dec->cache_uv_stride_;
    const int y_offset = dec->cache_id_ * 16 * y_stride;
    const int uv_offset = dec->cache_id_ * 8 * uv_stride;
    uint8_t* const y_out = dec->cache_y_ + mb_x * 16 + y_offset;
    uint8_t* const u_out = dec->cache_u_ + mb_x * 8 + uv_offset;
    uint8_t* const v_out = dec->cache_v_ + mb_x * 8 + uv_offset;
    for (j = 0; j < 16; ++j) {
      memcpy(y_out + j * y_stride, y_dst + j * BPS, 16);
    }
    for (j = 0; j < 8; ++j) {
      memcpy(u_out + j * uv_stride, u_dst + j * BPS, 8);
      memcpy(v_out + j * uv_stride, v_dst + j * BPS, 8);
    }
  }
}
|
|
|
|
|
2011-06-20 00:45:15 -07:00
|
|
|
//------------------------------------------------------------------------------
|
2010-09-30 09:34:38 -04:00
|
|
|
|
|
|
|
#if defined(__cplusplus) || defined(c_plusplus)
|
|
|
|
} // extern "C"
|
|
|
|
#endif
|