mirror of
https://github.com/webmproject/libwebp.git
synced 2024-12-27 22:28:22 +01:00
decoding speed-up (~1%)
- precompute filtering strength once and for all at the beginning instead of per-macroblock
- reduce size of VP8MB struct from 8 bytes to 4
- remove VP8StoreBlock() accordingly

Change-Id: Icf3d329473e21c464770be3d72a04c9ee4c321f2
This commit is contained in:
parent
bcec339b01
commit
0f57dcc31f
100
src/dec/frame.c
100
src/dec/frame.c
@ -97,53 +97,50 @@ static void FilterRow(const VP8Decoder* const dec) {
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Precompute the filtering strength for each segment and each i4x4/i16x16 mode.
|
||||
|
||||
void VP8StoreBlock(VP8Decoder* const dec) {
|
||||
static void PrecomputeFilterStrengths(VP8Decoder* const dec) {
|
||||
if (dec->filter_type_ > 0) {
|
||||
VP8FInfo* const info = dec->f_info_ + dec->mb_x_;
|
||||
const int skip = dec->mb_info_[dec->mb_x_].skip_;
|
||||
int level = dec->filter_levels_[dec->segment_];
|
||||
if (dec->filter_hdr_.use_lf_delta_) {
|
||||
// TODO(skal): only CURRENT is handled for now.
|
||||
level += dec->filter_hdr_.ref_lf_delta_[0];
|
||||
if (dec->is_i4x4_) {
|
||||
level += dec->filter_hdr_.mode_lf_delta_[0];
|
||||
}
|
||||
}
|
||||
level = (level < 0) ? 0 : (level > 63) ? 63 : level;
|
||||
info->f_level_ = level;
|
||||
|
||||
if (dec->filter_hdr_.sharpness_ > 0) {
|
||||
if (dec->filter_hdr_.sharpness_ > 4) {
|
||||
level >>= 2;
|
||||
int s;
|
||||
const VP8FilterHeader* const hdr = &dec->filter_hdr_;
|
||||
for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
|
||||
int i4x4;
|
||||
// First, compute the initial level
|
||||
int base_level;
|
||||
if (dec->segment_hdr_.use_segment_) {
|
||||
base_level = dec->segment_hdr_.filter_strength_[s];
|
||||
if (!dec->segment_hdr_.absolute_delta_) {
|
||||
base_level += hdr->level_;
|
||||
}
|
||||
} else {
|
||||
level >>= 1;
|
||||
base_level = hdr->level_;
|
||||
}
|
||||
if (level > 9 - dec->filter_hdr_.sharpness_) {
|
||||
level = 9 - dec->filter_hdr_.sharpness_;
|
||||
}
|
||||
}
|
||||
for (i4x4 = 0; i4x4 <= 1; ++i4x4) {
|
||||
VP8FInfo* const info = &dec->fstrengths_[s][i4x4];
|
||||
int level = base_level;
|
||||
if (hdr->use_lf_delta_) {
|
||||
// TODO(skal): only CURRENT is handled for now.
|
||||
level += hdr->ref_lf_delta_[0];
|
||||
if (i4x4) {
|
||||
level += hdr->mode_lf_delta_[0];
|
||||
}
|
||||
}
|
||||
level = (level < 0) ? 0 : (level > 63) ? 63 : level;
|
||||
info->f_level_ = level;
|
||||
|
||||
info->f_ilevel_ = (level < 1) ? 1 : level;
|
||||
info->f_inner_ = (!skip || dec->is_i4x4_);
|
||||
}
|
||||
{
|
||||
// Transfer samples to row cache
|
||||
int y;
|
||||
const int y_offset = dec->cache_id_ * 16 * dec->cache_y_stride_;
|
||||
const int uv_offset = dec->cache_id_ * 8 * dec->cache_uv_stride_;
|
||||
uint8_t* const ydst = dec->cache_y_ + dec->mb_x_ * 16 + y_offset;
|
||||
uint8_t* const udst = dec->cache_u_ + dec->mb_x_ * 8 + uv_offset;
|
||||
uint8_t* const vdst = dec->cache_v_ + dec->mb_x_ * 8 + uv_offset;
|
||||
for (y = 0; y < 16; ++y) {
|
||||
memcpy(ydst + y * dec->cache_y_stride_,
|
||||
dec->yuv_b_ + Y_OFF + y * BPS, 16);
|
||||
}
|
||||
for (y = 0; y < 8; ++y) {
|
||||
memcpy(udst + y * dec->cache_uv_stride_,
|
||||
dec->yuv_b_ + U_OFF + y * BPS, 8);
|
||||
memcpy(vdst + y * dec->cache_uv_stride_,
|
||||
dec->yuv_b_ + V_OFF + y * BPS, 8);
|
||||
if (hdr->sharpness_ > 0) {
|
||||
if (hdr->sharpness_ > 4) {
|
||||
level >>= 2;
|
||||
} else {
|
||||
level >>= 1;
|
||||
}
|
||||
if (level > 9 - hdr->sharpness_) {
|
||||
level = 9 - hdr->sharpness_;
|
||||
}
|
||||
}
|
||||
info->f_ilevel_ = (level < 1) ? 1 : level;
|
||||
info->f_inner_ = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -339,6 +336,7 @@ VP8StatusCode VP8EnterCritical(VP8Decoder* const dec, VP8Io* const io) {
|
||||
dec->br_mb_y_ = dec->mb_h_;
|
||||
}
|
||||
}
|
||||
PrecomputeFilterStrengths(dec);
|
||||
return VP8_STATUS_OK;
|
||||
}
|
||||
|
||||
@ -551,6 +549,7 @@ static WEBP_INLINE void Copy32b(uint8_t* dst, uint8_t* src) {
|
||||
}
|
||||
|
||||
void VP8ReconstructBlock(VP8Decoder* const dec) {
|
||||
int j;
|
||||
uint8_t* const y_dst = dec->yuv_b_ + Y_OFF;
|
||||
uint8_t* const u_dst = dec->yuv_b_ + U_OFF;
|
||||
uint8_t* const v_dst = dec->yuv_b_ + V_OFF;
|
||||
@ -558,7 +557,6 @@ void VP8ReconstructBlock(VP8Decoder* const dec) {
|
||||
// Rotate in the left samples from previously decoded block. We move four
|
||||
// pixels at a time for alignment reason, and because of in-loop filter.
|
||||
if (dec->mb_x_ > 0) {
|
||||
int j;
|
||||
for (j = -1; j < 16; ++j) {
|
||||
Copy32b(&y_dst[j * BPS - 4], &y_dst[j * BPS + 12]);
|
||||
}
|
||||
@ -567,7 +565,6 @@ void VP8ReconstructBlock(VP8Decoder* const dec) {
|
||||
Copy32b(&v_dst[j * BPS - 4], &v_dst[j * BPS + 4]);
|
||||
}
|
||||
} else {
|
||||
int j;
|
||||
for (j = 0; j < 16; ++j) {
|
||||
y_dst[j * BPS - 1] = 129;
|
||||
}
|
||||
@ -670,6 +667,21 @@ void VP8ReconstructBlock(VP8Decoder* const dec) {
|
||||
}
|
||||
}
|
||||
}
|
||||
// Transfer reconstructed samples from yuv_b_ cache to final destination.
|
||||
{
|
||||
const int y_offset = dec->cache_id_ * 16 * dec->cache_y_stride_;
|
||||
const int uv_offset = dec->cache_id_ * 8 * dec->cache_uv_stride_;
|
||||
uint8_t* const y_out = dec->cache_y_ + dec->mb_x_ * 16 + y_offset;
|
||||
uint8_t* const u_out = dec->cache_u_ + dec->mb_x_ * 8 + uv_offset;
|
||||
uint8_t* const v_out = dec->cache_v_ + dec->mb_x_ * 8 + uv_offset;
|
||||
for (j = 0; j < 16; ++j) {
|
||||
memcpy(y_out + j * dec->cache_y_stride_, y_dst + j * BPS, 16);
|
||||
}
|
||||
for (j = 0; j < 8; ++j) {
|
||||
memcpy(u_out + j * dec->cache_uv_stride_, u_dst + j * BPS, 8);
|
||||
memcpy(v_out + j * dec->cache_uv_stride_, v_dst + j * BPS, 8);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
@ -425,9 +425,8 @@ static VP8StatusCode DecodeRemaining(WebPIDecoder* const idec) {
|
||||
}
|
||||
return VP8_STATUS_SUSPENDED;
|
||||
}
|
||||
// Reconstruct and emit samples.
|
||||
VP8ReconstructBlock(dec);
|
||||
// Store data and save block's filtering params
|
||||
VP8StoreBlock(dec);
|
||||
|
||||
// Release buffer only if there is only one partition
|
||||
if (dec->num_parts_ == 1) {
|
||||
|
@ -236,20 +236,6 @@ static int ParseFilterHeader(VP8BitReader* br, VP8Decoder* const dec) {
|
||||
}
|
||||
}
|
||||
dec->filter_type_ = (hdr->level_ == 0) ? 0 : hdr->simple_ ? 1 : 2;
|
||||
if (dec->filter_type_ > 0) { // precompute filter levels per segment
|
||||
if (dec->segment_hdr_.use_segment_) {
|
||||
int s;
|
||||
for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
|
||||
int strength = dec->segment_hdr_.filter_strength_[s];
|
||||
if (!dec->segment_hdr_.absolute_delta_) {
|
||||
strength += hdr->level_;
|
||||
}
|
||||
dec->filter_levels_[s] = strength;
|
||||
}
|
||||
} else {
|
||||
dec->filter_levels_[0] = hdr->level_;
|
||||
}
|
||||
}
|
||||
return !br->eof_;
|
||||
}
|
||||
|
||||
@ -675,6 +661,12 @@ int VP8DecodeMB(VP8Decoder* const dec, VP8BitReader* const token_br) {
|
||||
dec->non_zero_ac_ = 0;
|
||||
}
|
||||
|
||||
if (dec->filter_type_ > 0) { // store filter info
|
||||
VP8FInfo* const finfo = dec->f_info_ + dec->mb_x_;
|
||||
*finfo = dec->fstrengths_[dec->segment_][dec->is_i4x4_];
|
||||
finfo->f_inner_ = (!info->skip_ || dec->is_i4x4_);
|
||||
}
|
||||
|
||||
return (!token_br->eof_);
|
||||
}
|
||||
|
||||
@ -698,10 +690,8 @@ static int ParseFrame(VP8Decoder* const dec, VP8Io* io) {
|
||||
return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA,
|
||||
"Premature end-of-file encountered.");
|
||||
}
|
||||
// Reconstruct and emit samples.
|
||||
VP8ReconstructBlock(dec);
|
||||
|
||||
// Store data and save block's filtering params
|
||||
VP8StoreBlock(dec);
|
||||
}
|
||||
if (!VP8ProcessRow(dec, io)) {
|
||||
return VP8SetError(dec, VP8_STATUS_USER_ABORT, "Output aborted.");
|
||||
|
@ -157,7 +157,7 @@ typedef struct { // filter specs
|
||||
} VP8FInfo;
|
||||
|
||||
typedef struct { // used for syntax-parsing
|
||||
unsigned int nz_; // non-zero AC/DC coeffs
|
||||
unsigned int nz_:24; // non-zero AC/DC coeffs (24bit)
|
||||
unsigned int dc_nz_:1; // non-zero DC coeffs
|
||||
unsigned int skip_:1; // block type
|
||||
} VP8MB;
|
||||
@ -269,9 +269,9 @@ struct VP8Decoder {
|
||||
uint32_t non_zero_ac_;
|
||||
|
||||
// Filtering side-info
|
||||
int filter_type_; // 0=off, 1=simple, 2=complex
|
||||
int filter_row_; // per-row flag
|
||||
uint8_t filter_levels_[NUM_MB_SEGMENTS]; // precalculated per-segment
|
||||
int filter_type_; // 0=off, 1=simple, 2=complex
|
||||
int filter_row_; // per-row flag
|
||||
VP8FInfo fstrengths_[NUM_MB_SEGMENTS][2]; // precalculated per-segment/type
|
||||
|
||||
// extensions
|
||||
const uint8_t* alpha_data_; // compressed alpha data (if present)
|
||||
@ -312,8 +312,6 @@ VP8StatusCode VP8EnterCritical(VP8Decoder* const dec, VP8Io* const io);
|
||||
int VP8ExitCritical(VP8Decoder* const dec, VP8Io* const io);
|
||||
// Process the last decoded row (filtering + output)
|
||||
int VP8ProcessRow(VP8Decoder* const dec, VP8Io* const io);
|
||||
// Store a block, along with filtering params
|
||||
void VP8StoreBlock(VP8Decoder* const dec);
|
||||
// To be called at the start of a new scanline, to initialize predictors.
|
||||
void VP8InitScanline(VP8Decoder* const dec);
|
||||
// Decode one macroblock. Returns false if there is not enough data.
|
||||
|
Loading…
Reference in New Issue
Block a user