mirror of
https://github.com/webmproject/libwebp.git
synced 2025-04-04 16:06:49 +02:00
decoding speed-up (~1%)
- precompute filtering strength once and for all at the beginning instead of per-macroblock
- reduce size of VP8MB struct from 8 bytes to 4.
- removed VP8StoreBlock() accordingly

Change-Id: Icf3d329473e21c464770be3d72a04c9ee4c321f2
This commit is contained in:
parent
bcec339b01
commit
0f57dcc31f
100
src/dec/frame.c
100
src/dec/frame.c
@ -97,53 +97,50 @@ static void FilterRow(const VP8Decoder* const dec) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
|
// Precompute the filtering strength for each segment and each i4x4/i16x16 mode.
|
||||||
|
|
||||||
void VP8StoreBlock(VP8Decoder* const dec) {
|
static void PrecomputeFilterStrengths(VP8Decoder* const dec) {
|
||||||
if (dec->filter_type_ > 0) {
|
if (dec->filter_type_ > 0) {
|
||||||
VP8FInfo* const info = dec->f_info_ + dec->mb_x_;
|
int s;
|
||||||
const int skip = dec->mb_info_[dec->mb_x_].skip_;
|
const VP8FilterHeader* const hdr = &dec->filter_hdr_;
|
||||||
int level = dec->filter_levels_[dec->segment_];
|
for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
|
||||||
if (dec->filter_hdr_.use_lf_delta_) {
|
int i4x4;
|
||||||
// TODO(skal): only CURRENT is handled for now.
|
// First, compute the initial level
|
||||||
level += dec->filter_hdr_.ref_lf_delta_[0];
|
int base_level;
|
||||||
if (dec->is_i4x4_) {
|
if (dec->segment_hdr_.use_segment_) {
|
||||||
level += dec->filter_hdr_.mode_lf_delta_[0];
|
base_level = dec->segment_hdr_.filter_strength_[s];
|
||||||
}
|
if (!dec->segment_hdr_.absolute_delta_) {
|
||||||
}
|
base_level += hdr->level_;
|
||||||
level = (level < 0) ? 0 : (level > 63) ? 63 : level;
|
}
|
||||||
info->f_level_ = level;
|
|
||||||
|
|
||||||
if (dec->filter_hdr_.sharpness_ > 0) {
|
|
||||||
if (dec->filter_hdr_.sharpness_ > 4) {
|
|
||||||
level >>= 2;
|
|
||||||
} else {
|
} else {
|
||||||
level >>= 1;
|
base_level = hdr->level_;
|
||||||
}
|
}
|
||||||
if (level > 9 - dec->filter_hdr_.sharpness_) {
|
for (i4x4 = 0; i4x4 <= 1; ++i4x4) {
|
||||||
level = 9 - dec->filter_hdr_.sharpness_;
|
VP8FInfo* const info = &dec->fstrengths_[s][i4x4];
|
||||||
}
|
int level = base_level;
|
||||||
}
|
if (hdr->use_lf_delta_) {
|
||||||
|
// TODO(skal): only CURRENT is handled for now.
|
||||||
|
level += hdr->ref_lf_delta_[0];
|
||||||
|
if (i4x4) {
|
||||||
|
level += hdr->mode_lf_delta_[0];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
level = (level < 0) ? 0 : (level > 63) ? 63 : level;
|
||||||
|
info->f_level_ = level;
|
||||||
|
|
||||||
info->f_ilevel_ = (level < 1) ? 1 : level;
|
if (hdr->sharpness_ > 0) {
|
||||||
info->f_inner_ = (!skip || dec->is_i4x4_);
|
if (hdr->sharpness_ > 4) {
|
||||||
}
|
level >>= 2;
|
||||||
{
|
} else {
|
||||||
// Transfer samples to row cache
|
level >>= 1;
|
||||||
int y;
|
}
|
||||||
const int y_offset = dec->cache_id_ * 16 * dec->cache_y_stride_;
|
if (level > 9 - hdr->sharpness_) {
|
||||||
const int uv_offset = dec->cache_id_ * 8 * dec->cache_uv_stride_;
|
level = 9 - hdr->sharpness_;
|
||||||
uint8_t* const ydst = dec->cache_y_ + dec->mb_x_ * 16 + y_offset;
|
}
|
||||||
uint8_t* const udst = dec->cache_u_ + dec->mb_x_ * 8 + uv_offset;
|
}
|
||||||
uint8_t* const vdst = dec->cache_v_ + dec->mb_x_ * 8 + uv_offset;
|
info->f_ilevel_ = (level < 1) ? 1 : level;
|
||||||
for (y = 0; y < 16; ++y) {
|
info->f_inner_ = 0;
|
||||||
memcpy(ydst + y * dec->cache_y_stride_,
|
}
|
||||||
dec->yuv_b_ + Y_OFF + y * BPS, 16);
|
|
||||||
}
|
|
||||||
for (y = 0; y < 8; ++y) {
|
|
||||||
memcpy(udst + y * dec->cache_uv_stride_,
|
|
||||||
dec->yuv_b_ + U_OFF + y * BPS, 8);
|
|
||||||
memcpy(vdst + y * dec->cache_uv_stride_,
|
|
||||||
dec->yuv_b_ + V_OFF + y * BPS, 8);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -339,6 +336,7 @@ VP8StatusCode VP8EnterCritical(VP8Decoder* const dec, VP8Io* const io) {
|
|||||||
dec->br_mb_y_ = dec->mb_h_;
|
dec->br_mb_y_ = dec->mb_h_;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
PrecomputeFilterStrengths(dec);
|
||||||
return VP8_STATUS_OK;
|
return VP8_STATUS_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -551,6 +549,7 @@ static WEBP_INLINE void Copy32b(uint8_t* dst, uint8_t* src) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void VP8ReconstructBlock(VP8Decoder* const dec) {
|
void VP8ReconstructBlock(VP8Decoder* const dec) {
|
||||||
|
int j;
|
||||||
uint8_t* const y_dst = dec->yuv_b_ + Y_OFF;
|
uint8_t* const y_dst = dec->yuv_b_ + Y_OFF;
|
||||||
uint8_t* const u_dst = dec->yuv_b_ + U_OFF;
|
uint8_t* const u_dst = dec->yuv_b_ + U_OFF;
|
||||||
uint8_t* const v_dst = dec->yuv_b_ + V_OFF;
|
uint8_t* const v_dst = dec->yuv_b_ + V_OFF;
|
||||||
@ -558,7 +557,6 @@ void VP8ReconstructBlock(VP8Decoder* const dec) {
|
|||||||
// Rotate in the left samples from previously decoded block. We move four
|
// Rotate in the left samples from previously decoded block. We move four
|
||||||
// pixels at a time for alignment reason, and because of in-loop filter.
|
// pixels at a time for alignment reason, and because of in-loop filter.
|
||||||
if (dec->mb_x_ > 0) {
|
if (dec->mb_x_ > 0) {
|
||||||
int j;
|
|
||||||
for (j = -1; j < 16; ++j) {
|
for (j = -1; j < 16; ++j) {
|
||||||
Copy32b(&y_dst[j * BPS - 4], &y_dst[j * BPS + 12]);
|
Copy32b(&y_dst[j * BPS - 4], &y_dst[j * BPS + 12]);
|
||||||
}
|
}
|
||||||
@ -567,7 +565,6 @@ void VP8ReconstructBlock(VP8Decoder* const dec) {
|
|||||||
Copy32b(&v_dst[j * BPS - 4], &v_dst[j * BPS + 4]);
|
Copy32b(&v_dst[j * BPS - 4], &v_dst[j * BPS + 4]);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
int j;
|
|
||||||
for (j = 0; j < 16; ++j) {
|
for (j = 0; j < 16; ++j) {
|
||||||
y_dst[j * BPS - 1] = 129;
|
y_dst[j * BPS - 1] = 129;
|
||||||
}
|
}
|
||||||
@ -670,6 +667,21 @@ void VP8ReconstructBlock(VP8Decoder* const dec) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// Transfer reconstructed samples from yuv_b_ cache to final destination.
|
||||||
|
{
|
||||||
|
const int y_offset = dec->cache_id_ * 16 * dec->cache_y_stride_;
|
||||||
|
const int uv_offset = dec->cache_id_ * 8 * dec->cache_uv_stride_;
|
||||||
|
uint8_t* const y_out = dec->cache_y_ + dec->mb_x_ * 16 + y_offset;
|
||||||
|
uint8_t* const u_out = dec->cache_u_ + dec->mb_x_ * 8 + uv_offset;
|
||||||
|
uint8_t* const v_out = dec->cache_v_ + dec->mb_x_ * 8 + uv_offset;
|
||||||
|
for (j = 0; j < 16; ++j) {
|
||||||
|
memcpy(y_out + j * dec->cache_y_stride_, y_dst + j * BPS, 16);
|
||||||
|
}
|
||||||
|
for (j = 0; j < 8; ++j) {
|
||||||
|
memcpy(u_out + j * dec->cache_uv_stride_, u_dst + j * BPS, 8);
|
||||||
|
memcpy(v_out + j * dec->cache_uv_stride_, v_dst + j * BPS, 8);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
|
@ -425,9 +425,8 @@ static VP8StatusCode DecodeRemaining(WebPIDecoder* const idec) {
|
|||||||
}
|
}
|
||||||
return VP8_STATUS_SUSPENDED;
|
return VP8_STATUS_SUSPENDED;
|
||||||
}
|
}
|
||||||
|
// Reconstruct and emit samples.
|
||||||
VP8ReconstructBlock(dec);
|
VP8ReconstructBlock(dec);
|
||||||
// Store data and save block's filtering params
|
|
||||||
VP8StoreBlock(dec);
|
|
||||||
|
|
||||||
// Release buffer only if there is only one partition
|
// Release buffer only if there is only one partition
|
||||||
if (dec->num_parts_ == 1) {
|
if (dec->num_parts_ == 1) {
|
||||||
|
@ -236,20 +236,6 @@ static int ParseFilterHeader(VP8BitReader* br, VP8Decoder* const dec) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
dec->filter_type_ = (hdr->level_ == 0) ? 0 : hdr->simple_ ? 1 : 2;
|
dec->filter_type_ = (hdr->level_ == 0) ? 0 : hdr->simple_ ? 1 : 2;
|
||||||
if (dec->filter_type_ > 0) { // precompute filter levels per segment
|
|
||||||
if (dec->segment_hdr_.use_segment_) {
|
|
||||||
int s;
|
|
||||||
for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
|
|
||||||
int strength = dec->segment_hdr_.filter_strength_[s];
|
|
||||||
if (!dec->segment_hdr_.absolute_delta_) {
|
|
||||||
strength += hdr->level_;
|
|
||||||
}
|
|
||||||
dec->filter_levels_[s] = strength;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
dec->filter_levels_[0] = hdr->level_;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return !br->eof_;
|
return !br->eof_;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -675,6 +661,12 @@ int VP8DecodeMB(VP8Decoder* const dec, VP8BitReader* const token_br) {
|
|||||||
dec->non_zero_ac_ = 0;
|
dec->non_zero_ac_ = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (dec->filter_type_ > 0) { // store filter info
|
||||||
|
VP8FInfo* const finfo = dec->f_info_ + dec->mb_x_;
|
||||||
|
*finfo = dec->fstrengths_[dec->segment_][dec->is_i4x4_];
|
||||||
|
finfo->f_inner_ = (!info->skip_ || dec->is_i4x4_);
|
||||||
|
}
|
||||||
|
|
||||||
return (!token_br->eof_);
|
return (!token_br->eof_);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -698,10 +690,8 @@ static int ParseFrame(VP8Decoder* const dec, VP8Io* io) {
|
|||||||
return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA,
|
return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA,
|
||||||
"Premature end-of-file encountered.");
|
"Premature end-of-file encountered.");
|
||||||
}
|
}
|
||||||
|
// Reconstruct and emit samples.
|
||||||
VP8ReconstructBlock(dec);
|
VP8ReconstructBlock(dec);
|
||||||
|
|
||||||
// Store data and save block's filtering params
|
|
||||||
VP8StoreBlock(dec);
|
|
||||||
}
|
}
|
||||||
if (!VP8ProcessRow(dec, io)) {
|
if (!VP8ProcessRow(dec, io)) {
|
||||||
return VP8SetError(dec, VP8_STATUS_USER_ABORT, "Output aborted.");
|
return VP8SetError(dec, VP8_STATUS_USER_ABORT, "Output aborted.");
|
||||||
|
@ -157,7 +157,7 @@ typedef struct { // filter specs
|
|||||||
} VP8FInfo;
|
} VP8FInfo;
|
||||||
|
|
||||||
typedef struct { // used for syntax-parsing
|
typedef struct { // used for syntax-parsing
|
||||||
unsigned int nz_; // non-zero AC/DC coeffs
|
unsigned int nz_:24; // non-zero AC/DC coeffs (24bit)
|
||||||
unsigned int dc_nz_:1; // non-zero DC coeffs
|
unsigned int dc_nz_:1; // non-zero DC coeffs
|
||||||
unsigned int skip_:1; // block type
|
unsigned int skip_:1; // block type
|
||||||
} VP8MB;
|
} VP8MB;
|
||||||
@ -269,9 +269,9 @@ struct VP8Decoder {
|
|||||||
uint32_t non_zero_ac_;
|
uint32_t non_zero_ac_;
|
||||||
|
|
||||||
// Filtering side-info
|
// Filtering side-info
|
||||||
int filter_type_; // 0=off, 1=simple, 2=complex
|
int filter_type_; // 0=off, 1=simple, 2=complex
|
||||||
int filter_row_; // per-row flag
|
int filter_row_; // per-row flag
|
||||||
uint8_t filter_levels_[NUM_MB_SEGMENTS]; // precalculated per-segment
|
VP8FInfo fstrengths_[NUM_MB_SEGMENTS][2]; // precalculated per-segment/type
|
||||||
|
|
||||||
// extensions
|
// extensions
|
||||||
const uint8_t* alpha_data_; // compressed alpha data (if present)
|
const uint8_t* alpha_data_; // compressed alpha data (if present)
|
||||||
@ -312,8 +312,6 @@ VP8StatusCode VP8EnterCritical(VP8Decoder* const dec, VP8Io* const io);
|
|||||||
int VP8ExitCritical(VP8Decoder* const dec, VP8Io* const io);
|
int VP8ExitCritical(VP8Decoder* const dec, VP8Io* const io);
|
||||||
// Process the last decoded row (filtering + output)
|
// Process the last decoded row (filtering + output)
|
||||||
int VP8ProcessRow(VP8Decoder* const dec, VP8Io* const io);
|
int VP8ProcessRow(VP8Decoder* const dec, VP8Io* const io);
|
||||||
// Store a block, along with filtering params
|
|
||||||
void VP8StoreBlock(VP8Decoder* const dec);
|
|
||||||
// To be called at the start of a new scanline, to initialize predictors.
|
// To be called at the start of a new scanline, to initialize predictors.
|
||||||
void VP8InitScanline(VP8Decoder* const dec);
|
void VP8InitScanline(VP8Decoder* const dec);
|
||||||
// Decode one macroblock. Returns false if there is not enough data.
|
// Decode one macroblock. Returns false if there is not enough data.
|
||||||
|
Loading…
x
Reference in New Issue
Block a user