diff --git a/src/dec/frame.c b/src/dec/frame.c index 9c91a48e..2e7964ad 100644 --- a/src/dec/frame.c +++ b/src/dec/frame.c @@ -97,53 +97,50 @@ static void FilterRow(const VP8Decoder* const dec) { } //------------------------------------------------------------------------------ +// Precompute the filtering strength for each segment and each i4x4/i16x16 mode. -void VP8StoreBlock(VP8Decoder* const dec) { +static void PrecomputeFilterStrengths(VP8Decoder* const dec) { if (dec->filter_type_ > 0) { - VP8FInfo* const info = dec->f_info_ + dec->mb_x_; - const int skip = dec->mb_info_[dec->mb_x_].skip_; - int level = dec->filter_levels_[dec->segment_]; - if (dec->filter_hdr_.use_lf_delta_) { - // TODO(skal): only CURRENT is handled for now. - level += dec->filter_hdr_.ref_lf_delta_[0]; - if (dec->is_i4x4_) { - level += dec->filter_hdr_.mode_lf_delta_[0]; - } - } - level = (level < 0) ? 0 : (level > 63) ? 63 : level; - info->f_level_ = level; - - if (dec->filter_hdr_.sharpness_ > 0) { - if (dec->filter_hdr_.sharpness_ > 4) { - level >>= 2; + int s; + const VP8FilterHeader* const hdr = &dec->filter_hdr_; + for (s = 0; s < NUM_MB_SEGMENTS; ++s) { + int i4x4; + // First, compute the initial level + int base_level; + if (dec->segment_hdr_.use_segment_) { + base_level = dec->segment_hdr_.filter_strength_[s]; + if (!dec->segment_hdr_.absolute_delta_) { + base_level += hdr->level_; + } } else { - level >>= 1; + base_level = hdr->level_; } - if (level > 9 - dec->filter_hdr_.sharpness_) { - level = 9 - dec->filter_hdr_.sharpness_; - } - } + for (i4x4 = 0; i4x4 <= 1; ++i4x4) { + VP8FInfo* const info = &dec->fstrengths_[s][i4x4]; + int level = base_level; + if (hdr->use_lf_delta_) { + // TODO(skal): only CURRENT is handled for now. + level += hdr->ref_lf_delta_[0]; + if (i4x4) { + level += hdr->mode_lf_delta_[0]; + } + } + level = (level < 0) ? 0 : (level > 63) ? 63 : level; + info->f_level_ = level; - info->f_ilevel_ = (level < 1) ? 1 : level; - info->f_inner_ = (!skip || dec->is_i4x4_); - } - { - // Transfer samples to row cache - int y; - const int y_offset = dec->cache_id_ * 16 * dec->cache_y_stride_; - const int uv_offset = dec->cache_id_ * 8 * dec->cache_uv_stride_; - uint8_t* const ydst = dec->cache_y_ + dec->mb_x_ * 16 + y_offset; - uint8_t* const udst = dec->cache_u_ + dec->mb_x_ * 8 + uv_offset; - uint8_t* const vdst = dec->cache_v_ + dec->mb_x_ * 8 + uv_offset; - for (y = 0; y < 16; ++y) { - memcpy(ydst + y * dec->cache_y_stride_, - dec->yuv_b_ + Y_OFF + y * BPS, 16); - } - for (y = 0; y < 8; ++y) { - memcpy(udst + y * dec->cache_uv_stride_, - dec->yuv_b_ + U_OFF + y * BPS, 8); - memcpy(vdst + y * dec->cache_uv_stride_, - dec->yuv_b_ + V_OFF + y * BPS, 8); + if (hdr->sharpness_ > 0) { + if (hdr->sharpness_ > 4) { + level >>= 2; + } else { + level >>= 1; + } + if (level > 9 - hdr->sharpness_) { + level = 9 - hdr->sharpness_; + } + } + info->f_ilevel_ = (level < 1) ? 1 : level; + info->f_inner_ = 0; + } } } } @@ -339,6 +336,7 @@ VP8StatusCode VP8EnterCritical(VP8Decoder* const dec, VP8Io* const io) { dec->br_mb_y_ = dec->mb_h_; } } + PrecomputeFilterStrengths(dec); return VP8_STATUS_OK; } @@ -551,6 +549,7 @@ static WEBP_INLINE void Copy32b(uint8_t* dst, uint8_t* src) { } void VP8ReconstructBlock(VP8Decoder* const dec) { + int j; uint8_t* const y_dst = dec->yuv_b_ + Y_OFF; uint8_t* const u_dst = dec->yuv_b_ + U_OFF; uint8_t* const v_dst = dec->yuv_b_ + V_OFF; @@ -558,7 +557,6 @@ void VP8ReconstructBlock(VP8Decoder* const dec) { // Rotate in the left samples from previously decoded block. We move four // pixels at a time for alignment reason, and because of in-loop filter. if (dec->mb_x_ > 0) { - int j; for (j = -1; j < 16; ++j) { Copy32b(&y_dst[j * BPS - 4], &y_dst[j * BPS + 12]); } @@ -567,7 +565,6 @@ void VP8ReconstructBlock(VP8Decoder* const dec) { Copy32b(&v_dst[j * BPS - 4], &v_dst[j * BPS + 4]); } } else { - int j; for (j = 0; j < 16; ++j) { y_dst[j * BPS - 1] = 129; } @@ -670,6 +667,21 @@ void VP8ReconstructBlock(VP8Decoder* const dec) { } } } + // Transfer reconstructed samples from yuv_b_ cache to final destination. + { + const int y_offset = dec->cache_id_ * 16 * dec->cache_y_stride_; + const int uv_offset = dec->cache_id_ * 8 * dec->cache_uv_stride_; + uint8_t* const y_out = dec->cache_y_ + dec->mb_x_ * 16 + y_offset; + uint8_t* const u_out = dec->cache_u_ + dec->mb_x_ * 8 + uv_offset; + uint8_t* const v_out = dec->cache_v_ + dec->mb_x_ * 8 + uv_offset; + for (j = 0; j < 16; ++j) { + memcpy(y_out + j * dec->cache_y_stride_, y_dst + j * BPS, 16); + } + for (j = 0; j < 8; ++j) { + memcpy(u_out + j * dec->cache_uv_stride_, u_dst + j * BPS, 8); + memcpy(v_out + j * dec->cache_uv_stride_, v_dst + j * BPS, 8); + } + } } //------------------------------------------------------------------------------ diff --git a/src/dec/idec.c b/src/dec/idec.c index 7df790ce..1c833095 100644 --- a/src/dec/idec.c +++ b/src/dec/idec.c @@ -425,9 +425,8 @@ static VP8StatusCode DecodeRemaining(WebPIDecoder* const idec) { } return VP8_STATUS_SUSPENDED; } + // Reconstruct and emit samples. VP8ReconstructBlock(dec); - // Store data and save block's filtering params - VP8StoreBlock(dec); // Release buffer only if there is only one partition if (dec->num_parts_ == 1) { diff --git a/src/dec/vp8.c b/src/dec/vp8.c index bf15e567..253cb6b6 100644 --- a/src/dec/vp8.c +++ b/src/dec/vp8.c @@ -236,20 +236,6 @@ static int ParseFilterHeader(VP8BitReader* br, VP8Decoder* const dec) { } } dec->filter_type_ = (hdr->level_ == 0) ? 0 : hdr->simple_ ? 1 : 2; - if (dec->filter_type_ > 0) { // precompute filter levels per segment - if (dec->segment_hdr_.use_segment_) { - int s; - for (s = 0; s < NUM_MB_SEGMENTS; ++s) { - int strength = dec->segment_hdr_.filter_strength_[s]; - if (!dec->segment_hdr_.absolute_delta_) { - strength += hdr->level_; - } - dec->filter_levels_[s] = strength; - } - } else { - dec->filter_levels_[0] = hdr->level_; - } - } return !br->eof_; } @@ -675,6 +661,12 @@ int VP8DecodeMB(VP8Decoder* const dec, VP8BitReader* const token_br) { dec->non_zero_ac_ = 0; } + if (dec->filter_type_ > 0) { // store filter info + VP8FInfo* const finfo = dec->f_info_ + dec->mb_x_; + *finfo = dec->fstrengths_[dec->segment_][dec->is_i4x4_]; + finfo->f_inner_ = (!info->skip_ || dec->is_i4x4_); + } + return (!token_br->eof_); } @@ -698,10 +690,8 @@ static int ParseFrame(VP8Decoder* const dec, VP8Io* io) { return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA, "Premature end-of-file encountered."); } + // Reconstruct and emit samples. VP8ReconstructBlock(dec); - - // Store data and save block's filtering params - VP8StoreBlock(dec); } if (!VP8ProcessRow(dec, io)) { return VP8SetError(dec, VP8_STATUS_USER_ABORT, "Output aborted."); diff --git a/src/dec/vp8i.h b/src/dec/vp8i.h index 6e27456f..4f5192e2 100644 --- a/src/dec/vp8i.h +++ b/src/dec/vp8i.h @@ -157,7 +157,7 @@ typedef struct { // filter specs } VP8FInfo; typedef struct { // used for syntax-parsing - unsigned int nz_; // non-zero AC/DC coeffs + unsigned int nz_:24; // non-zero AC/DC coeffs (24bit) unsigned int dc_nz_:1; // non-zero DC coeffs unsigned int skip_:1; // block type } VP8MB; @@ -269,9 +269,9 @@ struct VP8Decoder { uint32_t non_zero_ac_; // Filtering side-info - int filter_type_; // 0=off, 1=simple, 2=complex - int filter_row_; // per-row flag - uint8_t filter_levels_[NUM_MB_SEGMENTS]; // precalculated per-segment + int filter_type_; // 0=off, 1=simple, 2=complex + int filter_row_; // per-row flag + VP8FInfo fstrengths_[NUM_MB_SEGMENTS][2]; // precalculated per-segment/type // extensions const uint8_t* alpha_data_; // compressed alpha data (if present) @@ -312,8 +312,6 @@ VP8StatusCode VP8EnterCritical(VP8Decoder* const dec, VP8Io* const io); int VP8ExitCritical(VP8Decoder* const dec, VP8Io* const io); // Process the last decoded row (filtering + output) int VP8ProcessRow(VP8Decoder* const dec, VP8Io* const io); -// Store a block, along with filtering params -void VP8StoreBlock(VP8Decoder* const dec); // To be called at the start of a new scanline, to initialize predictors. void VP8InitScanline(VP8Decoder* const dec); // Decode one macroblock. Returns false if there is not enough data.