mirror of
https://github.com/webmproject/libwebp.git
synced 2025-02-13 15:32:53 +01:00
collect macroblock reconstruction data in VP8MBData struct
This is to better separate bitstream parsing from reconstruction.
Change-Id: I872b58e9940c4b14f72ebee50fba545468ff754c
This commit is contained in:
parent
23d28e216d
commit
f4710e3b89
@ -417,7 +417,7 @@ static int AllocateMemory(VP8Decoder* const dec) {
|
|||||||
mb_w * (dec->use_threads_ ? 2 : 1) * sizeof(VP8FInfo)
|
mb_w * (dec->use_threads_ ? 2 : 1) * sizeof(VP8FInfo)
|
||||||
: 0;
|
: 0;
|
||||||
const size_t yuv_size = YUV_SIZE * sizeof(*dec->yuv_b_);
|
const size_t yuv_size = YUV_SIZE * sizeof(*dec->yuv_b_);
|
||||||
const size_t coeffs_size = 384 * sizeof(*dec->coeffs_);
|
const size_t mb_data_size = sizeof(*dec->mb_data_);
|
||||||
const size_t cache_height = (16 * num_caches
|
const size_t cache_height = (16 * num_caches
|
||||||
+ kFilterExtraRows[dec->filter_type_]) * 3 / 2;
|
+ kFilterExtraRows[dec->filter_type_]) * 3 / 2;
|
||||||
const size_t cache_size = top_size * cache_height;
|
const size_t cache_size = top_size * cache_height;
|
||||||
@ -426,7 +426,7 @@ static int AllocateMemory(VP8Decoder* const dec) {
|
|||||||
(uint64_t)dec->pic_hdr_.width_ * dec->pic_hdr_.height_ : 0ULL;
|
(uint64_t)dec->pic_hdr_.width_ * dec->pic_hdr_.height_ : 0ULL;
|
||||||
const uint64_t needed = (uint64_t)intra_pred_mode_size
|
const uint64_t needed = (uint64_t)intra_pred_mode_size
|
||||||
+ top_size + mb_info_size + f_info_size
|
+ top_size + mb_info_size + f_info_size
|
||||||
+ yuv_size + coeffs_size
|
+ yuv_size + mb_data_size
|
||||||
+ cache_size + alpha_size + ALIGN_MASK;
|
+ cache_size + alpha_size + ALIGN_MASK;
|
||||||
uint8_t* mem;
|
uint8_t* mem;
|
||||||
|
|
||||||
@ -473,8 +473,8 @@ static int AllocateMemory(VP8Decoder* const dec) {
|
|||||||
dec->yuv_b_ = (uint8_t*)mem;
|
dec->yuv_b_ = (uint8_t*)mem;
|
||||||
mem += yuv_size;
|
mem += yuv_size;
|
||||||
|
|
||||||
dec->coeffs_ = (int16_t*)mem;
|
dec->mb_data_ = (VP8MBData*)mem;
|
||||||
mem += coeffs_size;
|
mem += mb_data_size;
|
||||||
|
|
||||||
dec->cache_y_stride_ = 16 * mb_w;
|
dec->cache_y_stride_ = 16 * mb_w;
|
||||||
dec->cache_uv_stride_ = 8 * mb_w;
|
dec->cache_uv_stride_ = 8 * mb_w;
|
||||||
@ -554,6 +554,7 @@ void VP8ReconstructBlock(const VP8Decoder* const dec) {
|
|||||||
uint8_t* const y_dst = dec->yuv_b_ + Y_OFF;
|
uint8_t* const y_dst = dec->yuv_b_ + Y_OFF;
|
||||||
uint8_t* const u_dst = dec->yuv_b_ + U_OFF;
|
uint8_t* const u_dst = dec->yuv_b_ + U_OFF;
|
||||||
uint8_t* const v_dst = dec->yuv_b_ + V_OFF;
|
uint8_t* const v_dst = dec->yuv_b_ + V_OFF;
|
||||||
|
const VP8MBData* const block = dec->mb_data_;
|
||||||
|
|
||||||
// Rotate in the left samples from previously decoded block. We move four
|
// Rotate in the left samples from previously decoded block. We move four
|
||||||
// pixels at a time for alignment reason, and because of in-loop filter.
|
// pixels at a time for alignment reason, and because of in-loop filter.
|
||||||
@ -583,7 +584,7 @@ void VP8ReconstructBlock(const VP8Decoder* const dec) {
|
|||||||
uint8_t* const top_y = dec->y_t_ + dec->mb_x_ * 16;
|
uint8_t* const top_y = dec->y_t_ + dec->mb_x_ * 16;
|
||||||
uint8_t* const top_u = dec->u_t_ + dec->mb_x_ * 8;
|
uint8_t* const top_u = dec->u_t_ + dec->mb_x_ * 8;
|
||||||
uint8_t* const top_v = dec->v_t_ + dec->mb_x_ * 8;
|
uint8_t* const top_v = dec->v_t_ + dec->mb_x_ * 8;
|
||||||
const int16_t* const coeffs = dec->coeffs_;
|
const int16_t* const coeffs = block->coeffs_;
|
||||||
int n;
|
int n;
|
||||||
|
|
||||||
if (dec->mb_y_ > 0) {
|
if (dec->mb_y_ > 0) {
|
||||||
@ -599,8 +600,7 @@ void VP8ReconstructBlock(const VP8Decoder* const dec) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// predict and add residuals
|
// predict and add residuals
|
||||||
|
if (block->is_i4x4_) { // 4x4
|
||||||
if (dec->is_i4x4_) { // 4x4
|
|
||||||
uint32_t* const top_right = (uint32_t*)(y_dst - BPS + 16);
|
uint32_t* const top_right = (uint32_t*)(y_dst - BPS + 16);
|
||||||
|
|
||||||
if (dec->mb_y_ > 0) {
|
if (dec->mb_y_ > 0) {
|
||||||
@ -613,25 +613,26 @@ void VP8ReconstructBlock(const VP8Decoder* const dec) {
|
|||||||
// replicate the top-right pixels below
|
// replicate the top-right pixels below
|
||||||
top_right[BPS] = top_right[2 * BPS] = top_right[3 * BPS] = top_right[0];
|
top_right[BPS] = top_right[2 * BPS] = top_right[3 * BPS] = top_right[0];
|
||||||
|
|
||||||
// predict and add residues for all 4x4 blocks in turn.
|
// predict and add residuals for all 4x4 blocks in turn.
|
||||||
for (n = 0; n < 16; n++) {
|
for (n = 0; n < 16; n++) {
|
||||||
uint8_t* const dst = y_dst + kScan[n];
|
uint8_t* const dst = y_dst + kScan[n];
|
||||||
VP8PredLuma4[dec->imodes_[n]](dst);
|
VP8PredLuma4[block->imodes_[n]](dst);
|
||||||
if (dec->non_zero_ac_ & (1 << n)) {
|
if (block->non_zero_ac_ & (1 << n)) {
|
||||||
VP8Transform(coeffs + n * 16, dst, 0);
|
VP8Transform(coeffs + n * 16, dst, 0);
|
||||||
} else if (dec->non_zero_ & (1 << n)) { // only DC is present
|
} else if (block->non_zero_ & (1 << n)) { // only DC is present
|
||||||
VP8TransformDC(coeffs + n * 16, dst);
|
VP8TransformDC(coeffs + n * 16, dst);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else { // 16x16
|
} else { // 16x16
|
||||||
const int pred_func = CheckMode(dec->mb_x_, dec->mb_y_, dec->imodes_[0]);
|
const int pred_func = CheckMode(dec->mb_x_, dec->mb_y_,
|
||||||
|
block->imodes_[0]);
|
||||||
VP8PredLuma16[pred_func](y_dst);
|
VP8PredLuma16[pred_func](y_dst);
|
||||||
if (dec->non_zero_ & 0xffff) {
|
if (block->non_zero_ & 0xffff) {
|
||||||
for (n = 0; n < 16; n++) {
|
for (n = 0; n < 16; n++) {
|
||||||
uint8_t* const dst = y_dst + kScan[n];
|
uint8_t* const dst = y_dst + kScan[n];
|
||||||
if (dec->non_zero_ac_ & (1 << n)) {
|
if (block->non_zero_ac_ & (1 << n)) {
|
||||||
VP8Transform(coeffs + n * 16, dst, 0);
|
VP8Transform(coeffs + n * 16, dst, 0);
|
||||||
} else if (dec->non_zero_ & (1 << n)) { // only DC is present
|
} else if (block->non_zero_ & (1 << n)) { // only DC is present
|
||||||
VP8TransformDC(coeffs + n * 16, dst);
|
VP8TransformDC(coeffs + n * 16, dst);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -639,21 +640,21 @@ void VP8ReconstructBlock(const VP8Decoder* const dec) {
|
|||||||
}
|
}
|
||||||
{
|
{
|
||||||
// Chroma
|
// Chroma
|
||||||
const int pred_func = CheckMode(dec->mb_x_, dec->mb_y_, dec->uvmode_);
|
const int pred_func = CheckMode(dec->mb_x_, dec->mb_y_, block->uvmode_);
|
||||||
VP8PredChroma8[pred_func](u_dst);
|
VP8PredChroma8[pred_func](u_dst);
|
||||||
VP8PredChroma8[pred_func](v_dst);
|
VP8PredChroma8[pred_func](v_dst);
|
||||||
|
|
||||||
if (dec->non_zero_ & 0x0f0000) { // chroma-U
|
if (block->non_zero_ & 0x0f0000) { // chroma-U
|
||||||
const int16_t* const u_coeffs = dec->coeffs_ + 16 * 16;
|
const int16_t* const u_coeffs = coeffs + 16 * 16;
|
||||||
if (dec->non_zero_ac_ & 0x0f0000) {
|
if (block->non_zero_ac_ & 0x0f0000) {
|
||||||
VP8TransformUV(u_coeffs, u_dst);
|
VP8TransformUV(u_coeffs, u_dst);
|
||||||
} else {
|
} else {
|
||||||
VP8TransformDCUV(u_coeffs, u_dst);
|
VP8TransformDCUV(u_coeffs, u_dst);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (dec->non_zero_ & 0xf00000) { // chroma-V
|
if (block->non_zero_ & 0xf00000) { // chroma-V
|
||||||
const int16_t* const v_coeffs = dec->coeffs_ + 20 * 16;
|
const int16_t* const v_coeffs = coeffs + 20 * 16;
|
||||||
if (dec->non_zero_ac_ & 0xf00000) {
|
if (block->non_zero_ac_ & 0xf00000) {
|
||||||
VP8TransformUV(v_coeffs, v_dst);
|
VP8TransformUV(v_coeffs, v_dst);
|
||||||
} else {
|
} else {
|
||||||
VP8TransformDCUV(v_coeffs, v_dst);
|
VP8TransformDCUV(v_coeffs, v_dst);
|
||||||
|
@ -337,17 +337,19 @@ void VP8ResetProba(VP8Proba* const proba) {
|
|||||||
void VP8ParseIntraMode(VP8BitReader* const br, VP8Decoder* const dec) {
|
void VP8ParseIntraMode(VP8BitReader* const br, VP8Decoder* const dec) {
|
||||||
uint8_t* const top = dec->intra_t_ + 4 * dec->mb_x_;
|
uint8_t* const top = dec->intra_t_ + 4 * dec->mb_x_;
|
||||||
uint8_t* const left = dec->intra_l_;
|
uint8_t* const left = dec->intra_l_;
|
||||||
|
VP8MBData* const block = dec->mb_data_;
|
||||||
|
|
||||||
// Hardcoded 16x16 intra-mode decision tree.
|
// Hardcoded 16x16 intra-mode decision tree.
|
||||||
dec->is_i4x4_ = !VP8GetBit(br, 145); // decide for B_PRED first
|
block->is_i4x4_ = !VP8GetBit(br, 145); // decide for B_PRED first
|
||||||
if (!dec->is_i4x4_) {
|
if (!block->is_i4x4_) {
|
||||||
const int ymode =
|
const int ymode =
|
||||||
VP8GetBit(br, 156) ? (VP8GetBit(br, 128) ? TM_PRED : H_PRED)
|
VP8GetBit(br, 156) ? (VP8GetBit(br, 128) ? TM_PRED : H_PRED)
|
||||||
: (VP8GetBit(br, 163) ? V_PRED : DC_PRED);
|
: (VP8GetBit(br, 163) ? V_PRED : DC_PRED);
|
||||||
dec->imodes_[0] = ymode;
|
block->imodes_[0] = ymode;
|
||||||
memset(top, ymode, 4 * sizeof(top[0]));
|
memset(top, ymode, 4 * sizeof(top[0]));
|
||||||
memset(left, ymode, 4 * sizeof(left[0]));
|
memset(left, ymode, 4 * sizeof(left[0]));
|
||||||
} else {
|
} else {
|
||||||
uint8_t* modes = dec->imodes_;
|
uint8_t* modes = block->imodes_;
|
||||||
int y;
|
int y;
|
||||||
for (y = 0; y < 4; ++y) {
|
for (y = 0; y < 4; ++y) {
|
||||||
int ymode = left[y];
|
int ymode = left[y];
|
||||||
@ -380,9 +382,9 @@ void VP8ParseIntraMode(VP8BitReader* const br, VP8Decoder* const dec) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Hardcoded UVMode decision tree
|
// Hardcoded UVMode decision tree
|
||||||
dec->uvmode_ = !VP8GetBit(br, 142) ? DC_PRED
|
block->uvmode_ = !VP8GetBit(br, 142) ? DC_PRED
|
||||||
: !VP8GetBit(br, 114) ? V_PRED
|
: !VP8GetBit(br, 114) ? V_PRED
|
||||||
: VP8GetBit(br, 183) ? TM_PRED : H_PRED;
|
: VP8GetBit(br, 183) ? TM_PRED : H_PRED;
|
||||||
}
|
}
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
|
@ -530,7 +530,8 @@ static int ParseResiduals(VP8Decoder* const dec,
|
|||||||
int first;
|
int first;
|
||||||
ProbaArray ac_prob;
|
ProbaArray ac_prob;
|
||||||
const VP8QuantMatrix* const q = &dec->dqm_[dec->segment_];
|
const VP8QuantMatrix* const q = &dec->dqm_[dec->segment_];
|
||||||
int16_t* dst = dec->coeffs_;
|
VP8MBData* const block = dec->mb_data_;
|
||||||
|
int16_t* dst = block->coeffs_;
|
||||||
VP8MB* const left_mb = dec->mb_info_ - 1;
|
VP8MB* const left_mb = dec->mb_info_ - 1;
|
||||||
PackedNz nz_ac, nz_dc;
|
PackedNz nz_ac, nz_dc;
|
||||||
PackedNz tnz, lnz;
|
PackedNz tnz, lnz;
|
||||||
@ -539,7 +540,7 @@ static int ParseResiduals(VP8Decoder* const dec,
|
|||||||
int x, y, ch;
|
int x, y, ch;
|
||||||
|
|
||||||
memset(dst, 0, 384 * sizeof(*dst));
|
memset(dst, 0, 384 * sizeof(*dst));
|
||||||
if (!dec->is_i4x4_) { // parse DC
|
if (!block->is_i4x4_) { // parse DC
|
||||||
int16_t dc[16] = { 0 };
|
int16_t dc[16] = { 0 };
|
||||||
const int ctx = mb->nz_dc_ + left_mb->nz_dc_;
|
const int ctx = mb->nz_dc_ + left_mb->nz_dc_;
|
||||||
mb->nz_dc_ = left_mb->nz_dc_ =
|
mb->nz_dc_ = left_mb->nz_dc_ =
|
||||||
@ -598,9 +599,9 @@ static int ParseResiduals(VP8Decoder* const dec,
|
|||||||
mb->nz_ = out_t_nz;
|
mb->nz_ = out_t_nz;
|
||||||
left_mb->nz_ = out_l_nz;
|
left_mb->nz_ = out_l_nz;
|
||||||
|
|
||||||
dec->non_zero_ac_ = non_zero_ac;
|
block->non_zero_ac_ = non_zero_ac;
|
||||||
dec->non_zero_ = non_zero_ac | non_zero_dc;
|
block->non_zero_ = non_zero_ac | non_zero_dc;
|
||||||
return !dec->non_zero_; // will be used for further optimization
|
return !block->non_zero_; // will be used for further optimization
|
||||||
}
|
}
|
||||||
#undef PACK
|
#undef PACK
|
||||||
|
|
||||||
@ -611,6 +612,7 @@ int VP8DecodeMB(VP8Decoder* const dec, VP8BitReader* const token_br) {
|
|||||||
VP8BitReader* const br = &dec->br_;
|
VP8BitReader* const br = &dec->br_;
|
||||||
VP8MB* const left = dec->mb_info_ - 1;
|
VP8MB* const left = dec->mb_info_ - 1;
|
||||||
VP8MB* const mb = dec->mb_info_ + dec->mb_x_;
|
VP8MB* const mb = dec->mb_info_ + dec->mb_x_;
|
||||||
|
VP8MBData* const block = dec->mb_data_;
|
||||||
int skip;
|
int skip;
|
||||||
|
|
||||||
// Note: we don't save segment map (yet), as we don't expect
|
// Note: we don't save segment map (yet), as we don't expect
|
||||||
@ -632,17 +634,17 @@ int VP8DecodeMB(VP8Decoder* const dec, VP8BitReader* const token_br) {
|
|||||||
skip = ParseResiduals(dec, mb, token_br);
|
skip = ParseResiduals(dec, mb, token_br);
|
||||||
} else {
|
} else {
|
||||||
left->nz_ = mb->nz_ = 0;
|
left->nz_ = mb->nz_ = 0;
|
||||||
if (!dec->is_i4x4_) {
|
if (!block->is_i4x4_) {
|
||||||
left->nz_dc_ = mb->nz_dc_ = 0;
|
left->nz_dc_ = mb->nz_dc_ = 0;
|
||||||
}
|
}
|
||||||
dec->non_zero_ = 0;
|
block->non_zero_ = 0;
|
||||||
dec->non_zero_ac_ = 0;
|
block->non_zero_ac_ = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (dec->filter_type_ > 0) { // store filter info
|
if (dec->filter_type_ > 0) { // store filter info
|
||||||
VP8FInfo* const finfo = dec->f_info_ + dec->mb_x_;
|
VP8FInfo* const finfo = dec->f_info_ + dec->mb_x_;
|
||||||
*finfo = dec->fstrengths_[dec->segment_][dec->is_i4x4_];
|
*finfo = dec->fstrengths_[dec->segment_][block->is_i4x4_];
|
||||||
finfo->f_inner_ = !skip || dec->is_i4x4_;
|
finfo->f_inner_ = !skip || block->is_i4x4_;
|
||||||
}
|
}
|
||||||
|
|
||||||
return !token_br->eof_;
|
return !token_br->eof_;
|
||||||
|
@ -168,6 +168,20 @@ typedef struct {
|
|||||||
quant_t y1_mat_, y2_mat_, uv_mat_;
|
quant_t y1_mat_, y2_mat_, uv_mat_;
|
||||||
} VP8QuantMatrix;
|
} VP8QuantMatrix;
|
||||||
|
|
||||||
|
// Data needed to reconstruct a macroblock: filled in while parsing the bitstream, read back by VP8ReconstructBlock().
|
||||||
|
typedef struct {
|
||||||
|
int16_t coeffs_[384]; // 384 coeffs = (16+4+4) * 4*4: 16 luma blocks, then chroma-U at offset 16*16 and chroma-V at offset 20*16
|
||||||
|
uint8_t is_i4x4_; // true if intra4x4 (B_PRED), false if a single 16x16 luma mode is used
|
||||||
|
uint8_t imodes_[16]; // one 16x16 mode (#0) or sixteen 4x4 modes
|
||||||
|
uint8_t uvmode_; // chroma prediction mode (shared by the U and V planes)
|
||||||
|
// Bit-wise info about the content of each sub-4x4 block: there are 16 bits
|
||||||
|
// for luma (bits #0->#15), then 4 bits for chroma-u (#16->#19) and 4 bits for
|
||||||
|
// chroma-v (#20->#23), each corresponding to one 4x4 block in decoding order.
|
||||||
|
// If the bit is set, the 4x4 block contains some non-zero coefficients.
|
||||||
|
uint32_t non_zero_; // union of the AC and DC flags (non_zero_ac | non_zero_dc)
|
||||||
|
uint32_t non_zero_ac_; // same bit layout; when a bit is set here the full transform is applied, otherwise a bit set only in non_zero_ means "only DC is present"
|
||||||
|
} VP8MBData;
|
||||||
|
|
||||||
// Persistent information needed by the parallel processing
|
// Persistent information needed by the parallel processing
|
||||||
typedef struct {
|
typedef struct {
|
||||||
int id_; // cache row to process (in [0..2])
|
int id_; // cache row to process (in [0..2])
|
||||||
@ -238,11 +252,11 @@ struct VP8Decoder {
|
|||||||
uint8_t intra_l_[4]; // left intra modes values
|
uint8_t intra_l_[4]; // left intra modes values
|
||||||
uint8_t* y_t_; // top luma samples: 16 * mb_w_
|
uint8_t* y_t_; // top luma samples: 16 * mb_w_
|
||||||
uint8_t* u_t_, *v_t_; // top u/v samples: 8 * mb_w_ each
|
uint8_t* u_t_, *v_t_; // top u/v samples: 8 * mb_w_ each
|
||||||
|
uint8_t segment_; // segment of the currently parsed block
|
||||||
|
|
||||||
VP8MB* mb_info_; // contextual macroblock info (mb_w_ + 1)
|
VP8MB* mb_info_; // contextual macroblock info (mb_w_ + 1)
|
||||||
VP8FInfo* f_info_; // filter strength info
|
VP8FInfo* f_info_; // filter strength info
|
||||||
uint8_t* yuv_b_; // main block for Y/U/V (size = YUV_SIZE)
|
uint8_t* yuv_b_; // main block for Y/U/V (size = YUV_SIZE)
|
||||||
int16_t* coeffs_; // 384 coeffs = (16+8+8) * 4*4
|
|
||||||
|
|
||||||
uint8_t* cache_y_; // macroblock row for storing unfiltered samples
|
uint8_t* cache_y_; // macroblock row for storing unfiltered samples
|
||||||
uint8_t* cache_u_;
|
uint8_t* cache_u_;
|
||||||
@ -256,17 +270,7 @@ struct VP8Decoder {
|
|||||||
|
|
||||||
// Per macroblock non-persistent infos.
|
// Per macroblock non-persistent infos.
|
||||||
int mb_x_, mb_y_; // current position, in macroblock units
|
int mb_x_, mb_y_; // current position, in macroblock units
|
||||||
uint8_t is_i4x4_; // true if intra4x4
|
VP8MBData* mb_data_; // reconstruction data
|
||||||
uint8_t imodes_[16]; // one 16x16 mode (#0) or sixteen 4x4 modes
|
|
||||||
uint8_t uvmode_; // chroma prediction mode
|
|
||||||
uint8_t segment_; // block's segment
|
|
||||||
|
|
||||||
// bit-wise info about the content of each sub-4x4 blocks: there are 16 bits
|
|
||||||
// for luma (bits #0->#15), then 4 bits for chroma-u (#16->#19) and 4 bits for
|
|
||||||
// chroma-v (#20->#23), each corresponding to one 4x4 block in decoding order.
|
|
||||||
// If the bit is set, the 4x4 block contains some non-zero coefficients.
|
|
||||||
uint32_t non_zero_;
|
|
||||||
uint32_t non_zero_ac_;
|
|
||||||
|
|
||||||
// Filtering side-info
|
// Filtering side-info
|
||||||
int filter_type_; // 0=off, 1=simple, 2=complex
|
int filter_type_; // 0=off, 1=simple, 2=complex
|
||||||
|
Loading…
x
Reference in New Issue
Block a user