mirror of
https://github.com/webmproject/libwebp.git
synced 2025-07-15 05:19:48 +02:00
Special-case sparse transform
If the number of non-zero coeffs is <= 3, use a simplified transform for luma. Change-Id: I78a1252704228d21720d4bc1221252c84338d9c8
This commit is contained in:
@ -544,6 +544,34 @@ static void Copy32b(uint8_t* dst, uint8_t* src) {
|
||||
memcpy(dst, src, 4);
|
||||
}
|
||||
|
||||
// Calls the transform routine selected by the 2-bit code stored in the two
// most significant bits of 'bits', passing 'src' and 'dst' through:
//   code 3 -> VP8Transform (with 0 as its last argument)
//   code 2 -> VP8TransformAC3
//   code 1 -> VP8TransformDC
//   code 0 -> no call is made
static void DoTransform(uint32_t bits, const int16_t* const src,
                        uint8_t* const dst) {
  const uint32_t code = bits >> 30;  // only the top two bits are examined
  if (code == 3) {
    VP8Transform(src, dst, 0);
  } else if (code == 2) {
    VP8TransformAC3(src, dst);
  } else if (code == 1) {
    VP8TransformDC(src, dst);
  }
  // code == 0: nothing to do.
}
|
||||
|
||||
// Chroma dispatcher: inspects the low 8 bits of 'bits' (four 2-bit codes) and
// calls either the full U/V transform or the DC-only one on 'src' / 'dst'.
// Does nothing when all eight bits are zero.
static void DoUVTransform(uint32_t bits, const int16_t* const src,
                          uint8_t* const dst) {
  if ((bits & 0xff) == 0) {
    return;  // no non-zero coeff at all
  }
  // 0xaa picks the upper bit of each 2-bit code: set means a non-zero AC
  // coefficient is present in at least one of the four blocks.
  if (bits & 0xaa) {
    VP8TransformUV(src, dst);  // note we don't use the AC3 variant for U/V
  } else {
    VP8TransformDCUV(src, dst);
  }
}
|
||||
|
||||
void VP8ReconstructBlock(const VP8Decoder* const dec) {
|
||||
int j;
|
||||
uint8_t* const y_dst = dec->yuv_b_ + Y_OFF;
|
||||
@ -578,6 +606,7 @@ void VP8ReconstructBlock(const VP8Decoder* const dec) {
|
||||
// bring top samples into the cache
|
||||
VP8TopSamples* const top_yuv = dec->yuv_t_ + dec->mb_x_;
|
||||
const int16_t* const coeffs = block->coeffs_;
|
||||
uint32_t bits = block->non_zero_y_;
|
||||
int n;
|
||||
|
||||
if (dec->mb_y_ > 0) {
|
||||
@ -595,7 +624,6 @@ void VP8ReconstructBlock(const VP8Decoder* const dec) {
|
||||
// predict and add residuals
|
||||
if (block->is_i4x4_) { // 4x4
|
||||
uint32_t* const top_right = (uint32_t*)(y_dst - BPS + 16);
|
||||
uint32_t bits = (block->non_zero_ & 0xffff) | (block->non_zero_ac_ << 16);
|
||||
|
||||
if (dec->mb_y_ > 0) {
|
||||
if (dec->mb_x_ >= dec->mb_w_ - 1) { // on rightmost border
|
||||
@ -608,53 +636,29 @@ void VP8ReconstructBlock(const VP8Decoder* const dec) {
|
||||
top_right[BPS] = top_right[2 * BPS] = top_right[3 * BPS] = top_right[0];
|
||||
|
||||
// predict and add residuals for all 4x4 blocks in turn.
|
||||
for (n = 0; n < 16; ++n, bits <<= 1) {
|
||||
for (n = 0; n < 16; ++n, bits <<= 2) {
|
||||
uint8_t* const dst = y_dst + kScan[n];
|
||||
VP8PredLuma4[block->imodes_[n]](dst);
|
||||
if (bits & (1UL << 31)) {
|
||||
VP8Transform(coeffs + n * 16, dst, 0);
|
||||
} else if (bits & (1UL << 15)) { // only DC is present
|
||||
VP8TransformDC(coeffs + n * 16, dst);
|
||||
}
|
||||
DoTransform(bits, coeffs + n * 16, dst);
|
||||
}
|
||||
} else { // 16x16
|
||||
const int pred_func = CheckMode(dec->mb_x_, dec->mb_y_,
|
||||
block->imodes_[0]);
|
||||
uint32_t bits = (block->non_zero_ & 0xffff) | (block->non_zero_ac_ << 16);
|
||||
VP8PredLuma16[pred_func](y_dst);
|
||||
if (bits & 0xffff) {
|
||||
for (n = 0; n < 16; ++n, bits <<= 1) {
|
||||
uint8_t* const dst = y_dst + kScan[n];
|
||||
if (bits & (1UL << 31)) {
|
||||
VP8Transform(coeffs + n * 16, dst, 0);
|
||||
} else if (bits & (1UL << 15)) { // only DC is present
|
||||
VP8TransformDC(coeffs + n * 16, dst);
|
||||
}
|
||||
if (bits != 0) {
|
||||
for (n = 0; n < 16; ++n, bits <<= 2) {
|
||||
DoTransform(bits, coeffs + n * 16, y_dst + kScan[n]);
|
||||
}
|
||||
}
|
||||
}
|
||||
{
|
||||
// Chroma
|
||||
const uint32_t bits_uv = block->non_zero_uv_;
|
||||
const int pred_func = CheckMode(dec->mb_x_, dec->mb_y_, block->uvmode_);
|
||||
VP8PredChroma8[pred_func](u_dst);
|
||||
VP8PredChroma8[pred_func](v_dst);
|
||||
|
||||
if (block->non_zero_ & 0x0f0000) { // chroma-U
|
||||
const int16_t* const u_coeffs = coeffs + 16 * 16;
|
||||
if (block->non_zero_ac_ & 0x0f0000) {
|
||||
VP8TransformUV(u_coeffs, u_dst);
|
||||
} else {
|
||||
VP8TransformDCUV(u_coeffs, u_dst);
|
||||
}
|
||||
}
|
||||
if (block->non_zero_ & 0xf00000) { // chroma-V
|
||||
const int16_t* const v_coeffs = coeffs + 20 * 16;
|
||||
if (block->non_zero_ac_ & 0xf00000) {
|
||||
VP8TransformUV(v_coeffs, v_dst);
|
||||
} else {
|
||||
VP8TransformDCUV(v_coeffs, v_dst);
|
||||
}
|
||||
}
|
||||
DoUVTransform(bits_uv >> 0, coeffs + 16 * 16, u_dst);
|
||||
DoUVTransform(bits_uv >> 8, coeffs + 20 * 16, v_dst);
|
||||
}
|
||||
|
||||
// stash away top samples for next block
|
||||
|
@ -509,8 +509,8 @@ static int ParseResiduals(VP8Decoder* const dec,
|
||||
int16_t* dst = block->coeffs_;
|
||||
VP8MB* const left_mb = dec->mb_info_ - 1;
|
||||
uint8_t tnz, lnz;
|
||||
uint32_t non_zero_ac = 0;
|
||||
uint32_t non_zero_dc = 0;
|
||||
uint32_t non_zero_y = 0;
|
||||
uint32_t non_zero_uv = 0;
|
||||
int x, y, ch;
|
||||
uint32_t out_t_nz, out_l_nz;
|
||||
int first;
|
||||
@ -539,26 +539,27 @@ static int ParseResiduals(VP8Decoder* const dec,
|
||||
lnz = left_mb->nz_ & 0x0f;
|
||||
for (y = 0; y < 4; ++y) {
|
||||
int l = lnz & 1;
|
||||
uint32_t nz_dc = 0, nz_ac = 0;
|
||||
uint32_t nz_coeffs = 0;
|
||||
for (x = 0; x < 4; ++x) {
|
||||
const int ctx = l + (tnz & 1);
|
||||
const int nz = GetCoeffs(token_br, ac_proba, ctx, q->y1_mat_, first, dst);
|
||||
l = (nz > first);
|
||||
tnz = (tnz >> 1) | (l << 7);
|
||||
nz_dc = (nz_dc << 1) | (dst[0] != 0);
|
||||
nz_ac = (nz_ac << 1) | (nz > 1);
|
||||
nz_coeffs <<= 2;
|
||||
if (nz > 3) nz_coeffs |= 3;
|
||||
else if (nz > 1) nz_coeffs |= 2;
|
||||
else if (dst[0] != 0) nz_coeffs |= 1;
|
||||
dst += 16;
|
||||
}
|
||||
tnz >>= 4;
|
||||
lnz = (lnz >> 1) | (l << 7);
|
||||
non_zero_dc = (non_zero_dc << 4) | nz_dc;
|
||||
non_zero_ac = (non_zero_ac << 4) | nz_ac;
|
||||
non_zero_y = (non_zero_y << 8) | nz_coeffs;
|
||||
}
|
||||
out_t_nz = tnz;
|
||||
out_l_nz = lnz >> 4;
|
||||
|
||||
for (ch = 0; ch < 4; ch += 2) {
|
||||
uint32_t nz_dc = 0, nz_ac = 0;
|
||||
uint32_t nz_coeffs = 0;
|
||||
tnz = mb->nz_ >> (4 + ch);
|
||||
lnz = left_mb->nz_ >> (4 + ch);
|
||||
for (y = 0; y < 2; ++y) {
|
||||
@ -568,25 +569,26 @@ static int ParseResiduals(VP8Decoder* const dec,
|
||||
const int nz = GetCoeffs(token_br, bands[2], ctx, q->uv_mat_, 0, dst);
|
||||
l = (nz > 0);
|
||||
tnz = (tnz >> 1) | (l << 3);
|
||||
nz_dc = (nz_dc << 1) | (dst[0] != 0);
|
||||
nz_ac = (nz_ac << 1) | (nz > 1);
|
||||
nz_coeffs <<= 2;
|
||||
if (nz > 3) nz_coeffs |= 3;
|
||||
else if (nz > 1) nz_coeffs |= 2;
|
||||
else if (dst[0] != 0) nz_coeffs |= 1;
|
||||
dst += 16;
|
||||
}
|
||||
tnz >>= 2;
|
||||
lnz = (lnz >> 1) | (l << 5);
|
||||
}
|
||||
// Note: we don't really need the per-4x4 details for U/V blocks.
|
||||
non_zero_dc |= (nz_dc & 0x0f) << (16 + 2 * ch);
|
||||
non_zero_ac |= (nz_ac & 0x0f) << (16 + 2 * ch);
|
||||
non_zero_uv |= nz_coeffs << (4 * ch);
|
||||
out_t_nz |= (tnz << 4) << ch;
|
||||
out_l_nz |= (lnz & 0xf0) << ch;
|
||||
}
|
||||
mb->nz_ = out_t_nz;
|
||||
left_mb->nz_ = out_l_nz;
|
||||
|
||||
block->non_zero_ac_ = non_zero_ac;
|
||||
block->non_zero_ = non_zero_ac | non_zero_dc;
|
||||
return !block->non_zero_; // will be used for further optimization
|
||||
block->non_zero_y_ = non_zero_y;
|
||||
block->non_zero_uv_ = non_zero_uv;
|
||||
return !(non_zero_y | non_zero_uv); // will be used for further optimization
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
@ -621,8 +623,8 @@ int VP8DecodeMB(VP8Decoder* const dec, VP8BitReader* const token_br) {
|
||||
if (!block->is_i4x4_) {
|
||||
left->nz_dc_ = mb->nz_dc_ = 0;
|
||||
}
|
||||
block->non_zero_ = 0;
|
||||
block->non_zero_ac_ = 0;
|
||||
block->non_zero_y_ = 0;
|
||||
block->non_zero_uv_ = 0;
|
||||
}
|
||||
|
||||
if (dec->filter_type_ > 0) { // store filter info
|
||||
|
@ -184,12 +184,15 @@ typedef struct {
|
||||
uint8_t is_i4x4_; // true if intra4x4
|
||||
uint8_t imodes_[16]; // one 16x16 mode (#0) or sixteen 4x4 modes
|
||||
uint8_t uvmode_; // chroma prediction mode
|
||||
// bit-wise info about the content of each sub-4x4 blocks: there are 16 bits
|
||||
// for luma (bits #15->#0), then 4 bits for chroma-u (#19->#16) and 4 bits for
|
||||
// chroma-v (#23->#20), each corresponding to one 4x4 block in decoding order.
|
||||
// If the bit is set, the 4x4 block contains some non-zero coefficients.
|
||||
uint32_t non_zero_;
|
||||
uint32_t non_zero_ac_;
|
||||
// bit-wise info about the content of each sub-4x4 blocks (in decoding order).
|
||||
// Each of the 4x4 blocks for y/u/v is associated with a 2b code according to:
|
||||
// code=0 -> no coefficient
|
||||
// code=1 -> only DC
|
||||
// code=2 -> non-zero coefficients only within the first three
|
||||
// code=3 -> non-zero coefficients extend beyond the first three
|
||||
// This allows calling specialized transform functions.
|
||||
uint32_t non_zero_y_;
|
||||
uint32_t non_zero_uv_;
|
||||
} VP8MBData;
|
||||
|
||||
// Persistent information needed by the parallel processing
|
||||
|
Reference in New Issue
Block a user