introduce VP8EncQuantize2Blocks to quantize two blocks at a time

No speed difference for now. We might reorder the instructions later
to speed things up.

Change-Id: I1949525a0b329c7fd861b8dbea7db4b23d37709c
This commit is contained in:
skal 2014-08-25 13:16:14 -07:00
parent 0b21c30b1a
commit 73d361dd5f
6 changed files with 50 additions and 6 deletions

View File

@ -117,7 +117,12 @@ extern VP8BlockCopy VP8Copy4x4;
struct VP8Matrix; // forward declaration
typedef int (*VP8QuantizeBlock)(int16_t in[16], int16_t out[16],
const struct VP8Matrix* const mtx);
// Same as VP8QuantizeBlock, but quantizes two consecutive blocks.
// 'in' and 'out' each hold two back-to-back 16-coefficient blocks
// (coefficients [0..15] for the first block, [16..31] for the second).
// The per-block results are packed into the return value: the first block's
// result goes in bit 0, the second block's result in bit 1.
typedef int (*VP8Quantize2Blocks)(int16_t in[32], int16_t out[32],
const struct VP8Matrix* const mtx);
extern VP8QuantizeBlock VP8EncQuantizeBlock;
extern VP8Quantize2Blocks VP8EncQuantize2Blocks;
// specific to 2nd transform:
typedef int (*VP8QuantizeBlockWHT)(int16_t in[16], int16_t out[16],

View File

@ -625,6 +625,14 @@ static int QuantizeBlock(int16_t in[16], int16_t out[16],
return (last >= 0);
}
// Quantizes two consecutive 16-coefficient blocks stored back-to-back in
// 'in', writing the levels to the matching halves of 'out'.
// Each half is dispatched through the VP8EncQuantizeBlock function pointer,
// so whichever single-block implementation is currently installed is used.
// Returns the first block's result in bit 0 and the second block's in bit 1.
static int Quantize2Blocks(int16_t in[32], int16_t out[32],
                           const VP8Matrix* const mtx) {
  const int first_nz = VP8EncQuantizeBlock(in, out, mtx);
  const int second_nz = VP8EncQuantizeBlock(in + 16, out + 16, mtx);
  return (first_nz << 0) | (second_nz << 1);
}
static int QuantizeBlockWHT(int16_t in[16], int16_t out[16],
const VP8Matrix* const mtx) {
int n, last = -1;
@ -684,6 +692,7 @@ VP8Metric VP8SSE4x4;
VP8WMetric VP8TDisto4x4;
VP8WMetric VP8TDisto16x16;
VP8QuantizeBlock VP8EncQuantizeBlock;
VP8Quantize2Blocks VP8EncQuantize2Blocks;
VP8QuantizeBlockWHT VP8EncQuantizeBlockWHT;
VP8BlockCopy VP8Copy4x4;
@ -711,6 +720,7 @@ void VP8EncDspInit(void) {
VP8TDisto4x4 = Disto4x4;
VP8TDisto16x16 = Disto16x16;
VP8EncQuantizeBlock = QuantizeBlock;
VP8EncQuantize2Blocks = Quantize2Blocks;
VP8EncQuantizeBlockWHT = QuantizeBlockWHT;
VP8Copy4x4 = Copy4x4;

View File

@ -237,6 +237,14 @@ static int QuantizeBlock(int16_t in[16], int16_t out[16],
return 0;
}
// Two-block wrapper around this file's QuantizeBlock(): handles
// coefficients [0..15] and then [16..31] of 'in', storing the levels at the
// same offsets in 'out'.
// Returns the first block's result in bit 0 and the second block's in bit 1.
static int Quantize2Blocks(int16_t in[32], int16_t out[32],
                           const VP8Matrix* const mtx) {
  const int first_nz = QuantizeBlock(in, out, mtx);
  const int second_nz = QuantizeBlock(in + 16, out + 16, mtx);
  return (first_nz << 0) | (second_nz << 1);
}
#undef QUANTIZE_ONE
// macro for one horizontal pass in Disto4x4 (TTransform)
@ -756,6 +764,7 @@ void VP8EncDspInitMIPS32(void) {
#if defined(WEBP_USE_MIPS32)
VP8ITransform = ITransform;
VP8EncQuantizeBlock = QuantizeBlock;
VP8EncQuantize2Blocks = Quantize2Blocks;
VP8TDisto4x4 = Disto4x4;
VP8TDisto16x16 = Disto16x16;
VP8FTransform = FTransform;

View File

@ -1047,6 +1047,14 @@ static int QuantizeBlock(int16_t in[16], int16_t out[16],
return 0;
}
// Two-block wrapper around this file's QuantizeBlock(): handles
// coefficients [0..15] and then [16..31] of 'in', storing the levels at the
// same offsets in 'out'.
// Returns the first block's result in bit 0 and the second block's in bit 1.
static int Quantize2Blocks(int16_t in[32], int16_t out[32],
                           const VP8Matrix* const mtx) {
  const int first_nz = QuantizeBlock(in, out, mtx);
  const int second_nz = QuantizeBlock(in + 16, out + 16, mtx);
  return (first_nz << 0) | (second_nz << 1);
}
#endif // !WORK_AROUND_GCC
#endif // WEBP_USE_NEON
@ -1072,6 +1080,7 @@ void VP8EncDspInitNEON(void) {
VP8SSE4x4 = SSE4x4;
#if !defined(WORK_AROUND_GCC)
VP8EncQuantizeBlock = QuantizeBlock;
VP8EncQuantize2Blocks = Quantize2Blocks;
#endif
#endif // WEBP_USE_NEON
}

View File

@ -929,6 +929,15 @@ static int QuantizeBlockWHT(int16_t in[16], int16_t out[16],
return DoQuantizeBlock(in, out, NULL, mtx);
}
// Quantizes two consecutive 16-coefficient blocks with DoQuantizeBlock(),
// passing the matrix's sharpen_ table for both halves.
// Coefficients [0..15] of 'in' are processed first, then [16..31]; levels
// are stored at the same offsets in 'out'.
// Returns the first block's result in bit 0 and the second block's in bit 1.
static int Quantize2Blocks(int16_t in[32], int16_t out[32],
                           const VP8Matrix* const mtx) {
  const uint16_t* const sharpen = &mtx->sharpen_[0];
  const int first_nz = DoQuantizeBlock(in, out, sharpen, mtx);
  const int second_nz = DoQuantizeBlock(in + 16, out + 16, sharpen, mtx);
  return (first_nz << 0) | (second_nz << 1);
}
// Forward declaration.
void VP8SetResidualCoeffsSSE2(const int16_t* const coeffs,
VP8Residual* const res);
@ -967,6 +976,7 @@ void VP8EncDspInitSSE2(void) {
#if defined(WEBP_USE_SSE2)
VP8CollectHistogram = CollectHistogram;
VP8EncQuantizeBlock = QuantizeBlock;
VP8EncQuantize2Blocks = Quantize2Blocks;
VP8EncQuantizeBlockWHT = QuantizeBlockWHT;
VP8ITransform = ITransform;
VP8FTransform = FTransform;

View File

@ -746,12 +746,13 @@ static int ReconstructIntra16(VP8EncIterator* const it,
}
}
} else {
for (n = 0; n < 16; ++n) {
for (n = 0; n < 16; n += 2) {
// Zero-out the first coeff, so that: a) nz is correct below, and
// b) finding 'last' non-zero coeffs in SetResidualCoeffs() is simplified.
tmp[n][0] = 0;
nz |= VP8EncQuantizeBlock(tmp[n], rd->y_ac_levels[n], &dqm->y1_) << n;
assert(rd->y_ac_levels[n][0] == 0);
tmp[n][0] = tmp[n + 1][0] = 0;
nz |= VP8EncQuantize2Blocks(tmp[n], rd->y_ac_levels[n], &dqm->y1_) << n;
assert(rd->y_ac_levels[n + 0][0] == 0);
assert(rd->y_ac_levels[n + 1][0] == 0);
}
}
@ -816,8 +817,8 @@ static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd,
}
}
} else {
for (n = 0; n < 8; ++n) {
nz |= VP8EncQuantizeBlock(tmp[n], rd->uv_levels[n], &dqm->uv_) << n;
for (n = 0; n < 8; n += 2) {
nz |= VP8EncQuantize2Blocks(tmp[n], rd->uv_levels[n], &dqm->uv_) << n;
}
}