From 73d361dd5f58f7d6f52ab0beaea129dfe8f392d0 Mon Sep 17 00:00:00 2001 From: skal Date: Mon, 25 Aug 2014 13:16:14 -0700 Subject: [PATCH] introduce VP8EncQuantize2Blocks to quantize two blocks at a time No speed diff for now. We might reorder the instructions better later to speed things up. Change-Id: I1949525a0b329c7fd861b8dbea7db4b23d37709c --- src/dsp/dsp.h | 5 +++++ src/dsp/enc.c | 10 ++++++++++ src/dsp/enc_mips32.c | 9 +++++++++ src/dsp/enc_neon.c | 9 +++++++++ src/dsp/enc_sse2.c | 10 ++++++++++ src/enc/quant.c | 13 +++++++------ 6 files changed, 50 insertions(+), 6 deletions(-) diff --git a/src/dsp/dsp.h b/src/dsp/dsp.h index 912fab21..5c0be204 100644 --- a/src/dsp/dsp.h +++ b/src/dsp/dsp.h @@ -117,7 +117,12 @@ extern VP8BlockCopy VP8Copy4x4; struct VP8Matrix; // forward declaration typedef int (*VP8QuantizeBlock)(int16_t in[16], int16_t out[16], const struct VP8Matrix* const mtx); +// Same as VP8QuantizeBlock, but quantizes two consecutive blocks. +typedef int (*VP8Quantize2Blocks)(int16_t in[32], int16_t out[32], + const struct VP8Matrix* const mtx); + extern VP8QuantizeBlock VP8EncQuantizeBlock; +extern VP8Quantize2Blocks VP8EncQuantize2Blocks; // specific to 2nd transform: typedef int (*VP8QuantizeBlockWHT)(int16_t in[16], int16_t out[16], diff --git a/src/dsp/enc.c b/src/dsp/enc.c index e4ea8cb8..3a17c690 100644 --- a/src/dsp/enc.c +++ b/src/dsp/enc.c @@ -625,6 +625,14 @@ static int QuantizeBlock(int16_t in[16], int16_t out[16], return (last >= 0); } +static int Quantize2Blocks(int16_t in[32], int16_t out[32], + const VP8Matrix* const mtx) { + int nz; + nz = VP8EncQuantizeBlock(in + 0 * 16, out + 0 * 16, mtx) << 0; + nz |= VP8EncQuantizeBlock(in + 1 * 16, out + 1 * 16, mtx) << 1; + return nz; +} + static int QuantizeBlockWHT(int16_t in[16], int16_t out[16], const VP8Matrix* const mtx) { int n, last = -1; @@ -684,6 +692,7 @@ VP8Metric VP8SSE4x4; VP8WMetric VP8TDisto4x4; VP8WMetric VP8TDisto16x16; VP8QuantizeBlock VP8EncQuantizeBlock; +VP8Quantize2Blocks 
VP8EncQuantize2Blocks; VP8QuantizeBlockWHT VP8EncQuantizeBlockWHT; VP8BlockCopy VP8Copy4x4; @@ -711,6 +720,7 @@ void VP8EncDspInit(void) { VP8TDisto4x4 = Disto4x4; VP8TDisto16x16 = Disto16x16; VP8EncQuantizeBlock = QuantizeBlock; + VP8EncQuantize2Blocks = Quantize2Blocks; VP8EncQuantizeBlockWHT = QuantizeBlockWHT; VP8Copy4x4 = Copy4x4; diff --git a/src/dsp/enc_mips32.c b/src/dsp/enc_mips32.c index 6acde8a5..acd18fd6 100644 --- a/src/dsp/enc_mips32.c +++ b/src/dsp/enc_mips32.c @@ -237,6 +237,14 @@ static int QuantizeBlock(int16_t in[16], int16_t out[16], return 0; } +static int Quantize2Blocks(int16_t in[32], int16_t out[32], + const VP8Matrix* const mtx) { + int nz; + nz = QuantizeBlock(in + 0 * 16, out + 0 * 16, mtx) << 0; + nz |= QuantizeBlock(in + 1 * 16, out + 1 * 16, mtx) << 1; + return nz; +} + #undef QUANTIZE_ONE // macro for one horizontal pass in Disto4x4 (TTransform) @@ -756,6 +764,7 @@ void VP8EncDspInitMIPS32(void) { #if defined(WEBP_USE_MIPS32) VP8ITransform = ITransform; VP8EncQuantizeBlock = QuantizeBlock; + VP8EncQuantize2Blocks = Quantize2Blocks; VP8TDisto4x4 = Disto4x4; VP8TDisto16x16 = Disto16x16; VP8FTransform = FTransform; diff --git a/src/dsp/enc_neon.c b/src/dsp/enc_neon.c index 5b79c9c2..3ef9fe36 100644 --- a/src/dsp/enc_neon.c +++ b/src/dsp/enc_neon.c @@ -1047,6 +1047,14 @@ static int QuantizeBlock(int16_t in[16], int16_t out[16], return 0; } +static int Quantize2Blocks(int16_t in[32], int16_t out[32], + const VP8Matrix* const mtx) { + int nz; + nz = QuantizeBlock(in + 0 * 16, out + 0 * 16, mtx) << 0; + nz |= QuantizeBlock(in + 1 * 16, out + 1 * 16, mtx) << 1; + return nz; +} + #endif // !WORK_AROUND_GCC #endif // WEBP_USE_NEON @@ -1072,6 +1080,7 @@ void VP8EncDspInitNEON(void) { VP8SSE4x4 = SSE4x4; #if !defined(WORK_AROUND_GCC) VP8EncQuantizeBlock = QuantizeBlock; + VP8EncQuantize2Blocks = Quantize2Blocks; #endif #endif // WEBP_USE_NEON } diff --git a/src/dsp/enc_sse2.c b/src/dsp/enc_sse2.c index 9958d9f6..928cf837 100644 --- 
a/src/dsp/enc_sse2.c +++ b/src/dsp/enc_sse2.c @@ -929,6 +929,15 @@ static int QuantizeBlockWHT(int16_t in[16], int16_t out[16], return DoQuantizeBlock(in, out, NULL, mtx); } +static int Quantize2Blocks(int16_t in[32], int16_t out[32], + const VP8Matrix* const mtx) { + int nz; + const uint16_t* const sharpen = &mtx->sharpen_[0]; + nz = DoQuantizeBlock(in + 0 * 16, out + 0 * 16, sharpen, mtx) << 0; + nz |= DoQuantizeBlock(in + 1 * 16, out + 1 * 16, sharpen, mtx) << 1; + return nz; +} + // Forward declaration. void VP8SetResidualCoeffsSSE2(const int16_t* const coeffs, VP8Residual* const res); @@ -967,6 +976,7 @@ void VP8EncDspInitSSE2(void) { #if defined(WEBP_USE_SSE2) VP8CollectHistogram = CollectHistogram; VP8EncQuantizeBlock = QuantizeBlock; + VP8EncQuantize2Blocks = Quantize2Blocks; VP8EncQuantizeBlockWHT = QuantizeBlockWHT; VP8ITransform = ITransform; VP8FTransform = FTransform; diff --git a/src/enc/quant.c b/src/enc/quant.c index 9130a416..9b4f4d73 100644 --- a/src/enc/quant.c +++ b/src/enc/quant.c @@ -746,12 +746,13 @@ static int ReconstructIntra16(VP8EncIterator* const it, } } } else { - for (n = 0; n < 16; ++n) { + for (n = 0; n < 16; n += 2) { // Zero-out the first coeff, so that: a) nz is correct below, and // b) finding 'last' non-zero coeffs in SetResidualCoeffs() is simplified. - tmp[n][0] = 0; - nz |= VP8EncQuantizeBlock(tmp[n], rd->y_ac_levels[n], &dqm->y1_) << n; - assert(rd->y_ac_levels[n][0] == 0); + tmp[n][0] = tmp[n + 1][0] = 0; + nz |= VP8EncQuantize2Blocks(tmp[n], rd->y_ac_levels[n], &dqm->y1_) << n; + assert(rd->y_ac_levels[n + 0][0] == 0); + assert(rd->y_ac_levels[n + 1][0] == 0); } } @@ -816,8 +817,8 @@ static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd, } } } else { - for (n = 0; n < 8; ++n) { - nz |= VP8EncQuantizeBlock(tmp[n], rd->uv_levels[n], &dqm->uv_) << n; + for (n = 0; n < 8; n += 2) { + nz |= VP8EncQuantize2Blocks(tmp[n], rd->uv_levels[n], &dqm->uv_) << n; } }