From 69fce2ea787824b657cf0def24e4a81bd01e5501 Mon Sep 17 00:00:00 2001 From: skal Date: Sun, 8 Jun 2014 06:40:22 +0200 Subject: [PATCH] remove the special casing for res->first in VP8SetResidualCoeffs if res->first = 1, coeffs[0]=0 because of quant.c:749 and line added at quant.c:744 So, no need for the extra case. Going forward, TrellisQuantizeBlock() should also be calling a variant of VP8SetResidualCoeffs() to set the 'last' field. also: fixes a warning for win64 + slight speed-up Change-Id: Ib24b611f7396d24aeb5b56dc74d5c39160f048f0 --- src/dsp/enc_sse2.c | 8 ++++---- src/enc/cost.c | 3 ++- src/enc/quant.c | 6 +++++- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/src/dsp/enc_sse2.c b/src/dsp/enc_sse2.c index 9f49744d..53332f7f 100644 --- a/src/dsp/enc_sse2.c +++ b/src/dsp/enc_sse2.c @@ -946,14 +946,14 @@ void VP8SetResidualCoeffsSSE2(const int16_t* const coeffs, // Get the comparison results as a bitmask, consisting of two times 16 bits: // two identical bits for each result. Concatenate both bitmasks to get a // single 32 bit value. Negate the mask to get the position of entries that - // are not equal to zero. Finally, mask out least significant bits according - // to res->first. + // are not equal to zero. We don't need to mask out least significant bits + // according to res->first, since coeffs[0] is 0 if res->first > 0 const uint32_t mask = - ~(((uint32_t)_mm_movemask_epi8(m1) << 16) | _mm_movemask_epi8(m0)) & - -(1U << (res->first << 1)); + ~(((uint32_t)_mm_movemask_epi8(m1) << 16) | _mm_movemask_epi8(m0)); // The position of the most significant non-zero bit indicates the position of // the last non-zero value. Divide the result by two because __movemask_epi8 // operates on 8 bit values instead of 16 bit values. + assert(res->first == 0 || coeffs[0] == 0); res->last = mask ? (BitsLog2Floor(mask) >> 1) : -1; res->coeffs = coeffs; } diff --git a/src/enc/cost.c b/src/enc/cost.c index 5d83f262..9d2cc017 100644 --- a/src/enc/cost.c +++ b/src/enc/cost.c @@ -562,7 +562,8 @@ static void SetResidualCoeffs(const int16_t* const coeffs, VP8Residual* const res) { int n; res->last = -1; - for (n = 15; n >= res->first; --n) { + assert(res->first == 0 || coeffs[0] == 0); + for (n = 15; n >= 0; --n) { if (coeffs[n]) { res->last = n; break; diff --git a/src/enc/quant.c b/src/enc/quant.c index c8cdc160..9130a416 100644 --- a/src/enc/quant.c +++ b/src/enc/quant.c @@ -741,13 +741,17 @@ static int ReconstructIntra16(VP8EncIterator* const it, TrellisQuantizeBlock(enc, tmp[n], rd->y_ac_levels[n], ctx, 0, &dqm->y1_, dqm->lambda_trellis_i16_); it->top_nz_[x] = it->left_nz_[y] = non_zero; + rd->y_ac_levels[n][0] = 0; nz |= non_zero << n; } } } else { for (n = 0; n < 16; ++n) { - tmp[n][0] = 0; // so that nz is correct below + // Zero-out the first coeff, so that: a) nz is correct below, and + // b) finding 'last' non-zero coeffs in SetResidualCoeffs() is simplified. + tmp[n][0] = 0; nz |= VP8EncQuantizeBlock(tmp[n], rd->y_ac_levels[n], &dqm->y1_) << n; + assert(rd->y_ac_levels[n][0] == 0); } }