From 77724f70e9e51e6eae91526048d0e85ac9bcc712 Mon Sep 17 00:00:00 2001 From: Pascal Massimino Date: Sat, 31 Jan 2015 23:16:10 +0100 Subject: [PATCH] SSE2 version of GradientUnfilter somewhat 1-2% faster decoder for lossy+alpha Change-Id: Ib317e26e9fcb8d37af02668ffbfccc4664e659fe --- src/dsp/filters_sse2.c | 60 +++++++++++++++++++++++++++++++----------- 1 file changed, 45 insertions(+), 15 deletions(-) diff --git a/src/dsp/filters_sse2.c b/src/dsp/filters_sse2.c index 2ef6e209..12b6d07f 100644 --- a/src/dsp/filters_sse2.c +++ b/src/dsp/filters_sse2.c @@ -36,6 +36,7 @@ static void PredictLineTop(const uint8_t* src, const uint8_t* pred, uint8_t* dst, int length, int inverse) { int i; const int max_pos = length & ~31; + assert(length >= 0); if (inverse) { for (i = 0; i < max_pos; i += 32) { const __m128i A0 = _mm_loadu_si128((const __m128i*)&src[i + 0]); @@ -67,10 +68,10 @@ static void PredictLineTop(const uint8_t* src, const uint8_t* pred, static void PredictLineLeft(const uint8_t* src, uint8_t* dst, int length, int inverse) { int i; + if (length <= 0) return; if (inverse) { const int max_pos = length & ~7; - __m128i last = - _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, dst[-1]); + __m128i last = _mm_set_epi32(0, 0, 0, dst[-1]); for (i = 0; i < max_pos; i += 8) { const __m128i A0 = _mm_loadl_epi64((const __m128i*)(src + i)); const __m128i A1 = _mm_add_epi8(A0, last); @@ -218,45 +219,74 @@ static void GradientPredictDirect(const uint8_t* const row, } } +static void GradientPredictInverse(const uint8_t* const in, + const uint8_t* const top, + uint8_t* const row, int length) { + if (length > 0) { + int i; + const int max_pos = length & ~7; + const __m128i zero = _mm_setzero_si128(); + __m128i A = _mm_set_epi32(0, 0, 0, row[-1]); // left sample + for (i = 0; i < max_pos; i += 8) { + const __m128i tmp0 = _mm_loadl_epi64((const __m128i*)&top[i]); + const __m128i tmp1 = _mm_loadl_epi64((const __m128i*)&top[i - 1]); + const __m128i B = _mm_unpacklo_epi8(tmp0, zero); + const __m128i C = _mm_unpacklo_epi8(tmp1, zero); + const __m128i tmp2 = _mm_loadl_epi64((const __m128i*)&in[i]); + const __m128i D = _mm_unpacklo_epi8(tmp2, zero); // base input + const __m128i E = _mm_sub_epi16(B, C); // unclipped gradient basis B - C + __m128i out = zero; // accumulator for output + __m128i mask_hi = _mm_set_epi32(0, 0, 0, 0xff); + int k = 8; + while (1) { + const __m128i tmp3 = _mm_add_epi16(A, E); // delta = A + B - C + const __m128i tmp4 = _mm_min_epi16(tmp3, mask_hi); + const __m128i tmp5 = _mm_max_epi16(tmp4, zero); // clipped delta + const __m128i tmp6 = _mm_add_epi16(tmp5, D); // add to in[] values + A = _mm_and_si128(tmp6, mask_hi); // 1-complement clip + out = _mm_or_si128(out, A); // accumulate output + if (--k == 0) break; + A = _mm_slli_si128(A, 2); // rotate left sample + mask_hi = _mm_slli_si128(mask_hi, 2); // rotate mask + } + A = _mm_srli_si128(A, 14); // prepare left sample for next iteration + _mm_storel_epi64((__m128i*)&row[i], _mm_packus_epi16(out, zero)); + } + for (; i < length; ++i) { + row[i] = in[i] + GradientPredictorC(row[i - 1], top[i], top[i - 1]); + } + } +} + static WEBP_INLINE void DoGradientFilter(const uint8_t* in, int width, int height, int stride, int row, int num_rows, int inverse, uint8_t* out) { - const uint8_t* preds; const size_t start_offset = row * stride; const int last_row = row + num_rows; SANITY_CHECK(in, out); in += start_offset; out += start_offset; - preds = inverse ? out : in; // left prediction for top scan-line if (row == 0) { out[0] = in[0]; PredictLineLeft(in + 1, out + 1, width - 1, inverse); row = 1; - preds += stride; in += stride; out += stride; } // Filter line-by-line. while (row < last_row) { - int w; - // leftmost pixel: predict from above. - PredictLineC(in, preds - stride, out, 1, inverse); if (inverse) { - for (w = 1; w < width; ++w) { - const int pred = GradientPredictorC(out[w - 1], - out[w - stride], - out[w - stride - 1]); - out[w] = in[w] + pred; - } + PredictLineC(in, out - stride, out, 1, inverse); // predict from above + GradientPredictInverse(in + 1, out + 1 - stride, out + 1, width - 1); } else { + PredictLineC(in, in - stride, out, 1, inverse); GradientPredictDirect(in + 1, in + 1 - stride, out + 1, width - 1); } ++row; - preds += stride; in += stride; out += stride; }