// Patch summary (unified diff against src/dsp/dec_neon.c):
//
//  * Adds the static helper ApplyFilter2NoFlip(p0s, q0s, delta, op0, oq0).
//    All vector arguments and outputs are int8x16_t. It computes
//      delta3 = vshrq_n_s8(vqaddq_s8(delta, 3), 3)
//      delta4 = vshrq_n_s8(vqaddq_s8(delta, 4), 3)
//    and stores vqaddq_s8(p0s, delta3) in *op0 and vqsubq_s8(q0s, delta4)
//    in *oq0. Unlike ApplyFilter2(), whose outputs are uint8x16_t produced via
//    FlipSignBack(), the new helper writes its int8x16_t results directly.
//
//  * Moves the '#if defined(WEBP_USE_INTRINSICS)' line from just after
//    ApplyFilter2() to just before it, so the new helper is placed ahead of
//    that guard and ApplyFilter2() now follows it.
//
//  * In DoFilter4() and DoFilter6(), replaces the ApplyFilter2() call and the
//    two FlipSign() calls on its temporaries (tmp_p0, tmp_q0) with a single
//    ApplyFilter2NoFlip() call that takes p0s/q0s by value and writes the
//    results back through &p0s/&q0s. The matching TODO(skal) about the double
//    FlipSign() is removed in both places.
//
// NOTE(review): the line breaks of the patch below appear to have been
// collapsed into spaces; the hunk text is left exactly as found. Restore the
// original newlines before feeding this to a patch tool.
diff --git a/src/dsp/dec_neon.c b/src/dsp/dec_neon.c index afdb48f1..d84e0d1c 100644 --- a/src/dsp/dec_neon.c +++ b/src/dsp/dec_neon.c @@ -479,6 +479,21 @@ static int8x16_t GetBaseDelta0(const int8x16_t p0, const int8x16_t q0) { //------------------------------------------------------------------------------ +static void ApplyFilter2NoFlip(const int8x16_t p0s, const int8x16_t q0s, + const int8x16_t delta, + int8x16_t* const op0, int8x16_t* const oq0) { + const int8x16_t kCst3 = vdupq_n_s8(0x03); + const int8x16_t kCst4 = vdupq_n_s8(0x04); + const int8x16_t delta_p3 = vqaddq_s8(delta, kCst3); + const int8x16_t delta_p4 = vqaddq_s8(delta, kCst4); + const int8x16_t delta3 = vshrq_n_s8(delta_p3, 3); + const int8x16_t delta4 = vshrq_n_s8(delta_p4, 3); + *op0 = vqaddq_s8(p0s, delta3); + *oq0 = vqsubq_s8(q0s, delta4); +} + +#if defined(WEBP_USE_INTRINSICS) + static void ApplyFilter2(const int8x16_t p0s, const int8x16_t q0s, const int8x16_t delta, uint8x16_t* const op0, uint8x16_t* const oq0) { @@ -494,8 +509,6 @@ static void ApplyFilter2(const int8x16_t p0s, const int8x16_t q0s, *oq0 = FlipSignBack(sq0); } -#if defined(WEBP_USE_INTRINSICS) - static void DoFilter2(const uint8x16_t p1, const uint8x16_t p0, const uint8x16_t q0, const uint8x16_t q1, const uint8x16_t mask, @@ -721,11 +734,7 @@ static void DoFilter4( const int8x16_t delta = GetBaseDelta(p1s, p0s, q0s, q1s); const int8x16_t simple_lf_delta = vandq_s8(delta, vreinterpretq_s8_u8(simple_lf_mask)); - uint8x16_t tmp_p0, tmp_q0; - ApplyFilter2(p0s, q0s, simple_lf_delta, &tmp_p0, &tmp_q0); - // TODO(skal): avoid the double FlipSign() in ApplyFilter2() and here - p0s = FlipSign(tmp_p0); - q0s = FlipSign(tmp_q0); + ApplyFilter2NoFlip(p0s, q0s, simple_lf_delta, &p0s, &q0s); } // do_filter4 part (complex loopfilter on pixels without hev) @@ -797,11 +806,7 @@ static void DoFilter6( { const int8x16_t simple_lf_delta = vandq_s8(delta0, vreinterpretq_s8_u8(simple_lf_mask)); - uint8x16_t tmp_p0, tmp_q0; - ApplyFilter2(p0s, q0s, 
simple_lf_delta, &tmp_p0, &tmp_q0); - // TODO(skal): avoid the double FlipSign() in ApplyFilter2() and here - p0s = FlipSign(tmp_p0); - q0s = FlipSign(tmp_q0); + ApplyFilter2NoFlip(p0s, q0s, simple_lf_delta, &p0s, &q0s); } // do_filter6 part (complex loopfilter on pixels without hev)