From 8d033b14d77fe3a8ace9c681ee351eb54b8bd778 Mon Sep 17 00:00:00 2001 From: James Zern Date: Fri, 20 Oct 2017 00:54:12 -0700 Subject: [PATCH] {dec,enc}_neon: harmonize function suffixes x2 + neon.h BUG=webp:355 Change-Id: Ia17c7dfc7d61742a4758823675a2d556a739c389 --- src/dsp/dec_neon.c | 2 +- src/dsp/enc_neon.c | 20 +++++++++++--------- src/dsp/neon.h | 2 +- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/src/dsp/dec_neon.c b/src/dsp/dec_neon.c index 2a27dec1..ece9a2a3 100644 --- a/src/dsp/dec_neon.c +++ b/src/dsp/dec_neon.c @@ -1219,7 +1219,7 @@ static void TransformWHT_NEON(const int16_t* in, int16_t* out) { tmp.val[2] = vsubq_s32(a0, a1); tmp.val[3] = vsubq_s32(a3, a2); // Arrange the temporary results column-wise. - tmp = Transpose4x4(tmp); + tmp = Transpose4x4_NEON(tmp); } { diff --git a/src/dsp/enc_neon.c b/src/dsp/enc_neon.c index d3fe7741..43bf1245 100644 --- a/src/dsp/enc_neon.c +++ b/src/dsp/enc_neon.c @@ -268,10 +268,12 @@ static uint8x16_t Load4x4_NEON(const uint8_t* src) { #if defined(WEBP_USE_INTRINSICS) -static WEBP_INLINE void Transpose4x4_S16(const int16x4_t A, const int16x4_t B, - const int16x4_t C, const int16x4_t D, - int16x8_t* const out01, - int16x8_t* const out32) { +static WEBP_INLINE void Transpose4x4_S16_NEON(const int16x4_t A, + const int16x4_t B, + const int16x4_t C, + const int16x4_t D, + int16x8_t* const out01, + int16x8_t* const out32) { const int16x4x2_t AB = vtrn_s16(A, B); const int16x4x2_t CD = vtrn_s16(C, D); const int32x2x2_t tmp02 = vtrn_s32(vreinterpret_s32_s16(AB.val[0]), @@ -303,7 +305,7 @@ static void FTransform_NEON(const uint8_t* src, const uint8_t* ref, const int16x4_t D1 = vget_high_s16(D0D1); const int16x4_t D2 = vget_low_s16(D2D3); const int16x4_t D3 = vget_high_s16(D2D3); - Transpose4x4_S16(D0, D1, D2, D3, &d0d1, &d3d2); + Transpose4x4_S16_NEON(D0, D1, D2, D3, &d0d1, &d3d2); } { // 1rst pass const int32x4_t kCst937 = vdupq_n_s32(937); @@ -321,7 +323,7 @@ static void FTransform_NEON(const uint8_t* src, const uint8_t* ref, const int32x4_t a3_m_a2 = vmlsl_n_s16(a3_2217, vget_high_s16(a3a2), 5352); const int16x4_t tmp1 = vshrn_n_s32(vaddq_s32(a2_p_a3, kCst1812), 9); const int16x4_t tmp3 = vshrn_n_s32(vaddq_s32(a3_m_a2, kCst937), 9); - Transpose4x4_S16(tmp0, tmp1, tmp2, tmp3, &d0d1, &d3d2); + Transpose4x4_S16_NEON(tmp0, tmp1, tmp2, tmp3, &d0d1, &d3d2); } { // 2nd pass // the (1<<16) addition is for the replacement: a3!=0 <-> 1-(a3==0) @@ -519,7 +521,7 @@ static void FTransformWHT_NEON(const int16_t* src, int16_t* out) { tmp0.val[3] = vsubq_s32(a0, a1); } { - const int32x4x4_t tmp1 = Transpose4x4(tmp0); + const int32x4x4_t tmp1 = Transpose4x4_NEON(tmp0); // a0 = tmp[0 + i] + tmp[ 8 + i] // a1 = tmp[4 + i] + tmp[12 + i] // a2 = tmp[4 + i] - tmp[12 + i] @@ -563,7 +565,7 @@ static void FTransformWHT_NEON(const int16_t* src, int16_t* out) { // a 26ae, b 26ae // a 37bf, b 37bf // -static WEBP_INLINE int16x8x4_t DistoTranspose4x4S16(int16x8x4_t q4_in) { +static WEBP_INLINE int16x8x4_t DistoTranspose4x4S16_NEON(int16x8x4_t q4_in) { const int16x8x2_t q2_tmp0 = vtrnq_s16(q4_in.val[0], q4_in.val[1]); const int16x8x2_t q2_tmp1 = vtrnq_s16(q4_in.val[2], q4_in.val[3]); const int32x4x2_t q2_tmp2 = vtrnq_s32(vreinterpretq_s32_s16(q2_tmp0.val[0]), @@ -686,7 +688,7 @@ static int Disto4x4_NEON(const uint8_t* const a, const uint8_t* const b, const int16x8x4_t q4_v = DistoVerticalPass_NEON(d4_in); const int16x4x4_t d4_w = DistoLoadW_NEON(w); // horizontal pass - const int16x8x4_t q4_t = DistoTranspose4x4S16(q4_v); + const int16x8x4_t q4_t = DistoTranspose4x4S16_NEON(q4_v); const int16x8x4_t q4_h = DistoHorizontalPass_NEON(q4_t); int32x2_t d_sum = DistoSum_NEON(q4_h, d4_w); diff --git a/src/dsp/neon.h b/src/dsp/neon.h index 02d69f4c..aa1dea13 100644 --- a/src/dsp/neon.h +++ b/src/dsp/neon.h @@ -48,7 +48,7 @@ #define WORK_AROUND_GCC #endif -static WEBP_INLINE int32x4x4_t Transpose4x4(const int32x4x4_t rows) { +static WEBP_INLINE int32x4x4_t Transpose4x4_NEON(const int32x4x4_t rows) { uint64x2x2_t row01, row23; row01.val[0] = vreinterpretq_u64_s32(rows.val[0]);