{dec,enc}_neon: harmonize function suffixes x2

+ neon.h

BUG=webp:355

Change-Id: Ia17c7dfc7d61742a4758823675a2d556a739c389
This commit is contained in:
James Zern 2017-10-20 00:54:12 -07:00
parent 0295e9815d
commit 8d033b14d7
3 changed files with 13 additions and 11 deletions

View File

@@ -1219,7 +1219,7 @@ static void TransformWHT_NEON(const int16_t* in, int16_t* out) {
tmp.val[2] = vsubq_s32(a0, a1); tmp.val[2] = vsubq_s32(a0, a1);
tmp.val[3] = vsubq_s32(a3, a2); tmp.val[3] = vsubq_s32(a3, a2);
// Arrange the temporary results column-wise. // Arrange the temporary results column-wise.
tmp = Transpose4x4(tmp); tmp = Transpose4x4_NEON(tmp);
} }
{ {

View File

@@ -268,8 +268,10 @@ static uint8x16_t Load4x4_NEON(const uint8_t* src) {
#if defined(WEBP_USE_INTRINSICS) #if defined(WEBP_USE_INTRINSICS)
static WEBP_INLINE void Transpose4x4_S16(const int16x4_t A, const int16x4_t B, static WEBP_INLINE void Transpose4x4_S16_NEON(const int16x4_t A,
const int16x4_t C, const int16x4_t D, const int16x4_t B,
const int16x4_t C,
const int16x4_t D,
int16x8_t* const out01, int16x8_t* const out01,
int16x8_t* const out32) { int16x8_t* const out32) {
const int16x4x2_t AB = vtrn_s16(A, B); const int16x4x2_t AB = vtrn_s16(A, B);
@@ -303,7 +305,7 @@ static void FTransform_NEON(const uint8_t* src, const uint8_t* ref,
const int16x4_t D1 = vget_high_s16(D0D1); const int16x4_t D1 = vget_high_s16(D0D1);
const int16x4_t D2 = vget_low_s16(D2D3); const int16x4_t D2 = vget_low_s16(D2D3);
const int16x4_t D3 = vget_high_s16(D2D3); const int16x4_t D3 = vget_high_s16(D2D3);
Transpose4x4_S16(D0, D1, D2, D3, &d0d1, &d3d2); Transpose4x4_S16_NEON(D0, D1, D2, D3, &d0d1, &d3d2);
} }
{ // 1rst pass { // 1rst pass
const int32x4_t kCst937 = vdupq_n_s32(937); const int32x4_t kCst937 = vdupq_n_s32(937);
@@ -321,7 +323,7 @@ static void FTransform_NEON(const uint8_t* src, const uint8_t* ref,
const int32x4_t a3_m_a2 = vmlsl_n_s16(a3_2217, vget_high_s16(a3a2), 5352); const int32x4_t a3_m_a2 = vmlsl_n_s16(a3_2217, vget_high_s16(a3a2), 5352);
const int16x4_t tmp1 = vshrn_n_s32(vaddq_s32(a2_p_a3, kCst1812), 9); const int16x4_t tmp1 = vshrn_n_s32(vaddq_s32(a2_p_a3, kCst1812), 9);
const int16x4_t tmp3 = vshrn_n_s32(vaddq_s32(a3_m_a2, kCst937), 9); const int16x4_t tmp3 = vshrn_n_s32(vaddq_s32(a3_m_a2, kCst937), 9);
Transpose4x4_S16(tmp0, tmp1, tmp2, tmp3, &d0d1, &d3d2); Transpose4x4_S16_NEON(tmp0, tmp1, tmp2, tmp3, &d0d1, &d3d2);
} }
{ // 2nd pass { // 2nd pass
// the (1<<16) addition is for the replacement: a3!=0 <-> 1-(a3==0) // the (1<<16) addition is for the replacement: a3!=0 <-> 1-(a3==0)
@@ -519,7 +521,7 @@ static void FTransformWHT_NEON(const int16_t* src, int16_t* out) {
tmp0.val[3] = vsubq_s32(a0, a1); tmp0.val[3] = vsubq_s32(a0, a1);
} }
{ {
const int32x4x4_t tmp1 = Transpose4x4(tmp0); const int32x4x4_t tmp1 = Transpose4x4_NEON(tmp0);
// a0 = tmp[0 + i] + tmp[ 8 + i] // a0 = tmp[0 + i] + tmp[ 8 + i]
// a1 = tmp[4 + i] + tmp[12 + i] // a1 = tmp[4 + i] + tmp[12 + i]
// a2 = tmp[4 + i] - tmp[12 + i] // a2 = tmp[4 + i] - tmp[12 + i]
@@ -563,7 +565,7 @@ static void FTransformWHT_NEON(const int16_t* src, int16_t* out) {
// a 26ae, b 26ae // a 26ae, b 26ae
// a 37bf, b 37bf // a 37bf, b 37bf
// //
static WEBP_INLINE int16x8x4_t DistoTranspose4x4S16(int16x8x4_t q4_in) { static WEBP_INLINE int16x8x4_t DistoTranspose4x4S16_NEON(int16x8x4_t q4_in) {
const int16x8x2_t q2_tmp0 = vtrnq_s16(q4_in.val[0], q4_in.val[1]); const int16x8x2_t q2_tmp0 = vtrnq_s16(q4_in.val[0], q4_in.val[1]);
const int16x8x2_t q2_tmp1 = vtrnq_s16(q4_in.val[2], q4_in.val[3]); const int16x8x2_t q2_tmp1 = vtrnq_s16(q4_in.val[2], q4_in.val[3]);
const int32x4x2_t q2_tmp2 = vtrnq_s32(vreinterpretq_s32_s16(q2_tmp0.val[0]), const int32x4x2_t q2_tmp2 = vtrnq_s32(vreinterpretq_s32_s16(q2_tmp0.val[0]),
@@ -686,7 +688,7 @@ static int Disto4x4_NEON(const uint8_t* const a, const uint8_t* const b,
const int16x8x4_t q4_v = DistoVerticalPass_NEON(d4_in); const int16x8x4_t q4_v = DistoVerticalPass_NEON(d4_in);
const int16x4x4_t d4_w = DistoLoadW_NEON(w); const int16x4x4_t d4_w = DistoLoadW_NEON(w);
// horizontal pass // horizontal pass
const int16x8x4_t q4_t = DistoTranspose4x4S16(q4_v); const int16x8x4_t q4_t = DistoTranspose4x4S16_NEON(q4_v);
const int16x8x4_t q4_h = DistoHorizontalPass_NEON(q4_t); const int16x8x4_t q4_h = DistoHorizontalPass_NEON(q4_t);
int32x2_t d_sum = DistoSum_NEON(q4_h, d4_w); int32x2_t d_sum = DistoSum_NEON(q4_h, d4_w);

View File

@@ -48,7 +48,7 @@
#define WORK_AROUND_GCC #define WORK_AROUND_GCC
#endif #endif
static WEBP_INLINE int32x4x4_t Transpose4x4(const int32x4x4_t rows) { static WEBP_INLINE int32x4x4_t Transpose4x4_NEON(const int32x4x4_t rows) {
uint64x2x2_t row01, row23; uint64x2x2_t row01, row23;
row01.val[0] = vreinterpretq_u64_s32(rows.val[0]); row01.val[0] = vreinterpretq_u64_s32(rows.val[0]);