dsp,neon: use vaddv in a few more places

SumToInt_NEON
horizontal_add_uint32x4

Change-Id: I881831a7b2bab35a1810b0d83fee761470f3e09f
This commit is contained in:
James Zern
2022-09-09 22:17:46 -07:00
parent e8f83de286
commit e68765af42
2 changed files with 16 additions and 6 deletions

View File

@@ -21,10 +21,15 @@
#define IsFlat IsFlat_NEON
// Horizontal add: reduces the four 32-bit lanes of 'a' to a single total.
// NOTE(review): this text is a rendered diff without +/- markers. The
// uint32x2_t signature directly below and the bare 'return vadd_u32(...)'
// further down appear to be the removed (pre-change) lines; the uint32_t
// signature and the 'c' / vget_lane_u32 lines look like their replacements.
// Confirm against the actual file before relying on either form.
static uint32x2_t horizontal_add_uint32x4(const uint32x4_t a) {
static uint32_t horizontal_add_uint32x4(const uint32x4_t a) {
// On AArch64 a single across-vector add intrinsic yields the scalar sum.
#if defined(__aarch64__)
return vaddvq_u32(a);
#else
// Fallback path: pairwise widening add folds the four 32-bit lanes into two
// 64-bit lanes, whose low and high halves are then added together as
// uint32x2_t vectors.
const uint64x2_t b = vpaddlq_u32(a);
return vadd_u32(vreinterpret_u32_u64(vget_low_u64(b)),
vreinterpret_u32_u64(vget_high_u64(b)));
// Same addition as above, but kept in a temporary so that lane 0 can be
// extracted and returned as a scalar.
const uint32x2_t c = vadd_u32(vreinterpret_u32_u64(vget_low_u64(b)),
vreinterpret_u32_u64(vget_high_u64(b)));
return vget_lane_u32(c, 0);
#endif
}
static WEBP_INLINE int IsFlat(const int16_t* levels, int num_blocks,
@@ -45,7 +50,7 @@ static WEBP_INLINE int IsFlat(const int16_t* levels, int num_blocks,
levels += 16;
}
return thresh >= (int32_t)vget_lane_u32(horizontal_add_uint32x4(sum), 0);
return thresh >= (int)horizontal_add_uint32x4(sum);
}
#else