dec_neon,DC8_NEON: use vaddlv instead of movl+vaddv

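One fewer instruction: vaddlv_u8 widens and sums the lanes in a single step, replacing the separate vmovl_u8 + vaddvq_u16 pair.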

Change-Id: I2f599fd6f9eebbb0cab81ae9855244fc401d4323
This commit is contained in:
James Zern 2020-03-04 15:46:38 -08:00
parent 27d082403c
commit 53f3d8cf7e

View File

@@ -1429,8 +1429,7 @@ static WEBP_INLINE void DC8_NEON(uint8_t* dst, int do_top, int do_left) {
if (do_top) { if (do_top) {
const uint8x8_t A = vld1_u8(dst - BPS); // top row const uint8x8_t A = vld1_u8(dst - BPS); // top row
#if defined(__aarch64__) #if defined(__aarch64__)
const uint16x8_t B = vmovl_u8(A); const uint16_t p2 = vaddlv_u8(A);
const uint16_t p2 = vaddvq_u16(B);
sum_top = vdupq_n_u16(p2); sum_top = vdupq_n_u16(p2);
#else #else
const uint16x4_t p0 = vpaddl_u8(A); // cascading summation of the top const uint16x4_t p0 = vpaddl_u8(A); // cascading summation of the top