mirror of
https://github.com/webmproject/libwebp.git
synced 2024-11-19 20:08:28 +01:00
dec_neon: use vst_lane instead of vget_lane
results in fewer instructions, small speed improvement Change-Id: I98de632d09ff09f295368c0d744cb4397b585084
This commit is contained in:
parent
bf06105293
commit
71bca5ecf3
@ -566,10 +566,10 @@ static WEBP_INLINE void SaturateAndStore4x4(uint8_t* const dst,
|
||||
const uint8x8_t dst23_u8 = vqmovun_s16(dst23);
|
||||
|
||||
// Store the results.
|
||||
*(uint32_t*)(dst + 0 * BPS) = vget_lane_s32(vreinterpret_s32_u8(dst01_u8), 0);
|
||||
*(uint32_t*)(dst + 1 * BPS) = vget_lane_s32(vreinterpret_s32_u8(dst01_u8), 1);
|
||||
*(uint32_t*)(dst + 2 * BPS) = vget_lane_s32(vreinterpret_s32_u8(dst23_u8), 0);
|
||||
*(uint32_t*)(dst + 3 * BPS) = vget_lane_s32(vreinterpret_s32_u8(dst23_u8), 1);
|
||||
vst1_lane_u32((uint32_t*)(dst + 0 * BPS), vreinterpret_u32_u8(dst01_u8), 0);
|
||||
vst1_lane_u32((uint32_t*)(dst + 1 * BPS), vreinterpret_u32_u8(dst01_u8), 1);
|
||||
vst1_lane_u32((uint32_t*)(dst + 2 * BPS), vreinterpret_u32_u8(dst23_u8), 0);
|
||||
vst1_lane_u32((uint32_t*)(dst + 3 * BPS), vreinterpret_u32_u8(dst23_u8), 1);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void Add4x4(const int16x8_t row01, const int16x8_t row23,
|
||||
|
Loading…
Reference in New Issue
Block a user