mirror of
https://github.com/webmproject/libwebp.git
synced 2024-12-27 06:08:21 +01:00
dec_neon/TrueMotion: simplify left border load
Use vld1_dup_u8() rather than a separate load + dup after the values were zero-extended; mildly faster at the function level. Change-Id: I1b3666a6aeb465722a1214dbc6d71c27689a7f89
This commit is contained in:
parent
bf46d0acff
commit
ea95b305ca
@ -1289,14 +1289,10 @@ static WEBP_INLINE void TrueMotion(uint8_t* dst, int size) {
|
||||
int y;
|
||||
for (y = 0; y < size; y += 4) {
|
||||
// left edge
|
||||
const int16x8_t l0 = ConvertU8ToS16(vld1_u8(dst + 0 * BPS - 1));
|
||||
const int16x8_t l1 = ConvertU8ToS16(vld1_u8(dst + 1 * BPS - 1));
|
||||
const int16x8_t l2 = ConvertU8ToS16(vld1_u8(dst + 2 * BPS - 1));
|
||||
const int16x8_t l3 = ConvertU8ToS16(vld1_u8(dst + 3 * BPS - 1));
|
||||
const int16x8_t L0 = vdupq_lane_s16(vget_low_s16(l0), 0);
|
||||
const int16x8_t L1 = vdupq_lane_s16(vget_low_s16(l1), 0);
|
||||
const int16x8_t L2 = vdupq_lane_s16(vget_low_s16(l2), 0);
|
||||
const int16x8_t L3 = vdupq_lane_s16(vget_low_s16(l3), 0);
|
||||
const int16x8_t L0 = ConvertU8ToS16(vld1_dup_u8(dst + 0 * BPS - 1));
|
||||
const int16x8_t L1 = ConvertU8ToS16(vld1_dup_u8(dst + 1 * BPS - 1));
|
||||
const int16x8_t L2 = ConvertU8ToS16(vld1_dup_u8(dst + 2 * BPS - 1));
|
||||
const int16x8_t L3 = ConvertU8ToS16(vld1_dup_u8(dst + 3 * BPS - 1));
|
||||
const int16x8_t r0 = vaddq_s16(L0, d); // L[r] + A[c] - A[-1]
|
||||
const int16x8_t r1 = vaddq_s16(L1, d);
|
||||
const int16x8_t r2 = vaddq_s16(L2, d);
|
||||
|
Loading…
Reference in New Issue
Block a user