mirror of
https://github.com/webmproject/libwebp.git
synced 2024-11-20 04:18:26 +01:00
dec_neon: add SaturateAndStore4x4
Converts two s16 vectors to two u8 vectors and stores them to a uint8_t destination; TransformAC3 can reuse this after a rework. Change-Id: Ia9370283ee3d9bfbc8c008fa883412100ff483d0
This commit is contained in:
parent
e02f16ef45
commit
b7b60ca16c
@ -89,6 +89,21 @@
|
|||||||
"vst2.8 {" #c1"[6], " #c2"[6]}," #p "," #stride " \n" \
|
"vst2.8 {" #c1"[6], " #c2"[6]}," #p "," #stride " \n" \
|
||||||
"vst2.8 {" #c1"[7], " #c2"[7]}," #p "," #stride " \n"
|
"vst2.8 {" #c1"[7], " #c2"[7]}," #p "," #stride " \n"
|
||||||
|
|
||||||
|
// Performs unsigned 8b saturation on 'dst01' and 'dst23' storing the result
|
||||||
|
// to the corresponding rows of 'dst'.
|
||||||
|
static WEBP_INLINE void SaturateAndStore4x4(uint8_t* const dst,
|
||||||
|
int16x8_t dst01, int16x8_t dst23) {
|
||||||
|
// Unsigned saturate to 8b.
|
||||||
|
const uint8x8_t dst01_u8 = vqmovun_s16(dst01);
|
||||||
|
const uint8x8_t dst23_u8 = vqmovun_s16(dst23);
|
||||||
|
|
||||||
|
// Store the results.
|
||||||
|
*(int*)(dst + 0 * BPS) = vget_lane_s32(vreinterpret_s32_u8(dst01_u8), 0);
|
||||||
|
*(int*)(dst + 1 * BPS) = vget_lane_s32(vreinterpret_s32_u8(dst01_u8), 1);
|
||||||
|
*(int*)(dst + 2 * BPS) = vget_lane_s32(vreinterpret_s32_u8(dst23_u8), 0);
|
||||||
|
*(int*)(dst + 3 * BPS) = vget_lane_s32(vreinterpret_s32_u8(dst23_u8), 1);
|
||||||
|
}
|
||||||
|
|
||||||
//-----------------------------------------------------------------------------
|
//-----------------------------------------------------------------------------
|
||||||
// Simple In-loop filtering (Paragraph 15.2)
|
// Simple In-loop filtering (Paragraph 15.2)
|
||||||
|
|
||||||
@ -335,17 +350,8 @@ static void TransformDC(const int16_t* in, uint8_t* dst) {
|
|||||||
// Add the inverse transform.
|
// Add the inverse transform.
|
||||||
dst01_s16 = vaddq_s16(dst01_s16, DC);
|
dst01_s16 = vaddq_s16(dst01_s16, DC);
|
||||||
dst23_s16 = vaddq_s16(dst23_s16, DC);
|
dst23_s16 = vaddq_s16(dst23_s16, DC);
|
||||||
{
|
|
||||||
// Unsigned saturate to 8b.
|
|
||||||
const uint8x8_t dst01_u8 = vqmovun_s16(dst01_s16);
|
|
||||||
const uint8x8_t dst23_u8 = vqmovun_s16(dst23_s16);
|
|
||||||
|
|
||||||
// Store the results.
|
SaturateAndStore4x4(dst, dst01_s16, dst23_s16);
|
||||||
*(int*)(dst + 0 * BPS) = vget_lane_s32(vreinterpret_s32_u8(dst01_u8), 0);
|
|
||||||
*(int*)(dst + 1 * BPS) = vget_lane_s32(vreinterpret_s32_u8(dst01_u8), 1);
|
|
||||||
*(int*)(dst + 2 * BPS) = vget_lane_s32(vreinterpret_s32_u8(dst23_u8), 0);
|
|
||||||
*(int*)(dst + 3 * BPS) = vget_lane_s32(vreinterpret_s32_u8(dst23_u8), 1);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user