Intra4Preds_NEON: fix truemotion saturation

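The narrowing of the sum to 8 bits needs to be done with signed saturation
(vqmovun_s16) as the sum may be negative; with unsigned saturation
(vqmovn_u16) a negative sum is treated as a large positive value.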

Fixes a mismatch with the C code after:
baa93808 Add AArch64 Neon implementation of Intra4Preds

Change-Id: I190c3d7f78cfd2c7ae83fb7059de41e307abda36
This commit is contained in:
James Zern 2024-07-11 13:33:48 -07:00
parent 952a989b1b
commit c7bb4cb585

View File

@@ -997,10 +997,10 @@ static void Intra4Preds_NEON(uint8_t* dst, const uint8_t* top) {
sub = vsubl_u8(sub_c, sub_a);
sum_lo = vaddw_u8(sub, vget_low_u8(full_b));
-res_lo = vqmovn_u16(sum_lo);
+res_lo = vqmovun_s16(vreinterpretq_s16_u16(sum_lo));
sum_hi = vaddw_u8(sub, vget_high_u8(full_b));
-res_hi = vqmovn_u16(sum_hi);
+res_hi = vqmovun_s16(vreinterpretq_s16_u16(sum_hi));
// DC4, VE4, HE4, TM4
DC4_VE4_HE4_TM4_NEON(dst + I4DC4 + BPS * 0, lookup_avgs3.val[0], res_lo, 0);