dsp,x86: normalize types w/_mm_cvtsi128_si32 calls

fixes integer sanitizer warnings of the form:
implicit conversion from type 'int' of value -2122283647 (32-bit,
signed) to type 'uint32_t' (aka 'unsigned int') changed the value to
2172683649 (32-bit, unsigned)

implicit conversion from type 'uint32_t' (aka 'unsigned int') of value
3724541952 (32-bit, unsigned) to type 'int' changed the value to
-570425344 (32-bit, signed)

Bug: b/229626362
Change-Id: I79f68e3e2fcab7cc0402477d2e88d629348c9ff4
This commit is contained in:
James Zern 2022-08-03 21:50:43 -07:00
parent ab540ae0c5
commit aff1c546ef
3 changed files with 26 additions and 28 deletions

View File

@ -908,10 +908,10 @@ static void VE4_SSE2(uint8_t* dst) { // vertical
const __m128i lsb = _mm_and_si128(_mm_xor_si128(ABCDEFGH, CDEFGH00), one); const __m128i lsb = _mm_and_si128(_mm_xor_si128(ABCDEFGH, CDEFGH00), one);
const __m128i b = _mm_subs_epu8(a, lsb); const __m128i b = _mm_subs_epu8(a, lsb);
const __m128i avg = _mm_avg_epu8(b, BCDEFGH0); const __m128i avg = _mm_avg_epu8(b, BCDEFGH0);
const uint32_t vals = _mm_cvtsi128_si32(avg); const int vals = _mm_cvtsi128_si32(avg);
int i; int i;
for (i = 0; i < 4; ++i) { for (i = 0; i < 4; ++i) {
WebPUint32ToMem(dst + i * BPS, vals); WebPInt32ToMem(dst + i * BPS, vals);
} }
} }
@ -970,7 +970,8 @@ static void VL4_SSE2(uint8_t* dst) { // Vertical-Left
const __m128i abbc = _mm_or_si128(ab, bc); const __m128i abbc = _mm_or_si128(ab, bc);
const __m128i lsb2 = _mm_and_si128(abbc, lsb1); const __m128i lsb2 = _mm_and_si128(abbc, lsb1);
const __m128i avg4 = _mm_subs_epu8(avg3, lsb2); const __m128i avg4 = _mm_subs_epu8(avg3, lsb2);
const uint32_t extra_out = _mm_cvtsi128_si32(_mm_srli_si128(avg4, 4)); const uint32_t extra_out =
(uint32_t)_mm_cvtsi128_si32(_mm_srli_si128(avg4, 4));
WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32( avg1 )); WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32( avg1 ));
WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32( avg4 )); WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32( avg4 ));
WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(avg1, 1))); WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(avg1, 1)));

View File

@ -722,10 +722,10 @@ static WEBP_INLINE void VE4_SSE2(uint8_t* dst,
const __m128i lsb = _mm_and_si128(_mm_xor_si128(ABCDEFGH, CDEFGH00), one); const __m128i lsb = _mm_and_si128(_mm_xor_si128(ABCDEFGH, CDEFGH00), one);
const __m128i b = _mm_subs_epu8(a, lsb); const __m128i b = _mm_subs_epu8(a, lsb);
const __m128i avg = _mm_avg_epu8(b, BCDEFGH0); const __m128i avg = _mm_avg_epu8(b, BCDEFGH0);
const uint32_t vals = _mm_cvtsi128_si32(avg); const int vals = _mm_cvtsi128_si32(avg);
int i; int i;
for (i = 0; i < 4; ++i) { for (i = 0; i < 4; ++i) {
WebPUint32ToMem(dst + i * BPS, vals); WebPInt32ToMem(dst + i * BPS, vals);
} }
} }
@ -807,7 +807,8 @@ static WEBP_INLINE void VL4_SSE2(uint8_t* dst,
const __m128i abbc = _mm_or_si128(ab, bc); const __m128i abbc = _mm_or_si128(ab, bc);
const __m128i lsb2 = _mm_and_si128(abbc, lsb1); const __m128i lsb2 = _mm_and_si128(abbc, lsb1);
const __m128i avg4 = _mm_subs_epu8(avg3, lsb2); const __m128i avg4 = _mm_subs_epu8(avg3, lsb2);
const uint32_t extra_out = _mm_cvtsi128_si32(_mm_srli_si128(avg4, 4)); const uint32_t extra_out =
(uint32_t)_mm_cvtsi128_si32(_mm_srli_si128(avg4, 4));
WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32( avg1 )); WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32( avg1 ));
WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32( avg4 )); WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32( avg4 ));
WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(avg1, 1))); WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(avg1, 1)));

View File

@ -33,8 +33,7 @@ static WEBP_INLINE uint32_t ClampedAddSubtractFull_SSE2(uint32_t c0,
const __m128i V1 = _mm_add_epi16(C0, C1); const __m128i V1 = _mm_add_epi16(C0, C1);
const __m128i V2 = _mm_sub_epi16(V1, C2); const __m128i V2 = _mm_sub_epi16(V1, C2);
const __m128i b = _mm_packus_epi16(V2, V2); const __m128i b = _mm_packus_epi16(V2, V2);
const uint32_t output = _mm_cvtsi128_si32(b); return (uint32_t)_mm_cvtsi128_si32(b);
return output;
} }
static WEBP_INLINE uint32_t ClampedAddSubtractHalf_SSE2(uint32_t c0, static WEBP_INLINE uint32_t ClampedAddSubtractHalf_SSE2(uint32_t c0,
@ -52,8 +51,7 @@ static WEBP_INLINE uint32_t ClampedAddSubtractHalf_SSE2(uint32_t c0,
const __m128i A3 = _mm_srai_epi16(A2, 1); const __m128i A3 = _mm_srai_epi16(A2, 1);
const __m128i A4 = _mm_add_epi16(A0, A3); const __m128i A4 = _mm_add_epi16(A0, A3);
const __m128i A5 = _mm_packus_epi16(A4, A4); const __m128i A5 = _mm_packus_epi16(A4, A4);
const uint32_t output = _mm_cvtsi128_si32(A5); return (uint32_t)_mm_cvtsi128_si32(A5);
return output;
} }
static WEBP_INLINE uint32_t Select_SSE2(uint32_t a, uint32_t b, uint32_t c) { static WEBP_INLINE uint32_t Select_SSE2(uint32_t a, uint32_t b, uint32_t c) {
@ -112,7 +110,7 @@ static WEBP_INLINE __m128i Average2_uint32_16_SSE2(uint32_t a0, uint32_t a1) {
static WEBP_INLINE uint32_t Average2_SSE2(uint32_t a0, uint32_t a1) { static WEBP_INLINE uint32_t Average2_SSE2(uint32_t a0, uint32_t a1) {
__m128i output; __m128i output;
Average2_uint32_SSE2(a0, a1, &output); Average2_uint32_SSE2(a0, a1, &output);
return _mm_cvtsi128_si32(output); return (uint32_t)_mm_cvtsi128_si32(output);
} }
static WEBP_INLINE uint32_t Average3_SSE2(uint32_t a0, uint32_t a1, static WEBP_INLINE uint32_t Average3_SSE2(uint32_t a0, uint32_t a1,
@ -123,8 +121,7 @@ static WEBP_INLINE uint32_t Average3_SSE2(uint32_t a0, uint32_t a1,
const __m128i sum = _mm_add_epi16(avg1, A1); const __m128i sum = _mm_add_epi16(avg1, A1);
const __m128i avg2 = _mm_srli_epi16(sum, 1); const __m128i avg2 = _mm_srli_epi16(sum, 1);
const __m128i A2 = _mm_packus_epi16(avg2, avg2); const __m128i A2 = _mm_packus_epi16(avg2, avg2);
const uint32_t output = _mm_cvtsi128_si32(A2); return (uint32_t)_mm_cvtsi128_si32(A2);
return output;
} }
static WEBP_INLINE uint32_t Average4_SSE2(uint32_t a0, uint32_t a1, static WEBP_INLINE uint32_t Average4_SSE2(uint32_t a0, uint32_t a1,
@ -134,8 +131,7 @@ static WEBP_INLINE uint32_t Average4_SSE2(uint32_t a0, uint32_t a1,
const __m128i sum = _mm_add_epi16(avg2, avg1); const __m128i sum = _mm_add_epi16(avg2, avg1);
const __m128i avg3 = _mm_srli_epi16(sum, 1); const __m128i avg3 = _mm_srli_epi16(sum, 1);
const __m128i A0 = _mm_packus_epi16(avg3, avg3); const __m128i A0 = _mm_packus_epi16(avg3, avg3);
const uint32_t output = _mm_cvtsi128_si32(A0); return (uint32_t)_mm_cvtsi128_si32(A0);
return output;
} }
static uint32_t Predictor5_SSE2(const uint32_t* const left, static uint32_t Predictor5_SSE2(const uint32_t* const left,
@ -285,12 +281,12 @@ GENERATE_PREDICTOR_2(9, upper[i + 1])
#undef GENERATE_PREDICTOR_2 #undef GENERATE_PREDICTOR_2
// Predictor10: average of (average of (L,TL), average of (T, TR)). // Predictor10: average of (average of (L,TL), average of (T, TR)).
#define DO_PRED10(OUT) do { \ #define DO_PRED10(OUT) do { \
__m128i avgLTL, avg; \ __m128i avgLTL, avg; \
Average2_m128i(&L, &TL, &avgLTL); \ Average2_m128i(&L, &TL, &avgLTL); \
Average2_m128i(&avgTTR, &avgLTL, &avg); \ Average2_m128i(&avgTTR, &avgLTL, &avg); \
L = _mm_add_epi8(avg, src); \ L = _mm_add_epi8(avg, src); \
out[i + (OUT)] = _mm_cvtsi128_si32(L); \ out[i + (OUT)] = (uint32_t)_mm_cvtsi128_si32(L); \
} while (0) } while (0)
#define DO_PRED10_SHIFT do { \ #define DO_PRED10_SHIFT do { \
@ -336,7 +332,7 @@ static void PredictorAdd10_SSE2(const uint32_t* in, const uint32_t* upper,
const __m128i B = _mm_andnot_si128(mask, T); \ const __m128i B = _mm_andnot_si128(mask, T); \
const __m128i pred = _mm_or_si128(A, B); /* pred = (pa > b)? L : T*/ \ const __m128i pred = _mm_or_si128(A, B); /* pred = (pa > b)? L : T*/ \
L = _mm_add_epi8(src, pred); \ L = _mm_add_epi8(src, pred); \
out[i + (OUT)] = _mm_cvtsi128_si32(L); \ out[i + (OUT)] = (uint32_t)_mm_cvtsi128_si32(L); \
} while (0) } while (0)
#define DO_PRED11_SHIFT do { \ #define DO_PRED11_SHIFT do { \
@ -384,12 +380,12 @@ static void PredictorAdd11_SSE2(const uint32_t* in, const uint32_t* upper,
#undef DO_PRED11_SHIFT #undef DO_PRED11_SHIFT
// Predictor12: ClampedAddSubtractFull. // Predictor12: ClampedAddSubtractFull.
#define DO_PRED12(DIFF, LANE, OUT) do { \ #define DO_PRED12(DIFF, LANE, OUT) do { \
const __m128i all = _mm_add_epi16(L, (DIFF)); \ const __m128i all = _mm_add_epi16(L, (DIFF)); \
const __m128i alls = _mm_packus_epi16(all, all); \ const __m128i alls = _mm_packus_epi16(all, all); \
const __m128i res = _mm_add_epi8(src, alls); \ const __m128i res = _mm_add_epi8(src, alls); \
out[i + (OUT)] = _mm_cvtsi128_si32(res); \ out[i + (OUT)] = (uint32_t)_mm_cvtsi128_si32(res); \
L = _mm_unpacklo_epi8(res, zero); \ L = _mm_unpacklo_epi8(res, zero); \
} while (0) } while (0)
#define DO_PRED12_SHIFT(DIFF, LANE) do { \ #define DO_PRED12_SHIFT(DIFF, LANE) do { \