From aa893914fcc78f341e79624d6d0b2f110d2c08dd Mon Sep 17 00:00:00 2001 From: Parag Salasakar Date: Wed, 1 Feb 2017 17:45:17 +0530 Subject: [PATCH] Add clang build fix for MSA Change-Id: If139f4ecbdce756c69ba4ae032a70f81179683f8 --- src/dsp/dec_msa.c | 11 ++++++----- src/dsp/enc_msa.c | 19 +++++++++++-------- src/dsp/lossless_msa.c | 2 +- src/dsp/msa_macro.h | 2 ++ src/dsp/upsampling_msa.c | 6 +++--- 5 files changed, 23 insertions(+), 17 deletions(-) diff --git a/src/dsp/dec_msa.c b/src/dsp/dec_msa.c index 8d9c98c3..cc1e2850 100644 --- a/src/dsp/dec_msa.c +++ b/src/dsp/dec_msa.c @@ -222,6 +222,7 @@ static void TransformAC3(const int16_t* in, uint8_t* dst) { const v16i8 cnst4b = __msa_ldi_b(4); \ const v16i8 cnst3b = __msa_ldi_b(3); \ const v8i16 cnst9h = __msa_ldi_h(9); \ + const v8i16 cnst63h = __msa_ldi_h(63); \ \ FLIP_SIGN4(p1, p0, q0, q1, p1_m, p0_m, q0_m, q1_m); \ filt = __msa_subs_s_b(p1_m, q1_m); \ @@ -241,9 +242,9 @@ static void TransformAC3(const int16_t* in, uint8_t* dst) { ILVRL_B2_SH(filt_sign, filt, filt_r, filt_l); \ /* update q2/p2 */ \ temp0 = filt_r * cnst9h; \ - temp1 = ADDVI_H(temp0, 63); \ + temp1 = temp0 + cnst63h; \ temp2 = filt_l * cnst9h; \ - temp3 = ADDVI_H(temp2, 63); \ + temp3 = temp2 + cnst63h; \ FILT2(q2_m, p2_m, q2, p2); \ /* update q1/p1 */ \ temp1 = temp1 + temp0; \ @@ -708,7 +709,7 @@ static void VE4(uint8_t* dst) { // vertical const uint32_t val0 = LW(ptop + 0); const uint32_t val1 = LW(ptop + 4); uint32_t out; - v16u8 A, B, C, AC, B2, R; + v16u8 A = { 0 }, B, C, AC, B2, R; INSERT_W2_UB(val0, val1, A); B = SLDI_UB(A, A, 1); @@ -725,7 +726,7 @@ static void RD4(uint8_t* dst) { // Down-right uint32_t val0 = LW(ptop + 0); uint32_t val1 = LW(ptop + 4); uint32_t val2, val3; - v16u8 A, B, C, AC, B2, R, A1; + v16u8 A, B, C, AC, B2, R, A1 = { 0 }; INSERT_W2_UB(val0, val1, A1); A = SLDI_UB(A1, A1, 12); @@ -753,7 +754,7 @@ static void LD4(uint8_t* dst) { // Down-Left uint32_t val0 = LW(ptop + 0); uint32_t val1 = LW(ptop + 4); uint32_t val2, val3; - v16u8 A, B, C, AC, B2, R; + v16u8 A = { 0 }, B, C, AC, B2, R; INSERT_W2_UB(val0, val1, A); B = SLDI_UB(A, A, 1); diff --git a/src/dsp/enc_msa.c b/src/dsp/enc_msa.c index 909b46d5..d1c8bdf2 100644 --- a/src/dsp/enc_msa.c +++ b/src/dsp/enc_msa.c @@ -82,7 +82,7 @@ static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) { uint32_t in0, in1, in2, in3; v4i32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5; v8i16 t0, t1, t2, t3; - v16u8 srcl0, srcl1, src0, src1; + v16u8 srcl0, srcl1, src0 = { 0 }, src1 = { 0 }; const v8i16 mask0 = { 0, 4, 8, 12, 1, 5, 9, 13 }; const v8i16 mask1 = { 3, 7, 11, 15, 2, 6, 10, 14 }; const v8i16 mask2 = { 4, 0, 5, 1, 6, 2, 7, 3 }; @@ -170,7 +170,7 @@ static void FTransformWHT(const int16_t* in, int16_t* out) { static int TTransform(const uint8_t* in, const uint16_t* w) { int sum; uint32_t in0_m, in1_m, in2_m, in3_m; - v16i8 src0; + v16i8 src0 = { 0 }; v8i16 in0, in1, tmp0, tmp1, tmp2, tmp3; v4i32 dst0, dst1; const v16i8 zero = { 0 }; @@ -259,8 +259,9 @@ static void CollectHistogram(const uint8_t* ref, const uint8_t* pred, #define AVG2(a, b) (((a) + (b) + 1) >> 1) static WEBP_INLINE void VE4(uint8_t* dst, const uint8_t* top) { // vertical + const v16u8 A1 = { 0 }; const uint64_t val_m = LD(top - 1); - const v16u8 A = (v16u8)__msa_insert_d((v2i64)A, 0, val_m); + const v16u8 A = (v16u8)__msa_insert_d((v2i64)A1, 0, val_m); const v16u8 B = SLDI_UB(A, A, 1); const v16u8 C = SLDI_UB(A, A, 2); const v16u8 AC = __msa_ave_u_b(A, C); @@ -292,8 +293,9 @@ static WEBP_INLINE void DC4(uint8_t* dst, const uint8_t* top) { } static WEBP_INLINE void RD4(uint8_t* dst, const uint8_t* top) { + const v16u8 A2 = { 0 }; const uint64_t val_m = LD(top - 5); - const v16u8 A1 = (v16u8)__msa_insert_d((v2i64)A1, 0, val_m); + const v16u8 A1 = (v16u8)__msa_insert_d((v2i64)A2, 0, val_m); const v16u8 A = (v16u8)__msa_insert_b((v16i8)A1, 8, top[3]); const v16u8 B = SLDI_UB(A, A, 1); const v16u8 C = SLDI_UB(A, A, 2); @@ -311,8 +313,9 @@ static WEBP_INLINE void RD4(uint8_t* dst, const uint8_t* top) { } static WEBP_INLINE void LD4(uint8_t* dst, const uint8_t* top) { + const v16u8 A1 = { 0 }; const uint64_t val_m = LD(top); - const v16u8 A = (v16u8)__msa_insert_d((v2i64)A, 0, val_m); + const v16u8 A = (v16u8)__msa_insert_d((v2i64)A1, 0, val_m); const v16u8 B = SLDI_UB(A, A, 1); const v16u8 C1 = SLDI_UB(A, A, 2); const v16u8 C = (v16u8)__msa_insert_b((v16i8)C1, 6, top[7]); @@ -645,7 +648,7 @@ static WEBP_INLINE void TrueMotion8x8(uint8_t* dst, const uint8_t* left, static WEBP_INLINE void DCMode8x8(uint8_t* dst, const uint8_t* left, const uint8_t* top) { uint64_t out; - v16u8 src; + v16u8 src = { 0 }; if (top != NULL && left != NULL) { const uint64_t left_m = LD(left); const uint64_t top_m = LD(top); @@ -777,7 +780,7 @@ static int SSE8x8(const uint8_t* a, const uint8_t* b) { static int SSE4x4(const uint8_t* a, const uint8_t* b) { uint32_t sum = 0; uint32_t src0, src1, src2, src3, ref0, ref1, ref2, ref3; - v16u8 src, ref, tmp0, tmp1; + v16u8 src = { 0 }, ref = { 0 }, tmp0, tmp1; v8i16 diff0, diff1; v4i32 out0, out1; @@ -828,7 +831,7 @@ static int QuantizeBlock(int16_t in[16], int16_t out[16], tmp1 = (tmp3 > maxlevel); tmp2 = (v8i16)__msa_bmnz_v((v16u8)tmp2, (v16u8)maxlevel, (v16u8)tmp0); tmp3 = (v8i16)__msa_bmnz_v((v16u8)tmp3, (v16u8)maxlevel, (v16u8)tmp1); - SUB2(0, tmp2, 0, tmp3, tmp0, tmp1); + SUB2(zero, tmp2, zero, tmp3, tmp0, tmp1); tmp2 = (v8i16)__msa_bmnz_v((v16u8)tmp2, (v16u8)tmp0, (v16u8)sign0); tmp3 = (v8i16)__msa_bmnz_v((v16u8)tmp3, (v16u8)tmp1, (v16u8)sign1); LD_SW4(&mtx->zthresh_[0], 4, t0, t1, t2, t3); // zthresh diff --git a/src/dsp/lossless_msa.c b/src/dsp/lossless_msa.c index f6dd5649..2529c7e3 100644 --- a/src/dsp/lossless_msa.c +++ b/src/dsp/lossless_msa.c @@ -43,7 +43,7 @@ #define CONVERT8_BGRA_XXX(psrc, pdst, m0, m1) do { \ uint64_t pix_d; \ - v16u8 src0, src1, src2, dst0, dst1; \ + v16u8 src0, src1, src2 = { 0 }, dst0, dst1; \ LD_UB2(psrc, 16, src0, src1); \ VSHF_B2_UB(src0, src1, src1, src2, m0, m1, dst0, dst1); \ ST_UB(dst0, pdst); \ diff --git a/src/dsp/msa_macro.h b/src/dsp/msa_macro.h index d0e5f45e..dfacda6c 100644 --- a/src/dsp/msa_macro.h +++ b/src/dsp/msa_macro.h @@ -22,6 +22,7 @@ #endif #ifdef CLANG_BUILD + #define ALPHAVAL (-1) #define ADDVI_H(a, b) __msa_addvi_h((v8i16)a, b) #define ADDVI_W(a, b) __msa_addvi_w((v4i32)a, b) #define SRAI_B(a, b) __msa_srai_b((v16i8)a, b) @@ -32,6 +33,7 @@ #define ANDI_B(a, b) __msa_andi_b((v16u8)a, b) #define ORI_B(a, b) __msa_ori_b((v16u8)a, b) #else + #define ALPHAVAL (0xff) #define ADDVI_H(a, b) (a + b) #define ADDVI_W(a, b) (a + b) #define SRAI_B(a, b) (a >> b) diff --git a/src/dsp/upsampling_msa.c b/src/dsp/upsampling_msa.c index f24926fa..561e4525 100644 --- a/src/dsp/upsampling_msa.c +++ b/src/dsp/upsampling_msa.c @@ -374,7 +374,7 @@ static void YuvToBgrLine(const uint8_t* y, const uint8_t* u, static void YuvToRgbaLine(const uint8_t* y, const uint8_t* u, const uint8_t* v, uint8_t* dst, int length) { v16u8 R, G, B; - const v16u8 A = (v16u8)__msa_ldi_b(0xff); + const v16u8 A = (v16u8)__msa_ldi_b(ALPHAVAL); while (length >= 16) { CALC_RGB16(y, u, v, R, G, B); STORE16_4(R, G, B, A, dst); @@ -402,7 +402,7 @@ static void YuvToRgbaLine(const uint8_t* y, const uint8_t* u, static void YuvToBgraLine(const uint8_t* y, const uint8_t* u, const uint8_t* v, uint8_t* dst, int length) { v16u8 R, G, B; - const v16u8 A = (v16u8)__msa_ldi_b(0xff); + const v16u8 A = (v16u8)__msa_ldi_b(ALPHAVAL); while (length >= 16) { CALC_RGB16(y, u, v, R, G, B); STORE16_4(B, G, R, A, dst); @@ -430,7 +430,7 @@ static void YuvToBgraLine(const uint8_t* y, const uint8_t* u, static void YuvToArgbLine(const uint8_t* y, const uint8_t* u, const uint8_t* v, uint8_t* dst, int length) { v16u8 R, G, B; - const v16u8 A = (v16u8)__msa_ldi_b(0xff); + const v16u8 A = (v16u8)__msa_ldi_b(ALPHAVAL); while (length >= 16) { CALC_RGB16(y, u, v, R, G, B); STORE16_4(A, R, G, B, dst);