Merge "Add clang build fix for MSA"

This commit is contained in:
Pascal Massimino 2017-02-01 12:43:09 +00:00 committed by Gerrit Code Review
commit be73378684
5 changed files with 23 additions and 17 deletions

View File

@@ -222,6 +222,7 @@ static void TransformAC3(const int16_t* in, uint8_t* dst) {
const v16i8 cnst4b = __msa_ldi_b(4); \
const v16i8 cnst3b = __msa_ldi_b(3); \
const v8i16 cnst9h = __msa_ldi_h(9); \
+const v8i16 cnst63h = __msa_ldi_h(63); \
\
FLIP_SIGN4(p1, p0, q0, q1, p1_m, p0_m, q0_m, q1_m); \
filt = __msa_subs_s_b(p1_m, q1_m); \
@@ -241,9 +242,9 @@ static void TransformAC3(const int16_t* in, uint8_t* dst) {
ILVRL_B2_SH(filt_sign, filt, filt_r, filt_l); \
/* update q2/p2 */ \
temp0 = filt_r * cnst9h; \
-temp1 = ADDVI_H(temp0, 63); \
+temp1 = temp0 + cnst63h; \
temp2 = filt_l * cnst9h; \
-temp3 = ADDVI_H(temp2, 63); \
+temp3 = temp2 + cnst63h; \
FILT2(q2_m, p2_m, q2, p2); \
/* update q1/p1 */ \
temp1 = temp1 + temp0; \
@@ -708,7 +709,7 @@ static void VE4(uint8_t* dst) { // vertical
const uint32_t val0 = LW(ptop + 0);
const uint32_t val1 = LW(ptop + 4);
uint32_t out;
-v16u8 A, B, C, AC, B2, R;
+v16u8 A = { 0 }, B, C, AC, B2, R;
INSERT_W2_UB(val0, val1, A);
B = SLDI_UB(A, A, 1);
@@ -725,7 +726,7 @@ static void RD4(uint8_t* dst) { // Down-right
uint32_t val0 = LW(ptop + 0);
uint32_t val1 = LW(ptop + 4);
uint32_t val2, val3;
-v16u8 A, B, C, AC, B2, R, A1;
+v16u8 A, B, C, AC, B2, R, A1 = { 0 };
INSERT_W2_UB(val0, val1, A1);
A = SLDI_UB(A1, A1, 12);
@@ -753,7 +754,7 @@ static void LD4(uint8_t* dst) { // Down-Left
uint32_t val0 = LW(ptop + 0);
uint32_t val1 = LW(ptop + 4);
uint32_t val2, val3;
-v16u8 A, B, C, AC, B2, R;
+v16u8 A = { 0 }, B, C, AC, B2, R;
INSERT_W2_UB(val0, val1, A);
B = SLDI_UB(A, A, 1);

View File

@@ -82,7 +82,7 @@ static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
uint32_t in0, in1, in2, in3;
v4i32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
v8i16 t0, t1, t2, t3;
-v16u8 srcl0, srcl1, src0, src1;
+v16u8 srcl0, srcl1, src0 = { 0 }, src1 = { 0 };
const v8i16 mask0 = { 0, 4, 8, 12, 1, 5, 9, 13 };
const v8i16 mask1 = { 3, 7, 11, 15, 2, 6, 10, 14 };
const v8i16 mask2 = { 4, 0, 5, 1, 6, 2, 7, 3 };
@@ -170,7 +170,7 @@ static void FTransformWHT(const int16_t* in, int16_t* out) {
static int TTransform(const uint8_t* in, const uint16_t* w) {
int sum;
uint32_t in0_m, in1_m, in2_m, in3_m;
-v16i8 src0;
+v16i8 src0 = { 0 };
v8i16 in0, in1, tmp0, tmp1, tmp2, tmp3;
v4i32 dst0, dst1;
const v16i8 zero = { 0 };
@@ -259,8 +259,9 @@ static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
#define AVG2(a, b) (((a) + (b) + 1) >> 1)
static WEBP_INLINE void VE4(uint8_t* dst, const uint8_t* top) { // vertical
+const v16u8 A1 = { 0 };
const uint64_t val_m = LD(top - 1);
-const v16u8 A = (v16u8)__msa_insert_d((v2i64)A, 0, val_m);
+const v16u8 A = (v16u8)__msa_insert_d((v2i64)A1, 0, val_m);
const v16u8 B = SLDI_UB(A, A, 1);
const v16u8 C = SLDI_UB(A, A, 2);
const v16u8 AC = __msa_ave_u_b(A, C);
@@ -292,8 +293,9 @@ static WEBP_INLINE void DC4(uint8_t* dst, const uint8_t* top) {
}
static WEBP_INLINE void RD4(uint8_t* dst, const uint8_t* top) {
+const v16u8 A2 = { 0 };
const uint64_t val_m = LD(top - 5);
-const v16u8 A1 = (v16u8)__msa_insert_d((v2i64)A1, 0, val_m);
+const v16u8 A1 = (v16u8)__msa_insert_d((v2i64)A2, 0, val_m);
const v16u8 A = (v16u8)__msa_insert_b((v16i8)A1, 8, top[3]);
const v16u8 B = SLDI_UB(A, A, 1);
const v16u8 C = SLDI_UB(A, A, 2);
@@ -311,8 +313,9 @@ static WEBP_INLINE void RD4(uint8_t* dst, const uint8_t* top) {
}
static WEBP_INLINE void LD4(uint8_t* dst, const uint8_t* top) {
+const v16u8 A1 = { 0 };
const uint64_t val_m = LD(top);
-const v16u8 A = (v16u8)__msa_insert_d((v2i64)A, 0, val_m);
+const v16u8 A = (v16u8)__msa_insert_d((v2i64)A1, 0, val_m);
const v16u8 B = SLDI_UB(A, A, 1);
const v16u8 C1 = SLDI_UB(A, A, 2);
const v16u8 C = (v16u8)__msa_insert_b((v16i8)C1, 6, top[7]);
@@ -645,7 +648,7 @@ static WEBP_INLINE void TrueMotion8x8(uint8_t* dst, const uint8_t* left,
static WEBP_INLINE void DCMode8x8(uint8_t* dst, const uint8_t* left,
const uint8_t* top) {
uint64_t out;
-v16u8 src;
+v16u8 src = { 0 };
if (top != NULL && left != NULL) {
const uint64_t left_m = LD(left);
const uint64_t top_m = LD(top);
@@ -777,7 +780,7 @@ static int SSE8x8(const uint8_t* a, const uint8_t* b) {
static int SSE4x4(const uint8_t* a, const uint8_t* b) {
uint32_t sum = 0;
uint32_t src0, src1, src2, src3, ref0, ref1, ref2, ref3;
-v16u8 src, ref, tmp0, tmp1;
+v16u8 src = { 0 }, ref = { 0 }, tmp0, tmp1;
v8i16 diff0, diff1;
v4i32 out0, out1;
@@ -828,7 +831,7 @@ static int QuantizeBlock(int16_t in[16], int16_t out[16],
tmp1 = (tmp3 > maxlevel);
tmp2 = (v8i16)__msa_bmnz_v((v16u8)tmp2, (v16u8)maxlevel, (v16u8)tmp0);
tmp3 = (v8i16)__msa_bmnz_v((v16u8)tmp3, (v16u8)maxlevel, (v16u8)tmp1);
-SUB2(0, tmp2, 0, tmp3, tmp0, tmp1);
+SUB2(zero, tmp2, zero, tmp3, tmp0, tmp1);
tmp2 = (v8i16)__msa_bmnz_v((v16u8)tmp2, (v16u8)tmp0, (v16u8)sign0);
tmp3 = (v8i16)__msa_bmnz_v((v16u8)tmp3, (v16u8)tmp1, (v16u8)sign1);
LD_SW4(&mtx->zthresh_[0], 4, t0, t1, t2, t3); // zthresh

View File

@@ -43,7 +43,7 @@
#define CONVERT8_BGRA_XXX(psrc, pdst, m0, m1) do { \
uint64_t pix_d; \
-v16u8 src0, src1, src2, dst0, dst1; \
+v16u8 src0, src1, src2 = { 0 }, dst0, dst1; \
LD_UB2(psrc, 16, src0, src1); \
VSHF_B2_UB(src0, src1, src1, src2, m0, m1, dst0, dst1); \
ST_UB(dst0, pdst); \

View File

@@ -22,6 +22,7 @@
#endif
#ifdef CLANG_BUILD
+#define ALPHAVAL (-1)
#define ADDVI_H(a, b) __msa_addvi_h((v8i16)a, b)
#define ADDVI_W(a, b) __msa_addvi_w((v4i32)a, b)
#define SRAI_B(a, b) __msa_srai_b((v16i8)a, b)
@@ -32,6 +33,7 @@
#define ANDI_B(a, b) __msa_andi_b((v16u8)a, b)
#define ORI_B(a, b) __msa_ori_b((v16u8)a, b)
#else
+#define ALPHAVAL (0xff)
#define ADDVI_H(a, b) (a + b)
#define ADDVI_W(a, b) (a + b)
#define SRAI_B(a, b) (a >> b)

View File

@@ -374,7 +374,7 @@ static void YuvToBgrLine(const uint8_t* y, const uint8_t* u,
static void YuvToRgbaLine(const uint8_t* y, const uint8_t* u,
const uint8_t* v, uint8_t* dst, int length) {
v16u8 R, G, B;
-const v16u8 A = (v16u8)__msa_ldi_b(0xff);
+const v16u8 A = (v16u8)__msa_ldi_b(ALPHAVAL);
while (length >= 16) {
CALC_RGB16(y, u, v, R, G, B);
STORE16_4(R, G, B, A, dst);
@@ -402,7 +402,7 @@ static void YuvToRgbaLine(const uint8_t* y, const uint8_t* u,
static void YuvToBgraLine(const uint8_t* y, const uint8_t* u,
const uint8_t* v, uint8_t* dst, int length) {
v16u8 R, G, B;
-const v16u8 A = (v16u8)__msa_ldi_b(0xff);
+const v16u8 A = (v16u8)__msa_ldi_b(ALPHAVAL);
while (length >= 16) {
CALC_RGB16(y, u, v, R, G, B);
STORE16_4(B, G, R, A, dst);
@@ -430,7 +430,7 @@ static void YuvToBgraLine(const uint8_t* y, const uint8_t* u,
static void YuvToArgbLine(const uint8_t* y, const uint8_t* u,
const uint8_t* v, uint8_t* dst, int length) {
v16u8 R, G, B;
-const v16u8 A = (v16u8)__msa_ldi_b(0xff);
+const v16u8 A = (v16u8)__msa_ldi_b(ALPHAVAL);
while (length >= 16) {
CALC_RGB16(y, u, v, R, G, B);
STORE16_4(A, R, G, B, dst);