mirror of
https://github.com/webmproject/libwebp.git
synced 2024-11-20 04:18:26 +01:00
Merge "Add clang build fix for MSA"
This commit is contained in:
commit
be73378684
@ -222,6 +222,7 @@ static void TransformAC3(const int16_t* in, uint8_t* dst) {
|
||||
const v16i8 cnst4b = __msa_ldi_b(4); \
|
||||
const v16i8 cnst3b = __msa_ldi_b(3); \
|
||||
const v8i16 cnst9h = __msa_ldi_h(9); \
|
||||
const v8i16 cnst63h = __msa_ldi_h(63); \
|
||||
\
|
||||
FLIP_SIGN4(p1, p0, q0, q1, p1_m, p0_m, q0_m, q1_m); \
|
||||
filt = __msa_subs_s_b(p1_m, q1_m); \
|
||||
@ -241,9 +242,9 @@ static void TransformAC3(const int16_t* in, uint8_t* dst) {
|
||||
ILVRL_B2_SH(filt_sign, filt, filt_r, filt_l); \
|
||||
/* update q2/p2 */ \
|
||||
temp0 = filt_r * cnst9h; \
|
||||
temp1 = ADDVI_H(temp0, 63); \
|
||||
temp1 = temp0 + cnst63h; \
|
||||
temp2 = filt_l * cnst9h; \
|
||||
temp3 = ADDVI_H(temp2, 63); \
|
||||
temp3 = temp2 + cnst63h; \
|
||||
FILT2(q2_m, p2_m, q2, p2); \
|
||||
/* update q1/p1 */ \
|
||||
temp1 = temp1 + temp0; \
|
||||
@ -708,7 +709,7 @@ static void VE4(uint8_t* dst) { // vertical
|
||||
const uint32_t val0 = LW(ptop + 0);
|
||||
const uint32_t val1 = LW(ptop + 4);
|
||||
uint32_t out;
|
||||
v16u8 A, B, C, AC, B2, R;
|
||||
v16u8 A = { 0 }, B, C, AC, B2, R;
|
||||
|
||||
INSERT_W2_UB(val0, val1, A);
|
||||
B = SLDI_UB(A, A, 1);
|
||||
@ -725,7 +726,7 @@ static void RD4(uint8_t* dst) { // Down-right
|
||||
uint32_t val0 = LW(ptop + 0);
|
||||
uint32_t val1 = LW(ptop + 4);
|
||||
uint32_t val2, val3;
|
||||
v16u8 A, B, C, AC, B2, R, A1;
|
||||
v16u8 A, B, C, AC, B2, R, A1 = { 0 };
|
||||
|
||||
INSERT_W2_UB(val0, val1, A1);
|
||||
A = SLDI_UB(A1, A1, 12);
|
||||
@ -753,7 +754,7 @@ static void LD4(uint8_t* dst) { // Down-Left
|
||||
uint32_t val0 = LW(ptop + 0);
|
||||
uint32_t val1 = LW(ptop + 4);
|
||||
uint32_t val2, val3;
|
||||
v16u8 A, B, C, AC, B2, R;
|
||||
v16u8 A = { 0 }, B, C, AC, B2, R;
|
||||
|
||||
INSERT_W2_UB(val0, val1, A);
|
||||
B = SLDI_UB(A, A, 1);
|
||||
|
@ -82,7 +82,7 @@ static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
|
||||
uint32_t in0, in1, in2, in3;
|
||||
v4i32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
|
||||
v8i16 t0, t1, t2, t3;
|
||||
v16u8 srcl0, srcl1, src0, src1;
|
||||
v16u8 srcl0, srcl1, src0 = { 0 }, src1 = { 0 };
|
||||
const v8i16 mask0 = { 0, 4, 8, 12, 1, 5, 9, 13 };
|
||||
const v8i16 mask1 = { 3, 7, 11, 15, 2, 6, 10, 14 };
|
||||
const v8i16 mask2 = { 4, 0, 5, 1, 6, 2, 7, 3 };
|
||||
@ -170,7 +170,7 @@ static void FTransformWHT(const int16_t* in, int16_t* out) {
|
||||
static int TTransform(const uint8_t* in, const uint16_t* w) {
|
||||
int sum;
|
||||
uint32_t in0_m, in1_m, in2_m, in3_m;
|
||||
v16i8 src0;
|
||||
v16i8 src0 = { 0 };
|
||||
v8i16 in0, in1, tmp0, tmp1, tmp2, tmp3;
|
||||
v4i32 dst0, dst1;
|
||||
const v16i8 zero = { 0 };
|
||||
@ -259,8 +259,9 @@ static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
|
||||
#define AVG2(a, b) (((a) + (b) + 1) >> 1)
|
||||
|
||||
static WEBP_INLINE void VE4(uint8_t* dst, const uint8_t* top) { // vertical
|
||||
const v16u8 A1 = { 0 };
|
||||
const uint64_t val_m = LD(top - 1);
|
||||
const v16u8 A = (v16u8)__msa_insert_d((v2i64)A, 0, val_m);
|
||||
const v16u8 A = (v16u8)__msa_insert_d((v2i64)A1, 0, val_m);
|
||||
const v16u8 B = SLDI_UB(A, A, 1);
|
||||
const v16u8 C = SLDI_UB(A, A, 2);
|
||||
const v16u8 AC = __msa_ave_u_b(A, C);
|
||||
@ -292,8 +293,9 @@ static WEBP_INLINE void DC4(uint8_t* dst, const uint8_t* top) {
|
||||
}
|
||||
|
||||
static WEBP_INLINE void RD4(uint8_t* dst, const uint8_t* top) {
|
||||
const v16u8 A2 = { 0 };
|
||||
const uint64_t val_m = LD(top - 5);
|
||||
const v16u8 A1 = (v16u8)__msa_insert_d((v2i64)A1, 0, val_m);
|
||||
const v16u8 A1 = (v16u8)__msa_insert_d((v2i64)A2, 0, val_m);
|
||||
const v16u8 A = (v16u8)__msa_insert_b((v16i8)A1, 8, top[3]);
|
||||
const v16u8 B = SLDI_UB(A, A, 1);
|
||||
const v16u8 C = SLDI_UB(A, A, 2);
|
||||
@ -311,8 +313,9 @@ static WEBP_INLINE void RD4(uint8_t* dst, const uint8_t* top) {
|
||||
}
|
||||
|
||||
static WEBP_INLINE void LD4(uint8_t* dst, const uint8_t* top) {
|
||||
const v16u8 A1 = { 0 };
|
||||
const uint64_t val_m = LD(top);
|
||||
const v16u8 A = (v16u8)__msa_insert_d((v2i64)A, 0, val_m);
|
||||
const v16u8 A = (v16u8)__msa_insert_d((v2i64)A1, 0, val_m);
|
||||
const v16u8 B = SLDI_UB(A, A, 1);
|
||||
const v16u8 C1 = SLDI_UB(A, A, 2);
|
||||
const v16u8 C = (v16u8)__msa_insert_b((v16i8)C1, 6, top[7]);
|
||||
@ -645,7 +648,7 @@ static WEBP_INLINE void TrueMotion8x8(uint8_t* dst, const uint8_t* left,
|
||||
static WEBP_INLINE void DCMode8x8(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
uint64_t out;
|
||||
v16u8 src;
|
||||
v16u8 src = { 0 };
|
||||
if (top != NULL && left != NULL) {
|
||||
const uint64_t left_m = LD(left);
|
||||
const uint64_t top_m = LD(top);
|
||||
@ -777,7 +780,7 @@ static int SSE8x8(const uint8_t* a, const uint8_t* b) {
|
||||
static int SSE4x4(const uint8_t* a, const uint8_t* b) {
|
||||
uint32_t sum = 0;
|
||||
uint32_t src0, src1, src2, src3, ref0, ref1, ref2, ref3;
|
||||
v16u8 src, ref, tmp0, tmp1;
|
||||
v16u8 src = { 0 }, ref = { 0 }, tmp0, tmp1;
|
||||
v8i16 diff0, diff1;
|
||||
v4i32 out0, out1;
|
||||
|
||||
@ -828,7 +831,7 @@ static int QuantizeBlock(int16_t in[16], int16_t out[16],
|
||||
tmp1 = (tmp3 > maxlevel);
|
||||
tmp2 = (v8i16)__msa_bmnz_v((v16u8)tmp2, (v16u8)maxlevel, (v16u8)tmp0);
|
||||
tmp3 = (v8i16)__msa_bmnz_v((v16u8)tmp3, (v16u8)maxlevel, (v16u8)tmp1);
|
||||
SUB2(0, tmp2, 0, tmp3, tmp0, tmp1);
|
||||
SUB2(zero, tmp2, zero, tmp3, tmp0, tmp1);
|
||||
tmp2 = (v8i16)__msa_bmnz_v((v16u8)tmp2, (v16u8)tmp0, (v16u8)sign0);
|
||||
tmp3 = (v8i16)__msa_bmnz_v((v16u8)tmp3, (v16u8)tmp1, (v16u8)sign1);
|
||||
LD_SW4(&mtx->zthresh_[0], 4, t0, t1, t2, t3); // zthresh
|
||||
|
@ -43,7 +43,7 @@
|
||||
|
||||
#define CONVERT8_BGRA_XXX(psrc, pdst, m0, m1) do { \
|
||||
uint64_t pix_d; \
|
||||
v16u8 src0, src1, src2, dst0, dst1; \
|
||||
v16u8 src0, src1, src2 = { 0 }, dst0, dst1; \
|
||||
LD_UB2(psrc, 16, src0, src1); \
|
||||
VSHF_B2_UB(src0, src1, src1, src2, m0, m1, dst0, dst1); \
|
||||
ST_UB(dst0, pdst); \
|
||||
|
@ -22,6 +22,7 @@
|
||||
#endif
|
||||
|
||||
#ifdef CLANG_BUILD
|
||||
#define ALPHAVAL (-1)
|
||||
#define ADDVI_H(a, b) __msa_addvi_h((v8i16)a, b)
|
||||
#define ADDVI_W(a, b) __msa_addvi_w((v4i32)a, b)
|
||||
#define SRAI_B(a, b) __msa_srai_b((v16i8)a, b)
|
||||
@ -32,6 +33,7 @@
|
||||
#define ANDI_B(a, b) __msa_andi_b((v16u8)a, b)
|
||||
#define ORI_B(a, b) __msa_ori_b((v16u8)a, b)
|
||||
#else
|
||||
#define ALPHAVAL (0xff)
|
||||
#define ADDVI_H(a, b) (a + b)
|
||||
#define ADDVI_W(a, b) (a + b)
|
||||
#define SRAI_B(a, b) (a >> b)
|
||||
|
@ -374,7 +374,7 @@ static void YuvToBgrLine(const uint8_t* y, const uint8_t* u,
|
||||
static void YuvToRgbaLine(const uint8_t* y, const uint8_t* u,
|
||||
const uint8_t* v, uint8_t* dst, int length) {
|
||||
v16u8 R, G, B;
|
||||
const v16u8 A = (v16u8)__msa_ldi_b(0xff);
|
||||
const v16u8 A = (v16u8)__msa_ldi_b(ALPHAVAL);
|
||||
while (length >= 16) {
|
||||
CALC_RGB16(y, u, v, R, G, B);
|
||||
STORE16_4(R, G, B, A, dst);
|
||||
@ -402,7 +402,7 @@ static void YuvToRgbaLine(const uint8_t* y, const uint8_t* u,
|
||||
static void YuvToBgraLine(const uint8_t* y, const uint8_t* u,
|
||||
const uint8_t* v, uint8_t* dst, int length) {
|
||||
v16u8 R, G, B;
|
||||
const v16u8 A = (v16u8)__msa_ldi_b(0xff);
|
||||
const v16u8 A = (v16u8)__msa_ldi_b(ALPHAVAL);
|
||||
while (length >= 16) {
|
||||
CALC_RGB16(y, u, v, R, G, B);
|
||||
STORE16_4(B, G, R, A, dst);
|
||||
@ -430,7 +430,7 @@ static void YuvToBgraLine(const uint8_t* y, const uint8_t* u,
|
||||
static void YuvToArgbLine(const uint8_t* y, const uint8_t* u,
|
||||
const uint8_t* v, uint8_t* dst, int length) {
|
||||
v16u8 R, G, B;
|
||||
const v16u8 A = (v16u8)__msa_ldi_b(0xff);
|
||||
const v16u8 A = (v16u8)__msa_ldi_b(ALPHAVAL);
|
||||
while (length >= 16) {
|
||||
CALC_RGB16(y, u, v, R, G, B);
|
||||
STORE16_4(A, R, G, B, dst);
|
||||
|
Loading…
Reference in New Issue
Block a user