MIPS: dspr2: added optimization for function ClampedAddSubtractHalf

Change-Id: Iec22e897a4f56e79c18ec00f8caa9cefac67f186
This commit is contained in:
Djordje Pesut 2014-10-10 10:35:43 +02:00
parent 829a8c19a0
commit 18d5a1efa8
3 changed files with 44 additions and 29 deletions

View File

@ -65,13 +65,6 @@ static const int kC2 = 35468;
"ulw %["#O2"], 64(%[dst]) \n\t" \
"ulw %["#O3"], 96(%[dst]) \n\t"
// Emits two inline-asm instructions operating on packed halfwords:
//   O0 = I0 + I1   (addq.ph)
//   O1 = I0 - I1   (subq.ph)
// Each macro argument is stringized into a named asm operand (%[name]).
// O0 is written by the first instruction before the second one reads I0 and
// I1, so O0 must not name the same operand as either input.
// O - output
// I - input (macro doesn't change it)
#define ADD_SUB_HALVES(O0, O1, \
I0, I1) \
"addq.ph %["#O0"], %["#I0"], %["#I1"] \n\t" \
"subq.ph %["#O1"], %["#I0"], %["#I1"] \n\t"
static void TransformDC(const int16_t* in, uint8_t* dst) {
int temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10;
@ -517,17 +510,7 @@ static void HFilter8i(uint8_t* u, uint8_t* v, int stride,
FilterLoop24(v + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
}
#undef OUTPUT_EARLY_CLOBBER_REGS_18
#undef OUTPUT_EARLY_CLOBBER_REGS_10
#undef INSERT_HALF_X2
#undef SRA_16
#undef LOAD_IN_X2
#undef ADD_SUB_HALVES
#undef MUL_SHIFT_SUM
#undef PACK_2_HALVES_TO_WORD
#undef LOAD_DST
#undef CONVERT_2_BYTES_TO_HALF
#undef SHIFT_R_SUM_X2
#undef STORE_SAT_SUM_X2
#undef MUL

View File

@ -29,6 +29,14 @@ static const int kC2 = 35468;
"ulw %["#O2"], 32(%[ref]) \n\t" \
"ulw %["#O3"], 48(%[ref]) \n\t"
// temp0[31..16 | 15..0] = temp0[31..16 | 15..0] + temp8[31..16 | 15..0]
// temp0[31..16 | 15..0] = temp0[31..16 <<(s) 7 | 15..0 <<(s) 7]
// temp1..temp7 same as temp0
// precrqu_s.qb.ph temp0, temp1, temp0:
// temp0 = temp1[31..24] | temp1[15..8] | temp0[31..24] | temp0[15..8]
// store temp0 to dst
// IO - input/output
// I - input (macro doesn't change it)
#define STORE_SAT_SUM_X2(IO0, IO1, IO2, IO3, IO4, IO5, IO6, IO7, \
I0, I1, I2, I3, I4, I5, I6, I7) \
"addq.ph %["#IO0"], %["#IO0"], %["#I0"] \n\t" \
@ -128,19 +136,8 @@ static void ITransform(const uint8_t* ref, const int16_t* in, uint8_t* dst,
}
}
#undef OUTPUT_EARLY_CLOBBER_REGS_18
#undef OUTPUT_EARLY_CLOBBER_REGS_10
#undef INSERT_HALF_X2
#undef SRA_16
#undef LOAD_IN_X2
#undef ADD_SUB_HALVES
#undef MUL_SHIFT_SUM
#undef PACK_2_HALVES_TO_WORD
#undef LOAD_REF
#undef CONVERT_2_BYTES_TO_HALF
#undef SHIFT_R_SUM_X2
#undef STORE_SAT_SUM_X2
#undef MUL
#endif // WEBP_USE_MIPS_DSP_R2
@ -152,5 +149,5 @@ extern WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInitMIPSdspR2(void);
// Init hook: when WEBP_USE_MIPS_DSP_R2 is defined, points VP8ITransform at
// the ITransform implementation defined in this file.
// NOTE(review): two consecutive #endif lines follow a single #if here; this
// looks like the before/after pair of a diff rendering rather than real
// nesting -- confirm against the actual file.
WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInitMIPSdspR2(void) {
#if defined(WEBP_USE_MIPS_DSP_R2)
VP8ITransform = ITransform;
#endif // WEBP_USE_MIPS32
#endif // WEBP_USE_MIPS_DSP_R2
}

View File

@ -90,6 +90,40 @@ MAP_COLOR_FUNCS(MapAlpha, uint8_t, VP8GetAlphaIndex, VP8GetAlphaValue)
#undef MAP_COLOR_FUNCS
// Combines three packed values, treating each 32-bit argument as four
// independent unsigned 8-bit lanes. For every lane:
//   ave    = (c0 + c1) >> 1
//   result = clamp(ave + (ave - c2) / 2, 0, 255)
// where the division by 2 rounds toward zero. The four clamped lanes are
// re-packed into the returned 32-bit word in their original positions.
// Implemented with MIPS DSP ASE (R2) instructions; the two low bytes ("qbr")
// and the two high bytes ("qbl") are processed as separate halfword pairs.
static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
uint32_t c2) {
int tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
__asm__ volatile (
// tmp5 = per-byte (c0 + c1) >> 1, i.e. the lane-wise average "ave".
"adduh.qb %[tmp5], %[c0], %[c1] \n\t"
// Zero-extend bytes to halfwords: tmp1/tmp3 take the low byte pair of
// ave/c2, tmp2/tmp4 take the high byte pair of ave/c2.
"preceu.ph.qbr %[tmp3], %[c2] \n\t"
"preceu.ph.qbr %[tmp1], %[tmp5] \n\t"
"preceu.ph.qbl %[tmp2], %[tmp5] \n\t"
"preceu.ph.qbl %[tmp4], %[c2] \n\t"
// tmp3/tmp4 = ave - c2 (signed 16-bit differences).
"subq.ph %[tmp3], %[tmp1], %[tmp3] \n\t"
"subq.ph %[tmp4], %[tmp2], %[tmp4] \n\t"
// Logical shift by 15 isolates the sign bit (1 for a negative difference).
// Adding it before the arithmetic shift by 1 makes the halving round
// toward zero instead of toward minus infinity.
"shrl.ph %[tmp5], %[tmp3], 15 \n\t"
"shrl.ph %[tmp6], %[tmp4], 15 \n\t"
"addq.ph %[tmp3], %[tmp3], %[tmp5] \n\t"
"addq.ph %[tmp4], %[tmp6], %[tmp4] \n\t"
"shra.ph %[tmp3], %[tmp3], 1 \n\t"
"shra.ph %[tmp4], %[tmp4], 1 \n\t"
// tmp1/tmp2 = ave + (ave - c2) / 2; may fall outside [0, 255].
"addq.ph %[tmp1], %[tmp1], %[tmp3] \n\t"
"addq.ph %[tmp2], %[tmp2], %[tmp4] \n\t"
// Clamp: the saturating left shift by 7 moves each value into the upper
// bits of its halfword (capping overflow), then precrqu_s.qb.ph reduces each
// halfword to an unsigned byte with saturation and packs
// tmp2[hi] | tmp2[lo] | tmp1[hi] | tmp1[lo] into tmp1.
"shll_s.ph %[tmp1], %[tmp1], 7 \n\t"
"shll_s.ph %[tmp2], %[tmp2], 7 \n\t"
"precrqu_s.qb.ph %[tmp1], %[tmp2], %[tmp1] \n\t"
// tmp1..tmp5 are early-clobber ("=&r") because they are written while some
// inputs are still to be read. tmp6 is first written by the second shrl.ph,
// after the last read of c0/c1/c2, so plain "=r" is sufficient for it.
: [tmp1]"=&r"(tmp1), [tmp2]"=&r"(tmp2), [tmp3]"=&r"(tmp3),
[tmp4]"=&r"(tmp4), [tmp5]"=&r"(tmp5), [tmp6]"=r"(tmp6)
: [c0]"r"(c0), [c1]"r"(c1), [c2]"r"(c2)
// NOTE(review): every instruction above is register-to-register; the
// "memory" clobber looks purely conservative -- confirm before changing.
: "memory"
);
// The packed result is left in tmp1 by the final precrqu_s.qb.ph.
return tmp1;
}
// Predictor slot 13: feeds `left`, the entry at top[0] and the entry just
// before it (top[-1]) to ClampedAddSubtractHalf and returns its result.
static uint32_t Predictor13(uint32_t left, const uint32_t* const top) {
  const uint32_t cur = top[0];
  const uint32_t prev = top[-1];
  return ClampedAddSubtractHalf(left, cur, prev);
}
#endif // WEBP_USE_MIPS_DSP_R2
//------------------------------------------------------------------------------
@ -100,6 +134,7 @@ void VP8LDspInitMIPSdspR2(void) {
#if defined(WEBP_USE_MIPS_DSP_R2)
VP8LMapColor32b = MapARGB;
VP8LMapColor8b = MapAlpha;
VP8LPredictors[13] = Predictor13;
#endif // WEBP_USE_MIPS_DSP_R2
}