MIPS: dspr2: added optimization for TransformAC3

Change-Id: Icd789ee5f6d764297e7dc0a0f8a3bc47ab92ac65
This commit is contained in:
Djordje Pesut 2014-09-02 14:18:51 +02:00
parent 24e1072aac
commit e2502a97c1

View File

@ -16,6 +16,11 @@
#if defined(WEBP_USE_MIPS_DSP_R2)
// Multipliers scaled by (1 << 16), used through MUL() by TransformAC3.
// kC1 carries an extra (1 << 16), so MUL(x, kC1) == x + ((x * 20091) >> 16).
static const int kC1 = 20091 + (1 << 16);
static const int kC2 = 35468;
// Multiplies 'a' by 'b' and drops the 16 low bits of the product.
// The product is formed in 'long long': a 16-bit coefficient times kC1
// (85627) can exceed INT_MAX, which is signed overflow (undefined behavior)
// when evaluated in plain 'int'. For every input where the 'int' product was
// representable the result is unchanged.
// Note: '>>' on a negative value is implementation-defined; like the rest of
// this code it relies on an arithmetic (sign-propagating) shift.
#define MUL(a, b) ((int)(((long long)(a) * (b)) >> 16))
static void TransformDC(const int16_t* in, uint8_t* dst) {
int temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10;
@ -69,6 +74,94 @@ static void TransformDC(const int16_t* in, uint8_t* dst) {
);
}
// Adds the residual derived from the three coefficients in[0], in[1] and
// in[4] to a 4x4 block of bytes at 'dst' and stores the result back, clamped
// to [0, 255]. The four rows are accessed at byte offsets 0, 32, 64 and 96
// from 'dst' (presumably the row stride of the caller's buffer is 32 bytes;
// confirm against the BPS definition used elsewhere in the project).
// The work is done on paired 16-bit lanes using MIPS DSP ASE instructions.
// Below, {hi, lo} denotes the upper and lower halfword of a 32-bit register.
static void TransformAC3(const int16_t* in, uint8_t* dst) {
// '+ 4' is the rounding term for the arithmetic '>> 3' applied further down.
const int a = in[0] + 4;
// c4/d4 depend on in[4] and select the row; c1/d1 depend on in[1] and select
// the position within a row. Only the low 16 bits of a, c1, d1, c4 and d4 are
// consumed by the assembly (through 'replv.ph' and 'ins').
int c4 = MUL(in[4], kC2);
const int d4 = MUL(in[4], kC1);
const int c1 = MUL(in[1], kC2);
const int d1 = MUL(in[1], kC1);
int temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9;
int temp10, temp11, temp12, temp13, temp14, temp15, temp16, temp17, temp18;
__asm__ volatile (
// c4 = {d4, c4}: pack both row terms into one register.
"ins %[c4], %[d4], 16, 16 \n\t"
// temp1 = {a, a}, temp4 = {d1, d1}.
"replv.ph %[temp1], %[a] \n\t"
"replv.ph %[temp4], %[d1] \n\t"
// temp2 = {a + d4, a + c4}, temp3 = {a - d4, a - c4}.
"addq.ph %[temp2], %[temp1], %[c4] \n\t"
"subq.ph %[temp3], %[temp1], %[c4] \n\t"
// temp5 = {c1, c1}.
"replv.ph %[temp5], %[c1] \n\t"
// Combine each row term with +d1, -d1, +c1, -c1:
//   temp1 = temp2 + d1, temp6 = temp2 - d1, temp7 = temp2 + c1, temp8 = temp2 - c1
"addq.ph %[temp1], %[temp2], %[temp4] \n\t"
"subq.ph %[temp6], %[temp2], %[temp4] \n\t"
"addq.ph %[temp7], %[temp2], %[temp5] \n\t"
"subq.ph %[temp8], %[temp2], %[temp5] \n\t"
//   temp2 = temp3 + d1, temp9 = temp3 - d1, temp10 = temp3 + c1, temp4 = temp3 - c1
// (temp2 and temp4 are reused as destinations once their old values are dead).
"addq.ph %[temp2], %[temp3], %[temp4] \n\t"
"subq.ph %[temp9], %[temp3], %[temp4] \n\t"
"addq.ph %[temp10], %[temp3], %[temp5] \n\t"
"subq.ph %[temp4], %[temp3], %[temp5] \n\t"
// Arithmetic shift right by 3 on every 16-bit lane.
"shra.ph %[temp1], %[temp1], 3 \n\t"
"shra.ph %[temp6], %[temp6], 3 \n\t"
"shra.ph %[temp7], %[temp7], 3 \n\t"
"shra.ph %[temp8], %[temp8], 3 \n\t"
"shra.ph %[temp2], %[temp2], 3 \n\t"
"shra.ph %[temp9], %[temp9], 3 \n\t"
"shra.ph %[temp10], %[temp10], 3 \n\t"
"shra.ph %[temp4], %[temp4], 3 \n\t"
// Unaligned loads of the four destination rows (4 bytes each).
"ulw %[temp3], 0(%[dst]) \n\t"
"ulw %[temp5], 32(%[dst]) \n\t"
"ulw %[temp11], 64(%[dst]) \n\t"
"ulw %[temp12], 96(%[dst]) \n\t"
// Zero-extend the bytes of each row to halfwords: '.qbr' takes the two
// least-significant bytes of the word, '.qbl' the two most-significant ones.
//   row at   0: temp13 (low pair), temp14 (high pair)
//   row at  32: temp3  (low pair), temp15 (high pair)
//   row at  64: temp5  (low pair), temp16 (high pair)
//   row at  96: temp11 (low pair), temp17 (high pair)
"preceu.ph.qbr %[temp13], %[temp3] \n\t"
"preceu.ph.qbl %[temp14], %[temp3] \n\t"
"preceu.ph.qbr %[temp3], %[temp5] \n\t"
"preceu.ph.qbl %[temp15], %[temp5] \n\t"
"preceu.ph.qbr %[temp5], %[temp11] \n\t"
"preceu.ph.qbl %[temp16], %[temp11] \n\t"
"preceu.ph.qbr %[temp11], %[temp12] \n\t"
"preceu.ph.qbl %[temp17], %[temp12] \n\t"
// Regroup the residuals so that each register holds two values of the same
// row: 'precrq.ph.w' gathers the upper halfwords (the d4 lanes) of its two
// sources, 'ins ..., 16, 16' gathers the lower halfwords (the c4 lanes).
//   temp12 = {a+d4+c1, a+d4+d1}, temp18 = {a+d4-d1, a+d4-c1}
//   temp1  = {a+c4+c1, a+c4+d1}, temp8  = {a+c4-d1, a+c4-c1}
//   temp7  = {a-d4+c1, a-d4+d1}, temp6  = {a-d4-d1, a-d4-c1}
//   temp2  = {a-c4+c1, a-c4+d1}, temp4  = {a-c4-d1, a-c4-c1}
// (all values already shifted right by 3).
"precrq.ph.w %[temp12], %[temp7], %[temp1] \n\t"
"precrq.ph.w %[temp18], %[temp6], %[temp8] \n\t"
"ins %[temp1], %[temp7], 16, 16 \n\t"
"ins %[temp8], %[temp6], 16, 16 \n\t"
"precrq.ph.w %[temp7], %[temp10], %[temp2] \n\t"
"precrq.ph.w %[temp6], %[temp9], %[temp4] \n\t"
"ins %[temp2], %[temp10], 16, 16 \n\t"
"ins %[temp4], %[temp9], 16, 16 \n\t"
// Add the residuals to the expanded destination values:
// row 0 gets the (a + d4) terms, row 32 the (a + c4) terms,
// row 64 the (a - c4) terms and row 96 the (a - d4) terms.
"addq.ph %[temp13], %[temp13], %[temp12] \n\t"
"addq.ph %[temp14], %[temp14], %[temp18] \n\t"
"addq.ph %[temp3], %[temp3], %[temp1] \n\t"
"addq.ph %[temp15], %[temp15], %[temp8] \n\t"
"addq.ph %[temp5], %[temp5], %[temp2] \n\t"
"addq.ph %[temp16], %[temp16], %[temp4] \n\t"
"addq.ph %[temp11], %[temp11], %[temp7] \n\t"
"addq.ph %[temp17], %[temp17], %[temp6] \n\t"
// Clamp to [0, 255] in two steps: the saturating shift left by 7 moves the
// 8-bit range into bits 14..7 (larger values saturate to 0x7FFF, negative
// ones stay negative) ...
"shll_s.ph %[temp13], %[temp13], 7 \n\t"
"shll_s.ph %[temp14], %[temp14], 7 \n\t"
"shll_s.ph %[temp3], %[temp3], 7 \n\t"
"shll_s.ph %[temp15], %[temp15], 7 \n\t"
"shll_s.ph %[temp5], %[temp5], 7 \n\t"
"shll_s.ph %[temp16], %[temp16], 7 \n\t"
"shll_s.ph %[temp11], %[temp11], 7 \n\t"
"shll_s.ph %[temp17], %[temp17], 7 \n\t"
// ... then 'precrqu_s.qb.ph' reduces four halfwords to four unsigned bytes
// with saturation, re-packing the high pair and the low pair of each row.
"precrqu_s.qb.ph %[temp13], %[temp14], %[temp13] \n\t"
"precrqu_s.qb.ph %[temp3], %[temp15], %[temp3] \n\t"
"precrqu_s.qb.ph %[temp5], %[temp16], %[temp5] \n\t"
"precrqu_s.qb.ph %[temp11], %[temp17], %[temp11] \n\t"
// Unaligned stores of the four updated rows.
"usw %[temp13], 0(%[dst]) \n\t"
"usw %[temp3], 32(%[dst]) \n\t"
"usw %[temp5], 64(%[dst]) \n\t"
"usw %[temp11], 96(%[dst]) \n\t"
// Outputs: all temporaries are early-clobber ("=&r") because they are written
// before the inputs are fully consumed; c4 is read-write ("+&r") since the
// first 'ins' modifies it in place.
: [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
[temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp6]"=&r"(temp6),
[temp7]"=&r"(temp7), [temp8]"=&r"(temp8), [temp9]"=&r"(temp9),
[temp10]"=&r"(temp10), [temp11]"=&r"(temp11), [temp12]"=&r"(temp12),
[temp13]"=&r"(temp13), [temp14]"=&r"(temp14), [temp15]"=&r"(temp15),
[temp16]"=&r"(temp16), [temp17]"=&r"(temp17), [temp18]"=&r"(temp18),
[c4]"+&r"(c4)
// Inputs (read-only registers).
: [dst]"r"(dst), [a]"r"(a), [d1]"r"(d1), [d4]"r"(d4), [c1]"r"(c1)
// The asm reads and writes memory through 'dst'.
: "memory"
);
}
#endif // WEBP_USE_MIPS_DSP_R2
//------------------------------------------------------------------------------
@ -79,5 +172,6 @@ extern void VP8DspInitMIPSdspR2(void);
// Installs the MIPS DSP R2 routines of this file into the VP8 transform
// function pointers. When WEBP_USE_MIPS_DSP_R2 is not defined the body is
// empty and the pointers are left untouched.
void VP8DspInitMIPSdspR2(void) {
#if defined(WEBP_USE_MIPS_DSP_R2)
  // The two assignments are independent of each other.
  VP8TransformAC3 = TransformAC3;
  VP8TransformDC = TransformDC;
#endif  // WEBP_USE_MIPS_DSP_R2
}