MIPS: dspr2: Added optimization for function VP8LTransformColorInverse_C

Change-Id: I8b60e22c9f6c0badab6267a33751dfc28750f457
This commit is contained in:
Djordje Pesut 2015-02-13 07:53:29 +01:00
parent 3030f11525
commit 42a8a6280c

View File

@@ -518,6 +518,73 @@ static void AddGreenToBlueAndRed(uint32_t* data, int num_pixels) {
);
}
// MIPS DSP-R2 version of the inverse color transform. Pixels are processed
// two at a time using paired-halfword (.ph) SIMD instructions; a trailing odd
// pixel is handed to VP8LTransformColorInverse_C.
//
// For every 32-bit pixel in 'data', the byte in bits 23..16 ("red") and the
// byte in bits 7..0 ("blue") are rewritten in place, modulo 256:
//   new_red  = red  + (((int8)m->green_to_red_  * (int8)green)   >> 5)
//   new_blue = blue + (((int8)m->green_to_blue_ * (int8)green)   >> 5)
//                   + (((int8)m->red_to_blue_   * (int8)new_red) >> 5)
// where 'green' is the byte in bits 15..8. The bytes in bits 31..24 and 15..8
// are never stored to.
//
//   m          - multipliers; only the low 8 bits of each field are used,
//                interpreted as signed values.
//   data       - pixel buffer, modified in place.
//   num_pixels - number of pixels in 'data'.
//
// NOTE(review): the results are written with byte stores at offsets +2 (red)
// and +0 (blue) of each pixel, which matches bits 23..16 / 7..0 only for a
// little-endian memory layout -- confirm this file is little-endian only.
static void TransformColorInverse(const VP8LMultipliers* const m,
uint32_t* data, int num_pixels) {
// temp0..temp2 hold the sign-extended multipliers (one copy per halfword),
// temp3..temp5 are per-iteration scratch registers.
int temp0, temp1, temp2, temp3, temp4, temp5;
// argb / argb1 receive the two pixels loaded in each iteration.
uint32_t argb, argb1, new_red;
const uint32_t G_to_R = m->green_to_red_;
const uint32_t G_to_B = m->green_to_blue_;
const uint32_t R_to_B = m->red_to_blue_;
// The assembly loop consumes pixels in pairs, so it stops at the last even
// pixel index.
uint32_t* const p_loop_end = data + (num_pixels & ~1);
__asm__ volatile (
".set push \n\t"
// noreorder: the instruction written after each branch is its delay slot.
".set noreorder \n\t"
// Nothing to do in the paired loop when fewer than two pixels are present.
"beq %[data], %[p_loop_end], 1f \n\t"
" nop \n\t"
// Replicate each multiplier into both halfwords of a register ...
"replv.ph %[temp0], %[G_to_R] \n\t"
"replv.ph %[temp1], %[G_to_B] \n\t"
"replv.ph %[temp2], %[R_to_B] \n\t"
// ... then sign-extend its low 8 bits to 16 bits (shift left 8, arithmetic
// shift right 8) in each halfword.
"shll.ph %[temp0], %[temp0], 8 \n\t"
"shll.ph %[temp1], %[temp1], 8 \n\t"
"shll.ph %[temp2], %[temp2], 8 \n\t"
"shra.ph %[temp0], %[temp0], 8 \n\t"
"shra.ph %[temp1], %[temp1], 8 \n\t"
"shra.ph %[temp2], %[temp2], 8 \n\t"
"0: \n\t"
// Load two pixels and advance 'data' past them; the stores below therefore
// use negative offsets (-8..-5 is the first pixel, -4..-1 the second).
"lw %[argb], 0(%[data]) \n\t"
"lw %[argb1], 4(%[data]) \n\t"
"addiu %[data], %[data], 8 \n\t"
// Gather the upper byte of every halfword of both pixels, then keep the two
// green bytes as unsigned halfwords: temp3 = { green0, green1 }.
"precrq.qb.ph %[temp3], %[argb], %[argb1] \n\t"
"preceu.ph.qbra %[temp3], %[temp3] \n\t"
// Sign-extend both green values from 8 to 16 bits.
"shll.ph %[temp3], %[temp3], 8 \n\t"
"shra.ph %[temp3], %[temp3], 8 \n\t"
// temp5 = green * G_to_R, temp3 = green * G_to_B (per halfword).
"mul.ph %[temp5], %[temp3], %[temp0] \n\t"
"mul.ph %[temp3], %[temp3], %[temp1] \n\t"
// new_red = upper halfwords of both pixels (red is the low byte of each);
// argb1   = lower halfwords of both pixels (blue is the low byte of each).
"precrq.ph.w %[new_red], %[argb], %[argb1] \n\t"
"ins %[argb1], %[argb], 16, 16 \n\t"
// Scale the products down by 5 bits (arithmetic shift keeps the sign).
"shra.ph %[temp5], %[temp5], 5 \n\t"
"shra.ph %[temp3], %[temp3], 5 \n\t"
// Add the deltas; only the low byte of each halfword is used afterwards.
"addu.ph %[new_red], %[new_red], %[temp5] \n\t"
"addu.ph %[argb1], %[argb1], %[temp3] \n\t"
// temp5 = { new_red0, new_red1 } as unsigned halfwords (low bytes only).
"preceu.ph.qbra %[temp5], %[new_red] \n\t"
// temp4 = sign-extended new red values multiplied by R_to_B.
"shll.ph %[temp4], %[temp5], 8 \n\t"
"shra.ph %[temp4], %[temp4], 8 \n\t"
"mul.ph %[temp4], %[temp4], %[temp2] \n\t"
// Store the second pixel's new red byte, then move the first pixel's red
// into the low bits of temp5 for the store further down.
"sb %[temp5], -2(%[data]) \n\t"
"sra %[temp5], %[temp5], 16 \n\t"
// Add the scaled red-to-blue delta to the blue accumulators.
"shra.ph %[temp4], %[temp4], 5 \n\t"
"addu.ph %[argb1], %[argb1], %[temp4] \n\t"
// temp3 = { new_blue0, new_blue1 } as unsigned halfwords (low bytes only).
"preceu.ph.qbra %[temp3], %[argb1] \n\t"
// First pixel's red, second pixel's blue.
"sb %[temp5], -6(%[data]) \n\t"
"sb %[temp3], -4(%[data]) \n\t"
"sra %[temp3], %[temp3], 16 \n\t"
// Loop while pixel pairs remain; the first pixel's blue byte is stored from
// the branch delay slot, so it executes on every iteration.
"bne %[data], %[p_loop_end], 0b \n\t"
" sb %[temp3], -8(%[data]) \n\t"
"1: \n\t"
".set pop \n\t"
// All scratch registers are early-clobber outputs; 'data' is read-write and
// is left equal to p_loop_end when the block finishes.
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
[new_red]"=&r"(new_red), [argb]"=&r"(argb),
[argb1]"=&r"(argb1), [data]"+&r"(data)
: [G_to_R]"r"(G_to_R), [R_to_B]"r"(R_to_B),
[G_to_B]"r"(G_to_B), [p_loop_end]"r"(p_loop_end)
// hi/lo are declared clobbered, presumably because mul.ph may modify them.
: "memory", "hi", "lo"
);
// Fall-back to C-version for left-overs.
// 'data' now points just past the last pair, i.e. at the odd trailing pixel.
if (num_pixels & 1) VP8LTransformColorInverse_C(m, data, 1);
}
#endif // WEBP_USE_MIPS_DSP_R2
//------------------------------------------------------------------------------
@@ -542,6 +609,7 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInitMIPSdspR2(void) {
VP8LCollectColorBlueTransforms = CollectColorBlueTransforms;
VP8LCollectColorRedTransforms = CollectColorRedTransforms;
VP8LAddGreenToBlueAndRed = AddGreenToBlueAndRed;
VP8LTransformColorInverse = TransformColorInverse;
#endif // WEBP_USE_MIPS_DSP_R2
}