Remove memcpy in lossless decoding.

Change-Id: Iba694b306486d67764e2fc5576c98a974c9b886c
This commit is contained in:
Vincent Rabaud
2016-11-24 17:45:22 +01:00
parent 7474d46e45
commit 71e2f5cadf
8 changed files with 151 additions and 112 deletions

View File

@ -228,25 +228,27 @@ static uint32_t Predictor13(uint32_t left, const uint32_t* const top) {
// Add green to blue and red channels (i.e. perform the inverse transform of
// 'subtract green').
static void AddGreenToBlueAndRed(uint32_t* data, int num_pixels) {
static void AddGreenToBlueAndRed(const uint32_t* src, int num_pixels,
uint32_t* dst) {
uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
uint32_t* const p_loop1_end = data + (num_pixels & ~3);
uint32_t* const p_loop2_end = data + num_pixels;
const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
const uint32_t* const p_loop2_end = src + num_pixels;
__asm__ volatile (
".set push \n\t"
".set noreorder \n\t"
"beq %[data], %[p_loop1_end], 3f \n\t"
"beq %[src], %[p_loop1_end], 3f \n\t"
" nop \n\t"
"0: \n\t"
"lw %[temp0], 0(%[data]) \n\t"
"lw %[temp1], 4(%[data]) \n\t"
"lw %[temp2], 8(%[data]) \n\t"
"lw %[temp3], 12(%[data]) \n\t"
"lw %[temp0], 0(%[src]) \n\t"
"lw %[temp1], 4(%[src]) \n\t"
"lw %[temp2], 8(%[src]) \n\t"
"lw %[temp3], 12(%[src]) \n\t"
"ext %[temp4], %[temp0], 8, 8 \n\t"
"ext %[temp5], %[temp1], 8, 8 \n\t"
"ext %[temp6], %[temp2], 8, 8 \n\t"
"ext %[temp7], %[temp3], 8, 8 \n\t"
"addiu %[data], %[data], 16 \n\t"
"addiu %[src], %[src], 16 \n\t"
"addiu %[dst], %[dst], 16 \n\t"
"replv.ph %[temp4], %[temp4] \n\t"
"replv.ph %[temp5], %[temp5] \n\t"
"replv.ph %[temp6], %[temp6] \n\t"
@ -255,44 +257,47 @@ static void AddGreenToBlueAndRed(uint32_t* data, int num_pixels) {
"addu.qb %[temp1], %[temp1], %[temp5] \n\t"
"addu.qb %[temp2], %[temp2], %[temp6] \n\t"
"addu.qb %[temp3], %[temp3], %[temp7] \n\t"
"sw %[temp0], -16(%[data]) \n\t"
"sw %[temp1], -12(%[data]) \n\t"
"sw %[temp2], -8(%[data]) \n\t"
"bne %[data], %[p_loop1_end], 0b \n\t"
" sw %[temp3], -4(%[data]) \n\t"
"sw %[temp0], -16(%[dst]) \n\t"
"sw %[temp1], -12(%[dst]) \n\t"
"sw %[temp2], -8(%[dst]) \n\t"
"bne %[src], %[p_loop1_end], 0b \n\t"
" sw %[temp3], -4(%[dst]) \n\t"
"3: \n\t"
"beq %[data], %[p_loop2_end], 2f \n\t"
"beq %[src], %[p_loop2_end], 2f \n\t"
" nop \n\t"
"1: \n\t"
"lw %[temp0], 0(%[data]) \n\t"
"addiu %[data], %[data], 4 \n\t"
"lw %[temp0], 0(%[src]) \n\t"
"addiu %[src], %[src], 4 \n\t"
"addiu %[dst], %[dst], 4 \n\t"
"ext %[temp4], %[temp0], 8, 8 \n\t"
"replv.ph %[temp4], %[temp4] \n\t"
"addu.qb %[temp0], %[temp0], %[temp4] \n\t"
"bne %[data], %[p_loop2_end], 1b \n\t"
" sw %[temp0], -4(%[data]) \n\t"
"bne %[src], %[p_loop2_end], 1b \n\t"
" sw %[temp0], -4(%[dst]) \n\t"
"2: \n\t"
".set pop \n\t"
: [data]"+&r"(data), [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
[temp2]"=&r"(temp2), [temp3]"=&r"(temp3), [temp4]"=&r"(temp4),
[temp5]"=&r"(temp5), [temp6]"=&r"(temp6), [temp7]"=&r"(temp7)
: [dst]"+&r"(dst), [src]"+&r"(src), [temp0]"=&r"(temp0),
[temp1]"=&r"(temp1), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
[temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp6]"=&r"(temp6),
[temp7]"=&r"(temp7)
: [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
: "memory"
);
}
static void TransformColorInverse(const VP8LMultipliers* const m,
uint32_t* data, int num_pixels) {
const uint32_t* src, int num_pixels,
uint32_t* dst) {
int temp0, temp1, temp2, temp3, temp4, temp5;
uint32_t argb, argb1, new_red;
const uint32_t G_to_R = m->green_to_red_;
const uint32_t G_to_B = m->green_to_blue_;
const uint32_t R_to_B = m->red_to_blue_;
uint32_t* const p_loop_end = data + (num_pixels & ~1);
const uint32_t* const p_loop_end = src + (num_pixels & ~1);
__asm__ volatile (
".set push \n\t"
".set noreorder \n\t"
"beq %[data], %[p_loop_end], 1f \n\t"
"beq %[src], %[p_loop_end], 1f \n\t"
" nop \n\t"
"replv.ph %[temp0], %[G_to_R] \n\t"
"replv.ph %[temp1], %[G_to_B] \n\t"
@ -304,9 +309,12 @@ static void TransformColorInverse(const VP8LMultipliers* const m,
"shra.ph %[temp1], %[temp1], 8 \n\t"
"shra.ph %[temp2], %[temp2], 8 \n\t"
"0: \n\t"
"lw %[argb], 0(%[data]) \n\t"
"lw %[argb1], 4(%[data]) \n\t"
"addiu %[data], %[data], 8 \n\t"
"lw %[argb], 0(%[src]) \n\t"
"lw %[argb1], 4(%[src]) \n\t"
"sw %[argb], 0(%[dst]) \n\t"
"sw %[argb1], 4(%[dst]) \n\t"
"addiu %[src], %[src], 8 \n\t"
"addiu %[dst], %[dst], 8 \n\t"
"precrq.qb.ph %[temp3], %[argb], %[argb1] \n\t"
"preceu.ph.qbra %[temp3], %[temp3] \n\t"
"shll.ph %[temp3], %[temp3], 8 \n\t"
@ -323,29 +331,29 @@ static void TransformColorInverse(const VP8LMultipliers* const m,
"shll.ph %[temp4], %[temp5], 8 \n\t"
"shra.ph %[temp4], %[temp4], 8 \n\t"
"mul.ph %[temp4], %[temp4], %[temp2] \n\t"
"sb %[temp5], -2(%[data]) \n\t"
"sb %[temp5], -2(%[dst]) \n\t"
"sra %[temp5], %[temp5], 16 \n\t"
"shra.ph %[temp4], %[temp4], 5 \n\t"
"addu.ph %[argb1], %[argb1], %[temp4] \n\t"
"preceu.ph.qbra %[temp3], %[argb1] \n\t"
"sb %[temp5], -6(%[data]) \n\t"
"sb %[temp3], -4(%[data]) \n\t"
"sb %[temp5], -6(%[dst]) \n\t"
"sb %[temp3], -4(%[dst]) \n\t"
"sra %[temp3], %[temp3], 16 \n\t"
"bne %[data], %[p_loop_end], 0b \n\t"
" sb %[temp3], -8(%[data]) \n\t"
"bne %[src], %[p_loop_end], 0b \n\t"
" sb %[temp3], -8(%[dst]) \n\t"
"1: \n\t"
".set pop \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
[new_red]"=&r"(new_red), [argb]"=&r"(argb),
[argb1]"=&r"(argb1), [data]"+&r"(data)
[argb1]"=&r"(argb1), [dst]"+&r"(dst), [src]"+&r"(src)
: [G_to_R]"r"(G_to_R), [R_to_B]"r"(R_to_B),
[G_to_B]"r"(G_to_B), [p_loop_end]"r"(p_loop_end)
: "memory", "hi", "lo"
);
// Fall-back to C-version for left-overs.
if (num_pixels & 1) VP8LTransformColorInverse_C(m, data, 1);
if (num_pixels & 1) VP8LTransformColorInverse_C(m, src, 1, dst);
}
static void ConvertBGRAToRGB(const uint32_t* src,