From 92d8fc7dd45e444fc06aac33fb545160935ad37e Mon Sep 17 00:00:00 2001 From: Djordje Pesut Date: Wed, 25 Dec 2013 11:59:46 +0100 Subject: [PATCH] MIPS: MIPS32r1: Optimization of function WebPRescalerImportRow. PATCH [3/6] Change-Id: I32339a8d2d03f1a8d8638563d2b2c9e3a13a4909 --- src/utils/rescaler.c | 94 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) diff --git a/src/utils/rescaler.c b/src/utils/rescaler.c index 2a04d3a1..1da2f0ff 100644 --- a/src/utils/rescaler.c +++ b/src/utils/rescaler.c @@ -51,6 +51,7 @@ void WebPRescalerImportRow(WebPRescaler* const wrk, const uint8_t* const src, int channel) { const int x_stride = wrk->num_channels; const int x_out_max = wrk->dst_width * wrk->num_channels; +#if !defined (__mips__) int x_in = channel; int x_out; int accum = 0; @@ -88,6 +89,99 @@ void WebPRescalerImportRow(WebPRescaler* const wrk, for (x_out = channel; x_out < x_out_max; x_out += x_stride) { wrk->irow[x_out] += wrk->frow[x_out]; } +#else + const int fx_scale = wrk->fx_scale; + const int x_add = wrk->x_add; + const int x_sub = wrk->x_sub; + int* frow = wrk->frow + channel; + int* irow = wrk->irow + channel; + uint8_t* src1 = (uint8_t*)src + channel; + int temp1, temp2, temp3; + int base, frac, sum; + int accum, accum1; + const int x_stride1 = x_stride << 2; + int loop_c = x_out_max - channel; + + if (!wrk->x_expand) { + __asm__ volatile ( + "li %[temp1], 0x8000 \n\t" + "li %[temp2], 0x10000 \n\t" + "li %[sum], 0 \n\t" + "li %[accum], 0 \n\t" + "1: \n\t" + "addu %[accum], %[accum], %[x_add] \n\t" + "blez %[accum], 3f \n\t" + "2: \n\t" + "lbu %[temp3], 0(%[src1]) \n\t" + "subu %[accum], %[accum], %[x_sub] \n\t" + "addu %[src1], %[src1], %[x_stride] \n\t" + "addu %[sum], %[sum], %[temp3] \n\t" + "bgtz %[accum], 2b \n\t" + "3: \n\t" + "lbu %[base], 0(%[src1]) \n\t" + "addu %[src1], %[src1], %[x_stride] \n\t" + "negu %[accum1], %[accum] \n\t" + "mul %[frac], %[base], %[accum1] \n\t" + "addu %[temp3], %[sum], %[base] \n\t" + "mul %[temp3], %[temp3], %[x_sub] \n\t" + "lw %[base], 0(%[irow]) \n\t" + "subu %[loop_c], %[loop_c], %[x_stride] \n\t" + "sll %[accum1], %[frac], 2 \n\t" + "mult %[temp1], %[temp2] \n\t" + "madd %[accum1], %[fx_scale] \n\t" + "mfhi %[sum] \n\t" + "subu %[temp3], %[temp3], %[frac] \n\t" + "sw %[temp3], 0(%[frow]) \n\t" + "add %[base], %[base], %[temp3] \n\t" + "sw %[base], 0(%[irow]) \n\t" + "addu %[irow], %[irow], %[x_stride1] \n\t" + "addu %[frow], %[frow], %[x_stride1] \n\t" + "bgtz %[loop_c], 1b \n\t" + + : [accum] "=&r" (accum), [src1] "+r" (src1), [temp3] "=&r" (temp3), + [sum] "=&r" (sum), [base] "=&r" (base), [frac] "=&r" (frac), + [frow] "+r" (frow), [irow] "+r" (irow), [accum1] "=&r" (accum1), + [temp2] "=&r" (temp2), [temp1] "=&r" (temp1) + : [x_stride] "r" (x_stride), [fx_scale] "r" (fx_scale), + [x_sub] "r" (x_sub), [x_add] "r" (x_add), + [loop_c] "r" (loop_c), [x_stride1] "r" (x_stride1) + : "memory", "hi", "lo" + ); + } else { + __asm__ volatile ( + "lbu %[temp1], 0(%[src1]) \n\t" + "move %[temp2], %[temp1] \n\t" + "li %[accum], 0 \n\t" + "1: \n\t" + "bgez %[accum], 2f \n\t" + "move %[temp2], %[temp1] \n\t" + "addu %[src1], %[x_stride] \n\t" + "lbu %[temp1], 0(%[src1]) \n\t" + "addu %[accum], %[x_add] \n\t" + "2: \n\t" + "subu %[temp3], %[temp2], %[temp1] \n\t" + "mul %[temp3], %[temp3], %[accum] \n\t" + "mul %[base], %[temp1], %[x_add] \n\t" + "subu %[accum], %[accum], %[x_sub] \n\t" + "lw %[frac], 0(%[irow]) \n\t" + "subu %[loop_c], %[loop_c], %[x_stride] \n\t" + "addu %[temp3], %[base], %[temp3] \n\t" + "sw %[temp3], 0(%[frow]) \n\t" + "addu %[frow], %[x_stride1] \n\t" + "addu %[frac], %[temp3] \n\t" + "sw %[frac], 0(%[irow]) \n\t" + "addu %[irow], %[x_stride1] \n\t" + "bgtz %[loop_c], 1b \n\t" + + : [src1] "+r" (src1), [accum] "=&r" (accum), [temp1] "=&r" (temp1), + [temp2] "=&r" (temp2), [temp3] "=&r" (temp3), [base] "=&r" (base), + [frac] "=&r" (frac), [frow] "+r" (frow), [irow] "+r" (irow) + : [x_stride] "r" (x_stride), [x_add] "r" (x_add), [x_sub] "r" (x_sub), + [x_stride1] "r" (x_stride1), [loop_c] "r" (loop_c) + : "memory", "hi", "lo" + ); + } +#endif // (__mips__) } uint8_t* WebPRescalerExportRow(WebPRescaler* const wrk) {