rescaler: simplify ImportRow logic

Moves the loop over 'channel' from WebPRescalerImport() into the ImportRow functions themselves, and removes their now-unneeded 'channel' parameter.

Change-Id: I4e3b33c111ca825fe96461583420413b17326409
This commit is contained in:
Pascal Massimino 2015-09-19 09:59:32 -07:00
parent 5ff0079ece
commit 9ba1894b9b
5 changed files with 250 additions and 239 deletions

View File

@ -338,8 +338,7 @@ struct WebPRescaler;
// 'channel' denotes the channel number to be imported. 'Expand' corresponds to // Every channel of the row is imported in a single call. 'Expand' corresponds to
// the wrk->x_expand case. Otherwise, 'Shrink' is to be used. // the wrk->x_expand case. Otherwise, 'Shrink' is to be used.
typedef void (*WebPRescalerImportRowFunc)(struct WebPRescaler* const wrk, typedef void (*WebPRescalerImportRowFunc)(struct WebPRescaler* const wrk,
const uint8_t* const src, const uint8_t* src);
int channel);
extern WebPRescalerImportRowFunc WebPRescalerImportRowExpand; extern WebPRescalerImportRowFunc WebPRescalerImportRowExpand;
extern WebPRescalerImportRowFunc WebPRescalerImportRowShrink; extern WebPRescalerImportRowFunc WebPRescalerImportRowShrink;
@ -353,15 +352,15 @@ extern WebPRescalerExportRowFunc WebPRescalerExportRowShrink;
// Plain-C implementation, as fall-back. // Plain-C implementation, as fall-back.
extern void WebPRescalerImportRowExpandC(struct WebPRescaler* const wrk, extern void WebPRescalerImportRowExpandC(struct WebPRescaler* const wrk,
const uint8_t* const src, int channel); const uint8_t* src);
extern void WebPRescalerImportRowShrinkC(struct WebPRescaler* const wrk, extern void WebPRescalerImportRowShrinkC(struct WebPRescaler* const wrk,
const uint8_t* const src, int channel); const uint8_t* src);
extern void WebPRescalerExportRowExpandC(struct WebPRescaler* const wrk); extern void WebPRescalerExportRowExpandC(struct WebPRescaler* const wrk);
extern void WebPRescalerExportRowShrinkC(struct WebPRescaler* const wrk); extern void WebPRescalerExportRowShrinkC(struct WebPRescaler* const wrk);
// Main entry calls: // Main entry calls:
extern void WebPRescalerImportRow(struct WebPRescaler* const wrk, extern void WebPRescalerImportRow(struct WebPRescaler* const wrk,
const uint8_t* const src, int channel); const uint8_t* src);
// Export one row (starting at x_out position) from rescaler. // Export one row (starting at x_out position) from rescaler.
extern void WebPRescalerExportRow(struct WebPRescaler* const wrk); extern void WebPRescalerExportRow(struct WebPRescaler* const wrk);

View File

@ -23,66 +23,69 @@
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
// Row import // Row import
void WebPRescalerImportRowExpandC(WebPRescaler* const wrk, void WebPRescalerImportRowExpandC(WebPRescaler* wrk, const uint8_t* src) {
const uint8_t* const src, int channel) {
const int x_stride = wrk->num_channels; const int x_stride = wrk->num_channels;
const int x_out_max = wrk->dst_width * wrk->num_channels; const int x_out_max = wrk->dst_width * wrk->num_channels;
int x_in = channel; int channel;
int x_out;
// simple bilinear interpolation
int accum = wrk->x_add;
int left = src[x_in];
int right = (wrk->src_width > 1) ? src[x_in + x_stride] : left;
x_in += x_stride;
x_out = channel;
assert(!WebPRescalerInputDone(wrk)); assert(!WebPRescalerInputDone(wrk));
assert(wrk->x_expand); assert(wrk->x_expand);
while (1) { for (channel = 0; channel < x_stride; ++channel) {
wrk->frow[x_out] = right * wrk->x_add + (left - right) * accum; int x_in = channel;
x_out += x_stride; int x_out = channel;
if (x_out >= x_out_max) break; // simple bilinear interpolation
accum -= wrk->x_sub; int accum = wrk->x_add;
if (accum < 0) { int left = src[x_in];
left = right; int right = (wrk->src_width > 1) ? src[x_in + x_stride] : left;
x_in += x_stride; x_in += x_stride;
assert(x_in < wrk->src_width * x_stride); while (1) {
right = src[x_in]; wrk->frow[x_out] = right * wrk->x_add + (left - right) * accum;
accum += wrk->x_add; x_out += x_stride;
if (x_out >= x_out_max) break;
accum -= wrk->x_sub;
if (accum < 0) {
left = right;
x_in += x_stride;
assert(x_in < wrk->src_width * x_stride);
right = src[x_in];
accum += wrk->x_add;
}
} }
assert(wrk->x_sub == 0 /* <- special case for src_width=1 */ || accum == 0);
} }
assert(wrk->x_sub == 0 /* <- special case for src_width=1 */ || accum == 0);
} }
void WebPRescalerImportRowShrinkC(WebPRescaler* const wrk, void WebPRescalerImportRowShrinkC(WebPRescaler* const wrk,
const uint8_t* const src, int channel) { const uint8_t* src) {
const int x_stride = wrk->num_channels; const int x_stride = wrk->num_channels;
const int x_out_max = wrk->dst_width * wrk->num_channels; const int x_out_max = wrk->dst_width * wrk->num_channels;
int x_in = channel; int channel;
int x_out;
uint32_t sum = 0;
int accum = 0;
assert(!WebPRescalerInputDone(wrk)); assert(!WebPRescalerInputDone(wrk));
assert(!wrk->x_expand); assert(!wrk->x_expand);
for (x_out = channel; x_out < x_out_max; x_out += x_stride) { for (channel = 0; channel < x_stride; ++channel) {
uint32_t base = 0; int x_in = channel;
accum += wrk->x_add; int x_out = channel;
while (accum > 0) { uint32_t sum = 0;
accum -= wrk->x_sub; int accum = 0;
assert(x_in < wrk->src_width * x_stride); while (x_out < x_out_max) {
base = src[x_in]; uint32_t base = 0;
sum += base; accum += wrk->x_add;
x_in += x_stride; while (accum > 0) {
} accum -= wrk->x_sub;
{ // Emit next horizontal pixel. assert(x_in < wrk->src_width * x_stride);
const rescaler_t frac = base * (-accum); base = src[x_in];
wrk->frow[x_out] = sum * wrk->x_sub - frac; sum += base;
// fresh fractional start for next pixel x_in += x_stride;
sum = (int)MULT_FIX(frac, wrk->fx_scale); }
{ // Emit next horizontal pixel.
const rescaler_t frac = base * (-accum);
wrk->frow[x_out] = sum * wrk->x_sub - frac;
// fresh fractional start for next pixel
sum = (int)MULT_FIX(frac, wrk->fx_scale);
}
x_out += x_stride;
} }
assert(accum == 0);
} }
assert(accum == 0);
} }
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
@ -145,13 +148,12 @@ void WebPRescalerExportRowShrinkC(WebPRescaler* const wrk) {
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
// Main entry calls // Main entry calls
void WebPRescalerImportRow(WebPRescaler* const wrk, void WebPRescalerImportRow(WebPRescaler* const wrk, const uint8_t* const src) {
const uint8_t* const src, int channel) {
assert(!WebPRescalerInputDone(wrk)); assert(!WebPRescalerInputDone(wrk));
if (!wrk->x_expand) { if (!wrk->x_expand) {
WebPRescalerImportRowShrink(wrk, src, channel); WebPRescalerImportRowShrink(wrk, src);
} else { } else {
WebPRescalerImportRowExpand(wrk, src, channel); WebPRescalerImportRowExpand(wrk, src);
} }
} }

View File

@ -18,117 +18,123 @@
#include <assert.h> #include <assert.h>
#include "../utils/rescaler.h" #include "../utils/rescaler.h"
static void ImportRowShrink(WebPRescaler* const wrk, static void ImportRowShrink(WebPRescaler* const wrk, const uint8_t* src) {
const uint8_t* const src, int channel) {
const int x_stride = wrk->num_channels; const int x_stride = wrk->num_channels;
const int x_out_max = wrk->dst_width * wrk->num_channels; const int x_out_max = wrk->dst_width * wrk->num_channels;
const int fx_scale = wrk->fx_scale; const int fx_scale = wrk->fx_scale;
const int x_add = wrk->x_add; const int x_add = wrk->x_add;
const int x_sub = wrk->x_sub; const int x_sub = wrk->x_sub;
int* frow = wrk->frow + channel;
const uint8_t* src1 = src + channel;
int temp1, temp2, temp3;
int base, frac, sum;
int accum, accum1;
const int x_stride1 = x_stride << 2; const int x_stride1 = x_stride << 2;
int loop_c = x_out_max - channel; int channel;
assert(!wrk->x_expand); assert(!wrk->x_expand);
assert(!WebPRescalerInputDone(wrk)); assert(!WebPRescalerInputDone(wrk));
__asm__ volatile (
"li %[temp1], 0x8000 \n\t" for (channel = 0; channel < x_stride; ++channel) {
"li %[temp2], 0x10000 \n\t" const uint8_t* src1 = src + channel;
"li %[sum], 0 \n\t" int* frow = wrk->frow + channel;
"li %[accum], 0 \n\t" int temp1, temp2, temp3;
"1: \n\t" int base, frac, sum;
"addu %[accum], %[accum], %[x_add] \n\t" int accum, accum1;
"li %[base], 0 \n\t" int loop_c = x_out_max;
"blez %[accum], 3f \n\t"
"2: \n\t" __asm__ volatile (
"lbu %[base], 0(%[src1]) \n\t" "li %[temp1], 0x8000 \n\t"
"subu %[accum], %[accum], %[x_sub] \n\t" "li %[temp2], 0x10000 \n\t"
"addu %[src1], %[src1], %[x_stride] \n\t" "li %[sum], 0 \n\t"
"addu %[sum], %[sum], %[base] \n\t" "li %[accum], 0 \n\t"
"bgtz %[accum], 2b \n\t" "1: \n\t"
"3: \n\t" "addu %[accum], %[accum], %[x_add] \n\t"
"negu %[accum1], %[accum] \n\t" "li %[base], 0 \n\t"
"mul %[frac], %[base], %[accum1] \n\t" "blez %[accum], 3f \n\t"
"mul %[temp3], %[sum], %[x_sub] \n\t" "2: \n\t"
"subu %[loop_c], %[loop_c], %[x_stride] \n\t" "lbu %[base], 0(%[src1]) \n\t"
"sll %[accum1], %[frac], 2 \n\t" "subu %[accum], %[accum], %[x_sub] \n\t"
"mult %[temp1], %[temp2] \n\t" "addu %[src1], %[src1], %[x_stride] \n\t"
"madd %[accum1], %[fx_scale] \n\t" "addu %[sum], %[sum], %[base] \n\t"
"mfhi %[sum] \n\t" "bgtz %[accum], 2b \n\t"
"subu %[temp3], %[temp3], %[frac] \n\t" "3: \n\t"
"sw %[temp3], 0(%[frow]) \n\t" "negu %[accum1], %[accum] \n\t"
"addu %[frow], %[frow], %[x_stride1] \n\t" "mul %[frac], %[base], %[accum1] \n\t"
"bgtz %[loop_c], 1b \n\t" "mul %[temp3], %[sum], %[x_sub] \n\t"
: [accum] "=&r" (accum), [src1] "+r" (src1), [temp3] "=&r" (temp3), "subu %[loop_c], %[loop_c], %[x_stride] \n\t"
[sum] "=&r" (sum), [base] "=&r" (base), [frac] "=&r" (frac), "sll %[accum1], %[frac], 2 \n\t"
[frow] "+r" (frow), [accum1] "=&r" (accum1), "mult %[temp1], %[temp2] \n\t"
[temp2] "=&r" (temp2), [temp1] "=&r" (temp1) "madd %[accum1], %[fx_scale] \n\t"
: [x_stride] "r" (x_stride), [fx_scale] "r" (fx_scale), "mfhi %[sum] \n\t"
[x_sub] "r" (x_sub), [x_add] "r" (x_add), "subu %[temp3], %[temp3], %[frac] \n\t"
[loop_c] "r" (loop_c), [x_stride1] "r" (x_stride1) "sw %[temp3], 0(%[frow]) \n\t"
: "memory", "hi", "lo" "addu %[frow], %[frow], %[x_stride1] \n\t"
); "bgtz %[loop_c], 1b \n\t"
: [accum] "=&r" (accum), [src1] "+r" (src1), [temp3] "=&r" (temp3),
[sum] "=&r" (sum), [base] "=&r" (base), [frac] "=&r" (frac),
[frow] "+r" (frow), [accum1] "=&r" (accum1),
[temp2] "=&r" (temp2), [temp1] "=&r" (temp1)
: [x_stride] "r" (x_stride), [fx_scale] "r" (fx_scale),
[x_sub] "r" (x_sub), [x_add] "r" (x_add),
[loop_c] "r" (loop_c), [x_stride1] "r" (x_stride1)
: "memory", "hi", "lo"
);
}
} }
static void ImportRowExpand(WebPRescaler* const wrk, static void ImportRowExpand(WebPRescaler* const wrk, const uint8_t* src) {
const uint8_t* const src, int channel) {
const int x_stride = wrk->num_channels; const int x_stride = wrk->num_channels;
const int x_out_max = wrk->dst_width * wrk->num_channels; const int x_out_max = wrk->dst_width * wrk->num_channels;
const int x_add = wrk->x_add; const int x_add = wrk->x_add;
const int x_sub = wrk->x_sub; const int x_sub = wrk->x_sub;
const int src_width = wrk->src_width; const int src_width = wrk->src_width;
int* frow = wrk->frow + channel;
const uint8_t* src1 = src + channel;
int temp1, temp2, temp3, temp4;
int frac;
int accum;
const int x_stride1 = x_stride << 2; const int x_stride1 = x_stride << 2;
int x_out = channel; int channel;
assert(wrk->x_expand); assert(wrk->x_expand);
assert(!WebPRescalerInputDone(wrk)); assert(!WebPRescalerInputDone(wrk));
__asm__ volatile (
"addiu %[temp3], %[src_width], -1 \n\t" for (channel = 0; channel < x_stride; ++channel) {
"lbu %[temp2], 0(%[src1]) \n\t" const uint8_t* src1 = src + channel;
"addu %[src1], %[src1], %[x_stride] \n\t" int* frow = wrk->frow + channel;
"bgtz %[temp3], 0f \n\t" int temp1, temp2, temp3, temp4;
"addiu %[temp1], %[temp2], 0 \n\t" int frac;
"b 3f \n\t" int accum;
"0: \n\t" int x_out = 0;
"lbu %[temp1], 0(%[src1]) \n\t"
"3: \n\t" __asm__ volatile (
"addiu %[accum], %[x_add], 0 \n\t" "addiu %[temp3], %[src_width], -1 \n\t"
"1: \n\t" "lbu %[temp2], 0(%[src1]) \n\t"
"subu %[temp3], %[temp2], %[temp1] \n\t" "addu %[src1], %[src1], %[x_stride] \n\t"
"mul %[temp3], %[temp3], %[accum] \n\t" "bgtz %[temp3], 0f \n\t"
"mul %[temp4], %[temp1], %[x_add] \n\t" "addiu %[temp1], %[temp2], 0 \n\t"
"addu %[temp3], %[temp4], %[temp3] \n\t" "b 3f \n\t"
"sw %[temp3], 0(%[frow]) \n\t" "0: \n\t"
"addu %[frow], %[frow], %[x_stride1] \n\t" "lbu %[temp1], 0(%[src1]) \n\t"
"addu %[x_out], %[x_out], %[x_stride] \n\t" "3: \n\t"
"subu %[temp3], %[x_out], %[x_out_max] \n\t" "addiu %[accum], %[x_add], 0 \n\t"
"bgez %[temp3], 2f \n\t" "1: \n\t"
"subu %[accum], %[accum], %[x_sub] \n\t" "subu %[temp3], %[temp2], %[temp1] \n\t"
"bgez %[accum], 4f \n\t" "mul %[temp3], %[temp3], %[accum] \n\t"
"addiu %[temp2], %[temp1], 0 \n\t" "mul %[temp4], %[temp1], %[x_add] \n\t"
"addu %[src1], %[src1], %[x_stride] \n\t" "addu %[temp3], %[temp4], %[temp3] \n\t"
"lbu %[temp1], 0(%[src1]) \n\t" "sw %[temp3], 0(%[frow]) \n\t"
"addu %[accum], %[accum], %[x_add] \n\t" "addu %[frow], %[frow], %[x_stride1] \n\t"
"4: \n\t" "addu %[x_out], %[x_out], %[x_stride] \n\t"
"b 1b \n\t" "subu %[temp3], %[x_out], %[x_out_max] \n\t"
"2: \n\t" "bgez %[temp3], 2f \n\t"
: [src1] "+r" (src1), [accum] "=&r" (accum), [temp1] "=&r" (temp1), "subu %[accum], %[accum], %[x_sub] \n\t"
[temp2] "=&r" (temp2), [temp3] "=&r" (temp3), [temp4] "=&r" (temp4), "bgez %[accum], 4f \n\t"
[x_out] "+r" (x_out), [frac] "=&r" (frac), [frow] "+r" (frow) "addiu %[temp2], %[temp1], 0 \n\t"
: [x_stride] "r" (x_stride), [x_add] "r" (x_add), [x_sub] "r" (x_sub), "addu %[src1], %[src1], %[x_stride] \n\t"
[x_stride1] "r" (x_stride1), [src_width] "r" (src_width), "lbu %[temp1], 0(%[src1]) \n\t"
[x_out_max] "r" (x_out_max) "addu %[accum], %[accum], %[x_add] \n\t"
: "memory", "hi", "lo" "4: \n\t"
); "b 1b \n\t"
"2: \n\t"
: [src1] "+r" (src1), [accum] "=&r" (accum), [temp1] "=&r" (temp1),
[temp2] "=&r" (temp2), [temp3] "=&r" (temp3), [temp4] "=&r" (temp4),
[x_out] "+r" (x_out), [frac] "=&r" (frac), [frow] "+r" (frow)
: [x_stride] "r" (x_stride), [x_add] "r" (x_add), [x_sub] "r" (x_sub),
[x_stride1] "r" (x_stride1), [src_width] "r" (src_width),
[x_out_max] "r" (x_out_max)
: "memory", "hi", "lo"
);
}
} }
static void ExportRowShrink(WebPRescaler* const wrk) { static void ExportRowShrink(WebPRescaler* const wrk) {

View File

@ -18,112 +18,118 @@
#include <assert.h> #include <assert.h>
#include "../utils/rescaler.h" #include "../utils/rescaler.h"
static void ImportRowShrink(WebPRescaler* const wrk, static void ImportRowShrink(WebPRescaler* const wrk, const uint8_t* src) {
const uint8_t* const src, int channel) {
const int x_stride = wrk->num_channels; const int x_stride = wrk->num_channels;
const int x_out_max = wrk->dst_width * wrk->num_channels; const int x_out_max = wrk->dst_width * wrk->num_channels;
const int fx_scale = wrk->fx_scale; const int fx_scale = wrk->fx_scale;
const int x_add = wrk->x_add; const int x_add = wrk->x_add;
const int x_sub = wrk->x_sub; const int x_sub = wrk->x_sub;
int* frow = wrk->frow + channel;
const uint8_t* src1 = src + channel;
int temp3;
int base, frac, sum;
int accum, accum1;
const int x_stride1 = x_stride << 2; const int x_stride1 = x_stride << 2;
int loop_c = x_out_max - channel; int channel;
assert(!wrk->x_expand); assert(!wrk->x_expand);
assert(!WebPRescalerInputDone(wrk)); assert(!WebPRescalerInputDone(wrk));
__asm__ volatile (
"li %[sum], 0 \n\t" for (channel = 0; channel < x_stride; ++channel) {
"li %[accum], 0 \n\t" int* frow = wrk->frow + channel;
"1: \n\t" const uint8_t* src1 = src + channel;
"addu %[accum], %[accum], %[x_add] \n\t" int temp3;
"li %[base], 0 \n\t" int base, frac, sum;
"blez %[accum], 3f \n\t" int accum, accum1;
"2: \n\t" int loop_c = x_out_max;
"lbu %[base], 0(%[src1]) \n\t"
"subu %[accum], %[accum], %[x_sub] \n\t" __asm__ volatile (
"addu %[src1], %[src1], %[x_stride] \n\t" "li %[sum], 0 \n\t"
"addu %[sum], %[sum], %[base] \n\t" "li %[accum], 0 \n\t"
"bgtz %[accum], 2b \n\t" "1: \n\t"
"3: \n\t" "addu %[accum], %[accum], %[x_add] \n\t"
"negu %[accum1], %[accum] \n\t" "li %[base], 0 \n\t"
"mul %[frac], %[base], %[accum1] \n\t" "blez %[accum], 3f \n\t"
"mul %[temp3], %[sum], %[x_sub] \n\t" "2: \n\t"
"sll %[accum1], %[frac], 1 \n\t" "lbu %[base], 0(%[src1]) \n\t"
"subu %[loop_c], %[loop_c], %[x_stride] \n\t" "subu %[accum], %[accum], %[x_sub] \n\t"
"mulq_rs.w %[sum], %[accum1], %[fx_scale] \n\t" "addu %[src1], %[src1], %[x_stride] \n\t"
"subu %[temp3], %[temp3], %[frac] \n\t" "addu %[sum], %[sum], %[base] \n\t"
"sw %[temp3], 0(%[frow]) \n\t" "bgtz %[accum], 2b \n\t"
"addu %[frow], %[frow], %[x_stride1] \n\t" "3: \n\t"
"bgtz %[loop_c], 1b \n\t" "negu %[accum1], %[accum] \n\t"
: [accum]"=&r"(accum), [src1]"+&r"(src1), [temp3]"=&r"(temp3), "mul %[frac], %[base], %[accum1] \n\t"
[sum]"=&r"(sum), [base]"=&r"(base), [frac]"=&r"(frac), "mul %[temp3], %[sum], %[x_sub] \n\t"
[frow]"+&r"(frow), [accum1]"=&r"(accum1), "sll %[accum1], %[frac], 1 \n\t"
[loop_c]"+&r"(loop_c) "subu %[loop_c], %[loop_c], %[x_stride] \n\t"
: [x_stride]"r"(x_stride), [fx_scale]"r"(fx_scale), [x_sub]"r"(x_sub), "mulq_rs.w %[sum], %[accum1], %[fx_scale] \n\t"
[x_add] "r" (x_add), [x_stride1] "r" (x_stride1) "subu %[temp3], %[temp3], %[frac] \n\t"
: "memory", "hi", "lo" "sw %[temp3], 0(%[frow]) \n\t"
); "addu %[frow], %[frow], %[x_stride1] \n\t"
"bgtz %[loop_c], 1b \n\t"
: [accum]"=&r"(accum), [src1]"+&r"(src1), [temp3]"=&r"(temp3),
[sum]"=&r"(sum), [base]"=&r"(base), [frac]"=&r"(frac),
[frow]"+&r"(frow), [accum1]"=&r"(accum1),
[loop_c]"+&r"(loop_c)
: [x_stride]"r"(x_stride), [fx_scale]"r"(fx_scale), [x_sub]"r"(x_sub),
[x_add] "r" (x_add), [x_stride1] "r" (x_stride1)
: "memory", "hi", "lo"
);
}
} }
static void ImportRowExpand(WebPRescaler* const wrk, static void ImportRowExpand(WebPRescaler* const wrk, const uint8_t* src) {
const uint8_t* const src, int channel) {
const int x_stride = wrk->num_channels; const int x_stride = wrk->num_channels;
const int x_out_max = wrk->dst_width * wrk->num_channels; const int x_out_max = wrk->dst_width * wrk->num_channels;
const int x_add = wrk->x_add; const int x_add = wrk->x_add;
const int x_sub = wrk->x_sub; const int x_sub = wrk->x_sub;
const int src_width = wrk->src_width; const int src_width = wrk->src_width;
int* frow = wrk->frow + channel;
const uint8_t* src1 = src + channel;
int temp1, temp2, temp3, temp4;
int frac;
int accum;
const int x_stride1 = x_stride << 2; const int x_stride1 = x_stride << 2;
int x_out = channel; int channel;
assert(wrk->x_expand); assert(wrk->x_expand);
assert(!WebPRescalerInputDone(wrk)); assert(!WebPRescalerInputDone(wrk));
__asm__ volatile (
"addiu %[temp3], %[src_width], -1 \n\t" for (channel = 0; channel < x_stride; ++channel) {
"lbu %[temp2], 0(%[src1]) \n\t" int* frow = wrk->frow + channel;
"addu %[src1], %[src1], %[x_stride] \n\t" const uint8_t* src1 = src + channel;
"bgtz %[temp3], 0f \n\t" int temp1, temp2, temp3, temp4;
"addiu %[temp1], %[temp2], 0 \n\t" int frac;
"b 3f \n\t" int accum;
"0: \n\t" int x_out = channel;
"lbu %[temp1], 0(%[src1]) \n\t"
"3: \n\t" __asm__ volatile (
"addiu %[accum], %[x_add], 0 \n\t" "addiu %[temp3], %[src_width], -1 \n\t"
"1: \n\t" "lbu %[temp2], 0(%[src1]) \n\t"
"subu %[temp3], %[temp2], %[temp1] \n\t" "addu %[src1], %[src1], %[x_stride] \n\t"
"mul %[temp3], %[temp3], %[accum] \n\t" "bgtz %[temp3], 0f \n\t"
"mul %[temp4], %[temp1], %[x_add] \n\t" "addiu %[temp1], %[temp2], 0 \n\t"
"addu %[temp3], %[temp4], %[temp3] \n\t" "b 3f \n\t"
"sw %[temp3], 0(%[frow]) \n\t" "0: \n\t"
"addu %[frow], %[frow], %[x_stride1] \n\t" "lbu %[temp1], 0(%[src1]) \n\t"
"addu %[x_out], %[x_out], %[x_stride] \n\t" "3: \n\t"
"subu %[temp3], %[x_out], %[x_out_max] \n\t" "addiu %[accum], %[x_add], 0 \n\t"
"bgez %[temp3], 2f \n\t" "1: \n\t"
"subu %[accum], %[accum], %[x_sub] \n\t" "subu %[temp3], %[temp2], %[temp1] \n\t"
"bgez %[accum], 4f \n\t" "mul %[temp3], %[temp3], %[accum] \n\t"
"addiu %[temp2], %[temp1], 0 \n\t" "mul %[temp4], %[temp1], %[x_add] \n\t"
"addu %[src1], %[src1], %[x_stride] \n\t" "addu %[temp3], %[temp4], %[temp3] \n\t"
"lbu %[temp1], 0(%[src1]) \n\t" "sw %[temp3], 0(%[frow]) \n\t"
"addu %[accum], %[accum], %[x_add] \n\t" "addu %[frow], %[frow], %[x_stride1] \n\t"
"4: \n\t" "addu %[x_out], %[x_out], %[x_stride] \n\t"
"b 1b \n\t" "subu %[temp3], %[x_out], %[x_out_max] \n\t"
"2: \n\t" "bgez %[temp3], 2f \n\t"
: [src1] "+r" (src1), [accum] "=&r" (accum), [temp1] "=&r" (temp1), "subu %[accum], %[accum], %[x_sub] \n\t"
[temp2] "=&r" (temp2), [temp3] "=&r" (temp3), [temp4] "=&r" (temp4), "bgez %[accum], 4f \n\t"
[x_out] "+r" (x_out), [frac] "=&r" (frac), [frow] "+r" (frow) "addiu %[temp2], %[temp1], 0 \n\t"
: [x_stride] "r" (x_stride), [x_add] "r" (x_add), [x_sub] "r" (x_sub), "addu %[src1], %[src1], %[x_stride] \n\t"
[x_stride1] "r" (x_stride1), [src_width] "r" (src_width), "lbu %[temp1], 0(%[src1]) \n\t"
[x_out_max] "r" (x_out_max) "addu %[accum], %[accum], %[x_add] \n\t"
: "memory", "hi", "lo" "4: \n\t"
); "b 1b \n\t"
"2: \n\t"
: [src1] "+r" (src1), [accum] "=&r" (accum), [temp1] "=&r" (temp1),
[temp2] "=&r" (temp2), [temp3] "=&r" (temp3), [temp4] "=&r" (temp4),
[x_out] "+r" (x_out), [frac] "=&r" (frac), [frow] "+r" (frow)
: [x_stride] "r" (x_stride), [x_add] "r" (x_add), [x_sub] "r" (x_sub),
[x_stride1] "r" (x_stride1), [src_width] "r" (src_width),
[x_out_max] "r" (x_out_max)
: "memory", "hi", "lo"
);
}
} }
static void ExportRowShrink(WebPRescaler* const wrk) { static void ExportRowShrink(WebPRescaler* const wrk) {

View File

@ -102,16 +102,14 @@ int WebPRescalerImport(WebPRescaler* const wrk, int num_lines,
const uint8_t* src, int src_stride) { const uint8_t* src, int src_stride) {
int total_imported = 0; int total_imported = 0;
while (total_imported < num_lines && !WebPRescalerHasPendingOutput(wrk)) { while (total_imported < num_lines && !WebPRescalerHasPendingOutput(wrk)) {
int x, channel;
if (wrk->y_expand) { if (wrk->y_expand) {
rescaler_t* const tmp = wrk->irow; rescaler_t* const tmp = wrk->irow;
wrk->irow = wrk->frow; wrk->irow = wrk->frow;
wrk->frow = tmp; wrk->frow = tmp;
} }
for (channel = 0; channel < wrk->num_channels; ++channel) { WebPRescalerImportRow(wrk, src);
WebPRescalerImportRow(wrk, src, channel);
}
if (!wrk->y_expand) { // Accumulate the contribution of the new row. if (!wrk->y_expand) { // Accumulate the contribution of the new row.
int x;
for (x = 0; x < wrk->num_channels * wrk->dst_width; ++x) { for (x = 0; x < wrk->num_channels * wrk->dst_width; ++x) {
wrk->irow[x] += wrk->frow[x]; wrk->irow[x] += wrk->frow[x];
} }