mirror of
https://github.com/webmproject/libwebp.git
synced 2025-07-17 14:29:48 +02:00
rescaler: add some SSE2 code
The rounding and arithmetic is not the same as previously, to prevent overflow cases for large upscale factors. We still rely on 32b x 32b -> 64b multiplies. Raised the fixed-point precision to 32b so that we have some nice shifts from epi64 to epi32. Changed rescaler_t type to 'uint32_t' in order to squeeze in all the precision required. The MIPS code has been disabled because it's now out-of-sync. Will be fixed in a subsequent CL when the dust settles. ~30-35% faster Change-Id: I32e4ddc00933f1b1aa3463403086199fd5dad07b
This commit is contained in:
@ -31,7 +31,7 @@ static void ImportRowShrink(WebPRescaler* const wrk, const uint8_t* src) {
|
||||
|
||||
for (channel = 0; channel < x_stride; ++channel) {
|
||||
const uint8_t* src1 = src + channel;
|
||||
int* frow = wrk->frow + channel;
|
||||
rescaler_t* frow = wrk->frow + channel;
|
||||
int temp1, temp2, temp3;
|
||||
int base, frac, sum;
|
||||
int accum, accum1;
|
||||
@ -90,7 +90,7 @@ static void ImportRowExpand(WebPRescaler* const wrk, const uint8_t* src) {
|
||||
|
||||
for (channel = 0; channel < x_stride; ++channel) {
|
||||
const uint8_t* src1 = src + channel;
|
||||
int* frow = wrk->frow + channel;
|
||||
rescaler_t* frow = wrk->frow + channel;
|
||||
int temp1, temp2, temp3, temp4;
|
||||
int frac;
|
||||
int accum;
|
||||
@ -138,18 +138,15 @@ static void ImportRowExpand(WebPRescaler* const wrk, const uint8_t* src) {
|
||||
}
|
||||
|
||||
static void ExportRowShrink(WebPRescaler* const wrk) {
|
||||
const int x_out_max = wrk->dst_width * wrk->num_channels;
|
||||
uint8_t* dst = wrk->dst;
|
||||
rescaler_t* irow = wrk->irow;
|
||||
assert(!WebPRescalerOutputDone(wrk));
|
||||
assert(wrk->y_accum <= 0);
|
||||
assert(!wrk->y_expand);
|
||||
// if wrk->fxy_scale can fit into 32 bits use optimized code,
|
||||
// otherwise use C code
|
||||
if ((wrk->fxy_scale >> 32) == 0) {
|
||||
uint8_t* dst = wrk->dst;
|
||||
rescaler_t* irow = wrk->irow;
|
||||
if (wrk->fxy_scale != 0) {
|
||||
const rescaler_t* frow = wrk->frow;
|
||||
const int yscale = wrk->fy_scale * (-wrk->y_accum);
|
||||
const int x_out_max = wrk->dst_width * wrk->num_channels;
|
||||
|
||||
int temp0, temp1, temp3, temp4, temp5, temp6, temp7, loop_end;
|
||||
const int temp2 = (int)(wrk->fxy_scale);
|
||||
const int temp8 = x_out_max << 2;
|
||||
@ -192,8 +189,12 @@ static void ExportRowShrink(WebPRescaler* const wrk) {
|
||||
: [temp2]"r"(temp2), [yscale]"r"(yscale), [temp8]"r"(temp8)
|
||||
: "memory", "hi", "lo"
|
||||
);
|
||||
} else {
|
||||
WebPRescalerExportRowShrinkC(wrk);
|
||||
} else { // very special case for src = dst = 1x1
|
||||
int x_out;
|
||||
for (x_out = 0; x_out < x_out_max; ++x_out) {
|
||||
dst[x_out] = irow[x_out];
|
||||
irow[x_out] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -205,9 +206,17 @@ static void ExportRowShrink(WebPRescaler* const wrk) {
|
||||
extern void WebPRescalerDspInitMIPS32(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void WebPRescalerDspInitMIPS32(void) {
|
||||
#if 0
|
||||
// The assembly code is currently out-of-sync wrt the C-implementation.
|
||||
// Disabled for now.
|
||||
WebPRescalerImportRowExpand = ImportRowExpand;
|
||||
WebPRescalerImportRowShrink = ImportRowShrink;
|
||||
WebPRescalerExportRowShrink = ExportRowShrink;
|
||||
#else
|
||||
(void)ImportRowExpand;
|
||||
(void)ImportRowShrink;
|
||||
(void)ExportRowShrink;
|
||||
#endif
|
||||
}
|
||||
|
||||
#else // !WEBP_USE_MIPS32
|
||||
|
Reference in New Issue
Block a user