From b7685d73fea42fe3a2a38f942aca3b1b4e8706c7 Mon Sep 17 00:00:00 2001 From: Pascal Massimino Date: Mon, 17 Feb 2014 00:58:17 -0800 Subject: [PATCH] Rescale: let ImportRow / ExportRow be pointer-to-function Separate the C version from the MIPS32 version and have run-time initialization during RescalerInit() Change-Id: I93cfa5691c073a099fe62eda1333ad2bb749915b --- src/dec/io.c | 10 ++-- src/dec/vp8l.c | 4 +- src/utils/rescaler.c | 134 ++++++++++++++++++++++++++----------------- src/utils/rescaler.h | 16 +++--- 4 files changed, 96 insertions(+), 68 deletions(-) diff --git a/src/dec/io.c b/src/dec/io.c index 0f2c8d10..04e50bdd 100644 --- a/src/dec/io.c +++ b/src/dec/io.c @@ -370,9 +370,9 @@ static int ExportRGB(WebPDecParams* const p, int y_pos) { WebPRescalerHasPendingOutput(&p->scaler_u)) { assert(p->last_y + y_pos + num_lines_out < p->output->height); assert(p->scaler_u.y_accum == p->scaler_v.y_accum); - WebPRescalerExportRow(&p->scaler_y); - WebPRescalerExportRow(&p->scaler_u); - WebPRescalerExportRow(&p->scaler_v); + WebPRescalerExportRow(&p->scaler_y, 0); + WebPRescalerExportRow(&p->scaler_u, 0); + WebPRescalerExportRow(&p->scaler_v, 0); convert(p->scaler_y.dst, p->scaler_u.dst, p->scaler_v.dst, dst, p->scaler_y.dst_width); dst += buf->stride; @@ -420,7 +420,7 @@ static int ExportAlpha(WebPDecParams* const p, int y_pos) { while (WebPRescalerHasPendingOutput(&p->scaler_a)) { int i; assert(p->last_y + y_pos + num_lines_out < p->output->height); - WebPRescalerExportRow(&p->scaler_a); + WebPRescalerExportRow(&p->scaler_a, 0); for (i = 0; i < width; ++i) { const uint32_t alpha_value = p->scaler_a.dst[i]; dst[4 * i] = alpha_value; @@ -453,7 +453,7 @@ static int ExportAlphaRGBA4444(WebPDecParams* const p, int y_pos) { while (WebPRescalerHasPendingOutput(&p->scaler_a)) { int i; assert(p->last_y + y_pos + num_lines_out < p->output->height); - WebPRescalerExportRow(&p->scaler_a); + WebPRescalerExportRow(&p->scaler_a, 0); for (i = 0; i < width; ++i) { // Fill in the alpha 
value (converted to 4 bits). const uint32_t alpha_value = p->scaler_a.dst[i] >> 4; diff --git a/src/dec/vp8l.c b/src/dec/vp8l.c index ea0254d7..159eefa3 100644 --- a/src/dec/vp8l.c +++ b/src/dec/vp8l.c @@ -420,7 +420,7 @@ static int Export(WebPRescaler* const rescaler, WEBP_CSP_MODE colorspace, int num_lines_out = 0; while (WebPRescalerHasPendingOutput(rescaler)) { uint8_t* const dst = rgba + num_lines_out * rgba_stride; - WebPRescalerExportRow(rescaler); + WebPRescalerExportRow(rescaler, 0); WebPMultARGBRow(src, dst_width, 1); VP8LConvertFromBGRA(src, dst_width, colorspace, dst); ++num_lines_out; @@ -537,7 +537,7 @@ static int ExportYUVA(const VP8LDecoder* const dec, int y_pos) { const int dst_width = rescaler->dst_width; int num_lines_out = 0; while (WebPRescalerHasPendingOutput(rescaler)) { - WebPRescalerExportRow(rescaler); + WebPRescalerExportRow(rescaler, 0); WebPMultARGBRow(src, dst_width, 1); ConvertToYUVA(src, dst_width, y_pos, dec->output_); ++y_pos; diff --git a/src/utils/rescaler.c b/src/utils/rescaler.c index f5b52881..1c1392dd 100644 --- a/src/utils/rescaler.c +++ b/src/utils/rescaler.c @@ -16,42 +16,19 @@ #include "./rescaler.h" //------------------------------------------------------------------------------ +// Implementations of critical functions ImportRow / ExportRow + +void (*WebPRescalerImportRow)(WebPRescaler* const wrk, + const uint8_t* const src, int channel) = NULL; +void (*WebPRescalerExportRow)(WebPRescaler* const wrk, int x_out) = NULL; #define RFIX 30 #define MULT_FIX(x, y) (((int64_t)(x) * (y) + (1 << (RFIX - 1))) >> RFIX) -void WebPRescalerInit(WebPRescaler* const wrk, int src_width, int src_height, - uint8_t* const dst, int dst_width, int dst_height, - int dst_stride, int num_channels, int x_add, int x_sub, - int y_add, int y_sub, int32_t* const work) { - wrk->x_expand = (src_width < dst_width); - wrk->src_width = src_width; - wrk->src_height = src_height; - wrk->dst_width = dst_width; - wrk->dst_height = dst_height; - wrk->dst = 
dst; - wrk->dst_stride = dst_stride; - wrk->num_channels = num_channels; - // for 'x_expand', we use bilinear interpolation - wrk->x_add = wrk->x_expand ? (x_sub - 1) : x_add - x_sub; - wrk->x_sub = wrk->x_expand ? (x_add - 1) : x_sub; - wrk->y_accum = y_add; - wrk->y_add = y_add; - wrk->y_sub = y_sub; - wrk->fx_scale = (1 << RFIX) / x_sub; - wrk->fy_scale = (1 << RFIX) / y_sub; - wrk->fxy_scale = wrk->x_expand ? - ((int64_t)dst_height << RFIX) / (x_sub * src_height) : - ((int64_t)dst_height << RFIX) / (x_add * src_height); - wrk->irow = work; - wrk->frow = work + num_channels * dst_width; -} - -void WebPRescalerImportRow(WebPRescaler* const wrk, - const uint8_t* const src, int channel) { +static void ImportRowC(WebPRescaler* const wrk, + const uint8_t* const src, int channel) { const int x_stride = wrk->num_channels; const int x_out_max = wrk->dst_width * wrk->num_channels; -#if !defined(__mips__) int x_in = channel; int x_out; int accum = 0; @@ -85,17 +62,45 @@ void WebPRescalerImportRow(WebPRescaler* const wrk, accum -= wrk->x_sub; } } - // Accumulate the new row's contribution + // Accumulate the contribution of the new row. for (x_out = channel; x_out < x_out_max; x_out += x_stride) { wrk->irow[x_out] += wrk->frow[x_out]; } -#else // __mips__ +} + +static void ExportRowC(WebPRescaler* const wrk, int x_out) { + if (wrk->y_accum <= 0) { + uint8_t* const dst = wrk->dst; + int32_t* const irow = wrk->irow; + const int32_t* const frow = wrk->frow; + const int yscale = wrk->fy_scale * (-wrk->y_accum); + const int x_out_max = wrk->dst_width * wrk->num_channels; + for (; x_out < x_out_max; ++x_out) { + const int frac = (int)MULT_FIX(frow[x_out], yscale); + const int v = (int)MULT_FIX(irow[x_out] - frac, wrk->fxy_scale); + dst[x_out] = (!(v & ~0xff)) ? v : (v < 0) ? 
0 : 255; + irow[x_out] = frac; // new fractional start + } + wrk->y_accum += wrk->y_add; + wrk->dst += wrk->dst_stride; + } +} + +//------------------------------------------------------------------------------ +// MIPS version + +#if defined(WEBP_USE_MIPS32) + +static void ImportRowMIPS(WebPRescaler* const wrk, + const uint8_t* const src, int channel) { + const int x_stride = wrk->num_channels; + const int x_out_max = wrk->dst_width * wrk->num_channels; const int fx_scale = wrk->fx_scale; const int x_add = wrk->x_add; const int x_sub = wrk->x_sub; int* frow = wrk->frow + channel; int* irow = wrk->irow + channel; - uint8_t* src1 = (uint8_t*)src + channel; + const uint8_t* src1 = src + channel; int temp1, temp2, temp3; int base, frac, sum; int accum, accum1; @@ -181,19 +186,15 @@ void WebPRescalerImportRow(WebPRescaler* const wrk, : "memory", "hi", "lo" ); } -#endif // !__mips__ } -uint8_t* WebPRescalerExportRow(WebPRescaler* const wrk) { +static void ExportRowMIPS(WebPRescaler* const wrk, int x_out) { if (wrk->y_accum <= 0) { - int x_out = 0; uint8_t* const dst = wrk->dst; int32_t* const irow = wrk->irow; const int32_t* const frow = wrk->frow; const int yscale = wrk->fy_scale * (-wrk->y_accum); const int x_out_max = wrk->dst_width * wrk->num_channels; - -#if defined(__mips__) // if wrk->fxy_scale can fit into 32 bits use optimized code, // otherwise use C code if ((wrk->fxy_scale >> 32) == 0) { @@ -242,20 +243,53 @@ uint8_t* WebPRescalerExportRow(WebPRescaler* const wrk) { : [temp2]"r"(temp2), [yscale]"r"(yscale), [temp8]"r"(temp8) : "memory", "hi", "lo" ); - x_out = x_out_max; + // ExportRowC() is skipped on this path: advance to the next row. + wrk->y_accum += wrk->y_add; + wrk->dst += wrk->dst_stride; + } else { + ExportRowC(wrk, x_out); } -#endif // __mips__ - for (; x_out < x_out_max; ++x_out) { - const int frac = (int)MULT_FIX(frow[x_out], yscale); - const int v = (int)MULT_FIX(irow[x_out] - frac, wrk->fxy_scale); - dst[x_out] = (!(v & ~0xff)) ? v : (v < 0) ? 
0 : 255; - irow[x_out] = frac; // new fractional start + } +} +#endif // WEBP_USE_MIPS32 + +//------------------------------------------------------------------------------ + +void WebPRescalerInit(WebPRescaler* const wrk, int src_width, int src_height, + uint8_t* const dst, int dst_width, int dst_height, + int dst_stride, int num_channels, int x_add, int x_sub, + int y_add, int y_sub, int32_t* const work) { + wrk->x_expand = (src_width < dst_width); + wrk->src_width = src_width; + wrk->src_height = src_height; + wrk->dst_width = dst_width; + wrk->dst_height = dst_height; + wrk->dst = dst; + wrk->dst_stride = dst_stride; + wrk->num_channels = num_channels; + // for 'x_expand', we use bilinear interpolation + wrk->x_add = wrk->x_expand ? (x_sub - 1) : x_add - x_sub; + wrk->x_sub = wrk->x_expand ? (x_add - 1) : x_sub; + wrk->y_accum = y_add; + wrk->y_add = y_add; + wrk->y_sub = y_sub; + wrk->fx_scale = (1 << RFIX) / x_sub; + wrk->fy_scale = (1 << RFIX) / y_sub; + wrk->fxy_scale = wrk->x_expand ? 
+ ((int64_t)dst_height << RFIX) / (x_sub * src_height) : + ((int64_t)dst_height << RFIX) / (x_add * src_height); + wrk->irow = work; + wrk->frow = work + num_channels * dst_width; + + if (WebPRescalerImportRow == NULL) { + WebPRescalerImportRow = ImportRowC; + WebPRescalerExportRow = ExportRowC; +#if defined(WEBP_USE_MIPS32) + if (VP8GetCPUInfo(kMIPS32)) { + WebPRescalerImportRow = ImportRowMIPS; + WebPRescalerExportRow = ExportRowMIPS; } - wrk->y_accum += wrk->y_add; - wrk->dst += wrk->dst_stride; - return dst; - } else { - return NULL; +#endif } } @@ -288,11 +319,10 @@ int WebPRescalerImport(WebPRescaler* const wrk, int num_lines, int WebPRescalerExport(WebPRescaler* const rescaler) { int total_exported = 0; while (WebPRescalerHasPendingOutput(rescaler)) { - WebPRescalerExportRow(rescaler); + WebPRescalerExportRow(rescaler, 0); ++total_exported; } return total_exported; } //------------------------------------------------------------------------------ - diff --git a/src/utils/rescaler.h b/src/utils/rescaler.h index 68e49cee..a6f37871 100644 --- a/src/utils/rescaler.h +++ b/src/utils/rescaler.h @@ -52,26 +52,24 @@ void WebPRescalerInit(WebPRescaler* const rescaler, int WebPRescaleNeededLines(const WebPRescaler* const rescaler, int max_num_lines); -// Import a row of data and save its contribution in the rescaler. -// 'channel' denotes the channel number to be imported. -void WebPRescalerImportRow(WebPRescaler* const rescaler, - const uint8_t* const src, int channel); - // Import multiple rows over all channels, until at least one row is ready to // be exported. Returns the actual number of lines that were imported. int WebPRescalerImport(WebPRescaler* const rescaler, int num_rows, const uint8_t* src, int src_stride); +// Import a row of data and save its contribution in the rescaler. +// 'channel' denotes the channel number to be imported. 
+extern void (*WebPRescalerImportRow)(WebPRescaler* const wrk, + const uint8_t* const src, int channel); +// Export one row (starting at x_out position) from rescaler. +extern void (*WebPRescalerExportRow)(WebPRescaler* const wrk, int x_out); + // Return true if there is pending output rows ready. static WEBP_INLINE int WebPRescalerHasPendingOutput(const WebPRescaler* const rescaler) { return (rescaler->y_accum <= 0); } -// Export one row from rescaler. Returns the pointer where output was written, -// or NULL if no row was pending. -uint8_t* WebPRescalerExportRow(WebPRescaler* const rescaler); - // Export as many rows as possible. Return the numbers of rows written. int WebPRescalerExport(WebPRescaler* const rescaler);