From a2f8b28905bdd39aa397fa76f9d326ac19bf5e6d Mon Sep 17 00:00:00 2001 From: Pascal Massimino Date: Tue, 20 May 2014 09:43:02 -0700 Subject: [PATCH] revamp the point-sampling functions by processing a full plane -nofancy is slower than fancy upsampler, because the latter has SSE2 optim. Change-Id: Ibf22e5a8ea1de86a54248d4a4ecc63d514c01b88 --- src/dec/io.c | 28 +++-------- src/dsp/dsp.h | 20 ++++++-- src/dsp/upsampling.c | 94 ++++++++++++++++++++++--------------- src/dsp/upsampling_mips32.c | 94 +++++++++++++++---------------------- 4 files changed, 115 insertions(+), 121 deletions(-) diff --git a/src/dec/io.c b/src/dec/io.c index 58bc1075..aeef652a 100644 --- a/src/dec/io.c +++ b/src/dec/io.c @@ -45,27 +45,12 @@ static int EmitYUV(const VP8Io* const io, WebPDecParams* const p) { // Point-sampling U/V sampler. static int EmitSampledRGB(const VP8Io* const io, WebPDecParams* const p) { - WebPDecBuffer* output = p->output; - const WebPRGBABuffer* const buf = &output->u.RGBA; - uint8_t* dst = buf->rgba + io->mb_y * buf->stride; - const uint8_t* y_src = io->y; - const uint8_t* u_src = io->u; - const uint8_t* v_src = io->v; - const WebPSampleLinePairFunc sample = WebPSamplers[output->colorspace]; - const int mb_w = io->mb_w; - const int last = io->mb_h - 1; - int j; - for (j = 0; j < last; j += 2) { - sample(y_src, y_src + io->y_stride, u_src, v_src, - dst, dst + buf->stride, mb_w); - y_src += 2 * io->y_stride; - u_src += io->uv_stride; - v_src += io->uv_stride; - dst += 2 * buf->stride; - } - if (j == last) { // Just do the last line twice - sample(y_src, y_src, u_src, v_src, dst, dst, mb_w); - } + WebPDecBuffer* const output = p->output; + WebPRGBABuffer* const buf = &output->u.RGBA; + uint8_t* const dst = buf->rgba + io->mb_y * buf->stride; + WebPSamplers[output->colorspace](io->y, io->y_stride, + io->u, io->v, io->uv_stride, + dst, buf->stride, io->mb_w, io->mb_h); return io->mb_h; } @@ -637,4 +622,3 @@ void WebPInitCustomIo(WebPDecParams* const params, VP8Io* const io) { } //------------------------------------------------------------------------------ - diff --git a/src/dsp/dsp.h b/src/dsp/dsp.h index 7181eb4f..7938d97f 100644 --- a/src/dsp/dsp.h +++ b/src/dsp/dsp.h @@ -195,13 +195,23 @@ void WebPInitUpsamplersNEON(void); #endif // FANCY_UPSAMPLING // Point-sampling methods. -typedef void (*WebPSampleLinePairFunc)( - const uint8_t* top_y, const uint8_t* bottom_y, - const uint8_t* u, const uint8_t* v, - uint8_t* top_dst, uint8_t* bottom_dst, int len); +typedef void (*WebPSamplePlaneFunc)(const uint8_t* y, int y_stride, + const uint8_t* u, const uint8_t* v, + int uv_stride, + uint8_t* dst, int dst_stride, + int width, int height); + +typedef void (*WebPSamplerRowFunc)(const uint8_t* y, + const uint8_t* u, const uint8_t* v, + uint8_t* dst, int len); +// Generic function to apply 'WebPSamplerRowFunc' to the whole plane: +void WebPSamplerProcessPlane(const uint8_t* y, int y_stride, + const uint8_t* u, const uint8_t* v, int uv_stride, + uint8_t* dst, int dst_stride, + int width, int height, WebPSamplerRowFunc func); // Sampling functions to convert YUV to RGB(A) modes -extern WebPSampleLinePairFunc WebPSamplers[/* MODE_LAST */]; +extern WebPSamplePlaneFunc WebPSamplers[/* MODE_LAST */]; // Initializes MIPS version of the samplers. void WebPInitSamplersMIPS32(void); diff --git a/src/dsp/upsampling.c b/src/dsp/upsampling.c index 0716f21c..3510fc86 100644 --- a/src/dsp/upsampling.c +++ b/src/dsp/upsampling.c @@ -109,40 +109,59 @@ UPSAMPLE_FUNC(UpsampleRgb565LinePair, VP8YuvToRgb565, 2) //------------------------------------------------------------------------------ // simple point-sampling +WebPSamplePlaneFunc WebPSamplers[MODE_LAST]; -WebPSampleLinePairFunc WebPSamplers[MODE_LAST]; +void WebPSamplerProcessPlane(const uint8_t* y, int y_stride, + const uint8_t* u, const uint8_t* v, int uv_stride, + uint8_t* dst, int dst_stride, + int width, int height, WebPSamplerRowFunc func) { + int j; + for (j = 0; j < height; ++j) { + func(y, u, v, dst, width); + y += y_stride; + if (j & 1) { + u += uv_stride; + v += uv_stride; + } + dst += dst_stride; + } +} +// TODO(skal): maybe some of these per-row functions should be in yuv.h? #define SAMPLE_FUNC(FUNC_NAME, FUNC, XSTEP) \ -static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \ - const uint8_t* u, const uint8_t* v, \ - uint8_t* top_dst, uint8_t* bottom_dst, int len) { \ - int i; \ - for (i = 0; i < len - 1; i += 2) { \ - FUNC(top_y[0], u[0], v[0], top_dst); \ - FUNC(top_y[1], u[0], v[0], top_dst + XSTEP); \ - FUNC(bottom_y[0], u[0], v[0], bottom_dst); \ - FUNC(bottom_y[1], u[0], v[0], bottom_dst + XSTEP); \ - top_y += 2; \ - bottom_y += 2; \ - u++; \ - v++; \ - top_dst += 2 * XSTEP; \ - bottom_dst += 2 * XSTEP; \ +static void FUNC_NAME##Row(const uint8_t* y, \ + const uint8_t* u, const uint8_t* v, \ + uint8_t* dst, int len) { \ + const uint8_t* const end = dst + (len & ~1) * XSTEP; \ + while (dst != end) { \ + FUNC(y[0], u[0], v[0], dst); \ + FUNC(y[1], u[0], v[0], dst + XSTEP); \ + y += 2; \ + ++u; \ + ++v; \ + dst += 2 * XSTEP; \ } \ - if (i == len - 1) { /* last one */ \ - FUNC(top_y[0], u[0], v[0], top_dst); \ - FUNC(bottom_y[0], u[0], v[0], bottom_dst); \ + if (len & 1) { \ + FUNC(y[0], u[0], v[0], dst); \ } \ +} \ +static void FUNC_NAME(const uint8_t* y, int y_stride, \ + const uint8_t* u, const uint8_t* v, int uv_stride, \ + uint8_t* dst, int dst_stride, \ + int width, int height) { \ + WebPSamplerProcessPlane(y, y_stride, u, v, uv_stride, \ + dst, dst_stride, width, height, \ + FUNC_NAME##Row); \ } // All variants implemented. -SAMPLE_FUNC(SampleRgbLinePair, VP8YuvToRgb, 3) -SAMPLE_FUNC(SampleBgrLinePair, VP8YuvToBgr, 3) -SAMPLE_FUNC(SampleRgbaLinePair, VP8YuvToRgba, 4) -SAMPLE_FUNC(SampleBgraLinePair, VP8YuvToBgra, 4) -SAMPLE_FUNC(SampleArgbLinePair, VP8YuvToArgb, 4) -SAMPLE_FUNC(SampleRgba4444LinePair, VP8YuvToRgba4444, 2) -SAMPLE_FUNC(SampleRgb565LinePair, VP8YuvToRgb565, 2) +SAMPLE_FUNC(SampleRgbPlane, VP8YuvToRgb, 3) +SAMPLE_FUNC(SampleBgrPlane, VP8YuvToBgr, 3) +SAMPLE_FUNC(SampleRgbaPlane, VP8YuvToRgba, 4) +SAMPLE_FUNC(SampleBgraPlane, VP8YuvToBgra, 4) +SAMPLE_FUNC(SampleArgbPlane, VP8YuvToArgb, 4) +SAMPLE_FUNC(SampleRgba4444Plane, VP8YuvToRgba4444, 2) +SAMPLE_FUNC(SampleRgb565Plane, VP8YuvToRgb565, 2) #undef SAMPLE_FUNC @@ -341,17 +360,17 @@ void WebPInitUpsamplers(void) { } void WebPInitSamplers(void) { - WebPSamplers[MODE_RGB] = SampleRgbLinePair; - WebPSamplers[MODE_RGBA] = SampleRgbaLinePair; - WebPSamplers[MODE_BGR] = SampleBgrLinePair; - WebPSamplers[MODE_BGRA] = SampleBgraLinePair; - WebPSamplers[MODE_ARGB] = SampleArgbLinePair; - WebPSamplers[MODE_RGBA_4444] = SampleRgba4444LinePair; - WebPSamplers[MODE_RGB_565] = SampleRgb565LinePair; - WebPSamplers[MODE_rgbA] = SampleRgbaLinePair; - WebPSamplers[MODE_bgrA] = SampleBgraLinePair; - WebPSamplers[MODE_Argb] = SampleArgbLinePair; - WebPSamplers[MODE_rgbA_4444] = SampleRgba4444LinePair; + WebPSamplers[MODE_RGB] = SampleRgbPlane; + WebPSamplers[MODE_RGBA] = SampleRgbaPlane; + WebPSamplers[MODE_BGR] = SampleBgrPlane; + WebPSamplers[MODE_BGRA] = SampleBgraPlane; + WebPSamplers[MODE_ARGB] = SampleArgbPlane; + WebPSamplers[MODE_RGBA_4444] = SampleRgba4444Plane; + WebPSamplers[MODE_RGB_565] = SampleRgb565Plane; + WebPSamplers[MODE_rgbA] = SampleRgbaPlane; + WebPSamplers[MODE_bgrA] = SampleBgraPlane; + WebPSamplers[MODE_Argb] = SampleArgbPlane; + WebPSamplers[MODE_rgbA_4444] = SampleRgba4444Plane; // If defined, use CPUInfo() to overwrite some pointers with faster versions. if (VP8GetCPUInfo != NULL) { @@ -387,4 +406,3 @@ void WebPInitPremultiply(void) { } #endif // FANCY_UPSAMPLING } - diff --git a/src/dsp/upsampling_mips32.c b/src/dsp/upsampling_mips32.c index 3c8e7ce8..5c1a1afc 100644 --- a/src/dsp/upsampling_mips32.c +++ b/src/dsp/upsampling_mips32.c @@ -22,9 +22,9 @@ // simple point-sampling #define SAMPLE_FUNC_MIPS(FUNC_NAME, XSTEP, R, G, B, A) \ -static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \ - const uint8_t* u, const uint8_t* v, \ - uint8_t* top_dst, uint8_t* bottom_dst, int len) { \ +static void FUNC_NAME##Row(const uint8_t* y, \ + const uint8_t* u, const uint8_t* v, \ + uint8_t* dst, int len) { \ int i, r, g, b; \ int temp0, temp1, temp2, temp3, temp4; \ for (i = 0; i < (len >> 1); i++) { \ @@ -32,7 +32,7 @@ static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \ temp3 = kVToG * v[0]; \ temp2 = kUToG * u[0]; \ temp4 = kUToB * u[0]; \ - temp0 = kYScale * top_y[0]; \ + temp0 = kYScale * y[0]; \ temp1 += kRCst; \ temp3 -= kGCst; \ temp2 += temp3; \ @@ -40,47 +40,29 @@ static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \ r = VP8Clip8(temp0 + temp1); \ g = VP8Clip8(temp0 - temp2); \ b = VP8Clip8(temp0 + temp4); \ - temp0 = kYScale * top_y[1]; \ - top_dst[R] = r; \ - top_dst[G] = g; \ - top_dst[B] = b; \ - if (A) top_dst[A] = 0xff; \ + temp0 = kYScale * y[1]; \ + dst[R] = r; \ + dst[G] = g; \ + dst[B] = b; \ + if (A) dst[A] = 0xff; \ r = VP8Clip8(temp0 + temp1); \ g = VP8Clip8(temp0 - temp2); \ b = VP8Clip8(temp0 + temp4); \ - temp0 = kYScale * bottom_y[0]; \ - top_dst[R + XSTEP] = r; \ - top_dst[G + XSTEP] = g; \ - top_dst[B + XSTEP] = b; \ - if (A) top_dst[A + XSTEP] = 0xff; \ - r = VP8Clip8(temp0 + temp1); \ - g = VP8Clip8(temp0 - temp2); \ - b = VP8Clip8(temp0 + temp4); \ - temp0 = kYScale * bottom_y[1]; \ - bottom_dst[R] = r; \ - bottom_dst[G] = g; \ - bottom_dst[B] = b; \ - if (A) bottom_dst[A] = 0xff; \ - r = VP8Clip8(temp0 + temp1); \ - g = VP8Clip8(temp0 - temp2); \ - b = VP8Clip8(temp0 + temp4); \ - bottom_dst[R + XSTEP] = r; \ - bottom_dst[G + XSTEP] = g; \ - bottom_dst[B + XSTEP] = b; \ - if (A) bottom_dst[A + XSTEP] = 0xff; \ - top_y += 2; \ - bottom_y += 2; \ - u++; \ - v++; \ - top_dst += 2 * XSTEP; \ - bottom_dst += 2 * XSTEP; \ + dst[R + XSTEP] = r; \ + dst[G + XSTEP] = g; \ + dst[B + XSTEP] = b; \ + if (A) dst[A + XSTEP] = 0xff; \ + y += 2; \ + ++u; \ + ++v; \ + dst += 2 * XSTEP; \ } \ if (len & 1) { \ temp1 = kVToR * v[0]; \ temp3 = kVToG * v[0]; \ temp2 = kUToG * u[0]; \ temp4 = kUToB * u[0]; \ - temp0 = kYScale * top_y[0]; \ + temp0 = kYScale * y[0]; \ temp1 += kRCst; \ temp3 -= kGCst; \ temp2 += temp3; \ @@ -88,25 +70,25 @@ static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \ r = VP8Clip8(temp0 + temp1); \ g = VP8Clip8(temp0 - temp2); \ b = VP8Clip8(temp0 + temp4); \ - temp0 = kYScale * bottom_y[0]; \ - top_dst[R] = r; \ - top_dst[G] = g; \ - top_dst[B] = b; \ - if (A) top_dst[A] = 0xff; \ - r = VP8Clip8(temp0 + temp1); \ - g = VP8Clip8(temp0 - temp2); \ - b = VP8Clip8(temp0 + temp4); \ - bottom_dst[R] = r; \ - bottom_dst[G] = g; \ - bottom_dst[B] = b; \ - if (A) bottom_dst[A] = 0xff; \ + dst[R] = r; \ + dst[G] = g; \ + dst[B] = b; \ + if (A) dst[A] = 0xff; \ } \ +} \ +static void FUNC_NAME(const uint8_t* y, int y_stride, \ + const uint8_t* u, const uint8_t* v, int uv_stride, \ + uint8_t* dst, int dst_stride, \ + int width, int height) { \ + WebPSamplerProcessPlane(y, y_stride, u, v, uv_stride, \ + dst, dst_stride, width, height, \ + FUNC_NAME##Row); \ } -SAMPLE_FUNC_MIPS(SampleRgbLinePair, 3, 0, 1, 2, 0) -SAMPLE_FUNC_MIPS(SampleRgbaLinePair, 4, 0, 1, 2, 3) -SAMPLE_FUNC_MIPS(SampleBgrLinePair, 3, 2, 1, 0, 0) -SAMPLE_FUNC_MIPS(SampleBgraLinePair, 4, 2, 1, 0, 3) +SAMPLE_FUNC_MIPS(SampleRgbPlane, 3, 0, 1, 2, 0) +SAMPLE_FUNC_MIPS(SampleRgbaPlane, 4, 0, 1, 2, 3) +SAMPLE_FUNC_MIPS(SampleBgrPlane, 3, 2, 1, 0, 0) +SAMPLE_FUNC_MIPS(SampleBgraPlane, 4, 2, 1, 0, 3) #endif // WEBP_USE_MIPS32 @@ -114,9 +96,9 @@ SAMPLE_FUNC_MIPS(SampleBgraLinePair, 4, 2, 1, 0, 3) void WebPInitSamplersMIPS32(void) { #if defined(WEBP_USE_MIPS32) - WebPSamplers[MODE_RGB] = SampleRgbLinePair; - WebPSamplers[MODE_RGBA] = SampleRgbaLinePair; - WebPSamplers[MODE_BGR] = SampleBgrLinePair; - WebPSamplers[MODE_BGRA] = SampleBgraLinePair; + WebPSamplers[MODE_RGB] = SampleRgbPlane; + WebPSamplers[MODE_RGBA] = SampleRgbaPlane; + WebPSamplers[MODE_BGR] = SampleBgrPlane; + WebPSamplers[MODE_BGRA] = SampleBgraPlane; #endif // WEBP_USE_MIPS32 }