faster RGB->YUV conversion function (~7% speedup)

with a special case for dithering==0., it gets somewhat faster on x86
thanks to inlining.

Also, less macros.

(cherry picked from commit e2a83d7109)

Change-Id: Ic2f2bf6718310743bb40cef2104fa759a073e6d5
This commit is contained in:
skal 2014-08-15 11:13:25 -07:00 committed by James Zern
parent 21abaa05e3
commit e2159fdff7

View File

@ -138,15 +138,18 @@ static WEBP_INLINE int LinearToGamma(uint32_t base_value, int shift) {
// RGB -> YUV conversion // RGB -> YUV conversion
static int RGBToY(int r, int g, int b, VP8Random* const rg) { static int RGBToY(int r, int g, int b, VP8Random* const rg) {
return VP8RGBToY(r, g, b, VP8RandomBits(rg, YUV_FIX)); return (rg == NULL) ? VP8RGBToY(r, g, b, YUV_HALF)
: VP8RGBToY(r, g, b, VP8RandomBits(rg, YUV_FIX));
} }
static int RGBToU(int r, int g, int b, VP8Random* const rg) { static int RGBToU(int r, int g, int b, VP8Random* const rg) {
return VP8RGBToU(r, g, b, VP8RandomBits(rg, YUV_FIX + 2)); return (rg == NULL) ? VP8RGBToU(r, g, b, YUV_HALF << 2)
: VP8RGBToU(r, g, b, VP8RandomBits(rg, YUV_FIX + 2));
} }
static int RGBToV(int r, int g, int b, VP8Random* const rg) { static int RGBToV(int r, int g, int b, VP8Random* const rg) {
return VP8RGBToV(r, g, b, VP8RandomBits(rg, YUV_FIX + 2)); return (rg == NULL) ? VP8RGBToV(r, g, b, YUV_HALF << 2)
: VP8RGBToV(r, g, b, VP8RandomBits(rg, YUV_FIX + 2));
} }
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
@ -568,21 +571,45 @@ static int PreprocessARGB(const uint8_t* const r_ptr,
GammaToLinear((ptr)[rgb_stride]) + \ GammaToLinear((ptr)[rgb_stride]) + \
GammaToLinear((ptr)[rgb_stride + step]), 0) \ GammaToLinear((ptr)[rgb_stride + step]), 0) \
#define SUM2H(ptr) \
LinearToGamma(GammaToLinear((ptr)[0]) + GammaToLinear((ptr)[step]), 1)
#define SUM2V(ptr) \ #define SUM2V(ptr) \
LinearToGamma(GammaToLinear((ptr)[0]) + GammaToLinear((ptr)[rgb_stride]), 1) LinearToGamma(GammaToLinear((ptr)[0]) + GammaToLinear((ptr)[rgb_stride]), 1)
#define SUM1(ptr) \
LinearToGamma(GammaToLinear((ptr)[0]), 2)
#define RGB_TO_UV(x, y, SUM) { \ static WEBP_INLINE void ConvertRowToY(const uint8_t* const r_ptr,
const int src = (2 * (step * (x) + (y) * rgb_stride)); \ const uint8_t* const g_ptr,
const int dst = (x) + (y) * picture->uv_stride; \ const uint8_t* const b_ptr,
const int r = SUM(r_ptr + src); \ int step,
const int g = SUM(g_ptr + src); \ uint8_t* const dst_y,
const int b = SUM(b_ptr + src); \ int width,
picture->u[dst] = RGBToU(r, g, b, &rg); \ VP8Random* const rg) {
picture->v[dst] = RGBToV(r, g, b, &rg); \ int i, j;
for (i = 0, j = 0; i < width; ++i, j += step) {
dst_y[i] = RGBToY(r_ptr[j], g_ptr[j], b_ptr[j], rg);
}
}
static WEBP_INLINE void ConvertRowsToUV(const uint8_t* const r_ptr,
const uint8_t* const g_ptr,
const uint8_t* const b_ptr,
int step, int rgb_stride,
uint8_t* const dst_u,
uint8_t* const dst_v,
int width,
VP8Random* const rg) {
int i, j;
for (i = 0, j = 0; i < (width >> 1); ++i, j += 2 * step) {
const int r = SUM4(r_ptr + j);
const int g = SUM4(g_ptr + j);
const int b = SUM4(b_ptr + j);
dst_u[i] = RGBToU(r, g, b, rg);
dst_v[i] = RGBToV(r, g, b, rg);
}
if (width & 1) {
const int r = SUM2V(r_ptr + j);
const int g = SUM2V(g_ptr + j);
const int b = SUM2V(b_ptr + j);
dst_u[i] = RGBToU(r, g, b, rg);
dst_v[i] = RGBToV(r, g, b, rg);
}
} }
static int ImportYUVAFromRGBA(const uint8_t* const r_ptr, static int ImportYUVAFromRGBA(const uint8_t* const r_ptr,
@ -594,7 +621,7 @@ static int ImportYUVAFromRGBA(const uint8_t* const r_ptr,
float dithering, float dithering,
int use_iterative_conversion, int use_iterative_conversion,
WebPPicture* const picture) { WebPPicture* const picture) {
int x, y; int y;
const int width = picture->width; const int width = picture->width;
const int height = picture->height; const int height = picture->height;
const int has_alpha = CheckNonOpaque(a_ptr, width, height, step, rgb_stride); const int has_alpha = CheckNonOpaque(a_ptr, width, height, step, rgb_stride);
@ -612,36 +639,39 @@ static int ImportYUVAFromRGBA(const uint8_t* const r_ptr,
return 0; return 0;
} }
} else { } else {
VP8Random rg; uint8_t* dst_y = picture->y;
VP8InitRandom(&rg, dithering); uint8_t* dst_u = picture->u;
uint8_t* dst_v = picture->v;
VP8Random base_rg;
VP8Random* rg = NULL;
if (dithering > 0.) {
VP8InitRandom(&base_rg, dithering);
rg = &base_rg;
}
InitGammaTables(); InitGammaTables();
// Import luma plane // Downsample Y/U/V planes, two rows at a time
for (y = 0; y < height; ++y) {
uint8_t* const dst = &picture->y[y * picture->y_stride];
for (x = 0; x < width; ++x) {
const int offset = step * x + y * rgb_stride;
dst[x] = RGBToY(r_ptr[offset], g_ptr[offset], b_ptr[offset], &rg);
}
}
// Downsample U/V plane
for (y = 0; y < (height >> 1); ++y) { for (y = 0; y < (height >> 1); ++y) {
for (x = 0; x < (width >> 1); ++x) { const int off1 = (2 * y + 0) * rgb_stride;
RGB_TO_UV(x, y, SUM4); const int off2 = (2 * y + 1) * rgb_stride;
} ConvertRowToY(r_ptr + off1, g_ptr + off1, b_ptr + off1, step,
if (width & 1) { dst_y, width, rg);
RGB_TO_UV(x, y, SUM2V); ConvertRowToY(r_ptr + off2, g_ptr + off2, b_ptr + off2, step,
} dst_y + picture->y_stride, width, rg);
dst_y += 2 * picture->y_stride;
ConvertRowsToUV(r_ptr + off1, g_ptr + off1, b_ptr + off1,
step, rgb_stride, dst_u, dst_v, width, rg);
dst_u += picture->uv_stride;
dst_v += picture->uv_stride;
} }
if (height & 1) { if (height & 1) { // extra last row
for (x = 0; x < (width >> 1); ++x) { const int off = 2 * y * rgb_stride;
RGB_TO_UV(x, y, SUM2H); ConvertRowToY(r_ptr + off, g_ptr + off, b_ptr + off, step,
} dst_y, width, rg);
if (width & 1) { ConvertRowsToUV(r_ptr + off, g_ptr + off, b_ptr + off,
RGB_TO_UV(x, y, SUM1); step, 0, dst_u, dst_v, width, rg);
}
} }
} }
@ -649,6 +679,7 @@ static int ImportYUVAFromRGBA(const uint8_t* const r_ptr,
assert(step >= 4); assert(step >= 4);
assert(picture->a != NULL); assert(picture->a != NULL);
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
int x;
for (x = 0; x < width; ++x) { for (x = 0; x < width; ++x) {
picture->a[x + y * picture->a_stride] = picture->a[x + y * picture->a_stride] =
a_ptr[step * x + y * rgb_stride]; a_ptr[step * x + y * rgb_stride];