diff --git a/src/enc/picture_csp.c b/src/enc/picture_csp.c index 516e4b86..e18f1096 100644 --- a/src/enc/picture_csp.c +++ b/src/enc/picture_csp.c @@ -23,6 +23,9 @@ // Uncomment to disable gamma-compression during RGB->U/V averaging #define USE_GAMMA_COMPRESSION +// If defined, use table to compute x / alpha. +#define USE_INVERSE_ALPHA_TABLE + static const union { uint32_t argb; uint8_t bytes[4]; @@ -114,6 +117,7 @@ static WEBP_INLINE int Interpolate(int v) { const int v0 = kLinearToGammaTab[tab_pos]; const int v1 = kLinearToGammaTab[tab_pos + 1]; const int y = v1 * x + v0 * ((kGammaTabScale << 2) - x); // interpolate + assert(tab_pos + 1 < kGammaTabSize + 1); return y; } @@ -444,7 +448,6 @@ static int ConvertWRGBToYUV(const fixed_y_t* const best_y, return 1; } - //------------------------------------------------------------------------------ // Main function @@ -571,15 +574,186 @@ static int PreprocessARGB(const uint8_t* const r_ptr, //------------------------------------------------------------------------------ // "Fast" regular RGB->YUV -#define SUM4(ptr) LinearToGamma( \ +#define SUM4(ptr, step) LinearToGamma( \ GammaToLinear((ptr)[0]) + \ - GammaToLinear((ptr)[step]) + \ + GammaToLinear((ptr)[(step)]) + \ GammaToLinear((ptr)[rgb_stride]) + \ - GammaToLinear((ptr)[rgb_stride + step]), 0) \ + GammaToLinear((ptr)[rgb_stride + (step)]), 0) \ -#define SUM2V(ptr) \ +#define SUM2(ptr) \ LinearToGamma(GammaToLinear((ptr)[0]) + GammaToLinear((ptr)[rgb_stride]), 1) +#define SUM2ALPHA(ptr) ((ptr)[0] + (ptr)[rgb_stride]) +#define SUM4ALPHA(ptr) (SUM2ALPHA(ptr) + SUM2ALPHA((ptr) + 4)) + +#if defined(USE_INVERSE_ALPHA_TABLE) + +static const int kAlphaFix = 19; +// Following table is (1 << kAlphaFix) / a. The (v * kInvAlpha[a]) >> kAlphaFix +// formula is then equal to v / a in most (99.6%) cases. Note that this table +// and constant are adjusted very tightly to fit 32b arithmetic. +// In particular, they use the fact that the operands for 'v / a' are actually +// derived as v = (a0.p0 + a1.p1 + a2.p2 + a3.p3) and a = a0 + a1 + a2 + a3 +// with ai in [0..255] and pi in [0..1<> (kAlphaFix - 2)) + +#else + +#define DIVIDE_BY_ALPHA(sum, a) (4 * (sum) / (a)) + +#endif // USE_INVERSE_ALPHA_TABLE + +static WEBP_INLINE int LinearToGammaWeighted(const uint8_t* src, + const uint8_t* a_ptr, + uint32_t total_a, int step, + int rgb_stride) { + const uint32_t sum = + a_ptr[0] * GammaToLinear(src[0]) + + a_ptr[step] * GammaToLinear(src[step]) + + a_ptr[rgb_stride] * GammaToLinear(src[rgb_stride]) + + a_ptr[rgb_stride + step] * GammaToLinear(src[rgb_stride + step]); + assert(total_a > 0 && total_a <= 4 * 0xff); +#if defined(USE_INVERSE_ALPHA_TABLE) + assert((uint64_t)sum * kInvAlpha[total_a] < ((uint64_t)1 << 32)); +#endif + return LinearToGamma(DIVIDE_BY_ALPHA(sum, total_a), 0); +} + static WEBP_INLINE void ConvertRowToY(const uint8_t* const r_ptr, const uint8_t* const g_ptr, const uint8_t* const b_ptr, @@ -593,6 +767,49 @@ static WEBP_INLINE void ConvertRowToY(const uint8_t* const r_ptr, } } +static WEBP_INLINE void ConvertRowsToUVWithAlpha(const uint8_t* const r_ptr, + const uint8_t* const g_ptr, + const uint8_t* const b_ptr, + const uint8_t* const a_ptr, + int rgb_stride, + uint8_t* const dst_u, + uint8_t* const dst_v, + int width, + VP8Random* const rg) { + int i, j; + // we loop over 2x2 blocks and produce one U/V value for each. + for (i = 0, j = 0; i < (width >> 1); ++i, j += 2 * sizeof(uint32_t)) { + const uint32_t a = SUM4ALPHA(a_ptr + j); + int r, g, b; + if (a == 4 * 0xff || a == 0) { + r = SUM4(r_ptr + j, 4); + g = SUM4(g_ptr + j, 4); + b = SUM4(b_ptr + j, 4); + } else { + r = LinearToGammaWeighted(r_ptr + j, a_ptr + j, a, 4, rgb_stride); + g = LinearToGammaWeighted(g_ptr + j, a_ptr + j, a, 4, rgb_stride); + b = LinearToGammaWeighted(b_ptr + j, a_ptr + j, a, 4, rgb_stride); + } + dst_u[i] = RGBToU(r, g, b, rg); + dst_v[i] = RGBToV(r, g, b, rg); + } + if (width & 1) { + const uint32_t a = 2u * SUM2ALPHA(a_ptr + j); + int r, g, b; + if (a == 4 * 0xff || a == 0) { + r = SUM2(r_ptr + j); + g = SUM2(g_ptr + j); + b = SUM2(b_ptr + j); + } else { + r = LinearToGammaWeighted(r_ptr + j, a_ptr + j, a, 0, rgb_stride); + g = LinearToGammaWeighted(g_ptr + j, a_ptr + j, a, 0, rgb_stride); + b = LinearToGammaWeighted(b_ptr + j, a_ptr + j, a, 0, rgb_stride); + } + dst_u[i] = RGBToU(r, g, b, rg); + dst_v[i] = RGBToV(r, g, b, rg); + } +} + static WEBP_INLINE void ConvertRowsToUV(const uint8_t* const r_ptr, const uint8_t* const g_ptr, const uint8_t* const b_ptr, @@ -603,16 +820,16 @@ static WEBP_INLINE void ConvertRowsToUV(const uint8_t* const r_ptr, VP8Random* const rg) { int i, j; for (i = 0, j = 0; i < (width >> 1); ++i, j += 2 * step) { - const int r = SUM4(r_ptr + j); - const int g = SUM4(g_ptr + j); - const int b = SUM4(b_ptr + j); + const int r = SUM4(r_ptr + j, step); + const int g = SUM4(g_ptr + j, step); + const int b = SUM4(b_ptr + j, step); dst_u[i] = RGBToU(r, g, b, rg); dst_v[i] = RGBToV(r, g, b, rg); } if (width & 1) { - const int r = SUM2V(r_ptr + j); - const int g = SUM2V(g_ptr + j); - const int b = SUM2V(b_ptr + j); + const int r = SUM2(r_ptr + j); + const int g = SUM2(g_ptr + j); + const int b = SUM2(b_ptr + j); dst_u[i] = RGBToU(r, g, b, rg); dst_v[i] = RGBToV(r, g, b, rg); } @@ -644,16 +861,28 @@ static int ImportYUVAFromRGBA(const uint8_t* const r_ptr, if (!WebPPictureAllocYUVA(picture, width, height)) { return 0; } + if (has_alpha) { + WebPInitAlphaProcessing(); + assert(step == 4); +#if defined(USE_INVERSE_ALPHA_TABLE) + assert(kAlphaFix + kGammaFix <= 31); +#endif + } if (use_iterative_conversion) { InitGammaTablesF(); if (!PreprocessARGB(r_ptr, g_ptr, b_ptr, step, rgb_stride, picture)) { return 0; } + if (has_alpha) { + WebPExtractAlpha(a_ptr, rgb_stride, width, height, + picture->a, picture->a_stride); + } } else { uint8_t* dst_y = picture->y; uint8_t* dst_u = picture->u; uint8_t* dst_v = picture->v; + uint8_t* dst_a = picture->a; VP8Random base_rg; VP8Random* rg = NULL; @@ -666,6 +895,7 @@ static int ImportYUVAFromRGBA(const uint8_t* const r_ptr, // Downsample Y/U/V planes, two rows at a time for (y = 0; y < (height >> 1); ++y) { + int rows_have_alpha = has_alpha; const int off1 = (2 * y + 0) * rgb_stride; const int off2 = (2 * y + 1) * rgb_stride; ConvertRowToY(r_ptr + off1, g_ptr + off1, b_ptr + off1, step, @@ -673,28 +903,38 @@ static int ImportYUVAFromRGBA(const uint8_t* const r_ptr, ConvertRowToY(r_ptr + off2, g_ptr + off2, b_ptr + off2, step, dst_y + picture->y_stride, width, rg); dst_y += 2 * picture->y_stride; - ConvertRowsToUV(r_ptr + off1, g_ptr + off1, b_ptr + off1, - step, rgb_stride, dst_u, dst_v, width, rg); + if (has_alpha) { + rows_have_alpha &= !WebPExtractAlpha(a_ptr + off1, rgb_stride, + width, 2, + dst_a, picture->a_stride); + dst_a += 2 * picture->a_stride; + } + if (!rows_have_alpha) { + ConvertRowsToUV(r_ptr + off1, g_ptr + off1, b_ptr + off1, + step, rgb_stride, dst_u, dst_v, width, rg); + } else { + ConvertRowsToUVWithAlpha(r_ptr + off1, g_ptr + off1, b_ptr + off1, + a_ptr + off1, rgb_stride, + dst_u, dst_v, width, rg); + } dst_u += picture->uv_stride; dst_v += picture->uv_stride; } if (height & 1) { // extra last row const int off = 2 * y * rgb_stride; + int row_has_alpha = has_alpha; ConvertRowToY(r_ptr + off, g_ptr + off, b_ptr + off, step, dst_y, width, rg); - ConvertRowsToUV(r_ptr + off, g_ptr + off, b_ptr + off, - step, 0, dst_u, dst_v, width, rg); - } - } - - if (has_alpha) { - assert(step >= 4); - assert(picture->a != NULL); - for (y = 0; y < height; ++y) { - int x; - for (x = 0; x < width; ++x) { - picture->a[x + y * picture->a_stride] = - a_ptr[step * x + y * rgb_stride]; + if (row_has_alpha) { + row_has_alpha &= !WebPExtractAlpha(a_ptr + off, 0, width, 1, dst_a, 0); + } + if (!row_has_alpha) { + ConvertRowsToUV(r_ptr + off, g_ptr + off, b_ptr + off, + step, 0, dst_u, dst_v, width, rg); + } else { + ConvertRowsToUVWithAlpha(r_ptr + off, g_ptr + off, b_ptr + off, + a_ptr + off, 0, + dst_u, dst_v, width, rg); } } } @@ -702,10 +942,9 @@ static int ImportYUVAFromRGBA(const uint8_t* const r_ptr, } #undef SUM4 -#undef SUM2V -#undef SUM2H -#undef SUM1 -#undef RGB_TO_UV +#undef SUM2 +#undef SUM4ALPHA +#undef SUM2ALPHA //------------------------------------------------------------------------------ // call for ARGB->YUVA conversion