diff --git a/src/dec/buffer.c b/src/dec/buffer.c index 13e01ee0..ad953737 100644 --- a/src/dec/buffer.c +++ b/src/dec/buffer.c @@ -22,7 +22,10 @@ extern "C" { // WebPDecBuffer // Number of bytes per pixel for the different color-spaces. -static const int kModeBpp[MODE_LAST] = { 3, 4, 3, 4, 4, 2, 2, 1, 1 }; +static const int kModeBpp[MODE_LAST] = { + 3, 4, 3, 4, 4, 2, 2, + 4, 4, 4, 2, // pre-multiplied modes + 1, 1 }; // Check that webp_csp_mode is within the bounds of WEBP_CSP_MODE. // Convert to an integer to handle both the unsigned/signed enum cases @@ -38,7 +41,7 @@ static VP8StatusCode CheckDecBuffer(const WebPDecBuffer* const buffer) { const int height = buffer->height; if (!IsValidColorspace(mode)) { ok = 0; - } else if (mode >= MODE_YUV) { // YUV checks + } else if (!WebPIsRGBMode(mode)) { // YUV checks const WebPYUVABuffer* const buf = &buffer->u.YUVA; const size_t size = buf->y_stride * height; const size_t u_size = buf->u_stride * ((height + 1) / 2); @@ -81,7 +84,7 @@ static VP8StatusCode AllocateBuffer(WebPDecBuffer* const buffer) { const int stride = w * kModeBpp[mode]; const uint64_t size = (uint64_t)stride * h; - if (mode >= MODE_YUV) { + if (!WebPIsRGBMode(mode)) { uv_stride = (w + 1) / 2; uv_size = (uint64_t)uv_stride * ((h + 1) / 2); if (mode == MODE_YUVA) { @@ -101,7 +104,7 @@ static VP8StatusCode AllocateBuffer(WebPDecBuffer* const buffer) { return VP8_STATUS_OUT_OF_MEMORY; } - if (mode >= MODE_YUV) { // YUVA initialization + if (!WebPIsRGBMode(mode)) { // YUVA initialization WebPYUVABuffer* const buf = &buffer->u.YUVA; buf->y = output; buf->y_stride = stride; diff --git a/src/dec/frame.c b/src/dec/frame.c index 50982dbd..16666627 100644 --- a/src/dec/frame.c +++ b/src/dec/frame.c @@ -200,7 +200,12 @@ static int FinishRow(VP8Decoder* const dec, VP8Io* const io) { y_end = io->crop_bottom; // make sure we don't overflow on last row. 
} io->a = NULL; - if (dec->alpha_data_ && y_start < y_end) { + if (dec->alpha_data_ != NULL && y_start < y_end) { + // TODO(skal): several things to correct here: + // * testing presence of alpha with dec->alpha_data_ is not a good idea + // * we're actually decompressing the full plane only once. It should be + // more obvious from signature. + // * we could free alpha_data_ right after this call, but we don't own. io->a = VP8DecompressAlphaRows(dec, y_start, y_end - y_start); if (io->a == NULL) { return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR, @@ -214,7 +219,7 @@ static int FinishRow(VP8Decoder* const dec, VP8Io* const io) { io->y += dec->cache_y_stride_ * delta_y; io->u += dec->cache_uv_stride_ * (delta_y >> 1); io->v += dec->cache_uv_stride_ * (delta_y >> 1); - if (io->a) { + if (io->a != NULL) { io->a += io->width * delta_y; } } @@ -222,7 +227,7 @@ static int FinishRow(VP8Decoder* const dec, VP8Io* const io) { io->y += io->crop_left; io->u += io->crop_left >> 1; io->v += io->crop_left >> 1; - if (io->a) { + if (io->a != NULL) { io->a += io->crop_left; } io->mb_y = y_start - io->crop_top; @@ -417,7 +422,8 @@ static int AllocateMemory(VP8Decoder* const dec) { + kFilterExtraRows[dec->filter_type_]) * 3 / 2; const size_t cache_size = top_size * cache_height; const size_t alpha_size = - dec->alpha_data_ ? (dec->pic_hdr_.width_ * dec->pic_hdr_.height_) : 0; + (dec->alpha_data_ != NULL) ? dec->pic_hdr_.width_ * dec->pic_hdr_.height_ + : 0; const size_t needed = intra_pred_mode_size + top_size + mb_info_size + f_info_size + yuv_size + coeffs_size diff --git a/src/dec/io.c b/src/dec/io.c index 2760a29d..82c7c0d3 100644 --- a/src/dec/io.c +++ b/src/dec/io.c @@ -52,9 +52,7 @@ static int EmitSampledRGB(const VP8Io* const io, WebPDecParams* const p) { const uint8_t* y_src = io->y; const uint8_t* u_src = io->u; const uint8_t* v_src = io->v; - const WebPSampleLinePairFunc sample = - io->a ? 
WebPSamplersKeepAlpha[output->colorspace] - : WebPSamplers[output->colorspace]; + const WebPSampleLinePairFunc sample = WebPSamplers[output->colorspace]; const int mb_w = io->mb_w; const int last = io->mb_h - 1; int j; @@ -106,9 +104,7 @@ static int EmitFancyRGB(const VP8Io* const io, WebPDecParams* const p) { int num_lines_out = io->mb_h; // a priori guess const WebPRGBABuffer* const buf = &p->output->u.RGBA; uint8_t* dst = buf->rgba + io->mb_y * buf->stride; - const WebPUpsampleLinePairFunc upsample = - io->a ? WebPUpsamplersKeepAlpha[p->output->colorspace] - : WebPUpsamplers[p->output->colorspace]; + WebPUpsampleLinePairFunc upsample = WebPUpsamplers[p->output->colorspace]; const uint8_t* cur_y = io->y; const uint8_t* cur_u = io->u; const uint8_t* cur_v = io->v; @@ -124,8 +120,6 @@ static int EmitFancyRGB(const VP8Io* const io, WebPDecParams* const p) { upsample(NULL, cur_y, cur_u, cur_v, cur_u, cur_v, NULL, dst, mb_w); } else { // We can finish the left-over line from previous call. - // Warning! Don't overwrite the alpha values (if any), as they - // are not lagging one line behind but are already written. upsample(p->tmp_y, cur_y, top_u, top_v, cur_u, cur_v, dst - buf->stride, dst, mb_w); ++num_lines_out; @@ -184,45 +178,73 @@ static int EmitAlphaYUV(const VP8Io* const io, WebPDecParams* const p) { } static int EmitAlphaRGB(const VP8Io* const io, WebPDecParams* const p) { - const int mb_w = io->mb_w; - const int mb_h = io->mb_h; - int i, j; - const WebPRGBABuffer* const buf = &p->output->u.RGBA; - uint8_t* dst = buf->rgba + io->mb_y * buf->stride + - (p->output->colorspace == MODE_ARGB ? 
0 : 3); const uint8_t* alpha = io->a; - if (alpha) { - for (j = 0; j < mb_h; ++j) { - for (i = 0; i < mb_w; ++i) { - dst[4 * i] = alpha[i]; + if (alpha != NULL) { + const int mb_w = io->mb_w; + const int mb_h = io->mb_h; + int i, j; + const WEBP_CSP_MODE colorspace = p->output->colorspace; + const int alpha_first = + (colorspace == MODE_ARGB || colorspace == MODE_Argb); + const WebPRGBABuffer* const buf = &p->output->u.RGBA; + int start_y = io->mb_y; + int num_rows = mb_h; + + // We compensate for the 1-line delay of fancy upscaler. + // This is similar to EmitFancyRGB(). + if (io->fancy_upsampling) { + if (start_y == 0) { + // We don't process the last row yet. It'll be done during next call. + --num_rows; + } else { + --start_y; + // Fortunately, *alpha data is persistent, so we can go back + // one row and finish alpha blending, now that the fancy upscaler + // completed the YUV->RGB interpolation. + alpha -= io->width; + } + if (io->crop_top + io->mb_y + mb_h == io->crop_bottom) { + // If it's the very last call, we process all the remaining rows! + num_rows = io->crop_bottom - io->crop_top - start_y; + } + } + { + uint8_t* const base_rgba = buf->rgba + start_y * buf->stride; + uint8_t* dst = base_rgba + (alpha_first ? 0 : 3); + for (j = 0; j < num_rows; ++j) { + for (i = 0; i < mb_w; ++i) dst[4 * i] = alpha[i]; + alpha += io->width; + dst += buf->stride; + } + if (WebPIsPremultipliedMode(colorspace)) { + WebPApplyAlphaMultiply(base_rgba, alpha_first, + mb_w, num_rows, buf->stride); } - alpha += io->width; - dst += buf->stride; } } return 0; } -static WEBP_INLINE uint32_t clip(uint32_t v, uint32_t max_value) { - return (v > max_value) ? 
max_value : v; -} - static int EmitAlphaRGBA4444(const VP8Io* const io, WebPDecParams* const p) { - const int mb_w = io->mb_w; - const int mb_h = io->mb_h; - int i, j; - const WebPRGBABuffer* const buf = &p->output->u.RGBA; - uint8_t* dst = buf->rgba + io->mb_y * buf->stride + 1; const uint8_t* alpha = io->a; - if (alpha) { + if (alpha != NULL) { + const int mb_w = io->mb_w; + const int mb_h = io->mb_h; + int i, j; + const WebPRGBABuffer* const buf = &p->output->u.RGBA; + uint8_t* const base_rgba = buf->rgba + io->mb_y * buf->stride; + uint8_t* alpha_dst = base_rgba + 1; for (j = 0; j < mb_h; ++j) { for (i = 0; i < mb_w; ++i) { // Fill in the alpha value (converted to 4 bits). - const uint32_t alpha_val = clip((alpha[i] + 8) >> 4, 15); - dst[2 * i] = (dst[2 * i] & 0xf0) | alpha_val; + const uint32_t alpha_val = VP8Clip4Bits(alpha[i]); + alpha_dst[2 * i] = (alpha_dst[2 * i] & 0xf0) | alpha_val; } alpha += io->width; - dst += buf->stride; + alpha_dst += buf->stride; + } + if (p->output->colorspace == MODE_rgbA_4444) { + WebPApplyAlphaMultiply4444(base_rgba, mb_w, mb_h, buf->stride); } } return 0; @@ -259,13 +281,8 @@ static int EmitRescaledAlphaYUV(const VP8Io* const io, WebPDecParams* const p) { return 0; } -static int IsAlphaMode(WEBP_CSP_MODE mode) { - return (mode == MODE_RGBA || mode == MODE_BGRA || mode == MODE_ARGB || - mode == MODE_RGBA_4444 || mode == MODE_YUVA); -} - static int InitYUVRescaler(const VP8Io* const io, WebPDecParams* const p) { - const int has_alpha = IsAlphaMode(p->output->colorspace); + const int has_alpha = WebPIsAlphaMode(p->output->colorspace); const WebPYUVABuffer* const buf = &p->output->u.YUVA; const int out_width = io->scaled_width; const int out_height = io->scaled_height; @@ -302,6 +319,7 @@ static int InitYUVRescaler(const VP8Io* const io, WebPDecParams* const p) { uv_in_height, uv_out_height, work + work_size + uv_work_size); p->emit = EmitRescaledYUV; + if (has_alpha) { WebPRescalerInit(&p->scaler_a, io->mb_w, io->mb_h, buf->a, 
out_width, out_height, buf->a_stride, 1, @@ -364,59 +382,73 @@ static int EmitRescaledRGB(const VP8Io* const io, WebPDecParams* const p) { static int ExportAlpha(WebPDecParams* const p, int y_pos) { const WebPRGBABuffer* const buf = &p->output->u.RGBA; - uint8_t* dst = buf->rgba + (p->last_y + y_pos) * buf->stride + - (p->output->colorspace == MODE_ARGB ? 0 : 3); + uint8_t* const base_rgba = buf->rgba + (p->last_y + y_pos) * buf->stride; + const WEBP_CSP_MODE colorspace = p->output->colorspace; + const int alpha_first = + (colorspace == MODE_ARGB || colorspace == MODE_Argb); + uint8_t* dst = base_rgba + (alpha_first ? 0 : 3); int num_lines_out = 0; + const int is_premult_alpha = WebPIsPremultipliedMode(colorspace); + const int width = p->scaler_a.dst_width; + while (WebPRescalerHasPendingOutput(&p->scaler_a)) { int i; assert(p->last_y + y_pos + num_lines_out < p->output->height); WebPRescalerExportRow(&p->scaler_a); - for (i = 0; i < p->scaler_a.dst_width; ++i) { - dst[4 * i] = p->scaler_a.dst[i]; - } + for (i = 0; i < width; ++i) dst[4 * i] = p->scaler_a.dst[i]; dst += buf->stride; ++num_lines_out; } + if (is_premult_alpha) { + WebPApplyAlphaMultiply(base_rgba, alpha_first, + width, num_lines_out, buf->stride); + } return num_lines_out; } static int ExportAlphaRGBA4444(WebPDecParams* const p, int y_pos) { const WebPRGBABuffer* const buf = &p->output->u.RGBA; - uint8_t* dst = buf->rgba + (p->last_y + y_pos) * buf->stride + 1; + uint8_t* base_rgba = buf->rgba + (p->last_y + y_pos) * buf->stride; + uint8_t* alpha_dst = base_rgba + 1; int num_lines_out = 0; + const WEBP_CSP_MODE colorspace = p->output->colorspace; + const int width = p->scaler_a.dst_width; + const int is_premult_alpha = WebPIsPremultipliedMode(colorspace); + while (WebPRescalerHasPendingOutput(&p->scaler_a)) { int i; assert(p->last_y + y_pos + num_lines_out < p->output->height); WebPRescalerExportRow(&p->scaler_a); - for (i = 0; i < p->scaler_a.dst_width; ++i) { + for (i = 0; i < width; ++i) { // Fill 
in the alpha value (converted to 4 bits). - const uint32_t alpha_val = clip((p->scaler_a.dst[i] + 8) >> 4, 15); - dst[2 * i] = (dst[2 * i] & 0xf0) | alpha_val; + const uint32_t alpha_val = VP8Clip4Bits(p->scaler_a.dst[i]); + alpha_dst[2 * i] = (alpha_dst[2 * i] & 0xf0) | alpha_val; } - dst += buf->stride; + alpha_dst += buf->stride; ++num_lines_out; } + if (is_premult_alpha) { + WebPApplyAlphaMultiply4444(base_rgba, width, num_lines_out, buf->stride); + } return num_lines_out; } static int EmitRescaledAlphaRGB(const VP8Io* const io, WebPDecParams* const p) { if (io->a != NULL) { - int (* const output_func)(WebPDecParams* const, int) = - (p->output->colorspace == MODE_RGBA_4444) ? ExportAlphaRGBA4444 - : ExportAlpha; WebPRescaler* const scaler = &p->scaler_a; - int j = 0, pos = 0; + int j = 0; + int pos = 0; while (j < io->mb_h) { j += WebPRescalerImport(scaler, io->mb_h - j, io->a + j * io->width, io->width); - pos += output_func(p, pos); + pos += p->emit_alpha_row(p, pos); } } return 0; } static int InitRGBRescaler(const VP8Io* const io, WebPDecParams* const p) { - const int has_alpha = IsAlphaMode(p->output->colorspace); + const int has_alpha = WebPIsAlphaMode(p->output->colorspace); const int out_width = io->scaled_width; const int out_height = io->scaled_height; const int uv_in_width = (io->mb_w + 1) >> 1; @@ -459,6 +491,12 @@ static int InitRGBRescaler(const VP8Io* const io, WebPDecParams* const p) { io->mb_w, out_width, io->mb_h, out_height, work + 3 * work_size); p->emit_alpha = EmitRescaledAlphaRGB; + if (p->output->colorspace == MODE_RGBA_4444 || + p->output->colorspace == MODE_rgbA_4444) { + p->emit_alpha_row = ExportAlphaRGBA4444; + } else { + p->emit_alpha_row = ExportAlpha; + } } return 1; } @@ -469,12 +507,13 @@ static int InitRGBRescaler(const VP8Io* const io, WebPDecParams* const p) { static int CustomSetup(VP8Io* io) { WebPDecParams* const p = (WebPDecParams*)io->opaque; const WEBP_CSP_MODE colorspace = p->output->colorspace; - const int is_rgb = 
(colorspace < MODE_YUV); - const int is_alpha = IsAlphaMode(colorspace); + const int is_rgb = WebPIsRGBMode(colorspace); + const int is_alpha = WebPIsAlphaMode(colorspace); p->memory = NULL; p->emit = NULL; p->emit_alpha = NULL; + p->emit_alpha_row = NULL; if (!WebPIoInitFromOptions(p->options, io, is_alpha ? MODE_YUV : MODE_YUVA)) { return 0; } @@ -505,10 +544,12 @@ static int CustomSetup(VP8Io* io) { p->emit = EmitYUV; } if (is_alpha) { // need transparency output + if (WebPIsPremultipliedMode(colorspace)) WebPInitPremultiply(); p->emit_alpha = - is_rgb ? (colorspace == MODE_RGBA_4444 ? EmitAlphaRGBA4444 - : EmitAlphaRGB) - : EmitAlphaYUV; + (colorspace == MODE_RGBA_4444 || colorspace == MODE_rgbA_4444) ? + EmitAlphaRGBA4444 + : is_rgb ? EmitAlphaRGB + : EmitAlphaYUV; } } diff --git a/src/dec/vp8i.h b/src/dec/vp8i.h index d811ca51..af96dc91 100644 --- a/src/dec/vp8i.h +++ b/src/dec/vp8i.h @@ -276,7 +276,7 @@ struct VP8Decoder { // extensions const uint8_t* alpha_data_; // compressed alpha data (if present) size_t alpha_data_size_; - uint8_t* alpha_plane_; // output + uint8_t* alpha_plane_; // output. Persistent, contains the whole data. int layer_colorspace_; const uint8_t* layer_data_; // compressed layer data (if present) diff --git a/src/dec/webp.c b/src/dec/webp.c index 22e998b4..67614796 100644 --- a/src/dec/webp.c +++ b/src/dec/webp.c @@ -477,7 +477,7 @@ static uint8_t* Decode(WEBP_CSP_MODE mode, const uint8_t* data, WebPCopyDecBuffer(&output, keep_info); } // return decoded samples (don't clear 'output'!) - return (mode >= MODE_YUV) ? output.u.YUVA.y : output.u.RGBA.rgba; + return WebPIsRGBMode(mode) ? 
output.u.RGBA.rgba : output.u.YUVA.y; } uint8_t* WebPDecodeRGB(const uint8_t* data, size_t data_size, @@ -685,7 +685,7 @@ int WebPIoInitFromOptions(const WebPDecoderOptions* const options, h = options->crop_height; x = options->crop_left; y = options->crop_top; - if (src_colorspace >= MODE_YUV) { // only snap for YUV420 or YUV422 + if (!WebPIsRGBMode(src_colorspace)) { // only snap for YUV420 or YUV422 x &= ~1; y &= ~1; // TODO(later): only for YUV420, not YUV422. } diff --git a/src/dec/webpi.h b/src/dec/webpi.h index 9d29e633..527a3888 100644 --- a/src/dec/webpi.h +++ b/src/dec/webpi.h @@ -24,6 +24,7 @@ extern "C" { typedef struct WebPDecParams WebPDecParams; typedef int (*OutputFunc)(const VP8Io* const io, WebPDecParams* const p); +typedef int (*OutputRowFunc)(WebPDecParams* const p, int y_pos); struct WebPDecParams { WebPDecBuffer* output; // output buffer. @@ -34,9 +35,11 @@ struct WebPDecParams { const WebPDecoderOptions* options; // if not NULL, use alt decoding features // rescalers WebPRescaler scaler_y, scaler_u, scaler_v, scaler_a; - void* memory; // overall scratch memory for the output work. - OutputFunc emit; // output RGB or YUV samples - OutputFunc emit_alpha; // output alpha channel + void* memory; // overall scratch memory for the output work. + + OutputFunc emit; // output RGB or YUV samples + OutputFunc emit_alpha; // output alpha channel + OutputRowFunc emit_alpha_row; // output one line of rescaled alpha values }; // Should be called first, before any use of the WebPDecParams object. diff --git a/src/dsp/dsp.h b/src/dsp/dsp.h index db9c1455..ff78b4c7 100644 --- a/src/dsp/dsp.h +++ b/src/dsp/dsp.h @@ -155,7 +155,6 @@ typedef void (*WebPUpsampleLinePairFunc)( // Fancy upsampling functions to convert YUV to RGB(A) modes extern WebPUpsampleLinePairFunc WebPUpsamplers[/* MODE_LAST */]; -extern WebPUpsampleLinePairFunc WebPUpsamplersKeepAlpha[/* MODE_LAST */]; // Initializes SSE2 version of the fancy upsamplers. 
void WebPInitUpsamplersSSE2(void); @@ -169,7 +168,6 @@ typedef void (*WebPSampleLinePairFunc)( uint8_t* top_dst, uint8_t* bottom_dst, int len); extern const WebPSampleLinePairFunc WebPSamplers[/* MODE_LAST */]; -extern const WebPSampleLinePairFunc WebPSamplersKeepAlpha[/* MODE_LAST */]; // YUV444->RGB converters typedef void (*WebPYUV444Converter)(const uint8_t* y, @@ -181,6 +179,23 @@ extern const WebPYUV444Converter WebPYUV444Converters[/* MODE_LAST */]; // Main function to be called void WebPInitUpsamplers(void); +//------------------------------------------------------------------------------ +// Pre-multiply planes with alpha values + +// Apply alpha pre-multiply on an rgba, bgra or argb plane of size w * h. +// alpha_first should be 1 for argb, 0 for rgba or bgra (where alpha is last). +extern void (*WebPApplyAlphaMultiply)( + uint8_t* rgba, int alpha_first, int w, int h, int stride); + +// Same, but specifically for RGBA4444 format +extern void (*WebPApplyAlphaMultiply4444)( + uint8_t* rgba4444, int w, int h, int stride); + +// To be called first before using the above. +void WebPInitPremultiply(void); + +void WebPInitPremultiplySSE2(void); // should not be called directly. 
+ //------------------------------------------------------------------------------ #if defined(__cplusplus) || defined(c_plusplus) diff --git a/src/dsp/lossless.c b/src/dsp/lossless.c index 51745875..e5a51ca7 100644 --- a/src/dsp/lossless.c +++ b/src/dsp/lossless.c @@ -19,6 +19,8 @@ extern "C" { #include #include "./lossless.h" #include "../dec/vp8li.h" +#include "../dsp/yuv.h" +#include "../dsp/dsp.h" #ifdef USE_LOSSLESS_ENCODER @@ -1041,8 +1043,7 @@ static void CopyOrSwap(const uint32_t* src, int num_pixels, uint8_t* dst, } void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels, - WEBP_CSP_MODE out_colorspace, - uint8_t* const rgba) { + WEBP_CSP_MODE out_colorspace, uint8_t* const rgba) { switch (out_colorspace) { case MODE_RGB: ConvertBGRAToRGB(in_data, num_pixels, rgba); @@ -1050,18 +1051,34 @@ void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels, case MODE_RGBA: ConvertBGRAToRGBA(in_data, num_pixels, rgba); break; + case MODE_rgbA: + ConvertBGRAToRGBA(in_data, num_pixels, rgba); + WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0); + break; case MODE_BGR: ConvertBGRAToBGR(in_data, num_pixels, rgba); break; case MODE_BGRA: CopyOrSwap(in_data, num_pixels, rgba, 1); break; + case MODE_bgrA: + CopyOrSwap(in_data, num_pixels, rgba, 1); + WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0); + break; case MODE_ARGB: CopyOrSwap(in_data, num_pixels, rgba, 0); break; + case MODE_Argb: + CopyOrSwap(in_data, num_pixels, rgba, 0); + WebPApplyAlphaMultiply(rgba, 1, num_pixels, 1, 0); + break; case MODE_RGBA_4444: ConvertBGRAToRGBA4444(in_data, num_pixels, rgba); break; + case MODE_rgbA_4444: + ConvertBGRAToRGBA4444(in_data, num_pixels, rgba); + WebPApplyAlphaMultiply4444(rgba, num_pixels, 1, 0); + break; case MODE_RGB_565: ConvertBGRAToRGB565(in_data, num_pixels, rgba); break; diff --git a/src/dsp/upsampling.c b/src/dsp/upsampling.c index d5fd902c..094c7311 100644 --- a/src/dsp/upsampling.c +++ b/src/dsp/upsampling.c @@ -23,7 +23,6 @@ extern 
"C" { // Fancy upsampling functions to convert YUV to RGB WebPUpsampleLinePairFunc WebPUpsamplers[MODE_LAST]; -WebPUpsampleLinePairFunc WebPUpsamplersKeepAlpha[MODE_LAST]; // Given samples laid out in a square as: // [a b] @@ -100,11 +99,6 @@ UPSAMPLE_FUNC(UpsampleBgraLinePair, VP8YuvToBgra, 4) UPSAMPLE_FUNC(UpsampleArgbLinePair, VP8YuvToArgb, 4) UPSAMPLE_FUNC(UpsampleRgba4444LinePair, VP8YuvToRgba4444, 2) UPSAMPLE_FUNC(UpsampleRgb565LinePair, VP8YuvToRgb565, 2) -// These variants don't erase the alpha value -UPSAMPLE_FUNC(UpsampleRgbaKeepAlphaLinePair, VP8YuvToRgb, 4) -UPSAMPLE_FUNC(UpsampleBgraKeepAlphaLinePair, VP8YuvToBgr, 4) -UPSAMPLE_FUNC(UpsampleArgbKeepAlphaLinePair, VP8YuvToArgbKeepA, 4) -UPSAMPLE_FUNC(UpsampleRgba4444KeepAlphaLinePair, VP8YuvToRgba4444KeepA, 2) #undef LOAD_UV #undef UPSAMPLE_FUNC @@ -145,11 +139,6 @@ SAMPLE_FUNC(SampleBgraLinePair, VP8YuvToBgra, 4) SAMPLE_FUNC(SampleArgbLinePair, VP8YuvToArgb, 4) SAMPLE_FUNC(SampleRgba4444LinePair, VP8YuvToRgba4444, 2) SAMPLE_FUNC(SampleRgb565LinePair, VP8YuvToRgb565, 2) -// These variants don't erase the alpha value -SAMPLE_FUNC(SampleRgbaKeepAlphaLinePair, VP8YuvToRgb, 4) -SAMPLE_FUNC(SampleBgraKeepAlphaLinePair, VP8YuvToBgr, 4) -SAMPLE_FUNC(SampleArgbKeepAlphaLinePair, VP8YuvToArgbKeepA, 4) -SAMPLE_FUNC(SampleRgba4444KeepAlphaLinePair, VP8YuvToRgba4444KeepA, 2) #undef SAMPLE_FUNC @@ -160,17 +149,11 @@ const WebPSampleLinePairFunc WebPSamplers[MODE_LAST] = { SampleBgraLinePair, // MODE_BGRA SampleArgbLinePair, // MODE_ARGB SampleRgba4444LinePair, // MODE_RGBA_4444 - SampleRgb565LinePair // MODE_RGB_565 -}; - -const WebPSampleLinePairFunc WebPSamplersKeepAlpha[MODE_LAST] = { - SampleRgbLinePair, // MODE_RGB - SampleRgbaKeepAlphaLinePair, // MODE_RGBA - SampleBgrLinePair, // MODE_BGR - SampleBgraKeepAlphaLinePair, // MODE_BGRA - SampleArgbKeepAlphaLinePair, // MODE_ARGB - SampleRgba4444KeepAlphaLinePair, // MODE_RGBA_4444 - SampleRgb565LinePair // MODE_RGB_565 + SampleRgb565LinePair, // MODE_RGB_565 + 
SampleRgbaLinePair, // MODE_rgbA SampleBgraLinePair, // MODE_bgrA SampleArgbLinePair, // MODE_Argb SampleRgba4444LinePair // MODE_rgbA_4444 }; //------------------------------------------------------------------------------ @@ -200,9 +183,82 @@ const WebPYUV444Converter WebPYUV444Converters[MODE_LAST] = { Yuv444ToBgra, // MODE_BGRA Yuv444ToArgb, // MODE_ARGB Yuv444ToRgba4444, // MODE_RGBA_4444 - Yuv444ToRgb565 // MODE_RGB_565 + Yuv444ToRgb565, // MODE_RGB_565 + Yuv444ToRgba, // MODE_rgbA + Yuv444ToBgra, // MODE_bgrA + Yuv444ToArgb, // MODE_Argb + Yuv444ToRgba4444 // MODE_rgbA_4444 }; +//------------------------------------------------------------------------------ +// Premultiplied modes + +// non-dithered modes + +// (x * a * 32897) >> 23 is bit-wise equivalent to (int)(x * a / 255.) +// for all 8-bit x and a. For bit-wise equivalence to (int)(x * a / 255. + .5), +// one can use instead: (x * a * 65793 + (1 << 23)) >> 24 +#if 1 // (int)(x * a / 255.) +#define MULTIPLIER(a) ((a) * 32897UL) +#define PREMULTIPLY(x, m) (((x) * (m)) >> 23) +#else // (int)(x * a / 255. + .5) +#define MULTIPLIER(a) ((a) * 65793UL) +#define PREMULTIPLY(x, m) (((x) * (m) + (1UL << 23)) >> 24) +#endif + +static void ApplyAlphaMultiply(uint8_t* rgba, int alpha_1rst, + int w, int h, int stride) { + while (h-- > 0) { + uint8_t* const rgb = rgba + (alpha_1rst ? 1 : 0); + const uint8_t* const alpha = rgba + (alpha_1rst ? 
0 : 3); + int i; + for (i = 0; i < w; ++i) { + const uint32_t a = alpha[4 * i]; + if (a != 0xff) { + const uint32_t mult = MULTIPLIER(a); + rgb[4 * i + 0] = PREMULTIPLY(rgb[4 * i + 0], mult); + rgb[4 * i + 1] = PREMULTIPLY(rgb[4 * i + 1], mult); + rgb[4 * i + 2] = PREMULTIPLY(rgb[4 * i + 2], mult); + } + } + rgba += stride; + } +} +#undef MULTIPLIER +#undef PREMULTIPLY + +// rgbA4444 + +#define MULTIPLIER(a) ((a) * 0x11) +#define PREMULTIPLY(x, m) (((x) * (m)) >> 12) + +static WEBP_INLINE uint8_t dither_hi(uint8_t x) { return (x & 0xf0) | (x >> 4); } +static WEBP_INLINE uint8_t dither_lo(uint8_t x) { return (x & 0x0f) | (x << 4); } + +static void ApplyAlphaMultiply4444(uint8_t* rgba4444, + int w, int h, int stride) { + while (h-- > 0) { + int i; + for (i = 0; i < w; ++i) { + const uint8_t a = dither_lo(rgba4444[2 * i + 1]); + const uint32_t mult = MULTIPLIER(a); + const uint8_t r = PREMULTIPLY(dither_hi(rgba4444[2 * i + 0]), mult); + const uint8_t g = PREMULTIPLY(dither_lo(rgba4444[2 * i + 0]), mult); + const uint8_t b = PREMULTIPLY(dither_hi(rgba4444[2 * i + 1]), mult); + rgba4444[2 * i + 0] = (r & 0xf0) | (g & 0x0f); + rgba4444[2 * i + 1] = (b & 0xf0) | a; + } + rgba4444 += stride; + } +} +#undef MULTIPLIER +#undef PREMULTIPLY + +void (*WebPApplyAlphaMultiply)(uint8_t*, int, int, int, int) + = ApplyAlphaMultiply; +void (*WebPApplyAlphaMultiply4444)(uint8_t*, int, int, int) + = ApplyAlphaMultiply4444; + //------------------------------------------------------------------------------ // Main call @@ -216,16 +272,8 @@ void WebPInitUpsamplers(void) { WebPUpsamplers[MODE_RGBA_4444] = UpsampleRgba4444LinePair; WebPUpsamplers[MODE_RGB_565] = UpsampleRgb565LinePair; - WebPUpsamplersKeepAlpha[MODE_RGB] = UpsampleRgbLinePair; - WebPUpsamplersKeepAlpha[MODE_RGBA] = UpsampleRgbaKeepAlphaLinePair; - WebPUpsamplersKeepAlpha[MODE_BGR] = UpsampleBgrLinePair; - WebPUpsamplersKeepAlpha[MODE_BGRA] = UpsampleBgraKeepAlphaLinePair; - WebPUpsamplersKeepAlpha[MODE_ARGB] = 
UpsampleArgbKeepAlphaLinePair; - WebPUpsamplersKeepAlpha[MODE_RGBA_4444] = UpsampleRgba4444KeepAlphaLinePair; - WebPUpsamplersKeepAlpha[MODE_RGB_565] = UpsampleRgb565LinePair; - // If defined, use CPUInfo() to overwrite some pointers with faster versions. - if (VP8GetCPUInfo) { + if (VP8GetCPUInfo != NULL) { #if defined(WEBP_USE_SSE2) if (VP8GetCPUInfo(kSSE2)) { WebPInitUpsamplersSSE2(); @@ -235,6 +283,26 @@ void WebPInitUpsamplers(void) { #endif // FANCY_UPSAMPLING } +void WebPInitPremultiply(void) { + WebPApplyAlphaMultiply = ApplyAlphaMultiply; + WebPApplyAlphaMultiply4444 = ApplyAlphaMultiply4444; + +#ifdef FANCY_UPSAMPLING + WebPUpsamplers[MODE_rgbA] = UpsampleRgbaLinePair; + WebPUpsamplers[MODE_bgrA] = UpsampleBgraLinePair; + WebPUpsamplers[MODE_Argb] = UpsampleArgbLinePair; + WebPUpsamplers[MODE_rgbA_4444] = UpsampleRgba4444LinePair; + + if (VP8GetCPUInfo != NULL) { +#if defined(WEBP_USE_SSE2) + if (VP8GetCPUInfo(kSSE2)) { + WebPInitPremultiplySSE2(); + } +#endif + } +#endif // FANCY_UPSAMPLING +} + #if defined(__cplusplus) || defined(c_plusplus) } // extern "C" #endif diff --git a/src/dsp/upsampling_sse2.c b/src/dsp/upsampling_sse2.c index 6116b0a5..93fc2cca 100644 --- a/src/dsp/upsampling_sse2.c +++ b/src/dsp/upsampling_sse2.c @@ -176,9 +176,6 @@ SSE2_UPSAMPLE_FUNC(UpsampleRgbLinePairSSE2, VP8YuvToRgb, 3) SSE2_UPSAMPLE_FUNC(UpsampleBgrLinePairSSE2, VP8YuvToBgr, 3) SSE2_UPSAMPLE_FUNC(UpsampleRgbaLinePairSSE2, VP8YuvToRgba, 4) SSE2_UPSAMPLE_FUNC(UpsampleBgraLinePairSSE2, VP8YuvToBgra, 4) -// These two don't erase the alpha value -SSE2_UPSAMPLE_FUNC(UpsampleRgbKeepAlphaLinePairSSE2, VP8YuvToRgb, 4) -SSE2_UPSAMPLE_FUNC(UpsampleBgrKeepAlphaLinePairSSE2, VP8YuvToBgr, 4) #undef GET_M #undef PACK_AND_STORE @@ -190,7 +187,6 @@ SSE2_UPSAMPLE_FUNC(UpsampleBgrKeepAlphaLinePairSSE2, VP8YuvToBgr, 4) //------------------------------------------------------------------------------ extern WebPUpsampleLinePairFunc WebPUpsamplers[/* MODE_LAST */]; -extern 
WebPUpsampleLinePairFunc WebPUpsamplersKeepAlpha[/* MODE_LAST */]; #endif // FANCY_UPSAMPLING @@ -200,11 +196,13 @@ void WebPInitUpsamplersSSE2(void) { WebPUpsamplers[MODE_RGBA] = UpsampleRgbaLinePairSSE2; WebPUpsamplers[MODE_BGR] = UpsampleBgrLinePairSSE2; WebPUpsamplers[MODE_BGRA] = UpsampleBgraLinePairSSE2; +#endif // FANCY_UPSAMPLING +} - WebPUpsamplersKeepAlpha[MODE_RGB] = UpsampleRgbLinePairSSE2; - WebPUpsamplersKeepAlpha[MODE_RGBA] = UpsampleRgbKeepAlphaLinePairSSE2; - WebPUpsamplersKeepAlpha[MODE_BGR] = UpsampleBgrLinePairSSE2; - WebPUpsamplersKeepAlpha[MODE_BGRA] = UpsampleBgrKeepAlphaLinePairSSE2; +void WebPInitPremultiplySSE2(void) { +#ifdef FANCY_UPSAMPLING + WebPUpsamplers[MODE_rgbA] = UpsampleRgbaLinePairSSE2; + WebPUpsamplers[MODE_bgrA] = UpsampleBgraLinePairSSE2; #endif // FANCY_UPSAMPLING } diff --git a/src/dsp/yuv.h b/src/dsp/yuv.h index 5000b7d3..8f52acb2 100644 --- a/src/dsp/yuv.h +++ b/src/dsp/yuv.h @@ -99,6 +99,11 @@ static WEBP_INLINE void VP8YuvToRgba(uint8_t y, uint8_t u, uint8_t v, rgba[3] = 0xff; } +static WEBP_INLINE uint32_t VP8Clip4Bits(uint8_t c) { + const uint32_t v = (c + 8) >> 4; + return (v > 15) ? 15 : v; +} + // Must be called before everything, to initialize the tables. void VP8YUVInit(void); diff --git a/src/webp/decode.h b/src/webp/decode.h index e3f0d346..dc8140b7 100644 --- a/src/webp/decode.h +++ b/src/webp/decode.h @@ -113,12 +113,35 @@ typedef enum { MODE_RGB = 0, MODE_RGBA = 1, MODE_BGR = 2, MODE_BGRA = 3, MODE_ARGB = 4, MODE_RGBA_4444 = 5, MODE_RGB_565 = 6, + // RGB-premultiplied transparent modes (alpha value is preserved) + MODE_rgbA = 7, + MODE_bgrA = 8, + MODE_Argb = 9, + MODE_rgbA_4444 = 10, // YUV modes must come after RGB ones. - MODE_YUV = 7, MODE_YUVA = 8, // yuv 4:2:0 - MODE_LAST = 9 + MODE_YUV = 11, MODE_YUVA = 12, // yuv 4:2:0 + MODE_LAST = 13 } WEBP_CSP_MODE; -// Generic structure for describing the sample buffer. 
+// Some useful helper functions: +static WEBP_INLINE int WebPIsPremultipliedMode(WEBP_CSP_MODE mode) { + return (mode == MODE_rgbA || mode == MODE_bgrA || mode == MODE_Argb || + mode == MODE_rgbA_4444); +} + +static WEBP_INLINE int WebPIsAlphaMode(WEBP_CSP_MODE mode) { + return (mode == MODE_RGBA || mode == MODE_BGRA || mode == MODE_ARGB || + mode == MODE_RGBA_4444 || mode == MODE_YUVA || + WebPIsPremultipliedMode(mode)); +} + +static WEBP_INLINE int WebPIsRGBMode(WEBP_CSP_MODE mode) { + return (mode < MODE_YUV); +} + +//------------------------------------------------------------------------------ +// WebPDecBuffer: Generic structure for describing the sample buffer. + typedef struct { // view as RGBA uint8_t* rgba; // pointer to RGBA samples int stride; // stride in bytes from one scanline to the next. diff --git a/src/webp/decode_vp8.h b/src/webp/decode_vp8.h index dde7fc94..7cbb2457 100644 --- a/src/webp/decode_vp8.h +++ b/src/webp/decode_vp8.h @@ -99,7 +99,9 @@ struct VP8Io { int use_scaling; int scaled_width, scaled_height; - // pointer to the alpha data (if present) corresponding to the rows + // If non-NULL, pointer to the alpha data (if present) corresponding to the + // start of the current row (That is: it is pre-offset by mb_y and takes + // cropping into account). const uint8_t* a; };