From 48f827574e7e3926d666b0c02d439b0599c3aace Mon Sep 17 00:00:00 2001 From: Pascal Massimino Date: Mon, 4 Jun 2012 07:40:32 -0700 Subject: [PATCH] add colorspace for premultiplied alpha The new modes are MODE_rgbA MODE_bgrA MODE_Argb MODE_rgbA_4444 It's binary incompatible, since the enums changed. While at it, I removed the now unneeded KeepAlpha methods. -> Saved ~12k of code! * made explicit mention that alpha_plane is persistent, so we have access to the full alpha plane data at all times. Incremental decoding of alpha was planned for, but not implemented. So it is better not to carry this constraint for now, and to keep the code simpler until we revisit that. Change-Id: Idaba281a6ca819965ca062d1c23329f36d90c7ff --- src/dec/buffer.c | 11 ++- src/dec/frame.c | 14 +++- src/dec/io.c | 161 ++++++++++++++++++++++++-------------- src/dec/vp8i.h | 2 +- src/dec/webp.c | 4 +- src/dec/webpi.h | 9 ++- src/dsp/dsp.h | 19 ++++- src/dsp/lossless.c | 21 ++++- src/dsp/upsampling.c | 132 +++++++++++++++++++++++-------- src/dsp/upsampling_sse2.c | 14 ++-- src/dsp/yuv.h | 5 ++ src/webp/decode.h | 29 ++++++- src/webp/decode_vp8.h | 4 +- 13 files changed, 303 insertions(+), 122 deletions(-) diff --git a/src/dec/buffer.c b/src/dec/buffer.c index 13e01ee0..ad953737 100644 --- a/src/dec/buffer.c +++ b/src/dec/buffer.c @@ -22,7 +22,10 @@ extern "C" { // WebPDecBuffer // Number of bytes per pixel for the different color-spaces. -static const int kModeBpp[MODE_LAST] = { 3, 4, 3, 4, 4, 2, 2, 1, 1 }; +static const int kModeBpp[MODE_LAST] = { + 3, 4, 3, 4, 4, 2, 2, + 4, 4, 4, 2, // pre-multiplied modes + 1, 1 }; // Check that webp_csp_mode is within the bounds of WEBP_CSP_MODE. 
// Convert to an integer to handle both the unsigned/signed enum cases @@ -38,7 +41,7 @@ static VP8StatusCode CheckDecBuffer(const WebPDecBuffer* const buffer) { const int height = buffer->height; if (!IsValidColorspace(mode)) { ok = 0; - } else if (mode >= MODE_YUV) { // YUV checks + } else if (!WebPIsRGBMode(mode)) { // YUV checks const WebPYUVABuffer* const buf = &buffer->u.YUVA; const size_t size = buf->y_stride * height; const size_t u_size = buf->u_stride * ((height + 1) / 2); @@ -81,7 +84,7 @@ static VP8StatusCode AllocateBuffer(WebPDecBuffer* const buffer) { const int stride = w * kModeBpp[mode]; const uint64_t size = (uint64_t)stride * h; - if (mode >= MODE_YUV) { + if (!WebPIsRGBMode(mode)) { uv_stride = (w + 1) / 2; uv_size = (uint64_t)uv_stride * ((h + 1) / 2); if (mode == MODE_YUVA) { @@ -101,7 +104,7 @@ static VP8StatusCode AllocateBuffer(WebPDecBuffer* const buffer) { return VP8_STATUS_OUT_OF_MEMORY; } - if (mode >= MODE_YUV) { // YUVA initialization + if (!WebPIsRGBMode(mode)) { // YUVA initialization WebPYUVABuffer* const buf = &buffer->u.YUVA; buf->y = output; buf->y_stride = stride; diff --git a/src/dec/frame.c b/src/dec/frame.c index 50982dbd..16666627 100644 --- a/src/dec/frame.c +++ b/src/dec/frame.c @@ -200,7 +200,12 @@ static int FinishRow(VP8Decoder* const dec, VP8Io* const io) { y_end = io->crop_bottom; // make sure we don't overflow on last row. } io->a = NULL; - if (dec->alpha_data_ && y_start < y_end) { + if (dec->alpha_data_ != NULL && y_start < y_end) { + // TODO(skal): several things to correct here: + // * testing presence of alpha with dec->alpha_data_ is not a good idea + // * we're actually decompressing the full plane only once. It should be + // more obvious from signature. + // * we could free alpha_data_ right after this call, but we don't own. 
io->a = VP8DecompressAlphaRows(dec, y_start, y_end - y_start); if (io->a == NULL) { return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR, @@ -214,7 +219,7 @@ static int FinishRow(VP8Decoder* const dec, VP8Io* const io) { io->y += dec->cache_y_stride_ * delta_y; io->u += dec->cache_uv_stride_ * (delta_y >> 1); io->v += dec->cache_uv_stride_ * (delta_y >> 1); - if (io->a) { + if (io->a != NULL) { io->a += io->width * delta_y; } } @@ -222,7 +227,7 @@ static int FinishRow(VP8Decoder* const dec, VP8Io* const io) { io->y += io->crop_left; io->u += io->crop_left >> 1; io->v += io->crop_left >> 1; - if (io->a) { + if (io->a != NULL) { io->a += io->crop_left; } io->mb_y = y_start - io->crop_top; @@ -417,7 +422,8 @@ static int AllocateMemory(VP8Decoder* const dec) { + kFilterExtraRows[dec->filter_type_]) * 3 / 2; const size_t cache_size = top_size * cache_height; const size_t alpha_size = - dec->alpha_data_ ? (dec->pic_hdr_.width_ * dec->pic_hdr_.height_) : 0; + (dec->alpha_data_ != NULL) ? dec->pic_hdr_.width_ * dec->pic_hdr_.height_ + : 0; const size_t needed = intra_pred_mode_size + top_size + mb_info_size + f_info_size + yuv_size + coeffs_size diff --git a/src/dec/io.c b/src/dec/io.c index 2760a29d..82c7c0d3 100644 --- a/src/dec/io.c +++ b/src/dec/io.c @@ -52,9 +52,7 @@ static int EmitSampledRGB(const VP8Io* const io, WebPDecParams* const p) { const uint8_t* y_src = io->y; const uint8_t* u_src = io->u; const uint8_t* v_src = io->v; - const WebPSampleLinePairFunc sample = - io->a ? 
WebPSamplersKeepAlpha[output->colorspace] - : WebPSamplers[output->colorspace]; + const WebPSampleLinePairFunc sample = WebPSamplers[output->colorspace]; const int mb_w = io->mb_w; const int last = io->mb_h - 1; int j; @@ -106,9 +104,7 @@ static int EmitFancyRGB(const VP8Io* const io, WebPDecParams* const p) { int num_lines_out = io->mb_h; // a priori guess const WebPRGBABuffer* const buf = &p->output->u.RGBA; uint8_t* dst = buf->rgba + io->mb_y * buf->stride; - const WebPUpsampleLinePairFunc upsample = - io->a ? WebPUpsamplersKeepAlpha[p->output->colorspace] - : WebPUpsamplers[p->output->colorspace]; + WebPUpsampleLinePairFunc upsample = WebPUpsamplers[p->output->colorspace]; const uint8_t* cur_y = io->y; const uint8_t* cur_u = io->u; const uint8_t* cur_v = io->v; @@ -124,8 +120,6 @@ static int EmitFancyRGB(const VP8Io* const io, WebPDecParams* const p) { upsample(NULL, cur_y, cur_u, cur_v, cur_u, cur_v, NULL, dst, mb_w); } else { // We can finish the left-over line from previous call. - // Warning! Don't overwrite the alpha values (if any), as they - // are not lagging one line behind but are already written. upsample(p->tmp_y, cur_y, top_u, top_v, cur_u, cur_v, dst - buf->stride, dst, mb_w); ++num_lines_out; @@ -184,45 +178,73 @@ static int EmitAlphaYUV(const VP8Io* const io, WebPDecParams* const p) { } static int EmitAlphaRGB(const VP8Io* const io, WebPDecParams* const p) { - const int mb_w = io->mb_w; - const int mb_h = io->mb_h; - int i, j; - const WebPRGBABuffer* const buf = &p->output->u.RGBA; - uint8_t* dst = buf->rgba + io->mb_y * buf->stride + - (p->output->colorspace == MODE_ARGB ? 
0 : 3); const uint8_t* alpha = io->a; - if (alpha) { - for (j = 0; j < mb_h; ++j) { - for (i = 0; i < mb_w; ++i) { - dst[4 * i] = alpha[i]; + if (alpha != NULL) { + const int mb_w = io->mb_w; + const int mb_h = io->mb_h; + int i, j; + const WEBP_CSP_MODE colorspace = p->output->colorspace; + const int alpha_first = + (colorspace == MODE_ARGB || colorspace == MODE_Argb); + const WebPRGBABuffer* const buf = &p->output->u.RGBA; + int start_y = io->mb_y; + int num_rows = mb_h; + + // We compensate for the 1-line delay of fancy upscaler. + // This is similar to EmitFancyRGB(). + if (io->fancy_upsampling) { + if (start_y == 0) { + // We don't process the last row yet. It'll be done during next call. + --num_rows; + } else { + --start_y; + // Fortunately, *alpha data is persistent, so we can go back + // one row and finish alpha blending, now that the fancy upscaler + // completed the YUV->RGB interpolation. + alpha -= io->width; + } + if (io->crop_top + io->mb_y + mb_h == io->crop_bottom) { + // If it's the very last call, we process all the remaining rows! + num_rows = io->crop_bottom - io->crop_top - start_y; + } + } + { + uint8_t* const base_rgba = buf->rgba + start_y * buf->stride; + uint8_t* dst = base_rgba + (alpha_first ? 0 : 3); + for (j = 0; j < num_rows; ++j) { + for (i = 0; i < mb_w; ++i) dst[4 * i] = alpha[i]; + alpha += io->width; + dst += buf->stride; + } + if (WebPIsPremultipliedMode(colorspace)) { + WebPApplyAlphaMultiply(base_rgba, alpha_first, + mb_w, num_rows, buf->stride); } - alpha += io->width; - dst += buf->stride; } } return 0; } -static WEBP_INLINE uint32_t clip(uint32_t v, uint32_t max_value) { - return (v > max_value) ? 
max_value : v; -} - static int EmitAlphaRGBA4444(const VP8Io* const io, WebPDecParams* const p) { - const int mb_w = io->mb_w; - const int mb_h = io->mb_h; - int i, j; - const WebPRGBABuffer* const buf = &p->output->u.RGBA; - uint8_t* dst = buf->rgba + io->mb_y * buf->stride + 1; const uint8_t* alpha = io->a; - if (alpha) { + if (alpha != NULL) { + const int mb_w = io->mb_w; + const int mb_h = io->mb_h; + int i, j; + const WebPRGBABuffer* const buf = &p->output->u.RGBA; + uint8_t* const base_rgba = buf->rgba + io->mb_y * buf->stride; + uint8_t* alpha_dst = base_rgba + 1; for (j = 0; j < mb_h; ++j) { for (i = 0; i < mb_w; ++i) { // Fill in the alpha value (converted to 4 bits). - const uint32_t alpha_val = clip((alpha[i] + 8) >> 4, 15); - dst[2 * i] = (dst[2 * i] & 0xf0) | alpha_val; + const uint32_t alpha_val = VP8Clip4Bits(alpha[i]); + alpha_dst[2 * i] = (alpha_dst[2 * i] & 0xf0) | alpha_val; } alpha += io->width; - dst += buf->stride; + alpha_dst += buf->stride; + } + if (p->output->colorspace == MODE_rgbA_4444) { + WebPApplyAlphaMultiply4444(base_rgba, mb_w, mb_h, buf->stride); } } return 0; @@ -259,13 +281,8 @@ static int EmitRescaledAlphaYUV(const VP8Io* const io, WebPDecParams* const p) { return 0; } -static int IsAlphaMode(WEBP_CSP_MODE mode) { - return (mode == MODE_RGBA || mode == MODE_BGRA || mode == MODE_ARGB || - mode == MODE_RGBA_4444 || mode == MODE_YUVA); -} - static int InitYUVRescaler(const VP8Io* const io, WebPDecParams* const p) { - const int has_alpha = IsAlphaMode(p->output->colorspace); + const int has_alpha = WebPIsAlphaMode(p->output->colorspace); const WebPYUVABuffer* const buf = &p->output->u.YUVA; const int out_width = io->scaled_width; const int out_height = io->scaled_height; @@ -302,6 +319,7 @@ static int InitYUVRescaler(const VP8Io* const io, WebPDecParams* const p) { uv_in_height, uv_out_height, work + work_size + uv_work_size); p->emit = EmitRescaledYUV; + if (has_alpha) { WebPRescalerInit(&p->scaler_a, io->mb_w, io->mb_h, buf->a, 
out_width, out_height, buf->a_stride, 1, @@ -364,59 +382,73 @@ static int EmitRescaledRGB(const VP8Io* const io, WebPDecParams* const p) { static int ExportAlpha(WebPDecParams* const p, int y_pos) { const WebPRGBABuffer* const buf = &p->output->u.RGBA; - uint8_t* dst = buf->rgba + (p->last_y + y_pos) * buf->stride + - (p->output->colorspace == MODE_ARGB ? 0 : 3); + uint8_t* const base_rgba = buf->rgba + (p->last_y + y_pos) * buf->stride; + const WEBP_CSP_MODE colorspace = p->output->colorspace; + const int alpha_first = + (colorspace == MODE_ARGB || colorspace == MODE_Argb); + uint8_t* dst = base_rgba + (alpha_first ? 0 : 3); int num_lines_out = 0; + const int is_premult_alpha = WebPIsPremultipliedMode(colorspace); + const int width = p->scaler_a.dst_width; + while (WebPRescalerHasPendingOutput(&p->scaler_a)) { int i; assert(p->last_y + y_pos + num_lines_out < p->output->height); WebPRescalerExportRow(&p->scaler_a); - for (i = 0; i < p->scaler_a.dst_width; ++i) { - dst[4 * i] = p->scaler_a.dst[i]; - } + for (i = 0; i < width; ++i) dst[4 * i] = p->scaler_a.dst[i]; dst += buf->stride; ++num_lines_out; } + if (is_premult_alpha) { + WebPApplyAlphaMultiply(base_rgba, alpha_first, + width, num_lines_out, buf->stride); + } return num_lines_out; } static int ExportAlphaRGBA4444(WebPDecParams* const p, int y_pos) { const WebPRGBABuffer* const buf = &p->output->u.RGBA; - uint8_t* dst = buf->rgba + (p->last_y + y_pos) * buf->stride + 1; + uint8_t* base_rgba = buf->rgba + (p->last_y + y_pos) * buf->stride; + uint8_t* alpha_dst = base_rgba + 1; int num_lines_out = 0; + const WEBP_CSP_MODE colorspace = p->output->colorspace; + const int width = p->scaler_a.dst_width; + const int is_premult_alpha = WebPIsPremultipliedMode(colorspace); + while (WebPRescalerHasPendingOutput(&p->scaler_a)) { int i; assert(p->last_y + y_pos + num_lines_out < p->output->height); WebPRescalerExportRow(&p->scaler_a); - for (i = 0; i < p->scaler_a.dst_width; ++i) { + for (i = 0; i < width; ++i) { // Fill 
in the alpha value (converted to 4 bits). - const uint32_t alpha_val = clip((p->scaler_a.dst[i] + 8) >> 4, 15); - dst[2 * i] = (dst[2 * i] & 0xf0) | alpha_val; + const uint32_t alpha_val = VP8Clip4Bits(p->scaler_a.dst[i]); + alpha_dst[2 * i] = (alpha_dst[2 * i] & 0xf0) | alpha_val; } - dst += buf->stride; + alpha_dst += buf->stride; ++num_lines_out; } + if (is_premult_alpha) { + WebPApplyAlphaMultiply4444(base_rgba, width, num_lines_out, buf->stride); + } return num_lines_out; } static int EmitRescaledAlphaRGB(const VP8Io* const io, WebPDecParams* const p) { if (io->a != NULL) { - int (* const output_func)(WebPDecParams* const, int) = - (p->output->colorspace == MODE_RGBA_4444) ? ExportAlphaRGBA4444 - : ExportAlpha; WebPRescaler* const scaler = &p->scaler_a; - int j = 0, pos = 0; + int j = 0; + int pos = 0; while (j < io->mb_h) { j += WebPRescalerImport(scaler, io->mb_h - j, io->a + j * io->width, io->width); - pos += output_func(p, pos); + pos += p->emit_alpha_row(p, pos); } } return 0; } static int InitRGBRescaler(const VP8Io* const io, WebPDecParams* const p) { - const int has_alpha = IsAlphaMode(p->output->colorspace); + const int has_alpha = WebPIsAlphaMode(p->output->colorspace); const int out_width = io->scaled_width; const int out_height = io->scaled_height; const int uv_in_width = (io->mb_w + 1) >> 1; @@ -459,6 +491,12 @@ static int InitRGBRescaler(const VP8Io* const io, WebPDecParams* const p) { io->mb_w, out_width, io->mb_h, out_height, work + 3 * work_size); p->emit_alpha = EmitRescaledAlphaRGB; + if (p->output->colorspace == MODE_RGBA_4444 || + p->output->colorspace == MODE_rgbA_4444) { + p->emit_alpha_row = ExportAlphaRGBA4444; + } else { + p->emit_alpha_row = ExportAlpha; + } } return 1; } @@ -469,12 +507,13 @@ static int InitRGBRescaler(const VP8Io* const io, WebPDecParams* const p) { static int CustomSetup(VP8Io* io) { WebPDecParams* const p = (WebPDecParams*)io->opaque; const WEBP_CSP_MODE colorspace = p->output->colorspace; - const int is_rgb = 
(colorspace < MODE_YUV); - const int is_alpha = IsAlphaMode(colorspace); + const int is_rgb = WebPIsRGBMode(colorspace); + const int is_alpha = WebPIsAlphaMode(colorspace); p->memory = NULL; p->emit = NULL; p->emit_alpha = NULL; + p->emit_alpha_row = NULL; if (!WebPIoInitFromOptions(p->options, io, is_alpha ? MODE_YUV : MODE_YUVA)) { return 0; } @@ -505,10 +544,12 @@ static int CustomSetup(VP8Io* io) { p->emit = EmitYUV; } if (is_alpha) { // need transparency output + if (WebPIsPremultipliedMode(colorspace)) WebPInitPremultiply(); p->emit_alpha = - is_rgb ? (colorspace == MODE_RGBA_4444 ? EmitAlphaRGBA4444 - : EmitAlphaRGB) - : EmitAlphaYUV; + (colorspace == MODE_RGBA_4444 || colorspace == MODE_rgbA_4444) ? + EmitAlphaRGBA4444 + : is_rgb ? EmitAlphaRGB + : EmitAlphaYUV; } } diff --git a/src/dec/vp8i.h b/src/dec/vp8i.h index d811ca51..af96dc91 100644 --- a/src/dec/vp8i.h +++ b/src/dec/vp8i.h @@ -276,7 +276,7 @@ struct VP8Decoder { // extensions const uint8_t* alpha_data_; // compressed alpha data (if present) size_t alpha_data_size_; - uint8_t* alpha_plane_; // output + uint8_t* alpha_plane_; // output. Persistent, contains the whole data. int layer_colorspace_; const uint8_t* layer_data_; // compressed layer data (if present) diff --git a/src/dec/webp.c b/src/dec/webp.c index fd9cc7ab..fbad69ed 100644 --- a/src/dec/webp.c +++ b/src/dec/webp.c @@ -477,7 +477,7 @@ static uint8_t* Decode(WEBP_CSP_MODE mode, const uint8_t* data, WebPCopyDecBuffer(&output, keep_info); } // return decoded samples (don't clear 'output'!) - return (mode >= MODE_YUV) ? output.u.YUVA.y : output.u.RGBA.rgba; + return WebPIsRGBMode(mode) ? 
output.u.RGBA.rgba : output.u.YUVA.y; } uint8_t* WebPDecodeRGB(const uint8_t* data, size_t data_size, @@ -684,7 +684,7 @@ int WebPIoInitFromOptions(const WebPDecoderOptions* const options, h = options->crop_height; x = options->crop_left; y = options->crop_top; - if (src_colorspace >= MODE_YUV) { // only snap for YUV420 or YUV422 + if (!WebPIsRGBMode(src_colorspace)) { // only snap for YUV420 or YUV422 x &= ~1; y &= ~1; // TODO(later): only for YUV420, not YUV422. } diff --git a/src/dec/webpi.h b/src/dec/webpi.h index 9d29e633..527a3888 100644 --- a/src/dec/webpi.h +++ b/src/dec/webpi.h @@ -24,6 +24,7 @@ extern "C" { typedef struct WebPDecParams WebPDecParams; typedef int (*OutputFunc)(const VP8Io* const io, WebPDecParams* const p); +typedef int (*OutputRowFunc)(WebPDecParams* const p, int y_pos); struct WebPDecParams { WebPDecBuffer* output; // output buffer. @@ -34,9 +35,11 @@ struct WebPDecParams { const WebPDecoderOptions* options; // if not NULL, use alt decoding features // rescalers WebPRescaler scaler_y, scaler_u, scaler_v, scaler_a; - void* memory; // overall scratch memory for the output work. - OutputFunc emit; // output RGB or YUV samples - OutputFunc emit_alpha; // output alpha channel + void* memory; // overall scratch memory for the output work. + + OutputFunc emit; // output RGB or YUV samples + OutputFunc emit_alpha; // output alpha channel + OutputRowFunc emit_alpha_row; // output one line of rescaled alpha values }; // Should be called first, before any use of the WebPDecParams object. diff --git a/src/dsp/dsp.h b/src/dsp/dsp.h index db9c1455..ff78b4c7 100644 --- a/src/dsp/dsp.h +++ b/src/dsp/dsp.h @@ -155,7 +155,6 @@ typedef void (*WebPUpsampleLinePairFunc)( // Fancy upsampling functions to convert YUV to RGB(A) modes extern WebPUpsampleLinePairFunc WebPUpsamplers[/* MODE_LAST */]; -extern WebPUpsampleLinePairFunc WebPUpsamplersKeepAlpha[/* MODE_LAST */]; // Initializes SSE2 version of the fancy upsamplers. 
void WebPInitUpsamplersSSE2(void); @@ -169,7 +168,6 @@ typedef void (*WebPSampleLinePairFunc)( uint8_t* top_dst, uint8_t* bottom_dst, int len); extern const WebPSampleLinePairFunc WebPSamplers[/* MODE_LAST */]; -extern const WebPSampleLinePairFunc WebPSamplersKeepAlpha[/* MODE_LAST */]; // YUV444->RGB converters typedef void (*WebPYUV444Converter)(const uint8_t* y, @@ -181,6 +179,23 @@ extern const WebPYUV444Converter WebPYUV444Converters[/* MODE_LAST */]; // Main function to be called void WebPInitUpsamplers(void); +//------------------------------------------------------------------------------ +// Pre-multiply planes with alpha values + +// Apply alpha pre-multiply on an rgba, bgra or argb plane of size w * h. +// alpha_first should be 1 for argb, 0 for rgba or bgra (where alpha is last). +extern void (*WebPApplyAlphaMultiply)( + uint8_t* rgba, int alpha_first, int w, int h, int stride); + +// Same, but specifically for RGBA4444 format +extern void (*WebPApplyAlphaMultiply4444)( + uint8_t* rgba4444, int w, int h, int stride); + +// To be called first before using the above. +void WebPInitPremultiply(void); + +void WebPInitPremultiplySSE2(void); // should not be called directly. 
+ //------------------------------------------------------------------------------ #if defined(__cplusplus) || defined(c_plusplus) diff --git a/src/dsp/lossless.c b/src/dsp/lossless.c index 51745875..e5a51ca7 100644 --- a/src/dsp/lossless.c +++ b/src/dsp/lossless.c @@ -19,6 +19,8 @@ extern "C" { #include #include "./lossless.h" #include "../dec/vp8li.h" +#include "../dsp/yuv.h" +#include "../dsp/dsp.h" #ifdef USE_LOSSLESS_ENCODER @@ -1041,8 +1043,7 @@ static void CopyOrSwap(const uint32_t* src, int num_pixels, uint8_t* dst, } void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels, - WEBP_CSP_MODE out_colorspace, - uint8_t* const rgba) { + WEBP_CSP_MODE out_colorspace, uint8_t* const rgba) { switch (out_colorspace) { case MODE_RGB: ConvertBGRAToRGB(in_data, num_pixels, rgba); @@ -1050,18 +1051,34 @@ void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels, case MODE_RGBA: ConvertBGRAToRGBA(in_data, num_pixels, rgba); break; + case MODE_rgbA: + ConvertBGRAToRGBA(in_data, num_pixels, rgba); + WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0); + break; case MODE_BGR: ConvertBGRAToBGR(in_data, num_pixels, rgba); break; case MODE_BGRA: CopyOrSwap(in_data, num_pixels, rgba, 1); break; + case MODE_bgrA: + CopyOrSwap(in_data, num_pixels, rgba, 1); + WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0); + break; case MODE_ARGB: CopyOrSwap(in_data, num_pixels, rgba, 0); break; + case MODE_Argb: + CopyOrSwap(in_data, num_pixels, rgba, 0); + WebPApplyAlphaMultiply(rgba, 1, num_pixels, 1, 0); + break; case MODE_RGBA_4444: ConvertBGRAToRGBA4444(in_data, num_pixels, rgba); break; + case MODE_rgbA_4444: + ConvertBGRAToRGBA4444(in_data, num_pixels, rgba); + WebPApplyAlphaMultiply4444(rgba, num_pixels, 1, 0); + break; case MODE_RGB_565: ConvertBGRAToRGB565(in_data, num_pixels, rgba); break; diff --git a/src/dsp/upsampling.c b/src/dsp/upsampling.c index d5fd902c..094c7311 100644 --- a/src/dsp/upsampling.c +++ b/src/dsp/upsampling.c @@ -23,7 +23,6 @@ extern 
"C" { // Fancy upsampling functions to convert YUV to RGB WebPUpsampleLinePairFunc WebPUpsamplers[MODE_LAST]; -WebPUpsampleLinePairFunc WebPUpsamplersKeepAlpha[MODE_LAST]; // Given samples laid out in a square as: // [a b] @@ -100,11 +99,6 @@ UPSAMPLE_FUNC(UpsampleBgraLinePair, VP8YuvToBgra, 4) UPSAMPLE_FUNC(UpsampleArgbLinePair, VP8YuvToArgb, 4) UPSAMPLE_FUNC(UpsampleRgba4444LinePair, VP8YuvToRgba4444, 2) UPSAMPLE_FUNC(UpsampleRgb565LinePair, VP8YuvToRgb565, 2) -// These variants don't erase the alpha value -UPSAMPLE_FUNC(UpsampleRgbaKeepAlphaLinePair, VP8YuvToRgb, 4) -UPSAMPLE_FUNC(UpsampleBgraKeepAlphaLinePair, VP8YuvToBgr, 4) -UPSAMPLE_FUNC(UpsampleArgbKeepAlphaLinePair, VP8YuvToArgbKeepA, 4) -UPSAMPLE_FUNC(UpsampleRgba4444KeepAlphaLinePair, VP8YuvToRgba4444KeepA, 2) #undef LOAD_UV #undef UPSAMPLE_FUNC @@ -145,11 +139,6 @@ SAMPLE_FUNC(SampleBgraLinePair, VP8YuvToBgra, 4) SAMPLE_FUNC(SampleArgbLinePair, VP8YuvToArgb, 4) SAMPLE_FUNC(SampleRgba4444LinePair, VP8YuvToRgba4444, 2) SAMPLE_FUNC(SampleRgb565LinePair, VP8YuvToRgb565, 2) -// These variants don't erase the alpha value -SAMPLE_FUNC(SampleRgbaKeepAlphaLinePair, VP8YuvToRgb, 4) -SAMPLE_FUNC(SampleBgraKeepAlphaLinePair, VP8YuvToBgr, 4) -SAMPLE_FUNC(SampleArgbKeepAlphaLinePair, VP8YuvToArgbKeepA, 4) -SAMPLE_FUNC(SampleRgba4444KeepAlphaLinePair, VP8YuvToRgba4444KeepA, 2) #undef SAMPLE_FUNC @@ -160,17 +149,11 @@ const WebPSampleLinePairFunc WebPSamplers[MODE_LAST] = { SampleBgraLinePair, // MODE_BGRA SampleArgbLinePair, // MODE_ARGB SampleRgba4444LinePair, // MODE_RGBA_4444 - SampleRgb565LinePair // MODE_RGB_565 -}; - -const WebPSampleLinePairFunc WebPSamplersKeepAlpha[MODE_LAST] = { - SampleRgbLinePair, // MODE_RGB - SampleRgbaKeepAlphaLinePair, // MODE_RGBA - SampleBgrLinePair, // MODE_BGR - SampleBgraKeepAlphaLinePair, // MODE_BGRA - SampleArgbKeepAlphaLinePair, // MODE_ARGB - SampleRgba4444KeepAlphaLinePair, // MODE_RGBA_4444 - SampleRgb565LinePair // MODE_RGB_565 + SampleRgb565LinePair, // MODE_RGB_565 + 
SampleRgbaLinePair, // MODE_rgbA + SampleBgraLinePair, // MODE_bgrA + SampleArgbLinePair, // MODE_Argb + SampleRgba4444LinePair // MODE_rgbA_4444 }; //------------------------------------------------------------------------------ @@ -200,9 +183,82 @@ const WebPYUV444Converter WebPYUV444Converters[MODE_LAST] = { Yuv444ToBgra, // MODE_BGRA Yuv444ToArgb, // MODE_ARGB Yuv444ToRgba4444, // MODE_RGBA_4444 - Yuv444ToRgb565 // MODE_RGB_565 + Yuv444ToRgb565, // MODE_RGB_565 + Yuv444ToRgba, // MODE_rgbA + Yuv444ToBgra, // MODE_bgrA + Yuv444ToArgb, // MODE_Argb + Yuv444ToRgba4444 // MODE_rgbA_4444 }; +//------------------------------------------------------------------------------ +// Premultiplied modes + +// non dithered-modes + +// (x * a * 32897) >> 23 is bit-wise equivalent to (int)(x * a / 255.) +// for all 8bit x or a. For bit-wise equivalence to (int)(x * a / 255. + .5), +// one can use instead: (x * a * 65793 + (1 << 23)) >> 24 +#if 1 // (int)(x * a / 255.) +#define MULTIPLIER(a) ((a) * 32897UL) +#define PREMULTIPLY(x, m) (((x) * (m)) >> 23) +#else // (int)(x * a / 255. + .5) +#define MULTIPLIER(a) ((a) * 65793UL) +#define PREMULTIPLY(x, m) (((x) * (m) + (1UL << 23)) >> 24) +#endif + +static void ApplyAlphaMultiply(uint8_t* rgba, int alpha_1rst, + int w, int h, int stride) { + while (h-- > 0) { + uint8_t* const rgb = rgba + (alpha_1rst ? 1 : 0); + const uint8_t* const alpha = rgba + (alpha_1rst ? 
0 : 3); + int i; + for (i = 0; i < w; ++i) { + const uint32_t a = alpha[4 * i]; + if (a != 0xff) { + const uint32_t mult = MULTIPLIER(a); + rgb[4 * i + 0] = PREMULTIPLY(rgb[4 * i + 0], mult); + rgb[4 * i + 1] = PREMULTIPLY(rgb[4 * i + 1], mult); + rgb[4 * i + 2] = PREMULTIPLY(rgb[4 * i + 2], mult); + } + } + rgba += stride; + } +} +#undef MULTIPLIER +#undef PREMULTIPLY + +// rgbA4444 + +#define MULTIPLIER(a) ((a) * 0x11) +#define PREMULTIPLY(x, m) (((x) * (m)) >> 12) + +static WEBP_INLINE uint8_t dither_hi(uint8_t x) { return (x & 0xf0) | (x >> 4); } +static WEBP_INLINE uint8_t dither_lo(uint8_t x) { return (x & 0x0f) | (x << 4); } + +static void ApplyAlphaMultiply4444(uint8_t* rgba4444, + int w, int h, int stride) { + while (h-- > 0) { + int i; + for (i = 0; i < w; ++i) { + const uint8_t a = dither_lo(rgba4444[2 * i + 1]); + const uint32_t mult = MULTIPLIER(a); + const uint8_t r = PREMULTIPLY(dither_hi(rgba4444[2 * i + 0]), mult); + const uint8_t g = PREMULTIPLY(dither_lo(rgba4444[2 * i + 0]), mult); + const uint8_t b = PREMULTIPLY(dither_hi(rgba4444[2 * i + 1]), mult); + rgba4444[2 * i + 0] = (r & 0xf0) | (g & 0x0f); + rgba4444[2 * i + 1] = (b & 0xf0) | a; + } + rgba4444 += stride; + } +} +#undef MULTIPLIER +#undef PREMULTIPLY + +void (*WebPApplyAlphaMultiply)(uint8_t*, int, int, int, int) + = ApplyAlphaMultiply; +void (*WebPApplyAlphaMultiply4444)(uint8_t*, int, int, int) + = ApplyAlphaMultiply4444; + //------------------------------------------------------------------------------ // Main call @@ -216,16 +272,8 @@ void WebPInitUpsamplers(void) { WebPUpsamplers[MODE_RGBA_4444] = UpsampleRgba4444LinePair; WebPUpsamplers[MODE_RGB_565] = UpsampleRgb565LinePair; - WebPUpsamplersKeepAlpha[MODE_RGB] = UpsampleRgbLinePair; - WebPUpsamplersKeepAlpha[MODE_RGBA] = UpsampleRgbaKeepAlphaLinePair; - WebPUpsamplersKeepAlpha[MODE_BGR] = UpsampleBgrLinePair; - WebPUpsamplersKeepAlpha[MODE_BGRA] = UpsampleBgraKeepAlphaLinePair; - WebPUpsamplersKeepAlpha[MODE_ARGB] = 
UpsampleArgbKeepAlphaLinePair; - WebPUpsamplersKeepAlpha[MODE_RGBA_4444] = UpsampleRgba4444KeepAlphaLinePair; - WebPUpsamplersKeepAlpha[MODE_RGB_565] = UpsampleRgb565LinePair; - // If defined, use CPUInfo() to overwrite some pointers with faster versions. - if (VP8GetCPUInfo) { + if (VP8GetCPUInfo != NULL) { #if defined(WEBP_USE_SSE2) if (VP8GetCPUInfo(kSSE2)) { WebPInitUpsamplersSSE2(); @@ -235,6 +283,26 @@ void WebPInitUpsamplers(void) { #endif // FANCY_UPSAMPLING } +void WebPInitPremultiply(void) { + WebPApplyAlphaMultiply = ApplyAlphaMultiply; + WebPApplyAlphaMultiply4444 = ApplyAlphaMultiply4444; + +#ifdef FANCY_UPSAMPLING + WebPUpsamplers[MODE_rgbA] = UpsampleRgbaLinePair; + WebPUpsamplers[MODE_bgrA] = UpsampleBgraLinePair; + WebPUpsamplers[MODE_Argb] = UpsampleArgbLinePair; + WebPUpsamplers[MODE_rgbA_4444] = UpsampleRgba4444LinePair; + + if (VP8GetCPUInfo != NULL) { +#if defined(WEBP_USE_SSE2) + if (VP8GetCPUInfo(kSSE2)) { + WebPInitPremultiplySSE2(); + } +#endif + } +#endif // FANCY_UPSAMPLING +} + #if defined(__cplusplus) || defined(c_plusplus) } // extern "C" #endif diff --git a/src/dsp/upsampling_sse2.c b/src/dsp/upsampling_sse2.c index 6116b0a5..93fc2cca 100644 --- a/src/dsp/upsampling_sse2.c +++ b/src/dsp/upsampling_sse2.c @@ -176,9 +176,6 @@ SSE2_UPSAMPLE_FUNC(UpsampleRgbLinePairSSE2, VP8YuvToRgb, 3) SSE2_UPSAMPLE_FUNC(UpsampleBgrLinePairSSE2, VP8YuvToBgr, 3) SSE2_UPSAMPLE_FUNC(UpsampleRgbaLinePairSSE2, VP8YuvToRgba, 4) SSE2_UPSAMPLE_FUNC(UpsampleBgraLinePairSSE2, VP8YuvToBgra, 4) -// These two don't erase the alpha value -SSE2_UPSAMPLE_FUNC(UpsampleRgbKeepAlphaLinePairSSE2, VP8YuvToRgb, 4) -SSE2_UPSAMPLE_FUNC(UpsampleBgrKeepAlphaLinePairSSE2, VP8YuvToBgr, 4) #undef GET_M #undef PACK_AND_STORE @@ -190,7 +187,6 @@ SSE2_UPSAMPLE_FUNC(UpsampleBgrKeepAlphaLinePairSSE2, VP8YuvToBgr, 4) //------------------------------------------------------------------------------ extern WebPUpsampleLinePairFunc WebPUpsamplers[/* MODE_LAST */]; -extern 
WebPUpsampleLinePairFunc WebPUpsamplersKeepAlpha[/* MODE_LAST */]; #endif // FANCY_UPSAMPLING @@ -200,11 +196,13 @@ void WebPInitUpsamplersSSE2(void) { WebPUpsamplers[MODE_RGBA] = UpsampleRgbaLinePairSSE2; WebPUpsamplers[MODE_BGR] = UpsampleBgrLinePairSSE2; WebPUpsamplers[MODE_BGRA] = UpsampleBgraLinePairSSE2; +#endif // FANCY_UPSAMPLING +} - WebPUpsamplersKeepAlpha[MODE_RGB] = UpsampleRgbLinePairSSE2; - WebPUpsamplersKeepAlpha[MODE_RGBA] = UpsampleRgbKeepAlphaLinePairSSE2; - WebPUpsamplersKeepAlpha[MODE_BGR] = UpsampleBgrLinePairSSE2; - WebPUpsamplersKeepAlpha[MODE_BGRA] = UpsampleBgrKeepAlphaLinePairSSE2; +void WebPInitPremultiplySSE2(void) { +#ifdef FANCY_UPSAMPLING + WebPUpsamplers[MODE_rgbA] = UpsampleRgbaLinePairSSE2; + WebPUpsamplers[MODE_bgrA] = UpsampleBgraLinePairSSE2; #endif // FANCY_UPSAMPLING } diff --git a/src/dsp/yuv.h b/src/dsp/yuv.h index 5000b7d3..8f52acb2 100644 --- a/src/dsp/yuv.h +++ b/src/dsp/yuv.h @@ -99,6 +99,11 @@ static WEBP_INLINE void VP8YuvToRgba(uint8_t y, uint8_t u, uint8_t v, rgba[3] = 0xff; } +static WEBP_INLINE uint32_t VP8Clip4Bits(uint8_t c) { + const uint32_t v = (c + 8) >> 4; + return (v > 15) ? 15 : v; +} + // Must be called before everything, to initialize the tables. void VP8YUVInit(void); diff --git a/src/webp/decode.h b/src/webp/decode.h index e3f0d346..dc8140b7 100644 --- a/src/webp/decode.h +++ b/src/webp/decode.h @@ -113,12 +113,35 @@ typedef enum { MODE_RGB = 0, MODE_RGBA = 1, MODE_BGR = 2, MODE_BGRA = 3, MODE_ARGB = 4, MODE_RGBA_4444 = 5, MODE_RGB_565 = 6, + // RGB-premultiplied transparent modes (alpha value is preserved) + MODE_rgbA = 7, + MODE_bgrA = 8, + MODE_Argb = 9, + MODE_rgbA_4444 = 10, // YUV modes must come after RGB ones. - MODE_YUV = 7, MODE_YUVA = 8, // yuv 4:2:0 - MODE_LAST = 9 + MODE_YUV = 11, MODE_YUVA = 12, // yuv 4:2:0 + MODE_LAST = 13 } WEBP_CSP_MODE; -// Generic structure for describing the sample buffer. 
+// Some useful macros: +static WEBP_INLINE int WebPIsPremultipliedMode(WEBP_CSP_MODE mode) { + return (mode == MODE_rgbA || mode == MODE_bgrA || mode == MODE_Argb || + mode == MODE_rgbA_4444); +} + +static WEBP_INLINE int WebPIsAlphaMode(WEBP_CSP_MODE mode) { + return (mode == MODE_RGBA || mode == MODE_BGRA || mode == MODE_ARGB || + mode == MODE_RGBA_4444 || mode == MODE_YUVA || + WebPIsPremultipliedMode(mode)); +} + +static WEBP_INLINE int WebPIsRGBMode(WEBP_CSP_MODE mode) { + return (mode < MODE_YUV); +} + +//------------------------------------------------------------------------------ +// WebPDecBuffer: Generic structure for describing the sample buffer. + typedef struct { // view as RGBA uint8_t* rgba; // pointer to RGBA samples int stride; // stride in bytes from one scanline to the next. diff --git a/src/webp/decode_vp8.h b/src/webp/decode_vp8.h index dde7fc94..7cbb2457 100644 --- a/src/webp/decode_vp8.h +++ b/src/webp/decode_vp8.h @@ -99,7 +99,9 @@ struct VP8Io { int use_scaling; int scaled_width, scaled_height; - // pointer to the alpha data (if present) corresponding to the rows + // If non NULL, pointer to the alpha data (if present) corresponding to the + // start of the current row (That is: it is pre-offset by mb_y and takes + // cropping into account). const uint8_t* a; };