diff --git a/src/dec/vp8l.c b/src/dec/vp8l.c index 01c3274f..de3711a0 100644 --- a/src/dec/vp8l.c +++ b/src/dec/vp8l.c @@ -1492,9 +1492,8 @@ static void ExtractAlphaRows(VP8LDecoder* const dec, int last_row) { const int cache_pixs = width * num_rows_to_process; uint8_t* const dst = output + width * cur_row; const uint32_t* const src = dec->argb_cache_; - int i; ApplyInverseTransforms(dec, num_rows_to_process, in); - for (i = 0; i < cache_pixs; ++i) dst[i] = (src[i] >> 8) & 0xff; + WebPExtractGreen(src, dst, cache_pixs); AlphaApplyFilter(alph_dec, cur_row, cur_row + num_rows_to_process, dst, width); num_rows -= num_rows_to_process; @@ -1562,6 +1561,8 @@ int VP8LDecodeAlphaImageStream(ALPHDecoder* const alph_dec, int last_row) { return 1; // done } + if (!alph_dec->use_8b_decode_) WebPInitAlphaProcessing(); + // Decode (with special row processing). return alph_dec->use_8b_decode_ ? DecodeAlphaData(dec, (uint8_t*)dec->pixels_, dec->width_, dec->height_, diff --git a/src/dsp/alpha_processing.c b/src/dsp/alpha_processing.c index fd564d5d..4b60e092 100644 --- a/src/dsp/alpha_processing.c +++ b/src/dsp/alpha_processing.c @@ -284,9 +284,9 @@ static void ApplyAlphaMultiply_16b(uint8_t* rgba4444, #endif } -static int DispatchAlpha(const uint8_t* alpha, int alpha_stride, - int width, int height, - uint8_t* dst, int dst_stride) { +static int DispatchAlpha_C(const uint8_t* alpha, int alpha_stride, + int width, int height, + uint8_t* dst, int dst_stride) { uint32_t alpha_mask = 0xff; int i, j; @@ -303,9 +303,9 @@ static int DispatchAlpha(const uint8_t* alpha, int alpha_stride, return (alpha_mask != 0xff); } -static void DispatchAlphaToGreen(const uint8_t* alpha, int alpha_stride, - int width, int height, - uint32_t* dst, int dst_stride) { +static void DispatchAlphaToGreen_C(const uint8_t* alpha, int alpha_stride, + int width, int height, + uint32_t* dst, int dst_stride) { int i, j; for (j = 0; j < height; ++j) { for (i = 0; i < width; ++i) { @@ -316,9 +316,9 @@ static void DispatchAlphaToGreen(const uint8_t* alpha, int alpha_stride, } } -static int ExtractAlpha(const uint8_t* argb, int argb_stride, - int width, int height, - uint8_t* alpha, int alpha_stride) { +static int ExtractAlpha_C(const uint8_t* argb, int argb_stride, + int width, int height, + uint8_t* alpha, int alpha_stride) { uint8_t alpha_mask = 0xff; int i, j; @@ -334,11 +334,17 @@ static int ExtractAlpha(const uint8_t* argb, int argb_stride, return (alpha_mask == 0xff); } +static void ExtractGreen_C(const uint32_t* argb, uint8_t* alpha, int size) { + int i; + for (i = 0; i < size; ++i) alpha[i] = argb[i] >> 8; +} + void (*WebPApplyAlphaMultiply)(uint8_t*, int, int, int, int); void (*WebPApplyAlphaMultiply4444)(uint8_t*, int, int, int); int (*WebPDispatchAlpha)(const uint8_t*, int, int, int, uint8_t*, int); void (*WebPDispatchAlphaToGreen)(const uint8_t*, int, int, int, uint32_t*, int); int (*WebPExtractAlpha)(const uint8_t*, int, int, int, uint8_t*, int); +void (*WebPExtractGreen)(const uint32_t* argb, uint8_t* alpha, int size); //------------------------------------------------------------------------------ // Init function @@ -358,9 +364,11 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessing(void) { WebPMultRow = WebPMultRowC; WebPApplyAlphaMultiply = ApplyAlphaMultiply; WebPApplyAlphaMultiply4444 = ApplyAlphaMultiply_16b; - WebPDispatchAlpha = DispatchAlpha; - WebPDispatchAlphaToGreen = DispatchAlphaToGreen; - WebPExtractAlpha = ExtractAlpha; + + WebPDispatchAlpha = DispatchAlpha_C; + WebPDispatchAlphaToGreen = DispatchAlphaToGreen_C; + WebPExtractAlpha = ExtractAlpha_C; + WebPExtractGreen = ExtractGreen_C; // If defined, use CPUInfo() to overwrite some pointers with faster versions. if (VP8GetCPUInfo != NULL) { diff --git a/src/dsp/alpha_processing_neon.c b/src/dsp/alpha_processing_neon.c index ed845729..606a401c 100644 --- a/src/dsp/alpha_processing_neon.c +++ b/src/dsp/alpha_processing_neon.c @@ -161,6 +161,17 @@ static int ExtractAlpha_NEON(const uint8_t* argb, int argb_stride, return (alpha_mask == 0xffffffffu); } +static void ExtractGreen_NEON(const uint32_t* argb, + uint8_t* alpha, int size) { + int i; + for (i = 0; i + 16 <= size; i += 16) { + const uint8x16x4_t rgbX = vld4q_u8((const uint8_t*)(argb + i)); + const uint8x16_t greens = rgbX.val[1]; + vst1q_u8(alpha + i, greens); + } + for (; i < size; ++i) alpha[i] = (argb[i] >> 8) & 0xff; +} + //------------------------------------------------------------------------------ extern void WebPInitAlphaProcessingNEON(void); @@ -170,6 +181,7 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessingNEON(void) { WebPDispatchAlpha = DispatchAlpha_NEON; WebPDispatchAlphaToGreen = DispatchAlphaToGreen_NEON; WebPExtractAlpha = ExtractAlpha_NEON; + WebPExtractGreen = ExtractGreen_NEON; } #else // !WEBP_USE_NEON diff --git a/src/dsp/dsp.h b/src/dsp/dsp.h index d6d50e77..7ce58856 100644 --- a/src/dsp/dsp.h +++ b/src/dsp/dsp.h @@ -509,6 +509,10 @@ extern int (*WebPExtractAlpha)(const uint8_t* argb, int argb_stride, int width, int height, uint8_t* alpha, int alpha_stride); +// Extract the green values from 32b values in argb[] and pack them into alpha[] +// (this is the opposite of WebPDispatchAlphaToGreen). +extern void (*WebPExtractGreen)(const uint32_t* argb, uint8_t* alpha, int size); + // Pre-Multiply operation transforms x into x * A / 255 (where x=Y,R,G or B). // Un-Multiply operation transforms x into x * 255 / A.