From 9102a7b63d6653e81d2124364bbe88ec7f2f4b1b Mon Sep 17 00:00:00 2001 From: Pascal Massimino Date: Mon, 15 Sep 2014 08:11:36 +0200 Subject: [PATCH] Add a WebPExtractAlpha function to dsp This is the opposite of WebPDispatchAlpha + Implement the SSE2 version (cherry picked from commit cddd33405055d70e5aab7b2aabaf1970522dd2e4) Conflicts: src/dsp/alpha_processing_sse2.c Change-Id: I0c297309255f508c5261da8aad01f7e57f924d6c --- Android.mk | 1 + Makefile.vc | 1 + makefile.unix | 1 + src/dsp/Makefile.am | 1 + src/dsp/alpha_processing.c | 31 +++++++++++++ src/dsp/alpha_processing_sse2.c | 77 +++++++++++++++++++++++++++++++++ src/dsp/dsp.h | 7 +++ 7 files changed, 119 insertions(+) create mode 100644 src/dsp/alpha_processing_sse2.c diff --git a/Android.mk b/Android.mk index e02caef2..12fdd94b 100644 --- a/Android.mk +++ b/Android.mk @@ -33,6 +33,7 @@ LOCAL_SRC_FILES := \ src/dec/vp8l.c \ src/dec/webp.c \ src/dsp/alpha_processing.c \ + src/dsp/alpha_processing_sse2.c \ src/dsp/cpu.c \ src/dsp/dec.c \ src/dsp/dec_clip_tables.c \ diff --git a/Makefile.vc b/Makefile.vc index 5a387846..8135bc15 100644 --- a/Makefile.vc +++ b/Makefile.vc @@ -172,6 +172,7 @@ DEMUX_OBJS = \ DSP_DEC_OBJS = \ $(DIROBJ)\dsp\alpha_processing.obj \ + $(DIROBJ)\dsp\alpha_processing_sse2.obj \ $(DIROBJ)\dsp\cpu.obj \ $(DIROBJ)\dsp\dec.obj \ $(DIROBJ)\dsp\dec_clip_tables.obj \ diff --git a/makefile.unix b/makefile.unix index 023b5217..7454a13f 100644 --- a/makefile.unix +++ b/makefile.unix @@ -108,6 +108,7 @@ DEMUX_OBJS = \ DSP_DEC_OBJS = \ src/dsp/alpha_processing.o \ + src/dsp/alpha_processing_sse2.o \ src/dsp/cpu.o \ src/dsp/dec.o \ src/dsp/dec_clip_tables.o \ diff --git a/src/dsp/Makefile.am b/src/dsp/Makefile.am index 02c96444..71d0445a 100644 --- a/src/dsp/Makefile.am +++ b/src/dsp/Makefile.am @@ -38,6 +38,7 @@ libwebpdsp_avx2_la_CPPFLAGS = $(libwebpdsp_la_CPPFLAGS) libwebpdsp_avx2_la_CFLAGS = $(AM_CFLAGS) $(AVX2_FLAGS) libwebpdspdecode_sse2_la_SOURCES = +libwebpdspdecode_sse2_la_SOURCES += 
alpha_processing_sse2.c
 libwebpdspdecode_sse2_la_SOURCES += dec_sse2.c
 libwebpdspdecode_sse2_la_SOURCES += lossless_sse2.c
 libwebpdspdecode_sse2_la_SOURCES += upsampling_sse2.c
diff --git a/src/dsp/alpha_processing.c b/src/dsp/alpha_processing.c
index 09deacfb..d0f7a6cc 100644
--- a/src/dsp/alpha_processing.c
+++ b/src/dsp/alpha_processing.c
@@ -284,15 +284,46 @@ static void ApplyAlphaMultiply_16b(uint8_t* rgba4444,
 #endif
 }
 
+static int ExtractAlpha(const uint8_t* argb, int argb_stride,
+                        int width, int height,
+                        uint8_t* alpha, int alpha_stride) {
+  uint8_t alpha_mask = 0xff;  // running AND of every alpha byte copied
+  int i, j;
+
+  for (j = 0; j < height; ++j) {
+    for (i = 0; i < width; ++i) {
+      const uint8_t alpha_value = argb[4 * i];  // byte 0 of each 4-byte pixel
+      alpha[i] = alpha_value;
+      alpha_mask &= alpha_value;
+    }
+    argb += argb_stride;    // both strides are applied as byte offsets
+    alpha += alpha_stride;
+  }
+  return (alpha_mask == 0xff);  // 1 iff no copied alpha byte differs from 0xff
+}
+
 void (*WebPApplyAlphaMultiply)(uint8_t*, int, int, int, int);
 void (*WebPApplyAlphaMultiply4444)(uint8_t*, int, int, int);
+int (*WebPExtractAlpha)(const uint8_t*, int, int, int, uint8_t*, int);
 
 //------------------------------------------------------------------------------
 // Init function
 
+extern void WebPInitAlphaProcessingSSE2(void);
+
 void WebPInitAlphaProcessing(void) {
   WebPMultARGBRow = MultARGBRow;
   WebPMultRow = MultRow;
   WebPApplyAlphaMultiply = ApplyAlphaMultiply;
   WebPApplyAlphaMultiply4444 = ApplyAlphaMultiply_16b;
+  WebPExtractAlpha = ExtractAlpha;  // plain-C default
+
+  // If defined, use CPUInfo() to overwrite some pointers with faster versions.
+  if (VP8GetCPUInfo != NULL) {
+#if defined(WEBP_USE_SSE2)
+    if (VP8GetCPUInfo(kSSE2)) {
+      WebPInitAlphaProcessingSSE2();  // installs the SSE2 WebPExtractAlpha
+    }
+#endif
+  }
 }
diff --git a/src/dsp/alpha_processing_sse2.c b/src/dsp/alpha_processing_sse2.c
new file mode 100644
index 00000000..3d0a9b57
--- /dev/null
+++ b/src/dsp/alpha_processing_sse2.c
@@ -0,0 +1,77 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Utilities for processing transparent channel.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_SSE2)
+#include <emmintrin.h>
+
+//------------------------------------------------------------------------------
+// Copies byte 0 of every 4-byte pixel into alpha[]; returns 1 if all are 0xff.
+static int ExtractAlpha(const uint8_t* argb, int argb_stride,
+                        int width, int height,
+                        uint8_t* alpha, int alpha_stride) {
+  // alpha_and accumulates the 'and' of the alpha[] values handled by the
+  // scalar tail loop. It stops being 0xff as soon as one of them is not 0xff.
+  uint32_t alpha_and = 0xff;
+  int i, j;
+  const __m128i a_mask = _mm_set1_epi32(0xffu);  // to preserve alpha
+  const __m128i all_0xff = _mm_set_epi32(0, 0, ~0u, ~0u);  // low 8 bytes set
+  __m128i all_alphas = all_0xff;
+
+  // The vector loop must stop before the last pixel: its 16-byte loads may
+  // touch 3 bytes past 'src[4 * width - 4]' otherwise, because we don't know
+  // if alpha is the first or the last byte of the quadruplet.
+  const int limit = (width - 1) & ~7;
+
+  for (j = 0; j < height; ++j) {
+    const __m128i* src = (const __m128i*)argb;
+    for (i = 0; i < limit; i += 8) {
+      // load 32 argb bytes (8 pixels)
+      const __m128i a0 = _mm_loadu_si128(src + 0);
+      const __m128i a1 = _mm_loadu_si128(src + 1);
+      const __m128i b0 = _mm_and_si128(a0, a_mask);
+      const __m128i b1 = _mm_and_si128(a1, a_mask);
+      const __m128i c0 = _mm_packs_epi32(b0, b1);   // 8 x 16-bit alphas
+      const __m128i d0 = _mm_packus_epi16(c0, c0);  // 8 alpha bytes, twice
+      // store the low 8 bytes
+      _mm_storel_epi64((__m128i*)&alpha[i], d0);
+      // accumulate eight alpha 'and' in parallel
+      all_alphas = _mm_and_si128(all_alphas, d0);
+      src += 2;
+    }
+    for (; i < width; ++i) {  // byte-wise tail, always at least one pixel
+      const uint32_t alpha_value = argb[4 * i];
+      alpha[i] = alpha_value;
+      alpha_and &= alpha_value;
+    }
+    argb += argb_stride;
+    alpha += alpha_stride;
+  }
+  // Combine the eight alpha 'and' into an 8-bit mask.
+  alpha_and &= _mm_movemask_epi8(_mm_cmpeq_epi8(all_alphas, all_0xff));
+  return (alpha_and == 0xff);
+}
+
+#endif   // WEBP_USE_SSE2
+
+//------------------------------------------------------------------------------
+// Init function
+
+extern void WebPInitAlphaProcessingSSE2(void);
+
+void WebPInitAlphaProcessingSSE2(void) {
+#if defined(WEBP_USE_SSE2)
+  WebPExtractAlpha = ExtractAlpha;
+#endif
+}
diff --git a/src/dsp/dsp.h b/src/dsp/dsp.h
index 7dc09e03..52c44b2d 100644
--- a/src/dsp/dsp.h
+++ b/src/dsp/dsp.h
@@ -257,6 +257,13 @@ extern void (*WebPApplyAlphaMultiply)(
 extern void (*WebPApplyAlphaMultiply4444)(
     uint8_t* rgba4444, int w, int h, int stride);
 
+// Extract the alpha values from 32b values in argb[] and pack them into alpha[]
+// (this is the opposite of WebPDispatchAlpha).
+// Returns true if there's only trivial 0xff alpha values.
+extern int (*WebPExtractAlpha)(const uint8_t* argb, int argb_stride,
+                               int width, int height,
+                               uint8_t* alpha, int alpha_stride);
+
 // Pre-Multiply operation transforms x into x * A / 255 (where x=Y,R,G or B).
 // Un-Multiply operation transforms x into x * 255 / A.