diff --git a/Android.mk b/Android.mk
index c33c4e02..58505d0e 100644
--- a/Android.mk
+++ b/Android.mk
@@ -37,6 +37,7 @@ LOCAL_SRC_FILES := \
     src/dsp/alpha_processing_sse2.c \
     src/dsp/argb.c \
     src/dsp/argb_mips_dsp_r2.c \
+    src/dsp/argb_sse2.c \
     src/dsp/cpu.c \
     src/dsp/dec.c \
     src/dsp/dec_clip_tables.c \
diff --git a/Makefile.vc b/Makefile.vc
index e973df09..f6fbb632 100644
--- a/Makefile.vc
+++ b/Makefile.vc
@@ -182,6 +182,7 @@ DSP_DEC_OBJS = \
     $(DIROBJ)\dsp\alpha_processing_sse2.obj \
     $(DIROBJ)\dsp\argb.obj \
     $(DIROBJ)\dsp\argb_mips_dsp_r2.obj \
+    $(DIROBJ)\dsp\argb_sse2.obj \
     $(DIROBJ)\dsp\cpu.obj \
     $(DIROBJ)\dsp\dec.obj \
     $(DIROBJ)\dsp\dec_clip_tables.obj \
diff --git a/makefile.unix b/makefile.unix
index 10a8bcc5..0c1e4bba 100644
--- a/makefile.unix
+++ b/makefile.unix
@@ -114,6 +114,7 @@ DSP_DEC_OBJS = \
     src/dsp/alpha_processing_sse2.o \
     src/dsp/argb.o \
     src/dsp/argb_mips_dsp_r2.o \
+    src/dsp/argb_sse2.o \
     src/dsp/cpu.o \
     src/dsp/dec.o \
     src/dsp/dec_clip_tables.o \
diff --git a/src/dsp/Makefile.am b/src/dsp/Makefile.am
index a1336273..bd5f38b3 100644
--- a/src/dsp/Makefile.am
+++ b/src/dsp/Makefile.am
@@ -11,8 +11,6 @@ commondir = $(includedir)/webp
 COMMON_SOURCES =
 COMMON_SOURCES += alpha_processing.c
 COMMON_SOURCES += alpha_processing_mips_dsp_r2.c
-COMMON_SOURCES += argb.c
-COMMON_SOURCES += argb_mips_dsp_r2.c
 COMMON_SOURCES += cpu.c
 COMMON_SOURCES += dec.c
 COMMON_SOURCES += dec_clip_tables.c
@@ -37,6 +35,8 @@ COMMON_SOURCES += yuv_mips32.c
 COMMON_SOURCES += yuv_mips_dsp_r2.c
 
 ENC_SOURCES =
+ENC_SOURCES += argb.c
+ENC_SOURCES += argb_mips_dsp_r2.c
 ENC_SOURCES += enc.c
 ENC_SOURCES += enc_mips32.c
 ENC_SOURCES += enc_mips_dsp_r2.c
@@ -58,6 +58,7 @@ libwebpdspdecode_sse2_la_CPPFLAGS = $(libwebpdsp_sse2_la_CPPFLAGS)
 libwebpdspdecode_sse2_la_CFLAGS = $(libwebpdsp_sse2_la_CFLAGS)
 
 libwebpdsp_sse2_la_SOURCES =
+libwebpdsp_sse2_la_SOURCES += argb_sse2.c
 libwebpdsp_sse2_la_SOURCES += enc_sse2.c
 libwebpdsp_sse2_la_CPPFLAGS = $(libwebpdsp_la_CPPFLAGS)
 libwebpdsp_sse2_la_CFLAGS = $(AM_CFLAGS) $(SSE2_FLAGS)
diff --git a/src/dsp/argb.c b/src/dsp/argb.c
index 5ab61090..1c9df852 100644
--- a/src/dsp/argb.c
+++ b/src/dsp/argb.c
@@ -40,6 +40,7 @@ void (*VP8PackRGB)(const uint8_t*, const uint8_t*, const uint8_t*,
                    int, int, uint32_t*);
 
 extern void VP8EncDspARGBInitMIPSdspR2(void);
+extern void VP8EncDspARGBInitSSE2(void);
 
 WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspARGBInit(void) {
   VP8PackARGB = PackARGB;
@@ -47,6 +48,11 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspARGBInit(void) {
 
   // If defined, use CPUInfo() to overwrite some pointers with faster versions.
   if (VP8GetCPUInfo != NULL) {
+#if defined(WEBP_USE_SSE2)
+    if (VP8GetCPUInfo(kSSE2)) {
+      VP8EncDspARGBInitSSE2();
+    }
+#endif
 #if defined(WEBP_USE_MIPS_DSP_R2)
     if (VP8GetCPUInfo(kMIPSdspR2)) {
       VP8EncDspARGBInitMIPSdspR2();
diff --git a/src/dsp/argb_sse2.c b/src/dsp/argb_sse2.c
new file mode 100644
index 00000000..5009c5f3
--- /dev/null
+++ b/src/dsp/argb_sse2.c
@@ -0,0 +1,72 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// ARGB making functions (SSE2 version).
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_SSE2)
+
+#include <assert.h>
+#include <emmintrin.h>
+#include <string.h>
+
+// Packs four channel values into a single 32-bit word as 0xAARRGGBB.
+// Arguments are expected to be 8-bit samples (0..255).
+static WEBP_INLINE uint32_t MakeARGB32(int a, int r, int g, int b) {
+  return (((uint32_t)a << 24) | (r << 16) | (g << 8) | b);
+}
+
+// Converts 'len' interleaved 4-byte pixels into packed ARGB words in 'out'.
+// The channel pointers must address one interleaved buffer, in either
+// R,G,B,A byte order (g == r + 1) or B,G,R,A byte order (g == b + 1).
+// Other layouts are only caught by the asserts, i.e. in debug builds.
+static void PackARGB(const uint8_t* a, const uint8_t* r, const uint8_t* g,
+                     const uint8_t* b, int len, uint32_t* out) {
+  if (g == r + 1) {  // RGBA input order. Need to swap R and B.
+    int i = 0;
+    const int len4 = len & ~3;  // pixels handled by the 4-wide SIMD loop
+    const __m128i mask = _mm_set1_epi32(0x00ff00ffu);
+    assert(b == r + 2);
+    assert(a == r + 3);
+    for (; i < len4; i += 4) {  // 16 bytes = 4 pixels per iteration
+      const __m128i A = _mm_loadu_si128((const __m128i*)(r + 4 * i));
+      const __m128i B = _mm_and_si128(A, mask);     // R 0 B 0
+      const __m128i C = _mm_andnot_si128(mask, A);  // 0 G 0 A
+      // Swap adjacent 16-bit lanes: R 0 B 0 -> B 0 R 0.
+      const __m128i D = _mm_shufflelo_epi16(B, _MM_SHUFFLE(2, 3, 0, 1));
+      const __m128i E = _mm_shufflehi_epi16(D, _MM_SHUFFLE(2, 3, 0, 1));
+      const __m128i F = _mm_or_si128(E, C);         // B G R A
+      _mm_storeu_si128((__m128i*)(out + i), F);
+    }
+    for (; i < len; ++i) {  // scalar tail: remaining len & 3 pixels
+      out[i] = MakeARGB32(a[4 * i], r[4 * i], g[4 * i], b[4 * i]);
+    }
+  } else {
+    // BGRA bytes already form an ARGB word on little-endian targets.
+    assert(g == b + 1);
+    assert(r == b + 2);
+    assert(a == b + 3);
+    memcpy(out, b, len * 4);
+  }
+}
+
+#endif  // WEBP_USE_SSE2
+
+extern void VP8EncDspARGBInitSSE2(void);
+
+// Points VP8PackARGB at the SSE2 implementation above.
+// Does nothing when WEBP_USE_SSE2 is not defined.
+WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspARGBInitSSE2(void) {
+#if defined(WEBP_USE_SSE2)
+  VP8PackARGB = PackARGB;
+#endif
+}