stub for SSE4.1 support.

Change-Id: I0c845a98d2871cc8907ff7b914bab7747a92c7ed
This commit is contained in:
Pascal Massimino 2015-03-19 13:18:59 -07:00 committed by James Zern
parent 4a95384b34
commit e9570dd987
8 changed files with 79 additions and 2 deletions

View File

@ -48,6 +48,7 @@ dsp_dec_srcs := \
src/dsp/dec_mips_dsp_r2.c \
src/dsp/dec_neon.$(NEON) \
src/dsp/dec_sse2.c \
src/dsp/dec_sse41.c \
src/dsp/filters.c \
src/dsp/filters_mips_dsp_r2.c \
src/dsp/filters_sse2.c \

View File

@ -195,6 +195,7 @@ DSP_DEC_OBJS = \
$(DIROBJ)\dsp\dec_mips_dsp_r2.obj \
$(DIROBJ)\dsp\dec_neon.obj \
$(DIROBJ)\dsp\dec_sse2.obj \
$(DIROBJ)\dsp\dec_sse41.obj \
$(DIROBJ)\dsp\filters.obj \
$(DIROBJ)\dsp\filters_mips_dsp_r2.obj \
$(DIROBJ)\dsp\filters_sse2.obj \

View File

@ -97,6 +97,17 @@ AS_IF([test -n "$AVX2_FLAGS"], [
CFLAGS=$SAVED_CFLAGS])
AC_SUBST([AVX2_FLAGS])
TEST_AND_ADD_CFLAGS([SSE41_FLAGS], [-msse4.1])
AS_IF([test -n "$SSE41_FLAGS"], [
SAVED_CFLAGS=$CFLAGS
CFLAGS="$CFLAGS $SSE41_FLAGS"
AC_CHECK_HEADER([smmintrin.h],
[AC_DEFINE(WEBP_HAVE_SSE41, [1],
[Set to 1 if SSE4.1 is supported])],
[SSE41_FLAGS=""])
CFLAGS=$SAVED_CFLAGS])
AC_SUBST([SSE41_FLAGS])
TEST_AND_ADD_CFLAGS([SSE2_FLAGS], [-msse2])
AS_IF([test -n "$SSE2_FLAGS"], [
SAVED_CFLAGS=$CFLAGS

View File

@ -119,6 +119,7 @@ DSP_DEC_OBJS = \
src/dsp/dec_mips_dsp_r2.o \
src/dsp/dec_neon.o \
src/dsp/dec_sse2.o \
src/dsp/dec_sse41.o \
src/dsp/filters.o \
src/dsp/filters_mips_dsp_r2.o \
src/dsp/filters_sse2.o \

View File

@ -1,5 +1,6 @@
noinst_LTLIBRARIES = libwebpdsp.la libwebpdsp_avx2.la
noinst_LTLIBRARIES += libwebpdsp_sse2.la libwebpdspdecode_sse2.la
noinst_LTLIBRARIES += libwebpdspdecode_sse41.la
if BUILD_LIBWEBPDECODER
noinst_LTLIBRARIES += libwebpdspdecode.la
@ -54,6 +55,11 @@ libwebpdsp_avx2_la_SOURCES += enc_avx2.c
libwebpdsp_avx2_la_CPPFLAGS = $(libwebpdsp_la_CPPFLAGS)
libwebpdsp_avx2_la_CFLAGS = $(AM_CFLAGS) $(AVX2_FLAGS)
libwebpdspdecode_sse41_la_SOURCES =
libwebpdspdecode_sse41_la_SOURCES += dec_sse41.c
libwebpdspdecode_sse41_la_CPPFLAGS = $(libwebpdsp_la_CPPFLAGS)
libwebpdspdecode_sse41_la_CFLAGS = $(AM_CFLAGS) $(SSE41_FLAGS)
libwebpdspdecode_sse2_la_SOURCES =
libwebpdspdecode_sse2_la_SOURCES += alpha_processing_sse2.c
libwebpdspdecode_sse2_la_SOURCES += dec_sse2.c
@ -81,12 +87,16 @@ noinst_HEADERS += ../webp/decode.h
libwebpdsp_la_CPPFLAGS = $(USE_EXPERIMENTAL_CODE) $(USE_SWAP_16BIT_CSP)
libwebpdsp_la_LDFLAGS = -lm
libwebpdsp_la_LIBADD = libwebpdsp_avx2.la libwebpdsp_sse2.la
libwebpdsp_la_LIBADD =
libwebpdsp_la_LIBADD += libwebpdsp_avx2.la libwebpdsp_sse2.la
libwebpdsp_la_LIBADD += libwebpdspdecode_sse41.la
if BUILD_LIBWEBPDECODER
libwebpdspdecode_la_SOURCES = $(COMMON_SOURCES)
libwebpdspdecode_la_CPPFLAGS = $(libwebpdsp_la_CPPFLAGS)
libwebpdspdecode_la_LDFLAGS = $(libwebpdsp_la_LDFLAGS)
libwebpdspdecode_la_LIBADD = libwebpdspdecode_sse2.la
libwebpdspdecode_la_LIBADD =
libwebpdspdecode_la_LIBADD += libwebpdspdecode_sse2.la
libwebpdspdecode_la_LIBADD += libwebpdspdecode_sse41.la
endif

View File

@ -674,6 +674,7 @@ VP8SimpleFilterFunc VP8SimpleVFilter16i;
VP8SimpleFilterFunc VP8SimpleHFilter16i;
extern void VP8DspInitSSE2(void);
extern void VP8DspInitSSE41(void);
extern void VP8DspInitNEON(void);
extern void VP8DspInitMIPS32(void);
extern void VP8DspInitMIPSdspR2(void);
@ -738,6 +739,11 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8DspInit(void) {
#if defined(WEBP_USE_SSE2)
if (VP8GetCPUInfo(kSSE2)) {
VP8DspInitSSE2();
#if defined(WEBP_USE_SSE41)
if (VP8GetCPUInfo(kSSE4_1)) {
VP8DspInitSSE41();
}
#endif
}
#endif
#if defined(WEBP_USE_NEON)

38
src/dsp/dec_sse41.c Normal file
View File

@ -0,0 +1,38 @@
// Copyright 2015 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// SSE4 version of some decoding functions.
//
// Author: Skal (pascal.massimino@gmail.com)
#include "./dsp.h"
#if defined(WEBP_USE_SSE41)
#include <smmintrin.h>
#include "../dec/vp8i.h"
static void HE16(uint8_t* dst) { // horizontal
int j;
const __m128i kShuffle3 = _mm_set1_epi8(3);
for (j = 16; j > 0; --j) {
const __m128i in = _mm_cvtsi32_si128(*(int*)(dst - 4));
const __m128i values = _mm_shuffle_epi8(in, kShuffle3);
_mm_storeu_si128((__m128i*)dst, values);
dst += BPS;
}
}
#endif // WEBP_USE_SSE41
extern void VP8DspInitSSE41(void);
WEBP_TSAN_IGNORE_FUNCTION void VP8DspInitSSE41(void) {
#if defined(WEBP_USE_SSE41)
VP8PredLuma16[3] = HE16;
#endif // WEBP_USE_SSE41
}

View File

@ -52,6 +52,11 @@ extern "C" {
#define WEBP_MSC_SSE2 // Visual C++ SSE2 targets
#endif
#if defined(_MSC_VER) && _MSC_VER >= 1500 && \
(defined(_M_X64) || defined(_M_IX86))
#define WEBP_MSC_SSE41 // Visual C++ SSE4.1 targets
#endif
// WEBP_HAVE_* are used to indicate the presence of the instruction set in dsp
// files without intrinsics, allowing the corresponding Init() to be called.
// Files containing intrinsics will need to be built targeting the instruction
@ -60,6 +65,10 @@ extern "C" {
#define WEBP_USE_SSE2
#endif
#if defined(__SSE4_1__) || defined(WEBP_MSC_SSE41) || defined(WEBP_HAVE_SSE41)
#define WEBP_USE_SSE41
#endif
#if defined(__AVX2__) || defined(WEBP_HAVE_AVX2)
#define WEBP_USE_AVX2
#endif