From dce64bfa1b00b16ec53b9fb5ca171df929bd0f0f Mon Sep 17 00:00:00 2001 From: Parag Salasakar Date: Fri, 1 Jul 2016 19:26:00 +0530 Subject: [PATCH] Add MSA optimized alpha filter functions We add the following MSA optimized alpha filter functions: - HorizontalFilter - VerticalFilter - GradientFilter Change-Id: I71e2e04050e569b8c0bf086fadf210ee16d50924 --- Android.mk | 3 +- Makefile.vc | 3 +- build.gradle | 1 + makefile.unix | 1 + src/dsp/Makefile.am | 1 + src/dsp/filters.c | 6 ++ src/dsp/filters_msa.c | 202 ++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 215 insertions(+), 2 deletions(-) create mode 100644 src/dsp/filters_msa.c diff --git a/Android.mk b/Android.mk index 21042b2a..8aaa7544 100644 --- a/Android.mk +++ b/Android.mk @@ -50,15 +50,16 @@ dsp_dec_srcs := \ src/dsp/dec_mips32.c \ src/dsp/dec_mips_dsp_r2.c \ src/dsp/dec_msa.c \ - src/dsp/lossless_msa.c \ src/dsp/dec_neon.$(NEON) \ src/dsp/dec_sse2.c \ src/dsp/dec_sse41.c \ src/dsp/filters.c \ src/dsp/filters_mips_dsp_r2.c \ + src/dsp/filters_msa.c \ src/dsp/filters_sse2.c \ src/dsp/lossless.c \ src/dsp/lossless_mips_dsp_r2.c \ + src/dsp/lossless_msa.c \ src/dsp/lossless_neon.$(NEON) \ src/dsp/lossless_sse2.c \ src/dsp/rescaler.c \ diff --git a/Makefile.vc b/Makefile.vc index c37f945a..502d4898 100644 --- a/Makefile.vc +++ b/Makefile.vc @@ -195,15 +195,16 @@ DSP_DEC_OBJS = \ $(DIROBJ)\dsp\dec_mips32.obj \ $(DIROBJ)\dsp\dec_mips_dsp_r2.obj \ $(DIROBJ)\dsp\dec_msa.obj \ - $(DIROBJ)\dsp\lossless_msa.obj \ $(DIROBJ)\dsp\dec_neon.obj \ $(DIROBJ)\dsp\dec_sse2.obj \ $(DIROBJ)\dsp\dec_sse41.obj \ $(DIROBJ)\dsp\filters.obj \ $(DIROBJ)\dsp\filters_mips_dsp_r2.obj \ + $(DIROBJ)\dsp\filters_msa.obj \ $(DIROBJ)\dsp\filters_sse2.obj \ $(DIROBJ)\dsp\lossless.obj \ $(DIROBJ)\dsp\lossless_mips_dsp_r2.obj \ + $(DIROBJ)\dsp\lossless_msa.obj \ $(DIROBJ)\dsp\lossless_neon.obj \ $(DIROBJ)\dsp\lossless_sse2.obj \ $(DIROBJ)\dsp\rescaler.obj \ diff --git a/build.gradle b/build.gradle index 450e0895..0358e033 100644 --- 
a/build.gradle +++ b/build.gradle @@ -125,6 +125,7 @@ model { include "dec_sse41.c" include "filters.c" include "filters_mips_dsp_r2.c" + include "filters_msa.c" include "filters_sse2.c" include "lossless.c" include "lossless_mips_dsp_r2.c" diff --git a/makefile.unix b/makefile.unix index 9ce2969c..abb7649a 100644 --- a/makefile.unix +++ b/makefile.unix @@ -144,6 +144,7 @@ DSP_DEC_OBJS = \ src/dsp/dec_sse41.o \ src/dsp/filters.o \ src/dsp/filters_mips_dsp_r2.o \ + src/dsp/filters_msa.o \ src/dsp/filters_sse2.o \ src/dsp/lossless.o \ src/dsp/lossless_mips_dsp_r2.o \ diff --git a/src/dsp/Makefile.am b/src/dsp/Makefile.am index fc772243..b4152f5a 100644 --- a/src/dsp/Makefile.am +++ b/src/dsp/Makefile.am @@ -83,6 +83,7 @@ libwebpdspdecode_neon_la_CFLAGS = $(libwebpdsp_neon_la_CFLAGS) libwebpdspdecode_msa_la_SOURCES = libwebpdspdecode_msa_la_SOURCES += dec_msa.c +libwebpdspdecode_msa_la_SOURCES += filters_msa.c libwebpdspdecode_msa_la_SOURCES += lossless_msa.c libwebpdspdecode_msa_la_SOURCES += msa_macro.h libwebpdspdecode_msa_la_CPPFLAGS = $(libwebpdsp_la_CPPFLAGS) diff --git a/src/dsp/filters.c b/src/dsp/filters.c index 9f04faf0..64fdedc3 100644 --- a/src/dsp/filters.c +++ b/src/dsp/filters.c @@ -228,6 +228,7 @@ WebPUnfilterFunc WebPUnfilters[WEBP_FILTER_LAST]; extern void VP8FiltersInitMIPSdspR2(void); extern void VP8FiltersInitSSE2(void); +extern void VP8FiltersInitMSA(void); static volatile VP8CPUInfo filters_last_cpuinfo_used = (VP8CPUInfo)&filters_last_cpuinfo_used; @@ -255,6 +256,11 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInit(void) { if (VP8GetCPUInfo(kMIPSdspR2)) { VP8FiltersInitMIPSdspR2(); } +#endif +#if defined(WEBP_USE_MSA) + if (VP8GetCPUInfo(kMSA)) { + VP8FiltersInitMSA(); + } #endif } filters_last_cpuinfo_used = VP8GetCPUInfo; diff --git a/src/dsp/filters_msa.c b/src/dsp/filters_msa.c new file mode 100644 index 00000000..206c16d8 --- /dev/null +++ b/src/dsp/filters_msa.c @@ -0,0 +1,202 @@ +// Copyright 2016 Google Inc. All Rights Reserved. 
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// MSA variant of alpha filters
+//
+// Author: Prashant Patil (prashant.patil@imgtec.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_MSA)
+
+#include "./msa_macro.h"
+#include <assert.h>
+
+// Byte-wise residual: dst[i] = src[i] - pred[i] for i in [0, length).
+static WEBP_INLINE void PredictLineInverse0(const uint8_t* src,
+                                            const uint8_t* pred,
+                                            uint8_t* dst, int length) {
+  v16u8 src0, pred0, dst0;
+  assert(length >= 0);
+  while (length >= 32) {  // 32 bytes (two vectors) per iteration
+    v16u8 src1, pred1, dst1;
+    LD_UB2(src, 16, src0, src1);
+    LD_UB2(pred, 16, pred0, pred1);
+    SUB2(src0, pred0, src1, pred1, dst0, dst1);
+    ST_UB2(dst0, dst1, dst, 16);
+    src += 32;
+    pred += 32;
+    dst += 32;
+    length -= 32;
+  }
+  if (length > 0) {
+    int i;
+    if (length >= 16) {  // one more full 16-byte vector
+      src0 = LD_UB(src);
+      pred0 = LD_UB(pred);
+      dst0 = src0 - pred0;
+      ST_UB(dst0, dst);
+      src += 16;
+      pred += 16;
+      dst += 16;
+      length -= 16;
+    }
+    for (i = 0; i < length; i++) {  // scalar tail (fewer than 16 bytes)
+      dst[i] = src[i] - pred[i];
+    }
+  }
+}
+
+//------------------------------------------------------------------------------
+// Argument checks; expects width, height and stride in the caller's scope.
+
+#define SANITY_CHECK(in, out)  \
+  assert((in) != NULL);        \
+  assert((out) != NULL);       \
+  assert(width > 0);           \
+  assert(height > 0);          \
+  assert(stride >= width)
+
+//------------------------------------------------------------------------------
+// Horizontal filter: each pixel is predicted from its left neighbor.
+
+static void HorizontalFilter(const uint8_t* data, int width, int height,
+                             int stride, uint8_t* filtered_data) {
+  const uint8_t* preds = data;
+  const uint8_t* in = data;
+  uint8_t* out = filtered_data;
+  int row = 1;
+  SANITY_CHECK(in, out);
+
+  // Leftmost pixel is the same as input for topmost scanline.
+  out[0] = in[0];
+  PredictLineInverse0(in + 1, preds, out + 1, width - 1);
+  preds += stride;
+  in += stride;
+  out += stride;
+  // Filter line-by-line.
+  while (row < height) {
+    // Leftmost pixel is predicted from above.
+    PredictLineInverse0(in, preds - stride, out, 1);
+    PredictLineInverse0(in + 1, preds, out + 1, width - 1);
+    ++row;
+    preds += stride;
+    in += stride;
+    out += stride;
+  }
+}
+
+//------------------------------------------------------------------------------
+// Gradient filter: predict each pixel as clip(left + top - top_left).
+
+static WEBP_INLINE void PredictLineGradient(const uint8_t* pinput,
+                                            const uint8_t* ppred,
+                                            uint8_t* poutput, int stride,
+                                            int size) {
+  int w;
+  const v16i8 zero = { 0 };
+  while (size >= 16) {  // 16 pixels per iteration
+    v16u8 pred0, dst0;
+    v8i16 a0, a1, b0, b1, c0, c1;
+    const v16u8 tmp0 = LD_UB(ppred - 1);           // left
+    const v16u8 tmp1 = LD_UB(ppred - stride);      // top
+    const v16u8 tmp2 = LD_UB(ppred - stride - 1);  // top-left
+    const v16u8 src0 = LD_UB(pinput);
+    ILVRL_B2_SH(zero, tmp0, a0, a1);
+    ILVRL_B2_SH(zero, tmp1, b0, b1);
+    ILVRL_B2_SH(zero, tmp2, c0, c1);
+    ADD2(a0, b0, a1, b1, a0, a1);  // left + top
+    SUB2(a0, c0, a1, c1, a0, a1);  // ... - top-left
+    CLIP_SH2_0_255(a0, a1);        // presumably clamps to [0, 255]
+    pred0 = (v16u8)__msa_pckev_b((v16i8)a1, (v16i8)a0);
+    dst0 = src0 - pred0;
+    ST_UB(dst0, poutput);
+    ppred += 16;
+    pinput += 16;
+    poutput += 16;
+    size -= 16;
+  }
+  for (w = 0; w < size; ++w) {  // scalar tail, same predictor
+    const int pred = ppred[w - 1] + ppred[w - stride] - ppred[w - stride - 1];
+    poutput[w] = pinput[w] - (pred < 0 ? 0 : pred > 255 ? 255 : pred);
+  }
+}
+
+// Row 0 is left-predicted; column 0 of later rows is predicted from above.
+static void GradientFilter(const uint8_t* data, int width, int height,
+                           int stride, uint8_t* filtered_data) {
+  const uint8_t* in = data;
+  const uint8_t* preds = data;
+  uint8_t* out = filtered_data;
+  int row = 1;
+  SANITY_CHECK(in, out);
+
+  // left prediction for top scan-line
+  out[0] = in[0];
+  PredictLineInverse0(in + 1, preds, out + 1, width - 1);
+  preds += stride;
+  in += stride;
+  out += stride;
+  // Filter line-by-line.
+  while (row < height) {
+    out[0] = in[0] - preds[-stride];  // leftmost pixel: predicted from above
+    PredictLineGradient(in + 1, preds + 1, out + 1, stride, width - 1);
+    ++row;
+    preds += stride;
+    in += stride;
+    out += stride;
+  }
+}
+
+//------------------------------------------------------------------------------
+// Vertical filter: each pixel is predicted from the one above it.
+
+static void VerticalFilter(const uint8_t* data, int width, int height,
+                           int stride, uint8_t* filtered_data) {
+  const uint8_t* in = data;
+  const uint8_t* preds = data;
+  uint8_t* out = filtered_data;
+  int row = 1;
+  SANITY_CHECK(in, out);
+
+  // Very first top-left pixel is copied.
+  out[0] = in[0];
+  // Rest of top scan-line is left-predicted.
+  PredictLineInverse0(in + 1, preds, out + 1, width - 1);
+  in += stride;  // 'preds' is not advanced here, so it stays one row above 'in'
+  out += stride;
+
+  // Filter line-by-line.
+  while (row < height) {
+    PredictLineInverse0(in, preds, out, width);
+    ++row;
+    preds += stride;
+    in += stride;
+    out += stride;
+  }
+}
+
+#undef SANITY_CHECK
+
+//------------------------------------------------------------------------------
+// Entry point: installs the MSA filters into the WebPFilters[] table.
+
+extern void VP8FiltersInitMSA(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInitMSA(void) {
+  WebPFilters[WEBP_FILTER_HORIZONTAL] = HorizontalFilter;
+  WebPFilters[WEBP_FILTER_VERTICAL] = VerticalFilter;
+  WebPFilters[WEBP_FILTER_GRADIENT] = GradientFilter;
+}
+
+#else  // !WEBP_USE_MSA
+
+WEBP_DSP_INIT_STUB(VP8FiltersInitMSA)
+
+#endif  // WEBP_USE_MSA