From 7ce8788b0625b8a1628f2fe53b0073e71433dd54 Mon Sep 17 00:00:00 2001 From: Djordje Pesut Date: Thu, 20 Nov 2014 16:53:05 +0100 Subject: [PATCH] MIPS: dspr2: added optimization for function MakeARGB32 Calls to the inline function MakeARGB32 are replaced with calls through function pointers to functions that build the (a)rgb values for an entire row. Change-Id: Ia4bd4be171a46c1e1821e408b073ff5791c587a9 --- Android.mk | 2 + Makefile.vc | 2 + makefile.unix | 2 + src/dsp/Makefile.am | 2 + src/dsp/argb.c | 57 ++++++++++++++++++++ src/dsp/argb_mips_dsp_r2.c | 107 +++++++++++++++++++++++++++++++++++++ src/dsp/dsp.h | 10 ++++ src/enc/picture.c | 2 + src/enc/picture_csp.c | 24 +++++---- 9 files changed, 197 insertions(+), 11 deletions(-) create mode 100644 src/dsp/argb.c create mode 100644 src/dsp/argb_mips_dsp_r2.c diff --git a/Android.mk b/Android.mk index 814ad437..c33c4e02 100644 --- a/Android.mk +++ b/Android.mk @@ -35,6 +35,8 @@ LOCAL_SRC_FILES := \ src/dsp/alpha_processing.c \ src/dsp/alpha_processing_mips_dsp_r2.c \ src/dsp/alpha_processing_sse2.c \ + src/dsp/argb.c \ + src/dsp/argb_mips_dsp_r2.c \ src/dsp/cpu.c \ src/dsp/dec.c \ src/dsp/dec_clip_tables.c \ diff --git a/Makefile.vc b/Makefile.vc index 1bc7ff0c..e973df09 100644 --- a/Makefile.vc +++ b/Makefile.vc @@ -180,6 +180,8 @@ DSP_DEC_OBJS = \ $(DIROBJ)\dsp\alpha_processing.obj \ $(DIROBJ)\dsp\alpha_processing_mips_dsp_r2.obj \ $(DIROBJ)\dsp\alpha_processing_sse2.obj \ + $(DIROBJ)\dsp\argb.obj \ + $(DIROBJ)\dsp\argb_mips_dsp_r2.obj \ $(DIROBJ)\dsp\cpu.obj \ $(DIROBJ)\dsp\dec.obj \ $(DIROBJ)\dsp\dec_clip_tables.obj \ diff --git a/makefile.unix b/makefile.unix index fd41527f..57f2fe46 100644 --- a/makefile.unix +++ b/makefile.unix @@ -110,6 +110,8 @@ DSP_DEC_OBJS = \ src/dsp/alpha_processing.o \ src/dsp/alpha_processing_mips_dsp_r2.o \ src/dsp/alpha_processing_sse2.o \ + src/dsp/argb.o \ + src/dsp/argb_mips_dsp_r2.o \ src/dsp/cpu.o \ src/dsp/dec.o \ src/dsp/dec_clip_tables.o \ diff --git a/src/dsp/Makefile.am b/src/dsp/Makefile.am index 
7395178d..a1336273 100644 --- a/src/dsp/Makefile.am +++ b/src/dsp/Makefile.am @@ -11,6 +11,8 @@ commondir = $(includedir)/webp COMMON_SOURCES = COMMON_SOURCES += alpha_processing.c COMMON_SOURCES += alpha_processing_mips_dsp_r2.c +COMMON_SOURCES += argb.c +COMMON_SOURCES += argb_mips_dsp_r2.c COMMON_SOURCES += cpu.c COMMON_SOURCES += dec.c COMMON_SOURCES += dec_clip_tables.c diff --git a/src/dsp/argb.c b/src/dsp/argb.c new file mode 100644 index 00000000..bdbdd677 --- /dev/null +++ b/src/dsp/argb.c @@ -0,0 +1,57 @@ +// Copyright 2014 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// ARGB making functions. 
+// +// Author: Djordje Pesut (djordje.pesut@imgtec.com) + +#include "./dsp.h" + +static WEBP_INLINE uint32_t MakeARGB32(int a, int r, int g, int b) { + return (((uint32_t)a << 24) | (r << 16) | (g << 8) | b); +} + +static void PackARGB(const uint8_t* a, const uint8_t* r, const uint8_t* g, + const uint8_t* b, int len, int step, uint32_t* out) { + int i, offset = 0; + for (i = 0; i < len; ++i) { + out[i] = MakeARGB32(a[offset], r[offset], g[offset], b[offset]); + offset += step; + } +} + +static void PackRGB(const uint8_t* r, const uint8_t* g, const uint8_t* b, + int len, int step, uint32_t* out) { + int i, offset = 0; + for (i = 0; i < len; ++i) { + out[i] = MakeARGB32(0xff, r[offset], g[offset], b[offset]); + offset += step; + } +} + +void (*VP8PackARGB)(const uint8_t*, const uint8_t*, const uint8_t*, + const uint8_t*, int, int, uint32_t*); +void (*VP8PackRGB)(const uint8_t*, const uint8_t*, const uint8_t*, + int, int, uint32_t*); + +extern void VP8EncDspARGBInitMIPSdspR2(void); + +WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspARGBInit(void) { + VP8PackARGB = PackARGB; + VP8PackRGB = PackRGB; + + // If defined, use CPUInfo() to overwrite some pointers with faster versions. + if (VP8GetCPUInfo != NULL) { +#if defined(WEBP_USE_MIPS_DSP_R2) + if (VP8GetCPUInfo(kMIPSdspR2)) { + VP8EncDspARGBInitMIPSdspR2(); + } +#endif + } +} diff --git a/src/dsp/argb_mips_dsp_r2.c b/src/dsp/argb_mips_dsp_r2.c new file mode 100644 index 00000000..5b9fa8f8 --- /dev/null +++ b/src/dsp/argb_mips_dsp_r2.c @@ -0,0 +1,107 @@ +// Copyright 2014 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. 
+// ----------------------------------------------------------------------------- +// +// ARGB making functions (mips version). +// +// Author: Djordje Pesut (djordje.pesut@imgtec.com) + +#include "./dsp.h" + +#if defined(WEBP_USE_MIPS_DSP_R2) + +static void PackARGB(const uint8_t* a, const uint8_t* r, const uint8_t* g, + const uint8_t* b, int len, int step, uint32_t* out) { + int temp0, temp1, temp2, temp3, offset; + const int rest = len & 1; + const uint32_t* const loop_end = out + len - rest; + __asm__ volatile ( + "xor %[offset], %[offset], %[offset] \n\t" + "beq %[loop_end], %[out], 0f \n\t" + "2: \n\t" + "lbux %[temp0], %[offset](%[a]) \n\t" + "lbux %[temp1], %[offset](%[r]) \n\t" + "lbux %[temp2], %[offset](%[g]) \n\t" + "lbux %[temp3], %[offset](%[b]) \n\t" + "ins %[temp1], %[temp0], 16, 16 \n\t" + "ins %[temp3], %[temp2], 16, 16 \n\t" + "addiu %[out], %[out], 4 \n\t" + "precr.qb.ph %[temp0], %[temp1], %[temp3] \n\t" + "sw %[temp0], -4(%[out]) \n\t" + "addu %[offset], %[offset], %[step] \n\t" + "bne %[loop_end], %[out], 2b \n\t" + "0: \n\t" + "beq %[rest], $zero, 1f \n\t" + "lbux %[temp0], %[offset](%[a]) \n\t" + "lbux %[temp1], %[offset](%[r]) \n\t" + "lbux %[temp2], %[offset](%[g]) \n\t" + "lbux %[temp3], %[offset](%[b]) \n\t" + "ins %[temp1], %[temp0], 16, 16 \n\t" + "ins %[temp3], %[temp2], 16, 16 \n\t" + "precr.qb.ph %[temp0], %[temp1], %[temp3] \n\t" + "sw %[temp0], 0(%[out]) \n\t" + "1: \n\t" + : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), + [temp3]"=&r"(temp3), [offset]"=&r"(offset), [out]"+&r"(out) + : [a]"r"(a), [r]"r"(r), [g]"r"(g), [b]"r"(b), [step]"r"(step), + [loop_end]"r"(loop_end), [rest]"r"(rest) + : "memory" + ); +} + +static void PackRGB(const uint8_t* r, const uint8_t* g, const uint8_t* b, + int len, int step, uint32_t* out) { + int temp0, temp1, temp2, offset; + const int rest = len & 1; + const int a = 0xff; + const uint32_t* const loop_end = out + len - rest; + __asm__ volatile ( + "xor %[offset], %[offset], 
%[offset] \n\t" + "beq %[loop_end], %[out], 0f \n\t" + "2: \n\t" + "lbux %[temp0], %[offset](%[r]) \n\t" + "lbux %[temp1], %[offset](%[g]) \n\t" + "lbux %[temp2], %[offset](%[b]) \n\t" + "ins %[temp0], %[a], 16, 16 \n\t" + "ins %[temp2], %[temp1], 16, 16 \n\t" + "addiu %[out], %[out], 4 \n\t" + "precr.qb.ph %[temp0], %[temp0], %[temp2] \n\t" + "sw %[temp0], -4(%[out]) \n\t" + "addu %[offset], %[offset], %[step] \n\t" + "bne %[loop_end], %[out], 2b \n\t" + "0: \n\t" + "beq %[rest], $zero, 1f \n\t" + "lbux %[temp0], %[offset](%[r]) \n\t" + "lbux %[temp1], %[offset](%[g]) \n\t" + "lbux %[temp2], %[offset](%[b]) \n\t" + "ins %[temp0], %[a], 16, 16 \n\t" + "ins %[temp2], %[temp1], 16, 16 \n\t" + "precr.qb.ph %[temp0], %[temp0], %[temp2] \n\t" + "sw %[temp0], 0(%[out]) \n\t" + "1: \n\t" + : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), + [offset]"=&r"(offset), [out]"+&r"(out) + : [a]"r"(a), [r]"r"(r), [g]"r"(g), [b]"r"(b), [step]"r"(step), + [loop_end]"r"(loop_end), [rest]"r"(rest) + : "memory" + ); +} + +#endif // WEBP_USE_MIPS_DSP_R2 + +//------------------------------------------------------------------------------ +// Entry point + +extern WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspARGBInitMIPSdspR2(void); + +WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspARGBInitMIPSdspR2(void) { +#if defined(WEBP_USE_MIPS_DSP_R2) + VP8PackARGB = PackARGB; + VP8PackRGB = PackRGB; +#endif // WEBP_USE_MIPS_DSP_R2 +} diff --git a/src/dsp/dsp.h b/src/dsp/dsp.h index d35235ed..b2b30114 100644 --- a/src/dsp/dsp.h +++ b/src/dsp/dsp.h @@ -337,6 +337,16 @@ void WebPMultARGBRowC(uint32_t* const ptr, int width, int inverse); // To be called first before using the above. WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessing(void); +// ARGB making functions. 
+extern void (*VP8PackARGB)(const uint8_t* a, const uint8_t* r, + const uint8_t* g, const uint8_t* b, int len, + int step, uint32_t* out); +extern void (*VP8PackRGB)(const uint8_t* r, const uint8_t* g, const uint8_t* b, + int len, int step, uint32_t* out); + +// To be called first before using the above. +WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspARGBInit(void); + #ifdef __cplusplus } // extern "C" #endif diff --git a/src/enc/picture.c b/src/enc/picture.c index 9a66fbe7..e1e72c74 100644 --- a/src/enc/picture.c +++ b/src/enc/picture.c @@ -15,6 +15,7 @@ #include #include "./vp8enci.h" +#include "../dsp/dsp.h" #include "../utils/utils.h" //------------------------------------------------------------------------------ @@ -38,6 +39,7 @@ int WebPPictureInitInternal(WebPPicture* picture, int version) { memset(picture, 0, sizeof(*picture)); picture->writer = DummyWriter; WebPEncodingSetError(picture, VP8_ENC_OK); + VP8EncDspARGBInit(); } return 1; } diff --git a/src/enc/picture_csp.c b/src/enc/picture_csp.c index e18f1096..5a1f8de8 100644 --- a/src/enc/picture_csp.c +++ b/src/enc/picture_csp.c @@ -32,10 +32,6 @@ static const union { } test_endian = { 0xff000000u }; #define ALPHA_IS_LAST (test_endian.bytes[3] == 0xff) -static WEBP_INLINE uint32_t MakeARGB32(int a, int r, int g, int b) { - return (((uint32_t)a << 24) | (r << 16) | (g << 8) | b); -} - //------------------------------------------------------------------------------ // Detection of non-trivial transparency @@ -1065,13 +1061,19 @@ static int Import(WebPPicture* const picture, if (!WebPPictureAlloc(picture)) return 0; assert(step >= (import_alpha ? 4 : 3)); - for (y = 0; y < height; ++y) { - uint32_t* const dst = &picture->argb[y * picture->argb_stride]; - int x; - for (x = 0; x < width; ++x) { - const int offset = step * x + y * rgb_stride; - dst[x] = MakeARGB32(import_alpha ? 
a_ptr[offset] : 0xff, - r_ptr[offset], g_ptr[offset], b_ptr[offset]); + if (import_alpha) { + for (y = 0; y < height; ++y) { + uint32_t* const dst = &picture->argb[y * picture->argb_stride]; + const int offset = y * rgb_stride; + VP8PackARGB(a_ptr + offset, r_ptr + offset, g_ptr + offset, + b_ptr + offset, width, step, dst); + } + } else { + for (y = 0; y < height; ++y) { + uint32_t* const dst = &picture->argb[y * picture->argb_stride]; + const int offset = y * rgb_stride; + VP8PackRGB(r_ptr + offset, g_ptr + offset, b_ptr + offset, + width, step, dst); } } return 1;