MIPS: dspr2: Optimization of some simple point-sampling functions

Change-Id: I6a4ab29bd0cc5a2951a8882cf9997032dc38bd79
This commit is contained in:
Djordje Pesut 2014-08-12 11:47:36 +02:00
parent 98c54107df
commit b61c9ceca8
8 changed files with 153 additions and 4 deletions

View File

@ -53,6 +53,7 @@ LOCAL_SRC_FILES := \
src/dsp/upsampling_sse2.c \ src/dsp/upsampling_sse2.c \
src/dsp/yuv.c \ src/dsp/yuv.c \
src/dsp/yuv_mips32.c \ src/dsp/yuv_mips32.c \
src/dsp/yuv_mips_dsp_r2.c \
src/dsp/yuv_sse2.c \ src/dsp/yuv_sse2.c \
src/enc/alpha.c \ src/enc/alpha.c \
src/enc/analysis.c \ src/enc/analysis.c \

View File

@ -187,6 +187,7 @@ DSP_DEC_OBJS = \
$(DIROBJ)\dsp\upsampling_sse2.obj \ $(DIROBJ)\dsp\upsampling_sse2.obj \
$(DIROBJ)\dsp\yuv.obj \ $(DIROBJ)\dsp\yuv.obj \
$(DIROBJ)\dsp\yuv_mips32.obj \ $(DIROBJ)\dsp\yuv_mips32.obj \
$(DIROBJ)\dsp\yuv_mips_dsp_r2.obj \
$(DIROBJ)\dsp\yuv_sse2.obj \ $(DIROBJ)\dsp\yuv_sse2.obj \
DSP_ENC_OBJS = \ DSP_ENC_OBJS = \

View File

@ -123,6 +123,7 @@ DSP_DEC_OBJS = \
src/dsp/upsampling_sse2.o \ src/dsp/upsampling_sse2.o \
src/dsp/yuv.o \ src/dsp/yuv.o \
src/dsp/yuv_mips32.o \ src/dsp/yuv_mips32.o \
src/dsp/yuv_mips_dsp_r2.o \
src/dsp/yuv_sse2.o \ src/dsp/yuv_sse2.o \
DSP_ENC_OBJS = \ DSP_ENC_OBJS = \

View File

@ -26,6 +26,7 @@ COMMON_SOURCES += upsampling_neon.c
COMMON_SOURCES += yuv.c COMMON_SOURCES += yuv.c
COMMON_SOURCES += yuv.h COMMON_SOURCES += yuv.h
COMMON_SOURCES += yuv_mips32.c COMMON_SOURCES += yuv_mips32.c
COMMON_SOURCES += yuv_mips_dsp_r2.c
ENC_SOURCES = ENC_SOURCES =
ENC_SOURCES += enc.c ENC_SOURCES += enc.c

View File

@ -118,10 +118,14 @@ static int armCPUInfo(CPUFeature feature) {
return 1; return 1;
} }
VP8CPUInfo VP8GetCPUInfo = armCPUInfo; VP8CPUInfo VP8GetCPUInfo = armCPUInfo;
#elif defined(WEBP_USE_MIPS32) #elif defined(WEBP_USE_MIPS32) || defined(WEBP_USE_MIPS_DSP_R2)
static int mipsCPUInfo(CPUFeature feature) { static int mipsCPUInfo(CPUFeature feature) {
(void)feature; if ((feature == kMIPS32) || (feature == kMIPSdspR2)) {
return 1; return 1;
} else {
return 0;
}
} }
VP8CPUInfo VP8GetCPUInfo = mipsCPUInfo; VP8CPUInfo VP8GetCPUInfo = mipsCPUInfo;
#else #else

View File

@ -64,6 +64,9 @@ extern "C" {
#define WEBP_USE_MIPS32 #define WEBP_USE_MIPS32
#if (__mips_isa_rev >= 2) #if (__mips_isa_rev >= 2)
#define WEBP_USE_MIPS32_R2 #define WEBP_USE_MIPS32_R2
#if defined(__mips_dspr2) || (__mips_dsp_rev >= 2)
#define WEBP_USE_MIPS_DSP_R2
#endif
#endif #endif
#endif #endif
@ -73,7 +76,8 @@ typedef enum {
kAVX, kAVX,
kAVX2, kAVX2,
kNEON, kNEON,
kMIPS32 kMIPS32,
kMIPSdspR2
} CPUFeature; } CPUFeature;
// returns true if the CPU supports the feature. // returns true if the CPU supports the feature.
typedef int (*VP8CPUInfo)(CPUFeature feature); typedef int (*VP8CPUInfo)(CPUFeature feature);

View File

@ -122,6 +122,7 @@ WebPSamplerRowFunc WebPSamplers[MODE_LAST];
extern void WebPInitSamplersSSE2(void); extern void WebPInitSamplersSSE2(void);
extern void WebPInitSamplersMIPS32(void); extern void WebPInitSamplersMIPS32(void);
extern void WebPInitSamplersMIPSdspR2(void);
void WebPInitSamplers(void) { void WebPInitSamplers(void) {
WebPSamplers[MODE_RGB] = YuvToRgbRow; WebPSamplers[MODE_RGB] = YuvToRgbRow;
@ -148,6 +149,11 @@ void WebPInitSamplers(void) {
WebPInitSamplersMIPS32(); WebPInitSamplersMIPS32();
} }
#endif // WEBP_USE_MIPS32 #endif // WEBP_USE_MIPS32
#if defined(WEBP_USE_MIPS_DSP_R2)
if (VP8GetCPUInfo(kMIPSdspR2)) {
WebPInitSamplersMIPSdspR2();
}
#endif // WEBP_USE_MIPS_DSP_R2
} }
} }

131
src/dsp/yuv_mips_dsp_r2.c Normal file
View File

@ -0,0 +1,131 @@
// Copyright 2014 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// MIPS DSPr2 version of the YUV to RGB simple point-sampling row functions.
//
// Author(s): Branimir Vasic (branimir.vasic@imgtec.com)
// Djordje Pesut (djordje.pesut@imgtec.com)
#include "./dsp.h"
#if defined(WEBP_USE_MIPS_DSP_R2)
#include "./yuv.h"
//------------------------------------------------------------------------------
// simple point-sampling
// Assembly fragment (string literals only) that loads one y/u/v sample and
// pre-computes the per-channel terms shared by every pixel that reuses this
// chroma sample. It must be pasted inside an asm statement that binds the
// named operands temp0..temp4, t_con_1..t_con_8, y, u and v.
//
// Register state after the fragment:
//   temp0 = t_con_5 * y[0]
//   temp1 = t_con_1 * v[0] + t_con_6
//   temp2 = t_con_3 * u[0] + t_con_2 * v[0] - t_con_7
//   temp4 = t_con_4 * u[0] + t_con_8
//   temp3 is scratch.
//
// The final line deliberately carries no trailing backslash, so the macro
// ends here regardless of what the next source line is.
#define ROW_FUNC_PART_1()                                                      \
  "lbu %[temp3], 0(%[v]) \n\t"                                                 \
  "lbu %[temp4], 0(%[u]) \n\t"                                                 \
  "lbu %[temp0], 0(%[y]) \n\t"                                                 \
  "mul %[temp1], %[t_con_1], %[temp3] \n\t"                                    \
  "mul %[temp3], %[t_con_2], %[temp3] \n\t"                                    \
  "mul %[temp2], %[t_con_3], %[temp4] \n\t"                                    \
  "mul %[temp4], %[t_con_4], %[temp4] \n\t"                                    \
  "mul %[temp0], %[t_con_5], %[temp0] \n\t"                                    \
  "addu %[temp1], %[temp1], %[t_con_6] \n\t"                                   \
  "subu %[temp3], %[temp3], %[t_con_7] \n\t"                                   \
  "addu %[temp2], %[temp2], %[temp3] \n\t"                                     \
  "addu %[temp4], %[temp4], %[t_con_8] \n\t"
// Assembly fragment (string literals only) that turns the terms prepared in
// temp0/temp1/temp2/temp4 into three output bytes and stores them.
//
//   R, G, B : byte offsets from dst, pasted verbatim into the "sb" operands
//             (they may be assembler expressions such as "0 + 3").
//   K       : 1 to also fetch y[1] and scale it by t_con_5 into temp0, ready
//             for a second invocation on the next pixel; 0 to skip that.
//             It is evaluated by the assembler's .if directive.
//
// Steps: temp5 = temp0 + temp1, temp6 = temp0 - temp2, temp7 = temp0 + temp4;
// each sum goes through shll_s.w (by 9) and precrqu_s.qb.ph (the DSP ASE
// saturating shift / precision-reduce instructions -- see the MIPS DSP
// manual for exact semantics), then "srl 24" moves the top byte to the low
// bits before the byte stores to R(dst), G(dst) and B(dst).
//
// The final line deliberately carries no trailing backslash, so the macro
// ends here regardless of what the next source line is.
#define ROW_FUNC_PART_2(R, G, B, K)                                            \
  "addu %[temp5], %[temp0], %[temp1] \n\t"                                     \
  "subu %[temp6], %[temp0], %[temp2] \n\t"                                     \
  "addu %[temp7], %[temp0], %[temp4] \n\t"                                     \
  ".if "#K" \n\t"                                                              \
  "lbu %[temp0], 1(%[y]) \n\t"                                                 \
  ".endif \n\t"                                                                \
  "shll_s.w %[temp5], %[temp5], 9 \n\t"                                        \
  "shll_s.w %[temp6], %[temp6], 9 \n\t"                                        \
  ".if "#K" \n\t"                                                              \
  "mul %[temp0], %[t_con_5], %[temp0] \n\t"                                    \
  ".endif \n\t"                                                                \
  "shll_s.w %[temp7], %[temp7], 9 \n\t"                                        \
  "precrqu_s.qb.ph %[temp5], %[temp5], $zero \n\t"                             \
  "precrqu_s.qb.ph %[temp6], %[temp6], $zero \n\t"                             \
  "precrqu_s.qb.ph %[temp7], %[temp7], $zero \n\t"                             \
  "srl %[temp5], %[temp5], 24 \n\t"                                            \
  "srl %[temp6], %[temp6], 24 \n\t"                                            \
  "srl %[temp7], %[temp7], 24 \n\t"                                            \
  "sb %[temp5], "#R"(%[dst]) \n\t"                                             \
  "sb %[temp6], "#G"(%[dst]) \n\t"                                             \
  "sb %[temp7], "#B"(%[dst]) \n\t"
// Operand and clobber lists shared by every asm statement built from the
// ROW_FUNC_PART_* fragments. Despite the name it supplies all three
// colon-separated sections of the extended-asm statement:
//   outputs  : temp0..temp7, each early-clobber ("=&r") so none of them is
//              allocated to the same register as an input operand;
//   inputs   : the eight t_con_* constants and the y/u/v/dst pointers;
//   clobbers : "memory" (the asm reads and writes through the pointers) and
//              the "hi"/"lo" registers.
// The enclosing scope must declare variables with exactly these names.
//
// The final line deliberately carries no trailing backslash, so the macro
// ends here regardless of what the next source line is.
#define ASM_CLOBBER_LIST()                                                     \
  : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),             \
    [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),             \
    [temp6]"=&r"(temp6), [temp7]"=&r"(temp7)                                   \
  : [t_con_1]"r"(t_con_1), [t_con_2]"r"(t_con_2), [t_con_3]"r"(t_con_3),       \
    [t_con_4]"r"(t_con_4), [t_con_5]"r"(t_con_5), [t_con_6]"r"(t_con_6),       \
    [u]"r"(u), [v]"r"(v), [y]"r"(y), [dst]"r"(dst),                            \
    [t_con_7]"r"(t_con_7), [t_con_8]"r"(t_con_8)                               \
  : "memory", "hi", "lo"
// Generates a static row sampler
//   void FUNC_NAME(const uint8_t* y, const uint8_t* u, const uint8_t* v,
//                  uint8_t* dst, int len);
// that converts 'len' luma samples to packed output pixels, using one u/v
// sample for every two consecutive y samples.
//
//   FUNC_NAME : name of the generated function.
//   XSTEP     : number of bytes per output pixel.
//   R, G, B   : byte offsets of the three colour channels inside a pixel.
//   A         : byte offset of the alpha channel, written as 0xff; 0 means
//               "no alpha byte is written".
//
// The conversion constants (kVToR, kVToG, kUToG, kUToB, kYScale, kRCst,
// kGCst, kBCst) are not defined in this block -- presumably they come from
// yuv.h. They are copied into locals named t_con_* because the asm operand
// list refers to them by those names.
//
// A and XSTEP are parenthesized wherever they appear in C expressions. They
// are intentionally left bare inside the ROW_FUNC_PART_2() arguments because
// those are stringified and handed to the assembler as address offsets.
#define ROW_FUNC(FUNC_NAME, XSTEP, R, G, B, A)                                 \
static void FUNC_NAME(const uint8_t* y,                                        \
                      const uint8_t* u, const uint8_t* v,                      \
                      uint8_t* dst, int len) {                                 \
  int i;                                                                       \
  uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;             \
  const int t_con_1 = kVToR;                                                   \
  const int t_con_2 = kVToG;                                                   \
  const int t_con_3 = kUToG;                                                   \
  const int t_con_4 = kUToB;                                                   \
  const int t_con_5 = kYScale;                                                 \
  const int t_con_6 = kRCst;                                                   \
  const int t_con_7 = kGCst;                                                   \
  const int t_con_8 = kBCst;                                                   \
  /* Main loop: two pixels per iteration, sharing one u/v sample. */           \
  for (i = 0; i < (len >> 1); i++) {                                           \
    __asm__ volatile (                                                         \
      ROW_FUNC_PART_1()                                                        \
      ROW_FUNC_PART_2(R, G, B, 1)                                              \
      ROW_FUNC_PART_2(R + XSTEP, G + XSTEP, B + XSTEP, 0)                      \
      ASM_CLOBBER_LIST()                                                       \
    );                                                                         \
    if (A) dst[(A)] = dst[(A) + (XSTEP)] = 0xff;                               \
    y += 2;                                                                    \
    ++u;                                                                       \
    ++v;                                                                       \
    dst += 2 * (XSTEP);                                                        \
  }                                                                            \
  /* Odd length: one trailing pixel, no look-ahead load of y[1]. */            \
  if (len & 1) {                                                               \
    __asm__ volatile (                                                         \
      ROW_FUNC_PART_1()                                                        \
      ROW_FUNC_PART_2(R, G, B, 0)                                              \
      ASM_CLOBBER_LIST()                                                       \
    );                                                                         \
    if (A) dst[(A)] = 0xff;                                                    \
  }                                                                            \
}
// Instantiate the four row samplers. Arguments are, in order: function name,
// bytes per output pixel (XSTEP), byte offsets of the R, G and B channels
// within a pixel, and the byte offset of the alpha channel (0 = the
// generated function writes no alpha byte).
ROW_FUNC(YuvToRgbRow, 3, 0, 1, 2, 0)
ROW_FUNC(YuvToRgbaRow, 4, 0, 1, 2, 3)
ROW_FUNC(YuvToBgrRow, 3, 2, 1, 0, 0)
ROW_FUNC(YuvToBgraRow, 4, 2, 1, 0, 3)
// The generator macros are only needed for the instantiations above; drop
// them so they cannot leak into later code.
#undef ROW_FUNC
#undef ASM_CLOBBER_LIST
#undef ROW_FUNC_PART_2
#undef ROW_FUNC_PART_1
#endif // WEBP_USE_MIPS_DSP_R2
//------------------------------------------------------------------------------
// Installs the MIPS DSPr2 row samplers into the global WebPSamplers[] table.
// The function is always defined so that it can be referenced
// unconditionally; when WEBP_USE_MIPS_DSP_R2 is not defined its body is empty
// and the table is left untouched.
extern void WebPInitSamplersMIPSdspR2(void);

void WebPInitSamplersMIPSdspR2(void) {
#if defined(WEBP_USE_MIPS_DSP_R2)
  WebPSamplerRowFunc* const samplers = WebPSamplers;

  // Only these four output modes have a DSPr2 implementation in this file;
  // every other entry of the table is left as it was.
  samplers[MODE_BGRA] = YuvToBgraRow;
  samplers[MODE_BGR]  = YuvToBgrRow;
  samplers[MODE_RGBA] = YuvToRgbaRow;
  samplers[MODE_RGB]  = YuvToRgbRow;
#endif  // WEBP_USE_MIPS_DSP_R2
}