From 1d62acf6af32f965587d3dc598a39d65a50d0c45 Mon Sep 17 00:00:00 2001 From: Djordje Pesut Date: Tue, 22 Apr 2014 14:55:14 +0200 Subject: [PATCH] MIPS: MIPS32r1: Added optimization for HuffmanCost functions. HuffmanCost and HuffmanCostCombined optimized and added 'const' to some variables from ExtraCost functions. Change-Id: I28b2b357a06766bee78bdab294b5fc8c05ac120d --- src/dsp/lossless_mips32.c | 97 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 91 insertions(+), 6 deletions(-) diff --git a/src/dsp/lossless_mips32.c b/src/dsp/lossless_mips32.c index dc329118..a8d3ca7f 100644 --- a/src/dsp/lossless_mips32.c +++ b/src/dsp/lossless_mips32.c @@ -17,9 +17,10 @@ #if defined(WEBP_USE_MIPS32) +#include <assert.h> #include <math.h> #include <stdlib.h> -#include <assert.h> +#include <string.h> #define APPROX_LOG_WITH_CORRECTION_MAX 65536 #define APPROX_LOG_MAX 4096 @@ -108,8 +109,8 @@ static float FastLog2Slow(int v) { // return (double)cost; static double ExtraCost(const int* const population, int length) { int i, temp0, temp1; - int* pop = (int*)&population[4]; - const int* LoopEnd = (int*)&population[length]; + const int* pop = &population[4]; + const int* const LoopEnd = &population[length]; __asm__ volatile( "mult $zero, $zero \n\t" @@ -154,9 +155,9 @@ static double ExtraCost(const int* const population, int length) { static double ExtraCostCombined(const int* const X, const int* const Y, int length) { int i, temp0, temp1, temp2, temp3; - int* pX = (int*)&X[4]; - int* pY = (int*)&Y[4]; - const int* LoopEnd = (int*)&X[length]; + const int* pX = &X[4]; + const int* pY = &Y[4]; + const int* const LoopEnd = &X[length]; __asm__ volatile( "mult $zero, $zero \n\t" @@ -188,6 +189,88 @@ static double ExtraCostCombined(const int* const X, const int* const Y, return (double)((int64_t)temp0 << 32 | temp1); } +#define HUFFMAN_COST_PASS \ + __asm__ volatile( \ + "sll %[temp1], %[temp0], 3 \n\t" \ + "addiu %[temp3], %[streak], -3 \n\t" \ + "addu %[temp2], %[pstreaks], %[temp1] \n\t" \ + "blez %[temp3], 1f \n\t" \ + "srl 
%[temp1], %[temp1], 1 \n\t" \ + "addu %[temp3], %[pcnts], %[temp1] \n\t" \ + "lw %[temp0], 4(%[temp2]) \n\t" \ + "lw %[temp1], 0(%[temp3]) \n\t" \ + "addu %[temp0], %[temp0], %[streak] \n\t" \ + "addiu %[temp1], %[temp1], 1 \n\t" \ + "sw %[temp0], 4(%[temp2]) \n\t" \ + "sw %[temp1], 0(%[temp3]) \n\t" \ + "b 2f \n\t" \ + "1: \n\t" \ + "lw %[temp0], 0(%[temp2]) \n\t" \ + "addu %[temp0], %[temp0], %[streak] \n\t" \ + "sw %[temp0], 0(%[temp2]) \n\t" \ + "2: \n\t" \ + : [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), \ + [temp3]"=&r"(temp3), [temp0]"+r"(temp0) \ + : [pstreaks]"r"(pstreaks), [pcnts]"r"(pcnts), \ + [streak]"r"(streak) \ + : "memory" \ + ); + +// Returns the various RLE counts +static VP8LStreaks HuffmanCostCount(const int* population, int length) { + int i; + int streak = 0; + VP8LStreaks stats; + int* const pstreaks = &stats.streaks[0][0]; + int* const pcnts = &stats.counts[0]; + int temp0, temp1, temp2, temp3; + memset(&stats, 0, sizeof(stats)); + for (i = 0; i < length - 1; ++i) { + ++streak; + if (population[i] == population[i + 1]) { + continue; + } + temp0 = population[i] != 0; + HUFFMAN_COST_PASS + streak = 0; + } + ++streak; + temp0 = population[i] != 0; + HUFFMAN_COST_PASS + + return stats; +} + +static VP8LStreaks HuffmanCostCombinedCount(const int* X, const int* Y, + int length) { + int i; + int streak = 0; + VP8LStreaks stats; + int* const pstreaks = &stats.streaks[0][0]; + int* const pcnts = &stats.counts[0]; + int temp0, temp1, temp2, temp3; + memset(&stats, 0, sizeof(stats)); + for (i = 0; i < length - 1; ++i) { + const int xy = X[i] + Y[i]; + const int xy_next = X[i + 1] + Y[i + 1]; + ++streak; + if (xy == xy_next) { + continue; + } + temp0 = xy != 0; + HUFFMAN_COST_PASS + streak = 0; + } + { + const int xy = X[i] + Y[i]; + ++streak; + temp0 = xy != 0; + HUFFMAN_COST_PASS + } + + return stats; +} + #endif // WEBP_USE_MIPS32 //------------------------------------------------------------------------------ @@ -201,5 +284,7 @@ void 
VP8LDspInitMIPS32(void) { VP8LFastLog2Slow = FastLog2Slow; VP8LExtraCost = ExtraCost; VP8LExtraCostCombined = ExtraCostCombined; + VP8LHuffmanCostCount = HuffmanCostCount; + VP8LHuffmanCostCombinedCount = HuffmanCostCombinedCount; #endif // WEBP_USE_MIPS32 }