diff --git a/src/dsp/lossless.c b/src/dsp/lossless.c index a91be156..758f9bca 100644 --- a/src/dsp/lossless.c +++ b/src/dsp/lossless.c @@ -1458,6 +1458,24 @@ void VP8LBundleColorMap(const uint8_t* const row, int width, } } +static double ExtraCost(const int* const population, int length) { + int i; + double cost = 0.; + for (i = 2; i < length - 2; ++i) cost += (i >> 1) * population[i + 2]; + return cost; +} + +static double ExtraCostCombined(const int* const X, const int* const Y, + int length) { + int i; + double cost = 0.; + for (i = 2; i < length - 2; ++i) { + const int xy = X[i + 2] + Y[i + 2]; + cost += (i >> 1) * xy; + } + return cost; +} + //------------------------------------------------------------------------------ VP8LProcessBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed; @@ -1476,6 +1494,9 @@ VP8LConvertFunc VP8LConvertBGRAToBGR; VP8LFastLog2SlowFunc VP8LFastLog2Slow; VP8LFastLog2SlowFunc VP8LFastSLog2Slow; +VP8LCostFunc VP8LExtraCost; +VP8LCostCombinedFunc VP8LExtraCostCombined; + extern void VP8LDspInitSSE2(void); extern void VP8LDspInitNEON(void); extern void VP8LDspInitMIPS32(void); @@ -1498,6 +1519,9 @@ void VP8LDspInit(void) { VP8LFastLog2Slow = FastLog2Slow; VP8LFastSLog2Slow = FastSLog2Slow; + VP8LExtraCost = ExtraCost; + VP8LExtraCostCombined = ExtraCostCombined; + // If defined, use CPUInfo() to overwrite some pointers with faster versions. if (VP8GetCPUInfo != NULL) { #if defined(WEBP_USE_SSE2) diff --git a/src/dsp/lossless.h b/src/dsp/lossless.h index 5967b28b..2ce1629b 100644 --- a/src/dsp/lossless.h +++ b/src/dsp/lossless.h @@ -118,6 +118,7 @@ static WEBP_INLINE uint32_t VP8LSubSampleSize(uint32_t size, return (size + (1 << sampling_bits) - 1) >> sampling_bits; } +// ----------------------------------------------------------------------------- // Faster logarithm for integers. Small values use a look-up table. #define LOG_LOOKUP_IDX_MAX 256 extern const float kLog2Table[LOG_LOOKUP_IDX_MAX]; @@ -135,6 +136,17 @@ static WEBP_INLINE float VP8LFastSLog2(int v) { return (v < LOG_LOOKUP_IDX_MAX) ? kSLog2Table[v] : VP8LFastSLog2Slow(v); } +// ----------------------------------------------------------------------------- +// Huffman-cost related functions. + +typedef double (*VP8LCostFunc)(const int* const population, int length); +typedef double (*VP8LCostCombinedFunc)(const int* const X, + const int* const Y, + int length); + +extern VP8LCostFunc VP8LExtraCost; +extern VP8LCostCombinedFunc VP8LExtraCostCombined; + // ----------------------------------------------------------------------------- // PrefixEncode() diff --git a/src/dsp/lossless_mips32.c b/src/dsp/lossless_mips32.c index 673bea77..9df7fb6e 100644 --- a/src/dsp/lossless_mips32.c +++ b/src/dsp/lossless_mips32.c @@ -9,7 +9,8 @@ // // MIPS version of lossless functions // -// Author(s): Jovan Zelincevic (jovan.zelincevic@imgtec.com) +// Author(s): Djordje Pesut (djordje.pesut@imgtec.com) +// Jovan Zelincevic (jovan.zelincevic@imgtec.com) #include "./dsp.h" #include "./lossless.h" @@ -93,6 +94,100 @@ static float FastLog2SlowMIPS32(int v) { } } +// C version of this function: +// int i = 0; +// int64_t cost = 0; +// int* pop = (int*)&population[4]; +// const int* LoopEnd = (int*)&population[length]; +// while (pop != LoopEnd) { +// ++i; +// cost += i * *pop; +// cost += i * *(pop + 1); +// pop += 2; +// } +// return (double)cost; +static double ExtraCostMIPS32(const int* const population, int length) { + int i, temp0, temp1; + int* pop = (int*)&population[4]; + const int* LoopEnd = (int*)&population[length]; + + __asm__ volatile( + "mult $zero, $zero \n\t" + "xor %[i], %[i], %[i] \n\t" + "beq %[pop], %[LoopEnd], 2f \n\t" + "1: \n\t" + "lw %[temp0], 0(%[pop]) \n\t" + "lw %[temp1], 4(%[pop]) \n\t" + "addiu %[i], %[i], 1 \n\t" + "addiu %[pop], %[pop], 8 \n\t" + "madd %[i], %[temp0] \n\t" + "madd %[i], %[temp1] \n\t" + "bne %[pop], %[LoopEnd], 1b \n\t" + "2: \n\t" + "mfhi %[temp0] \n\t" + "mflo %[temp1] \n\t" + : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), + [i]"=&r"(i), [pop]"+r"(pop) + : [LoopEnd]"r"(LoopEnd) + : "memory", "hi", "lo" + ); + + return (double)((int64_t)temp0 << 32 | temp1); +} + +// C version of this function: +// int i = 0; +// int64_t cost = 0; +// int* pX = (int*)&X[4]; +// int* pY = (int*)&Y[4]; +// const int* LoopEnd = (int*)&X[length]; +// while (pX != LoopEnd) { +// const int xy0 = *pX + *pY; +// const int xy1 = *(pX + 1) + *(pY + 1); +// ++i; +// cost += i * xy0; +// cost += i * xy1; +// pX += 2; +// pY += 2; +// } +// return (double)cost; +static double ExtraCostCombinedMIPS32(const int* const X, const int* const Y, + int length) { + int i, temp0, temp1, temp2, temp3; + int* pX = (int*)&X[4]; + int* pY = (int*)&Y[4]; + const int* LoopEnd = (int*)&X[length]; + + __asm__ volatile( + "mult $zero, $zero \n\t" + "xor %[i], %[i], %[i] \n\t" + "beq %[pX], %[LoopEnd], 2f \n\t" + "1: \n\t" + "lw %[temp0], 0(%[pX]) \n\t" + "lw %[temp1], 0(%[pY]) \n\t" + "lw %[temp2], 4(%[pX]) \n\t" + "lw %[temp3], 4(%[pY]) \n\t" + "addiu %[i], %[i], 1 \n\t" + "addu %[temp0], %[temp0], %[temp1] \n\t" + "addu %[temp2], %[temp2], %[temp3] \n\t" + "addiu %[pX], %[pX], 8 \n\t" + "addiu %[pY], %[pY], 8 \n\t" + "madd %[i], %[temp0] \n\t" + "madd %[i], %[temp2] \n\t" + "bne %[pX], %[LoopEnd], 1b \n\t" + "2: \n\t" + "mfhi %[temp0] \n\t" + "mflo %[temp1] \n\t" + : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), + [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), + [i]"=&r"(i), [pX]"+r"(pX), [pY]"+r"(pY) + : [LoopEnd]"r"(LoopEnd) + : "memory", "hi", "lo" + ); + + return (double)((int64_t)temp0 << 32 | temp1); +} + #endif // WEBP_USE_MIPS32 //------------------------------------------------------------------------------ @@ -104,5 +199,7 @@ void VP8LDspInitMIPS32(void) { #if defined(WEBP_USE_MIPS32) VP8LFastSLog2Slow = FastSLog2SlowMIPS32; VP8LFastLog2Slow = FastLog2SlowMIPS32; + VP8LExtraCost = ExtraCostMIPS32; + VP8LExtraCostCombined = ExtraCostCombinedMIPS32; #endif // WEBP_USE_MIPS32 } diff --git a/src/enc/histogram.c b/src/enc/histogram.c index 39a12e49..7be49a23 100644 --- a/src/enc/histogram.c +++ b/src/enc/histogram.c @@ -252,24 +252,6 @@ static double GetCombinedEntropy(const int* const X, const int* const Y, return BitsEntropyCombined(X, Y, length) + HuffmanCostCombined(X, Y, length); } -static double ExtraCost(const int* const population, int length) { - int i; - double cost = 0.; - for (i = 2; i < length - 2; ++i) cost += (i >> 1) * population[i + 2]; - return cost; -} - -static double ExtraCostCombined(const int* const X, const int* const Y, - int length) { - int i; - double cost = 0.; - for (i = 2; i < length - 2; ++i) { - const int xy = X[i + 2] + Y[i + 2]; - cost += (i >> 1) * xy; - } - return cost; -} - // Estimates the Entropy + Huffman + other block overhead size cost. double VP8LHistogramEstimateBits(const VP8LHistogram* const p) { return @@ -278,8 +260,8 @@ double VP8LHistogramEstimateBits(const VP8LHistogram* const p) { + PopulationCost(p->blue_, 256) + PopulationCost(p->alpha_, 256) + PopulationCost(p->distance_, NUM_DISTANCE_CODES) - + ExtraCost(p->literal_ + 256, NUM_LENGTH_CODES) - + ExtraCost(p->distance_, NUM_DISTANCE_CODES); + + VP8LExtraCost(p->literal_ + 256, NUM_LENGTH_CODES) + + VP8LExtraCost(p->distance_, NUM_DISTANCE_CODES); } double VP8LHistogramEstimateBitsBulk(const VP8LHistogram* const p) { @@ -289,8 +271,8 @@ double VP8LHistogramEstimateBitsBulk(const VP8LHistogram* const p) { + BitsEntropy(p->blue_, 256) + BitsEntropy(p->alpha_, 256) + BitsEntropy(p->distance_, NUM_DISTANCE_CODES) - + ExtraCost(p->literal_ + 256, NUM_LENGTH_CODES) - + ExtraCost(p->distance_, NUM_DISTANCE_CODES); + + VP8LExtraCost(p->literal_ + 256, NUM_LENGTH_CODES) + + VP8LExtraCost(p->distance_, NUM_DISTANCE_CODES); } // ----------------------------------------------------------------------------- @@ -322,8 +304,8 @@ static int GetCombinedHistogramEntropy(const VP8LHistogram* const a, b->palette_code_bits_; *cost += GetCombinedEntropy(a->literal_, b->literal_, VP8LHistogramNumCodes(palette_code_bits)); - *cost += ExtraCostCombined(a->literal_ + 256, b->literal_ + 256, - NUM_LENGTH_CODES); + *cost += VP8LExtraCostCombined(a->literal_ + 256, b->literal_ + 256, + NUM_LENGTH_CODES); if (*cost > cost_threshold) return 0; *cost += GetCombinedEntropy(a->red_, b->red_, 256); @@ -336,7 +318,8 @@ static int GetCombinedHistogramEntropy(const VP8LHistogram* const a, if (*cost > cost_threshold) return 0; *cost += GetCombinedEntropy(a->distance_, b->distance_, NUM_DISTANCE_CODES); - *cost += ExtraCostCombined(a->distance_, b->distance_, NUM_DISTANCE_CODES); + *cost += VP8LExtraCostCombined(a->distance_, b->distance_, + NUM_DISTANCE_CODES); if (*cost > cost_threshold) return 0; return 1; @@ -426,10 +409,10 @@ static void UpdateHistogramCost(VP8LHistogram* const h) { const double alpha_cost = PopulationCost(h->alpha_, 256); const double distance_cost = PopulationCost(h->distance_, NUM_DISTANCE_CODES) + - ExtraCost(h->distance_, NUM_DISTANCE_CODES); + VP8LExtraCost(h->distance_, NUM_DISTANCE_CODES); const int num_codes = VP8LHistogramNumCodes(h->palette_code_bits_); h->literal_cost_ = PopulationCost(h->literal_, num_codes) + - ExtraCost(h->literal_ + 256, NUM_LENGTH_CODES); + VP8LExtraCost(h->literal_ + 256, NUM_LENGTH_CODES); h->red_cost_ = PopulationCost(h->red_, 256); h->blue_cost_ = PopulationCost(h->blue_, 256); h->bit_cost_ = h->literal_cost_ + h->red_cost_ + h->blue_cost_ +