MIPS: MIPS32r1: Added optimizations for ExtraCost functions.

ExtraCost and ExtraCostCombined

Change-Id: I7eceb9ce2807296c6b43b974e4216879ddcd79f2
This commit is contained in:
Djordje Pesut 2014-04-15 12:55:20 +02:00 committed by skal
parent a8fe8ce231
commit 4ae0533f39
4 changed files with 144 additions and 28 deletions

View File

@ -1458,6 +1458,24 @@ void VP8LBundleColorMap(const uint8_t* const row, int width,
} }
} }
// Returns the sum, for i in [2, length - 2), of (i >> 1) * population[i + 2].
// Entries population[0..3] never contribute to the result.
// Each product is formed in double precision: multiplying in 'int' would
// overflow (undefined behavior) for large counts before the value reaches the
// floating-point accumulator.
static double ExtraCost(const int* const population, int length) {
  int i;
  double cost = 0.;
  for (i = 2; i < length - 2; ++i) {
    cost += (double)(i >> 1) * population[i + 2];
  }
  return cost;
}
// Same weighting as ExtraCost(), applied to the element-wise sum of the two
// arrays: returns the sum, for i in [2, length - 2), of
// (i >> 1) * (X[i + 2] + Y[i + 2]).
// The per-entry sum and the product are evaluated in double precision so that
// neither can overflow 'int' (undefined behavior) for large counts.
static double ExtraCostCombined(const int* const X, const int* const Y,
                                int length) {
  int i;
  double cost = 0.;
  for (i = 2; i < length - 2; ++i) {
    const double xy = (double)X[i + 2] + (double)Y[i + 2];
    cost += (i >> 1) * xy;
  }
  return cost;
}
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
VP8LProcessBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed; VP8LProcessBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed;
@ -1476,6 +1494,9 @@ VP8LConvertFunc VP8LConvertBGRAToBGR;
VP8LFastLog2SlowFunc VP8LFastLog2Slow; VP8LFastLog2SlowFunc VP8LFastLog2Slow;
VP8LFastLog2SlowFunc VP8LFastSLog2Slow; VP8LFastLog2SlowFunc VP8LFastSLog2Slow;
// Dispatch pointers for the extra-cost helpers. VP8LDspInit() sets them to the
// plain-C ExtraCost() / ExtraCostCombined() implementations.
VP8LCostFunc VP8LExtraCost;
VP8LCostCombinedFunc VP8LExtraCostCombined;
extern void VP8LDspInitSSE2(void); extern void VP8LDspInitSSE2(void);
extern void VP8LDspInitNEON(void); extern void VP8LDspInitNEON(void);
extern void VP8LDspInitMIPS32(void); extern void VP8LDspInitMIPS32(void);
@ -1498,6 +1519,9 @@ void VP8LDspInit(void) {
VP8LFastLog2Slow = FastLog2Slow; VP8LFastLog2Slow = FastLog2Slow;
VP8LFastSLog2Slow = FastSLog2Slow; VP8LFastSLog2Slow = FastSLog2Slow;
VP8LExtraCost = ExtraCost;
VP8LExtraCostCombined = ExtraCostCombined;
// If defined, use CPUInfo() to overwrite some pointers with faster versions. // If defined, use CPUInfo() to overwrite some pointers with faster versions.
if (VP8GetCPUInfo != NULL) { if (VP8GetCPUInfo != NULL) {
#if defined(WEBP_USE_SSE2) #if defined(WEBP_USE_SSE2)

View File

@ -118,6 +118,7 @@ static WEBP_INLINE uint32_t VP8LSubSampleSize(uint32_t size,
return (size + (1 << sampling_bits) - 1) >> sampling_bits; return (size + (1 << sampling_bits) - 1) >> sampling_bits;
} }
// -----------------------------------------------------------------------------
// Faster logarithm for integers. Small values use a look-up table. // Faster logarithm for integers. Small values use a look-up table.
#define LOG_LOOKUP_IDX_MAX 256 #define LOG_LOOKUP_IDX_MAX 256
extern const float kLog2Table[LOG_LOOKUP_IDX_MAX]; extern const float kLog2Table[LOG_LOOKUP_IDX_MAX];
@ -135,6 +136,17 @@ static WEBP_INLINE float VP8LFastSLog2(int v) {
return (v < LOG_LOOKUP_IDX_MAX) ? kSLog2Table[v] : VP8LFastSLog2Slow(v); return (v < LOG_LOOKUP_IDX_MAX) ? kSLog2Table[v] : VP8LFastSLog2Slow(v);
} }
// -----------------------------------------------------------------------------
// Huffman-cost related functions.
// Cost function over one array: takes 'population' and its 'length', returns
// the cost as a double.
typedef double (*VP8LCostFunc)(const int* const population, int length);
// Cost function over two arrays 'X' and 'Y' that share the same 'length'.
typedef double (*VP8LCostCombinedFunc)(const int* const X,
const int* const Y,
int length);
// Function pointers through which callers invoke the extra-cost helpers.
// They are assigned in VP8LDspInit(), and VP8LDspInitMIPS32() replaces them
// with the MIPS32 versions when WEBP_USE_MIPS32 is defined.
extern VP8LCostFunc VP8LExtraCost;
extern VP8LCostCombinedFunc VP8LExtraCostCombined;
// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------
// PrefixEncode() // PrefixEncode()

View File

@ -9,7 +9,8 @@
// //
// MIPS version of lossless functions // MIPS version of lossless functions
// //
// Author(s): Jovan Zelincevic (jovan.zelincevic@imgtec.com) // Author(s): Djordje Pesut (djordje.pesut@imgtec.com)
// Jovan Zelincevic (jovan.zelincevic@imgtec.com)
#include "./dsp.h" #include "./dsp.h"
#include "./lossless.h" #include "./lossless.h"
@ -93,6 +94,100 @@ static float FastLog2SlowMIPS32(int v) {
} }
} }
// C version of this function:
// int i = 0;
// int64_t cost = 0;
// int* pop = (int*)&population[4];
// const int* LoopEnd = (int*)&population[length];
// while (pop != LoopEnd) {
// ++i;
// cost += i * *pop;
// cost += i * *(pop + 1);
// pop += 2;
// }
// return (double)cost;
static double ExtraCostMIPS32(const int* const population, int length) {
int i, temp0, temp1;
int* pop = (int*)&population[4];
const int* LoopEnd = (int*)&population[length];
__asm__ volatile(
"mult $zero, $zero \n\t"
"xor %[i], %[i], %[i] \n\t"
"beq %[pop], %[LoopEnd], 2f \n\t"
"1: \n\t"
"lw %[temp0], 0(%[pop]) \n\t"
"lw %[temp1], 4(%[pop]) \n\t"
"addiu %[i], %[i], 1 \n\t"
"addiu %[pop], %[pop], 8 \n\t"
"madd %[i], %[temp0] \n\t"
"madd %[i], %[temp1] \n\t"
"bne %[pop], %[LoopEnd], 1b \n\t"
"2: \n\t"
"mfhi %[temp0] \n\t"
"mflo %[temp1] \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
[i]"=&r"(i), [pop]"+r"(pop)
: [LoopEnd]"r"(LoopEnd)
: "memory", "hi", "lo"
);
return (double)((int64_t)temp0 << 32 | temp1);
}
// C version of this function:
// int i = 0;
// int64_t cost = 0;
// int* pX = (int*)&X[4];
// int* pY = (int*)&Y[4];
// const int* LoopEnd = (int*)&X[length];
// while (pX != LoopEnd) {
// const int xy0 = *pX + *pY;
// const int xy1 = *(pX + 1) + *(pY + 1);
// ++i;
// cost += i * xy0;
// cost += i * xy1;
// pX += 2;
// pY += 2;
// }
// return (double)cost;
static double ExtraCostCombinedMIPS32(const int* const X, const int* const Y,
int length) {
int i, temp0, temp1, temp2, temp3;
int* pX = (int*)&X[4];
int* pY = (int*)&Y[4];
const int* LoopEnd = (int*)&X[length];
__asm__ volatile(
"mult $zero, $zero \n\t"
"xor %[i], %[i], %[i] \n\t"
"beq %[pX], %[LoopEnd], 2f \n\t"
"1: \n\t"
"lw %[temp0], 0(%[pX]) \n\t"
"lw %[temp1], 0(%[pY]) \n\t"
"lw %[temp2], 4(%[pX]) \n\t"
"lw %[temp3], 4(%[pY]) \n\t"
"addiu %[i], %[i], 1 \n\t"
"addu %[temp0], %[temp0], %[temp1] \n\t"
"addu %[temp2], %[temp2], %[temp3] \n\t"
"addiu %[pX], %[pX], 8 \n\t"
"addiu %[pY], %[pY], 8 \n\t"
"madd %[i], %[temp0] \n\t"
"madd %[i], %[temp2] \n\t"
"bne %[pX], %[LoopEnd], 1b \n\t"
"2: \n\t"
"mfhi %[temp0] \n\t"
"mflo %[temp1] \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
[temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
[i]"=&r"(i), [pX]"+r"(pX), [pY]"+r"(pY)
: [LoopEnd]"r"(LoopEnd)
: "memory", "hi", "lo"
);
return (double)((int64_t)temp0 << 32 | temp1);
}
#endif // WEBP_USE_MIPS32 #endif // WEBP_USE_MIPS32
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
@ -104,5 +199,7 @@ void VP8LDspInitMIPS32(void) {
#if defined(WEBP_USE_MIPS32) #if defined(WEBP_USE_MIPS32)
VP8LFastSLog2Slow = FastSLog2SlowMIPS32; VP8LFastSLog2Slow = FastSLog2SlowMIPS32;
VP8LFastLog2Slow = FastLog2SlowMIPS32; VP8LFastLog2Slow = FastLog2SlowMIPS32;
VP8LExtraCost = ExtraCostMIPS32;
VP8LExtraCostCombined = ExtraCostCombinedMIPS32;
#endif // WEBP_USE_MIPS32 #endif // WEBP_USE_MIPS32
} }

View File

@ -252,24 +252,6 @@ static double GetCombinedEntropy(const int* const X, const int* const Y,
return BitsEntropyCombined(X, Y, length) + HuffmanCostCombined(X, Y, length); return BitsEntropyCombined(X, Y, length) + HuffmanCostCombined(X, Y, length);
} }
// Returns the sum, for i in [2, length - 2), of (i >> 1) * population[i + 2].
// The first four entries of 'population' are never read.
// The product is computed in double precision; an 'int' multiplication would
// overflow (undefined behavior) once a count gets large enough.
static double ExtraCost(const int* const population, int length) {
  int i;
  double cost = 0.;
  for (i = 2; i < length - 2; ++i) {
    cost += (double)(i >> 1) * population[i + 2];
  }
  return cost;
}
// Returns the sum, for i in [2, length - 2), of
// (i >> 1) * (X[i + 2] + Y[i + 2]).
// Both the per-entry sum and the product are computed in double precision;
// doing them in 'int' would overflow (undefined behavior) for large counts.
static double ExtraCostCombined(const int* const X, const int* const Y,
                                int length) {
  int i;
  double cost = 0.;
  for (i = 2; i < length - 2; ++i) {
    const double xy = (double)X[i + 2] + (double)Y[i + 2];
    cost += (i >> 1) * xy;
  }
  return cost;
}
// Estimates the Entropy + Huffman + other block overhead size cost. // Estimates the Entropy + Huffman + other block overhead size cost.
double VP8LHistogramEstimateBits(const VP8LHistogram* const p) { double VP8LHistogramEstimateBits(const VP8LHistogram* const p) {
return return
@ -278,8 +260,8 @@ double VP8LHistogramEstimateBits(const VP8LHistogram* const p) {
+ PopulationCost(p->blue_, 256) + PopulationCost(p->blue_, 256)
+ PopulationCost(p->alpha_, 256) + PopulationCost(p->alpha_, 256)
+ PopulationCost(p->distance_, NUM_DISTANCE_CODES) + PopulationCost(p->distance_, NUM_DISTANCE_CODES)
+ ExtraCost(p->literal_ + 256, NUM_LENGTH_CODES) + VP8LExtraCost(p->literal_ + 256, NUM_LENGTH_CODES)
+ ExtraCost(p->distance_, NUM_DISTANCE_CODES); + VP8LExtraCost(p->distance_, NUM_DISTANCE_CODES);
} }
double VP8LHistogramEstimateBitsBulk(const VP8LHistogram* const p) { double VP8LHistogramEstimateBitsBulk(const VP8LHistogram* const p) {
@ -289,8 +271,8 @@ double VP8LHistogramEstimateBitsBulk(const VP8LHistogram* const p) {
+ BitsEntropy(p->blue_, 256) + BitsEntropy(p->blue_, 256)
+ BitsEntropy(p->alpha_, 256) + BitsEntropy(p->alpha_, 256)
+ BitsEntropy(p->distance_, NUM_DISTANCE_CODES) + BitsEntropy(p->distance_, NUM_DISTANCE_CODES)
+ ExtraCost(p->literal_ + 256, NUM_LENGTH_CODES) + VP8LExtraCost(p->literal_ + 256, NUM_LENGTH_CODES)
+ ExtraCost(p->distance_, NUM_DISTANCE_CODES); + VP8LExtraCost(p->distance_, NUM_DISTANCE_CODES);
} }
// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------
@ -322,7 +304,7 @@ static int GetCombinedHistogramEntropy(const VP8LHistogram* const a,
b->palette_code_bits_; b->palette_code_bits_;
*cost += GetCombinedEntropy(a->literal_, b->literal_, *cost += GetCombinedEntropy(a->literal_, b->literal_,
VP8LHistogramNumCodes(palette_code_bits)); VP8LHistogramNumCodes(palette_code_bits));
*cost += ExtraCostCombined(a->literal_ + 256, b->literal_ + 256, *cost += VP8LExtraCostCombined(a->literal_ + 256, b->literal_ + 256,
NUM_LENGTH_CODES); NUM_LENGTH_CODES);
if (*cost > cost_threshold) return 0; if (*cost > cost_threshold) return 0;
@ -336,7 +318,8 @@ static int GetCombinedHistogramEntropy(const VP8LHistogram* const a,
if (*cost > cost_threshold) return 0; if (*cost > cost_threshold) return 0;
*cost += GetCombinedEntropy(a->distance_, b->distance_, NUM_DISTANCE_CODES); *cost += GetCombinedEntropy(a->distance_, b->distance_, NUM_DISTANCE_CODES);
*cost += ExtraCostCombined(a->distance_, b->distance_, NUM_DISTANCE_CODES); *cost += VP8LExtraCostCombined(a->distance_, b->distance_,
NUM_DISTANCE_CODES);
if (*cost > cost_threshold) return 0; if (*cost > cost_threshold) return 0;
return 1; return 1;
@ -426,10 +409,10 @@ static void UpdateHistogramCost(VP8LHistogram* const h) {
const double alpha_cost = PopulationCost(h->alpha_, 256); const double alpha_cost = PopulationCost(h->alpha_, 256);
const double distance_cost = const double distance_cost =
PopulationCost(h->distance_, NUM_DISTANCE_CODES) + PopulationCost(h->distance_, NUM_DISTANCE_CODES) +
ExtraCost(h->distance_, NUM_DISTANCE_CODES); VP8LExtraCost(h->distance_, NUM_DISTANCE_CODES);
const int num_codes = VP8LHistogramNumCodes(h->palette_code_bits_); const int num_codes = VP8LHistogramNumCodes(h->palette_code_bits_);
h->literal_cost_ = PopulationCost(h->literal_, num_codes) + h->literal_cost_ = PopulationCost(h->literal_, num_codes) +
ExtraCost(h->literal_ + 256, NUM_LENGTH_CODES); VP8LExtraCost(h->literal_ + 256, NUM_LENGTH_CODES);
h->red_cost_ = PopulationCost(h->red_, 256); h->red_cost_ = PopulationCost(h->red_, 256);
h->blue_cost_ = PopulationCost(h->blue_, 256); h->blue_cost_ = PopulationCost(h->blue_, 256);
h->bit_cost_ = h->literal_cost_ + h->red_cost_ + h->blue_cost_ + h->bit_cost_ = h->literal_cost_ + h->red_cost_ + h->blue_cost_ +