mirror of
https://github.com/webmproject/libwebp.git
synced 2024-11-20 04:18:26 +01:00
MIPS: MIPS32r1: Added optimizations for ExtraCost functions.
ExtraCost and ExtraCostCombined Change-Id: I7eceb9ce2807296c6b43b974e4216879ddcd79f2
This commit is contained in:
parent
a8fe8ce231
commit
4ae0533f39
@ -1458,6 +1458,24 @@ void VP8LBundleColorMap(const uint8_t* const row, int width,
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the weighted sum
//   sum_{i = 2}^{length - 3} (i >> 1) * population[i + 2]
// i.e. entries population[4], population[5] are weighted by 1,
// population[6], population[7] by 2, and so on. The first four entries of
// 'population' are never read. Returns 0 when length <= 4.
//
// population: array of at least 'length' counts.
// length:     number of entries in 'population'.
static double ExtraCost(const int* const population, int length) {
  int i;
  double cost = 0.;
  for (i = 2; i < length - 2; ++i) {
    // The product is formed in floating point: an 'int' multiply could
    // overflow (undefined behavior) when the counts are large.
    cost += (double)(i >> 1) * population[i + 2];
  }
  return cost;
}
|
||||
|
||||
// Same weighted sum as ExtraCost(), applied to the element-wise sum of two
// arrays:
//   sum_{i = 2}^{length - 3} (i >> 1) * (X[i + 2] + Y[i + 2])
// The first four entries of 'X' and 'Y' are never read. Returns 0 when
// length <= 4.
//
// X, Y:   arrays of at least 'length' counts each.
// length: number of entries in each array.
static double ExtraCostCombined(const int* const X, const int* const Y,
                                int length) {
  int i;
  double cost = 0.;
  for (i = 2; i < length - 2; ++i) {
    // Both the sum and the product are formed in floating point so that
    // large counts cannot overflow 'int' (undefined behavior).
    const double xy = (double)X[i + 2] + (double)Y[i + 2];
    cost += (double)(i >> 1) * xy;
  }
  return cost;
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
VP8LProcessBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed;
|
||||
@ -1476,6 +1494,9 @@ VP8LConvertFunc VP8LConvertBGRAToBGR;
|
||||
VP8LFastLog2SlowFunc VP8LFastLog2Slow;
|
||||
VP8LFastLog2SlowFunc VP8LFastSLog2Slow;
|
||||
|
||||
VP8LCostFunc VP8LExtraCost;
|
||||
VP8LCostCombinedFunc VP8LExtraCostCombined;
|
||||
|
||||
extern void VP8LDspInitSSE2(void);
|
||||
extern void VP8LDspInitNEON(void);
|
||||
extern void VP8LDspInitMIPS32(void);
|
||||
@ -1498,6 +1519,9 @@ void VP8LDspInit(void) {
|
||||
VP8LFastLog2Slow = FastLog2Slow;
|
||||
VP8LFastSLog2Slow = FastSLog2Slow;
|
||||
|
||||
VP8LExtraCost = ExtraCost;
|
||||
VP8LExtraCostCombined = ExtraCostCombined;
|
||||
|
||||
// If defined, use CPUInfo() to overwrite some pointers with faster versions.
|
||||
if (VP8GetCPUInfo != NULL) {
|
||||
#if defined(WEBP_USE_SSE2)
|
||||
|
@ -118,6 +118,7 @@ static WEBP_INLINE uint32_t VP8LSubSampleSize(uint32_t size,
|
||||
return (size + (1 << sampling_bits) - 1) >> sampling_bits;
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// Faster logarithm for integers. Small values use a look-up table.
|
||||
#define LOG_LOOKUP_IDX_MAX 256
|
||||
extern const float kLog2Table[LOG_LOOKUP_IDX_MAX];
|
||||
@ -135,6 +136,17 @@ static WEBP_INLINE float VP8LFastSLog2(int v) {
|
||||
return (v < LOG_LOOKUP_IDX_MAX) ? kSLog2Table[v] : VP8LFastSLog2Slow(v);
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// Huffman-cost related functions.
|
||||
|
||||
// Signature of a cost routine that takes one array of 'length' int counts
// and returns a double.
typedef double (*VP8LCostFunc)(const int* const population, int length);
|
||||
// Signature of a cost routine that takes two arrays, X and Y, of 'length'
// int counts each and returns a double.
typedef double (*VP8LCostCombinedFunc)(const int* const X,
|
||||
const int* const Y,
|
||||
int length);
|
||||
|
||||
// Dispatch pointers for the ExtraCost routines. VP8LDspInit() assigns the
// plain-C implementations; VP8LDspInitMIPS32() replaces them with the MIPS32
// versions when WEBP_USE_MIPS32 is defined.
extern VP8LCostFunc VP8LExtraCost;
|
||||
extern VP8LCostCombinedFunc VP8LExtraCostCombined;
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// PrefixEncode()
|
||||
|
||||
|
@ -9,7 +9,8 @@
|
||||
//
|
||||
// MIPS version of lossless functions
|
||||
//
|
||||
// Author(s): Jovan Zelincevic (jovan.zelincevic@imgtec.com)
|
||||
// Author(s): Djordje Pesut (djordje.pesut@imgtec.com)
|
||||
// Jovan Zelincevic (jovan.zelincevic@imgtec.com)
|
||||
|
||||
#include "./dsp.h"
|
||||
#include "./lossless.h"
|
||||
@ -93,6 +94,100 @@ static float FastLog2SlowMIPS32(int v) {
|
||||
}
|
||||
}
|
||||
|
||||
// C version of this function:
|
||||
// int i = 0;
|
||||
// int64_t cost = 0;
|
||||
// int* pop = (int*)&population[4];
|
||||
// const int* LoopEnd = (int*)&population[length];
|
||||
// while (pop != LoopEnd) {
|
||||
// ++i;
|
||||
// cost += i * *pop;
|
||||
// cost += i * *(pop + 1);
|
||||
// pop += 2;
|
||||
// }
|
||||
// return (double)cost;
|
||||
static double ExtraCostMIPS32(const int* const population, int length) {
|
||||
int i, temp0, temp1;
|
||||
int* pop = (int*)&population[4];
|
||||
const int* LoopEnd = (int*)&population[length];
|
||||
|
||||
__asm__ volatile(
|
||||
"mult $zero, $zero \n\t"
|
||||
"xor %[i], %[i], %[i] \n\t"
|
||||
"beq %[pop], %[LoopEnd], 2f \n\t"
|
||||
"1: \n\t"
|
||||
"lw %[temp0], 0(%[pop]) \n\t"
|
||||
"lw %[temp1], 4(%[pop]) \n\t"
|
||||
"addiu %[i], %[i], 1 \n\t"
|
||||
"addiu %[pop], %[pop], 8 \n\t"
|
||||
"madd %[i], %[temp0] \n\t"
|
||||
"madd %[i], %[temp1] \n\t"
|
||||
"bne %[pop], %[LoopEnd], 1b \n\t"
|
||||
"2: \n\t"
|
||||
"mfhi %[temp0] \n\t"
|
||||
"mflo %[temp1] \n\t"
|
||||
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
|
||||
[i]"=&r"(i), [pop]"+r"(pop)
|
||||
: [LoopEnd]"r"(LoopEnd)
|
||||
: "memory", "hi", "lo"
|
||||
);
|
||||
|
||||
return (double)((int64_t)temp0 << 32 | temp1);
|
||||
}
|
||||
|
||||
// C version of this function:
|
||||
// int i = 0;
|
||||
// int64_t cost = 0;
|
||||
// int* pX = (int*)&X[4];
|
||||
// int* pY = (int*)&Y[4];
|
||||
// const int* LoopEnd = (int*)&X[length];
|
||||
// while (pX != LoopEnd) {
|
||||
// const int xy0 = *pX + *pY;
|
||||
// const int xy1 = *(pX + 1) + *(pY + 1);
|
||||
// ++i;
|
||||
// cost += i * xy0;
|
||||
// cost += i * xy1;
|
||||
// pX += 2;
|
||||
// pY += 2;
|
||||
// }
|
||||
// return (double)cost;
|
||||
static double ExtraCostCombinedMIPS32(const int* const X, const int* const Y,
|
||||
int length) {
|
||||
int i, temp0, temp1, temp2, temp3;
|
||||
int* pX = (int*)&X[4];
|
||||
int* pY = (int*)&Y[4];
|
||||
const int* LoopEnd = (int*)&X[length];
|
||||
|
||||
__asm__ volatile(
|
||||
"mult $zero, $zero \n\t"
|
||||
"xor %[i], %[i], %[i] \n\t"
|
||||
"beq %[pX], %[LoopEnd], 2f \n\t"
|
||||
"1: \n\t"
|
||||
"lw %[temp0], 0(%[pX]) \n\t"
|
||||
"lw %[temp1], 0(%[pY]) \n\t"
|
||||
"lw %[temp2], 4(%[pX]) \n\t"
|
||||
"lw %[temp3], 4(%[pY]) \n\t"
|
||||
"addiu %[i], %[i], 1 \n\t"
|
||||
"addu %[temp0], %[temp0], %[temp1] \n\t"
|
||||
"addu %[temp2], %[temp2], %[temp3] \n\t"
|
||||
"addiu %[pX], %[pX], 8 \n\t"
|
||||
"addiu %[pY], %[pY], 8 \n\t"
|
||||
"madd %[i], %[temp0] \n\t"
|
||||
"madd %[i], %[temp2] \n\t"
|
||||
"bne %[pX], %[LoopEnd], 1b \n\t"
|
||||
"2: \n\t"
|
||||
"mfhi %[temp0] \n\t"
|
||||
"mflo %[temp1] \n\t"
|
||||
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
|
||||
[temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
|
||||
[i]"=&r"(i), [pX]"+r"(pX), [pY]"+r"(pY)
|
||||
: [LoopEnd]"r"(LoopEnd)
|
||||
: "memory", "hi", "lo"
|
||||
);
|
||||
|
||||
return (double)((int64_t)temp0 << 32 | temp1);
|
||||
}
|
||||
|
||||
#endif // WEBP_USE_MIPS32
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
@ -104,5 +199,7 @@ void VP8LDspInitMIPS32(void) {
|
||||
#if defined(WEBP_USE_MIPS32)
|
||||
VP8LFastSLog2Slow = FastSLog2SlowMIPS32;
|
||||
VP8LFastLog2Slow = FastLog2SlowMIPS32;
|
||||
VP8LExtraCost = ExtraCostMIPS32;
|
||||
VP8LExtraCostCombined = ExtraCostCombinedMIPS32;
|
||||
#endif // WEBP_USE_MIPS32
|
||||
}
|
||||
|
@ -252,24 +252,6 @@ static double GetCombinedEntropy(const int* const X, const int* const Y,
|
||||
return BitsEntropyCombined(X, Y, length) + HuffmanCostCombined(X, Y, length);
|
||||
}
|
||||
|
||||
// Returns the weighted sum
//   sum_{i = 2}^{length - 3} (i >> 1) * population[i + 2]
// i.e. entries population[4], population[5] are weighted by 1,
// population[6], population[7] by 2, and so on. The first four entries of
// 'population' are never read. Returns 0 when length <= 4.
//
// population: array of at least 'length' counts.
// length:     number of entries in 'population'.
static double ExtraCost(const int* const population, int length) {
  int i;
  double cost = 0.;
  for (i = 2; i < length - 2; ++i) {
    // The product is formed in floating point: an 'int' multiply could
    // overflow (undefined behavior) when the counts are large.
    cost += (double)(i >> 1) * population[i + 2];
  }
  return cost;
}
|
||||
|
||||
// Same weighted sum as ExtraCost(), applied to the element-wise sum of two
// arrays:
//   sum_{i = 2}^{length - 3} (i >> 1) * (X[i + 2] + Y[i + 2])
// The first four entries of 'X' and 'Y' are never read. Returns 0 when
// length <= 4.
//
// X, Y:   arrays of at least 'length' counts each.
// length: number of entries in each array.
static double ExtraCostCombined(const int* const X, const int* const Y,
                                int length) {
  int i;
  double cost = 0.;
  for (i = 2; i < length - 2; ++i) {
    // Both the sum and the product are formed in floating point so that
    // large counts cannot overflow 'int' (undefined behavior).
    const double xy = (double)X[i + 2] + (double)Y[i + 2];
    cost += (double)(i >> 1) * xy;
  }
  return cost;
}
|
||||
|
||||
// Estimates the Entropy + Huffman + other block overhead size cost.
|
||||
double VP8LHistogramEstimateBits(const VP8LHistogram* const p) {
|
||||
return
|
||||
@ -278,8 +260,8 @@ double VP8LHistogramEstimateBits(const VP8LHistogram* const p) {
|
||||
+ PopulationCost(p->blue_, 256)
|
||||
+ PopulationCost(p->alpha_, 256)
|
||||
+ PopulationCost(p->distance_, NUM_DISTANCE_CODES)
|
||||
+ ExtraCost(p->literal_ + 256, NUM_LENGTH_CODES)
|
||||
+ ExtraCost(p->distance_, NUM_DISTANCE_CODES);
|
||||
+ VP8LExtraCost(p->literal_ + 256, NUM_LENGTH_CODES)
|
||||
+ VP8LExtraCost(p->distance_, NUM_DISTANCE_CODES);
|
||||
}
|
||||
|
||||
double VP8LHistogramEstimateBitsBulk(const VP8LHistogram* const p) {
|
||||
@ -289,8 +271,8 @@ double VP8LHistogramEstimateBitsBulk(const VP8LHistogram* const p) {
|
||||
+ BitsEntropy(p->blue_, 256)
|
||||
+ BitsEntropy(p->alpha_, 256)
|
||||
+ BitsEntropy(p->distance_, NUM_DISTANCE_CODES)
|
||||
+ ExtraCost(p->literal_ + 256, NUM_LENGTH_CODES)
|
||||
+ ExtraCost(p->distance_, NUM_DISTANCE_CODES);
|
||||
+ VP8LExtraCost(p->literal_ + 256, NUM_LENGTH_CODES)
|
||||
+ VP8LExtraCost(p->distance_, NUM_DISTANCE_CODES);
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
@ -322,7 +304,7 @@ static int GetCombinedHistogramEntropy(const VP8LHistogram* const a,
|
||||
b->palette_code_bits_;
|
||||
*cost += GetCombinedEntropy(a->literal_, b->literal_,
|
||||
VP8LHistogramNumCodes(palette_code_bits));
|
||||
*cost += ExtraCostCombined(a->literal_ + 256, b->literal_ + 256,
|
||||
*cost += VP8LExtraCostCombined(a->literal_ + 256, b->literal_ + 256,
|
||||
NUM_LENGTH_CODES);
|
||||
if (*cost > cost_threshold) return 0;
|
||||
|
||||
@ -336,7 +318,8 @@ static int GetCombinedHistogramEntropy(const VP8LHistogram* const a,
|
||||
if (*cost > cost_threshold) return 0;
|
||||
|
||||
*cost += GetCombinedEntropy(a->distance_, b->distance_, NUM_DISTANCE_CODES);
|
||||
*cost += ExtraCostCombined(a->distance_, b->distance_, NUM_DISTANCE_CODES);
|
||||
*cost += VP8LExtraCostCombined(a->distance_, b->distance_,
|
||||
NUM_DISTANCE_CODES);
|
||||
if (*cost > cost_threshold) return 0;
|
||||
|
||||
return 1;
|
||||
@ -426,10 +409,10 @@ static void UpdateHistogramCost(VP8LHistogram* const h) {
|
||||
const double alpha_cost = PopulationCost(h->alpha_, 256);
|
||||
const double distance_cost =
|
||||
PopulationCost(h->distance_, NUM_DISTANCE_CODES) +
|
||||
ExtraCost(h->distance_, NUM_DISTANCE_CODES);
|
||||
VP8LExtraCost(h->distance_, NUM_DISTANCE_CODES);
|
||||
const int num_codes = VP8LHistogramNumCodes(h->palette_code_bits_);
|
||||
h->literal_cost_ = PopulationCost(h->literal_, num_codes) +
|
||||
ExtraCost(h->literal_ + 256, NUM_LENGTH_CODES);
|
||||
VP8LExtraCost(h->literal_ + 256, NUM_LENGTH_CODES);
|
||||
h->red_cost_ = PopulationCost(h->red_, 256);
|
||||
h->blue_cost_ = PopulationCost(h->blue_, 256);
|
||||
h->bit_cost_ = h->literal_cost_ + h->red_cost_ + h->blue_cost_ +
|
||||
|
Loading…
Reference in New Issue
Block a user