mirror of
https://github.com/webmproject/libwebp.git
synced 2024-11-20 12:28:26 +01:00
MIPS: MIPS32r1: Added optimizations for ExtraCost functions.
ExtraCost and ExtraCostCombined Change-Id: I7eceb9ce2807296c6b43b974e4216879ddcd79f2
This commit is contained in:
parent
a8fe8ce231
commit
4ae0533f39
@ -1458,6 +1458,24 @@ void VP8LBundleColorMap(const uint8_t* const row, int width,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns the weighted sum of the histogram entries from index 4 up to
// 'length' - 1: entry k is weighted by (k - 2) >> 1. The first four entries
// are skipped and contribute nothing.
// 'population' must hold at least 'length' entries. Returns 0 when
// 'length' <= 4.
static double ExtraCost(const int* const population, int length) {
  int i;
  double cost = 0.;
  for (i = 2; i < length - 2; ++i) {
    // Multiply in double precision: the plain int product overflows
    // (undefined behavior) once a count exceeds INT_MAX / (i >> 1).
    cost += (double)(i >> 1) * population[i + 2];
  }
  return cost;
}
|
||||||
|
|
||||||
|
// Same weighted sum as ExtraCost(), computed on the element-wise sum of the
// two histograms X and Y: entry k (k >= 4) of X + Y is weighted by
// (k - 2) >> 1, and the first four entries are ignored.
// Both 'X' and 'Y' must hold at least 'length' entries.
static double ExtraCostCombined(const int* const X, const int* const Y,
                                int length) {
  int i;
  double cost = 0.;
  for (i = 2; i < length - 2; ++i) {
    // Add and multiply in double precision: both the int sum X + Y and the
    // int product with the weight can overflow for large counts.
    const double xy = (double)X[i + 2] + (double)Y[i + 2];
    cost += (double)(i >> 1) * xy;
  }
  return cost;
}
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
|
|
||||||
VP8LProcessBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed;
|
VP8LProcessBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed;
|
||||||
@ -1476,6 +1494,9 @@ VP8LConvertFunc VP8LConvertBGRAToBGR;
|
|||||||
VP8LFastLog2SlowFunc VP8LFastLog2Slow;
|
VP8LFastLog2SlowFunc VP8LFastLog2Slow;
|
||||||
VP8LFastLog2SlowFunc VP8LFastSLog2Slow;
|
VP8LFastLog2SlowFunc VP8LFastSLog2Slow;
|
||||||
|
|
||||||
|
VP8LCostFunc VP8LExtraCost;
|
||||||
|
VP8LCostCombinedFunc VP8LExtraCostCombined;
|
||||||
|
|
||||||
extern void VP8LDspInitSSE2(void);
|
extern void VP8LDspInitSSE2(void);
|
||||||
extern void VP8LDspInitNEON(void);
|
extern void VP8LDspInitNEON(void);
|
||||||
extern void VP8LDspInitMIPS32(void);
|
extern void VP8LDspInitMIPS32(void);
|
||||||
@ -1498,6 +1519,9 @@ void VP8LDspInit(void) {
|
|||||||
VP8LFastLog2Slow = FastLog2Slow;
|
VP8LFastLog2Slow = FastLog2Slow;
|
||||||
VP8LFastSLog2Slow = FastSLog2Slow;
|
VP8LFastSLog2Slow = FastSLog2Slow;
|
||||||
|
|
||||||
|
VP8LExtraCost = ExtraCost;
|
||||||
|
VP8LExtraCostCombined = ExtraCostCombined;
|
||||||
|
|
||||||
// If defined, use CPUInfo() to overwrite some pointers with faster versions.
|
// If defined, use CPUInfo() to overwrite some pointers with faster versions.
|
||||||
if (VP8GetCPUInfo != NULL) {
|
if (VP8GetCPUInfo != NULL) {
|
||||||
#if defined(WEBP_USE_SSE2)
|
#if defined(WEBP_USE_SSE2)
|
||||||
|
@ -118,6 +118,7 @@ static WEBP_INLINE uint32_t VP8LSubSampleSize(uint32_t size,
|
|||||||
return (size + (1 << sampling_bits) - 1) >> sampling_bits;
|
return (size + (1 << sampling_bits) - 1) >> sampling_bits;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// -----------------------------------------------------------------------------
|
||||||
// Faster logarithm for integers. Small values use a look-up table.
|
// Faster logarithm for integers. Small values use a look-up table.
|
||||||
#define LOG_LOOKUP_IDX_MAX 256
|
#define LOG_LOOKUP_IDX_MAX 256
|
||||||
extern const float kLog2Table[LOG_LOOKUP_IDX_MAX];
|
extern const float kLog2Table[LOG_LOOKUP_IDX_MAX];
|
||||||
@ -135,6 +136,17 @@ static WEBP_INLINE float VP8LFastSLog2(int v) {
|
|||||||
return (v < LOG_LOOKUP_IDX_MAX) ? kSLog2Table[v] : VP8LFastSLog2Slow(v);
|
return (v < LOG_LOOKUP_IDX_MAX) ? kSLog2Table[v] : VP8LFastSLog2Slow(v);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// -----------------------------------------------------------------------------
|
||||||
|
// Huffman-cost related functions.
|
||||||
|
|
||||||
|
typedef double (*VP8LCostFunc)(const int* const population, int length);
|
||||||
|
typedef double (*VP8LCostCombinedFunc)(const int* const X,
|
||||||
|
const int* const Y,
|
||||||
|
int length);
|
||||||
|
|
||||||
|
extern VP8LCostFunc VP8LExtraCost;
|
||||||
|
extern VP8LCostCombinedFunc VP8LExtraCostCombined;
|
||||||
|
|
||||||
// -----------------------------------------------------------------------------
|
// -----------------------------------------------------------------------------
|
||||||
// PrefixEncode()
|
// PrefixEncode()
|
||||||
|
|
||||||
|
@ -9,7 +9,8 @@
|
|||||||
//
|
//
|
||||||
// MIPS version of lossless functions
|
// MIPS version of lossless functions
|
||||||
//
|
//
|
||||||
// Author(s): Jovan Zelincevic (jovan.zelincevic@imgtec.com)
|
// Author(s): Djordje Pesut (djordje.pesut@imgtec.com)
|
||||||
|
// Jovan Zelincevic (jovan.zelincevic@imgtec.com)
|
||||||
|
|
||||||
#include "./dsp.h"
|
#include "./dsp.h"
|
||||||
#include "./lossless.h"
|
#include "./lossless.h"
|
||||||
@ -93,6 +94,100 @@ static float FastLog2SlowMIPS32(int v) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// C version of this function:
//   int i = 0;
//   int64_t cost = 0;
//   const int* pop = &population[4];
//   const int* const LoopEnd = &population[length];
//   while (pop != LoopEnd) {
//     ++i;
//     cost += (int64_t)i * *pop;
//     cost += (int64_t)i * *(pop + 1);
//     pop += 2;
//   }
//   return (double)cost;
//
// The loop consumes two entries per iteration and only stops when 'pop' is
// exactly equal to 'LoopEnd', so 'length' must be even and >= 4; any other
// value makes the loop run past the end of 'population'.
static double ExtraCostMIPS32(const int* const population, int length) {
  int i, temp0, temp1;
  const int* pop = &population[4];
  const int* const LoopEnd = &population[length];

  __asm__ volatile(
    // Clear the HI/LO accumulator and the weight counter.
    "mult   $zero,    $zero                  \n\t"
    "xor    %[i],     %[i],       %[i]       \n\t"
    "beq    %[pop],   %[LoopEnd], 2f         \n\t"
  "1:                                        \n\t"
    "lw     %[temp0], 0(%[pop])              \n\t"
    "lw     %[temp1], 4(%[pop])              \n\t"
    "addiu  %[i],     %[i],       1          \n\t"
    "addiu  %[pop],   %[pop],     8          \n\t"
    // HI:LO += i * population[k], as a 64-bit multiply-accumulate.
    "madd   %[i],     %[temp0]               \n\t"
    "madd   %[i],     %[temp1]               \n\t"
    "bne    %[pop],   %[LoopEnd], 1b         \n\t"
  "2:                                        \n\t"
    "mfhi   %[temp0]                         \n\t"
    "mflo   %[temp1]                         \n\t"
    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
      [i]"=&r"(i), [pop]"+r"(pop)
    : [LoopEnd]"r"(LoopEnd)
    : "memory", "hi", "lo"
  );

  // Rebuild the 64-bit accumulator from unsigned halves. OR-ing the signed
  // LO word directly would sign-extend it and overwrite the upper 32 bits
  // with ones whenever bit 31 of LO is set.
  return (double)(int64_t)(((uint64_t)(uint32_t)temp0 << 32) |
                           (uint32_t)temp1);
}
|
||||||
|
|
||||||
|
// C version of this function:
//   int i = 0;
//   int64_t cost = 0;
//   const int* pX = &X[4];
//   const int* pY = &Y[4];
//   const int* const LoopEnd = &X[length];
//   while (pX != LoopEnd) {
//     const int xy0 = *pX + *pY;
//     const int xy1 = *(pX + 1) + *(pY + 1);
//     ++i;
//     cost += (int64_t)i * xy0;
//     cost += (int64_t)i * xy1;
//     pX += 2;
//     pY += 2;
//   }
//   return (double)cost;
//
// The loop consumes two entries of each array per iteration and only stops
// when 'pX' is exactly equal to 'LoopEnd', so 'length' must be even and >= 4;
// any other value makes the loop run past the end of 'X' and 'Y'.
// Note that the per-entry sums X[k] + Y[k] are formed with 32-bit 'addu'.
static double ExtraCostCombinedMIPS32(const int* const X, const int* const Y,
                                      int length) {
  int i, temp0, temp1, temp2, temp3;
  const int* pX = &X[4];
  const int* pY = &Y[4];
  const int* const LoopEnd = &X[length];

  __asm__ volatile(
    // Clear the HI/LO accumulator and the weight counter.
    "mult   $zero,    $zero                  \n\t"
    "xor    %[i],     %[i],       %[i]       \n\t"
    "beq    %[pX],    %[LoopEnd], 2f         \n\t"
  "1:                                        \n\t"
    "lw     %[temp0], 0(%[pX])               \n\t"
    "lw     %[temp1], 0(%[pY])               \n\t"
    "lw     %[temp2], 4(%[pX])               \n\t"
    "lw     %[temp3], 4(%[pY])               \n\t"
    "addiu  %[i],     %[i],       1          \n\t"
    "addu   %[temp0], %[temp0],   %[temp1]   \n\t"
    "addu   %[temp2], %[temp2],   %[temp3]   \n\t"
    "addiu  %[pX],    %[pX],      8          \n\t"
    "addiu  %[pY],    %[pY],      8          \n\t"
    // HI:LO += i * (X[k] + Y[k]), as a 64-bit multiply-accumulate.
    "madd   %[i],     %[temp0]               \n\t"
    "madd   %[i],     %[temp2]               \n\t"
    "bne    %[pX],    %[LoopEnd], 1b         \n\t"
  "2:                                        \n\t"
    "mfhi   %[temp0]                         \n\t"
    "mflo   %[temp1]                         \n\t"
    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
      [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
      [i]"=&r"(i), [pX]"+r"(pX), [pY]"+r"(pY)
    : [LoopEnd]"r"(LoopEnd)
    : "memory", "hi", "lo"
  );

  // Rebuild the 64-bit accumulator from unsigned halves. OR-ing the signed
  // LO word directly would sign-extend it and overwrite the upper 32 bits
  // with ones whenever bit 31 of LO is set.
  return (double)(int64_t)(((uint64_t)(uint32_t)temp0 << 32) |
                           (uint32_t)temp1);
}
|
||||||
|
|
||||||
#endif // WEBP_USE_MIPS32
|
#endif // WEBP_USE_MIPS32
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
@ -104,5 +199,7 @@ void VP8LDspInitMIPS32(void) {
|
|||||||
#if defined(WEBP_USE_MIPS32)
|
#if defined(WEBP_USE_MIPS32)
|
||||||
VP8LFastSLog2Slow = FastSLog2SlowMIPS32;
|
VP8LFastSLog2Slow = FastSLog2SlowMIPS32;
|
||||||
VP8LFastLog2Slow = FastLog2SlowMIPS32;
|
VP8LFastLog2Slow = FastLog2SlowMIPS32;
|
||||||
|
VP8LExtraCost = ExtraCostMIPS32;
|
||||||
|
VP8LExtraCostCombined = ExtraCostCombinedMIPS32;
|
||||||
#endif // WEBP_USE_MIPS32
|
#endif // WEBP_USE_MIPS32
|
||||||
}
|
}
|
||||||
|
@ -252,24 +252,6 @@ static double GetCombinedEntropy(const int* const X, const int* const Y,
|
|||||||
return BitsEntropyCombined(X, Y, length) + HuffmanCostCombined(X, Y, length);
|
return BitsEntropyCombined(X, Y, length) + HuffmanCostCombined(X, Y, length);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns the weighted sum of the histogram entries from index 4 up to
// 'length' - 1: entry k is weighted by (k - 2) >> 1. The first four entries
// are skipped and contribute nothing.
// 'population' must hold at least 'length' entries. Returns 0 when
// 'length' <= 4.
static double ExtraCost(const int* const population, int length) {
  int i;
  double cost = 0.;
  for (i = 2; i < length - 2; ++i) {
    // Multiply in double precision: the plain int product overflows
    // (undefined behavior) once a count exceeds INT_MAX / (i >> 1).
    cost += (double)(i >> 1) * population[i + 2];
  }
  return cost;
}
|
|
||||||
|
|
||||||
// Same weighted sum as ExtraCost(), computed on the element-wise sum of the
// two histograms X and Y: entry k (k >= 4) of X + Y is weighted by
// (k - 2) >> 1, and the first four entries are ignored.
// Both 'X' and 'Y' must hold at least 'length' entries.
static double ExtraCostCombined(const int* const X, const int* const Y,
                                int length) {
  int i;
  double cost = 0.;
  for (i = 2; i < length - 2; ++i) {
    // Add and multiply in double precision: both the int sum X + Y and the
    // int product with the weight can overflow for large counts.
    const double xy = (double)X[i + 2] + (double)Y[i + 2];
    cost += (double)(i >> 1) * xy;
  }
  return cost;
}
|
|
||||||
|
|
||||||
// Estimates the Entropy + Huffman + other block overhead size cost.
|
// Estimates the Entropy + Huffman + other block overhead size cost.
|
||||||
double VP8LHistogramEstimateBits(const VP8LHistogram* const p) {
|
double VP8LHistogramEstimateBits(const VP8LHistogram* const p) {
|
||||||
return
|
return
|
||||||
@ -278,8 +260,8 @@ double VP8LHistogramEstimateBits(const VP8LHistogram* const p) {
|
|||||||
+ PopulationCost(p->blue_, 256)
|
+ PopulationCost(p->blue_, 256)
|
||||||
+ PopulationCost(p->alpha_, 256)
|
+ PopulationCost(p->alpha_, 256)
|
||||||
+ PopulationCost(p->distance_, NUM_DISTANCE_CODES)
|
+ PopulationCost(p->distance_, NUM_DISTANCE_CODES)
|
||||||
+ ExtraCost(p->literal_ + 256, NUM_LENGTH_CODES)
|
+ VP8LExtraCost(p->literal_ + 256, NUM_LENGTH_CODES)
|
||||||
+ ExtraCost(p->distance_, NUM_DISTANCE_CODES);
|
+ VP8LExtraCost(p->distance_, NUM_DISTANCE_CODES);
|
||||||
}
|
}
|
||||||
|
|
||||||
double VP8LHistogramEstimateBitsBulk(const VP8LHistogram* const p) {
|
double VP8LHistogramEstimateBitsBulk(const VP8LHistogram* const p) {
|
||||||
@ -289,8 +271,8 @@ double VP8LHistogramEstimateBitsBulk(const VP8LHistogram* const p) {
|
|||||||
+ BitsEntropy(p->blue_, 256)
|
+ BitsEntropy(p->blue_, 256)
|
||||||
+ BitsEntropy(p->alpha_, 256)
|
+ BitsEntropy(p->alpha_, 256)
|
||||||
+ BitsEntropy(p->distance_, NUM_DISTANCE_CODES)
|
+ BitsEntropy(p->distance_, NUM_DISTANCE_CODES)
|
||||||
+ ExtraCost(p->literal_ + 256, NUM_LENGTH_CODES)
|
+ VP8LExtraCost(p->literal_ + 256, NUM_LENGTH_CODES)
|
||||||
+ ExtraCost(p->distance_, NUM_DISTANCE_CODES);
|
+ VP8LExtraCost(p->distance_, NUM_DISTANCE_CODES);
|
||||||
}
|
}
|
||||||
|
|
||||||
// -----------------------------------------------------------------------------
|
// -----------------------------------------------------------------------------
|
||||||
@ -322,7 +304,7 @@ static int GetCombinedHistogramEntropy(const VP8LHistogram* const a,
|
|||||||
b->palette_code_bits_;
|
b->palette_code_bits_;
|
||||||
*cost += GetCombinedEntropy(a->literal_, b->literal_,
|
*cost += GetCombinedEntropy(a->literal_, b->literal_,
|
||||||
VP8LHistogramNumCodes(palette_code_bits));
|
VP8LHistogramNumCodes(palette_code_bits));
|
||||||
*cost += ExtraCostCombined(a->literal_ + 256, b->literal_ + 256,
|
*cost += VP8LExtraCostCombined(a->literal_ + 256, b->literal_ + 256,
|
||||||
NUM_LENGTH_CODES);
|
NUM_LENGTH_CODES);
|
||||||
if (*cost > cost_threshold) return 0;
|
if (*cost > cost_threshold) return 0;
|
||||||
|
|
||||||
@ -336,7 +318,8 @@ static int GetCombinedHistogramEntropy(const VP8LHistogram* const a,
|
|||||||
if (*cost > cost_threshold) return 0;
|
if (*cost > cost_threshold) return 0;
|
||||||
|
|
||||||
*cost += GetCombinedEntropy(a->distance_, b->distance_, NUM_DISTANCE_CODES);
|
*cost += GetCombinedEntropy(a->distance_, b->distance_, NUM_DISTANCE_CODES);
|
||||||
*cost += ExtraCostCombined(a->distance_, b->distance_, NUM_DISTANCE_CODES);
|
*cost += VP8LExtraCostCombined(a->distance_, b->distance_,
|
||||||
|
NUM_DISTANCE_CODES);
|
||||||
if (*cost > cost_threshold) return 0;
|
if (*cost > cost_threshold) return 0;
|
||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
@ -426,10 +409,10 @@ static void UpdateHistogramCost(VP8LHistogram* const h) {
|
|||||||
const double alpha_cost = PopulationCost(h->alpha_, 256);
|
const double alpha_cost = PopulationCost(h->alpha_, 256);
|
||||||
const double distance_cost =
|
const double distance_cost =
|
||||||
PopulationCost(h->distance_, NUM_DISTANCE_CODES) +
|
PopulationCost(h->distance_, NUM_DISTANCE_CODES) +
|
||||||
ExtraCost(h->distance_, NUM_DISTANCE_CODES);
|
VP8LExtraCost(h->distance_, NUM_DISTANCE_CODES);
|
||||||
const int num_codes = VP8LHistogramNumCodes(h->palette_code_bits_);
|
const int num_codes = VP8LHistogramNumCodes(h->palette_code_bits_);
|
||||||
h->literal_cost_ = PopulationCost(h->literal_, num_codes) +
|
h->literal_cost_ = PopulationCost(h->literal_, num_codes) +
|
||||||
ExtraCost(h->literal_ + 256, NUM_LENGTH_CODES);
|
VP8LExtraCost(h->literal_ + 256, NUM_LENGTH_CODES);
|
||||||
h->red_cost_ = PopulationCost(h->red_, 256);
|
h->red_cost_ = PopulationCost(h->red_, 256);
|
||||||
h->blue_cost_ = PopulationCost(h->blue_, 256);
|
h->blue_cost_ = PopulationCost(h->blue_, 256);
|
||||||
h->bit_cost_ = h->literal_cost_ + h->red_cost_ + h->blue_cost_ +
|
h->bit_cost_ = h->literal_cost_ + h->red_cost_ + h->blue_cost_ +
|
||||||
|
Loading…
Reference in New Issue
Block a user