Move the HuffmanCost() function to dsp lib

This is to help further optimizations.
(like in https://gerrit.chromium.org/gerrit/#/c/69787/)

There's a small slowdown (~0.5% at -z 9 quality) due to
function pointer usage. Note that, for speed, it's important
to return VP8LStreaks by value, and not pass a pointer.

Change-Id: Id4167366765fb7fc5dff89c1fd75dee456737000
This commit is contained in:
skal
2014-04-18 08:14:46 -07:00
parent 6653b601ef
commit 75b12006e3
4 changed files with 82 additions and 64 deletions

View File

@ -1458,15 +1458,14 @@ void VP8LBundleColorMap(const uint8_t* const row, int width,
}
}
static double ExtraCost(const int* const population, int length) {
// Returns a weighted sum of the histogram entries population[4..length-1]:
// entry 'k' contributes ((k - 2) >> 1) * population[k].
// Entries below index 4 never contribute; the result is 0 when length <= 4.
// NOTE(review): the weight is presumably the number of extra bits attached
// to prefix code 'k' -- confirm against the caller.
static double ExtraCost(const int* population, int length) {
  double total = 0.;
  int k;
  for (k = 4; k < length; ++k) {
    const int weight = (k - 2) >> 1;
    total += weight * population[k];
  }
  return total;
}
static double ExtraCostCombined(const int* const X, const int* const Y,
int length) {
static double ExtraCostCombined(const int* X, const int* Y, int length) {
int i;
double cost = 0.;
for (i = 2; i < length - 2; ++i) {
@ -1476,6 +1475,53 @@ static double ExtraCostCombined(const int* const X, const int* const Y,
return cost;
}
// Gathers run-length statistics over population[0..length-1].
// The array is split into maximal runs ("streaks") of equal consecutive
// values. For each run of value 'v' and size 'n':
//   - counts[v != 0] is incremented if n > 3,
//   - n is added to streaks[v != 0][n > 3].
// The result is returned by value.
// NOTE: population[0] is read even when length <= 0, so callers are expected
// to pass length >= 1.
static VP8LStreaks HuffmanCostCount(const int* population, int length) {
  VP8LStreaks result;
  int pos = 0;
  int run_length = 1;   // size of the current run, including entry 'pos'
  memset(&result, 0, sizeof(result));
  while (pos < length - 1) {
    if (population[pos] != population[pos + 1]) {
      // The run ends at 'pos': record it and start a new one.
      const int is_non_zero = (population[pos] != 0);
      const int is_long = (run_length > 3);
      result.counts[is_non_zero] += is_long;
      result.streaks[is_non_zero][is_long] += run_length;
      run_length = 0;
    }
    ++run_length;
    ++pos;
  }
  // Record the trailing run, which ends at the last entry.
  {
    const int is_non_zero = (population[pos] != 0);
    const int is_long = (run_length > 3);
    result.counts[is_non_zero] += is_long;
    result.streaks[is_non_zero][is_long] += run_length;
  }
  return result;
}
// Gathers run-length statistics over the element-wise sum X[i] + Y[i],
// for i in [0..length-1], without materializing the summed array.
// The sums are split into maximal runs ("streaks") of equal consecutive
// values. For each run of value 'v' and size 'n':
//   - counts[v != 0] is incremented if n > 3,
//   - n is added to streaks[v != 0][n > 3].
// The result is returned by value.
// NOTE: X[0] and Y[0] are read even when length <= 0, so callers are expected
// to pass length >= 1.
static VP8LStreaks HuffmanCostCombinedCount(const int* X, const int* Y,
                                            int length) {
  VP8LStreaks result;
  int pos = 0;
  int run_length = 1;        // size of the current run, including entry 'pos'
  int sum = X[0] + Y[0];     // combined value of the current run
  memset(&result, 0, sizeof(result));
  for (; pos < length - 1; ++pos, ++run_length) {
    const int next_sum = X[pos + 1] + Y[pos + 1];
    if (sum != next_sum) {
      // The run ends at 'pos': record it and start a new one.
      const int is_non_zero = (sum != 0);
      const int is_long = (run_length > 3);
      result.counts[is_non_zero] += is_long;
      result.streaks[is_non_zero][is_long] += run_length;
      run_length = 0;
      sum = next_sum;
    }
  }
  // Record the trailing run, which ends at the last entry.
  {
    const int is_non_zero = (sum != 0);
    const int is_long = (run_length > 3);
    result.counts[is_non_zero] += is_long;
    result.streaks[is_non_zero][is_long] += run_length;
  }
  return result;
}
//------------------------------------------------------------------------------
VP8LProcessBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed;
@ -1497,6 +1543,9 @@ VP8LFastLog2SlowFunc VP8LFastSLog2Slow;
VP8LCostFunc VP8LExtraCost;
VP8LCostCombinedFunc VP8LExtraCostCombined;
VP8LCostCountFunc VP8LHuffmanCostCount;
VP8LCostCombinedCountFunc VP8LHuffmanCostCombinedCount;
extern void VP8LDspInitSSE2(void);
extern void VP8LDspInitNEON(void);
extern void VP8LDspInitMIPS32(void);
@ -1522,6 +1571,9 @@ void VP8LDspInit(void) {
VP8LExtraCost = ExtraCost;
VP8LExtraCostCombined = ExtraCostCombined;
VP8LHuffmanCostCount = HuffmanCostCount;
VP8LHuffmanCostCombinedCount = HuffmanCostCombinedCount;
// If defined, use CPUInfo() to overwrite some pointers with faster versions.
if (VP8GetCPUInfo != NULL) {
#if defined(WEBP_USE_SSE2)

View File

@ -139,14 +139,24 @@ static WEBP_INLINE float VP8LFastSLog2(int v) {
// -----------------------------------------------------------------------------
// Huffman-cost related functions.
typedef double (*VP8LCostFunc)(const int* const population, int length);
typedef double (*VP8LCostCombinedFunc)(const int* const X,
const int* const Y,
int length);
typedef double (*VP8LCostFunc)(const int* population, int length);
typedef double (*VP8LCostCombinedFunc)(const int* X, const int* Y, int length);
extern VP8LCostFunc VP8LExtraCost;
extern VP8LCostCombinedFunc VP8LExtraCostCombined;
// Small struct to hold run-length ("streak") counters, as filled in by the
// HuffmanCostCount-style functions.
typedef struct { // small struct to hold counters
// Number of streaks longer than 3. Index: 0=zero streak, 1=non-zero streak.
int counts[2];
// Summed streak lengths: [zero/non-zero][streak<=3 / streak>3]
int streaks[2][2];
} VP8LStreaks;
typedef VP8LStreaks (*VP8LCostCountFunc)(const int* population, int length);
typedef VP8LStreaks (*VP8LCostCombinedCountFunc)(const int* X, const int* Y,
int length);
extern VP8LCostCountFunc VP8LHuffmanCostCount;
extern VP8LCostCombinedCountFunc VP8LHuffmanCostCombinedCount;
// -----------------------------------------------------------------------------
// PrefixEncode()