Move the HuffmanCost() function to dsp lib

This is to help further optimizations.
(like in https://gerrit.chromium.org/gerrit/#/c/69787/)

There's a small slowdown (~0.5% at -z 9 quality) due to
function pointer usage. Note that, for speed, it's important
to return VP8LStreaks by value, and not pass a pointer.

Change-Id: Id4167366765fb7fc5dff89c1fd75dee456737000
This commit is contained in:
skal 2014-04-18 08:14:46 -07:00
parent 6653b601ef
commit 75b12006e3
4 changed files with 82 additions and 64 deletions

View File

@ -1458,15 +1458,14 @@ void VP8LBundleColorMap(const uint8_t* const row, int width,
}
}
static double ExtraCost(const int* const population, int length) {
static double ExtraCost(const int* population, int length) {
int i;
double cost = 0.;
for (i = 2; i < length - 2; ++i) cost += (i >> 1) * population[i + 2];
return cost;
}
static double ExtraCostCombined(const int* const X, const int* const Y,
int length) {
static double ExtraCostCombined(const int* X, const int* Y, int length) {
int i;
double cost = 0.;
for (i = 2; i < length - 2; ++i) {
@ -1476,6 +1475,53 @@ static double ExtraCostCombined(const int* const X, const int* const Y,
return cost;
}
// Splits 'population' into runs of equal consecutive values and returns the
// run statistics. For each run, k is 0 if the repeated value is zero and 1
// otherwise:
//   counts[k]     is incremented for every run longer than 3,
//   streaks[k][0] accumulates the lengths of runs of length <= 3,
//   streaks[k][1] accumulates the lengths of runs of length > 3.
// The element at index 0 is always read, so 'length' is assumed to be >= 1.
static VP8LStreaks HuffmanCostCount(const int* population, int length) {
  const int last = length - 1;
  VP8LStreaks stats;
  int run = 0;
  int pos = 0;
  memset(&stats, 0, sizeof(stats));
  while (pos < last) {
    ++run;
    if (population[pos] != population[pos + 1]) {
      // The value changes after 'pos': the current run ends here.
      const int is_nonzero = (population[pos] != 0);
      const int is_long = (run > 3);
      stats.counts[is_nonzero] += is_long;
      stats.streaks[is_nonzero][is_long] += run;
      run = 0;
    }
    ++pos;
  }
  {
    // Account for the run that ends with the last element.
    const int is_nonzero = (population[pos] != 0);
    const int is_long = (run + 1 > 3);
    stats.counts[is_nonzero] += is_long;
    stats.streaks[is_nonzero][is_long] += run + 1;
  }
  return stats;
}
// Computes run statistics over the element-wise sums X[i] + Y[i].
// Consecutive equal sums form a run; for each run, k is 0 if the sum is zero
// and 1 otherwise:
//   counts[k]     is incremented for every run longer than 3,
//   streaks[k][0] accumulates the lengths of runs of length <= 3,
//   streaks[k][1] accumulates the lengths of runs of length > 3.
// X[0] and Y[0] are always read, so 'length' is assumed to be >= 1.
static VP8LStreaks HuffmanCostCombinedCount(const int* X, const int* Y,
                                            int length) {
  const int last = length - 1;
  VP8LStreaks stats;
  int run = 0;
  int pos = 0;
  memset(&stats, 0, sizeof(stats));
  while (pos < last) {
    const int sum = X[pos] + Y[pos];
    const int next_sum = X[pos + 1] + Y[pos + 1];
    ++run;
    if (sum != next_sum) {
      // The combined value changes after 'pos': the current run ends here.
      const int is_nonzero = (sum != 0);
      const int is_long = (run > 3);
      stats.counts[is_nonzero] += is_long;
      stats.streaks[is_nonzero][is_long] += run;
      run = 0;
    }
    ++pos;
  }
  {
    // Account for the run that ends with the last element.
    const int sum = X[pos] + Y[pos];
    const int is_nonzero = (sum != 0);
    const int is_long = (run + 1 > 3);
    stats.counts[is_nonzero] += is_long;
    stats.streaks[is_nonzero][is_long] += run + 1;
  }
  return stats;
}
//------------------------------------------------------------------------------
VP8LProcessBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed;
@ -1497,6 +1543,9 @@ VP8LFastLog2SlowFunc VP8LFastSLog2Slow;
VP8LCostFunc VP8LExtraCost;
VP8LCostCombinedFunc VP8LExtraCostCombined;
VP8LCostCountFunc VP8LHuffmanCostCount;
VP8LCostCombinedCountFunc VP8LHuffmanCostCombinedCount;
extern void VP8LDspInitSSE2(void);
extern void VP8LDspInitNEON(void);
extern void VP8LDspInitMIPS32(void);
@ -1522,6 +1571,9 @@ void VP8LDspInit(void) {
VP8LExtraCost = ExtraCost;
VP8LExtraCostCombined = ExtraCostCombined;
VP8LHuffmanCostCount = HuffmanCostCount;
VP8LHuffmanCostCombinedCount = HuffmanCostCombinedCount;
// If defined, use CPUInfo() to overwrite some pointers with faster versions.
if (VP8GetCPUInfo != NULL) {
#if defined(WEBP_USE_SSE2)

View File

@ -139,14 +139,24 @@ static WEBP_INLINE float VP8LFastSLog2(int v) {
// -----------------------------------------------------------------------------
// Huffman-cost related functions.
typedef double (*VP8LCostFunc)(const int* const population, int length);
typedef double (*VP8LCostCombinedFunc)(const int* const X,
const int* const Y,
int length);
typedef double (*VP8LCostFunc)(const int* population, int length);
typedef double (*VP8LCostCombinedFunc)(const int* X, const int* Y, int length);
extern VP8LCostFunc VP8LExtraCost;
extern VP8LCostCombinedFunc VP8LExtraCostCombined;
// Statistics about streaks (runs of identical consecutive values), as filled
// in by the HuffmanCost*Count functions.
typedef struct { // small struct to hold counters
// Number of streaks longer than 3. Index: 0=zero streak, 1=non-zero streak.
int counts[2];
// Summed streak lengths, indexed as [zero/non-zero][streak<=3 / streak>3].
int streaks[2][2];
} VP8LStreaks;
// Signatures of the streak-counting routines, for a single array
// ('population') and for the element-wise sum of two arrays ('X' and 'Y').
// The statistics are returned by value rather than through a pointer; the
// commit message notes this is important for speed.
typedef VP8LStreaks (*VP8LCostCountFunc)(const int* population, int length);
typedef VP8LStreaks (*VP8LCostCombinedCountFunc)(const int* X, const int* Y,
int length);
// Function pointers assigned in VP8LDspInit().
extern VP8LCostCountFunc VP8LHuffmanCostCount;
extern VP8LCostCombinedCountFunc VP8LHuffmanCostCombinedCount;
// -----------------------------------------------------------------------------
// PrefixEncode()

View File

@ -180,7 +180,7 @@ static double BitsEntropyCombined(const int* const X, const int* const Y,
return BitsEntropyRefine(nonzeros, sum, max_val, retval);
}
static WEBP_INLINE double InitialHuffmanCost(void) {
static double InitialHuffmanCost(void) {
// Small bias because Huffman code length is typically not stored in
// full length.
static const int kHuffmanCodeOfHuffmanCodeSize = CODE_LENGTH_CODES * 3;
@ -188,66 +188,29 @@ static WEBP_INLINE double InitialHuffmanCost(void) {
return kHuffmanCodeOfHuffmanCodeSize - kSmallBias;
}
double VP8FinalHuffmanCost(int cnt_z, int streak_z_le3, int streak_z_gt3,
int cnt_nz, int streak_nz_le3, int streak_nz_gt3) {
// Finalize the Huffman cost based on streak counts and length type (<=3 or >3)
static double FinalHuffmanCost(const VP8LStreaks* const stats) {
double retval = InitialHuffmanCost();
retval += cnt_z * 1.5625 + 0.234375 * streak_z_gt3;
retval += cnt_nz * 2.578125 + 0.703125 * streak_nz_gt3;
retval += 1.796875 * streak_z_le3;
retval += 3.28125 * streak_nz_le3;
retval += stats->counts[0] * 1.5625 + 0.234375 * stats->streaks[0][1];
retval += stats->counts[1] * 2.578125 + 0.703125 * stats->streaks[1][1];
retval += 1.796875 * stats->streaks[0][0];
retval += 3.28125 * stats->streaks[1][0];
return retval;
}
// Returns the cost to encode the rle-encoded entropy code.
// The constants in this function are experimental.
// Trampolines
static double HuffmanCost(const int* const population, int length) {
int cnts[2] = { 0, 0 };
int streaks[2][2] = { { 0, 0 }, { 0, 0 } };
int streak = 0;
int i;
for (i = 0; i < length - 1; ++i) {
++streak;
if (population[i] == population[i + 1]) {
continue;
}
cnts[population[i] != 0] += (streak > 3);
streaks[population[i] != 0][(streak > 3)] += streak;
streak = 0;
}
++streak;
cnts[population[i] != 0] += (streak > 3);
streaks[population[i] != 0][(streak > 3)] += streak;
return VP8FinalHuffmanCost(cnts[0], streaks[0][0], streaks[0][1],
cnts[1], streaks[1][0], streaks[1][1]);
const VP8LStreaks stats = VP8LHuffmanCostCount(population, length);
return FinalHuffmanCost(&stats);
}
static double HuffmanCostCombined(const int* const X, const int* const Y,
int length) {
int cnts[2] = { 0, 0 };
int streaks[2][2] = { { 0, 0 }, { 0, 0 } };
int streak = 0;
int i;
for (i = 0; i < length - 1; ++i) {
const int xy = X[i] + Y[i];
const int xy_next = X[i + 1] + Y[i + 1];
++streak;
if (xy == xy_next) {
continue;
}
cnts[xy != 0] += (streak > 3);
streaks[xy != 0][streak > 3] += streak;
streak = 0;
}
{
const int xy = X[i] + Y[i];
++streak;
cnts[xy != 0] += (streak > 3);
streaks[xy != 0][streak > 3] += streak;
}
return VP8FinalHuffmanCost(cnts[0], streaks[0][0], streaks[0][1],
cnts[1], streaks[1][0], streaks[1][1]);
const VP8LStreaks stats = VP8LHuffmanCostCombinedCount(X, Y, length);
return FinalHuffmanCost(&stats);
}
// Aggregated costs
// Returns the aggregated cost of 'population' (an array of 'length' entries):
// the sum of its BitsEntropy() and HuffmanCost() values.
static double PopulationCost(const int* const population, int length) {
  const double entropy_cost = BitsEntropy(population, length);
  const double huffman_cost = HuffmanCost(population, length);
  return entropy_cost + huffman_cost;
}

View File

@ -90,13 +90,6 @@ static WEBP_INLINE int VP8LHistogramNumCodes(int palette_code_bits) {
((palette_code_bits > 0) ? (1 << palette_code_bits) : 0);
}
// Used to finalize the Huffman cost:
// cnt_z / cnt_nz: counts the number of 0's and non-0's
// streak_{z,nz}_le3 / streak_{z,nz}_gt3: number of streaks larger than 3
// or less-or-equal than 3.
double VP8FinalHuffmanCost(int cnt_z, int streak_z_le3, int streak_z_gt3,
int cnt_nz, int streak_nz_le3, int streak_nz_gt3);
// Builds the histogram image.
int VP8LGetHistoImageSymbols(int xsize, int ysize,
const VP8LBackwardRefs* const refs,