Cache all costs in the histograms

This provides a small speed-up, but its main benefit is a
single entry point for computing costs.

Change-Id: I05d9eb3f01ae90d95bcd7b1e1e987ae729844a60
This commit is contained in:
Vincent Rabaud 2025-04-20 17:49:29 +02:00
parent ad52d5fc7e
commit e53e213091
2 changed files with 86 additions and 88 deletions

View File

@ -35,6 +35,15 @@
// Maximum number of histograms allowed in greedy combining algorithm. // Maximum number of histograms allowed in greedy combining algorithm.
#define MAX_HISTO_GREEDY 100 #define MAX_HISTO_GREEDY 100
// Symbolic indices for the five per-histogram arrays (e.g. 'is_used' and
// 'costs' of VP8LHistogram), in the order literal, red, blue, alpha, distance.
// The numeric values are spelled out because callers also iterate over these
// slots with plain integers and cast them to HistogramIndex.
typedef enum {
  LITERAL = 0,
  RED = 1,
  BLUE = 2,
  ALPHA = 3,
  DISTANCE = 4
} HistogramIndex;
// Return the size of the histogram for a given cache_bits. // Return the size of the histogram for a given cache_bits.
static int GetHistogramSize(int cache_bits) { static int GetHistogramSize(int cache_bits) {
const int literal_size = VP8LHistogramNumCodes(cache_bits); const int literal_size = VP8LHistogramNumCodes(cache_bits);
@ -98,9 +107,7 @@ void VP8LHistogramInit(VP8LHistogram* const p, int palette_code_bits,
} else { } else {
p->trivial_symbol = 0; p->trivial_symbol = 0;
p->bit_cost = 0; p->bit_cost = 0;
p->literal_cost = 0; memset(p->costs, 0, sizeof(p->costs));
p->red_cost = 0;
p->blue_cost = 0;
memset(p->is_used, 0, sizeof(p->is_used)); memset(p->is_used, 0, sizeof(p->is_used));
} }
} }
@ -331,78 +338,56 @@ static uint64_t PopulationCost(const uint32_t* const population, int length,
// alpha, distance). // alpha, distance).
static WEBP_INLINE uint64_t GetCombinedEntropy( static WEBP_INLINE uint64_t GetCombinedEntropy(
const VP8LHistogram* const histo_X, const VP8LHistogram* const histo_Y, const VP8LHistogram* const histo_X, const VP8LHistogram* const histo_Y,
int index, int trivial_at_end) { HistogramIndex index, int trivial_at_end) {
const uint32_t* X; const uint32_t* X;
const uint32_t* Y; const uint32_t* Y;
int length; int length;
VP8LStreaks stats; VP8LStreaks stats;
if (index == 0) { VP8LBitEntropy bit_entropy;
const int is_X_used = histo_X->is_used[index];
const int is_Y_used = histo_Y->is_used[index];
if (trivial_at_end || !is_X_used || !is_Y_used) {
if (is_X_used) return histo_X->costs[index];
return histo_Y->costs[index];
}
assert(is_X_used && is_Y_used);
if (index == LITERAL) {
X = histo_X->literal; X = histo_X->literal;
Y = histo_Y->literal; Y = histo_Y->literal;
length = VP8LHistogramNumCodes(histo_X->palette_code_bits); length = VP8LHistogramNumCodes(histo_X->palette_code_bits);
} else if (index == 1) { } else if (index == RED) {
X = histo_X->red; X = histo_X->red;
Y = histo_Y->red; Y = histo_Y->red;
length = NUM_LITERAL_CODES; length = NUM_LITERAL_CODES;
} else if (index == 2) { } else if (index == BLUE) {
X = histo_X->blue; X = histo_X->blue;
Y = histo_Y->blue; Y = histo_Y->blue;
length = NUM_LITERAL_CODES; length = NUM_LITERAL_CODES;
} else if (index == 3) { } else if (index == ALPHA) {
X = histo_X->alpha; X = histo_X->alpha;
Y = histo_Y->alpha; Y = histo_Y->alpha;
length = NUM_LITERAL_CODES; length = NUM_LITERAL_CODES;
} else { } else {
assert(index == 4); assert(index == DISTANCE);
X = histo_X->distance; X = histo_X->distance;
Y = histo_Y->distance; Y = histo_Y->distance;
length = NUM_DISTANCE_CODES; length = NUM_DISTANCE_CODES;
} }
if (trivial_at_end) {
// This configuration is due to palettization that transforms an indexed
// pixel into 0xff000000 | (pixel << 8) in VP8LBundleColorMap.
// BitsEntropyRefine is 0 for histograms with only one non-zero value.
// Only FinalHuffmanCost needs to be evaluated.
memset(&stats, 0, sizeof(stats));
// Deal with the non-zero value at index 0 or length-1.
stats.streaks[1][0] = 1;
// Deal with the following/previous zero streak.
stats.counts[0] = 1;
stats.streaks[0][1] = length - 1;
return FinalHuffmanCost(&stats);
} else {
const int is_X_used = histo_X->is_used[index];
const int is_Y_used = histo_Y->is_used[index];
VP8LBitEntropy bit_entropy;
if (is_X_used) {
if (is_Y_used) {
VP8LGetCombinedEntropyUnrefined(X, Y, length, &bit_entropy, &stats); VP8LGetCombinedEntropyUnrefined(X, Y, length, &bit_entropy, &stats);
} else {
VP8LGetEntropyUnrefined(X, length, &bit_entropy, &stats);
}
} else {
if (is_Y_used) {
VP8LGetEntropyUnrefined(Y, length, &bit_entropy, &stats);
} else {
memset(&stats, 0, sizeof(stats));
stats.counts[0] = 1;
stats.streaks[0][length > 3] = length;
VP8LBitEntropyInit(&bit_entropy);
}
}
return BitsEntropyRefine(&bit_entropy) + FinalHuffmanCost(&stats); return BitsEntropyRefine(&bit_entropy) + FinalHuffmanCost(&stats);
} }
}
// Estimates the Entropy + Huffman + other block overhead size cost. // Estimates the Entropy + Huffman + other block overhead size cost.
uint64_t VP8LHistogramEstimateBits(VP8LHistogram* const p) { uint64_t VP8LHistogramEstimateBits(VP8LHistogram* const p) {
return PopulationCost(p->literal, VP8LHistogramNumCodes(p->palette_code_bits), return PopulationCost(p->literal, VP8LHistogramNumCodes(p->palette_code_bits),
NULL, &p->is_used[0]) + NULL, &p->is_used[LITERAL]) +
PopulationCost(p->red, NUM_LITERAL_CODES, NULL, &p->is_used[1]) + PopulationCost(p->red, NUM_LITERAL_CODES, NULL, &p->is_used[RED]) +
PopulationCost(p->blue, NUM_LITERAL_CODES, NULL, &p->is_used[2]) + PopulationCost(p->blue, NUM_LITERAL_CODES, NULL, &p->is_used[BLUE]) +
PopulationCost(p->alpha, NUM_LITERAL_CODES, NULL, &p->is_used[3]) + PopulationCost(p->alpha, NUM_LITERAL_CODES, NULL, &p->is_used[ALPHA]) +
PopulationCost(p->distance, NUM_DISTANCE_CODES, NULL, &p->is_used[4]) + PopulationCost(p->distance, NUM_DISTANCE_CODES, NULL,
&p->is_used[DISTANCE]) +
((uint64_t)(VP8LExtraCost(p->literal + NUM_LITERAL_CODES, ((uint64_t)(VP8LExtraCost(p->literal + NUM_LITERAL_CODES,
NUM_LENGTH_CODES) + NUM_LENGTH_CODES) +
VP8LExtraCost(p->distance, NUM_DISTANCE_CODES)) VP8LExtraCost(p->distance, NUM_DISTANCE_CODES))
@ -425,12 +410,13 @@ static WEBP_INLINE void SaturateAdd(uint64_t a, int64_t* b) {
// Otherwise returns 0 and the cost is invalid due to early bail-out. // Otherwise returns 0 and the cost is invalid due to early bail-out.
WEBP_NODISCARD static int GetCombinedHistogramEntropy( WEBP_NODISCARD static int GetCombinedHistogramEntropy(
const VP8LHistogram* const a, const VP8LHistogram* const b, const VP8LHistogram* const a, const VP8LHistogram* const b,
int64_t cost_threshold_in, uint64_t* cost) { int64_t cost_threshold_in, uint64_t* cost, uint64_t costs[5]) {
int trivial_at_end = 0, i; int trivial_at_end = 0, i;
const uint64_t cost_threshold = (uint64_t)cost_threshold_in; const uint64_t cost_threshold = (uint64_t)cost_threshold_in;
assert(a->palette_code_bits == b->palette_code_bits); assert(a->palette_code_bits == b->palette_code_bits);
if (cost_threshold_in <= 0) return 0; if (cost_threshold_in <= 0) return 0;
*cost = GetCombinedEntropy(a, b, /*index=*/0, /*trivial_at_end=*/0); *cost = costs[LITERAL] =
GetCombinedEntropy(a, b, LITERAL, /*trivial_at_end=*/0);
// No need to add the extra cost for lengths as it is a constant that does not // No need to add the extra cost for lengths as it is a constant that does not
// influence the histograms. // influence the histograms.
if (*cost >= cost_threshold) return 0; if (*cost >= cost_threshold) return 0;
@ -449,8 +435,10 @@ WEBP_NODISCARD static int GetCombinedHistogramEntropy(
} }
for (i = 1; i <= 4; ++i) { for (i = 1; i <= 4; ++i) {
*cost += GetCombinedEntropy(a, b, i, costs[i] =
GetCombinedEntropy(a, b, (HistogramIndex)i,
/*trivial_at_end=*/i <= 3 ? trivial_at_end : 0); /*trivial_at_end=*/i <= 3 ? trivial_at_end : 0);
*cost += costs[i];
if (*cost >= cost_threshold) return 0; if (*cost >= cost_threshold) return 0;
} }
// No need to add the extra cost for distances as it is a constant that does // No need to add the extra cost for distances as it is a constant that does
@ -480,13 +468,14 @@ WEBP_NODISCARD static int HistogramAddEval(const VP8LHistogram* const a,
const VP8LHistogram* const b, const VP8LHistogram* const b,
VP8LHistogram* const out, VP8LHistogram* const out,
int64_t cost_threshold) { int64_t cost_threshold) {
uint64_t cost;
const uint64_t sum_cost = a->bit_cost + b->bit_cost; const uint64_t sum_cost = a->bit_cost + b->bit_cost;
SaturateAdd(sum_cost, &cost_threshold); SaturateAdd(sum_cost, &cost_threshold);
if (!GetCombinedHistogramEntropy(a, b, cost_threshold, &cost)) return 0; if (!GetCombinedHistogramEntropy(a, b, cost_threshold, &out->bit_cost,
out->costs)) {
return 0;
}
HistogramAdd(a, b, out); HistogramAdd(a, b, out);
out->bit_cost = cost;
out->palette_code_bits = a->palette_code_bits; out->palette_code_bits = a->palette_code_bits;
return 1; return 1;
} }
@ -500,10 +489,12 @@ WEBP_NODISCARD static int HistogramAddThresh(const VP8LHistogram* const a,
const VP8LHistogram* const b, const VP8LHistogram* const b,
int64_t cost_threshold, int64_t cost_threshold,
int64_t* cost_out) { int64_t* cost_out) {
uint64_t cost; uint64_t cost, costs[5];
assert(a != NULL && b != NULL); assert(a != NULL && b != NULL);
SaturateAdd(a->bit_cost, &cost_threshold); SaturateAdd(a->bit_cost, &cost_threshold);
if (!GetCombinedHistogramEntropy(a, b, cost_threshold, &cost)) return 0; if (!GetCombinedHistogramEntropy(a, b, cost_threshold, &cost, costs)) {
return 0;
}
*cost_out = (int64_t)cost - (int64_t)a->bit_cost; *cost_out = (int64_t)cost - (int64_t)a->bit_cost;
return 1; return 1;
@ -533,30 +524,31 @@ static void DominantCostRangeInit(DominantCostRange* const c) {
static void UpdateDominantCostRange( static void UpdateDominantCostRange(
const VP8LHistogram* const h, DominantCostRange* const c) { const VP8LHistogram* const h, DominantCostRange* const c) {
if (c->literal_max < h->literal_cost) c->literal_max = h->literal_cost; if (c->literal_max < h->costs[LITERAL]) c->literal_max = h->costs[LITERAL];
if (c->literal_min > h->literal_cost) c->literal_min = h->literal_cost; if (c->literal_min > h->costs[LITERAL]) c->literal_min = h->costs[LITERAL];
if (c->red_max < h->red_cost) c->red_max = h->red_cost; if (c->red_max < h->costs[RED]) c->red_max = h->costs[RED];
if (c->red_min > h->red_cost) c->red_min = h->red_cost; if (c->red_min > h->costs[RED]) c->red_min = h->costs[RED];
if (c->blue_max < h->blue_cost) c->blue_max = h->blue_cost; if (c->blue_max < h->costs[BLUE]) c->blue_max = h->costs[BLUE];
if (c->blue_min > h->blue_cost) c->blue_min = h->blue_cost; if (c->blue_min > h->costs[BLUE]) c->blue_min = h->costs[BLUE];
} }
static void UpdateHistogramCost(VP8LHistogram* const h) { static void UpdateHistogramCost(VP8LHistogram* const h) {
uint32_t alpha_sym, red_sym, blue_sym; uint32_t alpha_sym, red_sym, blue_sym;
const uint64_t alpha_cost = const int num_codes = VP8LHistogramNumCodes(h->palette_code_bits);
PopulationCost(h->alpha, NUM_LITERAL_CODES, &alpha_sym, &h->is_used[3]); h->costs[ALPHA] = PopulationCost(h->alpha, NUM_LITERAL_CODES, &alpha_sym,
&h->is_used[ALPHA]);
// No need to add the extra cost as it is a constant that does not influence // No need to add the extra cost as it is a constant that does not influence
// the histograms. // the histograms.
const uint64_t distance_cost = h->costs[DISTANCE] = PopulationCost(h->distance, NUM_DISTANCE_CODES, NULL,
PopulationCost(h->distance, NUM_DISTANCE_CODES, NULL, &h->is_used[4]); &h->is_used[DISTANCE]);
const int num_codes = VP8LHistogramNumCodes(h->palette_code_bits); h->costs[LITERAL] =
h->literal_cost = PopulationCost(h->literal, num_codes, NULL, &h->is_used[0]); PopulationCost(h->literal, num_codes, NULL, &h->is_used[LITERAL]);
h->red_cost = h->costs[RED] =
PopulationCost(h->red, NUM_LITERAL_CODES, &red_sym, &h->is_used[1]); PopulationCost(h->red, NUM_LITERAL_CODES, &red_sym, &h->is_used[RED]);
h->blue_cost = h->costs[BLUE] =
PopulationCost(h->blue, NUM_LITERAL_CODES, &blue_sym, &h->is_used[2]); PopulationCost(h->blue, NUM_LITERAL_CODES, &blue_sym, &h->is_used[BLUE]);
h->bit_cost = h->bit_cost = h->costs[LITERAL] + h->costs[RED] + h->costs[BLUE] +
h->literal_cost + h->red_cost + h->blue_cost + alpha_cost + distance_cost; h->costs[ALPHA] + h->costs[DISTANCE];
if ((alpha_sym | red_sym | blue_sym) == VP8L_NON_TRIVIAL_SYM) { if ((alpha_sym | red_sym | blue_sym) == VP8L_NON_TRIVIAL_SYM) {
h->trivial_symbol = VP8L_NON_TRIVIAL_SYM; h->trivial_symbol = VP8L_NON_TRIVIAL_SYM;
} else { } else {
@ -578,13 +570,13 @@ static int GetBinIdForEntropy(uint64_t min, uint64_t max, uint64_t val) {
static int GetHistoBinIndex(const VP8LHistogram* const h, static int GetHistoBinIndex(const VP8LHistogram* const h,
const DominantCostRange* const c, int low_effort) { const DominantCostRange* const c, int low_effort) {
int bin_id = int bin_id =
GetBinIdForEntropy(c->literal_min, c->literal_max, h->literal_cost); GetBinIdForEntropy(c->literal_min, c->literal_max, h->costs[LITERAL]);
assert(bin_id < NUM_PARTITIONS); assert(bin_id < NUM_PARTITIONS);
if (!low_effort) { if (!low_effort) {
bin_id = bin_id * NUM_PARTITIONS bin_id = bin_id * NUM_PARTITIONS +
+ GetBinIdForEntropy(c->red_min, c->red_max, h->red_cost); GetBinIdForEntropy(c->red_min, c->red_max, h->costs[RED]);
bin_id = bin_id * NUM_PARTITIONS bin_id = bin_id * NUM_PARTITIONS +
+ GetBinIdForEntropy(c->blue_min, c->blue_max, h->blue_cost); GetBinIdForEntropy(c->blue_min, c->blue_max, h->costs[BLUE]);
assert(bin_id < BIN_SIZE); assert(bin_id < BIN_SIZE);
} }
return bin_id; return bin_id;
@ -628,8 +620,9 @@ static void HistogramCopyAndAnalyze(VP8LHistogramSet* const orig_histo,
// Skip the histogram if it is completely empty, which can happen for tiles // Skip the histogram if it is completely empty, which can happen for tiles
// with no information (when they are skipped because of LZ77). // with no information (when they are skipped because of LZ77).
if (!histo->is_used[0] && !histo->is_used[1] && !histo->is_used[2] if (!histo->is_used[LITERAL] && !histo->is_used[RED] &&
&& !histo->is_used[3] && !histo->is_used[4]) { !histo->is_used[BLUE] && !histo->is_used[ALPHA] &&
!histo->is_used[DISTANCE]) {
// The first histogram is always used. // The first histogram is always used.
assert(i > 0); assert(i > 0);
orig_histograms[i] = NULL; orig_histograms[i] = NULL;
@ -749,6 +742,7 @@ typedef struct {
int idx2; int idx2;
int64_t cost_diff; int64_t cost_diff;
uint64_t cost_combo; uint64_t cost_combo;
uint64_t costs[5];
} HistogramPair; } HistogramPair;
typedef struct { typedef struct {
@ -810,7 +804,8 @@ WEBP_NODISCARD static int HistoQueueUpdatePair(const VP8LHistogram* const h1,
HistogramPair* const pair) { HistogramPair* const pair) {
const int64_t sum_cost = h1->bit_cost + h2->bit_cost; const int64_t sum_cost = h1->bit_cost + h2->bit_cost;
SaturateAdd(sum_cost, &cost_threshold); SaturateAdd(sum_cost, &cost_threshold);
if (!GetCombinedHistogramEntropy(h1, h2, cost_threshold, &pair->cost_combo)) { if (!GetCombinedHistogramEntropy(h1, h2, cost_threshold, &pair->cost_combo,
pair->costs)) {
return 0; return 0;
} }
pair->cost_diff = (int64_t)pair->cost_combo - sum_cost; pair->cost_diff = (int64_t)pair->cost_combo - sum_cost;
@ -887,6 +882,8 @@ static int HistogramCombineGreedy(VP8LHistogramSet* const image_histo,
const int idx2 = histo_queue.queue[0].idx2; const int idx2 = histo_queue.queue[0].idx2;
HistogramAdd(histograms[idx2], histograms[idx1], histograms[idx1]); HistogramAdd(histograms[idx2], histograms[idx1], histograms[idx1]);
histograms[idx1]->bit_cost = histo_queue.queue[0].cost_combo; histograms[idx1]->bit_cost = histo_queue.queue[0].cost_combo;
memcpy(histograms[idx1]->costs, histo_queue.queue[0].costs,
sizeof(histograms[idx1]->costs));
// Remove merged histogram. // Remove merged histogram.
HistogramSetRemoveHistogram(image_histo, idx2, num_used); HistogramSetRemoveHistogram(image_histo, idx2, num_used);
@ -1008,6 +1005,8 @@ static int HistogramCombineStochastic(VP8LHistogramSet* const image_histo,
HistogramAdd(histograms[best_idx2], histograms[best_idx1], HistogramAdd(histograms[best_idx2], histograms[best_idx1],
histograms[best_idx1]); histograms[best_idx1]);
histograms[best_idx1]->bit_cost = histo_queue.queue[0].cost_combo; histograms[best_idx1]->bit_cost = histo_queue.queue[0].cost_combo;
memcpy(histograms[best_idx1]->costs, histo_queue.queue[0].costs,
sizeof(histograms[best_idx1]->costs));
HistogramSetRemoveHistogram(image_histo, best_idx2, num_used); HistogramSetRemoveHistogram(image_histo, best_idx2, num_used);
// Parse the queue and update each pair that deals with best_idx1, // Parse the queue and update each pair that deals with best_idx1,
// best_idx2 or image_histo_size. // best_idx2 or image_histo_size.

View File

@ -39,10 +39,9 @@ typedef struct {
int palette_code_bits; int palette_code_bits;
uint32_t trivial_symbol; // True, if histograms for Red, Blue & Alpha uint32_t trivial_symbol; // True, if histograms for Red, Blue & Alpha
// literal symbols are single valued. // literal symbols are single valued.
uint64_t bit_cost; // cached value of bit cost. uint64_t bit_cost; // Cached value of total bit cost.
uint64_t literal_cost; // Cached values of dominant entropy costs: // Cached values of entropy costs: literal, red, blue, alpha, distance
uint64_t red_cost; // literal, red & blue. uint64_t costs[5];
uint64_t blue_cost;
uint8_t is_used[5]; // 5 for literal, red, blue, alpha, distance uint8_t is_used[5]; // 5 for literal, red, blue, alpha, distance
uint16_t bin_id; // entropy bin index. uint16_t bin_id; // entropy bin index.
} VP8LHistogram; } VP8LHistogram;