Speedups for empty histograms.

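When a histogram is empty, adding it to another one is trivial
(the result is a plain copy of the other histogram, or nothing needs to be done).
Empty histograms should also be skipped when merging histograms,
since considering them is a waste of CPU.
This does not change the compression performance, only the speed.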

Change-Id: I42c721ca0f9c5ea067e73b792aa3db6d5e71d01f
(cherry picked from commit decf6f6b87)
This commit is contained in:
Vincent Rabaud 2018-09-28 15:32:25 +02:00 committed by James Zern
parent f2dfd92557
commit f0abab9217
3 changed files with 116 additions and 46 deletions

View File

@ -643,25 +643,56 @@ static void AddVectorEq_C(const uint32_t* a, uint32_t* out, int size) {
for (i = 0; i < size; ++i) out[i] += a[i]; for (i = 0; i < size; ++i) out[i] += a[i];
} }
// ADD(X, ARG, LEN): fills out->ARG (LEN entries) from a->ARG and b->ARG,
// using the is_used_[X] flags of 'a' and 'b' to skip work on unused arrays:
//   - both flags set:   VP8LAddVector(a->ARG, b->ARG, out->ARG, LEN)
//   - only a's flag set: a->ARG is memcpy'd into out->ARG
//   - only b's flag set: b->ARG is memcpy'd into out->ARG
//   - neither flag set:  out->ARG is memset to zero
// The macro refers to variables named 'a', 'b' and 'out', which must be in
// scope where it is expanded. It does not write out->is_used_[X]; updating
// that flag is left to the code that expands the macro.
#define ADD(X, ARG, LEN) do {                                                  \
if (a->is_used_[X]) {                                                        \
if (b->is_used_[X]) {                                                      \
VP8LAddVector(a->ARG, b->ARG, out->ARG, (LEN));                          \
} else {                                                                   \
memcpy(&out->ARG[0], &a->ARG[0], (LEN) * sizeof(out->ARG[0]));           \
}                                                                          \
} else if (b->is_used_[X]) {                                                 \
memcpy(&out->ARG[0], &b->ARG[0], (LEN) * sizeof(out->ARG[0]));             \
} else {                                                                     \
memset(&out->ARG[0], 0, (LEN) * sizeof(out->ARG[0]));                      \
}                                                                            \
} while (0)
// ADD_EQ(X, ARG, LEN): in-place variant that accumulates a->ARG (LEN entries)
// into out->ARG, guided by the is_used_[X] flags of 'a' and 'out':
//   - a's flag clear:            nothing is done, out->ARG is left untouched
//   - a's and out's flags set:   VP8LAddVectorEq(a->ARG, out->ARG, LEN)
//   - a's flag set, out's clear: a->ARG is memcpy'd over out->ARG
// The macro refers to variables named 'a' and 'out', which must be in scope
// where it is expanded. It reads out->is_used_[X] but never writes it, so it
// has to be expanded before that flag is updated to reflect the merge.
#define ADD_EQ(X, ARG, LEN) do {                                               \
if (a->is_used_[X]) {                                                        \
if (out->is_used_[X]) {                                                    \
VP8LAddVectorEq(a->ARG, out->ARG, (LEN));                                \
} else {                                                                   \
memcpy(&out->ARG[0], &a->ARG[0], (LEN) * sizeof(out->ARG[0]));           \
}                                                                          \
}                                                                            \
} while (0)
void VP8LHistogramAdd(const VP8LHistogram* const a, void VP8LHistogramAdd(const VP8LHistogram* const a,
const VP8LHistogram* const b, VP8LHistogram* const out) { const VP8LHistogram* const b, VP8LHistogram* const out) {
int i;
const int literal_size = VP8LHistogramNumCodes(a->palette_code_bits_); const int literal_size = VP8LHistogramNumCodes(a->palette_code_bits_);
assert(a->palette_code_bits_ == b->palette_code_bits_); assert(a->palette_code_bits_ == b->palette_code_bits_);
if (b != out) { if (b != out) {
VP8LAddVector(a->literal_, b->literal_, out->literal_, literal_size); ADD(0, literal_, literal_size);
VP8LAddVector(a->distance_, b->distance_, out->distance_, ADD(1, red_, NUM_LITERAL_CODES);
NUM_DISTANCE_CODES); ADD(2, blue_, NUM_LITERAL_CODES);
VP8LAddVector(a->red_, b->red_, out->red_, NUM_LITERAL_CODES); ADD(3, alpha_, NUM_LITERAL_CODES);
VP8LAddVector(a->blue_, b->blue_, out->blue_, NUM_LITERAL_CODES); ADD(4, distance_, NUM_DISTANCE_CODES);
VP8LAddVector(a->alpha_, b->alpha_, out->alpha_, NUM_LITERAL_CODES); for (i = 0; i < 5; ++i) {
out->is_used_[i] = (a->is_used_[i] | b->is_used_[i]);
}
} else { } else {
VP8LAddVectorEq(a->literal_, out->literal_, literal_size); ADD_EQ(0, literal_, literal_size);
VP8LAddVectorEq(a->distance_, out->distance_, NUM_DISTANCE_CODES); ADD_EQ(1, red_, NUM_LITERAL_CODES);
VP8LAddVectorEq(a->red_, out->red_, NUM_LITERAL_CODES); ADD_EQ(2, blue_, NUM_LITERAL_CODES);
VP8LAddVectorEq(a->blue_, out->blue_, NUM_LITERAL_CODES); ADD_EQ(3, alpha_, NUM_LITERAL_CODES);
VP8LAddVectorEq(a->alpha_, out->alpha_, NUM_LITERAL_CODES); ADD_EQ(4, distance_, NUM_DISTANCE_CODES);
for (i = 0; i < 5; ++i) out->is_used_[i] |= a->is_used_[i];
} }
} }
#undef ADD
#undef ADD_EQ
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
// Image transforms. // Image transforms.

View File

@ -51,10 +51,12 @@ static void HistogramCopy(const VP8LHistogram* const src,
VP8LHistogram* const dst) { VP8LHistogram* const dst) {
uint32_t* const dst_literal = dst->literal_; uint32_t* const dst_literal = dst->literal_;
const int dst_cache_bits = dst->palette_code_bits_; const int dst_cache_bits = dst->palette_code_bits_;
const int literal_size = VP8LHistogramNumCodes(dst_cache_bits);
const int histo_size = VP8LGetHistogramSize(dst_cache_bits); const int histo_size = VP8LGetHistogramSize(dst_cache_bits);
assert(src->palette_code_bits_ == dst_cache_bits); assert(src->palette_code_bits_ == dst_cache_bits);
memcpy(dst, src, histo_size); memcpy(dst, src, histo_size);
dst->literal_ = dst_literal; dst->literal_ = dst_literal;
memcpy(dst->literal_, src->literal_, literal_size * sizeof(*dst->literal_));
} }
int VP8LGetHistogramSize(int cache_bits) { int VP8LGetHistogramSize(int cache_bits) {
@ -237,7 +239,8 @@ static double FinalHuffmanCost(const VP8LStreaks* const stats) {
// Get the symbol entropy for the distribution 'population'. // Get the symbol entropy for the distribution 'population'.
// Set 'trivial_sym', if there's only one symbol present in the distribution. // Set 'trivial_sym', if there's only one symbol present in the distribution.
static double PopulationCost(const uint32_t* const population, int length, static double PopulationCost(const uint32_t* const population, int length,
uint32_t* const trivial_sym) { uint32_t* const trivial_sym,
uint8_t* const is_used) {
VP8LBitEntropy bit_entropy; VP8LBitEntropy bit_entropy;
VP8LStreaks stats; VP8LStreaks stats;
VP8LGetEntropyUnrefined(population, length, &bit_entropy, &stats); VP8LGetEntropyUnrefined(population, length, &bit_entropy, &stats);
@ -245,6 +248,8 @@ static double PopulationCost(const uint32_t* const population, int length,
*trivial_sym = (bit_entropy.nonzeros == 1) ? bit_entropy.nonzero_code *trivial_sym = (bit_entropy.nonzeros == 1) ? bit_entropy.nonzero_code
: VP8L_NON_TRIVIAL_SYM; : VP8L_NON_TRIVIAL_SYM;
} }
// The histogram is used if there is at least one non-zero streak.
*is_used = (stats.streaks[1][0] != 0 || stats.streaks[1][1] != 0);
return BitsEntropyRefine(&bit_entropy) + FinalHuffmanCost(&stats); return BitsEntropyRefine(&bit_entropy) + FinalHuffmanCost(&stats);
} }
@ -253,7 +258,9 @@ static double PopulationCost(const uint32_t* const population, int length,
// non-zero: both the zero-th one, or both the last one. // non-zero: both the zero-th one, or both the last one.
static WEBP_INLINE double GetCombinedEntropy(const uint32_t* const X, static WEBP_INLINE double GetCombinedEntropy(const uint32_t* const X,
const uint32_t* const Y, const uint32_t* const Y,
int length, int trivial_at_end) { int length, int is_X_used,
int is_Y_used,
int trivial_at_end) {
VP8LStreaks stats; VP8LStreaks stats;
if (trivial_at_end) { if (trivial_at_end) {
// This configuration is due to palettization that transforms an indexed // This configuration is due to palettization that transforms an indexed
@ -262,28 +269,43 @@ static WEBP_INLINE double GetCombinedEntropy(const uint32_t* const X,
// Only FinalHuffmanCost needs to be evaluated. // Only FinalHuffmanCost needs to be evaluated.
memset(&stats, 0, sizeof(stats)); memset(&stats, 0, sizeof(stats));
// Deal with the non-zero value at index 0 or length-1. // Deal with the non-zero value at index 0 or length-1.
stats.streaks[1][0] += 1; stats.streaks[1][0] = 1;
// Deal with the following/previous zero streak. // Deal with the following/previous zero streak.
stats.counts[0] += 1; stats.counts[0] = 1;
stats.streaks[0][1] += length - 1; stats.streaks[0][1] = length - 1;
return FinalHuffmanCost(&stats); return FinalHuffmanCost(&stats);
} else { } else {
VP8LBitEntropy bit_entropy; VP8LBitEntropy bit_entropy;
VP8LGetCombinedEntropyUnrefined(X, Y, length, &bit_entropy, &stats); if (is_X_used) {
if (is_Y_used) {
VP8LGetCombinedEntropyUnrefined(X, Y, length, &bit_entropy, &stats);
} else {
VP8LGetEntropyUnrefined(X, length, &bit_entropy, &stats);
}
} else {
if (is_Y_used) {
VP8LGetEntropyUnrefined(Y, length, &bit_entropy, &stats);
} else {
memset(&stats, 0, sizeof(stats));
stats.counts[0] = 1;
stats.streaks[0][length > 3] = length;
VP8LBitEntropyInit(&bit_entropy);
}
}
return BitsEntropyRefine(&bit_entropy) + FinalHuffmanCost(&stats); return BitsEntropyRefine(&bit_entropy) + FinalHuffmanCost(&stats);
} }
} }
// Estimates the Entropy + Huffman + other block overhead size cost. // Estimates the Entropy + Huffman + other block overhead size cost.
double VP8LHistogramEstimateBits(const VP8LHistogram* const p) { double VP8LHistogramEstimateBits(VP8LHistogram* const p) {
return return
PopulationCost( PopulationCost(p->literal_, VP8LHistogramNumCodes(p->palette_code_bits_),
p->literal_, VP8LHistogramNumCodes(p->palette_code_bits_), NULL) NULL, &p->is_used_[0])
+ PopulationCost(p->red_, NUM_LITERAL_CODES, NULL) + PopulationCost(p->red_, NUM_LITERAL_CODES, NULL, &p->is_used_[1])
+ PopulationCost(p->blue_, NUM_LITERAL_CODES, NULL) + PopulationCost(p->blue_, NUM_LITERAL_CODES, NULL, &p->is_used_[2])
+ PopulationCost(p->alpha_, NUM_LITERAL_CODES, NULL) + PopulationCost(p->alpha_, NUM_LITERAL_CODES, NULL, &p->is_used_[3])
+ PopulationCost(p->distance_, NUM_DISTANCE_CODES, NULL) + PopulationCost(p->distance_, NUM_DISTANCE_CODES, NULL, &p->is_used_[4])
+ VP8LExtraCost(p->literal_ + NUM_LITERAL_CODES, NUM_LENGTH_CODES) + VP8LExtraCost(p->literal_ + NUM_LITERAL_CODES, NUM_LENGTH_CODES)
+ VP8LExtraCost(p->distance_, NUM_DISTANCE_CODES); + VP8LExtraCost(p->distance_, NUM_DISTANCE_CODES);
} }
@ -299,7 +321,8 @@ static int GetCombinedHistogramEntropy(const VP8LHistogram* const a,
int trivial_at_end = 0; int trivial_at_end = 0;
assert(a->palette_code_bits_ == b->palette_code_bits_); assert(a->palette_code_bits_ == b->palette_code_bits_);
*cost += GetCombinedEntropy(a->literal_, b->literal_, *cost += GetCombinedEntropy(a->literal_, b->literal_,
VP8LHistogramNumCodes(palette_code_bits), 0); VP8LHistogramNumCodes(palette_code_bits),
a->is_used_[0], b->is_used_[0], 0);
*cost += VP8LExtraCostCombined(a->literal_ + NUM_LITERAL_CODES, *cost += VP8LExtraCostCombined(a->literal_ + NUM_LITERAL_CODES,
b->literal_ + NUM_LITERAL_CODES, b->literal_ + NUM_LITERAL_CODES,
NUM_LENGTH_CODES); NUM_LENGTH_CODES);
@ -319,19 +342,23 @@ static int GetCombinedHistogramEntropy(const VP8LHistogram* const a,
} }
*cost += *cost +=
GetCombinedEntropy(a->red_, b->red_, NUM_LITERAL_CODES, trivial_at_end); GetCombinedEntropy(a->red_, b->red_, NUM_LITERAL_CODES, a->is_used_[1],
b->is_used_[1], trivial_at_end);
if (*cost > cost_threshold) return 0; if (*cost > cost_threshold) return 0;
*cost += *cost +=
GetCombinedEntropy(a->blue_, b->blue_, NUM_LITERAL_CODES, trivial_at_end); GetCombinedEntropy(a->blue_, b->blue_, NUM_LITERAL_CODES, a->is_used_[2],
if (*cost > cost_threshold) return 0; b->is_used_[2], trivial_at_end);
*cost += GetCombinedEntropy(a->alpha_, b->alpha_, NUM_LITERAL_CODES,
trivial_at_end);
if (*cost > cost_threshold) return 0; if (*cost > cost_threshold) return 0;
*cost += *cost +=
GetCombinedEntropy(a->distance_, b->distance_, NUM_DISTANCE_CODES, 0); GetCombinedEntropy(a->alpha_, b->alpha_, NUM_LITERAL_CODES,
a->is_used_[3], b->is_used_[3], trivial_at_end);
if (*cost > cost_threshold) return 0;
*cost +=
GetCombinedEntropy(a->distance_, b->distance_, NUM_DISTANCE_CODES,
a->is_used_[4], b->is_used_[4], 0);
*cost += *cost +=
VP8LExtraCostCombined(a->distance_, b->distance_, NUM_DISTANCE_CODES); VP8LExtraCostCombined(a->distance_, b->distance_, NUM_DISTANCE_CODES);
if (*cost > cost_threshold) return 0; if (*cost > cost_threshold) return 0;
@ -419,16 +446,19 @@ static void UpdateDominantCostRange(
static void UpdateHistogramCost(VP8LHistogram* const h) { static void UpdateHistogramCost(VP8LHistogram* const h) {
uint32_t alpha_sym, red_sym, blue_sym; uint32_t alpha_sym, red_sym, blue_sym;
const double alpha_cost = const double alpha_cost =
PopulationCost(h->alpha_, NUM_LITERAL_CODES, &alpha_sym); PopulationCost(h->alpha_, NUM_LITERAL_CODES, &alpha_sym,
&h->is_used_[3]);
const double distance_cost = const double distance_cost =
PopulationCost(h->distance_, NUM_DISTANCE_CODES, NULL) + PopulationCost(h->distance_, NUM_DISTANCE_CODES, NULL, &h->is_used_[4]) +
VP8LExtraCost(h->distance_, NUM_DISTANCE_CODES); VP8LExtraCost(h->distance_, NUM_DISTANCE_CODES);
const int num_codes = VP8LHistogramNumCodes(h->palette_code_bits_); const int num_codes = VP8LHistogramNumCodes(h->palette_code_bits_);
h->literal_cost_ = PopulationCost(h->literal_, num_codes, NULL) + h->literal_cost_ =
VP8LExtraCost(h->literal_ + NUM_LITERAL_CODES, PopulationCost(h->literal_, num_codes, NULL, &h->is_used_[0]) +
NUM_LENGTH_CODES); VP8LExtraCost(h->literal_ + NUM_LITERAL_CODES, NUM_LENGTH_CODES);
h->red_cost_ = PopulationCost(h->red_, NUM_LITERAL_CODES, &red_sym); h->red_cost_ =
h->blue_cost_ = PopulationCost(h->blue_, NUM_LITERAL_CODES, &blue_sym); PopulationCost(h->red_, NUM_LITERAL_CODES, &red_sym, &h->is_used_[1]);
h->blue_cost_ =
PopulationCost(h->blue_, NUM_LITERAL_CODES, &blue_sym, &h->is_used_[2]);
h->bit_cost_ = h->literal_cost_ + h->red_cost_ + h->blue_cost_ + h->bit_cost_ = h->literal_cost_ + h->red_cost_ + h->blue_cost_ +
alpha_cost + distance_cost; alpha_cost + distance_cost;
if ((alpha_sym | red_sym | blue_sym) == VP8L_NON_TRIVIAL_SYM) { if ((alpha_sym | red_sym | blue_sym) == VP8L_NON_TRIVIAL_SYM) {
@ -493,11 +523,19 @@ static void HistogramCopyAndAnalyze(
const int histo_size = orig_histo->size; const int histo_size = orig_histo->size;
VP8LHistogram** const orig_histograms = orig_histo->histograms; VP8LHistogram** const orig_histograms = orig_histo->histograms;
VP8LHistogram** const histograms = image_histo->histograms; VP8LHistogram** const histograms = image_histo->histograms;
image_histo->size = 0;
for (i = 0; i < histo_size; ++i) { for (i = 0; i < histo_size; ++i) {
VP8LHistogram* const histo = orig_histograms[i]; VP8LHistogram* const histo = orig_histograms[i];
UpdateHistogramCost(histo); UpdateHistogramCost(histo);
// Skip the histogram if it is completely empty, which can happen for tiles
// with no information (when they are skipped because of LZ77).
if (!histo->is_used_[0] && !histo->is_used_[1] && !histo->is_used_[2]
&& !histo->is_used_[3] && !histo->is_used_[4]) {
continue;
}
// Copy histograms from orig_histo[] to image_histo[]. // Copy histograms from orig_histo[] to image_histo[].
HistogramCopy(histo, histograms[i]); HistogramCopy(histo, histograms[image_histo->size++]);
} }
} }
@ -987,8 +1025,7 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize,
// histograms of small sizes (as bin_map will be very sparse) and // histograms of small sizes (as bin_map will be very sparse) and
// maximum quality q==100 (to preserve the compression gains at that level). // maximum quality q==100 (to preserve the compression gains at that level).
const int entropy_combine_num_bins = low_effort ? NUM_PARTITIONS : BIN_SIZE; const int entropy_combine_num_bins = low_effort ? NUM_PARTITIONS : BIN_SIZE;
const int entropy_combine = int entropy_combine;
(orig_histo->size > entropy_combine_num_bins * 2) && (quality < 100);
if (orig_histo == NULL) goto Error; if (orig_histo == NULL) goto Error;
@ -996,15 +1033,16 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize,
HistogramBuild(xsize, histo_bits, refs, orig_histo); HistogramBuild(xsize, histo_bits, refs, orig_histo);
// Copies the histograms and computes its bit_cost. // Copies the histograms and computes its bit_cost.
HistogramCopyAndAnalyze(orig_histo, image_histo); HistogramCopyAndAnalyze(orig_histo, image_histo);
entropy_combine =
(image_histo->size > entropy_combine_num_bins * 2) && (quality < 100);
if (entropy_combine) { if (entropy_combine) {
const int bin_map_size = orig_histo->size; const int bin_map_size = image_histo->size;
// Reuse histogram_symbols storage. By definition, it's guaranteed to be ok. // Reuse histogram_symbols storage. By definition, it's guaranteed to be ok.
uint16_t* const bin_map = histogram_symbols; uint16_t* const bin_map = histogram_symbols;
const double combine_cost_factor = const double combine_cost_factor =
GetCombineCostFactor(image_histo_raw_size, quality); GetCombineCostFactor(image_histo_raw_size, quality);
HistogramAnalyzeEntropyBin(orig_histo, bin_map, low_effort); HistogramAnalyzeEntropyBin(image_histo, bin_map, low_effort);
// Collapse histograms with similar entropy. // Collapse histograms with similar entropy.
HistogramCombineEntropyBin(image_histo, tmp_histo, bin_map, bin_map_size, HistogramCombineEntropyBin(image_histo, tmp_histo, bin_map, bin_map_size,
entropy_combine_num_bins, combine_cost_factor, entropy_combine_num_bins, combine_cost_factor,

View File

@ -44,6 +44,7 @@ typedef struct {
double literal_cost_; // Cached values of dominant entropy costs: double literal_cost_; // Cached values of dominant entropy costs:
double red_cost_; // literal, red & blue. double red_cost_; // literal, red & blue.
double blue_cost_; double blue_cost_;
uint8_t is_used_[5]; // 5 for literal, red, blue, alpha, distance
} VP8LHistogram; } VP8LHistogram;
// Collection of histograms with fixed capacity, allocated as one // Collection of histograms with fixed capacity, allocated as one
@ -113,7 +114,7 @@ double VP8LBitsEntropy(const uint32_t* const array, int n);
// Estimate how many bits the combined entropy of literals and distance // Estimate how many bits the combined entropy of literals and distance
// approximately maps to. // approximately maps to.
double VP8LHistogramEstimateBits(const VP8LHistogram* const p); double VP8LHistogramEstimateBits(VP8LHistogram* const p);
#ifdef __cplusplus #ifdef __cplusplus
} }