Speedups for empty histograms.

When histograms are empty, it is easy to add them.
They should also not be considered when merging histograms
(it is a waste of CPU).
This does not change the compression performance,
just the speed.

Change-Id: I42c721ca0f9c5ea067e73b792aa3db6d5e71d01f
(cherry picked from commit decf6f6b87)
This commit is contained in:
Vincent Rabaud 2018-09-28 15:32:25 +02:00 committed by James Zern
parent f2dfd92557
commit f0abab9217
3 changed files with 116 additions and 46 deletions

View File

@ -643,25 +643,56 @@ static void AddVectorEq_C(const uint32_t* a, uint32_t* out, int size) {
for (i = 0; i < size; ++i) out[i] += a[i];
}
#define ADD(X, ARG, LEN) do { \
if (a->is_used_[X]) { \
if (b->is_used_[X]) { \
VP8LAddVector(a->ARG, b->ARG, out->ARG, (LEN)); \
} else { \
memcpy(&out->ARG[0], &a->ARG[0], (LEN) * sizeof(out->ARG[0])); \
} \
} else if (b->is_used_[X]) { \
memcpy(&out->ARG[0], &b->ARG[0], (LEN) * sizeof(out->ARG[0])); \
} else { \
memset(&out->ARG[0], 0, (LEN) * sizeof(out->ARG[0])); \
} \
} while (0)
#define ADD_EQ(X, ARG, LEN) do { \
if (a->is_used_[X]) { \
if (out->is_used_[X]) { \
VP8LAddVectorEq(a->ARG, out->ARG, (LEN)); \
} else { \
memcpy(&out->ARG[0], &a->ARG[0], (LEN) * sizeof(out->ARG[0])); \
} \
} \
} while (0)
void VP8LHistogramAdd(const VP8LHistogram* const a,
const VP8LHistogram* const b, VP8LHistogram* const out) {
int i;
const int literal_size = VP8LHistogramNumCodes(a->palette_code_bits_);
assert(a->palette_code_bits_ == b->palette_code_bits_);
if (b != out) {
VP8LAddVector(a->literal_, b->literal_, out->literal_, literal_size);
VP8LAddVector(a->distance_, b->distance_, out->distance_,
NUM_DISTANCE_CODES);
VP8LAddVector(a->red_, b->red_, out->red_, NUM_LITERAL_CODES);
VP8LAddVector(a->blue_, b->blue_, out->blue_, NUM_LITERAL_CODES);
VP8LAddVector(a->alpha_, b->alpha_, out->alpha_, NUM_LITERAL_CODES);
ADD(0, literal_, literal_size);
ADD(1, red_, NUM_LITERAL_CODES);
ADD(2, blue_, NUM_LITERAL_CODES);
ADD(3, alpha_, NUM_LITERAL_CODES);
ADD(4, distance_, NUM_DISTANCE_CODES);
for (i = 0; i < 5; ++i) {
out->is_used_[i] = (a->is_used_[i] | b->is_used_[i]);
}
} else {
VP8LAddVectorEq(a->literal_, out->literal_, literal_size);
VP8LAddVectorEq(a->distance_, out->distance_, NUM_DISTANCE_CODES);
VP8LAddVectorEq(a->red_, out->red_, NUM_LITERAL_CODES);
VP8LAddVectorEq(a->blue_, out->blue_, NUM_LITERAL_CODES);
VP8LAddVectorEq(a->alpha_, out->alpha_, NUM_LITERAL_CODES);
ADD_EQ(0, literal_, literal_size);
ADD_EQ(1, red_, NUM_LITERAL_CODES);
ADD_EQ(2, blue_, NUM_LITERAL_CODES);
ADD_EQ(3, alpha_, NUM_LITERAL_CODES);
ADD_EQ(4, distance_, NUM_DISTANCE_CODES);
for (i = 0; i < 5; ++i) out->is_used_[i] |= a->is_used_[i];
}
}
#undef ADD
#undef ADD_EQ
//------------------------------------------------------------------------------
// Image transforms.

View File

@ -51,10 +51,12 @@ static void HistogramCopy(const VP8LHistogram* const src,
VP8LHistogram* const dst) {
uint32_t* const dst_literal = dst->literal_;
const int dst_cache_bits = dst->palette_code_bits_;
const int literal_size = VP8LHistogramNumCodes(dst_cache_bits);
const int histo_size = VP8LGetHistogramSize(dst_cache_bits);
assert(src->palette_code_bits_ == dst_cache_bits);
memcpy(dst, src, histo_size);
dst->literal_ = dst_literal;
memcpy(dst->literal_, src->literal_, literal_size * sizeof(*dst->literal_));
}
int VP8LGetHistogramSize(int cache_bits) {
@ -237,7 +239,8 @@ static double FinalHuffmanCost(const VP8LStreaks* const stats) {
// Get the symbol entropy for the distribution 'population'.
// Set 'trivial_sym', if there's only one symbol present in the distribution.
static double PopulationCost(const uint32_t* const population, int length,
uint32_t* const trivial_sym) {
uint32_t* const trivial_sym,
uint8_t* const is_used) {
VP8LBitEntropy bit_entropy;
VP8LStreaks stats;
VP8LGetEntropyUnrefined(population, length, &bit_entropy, &stats);
@ -245,6 +248,8 @@ static double PopulationCost(const uint32_t* const population, int length,
*trivial_sym = (bit_entropy.nonzeros == 1) ? bit_entropy.nonzero_code
: VP8L_NON_TRIVIAL_SYM;
}
// The histogram is used if there is at least one non-zero streak.
*is_used = (stats.streaks[1][0] != 0 || stats.streaks[1][1] != 0);
return BitsEntropyRefine(&bit_entropy) + FinalHuffmanCost(&stats);
}
@ -253,7 +258,9 @@ static double PopulationCost(const uint32_t* const population, int length,
// non-zero: both the zero-th one, or both the last one.
static WEBP_INLINE double GetCombinedEntropy(const uint32_t* const X,
const uint32_t* const Y,
int length, int trivial_at_end) {
int length, int is_X_used,
int is_Y_used,
int trivial_at_end) {
VP8LStreaks stats;
if (trivial_at_end) {
// This configuration is due to palettization that transforms an indexed
@ -262,28 +269,43 @@ static WEBP_INLINE double GetCombinedEntropy(const uint32_t* const X,
// Only FinalHuffmanCost needs to be evaluated.
memset(&stats, 0, sizeof(stats));
// Deal with the non-zero value at index 0 or length-1.
stats.streaks[1][0] += 1;
stats.streaks[1][0] = 1;
// Deal with the following/previous zero streak.
stats.counts[0] += 1;
stats.streaks[0][1] += length - 1;
stats.counts[0] = 1;
stats.streaks[0][1] = length - 1;
return FinalHuffmanCost(&stats);
} else {
VP8LBitEntropy bit_entropy;
VP8LGetCombinedEntropyUnrefined(X, Y, length, &bit_entropy, &stats);
if (is_X_used) {
if (is_Y_used) {
VP8LGetCombinedEntropyUnrefined(X, Y, length, &bit_entropy, &stats);
} else {
VP8LGetEntropyUnrefined(X, length, &bit_entropy, &stats);
}
} else {
if (is_Y_used) {
VP8LGetEntropyUnrefined(Y, length, &bit_entropy, &stats);
} else {
memset(&stats, 0, sizeof(stats));
stats.counts[0] = 1;
stats.streaks[0][length > 3] = length;
VP8LBitEntropyInit(&bit_entropy);
}
}
return BitsEntropyRefine(&bit_entropy) + FinalHuffmanCost(&stats);
}
}
// Estimates the Entropy + Huffman + other block overhead size cost.
double VP8LHistogramEstimateBits(const VP8LHistogram* const p) {
double VP8LHistogramEstimateBits(VP8LHistogram* const p) {
return
PopulationCost(
p->literal_, VP8LHistogramNumCodes(p->palette_code_bits_), NULL)
+ PopulationCost(p->red_, NUM_LITERAL_CODES, NULL)
+ PopulationCost(p->blue_, NUM_LITERAL_CODES, NULL)
+ PopulationCost(p->alpha_, NUM_LITERAL_CODES, NULL)
+ PopulationCost(p->distance_, NUM_DISTANCE_CODES, NULL)
PopulationCost(p->literal_, VP8LHistogramNumCodes(p->palette_code_bits_),
NULL, &p->is_used_[0])
+ PopulationCost(p->red_, NUM_LITERAL_CODES, NULL, &p->is_used_[1])
+ PopulationCost(p->blue_, NUM_LITERAL_CODES, NULL, &p->is_used_[2])
+ PopulationCost(p->alpha_, NUM_LITERAL_CODES, NULL, &p->is_used_[3])
+ PopulationCost(p->distance_, NUM_DISTANCE_CODES, NULL, &p->is_used_[4])
+ VP8LExtraCost(p->literal_ + NUM_LITERAL_CODES, NUM_LENGTH_CODES)
+ VP8LExtraCost(p->distance_, NUM_DISTANCE_CODES);
}
@ -299,7 +321,8 @@ static int GetCombinedHistogramEntropy(const VP8LHistogram* const a,
int trivial_at_end = 0;
assert(a->palette_code_bits_ == b->palette_code_bits_);
*cost += GetCombinedEntropy(a->literal_, b->literal_,
VP8LHistogramNumCodes(palette_code_bits), 0);
VP8LHistogramNumCodes(palette_code_bits),
a->is_used_[0], b->is_used_[0], 0);
*cost += VP8LExtraCostCombined(a->literal_ + NUM_LITERAL_CODES,
b->literal_ + NUM_LITERAL_CODES,
NUM_LENGTH_CODES);
@ -319,19 +342,23 @@ static int GetCombinedHistogramEntropy(const VP8LHistogram* const a,
}
*cost +=
GetCombinedEntropy(a->red_, b->red_, NUM_LITERAL_CODES, trivial_at_end);
GetCombinedEntropy(a->red_, b->red_, NUM_LITERAL_CODES, a->is_used_[1],
b->is_used_[1], trivial_at_end);
if (*cost > cost_threshold) return 0;
*cost +=
GetCombinedEntropy(a->blue_, b->blue_, NUM_LITERAL_CODES, trivial_at_end);
if (*cost > cost_threshold) return 0;
*cost += GetCombinedEntropy(a->alpha_, b->alpha_, NUM_LITERAL_CODES,
trivial_at_end);
GetCombinedEntropy(a->blue_, b->blue_, NUM_LITERAL_CODES, a->is_used_[2],
b->is_used_[2], trivial_at_end);
if (*cost > cost_threshold) return 0;
*cost +=
GetCombinedEntropy(a->distance_, b->distance_, NUM_DISTANCE_CODES, 0);
GetCombinedEntropy(a->alpha_, b->alpha_, NUM_LITERAL_CODES,
a->is_used_[3], b->is_used_[3], trivial_at_end);
if (*cost > cost_threshold) return 0;
*cost +=
GetCombinedEntropy(a->distance_, b->distance_, NUM_DISTANCE_CODES,
a->is_used_[4], b->is_used_[4], 0);
*cost +=
VP8LExtraCostCombined(a->distance_, b->distance_, NUM_DISTANCE_CODES);
if (*cost > cost_threshold) return 0;
@ -419,16 +446,19 @@ static void UpdateDominantCostRange(
static void UpdateHistogramCost(VP8LHistogram* const h) {
uint32_t alpha_sym, red_sym, blue_sym;
const double alpha_cost =
PopulationCost(h->alpha_, NUM_LITERAL_CODES, &alpha_sym);
PopulationCost(h->alpha_, NUM_LITERAL_CODES, &alpha_sym,
&h->is_used_[3]);
const double distance_cost =
PopulationCost(h->distance_, NUM_DISTANCE_CODES, NULL) +
PopulationCost(h->distance_, NUM_DISTANCE_CODES, NULL, &h->is_used_[4]) +
VP8LExtraCost(h->distance_, NUM_DISTANCE_CODES);
const int num_codes = VP8LHistogramNumCodes(h->palette_code_bits_);
h->literal_cost_ = PopulationCost(h->literal_, num_codes, NULL) +
VP8LExtraCost(h->literal_ + NUM_LITERAL_CODES,
NUM_LENGTH_CODES);
h->red_cost_ = PopulationCost(h->red_, NUM_LITERAL_CODES, &red_sym);
h->blue_cost_ = PopulationCost(h->blue_, NUM_LITERAL_CODES, &blue_sym);
h->literal_cost_ =
PopulationCost(h->literal_, num_codes, NULL, &h->is_used_[0]) +
VP8LExtraCost(h->literal_ + NUM_LITERAL_CODES, NUM_LENGTH_CODES);
h->red_cost_ =
PopulationCost(h->red_, NUM_LITERAL_CODES, &red_sym, &h->is_used_[1]);
h->blue_cost_ =
PopulationCost(h->blue_, NUM_LITERAL_CODES, &blue_sym, &h->is_used_[2]);
h->bit_cost_ = h->literal_cost_ + h->red_cost_ + h->blue_cost_ +
alpha_cost + distance_cost;
if ((alpha_sym | red_sym | blue_sym) == VP8L_NON_TRIVIAL_SYM) {
@ -493,11 +523,19 @@ static void HistogramCopyAndAnalyze(
const int histo_size = orig_histo->size;
VP8LHistogram** const orig_histograms = orig_histo->histograms;
VP8LHistogram** const histograms = image_histo->histograms;
image_histo->size = 0;
for (i = 0; i < histo_size; ++i) {
VP8LHistogram* const histo = orig_histograms[i];
UpdateHistogramCost(histo);
// Skip the histogram if it is completely empty, which can happen for tiles
// with no information (when they are skipped because of LZ77).
if (!histo->is_used_[0] && !histo->is_used_[1] && !histo->is_used_[2]
&& !histo->is_used_[3] && !histo->is_used_[4]) {
continue;
}
// Copy histograms from orig_histo[] to image_histo[].
HistogramCopy(histo, histograms[i]);
HistogramCopy(histo, histograms[image_histo->size++]);
}
}
@ -987,8 +1025,7 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize,
// histograms of small sizes (as bin_map will be very sparse) and
// maximum quality q==100 (to preserve the compression gains at that level).
const int entropy_combine_num_bins = low_effort ? NUM_PARTITIONS : BIN_SIZE;
const int entropy_combine =
(orig_histo->size > entropy_combine_num_bins * 2) && (quality < 100);
int entropy_combine;
if (orig_histo == NULL) goto Error;
@ -996,15 +1033,16 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize,
HistogramBuild(xsize, histo_bits, refs, orig_histo);
// Copies the histograms and computes its bit_cost.
HistogramCopyAndAnalyze(orig_histo, image_histo);
entropy_combine =
(image_histo->size > entropy_combine_num_bins * 2) && (quality < 100);
if (entropy_combine) {
const int bin_map_size = orig_histo->size;
const int bin_map_size = image_histo->size;
// Reuse histogram_symbols storage. By definition, it's guaranteed to be ok.
uint16_t* const bin_map = histogram_symbols;
const double combine_cost_factor =
GetCombineCostFactor(image_histo_raw_size, quality);
HistogramAnalyzeEntropyBin(orig_histo, bin_map, low_effort);
HistogramAnalyzeEntropyBin(image_histo, bin_map, low_effort);
// Collapse histograms with similar entropy.
HistogramCombineEntropyBin(image_histo, tmp_histo, bin_map, bin_map_size,
entropy_combine_num_bins, combine_cost_factor,

View File

@ -44,6 +44,7 @@ typedef struct {
double literal_cost_; // Cached values of dominant entropy costs:
double red_cost_; // literal, red & blue.
double blue_cost_;
uint8_t is_used_[5]; // 5 for literal, red, blue, alpha, distance
} VP8LHistogram;
// Collection of histograms with fixed capacity, allocated as one
@ -113,7 +114,7 @@ double VP8LBitsEntropy(const uint32_t* const array, int n);
// Estimate how many bits the combined entropy of literals and distance
// approximately maps to.
double VP8LHistogramEstimateBits(const VP8LHistogram* const p);
double VP8LHistogramEstimateBits(VP8LHistogram* const p);
#ifdef __cplusplus
}