Move Entropy methods to lossless.c

Move all the Entropy evaluation methods to lossless.c (from histogram.c).
There's a slight difference between the way entropy is computed when
evaluating prediction methods and the way it is computed for the (literal)
histograms used to build Huffman trees.
The plan is to merge a few (static) methods later and reduce the code size.

This change has no impact on the compression speed/density.

Change-Id: Ife3d96a3c4a8d78a91723d9e0a8d1b78c0256a15
This commit is contained in:
Vikas Arora 2014-11-20 13:46:36 -08:00
parent a96ccf8fde
commit e0c809ad23
4 changed files with 195 additions and 178 deletions

View File

@ -545,6 +545,9 @@ static const VP8LPredictorFunc kPredictorsC[16] = {
Predictor0, Predictor0 // <- padding security sentinels Predictor0, Predictor0 // <- padding security sentinels
}; };
//------------------------------------------------------------------------------
// Methods to calculate Entropy (Shannon).
static float PredictionCostSpatial(const int counts[256], int weight_0, static float PredictionCostSpatial(const int counts[256], int weight_0,
double exp_val) { double exp_val) {
const int significant_symbols = 256 >> 4; const int significant_symbols = 256 >> 4;
@ -592,6 +595,158 @@ static float PredictionCostSpatialHistogram(const int accumulated[4][256],
return (float)retval; return (float)retval;
} }
// Refines the raw entropy estimate 'retval' of a distribution.
//   nonzeros: number of symbols with a non-zero count.
//   sum:      total of all the counts.
//   max_val:  largest single count.
//   retval:   unrefined entropy estimate.
// Returns 0 when there is at most one symbol, a 99%/1% blend of 'sum' and
// 'retval' when there are exactly two, and otherwise the larger of 'retval'
// and a floor derived from (2 * sum - max_val).
static WEBP_INLINE double BitsEntropyRefine(int nonzeros, int sum, int max_val,
                                            double retval) {
  double mix;
  double min_limit;
  if (nonzeros <= 1) {
    return 0.;
  }
  // Two symbols, they will be 0 and 1 in a Huffman code.
  // Let's mix in a bit of entropy to favor good clustering when
  // distributions of these are combined.
  if (nonzeros == 2) {
    return 0.99 * sum + 0.01 * retval;
  }
  // No matter what the entropy says, we cannot be better than min_limit
  // with Huffman coding. A bit of entropy is mixed into min_limit since it
  // produces much better (~0.5 %) compression results, perhaps because of
  // better entropy clustering.
  if (nonzeros == 3) {
    mix = 0.95;
  } else if (nonzeros == 4) {
    mix = 0.7;
  } else {
    mix = 0.627;
  }
  // Evaluate in double: '2 * sum' as an int would overflow (undefined
  // behavior) once 'sum' exceeds INT_MAX / 2.
  min_limit = 2.0 * sum - max_val;
  min_limit = mix * min_limit + (1.0 - mix) * retval;
  return (retval < min_limit) ? min_limit : retval;
}
// Computes the refined entropy of the 'n' symbol counts stored in 'array'.
// When 'trivial_symbol' is not NULL, it is set to the index of the only
// non-zero entry if exactly one exists, and to VP8L_NON_TRIVIAL_SYM otherwise.
static double BitsEntropy(const uint32_t* const array, int n,
                          uint32_t* const trivial_symbol) {
  double entropy = 0.;
  uint32_t total = 0;
  uint32_t largest = 0;
  uint32_t last_used = VP8L_NON_TRIVIAL_SYM;
  int num_used = 0;
  int idx;
  for (idx = 0; idx < n; ++idx) {
    const uint32_t count = array[idx];
    if (count == 0) continue;   // Unused symbols contribute nothing.
    total += count;
    last_used = idx;
    ++num_used;
    entropy -= VP8LFastSLog2(count);
    if (count > largest) largest = count;
  }
  entropy += VP8LFastSLog2(total);
  if (trivial_symbol != NULL) {
    if (num_used == 1) {
      *trivial_symbol = last_used;
    } else {
      *trivial_symbol = VP8L_NON_TRIVIAL_SYM;
    }
  }
  return BitsEntropyRefine(num_used, total, largest, entropy);
}
// Computes the refined entropy of the element-wise sum of the two
// 'n'-entry distributions 'X' and 'Y', without materializing the sum.
static double BitsEntropyCombined(const uint32_t* const X,
                                  const uint32_t* const Y, int n) {
  double entropy = 0.;
  int total = 0;
  int largest = 0;
  int num_used = 0;
  int idx;
  for (idx = 0; idx < n; ++idx) {
    const int combined = X[idx] + Y[idx];
    if (combined == 0) continue;   // Symbol unused in both distributions.
    total += combined;
    ++num_used;
    entropy -= VP8LFastSLog2(combined);
    if (combined > largest) largest = combined;
  }
  entropy += VP8LFastSLog2(total);
  return BitsEntropyRefine(num_used, total, largest, entropy);
}
// Returns the fixed starting value of the Huffman cost estimate:
// CODE_LENGTH_CODES * 3, lowered by a small bias because the Huffman code
// lengths are typically not stored in full length.
static double InitialHuffmanCost(void) {
  static const double kBias = 9.1;
  static const int kCodeLengthsSize = CODE_LENGTH_CODES * 3;
  return (double)kCodeLengthsSize - kBias;
}
// Turns the streak statistics in 'stats' into the final Huffman cost: the
// initial cost plus a fixed-weight linear combination of the 'counts' and
// 'streaks' entries (streaks are split by length type, <3 or >=3).
static double FinalHuffmanCost(const VP8LStreaks* const stats) {
  const double group0 =
      stats->counts[0] * 1.5625 + 0.234375 * stats->streaks[0][1];
  const double group1 =
      stats->counts[1] * 2.578125 + 0.703125 * stats->streaks[1][1];
  double cost = InitialHuffmanCost();
  cost += group0;
  cost += group1;
  cost += 1.796875 * stats->streaks[0][0];
  cost += 3.28125 * stats->streaks[1][0];
  return cost;
}
// Trampolines
// Gathers the streak statistics of 'population' through the
// VP8LHuffmanCostCount function pointer and converts them into a cost.
static double HuffmanCost(const uint32_t* const population, int length) {
  const VP8LStreaks streak_stats = VP8LHuffmanCostCount(population, length);
  const double cost = FinalHuffmanCost(&streak_stats);
  return cost;
}
// Same as HuffmanCost(), but the streak statistics are gathered for the pair
// of distributions 'X' and 'Y' through VP8LHuffmanCostCombinedCount.
static double HuffmanCostCombined(const uint32_t* const X,
                                  const uint32_t* const Y, int length) {
  const VP8LStreaks streak_stats = VP8LHuffmanCostCombinedCount(X, Y, length);
  const double cost = FinalHuffmanCost(&streak_stats);
  return cost;
}
// Aggregated costs
// Returns the total cost of the distribution 'population': its refined
// entropy plus its Huffman cost. 'trivial_sym' is forwarded to BitsEntropy()
// and may be NULL.
double VP8LPopulationCost(const uint32_t* const population, int length,
                          uint32_t* const trivial_sym) {
  const double entropy_cost = BitsEntropy(population, length, trivial_sym);
  const double huffman_cost = HuffmanCost(population, length);
  return entropy_cost + huffman_cost;
}
// Returns the total cost (refined entropy plus Huffman cost) of the
// distributions 'X' and 'Y' taken together.
double VP8LGetCombinedEntropy(const uint32_t* const X,
                              const uint32_t* const Y, int length) {
  const double entropy_cost = BitsEntropyCombined(X, Y, length);
  const double huffman_cost = HuffmanCostCombined(X, Y, length);
  return entropy_cost + huffman_cost;
}
// Estimates the full cost of histogram 'p': the population cost (entropy +
// Huffman) of each of its five distributions, plus the extra cost for the
// length and distance codes.
double VP8LHistogramEstimateBits(const VP8LHistogram* const p) {
  const int num_literal_codes = VP8LHistogramNumCodes(p->palette_code_bits_);
  double bits = VP8LPopulationCost(p->literal_, num_literal_codes, NULL);
  bits += VP8LPopulationCost(p->red_, NUM_LITERAL_CODES, NULL);
  bits += VP8LPopulationCost(p->blue_, NUM_LITERAL_CODES, NULL);
  bits += VP8LPopulationCost(p->alpha_, NUM_LITERAL_CODES, NULL);
  bits += VP8LPopulationCost(p->distance_, NUM_DISTANCE_CODES, NULL);
  bits += VP8LExtraCost(p->literal_ + NUM_LITERAL_CODES, NUM_LENGTH_CODES);
  bits += VP8LExtraCost(p->distance_, NUM_DISTANCE_CODES);
  return bits;
}
// Estimates the cost of histogram 'p' using only the refined entropy of each
// of its five distributions (no Huffman cost term), plus the extra cost for
// the length and distance codes.
double VP8LHistogramEstimateBitsBulk(const VP8LHistogram* const p) {
  const int num_literal_codes = VP8LHistogramNumCodes(p->palette_code_bits_);
  double bits = BitsEntropy(p->literal_, num_literal_codes, NULL);
  bits += BitsEntropy(p->red_, NUM_LITERAL_CODES, NULL);
  bits += BitsEntropy(p->blue_, NUM_LITERAL_CODES, NULL);
  bits += BitsEntropy(p->alpha_, NUM_LITERAL_CODES, NULL);
  bits += BitsEntropy(p->distance_, NUM_DISTANCE_CODES, NULL);
  bits += VP8LExtraCost(p->literal_ + NUM_LITERAL_CODES, NUM_LENGTH_CODES);
  bits += VP8LExtraCost(p->distance_, NUM_DISTANCE_CODES);
  return bits;
}
static WEBP_INLINE void UpdateHisto(int histo_argb[4][256], uint32_t argb) { static WEBP_INLINE void UpdateHisto(int histo_argb[4][256], uint32_t argb) {
++histo_argb[0][argb >> 24]; ++histo_argb[0][argb >> 24];
++histo_argb[1][(argb >> 16) & 0xff]; ++histo_argb[1][(argb >> 16) & 0xff];
@ -599,6 +754,8 @@ static WEBP_INLINE void UpdateHisto(int histo_argb[4][256], uint32_t argb) {
++histo_argb[3][argb & 0xff]; ++histo_argb[3][argb & 0xff];
} }
//------------------------------------------------------------------------------
static int GetBestPredictorForTile(int width, int height, static int GetBestPredictorForTile(int width, int height,
int tile_x, int tile_y, int bits, int tile_x, int tile_y, int bits,
const int accumulated[4][256], const int accumulated[4][256],

View File

@ -25,6 +25,10 @@
extern "C" { extern "C" {
#endif #endif
// Not a trivial literal symbol.
#define VP8L_NON_TRIVIAL_SYM (0xffffffff)
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
// Signatures and generic function-pointers // Signatures and generic function-pointers
@ -158,6 +162,23 @@ typedef VP8LStreaks (*VP8LCostCombinedCountFunc)(const uint32_t* X,
extern VP8LCostCountFunc VP8LHuffmanCostCount; extern VP8LCostCountFunc VP8LHuffmanCostCount;
extern VP8LCostCombinedCountFunc VP8LHuffmanCostCombinedCount; extern VP8LCostCombinedCountFunc VP8LHuffmanCostCombinedCount;
// Returns the estimated cost of the distribution 'population' ('length'
// entries): its refined symbol entropy plus the cost of its Huffman code.
// If 'trivial_sym' is not NULL, it is set to the index of the only symbol
// with a non-zero count when exactly one such symbol exists, and to
// VP8L_NON_TRIVIAL_SYM otherwise.
double VP8LPopulationCost(const uint32_t* const population, int length,
uint32_t* const trivial_sym);
// Returns the same kind of estimate (refined entropy plus Huffman cost) for
// the distributions 'X' and 'Y' combined; both must hold 'length' entries.
double VP8LGetCombinedEntropy(const uint32_t* const X,
const uint32_t* const Y, int length);
// Estimate how many bits the combined entropy of literals and distance
// approximately maps to. Sums VP8LPopulationCost() over the literal, red,
// blue, alpha and distance distributions of 'p' and adds the extra cost for
// the length and distance codes.
double VP8LHistogramEstimateBits(const VP8LHistogram* const p);
// This function estimates the cost in bits excluding the bits needed to
// represent the entropy code itself: the same sum as
// VP8LHistogramEstimateBits(), but without the Huffman cost term.
double VP8LHistogramEstimateBitsBulk(const VP8LHistogram* const p);
typedef void (*VP8LHistogramAddFunc)(const VP8LHistogram* const a, typedef void (*VP8LHistogramAddFunc)(const VP8LHistogram* const a,
const VP8LHistogram* const b, const VP8LHistogram* const b,
VP8LHistogram* const out); VP8LHistogram* const out);

View File

@ -29,8 +29,6 @@
#define BIN_SIZE (NUM_PARTITIONS * NUM_PARTITIONS * NUM_PARTITIONS) #define BIN_SIZE (NUM_PARTITIONS * NUM_PARTITIONS * NUM_PARTITIONS)
// Maximum number of histograms allowed in greedy combining algorithm. // Maximum number of histograms allowed in greedy combining algorithm.
#define MAX_HISTO_GREEDY 100 #define MAX_HISTO_GREEDY 100
// Not a trivial literal symbol.
#define NON_TRIVIAL_SYM (0xffffffff)
static void HistogramClear(VP8LHistogram* const p) { static void HistogramClear(VP8LHistogram* const p) {
uint32_t* const literal = p->literal_; uint32_t* const literal = p->literal_;
@ -150,158 +148,6 @@ void VP8LHistogramAddSinglePixOrCopy(VP8LHistogram* const histo,
} }
} }
// Refines the raw entropy estimate 'retval' of a distribution.
//   nonzeros: number of symbols with a non-zero count.
//   sum:      total of all the counts.
//   max_val:  largest single count.
//   retval:   unrefined entropy estimate.
// Returns 0 when there is at most one symbol, a 99%/1% blend of 'sum' and
// 'retval' when there are exactly two, and otherwise the larger of 'retval'
// and a floor derived from (2 * sum - max_val).
static WEBP_INLINE double BitsEntropyRefine(int nonzeros, int sum, int max_val,
                                            double retval) {
  double mix;
  double min_limit;
  if (nonzeros <= 1) {
    return 0.;
  }
  // Two symbols, they will be 0 and 1 in a Huffman code.
  // Let's mix in a bit of entropy to favor good clustering when
  // distributions of these are combined.
  if (nonzeros == 2) {
    return 0.99 * sum + 0.01 * retval;
  }
  // No matter what the entropy says, we cannot be better than min_limit
  // with Huffman coding. A bit of entropy is mixed into min_limit since it
  // produces much better (~0.5 %) compression results, perhaps because of
  // better entropy clustering.
  if (nonzeros == 3) {
    mix = 0.95;
  } else if (nonzeros == 4) {
    mix = 0.7;
  } else {
    mix = 0.627;
  }
  // Evaluate in double: '2 * sum' as an int would overflow (undefined
  // behavior) once 'sum' exceeds INT_MAX / 2.
  min_limit = 2.0 * sum - max_val;
  min_limit = mix * min_limit + (1.0 - mix) * retval;
  return (retval < min_limit) ? min_limit : retval;
}
// Computes the refined entropy of the 'n' symbol counts stored in 'array'.
// When 'trivial_symbol' is not NULL, it is set to the index of the only
// non-zero entry if exactly one exists, and to NON_TRIVIAL_SYM otherwise.
static double BitsEntropy(const uint32_t* const array, int n,
                          uint32_t* const trivial_symbol) {
  double entropy = 0.;
  uint32_t total = 0;
  uint32_t largest = 0;
  uint32_t last_used = NON_TRIVIAL_SYM;
  int num_used = 0;
  int idx;
  for (idx = 0; idx < n; ++idx) {
    const uint32_t count = array[idx];
    if (count == 0) continue;   // Unused symbols contribute nothing.
    total += count;
    last_used = idx;
    ++num_used;
    entropy -= VP8LFastSLog2(count);
    if (count > largest) largest = count;
  }
  entropy += VP8LFastSLog2(total);
  if (trivial_symbol != NULL) {
    if (num_used == 1) {
      *trivial_symbol = last_used;
    } else {
      *trivial_symbol = NON_TRIVIAL_SYM;
    }
  }
  return BitsEntropyRefine(num_used, total, largest, entropy);
}
// Computes the refined entropy of the element-wise sum of the two
// 'n'-entry distributions 'X' and 'Y', without materializing the sum.
static double BitsEntropyCombined(const uint32_t* const X,
                                  const uint32_t* const Y, int n) {
  double entropy = 0.;
  int total = 0;
  int largest = 0;
  int num_used = 0;
  int idx;
  for (idx = 0; idx < n; ++idx) {
    const int combined = X[idx] + Y[idx];
    if (combined == 0) continue;   // Symbol unused in both distributions.
    total += combined;
    ++num_used;
    entropy -= VP8LFastSLog2(combined);
    if (combined > largest) largest = combined;
  }
  entropy += VP8LFastSLog2(total);
  return BitsEntropyRefine(num_used, total, largest, entropy);
}
// Returns the fixed starting value of the Huffman cost estimate:
// CODE_LENGTH_CODES * 3, lowered by a small bias because the Huffman code
// lengths are typically not stored in full length.
static double InitialHuffmanCost(void) {
  static const double kBias = 9.1;
  static const int kCodeLengthsSize = CODE_LENGTH_CODES * 3;
  return (double)kCodeLengthsSize - kBias;
}
// Turns the streak statistics in 'stats' into the final Huffman cost: the
// initial cost plus a fixed-weight linear combination of the 'counts' and
// 'streaks' entries (streaks are split by length type, <3 or >=3).
static double FinalHuffmanCost(const VP8LStreaks* const stats) {
  const double group0 =
      stats->counts[0] * 1.5625 + 0.234375 * stats->streaks[0][1];
  const double group1 =
      stats->counts[1] * 2.578125 + 0.703125 * stats->streaks[1][1];
  double cost = InitialHuffmanCost();
  cost += group0;
  cost += group1;
  cost += 1.796875 * stats->streaks[0][0];
  cost += 3.28125 * stats->streaks[1][0];
  return cost;
}
// Trampolines
// Gathers the streak statistics of 'population' through the
// VP8LHuffmanCostCount function pointer and converts them into a cost.
static double HuffmanCost(const uint32_t* const population, int length) {
  const VP8LStreaks streak_stats = VP8LHuffmanCostCount(population, length);
  const double cost = FinalHuffmanCost(&streak_stats);
  return cost;
}
// Same as HuffmanCost(), but the streak statistics are gathered for the pair
// of distributions 'X' and 'Y' through VP8LHuffmanCostCombinedCount.
static double HuffmanCostCombined(const uint32_t* const X,
                                  const uint32_t* const Y, int length) {
  const VP8LStreaks streak_stats = VP8LHuffmanCostCombinedCount(X, Y, length);
  const double cost = FinalHuffmanCost(&streak_stats);
  return cost;
}
// Aggregated costs
// Returns the total cost of the distribution 'population': its refined
// entropy plus its Huffman cost. 'trivial_sym' is forwarded to BitsEntropy()
// and may be NULL.
static double PopulationCost(const uint32_t* const population, int length,
                             uint32_t* const trivial_sym) {
  const double entropy_cost = BitsEntropy(population, length, trivial_sym);
  const double huffman_cost = HuffmanCost(population, length);
  return entropy_cost + huffman_cost;
}
// Returns the total cost (refined entropy plus Huffman cost) of the
// distributions 'X' and 'Y' taken together.
static double GetCombinedEntropy(const uint32_t* const X,
                                 const uint32_t* const Y, int length) {
  const double entropy_cost = BitsEntropyCombined(X, Y, length);
  const double huffman_cost = HuffmanCostCombined(X, Y, length);
  return entropy_cost + huffman_cost;
}
// Estimates the full cost of histogram 'p': the population cost (entropy +
// Huffman) of each of its five distributions, plus the extra cost for the
// length and distance codes.
double VP8LHistogramEstimateBits(const VP8LHistogram* const p) {
  const int num_literal_codes = VP8LHistogramNumCodes(p->palette_code_bits_);
  double bits = PopulationCost(p->literal_, num_literal_codes, NULL);
  bits += PopulationCost(p->red_, NUM_LITERAL_CODES, NULL);
  bits += PopulationCost(p->blue_, NUM_LITERAL_CODES, NULL);
  bits += PopulationCost(p->alpha_, NUM_LITERAL_CODES, NULL);
  bits += PopulationCost(p->distance_, NUM_DISTANCE_CODES, NULL);
  bits += VP8LExtraCost(p->literal_ + NUM_LITERAL_CODES, NUM_LENGTH_CODES);
  bits += VP8LExtraCost(p->distance_, NUM_DISTANCE_CODES);
  return bits;
}
// Estimates the cost of histogram 'p' using only the refined entropy of each
// of its five distributions (no Huffman cost term), plus the extra cost for
// the length and distance codes.
double VP8LHistogramEstimateBitsBulk(const VP8LHistogram* const p) {
  const int num_literal_codes = VP8LHistogramNumCodes(p->palette_code_bits_);
  double bits = BitsEntropy(p->literal_, num_literal_codes, NULL);
  bits += BitsEntropy(p->red_, NUM_LITERAL_CODES, NULL);
  bits += BitsEntropy(p->blue_, NUM_LITERAL_CODES, NULL);
  bits += BitsEntropy(p->alpha_, NUM_LITERAL_CODES, NULL);
  bits += BitsEntropy(p->distance_, NUM_DISTANCE_CODES, NULL);
  bits += VP8LExtraCost(p->literal_ + NUM_LITERAL_CODES, NUM_LENGTH_CODES);
  bits += VP8LExtraCost(p->distance_, NUM_DISTANCE_CODES);
  return bits;
}
// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------
// Various histogram combine/cost-eval functions // Various histogram combine/cost-eval functions
@ -311,23 +157,24 @@ static int GetCombinedHistogramEntropy(const VP8LHistogram* const a,
double* cost) { double* cost) {
const int palette_code_bits = a->palette_code_bits_; const int palette_code_bits = a->palette_code_bits_;
assert(a->palette_code_bits_ == b->palette_code_bits_); assert(a->palette_code_bits_ == b->palette_code_bits_);
*cost += GetCombinedEntropy(a->literal_, b->literal_, *cost += VP8LGetCombinedEntropy(a->literal_, b->literal_,
VP8LHistogramNumCodes(palette_code_bits)); VP8LHistogramNumCodes(palette_code_bits));
*cost += VP8LExtraCostCombined(a->literal_ + NUM_LITERAL_CODES, *cost += VP8LExtraCostCombined(a->literal_ + NUM_LITERAL_CODES,
b->literal_ + NUM_LITERAL_CODES, b->literal_ + NUM_LITERAL_CODES,
NUM_LENGTH_CODES); NUM_LENGTH_CODES);
if (*cost > cost_threshold) return 0; if (*cost > cost_threshold) return 0;
*cost += GetCombinedEntropy(a->red_, b->red_, NUM_LITERAL_CODES); *cost += VP8LGetCombinedEntropy(a->red_, b->red_, NUM_LITERAL_CODES);
if (*cost > cost_threshold) return 0; if (*cost > cost_threshold) return 0;
*cost += GetCombinedEntropy(a->blue_, b->blue_, NUM_LITERAL_CODES); *cost += VP8LGetCombinedEntropy(a->blue_, b->blue_, NUM_LITERAL_CODES);
if (*cost > cost_threshold) return 0; if (*cost > cost_threshold) return 0;
*cost += GetCombinedEntropy(a->alpha_, b->alpha_, NUM_LITERAL_CODES); *cost += VP8LGetCombinedEntropy(a->alpha_, b->alpha_, NUM_LITERAL_CODES);
if (*cost > cost_threshold) return 0; if (*cost > cost_threshold) return 0;
*cost += GetCombinedEntropy(a->distance_, b->distance_, NUM_DISTANCE_CODES); *cost += VP8LGetCombinedEntropy(a->distance_, b->distance_,
NUM_DISTANCE_CODES);
*cost += VP8LExtraCostCombined(a->distance_, b->distance_, *cost += VP8LExtraCostCombined(a->distance_, b->distance_,
NUM_DISTANCE_CODES); NUM_DISTANCE_CODES);
if (*cost > cost_threshold) return 0; if (*cost > cost_threshold) return 0;
@ -354,7 +201,7 @@ static double HistogramAddEval(const VP8LHistogram* const a,
out->bit_cost_ = cost; out->bit_cost_ = cost;
out->palette_code_bits_ = a->palette_code_bits_; out->palette_code_bits_ = a->palette_code_bits_;
out->trivial_symbol_ = (a->trivial_symbol_ == b->trivial_symbol_) ? out->trivial_symbol_ = (a->trivial_symbol_ == b->trivial_symbol_) ?
a->trivial_symbol_ : NON_TRIVIAL_SYM; a->trivial_symbol_ : VP8L_NON_TRIVIAL_SYM;
} }
return cost - sum_cost; return cost - sum_cost;
@ -407,22 +254,22 @@ static void UpdateDominantCostRange(
static void UpdateHistogramCost(VP8LHistogram* const h) { static void UpdateHistogramCost(VP8LHistogram* const h) {
uint32_t alpha_sym, red_sym, blue_sym; uint32_t alpha_sym, red_sym, blue_sym;
const double alpha_cost = PopulationCost(h->alpha_, NUM_LITERAL_CODES, const double alpha_cost = VP8LPopulationCost(h->alpha_, NUM_LITERAL_CODES,
&alpha_sym); &alpha_sym);
const double distance_cost = const double distance_cost =
PopulationCost(h->distance_, NUM_DISTANCE_CODES, NULL) + VP8LPopulationCost(h->distance_, NUM_DISTANCE_CODES, NULL) +
VP8LExtraCost(h->distance_, NUM_DISTANCE_CODES); VP8LExtraCost(h->distance_, NUM_DISTANCE_CODES);
const int num_codes = VP8LHistogramNumCodes(h->palette_code_bits_); const int num_codes = VP8LHistogramNumCodes(h->palette_code_bits_);
h->literal_cost_ = PopulationCost(h->literal_, num_codes, NULL) + h->literal_cost_ = VP8LPopulationCost(h->literal_, num_codes, NULL) +
VP8LExtraCost(h->literal_ + NUM_LITERAL_CODES, VP8LExtraCost(h->literal_ + NUM_LITERAL_CODES,
NUM_LENGTH_CODES); NUM_LENGTH_CODES);
h->red_cost_ = PopulationCost(h->red_, NUM_LITERAL_CODES, &red_sym); h->red_cost_ = VP8LPopulationCost(h->red_, NUM_LITERAL_CODES, &red_sym);
h->blue_cost_ = h->blue_cost_ =
PopulationCost(h->blue_, NUM_LITERAL_CODES, &blue_sym); VP8LPopulationCost(h->blue_, NUM_LITERAL_CODES, &blue_sym);
h->bit_cost_ = h->literal_cost_ + h->red_cost_ + h->blue_cost_ + h->bit_cost_ = h->literal_cost_ + h->red_cost_ + h->blue_cost_ +
alpha_cost + distance_cost; alpha_cost + distance_cost;
if ((alpha_sym | red_sym | blue_sym) == NON_TRIVIAL_SYM) { if ((alpha_sym | red_sym | blue_sym) == VP8L_NON_TRIVIAL_SYM) {
h->trivial_symbol_ = NON_TRIVIAL_SYM; h->trivial_symbol_ = VP8L_NON_TRIVIAL_SYM;
} else { } else {
h->trivial_symbol_ = h->trivial_symbol_ =
((uint32_t)alpha_sym << 24) | (red_sym << 16) | (blue_sym << 0); ((uint32_t)alpha_sym << 24) | (red_sym << 16) | (blue_sym << 0);
@ -576,9 +423,9 @@ static void HistogramCombineEntropyBin(VP8LHistogramSet* const image_histo,
// histogram pairs. In that case, we fallback to combining histograms // histogram pairs. In that case, we fallback to combining histograms
// as usual to avoid increasing the header size. // as usual to avoid increasing the header size.
const int try_combine = const int try_combine =
(cur_combo->trivial_symbol_ != NON_TRIVIAL_SYM) || (cur_combo->trivial_symbol_ != VP8L_NON_TRIVIAL_SYM) ||
((histograms[idx1]->trivial_symbol_ == NON_TRIVIAL_SYM) && ((histograms[idx1]->trivial_symbol_ == VP8L_NON_TRIVIAL_SYM) &&
(histograms[idx2]->trivial_symbol_== NON_TRIVIAL_SYM)); (histograms[idx2]->trivial_symbol_== VP8L_NON_TRIVIAL_SYM));
const int max_combine_failures = 32; const int max_combine_failures = 32;
if (try_combine || (num_combine_failures >= max_combine_failures)) { if (try_combine || (num_combine_failures >= max_combine_failures)) {
HistogramCopy(cur_combo, histograms[idx1]); HistogramCopy(cur_combo, histograms[idx1]);

View File

@ -93,14 +93,6 @@ VP8LHistogram* VP8LAllocateHistogram(int cache_bits);
void VP8LHistogramAddSinglePixOrCopy(VP8LHistogram* const histo, void VP8LHistogramAddSinglePixOrCopy(VP8LHistogram* const histo,
const PixOrCopy* const v); const PixOrCopy* const v);
// Estimate how many bits the combined entropy of literals and distance
// approximately maps to. The estimate covers the literal, red, blue, alpha
// and distance distributions of 'p', including the cost of their Huffman
// codes.
double VP8LHistogramEstimateBits(const VP8LHistogram* const p);
// This function estimates the cost in bits excluding the bits needed to
// represent the entropy code itself (i.e. no Huffman cost term is added).
double VP8LHistogramEstimateBitsBulk(const VP8LHistogram* const p);
static WEBP_INLINE int VP8LHistogramNumCodes(int palette_code_bits) { static WEBP_INLINE int VP8LHistogramNumCodes(int palette_code_bits) {
return NUM_LITERAL_CODES + NUM_LENGTH_CODES + return NUM_LITERAL_CODES + NUM_LENGTH_CODES +
((palette_code_bits > 0) ? (1 << palette_code_bits) : 0); ((palette_code_bits > 0) ? (1 << palette_code_bits) : 0);