From 66408c2c7c6830b29340ac2dc680c85e734ff53d Mon Sep 17 00:00:00 2001 From: Vincent Rabaud Date: Tue, 2 Jul 2024 14:33:37 +0200 Subject: [PATCH] Switch the histogram_enc.h API to fixed point Speedups: 4% with '-lossless', 8% with '-lossless -q 100 -m6' Change-Id: I8f1c244b290d48132c1edc6a1c9fc3f79fef68ec --- src/dsp/lossless_common.h | 3 + src/enc/backward_references_cost_enc.c | 4 +- src/enc/backward_references_enc.c | 14 +- src/enc/histogram_enc.c | 236 +++++++++++++------------ src/enc/histogram_enc.h | 12 +- src/enc/vp8l_enc.c | 18 +- 6 files changed, 148 insertions(+), 139 deletions(-) diff --git a/src/dsp/lossless_common.h b/src/dsp/lossless_common.h index 376f6956..54abbdbd 100644 --- a/src/dsp/lossless_common.h +++ b/src/dsp/lossless_common.h @@ -111,6 +111,9 @@ static WEBP_INLINE int64_t DivRound(int64_t a, int64_t b) { return ((a < 0) == (b < 0)) ? ((a + b / 2) / b) : ((a - b / 2) / b); } +#define WEBP_INT64_MAX ((int64_t)((1ull << 63) - 1)) +#define WEBP_UINT64_MAX (~0ull) + // ----------------------------------------------------------------------------- // PrefixEncode() diff --git a/src/enc/backward_references_cost_enc.c b/src/enc/backward_references_cost_enc.c index b22df693..e097b509 100644 --- a/src/enc/backward_references_cost_enc.c +++ b/src/enc/backward_references_cost_enc.c @@ -320,9 +320,7 @@ static int CostManagerInit(CostManager* const manager, } // Set the initial costs_ to INT64_MAX for every pixel as we will keep the // minimum. - for (i = 0; i < pix_count; ++i) { - manager->costs_[i] = (int64_t)((1ull << 63) - 1); - } + for (i = 0; i < pix_count; ++i) manager->costs_[i] = WEBP_INT64_MAX; return 1; } diff --git a/src/enc/backward_references_enc.c b/src/enc/backward_references_enc.c index dc98bf17..0f1d83da 100644 --- a/src/enc/backward_references_enc.c +++ b/src/enc/backward_references_enc.c @@ -13,8 +13,6 @@ #include "src/enc/backward_references_enc.h" #include -#include -#include #include "src/dsp/dsp.h" #include "src/dsp/lossless.h" @@ -27,8 +25,6 @@ #define MIN_BLOCK_SIZE 256 // minimum block size for backward references -#define MAX_ENTROPY (1e30f) - // 1M window (4M bytes) minus 120 special codes for short distances. #define WINDOW_SIZE ((1 << WINDOW_SIZE_BITS) - 120) @@ -758,7 +754,7 @@ static int CalculateBestCacheSize(const uint32_t* argb, int quality, int* const best_cache_bits) { int i; const int cache_bits_max = (quality <= 25) ? 0 : *best_cache_bits; - float entropy_min = MAX_ENTROPY; + uint64_t entropy_min = WEBP_UINT64_MAX; int cc_init[MAX_COLOR_CACHE_BITS + 1] = { 0 }; VP8LColorCache hashers[MAX_COLOR_CACHE_BITS + 1]; VP8LRefsCursor c = VP8LRefsCursorInit(refs); @@ -843,7 +839,7 @@ static int CalculateBestCacheSize(const uint32_t* argb, int quality, } for (i = 0; i <= cache_bits_max; ++i) { - const float entropy = VP8LHistogramEstimateBits(histos[i]); + const uint64_t entropy = VP8LHistogramEstimateBits(histos[i]); if (i == 0 || entropy < entropy_min) { entropy_min = entropy; *best_cache_bits = i; @@ -920,7 +916,7 @@ static int GetBackwardReferences(int width, int height, int i, lz77_type; // Index 0 is for a color cache, index 1 for no cache (if needed). int lz77_types_best[2] = {0, 0}; - float bit_costs_best[2] = {FLT_MAX, FLT_MAX}; + uint64_t bit_costs_best[2] = {WEBP_UINT64_MAX, WEBP_UINT64_MAX}; VP8LHashChain hash_chain_box; VP8LBackwardRefs* const refs_tmp = &refs[do_no_cache ? 
2 : 1]; int status = 0; @@ -932,7 +928,7 @@ static int GetBackwardReferences(int width, int height, for (lz77_type = 1; lz77_types_to_try; lz77_types_to_try &= ~lz77_type, lz77_type <<= 1) { int res = 0; - float bit_cost = 0.f; + uint64_t bit_cost = 0u; if ((lz77_types_to_try & lz77_type) == 0) continue; switch (lz77_type) { case kLZ77RLE: @@ -1006,7 +1002,7 @@ static int GetBackwardReferences(int width, int height, const VP8LHashChain* const hash_chain_tmp = (lz77_types_best[i] == kLZ77Standard) ? hash_chain : &hash_chain_box; const int cache_bits = (i == 1) ? 0 : *cache_bits_best; - float bit_cost_trace; + uint64_t bit_cost_trace; if (!VP8LBackwardReferencesTraceBackwards(width, height, argb, cache_bits, hash_chain_tmp, &refs[i], refs_tmp)) { diff --git a/src/enc/histogram_enc.c b/src/enc/histogram_enc.c index 3ca67b3a..f9459357 100644 --- a/src/enc/histogram_enc.c +++ b/src/enc/histogram_enc.c @@ -13,8 +13,7 @@ #include "src/webp/config.h" #endif -#include -#include +#include #include "src/dsp/lossless.h" #include "src/dsp/lossless_common.h" @@ -23,8 +22,6 @@ #include "src/enc/vp8i_enc.h" #include "src/utils/utils.h" -#define MAX_BIT_COST FLT_MAX - // Number of partitions for the three dominant (literal, red and blue) symbol // costs. #define NUM_PARTITIONS 4 @@ -102,10 +99,10 @@ void VP8LHistogramInit(VP8LHistogram* const p, int palette_code_bits, HistogramClear(p); } else { p->trivial_symbol_ = 0; - p->bit_cost_ = 0.; - p->literal_cost_ = 0.; - p->red_cost_ = 0.; - p->blue_cost_ = 0.; + p->bit_cost_ = 0; + p->literal_cost_ = 0; + p->red_cost_ = 0; + p->blue_cost_ = 0; memset(p->is_used_, 0, sizeof(p->is_used_)); } } @@ -230,8 +227,10 @@ void VP8LHistogramAddSinglePixOrCopy(VP8LHistogram* const histo, // ----------------------------------------------------------------------------- // Entropy-related functions. -static WEBP_INLINE float BitsEntropyRefine(const VP8LBitEntropy* entropy) { - float mix; +static WEBP_INLINE uint64_t BitsEntropyRefine(const VP8LBitEntropy* entropy) { + uint64_t mix; + const uint64_t fixed_point_entropy = + (uint64_t)(entropy->entropy * (1ll << LOG_2_PRECISION_BITS) + .5); if (entropy->nonzeros < 5) { if (entropy->nonzeros <= 1) { return 0; @@ -240,67 +239,72 @@ static WEBP_INLINE float BitsEntropyRefine(const VP8LBitEntropy* entropy) { // Let's mix in a bit of entropy to favor good clustering when // distributions of these are combined. if (entropy->nonzeros == 2) { - return 0.99f * entropy->sum + 0.01f * entropy->entropy; + return DivRound(99 * ((uint64_t)entropy->sum << LOG_2_PRECISION_BITS) + + fixed_point_entropy, + 100); } // No matter what the entropy says, we cannot be better than min_limit // with Huffman coding. I am mixing a bit of entropy into the // min_limit since it produces much better (~0.5 %) compression results // perhaps because of better entropy clustering. if (entropy->nonzeros == 3) { - mix = 0.95f; + mix = 950; } else { - mix = 0.7f; // nonzeros == 4. + mix = 700; // nonzeros == 4. } } else { - mix = 0.627f; + mix = 627; } { - float min_limit = 2.f * entropy->sum - entropy->max_val; - min_limit = mix * min_limit + (1.f - mix) * entropy->entropy; - return (entropy->entropy < min_limit) ? min_limit : entropy->entropy; + uint64_t min_limit = (uint64_t)(2 * entropy->sum - entropy->max_val) + << LOG_2_PRECISION_BITS; + min_limit = + DivRound(mix * min_limit + (1000 - mix) * fixed_point_entropy, 1000); + return (fixed_point_entropy < min_limit) ? 
min_limit : fixed_point_entropy; } } -float VP8LBitsEntropy(const uint32_t* const array, int n) { +uint64_t VP8LBitsEntropy(const uint32_t* const array, int n) { VP8LBitEntropy entropy; VP8LBitsEntropyUnrefined(array, n, &entropy); return BitsEntropyRefine(&entropy); } -static float InitialHuffmanCost(void) { +static uint64_t InitialHuffmanCost(void) { // Small bias because Huffman code length is typically not stored in // full length. - static const int kHuffmanCodeOfHuffmanCodeSize = CODE_LENGTH_CODES * 3; - static const float kSmallBias = 9.1f; - return kHuffmanCodeOfHuffmanCodeSize - kSmallBias; + static const uint64_t kHuffmanCodeOfHuffmanCodeSize = CODE_LENGTH_CODES * 3; + // Subtract a bias of 9.1. + return (kHuffmanCodeOfHuffmanCodeSize << LOG_2_PRECISION_BITS) - + DivRound(91ll << LOG_2_PRECISION_BITS, 10); } // Finalize the Huffman cost based on streak numbers and length type (<3 or >=3) -static float FinalHuffmanCost(const VP8LStreaks* const stats) { - // The constants in this function are experimental and got rounded from +static uint64_t FinalHuffmanCost(const VP8LStreaks* const stats) { + // The constants in this function are empirical and got rounded from // their original values in 1/8 when switched to 1/1024. - float retval = InitialHuffmanCost(); + uint64_t retval = InitialHuffmanCost(); // Second coefficient: Many zeros in the histogram are covered efficiently // by a run-length encode. Originally 2/8. - retval += stats->counts[0] * 1.5625f + 0.234375f * stats->streaks[0][1]; + uint64_t retval_extra = stats->counts[0] * 1600 + 240 * stats->streaks[0][1]; // Second coefficient: Constant values are encoded less efficiently, but still // RLE'ed. Originally 6/8. - retval += stats->counts[1] * 2.578125f + 0.703125f * stats->streaks[1][1]; + retval_extra += stats->counts[1] * 2640 + 720 * stats->streaks[1][1]; // 0s are usually encoded more efficiently than non-0s. // Originally 15/8. - retval += 1.796875f * stats->streaks[0][0]; + retval_extra += 1840 * stats->streaks[0][0]; // Originally 26/8. - retval += 3.28125f * stats->streaks[1][0]; - return retval; + retval_extra += 3360 * stats->streaks[1][0]; + return retval + (retval_extra << (LOG_2_PRECISION_BITS - 10)); } // Get the symbol entropy for the distribution 'population'. // Set 'trivial_sym', if there's only one symbol present in the distribution. -static float PopulationCost(const uint32_t* const population, int length, - uint32_t* const trivial_sym, - uint8_t* const is_used) { +static uint64_t PopulationCost(const uint32_t* const population, int length, + uint32_t* const trivial_sym, + uint8_t* const is_used) { VP8LBitEntropy bit_entropy; VP8LStreaks stats; VP8LGetEntropyUnrefined(population, length, &bit_entropy, &stats); @@ -316,10 +320,11 @@ static float PopulationCost(const uint32_t* const population, int length, // trivial_at_end is 1 if the two histograms only have one element that is // non-zero: both the zero-th one, or both the last one. 
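The substitution pattern used throughout BitsEntropyRefine() and FinalHuffmanCost() above is: a float weight such as 0.95 becomes an integer numerator over 1000 (or a pre-shifted constant over 1024), applied to values already scaled by 1 << LOG_2_PRECISION_BITS and rounded with DivRound(). A minimal standalone sketch of that pattern follows; the LOG_2_PRECISION_BITS value, the sample inputs and the main() harness are illustrative assumptions, while DivRound() is copied from the lossless_common.h hunk above.

#include <stdint.h>
#include <stdio.h>

#define LOG_2_PRECISION_BITS 23  /* assumed value of the lossless_common.h constant */

/* Copied from the src/dsp/lossless_common.h hunk above. */
static int64_t DivRound(int64_t a, int64_t b) {
  return ((a < 0) == (b < 0)) ? ((a + b / 2) / b) : ((a - b / 2) / b);
}

int main(void) {
  /* Float form: 0.95f * min_limit + 0.05f * entropy.
     Fixed-point form: DivRound(950 * min_limit + 50 * entropy, 1000), with
     both inputs pre-scaled by 1 << LOG_2_PRECISION_BITS. */
  const int64_t min_limit = (int64_t)37 << LOG_2_PRECISION_BITS;
  const int64_t entropy = (int64_t)12 << LOG_2_PRECISION_BITS;
  const int64_t mixed = DivRound(950 * min_limit + 50 * entropy, 1000);
  printf("fixed point: %.6f bits\n",
         (double)mixed / (1 << LOG_2_PRECISION_BITS));
  printf("float ref  : %.6f bits\n", 0.95 * 37 + 0.05 * 12);
  return 0;
}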
-static WEBP_INLINE float GetCombinedEntropy(const uint32_t* const X, - const uint32_t* const Y, int length, - int is_X_used, int is_Y_used, - int trivial_at_end) { +static WEBP_INLINE uint64_t GetCombinedEntropy(const uint32_t* const X, + const uint32_t* const Y, + int length, int is_X_used, + int is_Y_used, + int trivial_at_end) { VP8LStreaks stats; if (trivial_at_end) { // This configuration is due to palettization that transforms an indexed @@ -357,7 +362,7 @@ static WEBP_INLINE float GetCombinedEntropy(const uint32_t* const X, } // Estimates the Entropy + Huffman + other block overhead size cost. -float VP8LHistogramEstimateBits(VP8LHistogram* const p) { +uint64_t VP8LHistogramEstimateBits(VP8LHistogram* const p) { return PopulationCost(p->literal_, VP8LHistogramNumCodes(p->palette_code_bits_), NULL, &p->is_used_[0]) + @@ -366,9 +371,10 @@ float VP8LHistogramEstimateBits(VP8LHistogram* const p) { PopulationCost(p->alpha_, NUM_LITERAL_CODES, NULL, &p->is_used_[3]) + PopulationCost(p->distance_, NUM_DISTANCE_CODES, NULL, &p->is_used_[4]) + - (float)VP8LExtraCost(p->literal_ + NUM_LITERAL_CODES, - NUM_LENGTH_CODES) + - (float)VP8LExtraCost(p->distance_, NUM_DISTANCE_CODES); + ((uint64_t)(VP8LExtraCost(p->literal_ + NUM_LITERAL_CODES, + NUM_LENGTH_CODES) + + VP8LExtraCost(p->distance_, NUM_DISTANCE_CODES)) + << LOG_2_PRECISION_BITS); } // ----------------------------------------------------------------------------- @@ -376,16 +382,17 @@ float VP8LHistogramEstimateBits(VP8LHistogram* const p) { static int GetCombinedHistogramEntropy(const VP8LHistogram* const a, const VP8LHistogram* const b, - float cost_threshold, float* cost) { + int64_t cost_threshold, int64_t* cost) { const int palette_code_bits = a->palette_code_bits_; int trivial_at_end = 0; assert(a->palette_code_bits_ == b->palette_code_bits_); *cost += GetCombinedEntropy(a->literal_, b->literal_, VP8LHistogramNumCodes(palette_code_bits), a->is_used_[0], b->is_used_[0], 0); - *cost += (float)VP8LExtraCostCombined(a->literal_ + NUM_LITERAL_CODES, - b->literal_ + NUM_LITERAL_CODES, - NUM_LENGTH_CODES); + *cost += (int64_t)VP8LExtraCostCombined(a->literal_ + NUM_LITERAL_CODES, + b->literal_ + NUM_LITERAL_CODES, + NUM_LENGTH_CODES) + << LOG_2_PRECISION_BITS; if (*cost > cost_threshold) return 0; if (a->trivial_symbol_ != VP8L_NON_TRIVIAL_SYM && @@ -419,8 +426,9 @@ static int GetCombinedHistogramEntropy(const VP8LHistogram* const a, *cost += GetCombinedEntropy(a->distance_, b->distance_, NUM_DISTANCE_CODES, a->is_used_[4], b->is_used_[4], 0); - *cost += (float)VP8LExtraCostCombined(a->distance_, b->distance_, - NUM_DISTANCE_CODES); + *cost += (int64_t)VP8LExtraCostCombined(a->distance_, b->distance_, + NUM_DISTANCE_CODES) + << LOG_2_PRECISION_BITS; if (*cost > cost_threshold) return 0; return 1; @@ -441,11 +449,12 @@ static WEBP_INLINE void HistogramAdd(const VP8LHistogram* const a, // Since the previous score passed is 'cost_threshold', we only need to compare // the partial cost against 'cost_threshold + C(a) + C(b)' to possibly bail-out // early. 
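The comment above describes the early bail-out that GetCombinedHistogramEntropy() and HistogramAddEval() rely on: the caller's threshold bounds C(a+b) - C(a) - C(b), so adding C(a) + C(b) turns it into an absolute bound that each partial sum of C(a+b) can be checked against. A small sketch of that pattern, with made-up component costs rather than the real literal/red/blue/alpha/distance terms:

#include <stdint.h>

/* Placeholder component costs stand in for the per-symbol-type terms of
   GetCombinedHistogramEntropy(). */
static int CombinedCostWithBailOut(const int64_t component_costs[], int num,
                                   int64_t bound, int64_t* const cost) {
  int i;
  *cost = 0;
  for (i = 0; i < num; ++i) {
    *cost += component_costs[i];
    if (*cost > bound) return 0;  /* cannot beat the threshold: stop early */
  }
  return 1;
}

int64_t AddEvalSketch(const int64_t component_costs[], int num,
                      int64_t cost_a, int64_t cost_b,
                      int64_t cost_threshold /* <= 0 */) {
  const int64_t sum_cost = cost_a + cost_b;
  int64_t cost = 0;
  /* 'cost_threshold' bounds C(a+b) - C(a) - C(b); adding C(a) + C(b) turns
     it into an absolute bound on the partial sums of C(a+b). */
  (void)CombinedCostWithBailOut(component_costs, num, cost_threshold + sum_cost,
                                &cost);
  return cost - sum_cost;  /* negative when merging a and b is a win */
}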
-static float HistogramAddEval(const VP8LHistogram* const a, - const VP8LHistogram* const b, - VP8LHistogram* const out, float cost_threshold) { - float cost = 0; - const float sum_cost = a->bit_cost_ + b->bit_cost_; +static int64_t HistogramAddEval(const VP8LHistogram* const a, + const VP8LHistogram* const b, + VP8LHistogram* const out, + int64_t cost_threshold) { + int64_t cost = 0; + const int64_t sum_cost = a->bit_cost_ + b->bit_cost_; cost_threshold += sum_cost; if (GetCombinedHistogramEntropy(a, b, cost_threshold, &cost)) { @@ -460,10 +469,10 @@ static float HistogramAddEval(const VP8LHistogram* const a, // Same as HistogramAddEval(), except that the resulting histogram // is not stored. Only the cost C(a+b) - C(a) is evaluated. We omit // the term C(b) which is constant over all the evaluations. -static float HistogramAddThresh(const VP8LHistogram* const a, - const VP8LHistogram* const b, - float cost_threshold) { - float cost; +static int64_t HistogramAddThresh(const VP8LHistogram* const a, + const VP8LHistogram* const b, + int64_t cost_threshold) { + int64_t cost; assert(a != NULL && b != NULL); cost = -a->bit_cost_; GetCombinedHistogramEntropy(a, b, cost_threshold, &cost); @@ -475,21 +484,21 @@ static float HistogramAddThresh(const VP8LHistogram* const a, // The structure to keep track of cost range for the three dominant entropy // symbols. typedef struct { - float literal_max_; - float literal_min_; - float red_max_; - float red_min_; - float blue_max_; - float blue_min_; + uint64_t literal_max_; + uint64_t literal_min_; + uint64_t red_max_; + uint64_t red_min_; + uint64_t blue_max_; + uint64_t blue_min_; } DominantCostRange; static void DominantCostRangeInit(DominantCostRange* const c) { - c->literal_max_ = 0.; - c->literal_min_ = MAX_BIT_COST; - c->red_max_ = 0.; - c->red_min_ = MAX_BIT_COST; - c->blue_max_ = 0.; - c->blue_min_ = MAX_BIT_COST; + c->literal_max_ = 0; + c->literal_min_ = WEBP_UINT64_MAX; + c->red_max_ = 0; + c->red_min_ = WEBP_UINT64_MAX; + c->blue_max_ = 0; + c->blue_min_ = WEBP_UINT64_MAX; } static void UpdateDominantCostRange( @@ -504,15 +513,18 @@ static void UpdateDominantCostRange( static void UpdateHistogramCost(VP8LHistogram* const h) { uint32_t alpha_sym, red_sym, blue_sym; - const float alpha_cost = + const uint64_t alpha_cost = PopulationCost(h->alpha_, NUM_LITERAL_CODES, &alpha_sym, &h->is_used_[3]); - const float distance_cost = + const uint64_t distance_cost = PopulationCost(h->distance_, NUM_DISTANCE_CODES, NULL, &h->is_used_[4]) + - (float)VP8LExtraCost(h->distance_, NUM_DISTANCE_CODES); + ((uint64_t)VP8LExtraCost(h->distance_, NUM_DISTANCE_CODES) + << LOG_2_PRECISION_BITS); const int num_codes = VP8LHistogramNumCodes(h->palette_code_bits_); h->literal_cost_ = PopulationCost(h->literal_, num_codes, NULL, &h->is_used_[0]) + - (float)VP8LExtraCost(h->literal_ + NUM_LITERAL_CODES, NUM_LENGTH_CODES); + ((uint64_t)VP8LExtraCost(h->literal_ + NUM_LITERAL_CODES, + NUM_LENGTH_CODES) + << LOG_2_PRECISION_BITS); h->red_cost_ = PopulationCost(h->red_, NUM_LITERAL_CODES, &red_sym, &h->is_used_[1]); h->blue_cost_ = @@ -527,10 +539,10 @@ static void UpdateHistogramCost(VP8LHistogram* const h) { } } -static int GetBinIdForEntropy(float min, float max, float val) { - const float range = max - min; - if (range > 0.) 
{ - const float delta = val - min; +static int GetBinIdForEntropy(uint64_t min, uint64_t max, uint64_t val) { + const uint64_t range = max - min; + if (range > 0) { + const uint64_t delta = val - min; return (int)((NUM_PARTITIONS - 1e-6) * delta / range); } else { return 0; @@ -639,11 +651,12 @@ static void HistogramAnalyzeEntropyBin(VP8LHistogramSet* const image_histo, // Merges some histograms with same bin_id together if it's advantageous. // Sets the remaining histograms to NULL. +// 'combine_cost_factor' has to be divided by 100. static void HistogramCombineEntropyBin( VP8LHistogramSet* const image_histo, int* num_used, const uint16_t* const clusters, uint16_t* const cluster_mappings, VP8LHistogram* cur_combo, const uint16_t* const bin_map, int num_bins, - float combine_cost_factor, int low_effort) { + int32_t combine_cost_factor, int low_effort) { VP8LHistogram** const histograms = image_histo->histograms; int idx; struct { @@ -673,9 +686,10 @@ static void HistogramCombineEntropyBin( cluster_mappings[clusters[idx]] = clusters[first]; } else { // try to merge #idx into #first (both share the same bin_id) - const float bit_cost = histograms[idx]->bit_cost_; - const float bit_cost_thresh = -bit_cost * combine_cost_factor; - const float curr_cost_diff = HistogramAddEval( + const int64_t bit_cost = histograms[idx]->bit_cost_; + const int64_t bit_cost_thresh = + -DivRound(bit_cost * combine_cost_factor, 100); + const int64_t curr_cost_diff = HistogramAddEval( histograms[first], histograms[idx], cur_combo, bit_cost_thresh); if (curr_cost_diff < bit_cost_thresh) { // Try to merge two histograms only if the combo is a trivial one or @@ -724,8 +738,8 @@ static uint32_t MyRand(uint32_t* const seed) { typedef struct { int idx1; int idx2; - float cost_diff; - float cost_combo; + int64_t cost_diff; + int64_t cost_combo; } HistogramPair; typedef struct { @@ -765,7 +779,7 @@ static void HistoQueuePopPair(HistoQueue* const histo_queue, // Check whether a pair in the queue should be updated as head or not. static void HistoQueueUpdateHead(HistoQueue* const histo_queue, HistogramPair* const pair) { - assert(pair->cost_diff < 0.); + assert(pair->cost_diff < 0); assert(pair >= histo_queue->queue && pair < (histo_queue->queue + histo_queue->size)); assert(histo_queue->size > 0); @@ -778,29 +792,29 @@ static void HistoQueueUpdateHead(HistoQueue* const histo_queue, } // Update the cost diff and combo of a pair of histograms. This needs to be -// called when the the histograms have been merged with a third one. +// called when the histograms have been merged with a third one. static void HistoQueueUpdatePair(const VP8LHistogram* const h1, - const VP8LHistogram* const h2, float threshold, - HistogramPair* const pair) { - const float sum_cost = h1->bit_cost_ + h2->bit_cost_; - pair->cost_combo = 0.; + const VP8LHistogram* const h2, + int64_t threshold, HistogramPair* const pair) { + const int64_t sum_cost = h1->bit_cost_ + h2->bit_cost_; + pair->cost_combo = 0; GetCombinedHistogramEntropy(h1, h2, sum_cost + threshold, &pair->cost_combo); pair->cost_diff = pair->cost_combo - sum_cost; } // Create a pair from indices "idx1" and "idx2" provided its cost // is inferior to "threshold", a negative entropy. -// It returns the cost of the pair, or 0. if it superior to threshold. -static float HistoQueuePush(HistoQueue* const histo_queue, - VP8LHistogram** const histograms, int idx1, - int idx2, float threshold) { +// It returns the cost of the pair, or 0 if it superior to threshold. 
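For the entropy-bin merge above, the float factor 0.16f is now carried as an integer number of hundredths (16, halved as before for large histogram sets and low quality), so HistogramCombineEntropyBin() derives its threshold as -DivRound(bit_cost * combine_cost_factor, 100). A toy comparison of the two forms; the sample bit_cost is made up, and DivRound() is repeated from lossless_common.h so the snippet stands alone:

#include <stdint.h>
#include <stdio.h>

/* Copied from src/dsp/lossless_common.h for self-containment. */
static int64_t DivRound(int64_t a, int64_t b) {
  return ((a < 0) == (b < 0)) ? ((a + b / 2) / b) : ((a - b / 2) / b);
}

int main(void) {
  const int32_t combine_cost_factor = 16;  /* was 0.16f, now in hundredths */
  const int64_t bit_cost = 123456789;      /* a histogram's fixed-point cost */
  const int64_t bit_cost_thresh =
      -DivRound(bit_cost * combine_cost_factor, 100);
  printf("integer threshold: %lld\n", (long long)bit_cost_thresh);
  printf("float reference  : %.2f\n", -0.16 * 123456789);
  return 0;
}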
+static int64_t HistoQueuePush(HistoQueue* const histo_queue, + VP8LHistogram** const histograms, int idx1, + int idx2, int64_t threshold) { const VP8LHistogram* h1; const VP8LHistogram* h2; HistogramPair pair; // Stop here if the queue is full. - if (histo_queue->size == histo_queue->max_size) return 0.; - assert(threshold <= 0.); + if (histo_queue->size == histo_queue->max_size) return 0; + assert(threshold <= 0); if (idx1 > idx2) { const int tmp = idx2; idx2 = idx1; @@ -814,7 +828,7 @@ static float HistoQueuePush(HistoQueue* const histo_queue, HistoQueueUpdatePair(h1, h2, threshold, &pair); // Do not even consider the pair if it does not improve the entropy. - if (pair.cost_diff >= threshold) return 0.; + if (pair.cost_diff >= threshold) return 0; histo_queue->queue[histo_queue->size++] = pair; HistoQueueUpdateHead(histo_queue, &histo_queue->queue[histo_queue->size - 1]); @@ -851,7 +865,7 @@ static int HistogramCombineGreedy(VP8LHistogramSet* const image_histo, for (j = i + 1; j < image_histo_size; ++j) { // Initialize queue. if (image_histo->histograms[j] == NULL) continue; - HistoQueuePush(&histo_queue, histograms, i, j, 0.); + HistoQueuePush(&histo_queue, histograms, i, j, 0); } } @@ -879,7 +893,7 @@ static int HistogramCombineGreedy(VP8LHistogramSet* const image_histo, // Push new pairs formed with combined histogram to the queue. for (i = 0; i < image_histo->size; ++i) { if (i == idx1 || image_histo->histograms[i] == NULL) continue; - HistoQueuePush(&histo_queue, image_histo->histograms, idx1, i, 0.); + HistoQueuePush(&histo_queue, image_histo->histograms, idx1, i, 0); } } @@ -937,8 +951,8 @@ static int HistogramCombineStochastic(VP8LHistogramSet* const image_histo, ++tries_with_no_success < num_tries_no_success; ++iter) { int* mapping_index; - float best_cost = - (histo_queue.size == 0) ? 0.f : histo_queue.queue[0].cost_diff; + int64_t best_cost = + (histo_queue.size == 0) ? 0 : histo_queue.queue[0].cost_diff; int best_idx1 = -1, best_idx2 = 1; const uint32_t rand_range = (*num_used - 1) * (*num_used); // (*num_used) / 2 was chosen empirically. Less means faster but worse @@ -947,7 +961,7 @@ static int HistogramCombineStochastic(VP8LHistogramSet* const image_histo, // Pick random samples. for (j = 0; *num_used >= 2 && j < num_tries; ++j) { - float curr_cost; + int64_t curr_cost; // Choose two different histograms at random and try to combine them. const uint32_t tmp = MyRand(&seed) % rand_range; uint32_t idx1 = tmp / (*num_used - 1); @@ -1012,8 +1026,8 @@ static int HistogramCombineStochastic(VP8LHistogramSet* const image_histo, } if (do_eval) { // Re-evaluate the cost of an updated pair. - HistoQueueUpdatePair(histograms[p->idx1], histograms[p->idx2], 0., p); - if (p->cost_diff >= 0.) { + HistoQueueUpdatePair(histograms[p->idx1], histograms[p->idx2], 0, p); + if (p->cost_diff >= 0) { HistoQueuePopPair(&histo_queue, p); continue; } @@ -1049,7 +1063,7 @@ static void HistogramRemap(const VP8LHistogramSet* const in, if (out_size > 1) { for (i = 0; i < in_size; ++i) { int best_out = 0; - float best_bits = MAX_BIT_COST; + int64_t best_bits = WEBP_INT64_MAX; int k; if (in_histo[i] == NULL) { // Arbitrarily set to the previous value if unused to help future LZ77. 
@@ -1057,7 +1071,7 @@ static void HistogramRemap(const VP8LHistogramSet* const in, continue; } for (k = 0; k < out_size; ++k) { - float cur_bits; + int64_t cur_bits; cur_bits = HistogramAddThresh(out_histo[k], in_histo[i], best_bits); if (k == 0 || cur_bits < best_bits) { best_bits = cur_bits; @@ -1085,13 +1099,13 @@ static void HistogramRemap(const VP8LHistogramSet* const in, } } -static float GetCombineCostFactor(int histo_size, int quality) { - float combine_cost_factor = 0.16f; +static int32_t GetCombineCostFactor(int histo_size, int quality) { + int32_t combine_cost_factor = 16; if (quality < 90) { - if (histo_size > 256) combine_cost_factor /= 2.f; - if (histo_size > 512) combine_cost_factor /= 2.f; - if (histo_size > 1024) combine_cost_factor /= 2.f; - if (quality <= 50) combine_cost_factor /= 2.f; + if (histo_size > 256) combine_cost_factor /= 2; + if (histo_size > 512) combine_cost_factor /= 2; + if (histo_size > 1024) combine_cost_factor /= 2; + if (quality <= 50) combine_cost_factor /= 2; } return combine_cost_factor; } @@ -1201,7 +1215,7 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize, if (entropy_combine) { uint16_t* const bin_map = map_tmp; - const float combine_cost_factor = + const int32_t combine_cost_factor = GetCombineCostFactor(image_histo_raw_size, quality); const uint32_t num_clusters = num_used; diff --git a/src/enc/histogram_enc.h b/src/enc/histogram_enc.h index 4c0bb974..ebeca929 100644 --- a/src/enc/histogram_enc.h +++ b/src/enc/histogram_enc.h @@ -40,10 +40,10 @@ typedef struct { int palette_code_bits_; uint32_t trivial_symbol_; // True, if histograms for Red, Blue & Alpha // literal symbols are single valued. - float bit_cost_; // cached value of bit cost. - float literal_cost_; // Cached values of dominant entropy costs: - float red_cost_; // literal, red & blue. - float blue_cost_; + int64_t bit_cost_; // cached value of bit cost. + uint64_t literal_cost_; // Cached values of dominant entropy costs: + uint64_t red_cost_; // literal, red & blue. + uint64_t blue_cost_; uint8_t is_used_[5]; // 5 for literal, red, blue, alpha, distance } VP8LHistogram; @@ -117,11 +117,11 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize, int* const percent); // Returns the entropy for the symbols in the input array. -float VP8LBitsEntropy(const uint32_t* const array, int n); +uint64_t VP8LBitsEntropy(const uint32_t* const array, int n); // Estimate how many bits the combined entropy of literals and distance // approximately maps to. -float VP8LHistogramEstimateBits(VP8LHistogram* const p); +uint64_t VP8LHistogramEstimateBits(VP8LHistogram* const p); #ifdef __cplusplus } diff --git a/src/enc/vp8l_enc.c b/src/enc/vp8l_enc.c index 6ad4bd99..456b71ba 100644 --- a/src/enc/vp8l_enc.c +++ b/src/enc/vp8l_enc.c @@ -141,8 +141,8 @@ static int AnalyzeEntropy(const uint32_t* argb, curr_row += argb_stride; } { - float entropy_comp[kHistoTotal]; - float entropy[kNumEntropyIx]; + uint64_t entropy_comp[kHistoTotal]; + uint64_t entropy[kNumEntropyIx]; int k; int last_mode_to_analyze = use_palette ? kPalette : kSpatialSubGreen; int j; @@ -180,21 +180,19 @@ static int AnalyzeEntropy(const uint32_t* argb, // When including transforms, there is an overhead in bits from // storing them. This overhead is small but matters for small images. // For spatial, there are 14 transformations. 
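In the AnalyzeEntropy() hunk that follows, the RightShiftRound() calls disappear because entropy[] itself is now kept in the 1 << LOG_2_PRECISION_BITS scale that VP8LFastLog2() already appears to use (the removed shift by LOG_2_PRECISION_BITS suggests as much), so the transform overhead is added directly and only the palette term needs shifting up. A rough, self-contained illustration of that bookkeeping, with a crude local log2 stand-in and made-up sizes:

#include <stdint.h>
#include <stdio.h>

#define LOG_2_PRECISION_BITS 23  /* assumed, as in the earlier sketches */

/* Crude integer log2 in the same fixed-point scale, for illustration only;
   the real VP8LFastLog2() is more precise. */
static uint64_t FastLog2Sketch(uint32_t v) {
  uint64_t bits = 0;
  while ((1ull << (bits + 1)) <= v) ++bits;
  return bits << LOG_2_PRECISION_BITS;
}

int main(void) {
  const uint64_t num_blocks = 40 * 30;  /* made-up sub-sampled transform size */
  const uint64_t palette_size = 256;    /* made-up palette size */
  const uint64_t spatial_overhead = num_blocks * FastLog2Sketch(14);
  const uint64_t palette_overhead = (palette_size * 8) << LOG_2_PRECISION_BITS;
  printf("spatial overhead ~ %.1f bits\n",
         (double)spatial_overhead / (1 << LOG_2_PRECISION_BITS));
  printf("palette overhead = %.1f bits\n",
         (double)palette_overhead / (1 << LOG_2_PRECISION_BITS));
  return 0;
}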
- entropy[kSpatial] += RightShiftRound( - (uint64_t)VP8LSubSampleSize(width, transform_bits) * - VP8LSubSampleSize(height, transform_bits) * VP8LFastLog2(14), - LOG_2_PRECISION_BITS); + entropy[kSpatial] += (uint64_t)VP8LSubSampleSize(width, transform_bits) * + VP8LSubSampleSize(height, transform_bits) * + VP8LFastLog2(14); // For color transforms: 24 as only 3 channels are considered in a // ColorTransformElement. - entropy[kSpatialSubGreen] += RightShiftRound( + entropy[kSpatialSubGreen] += (uint64_t)VP8LSubSampleSize(width, transform_bits) * - VP8LSubSampleSize(height, transform_bits) * VP8LFastLog2(24), - LOG_2_PRECISION_BITS); + VP8LSubSampleSize(height, transform_bits) * VP8LFastLog2(24); // For palettes, add the cost of storing the palette. // We empirically estimate the cost of a compressed entry as 8 bits. // The palette is differential-coded when compressed hence a much // lower cost than sizeof(uint32_t)*8. - entropy[kPalette] += palette_size * 8; + entropy[kPalette] += (palette_size * 8ull) << LOG_2_PRECISION_BITS; *min_entropy_ix = kDirect; for (k = kDirect + 1; k <= last_mode_to_analyze; ++k) {
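One seam remains at the DSP boundary: VP8LBitEntropy still carries a float entropy field, which BitsEntropyRefine() converts once, with rounding, into the shared fixed-point scale. A minimal sketch of that conversion; the sample value is arbitrary and LOG_2_PRECISION_BITS is assumed to match the lossless_common.h constant:

#include <stdint.h>
#include <stdio.h>

#define LOG_2_PRECISION_BITS 23  /* assumed to match lossless_common.h */

int main(void) {
  const float entropy = 1234.5678f;  /* a made-up DSP-side entropy, in bits */
  const uint64_t fixed_point_entropy =
      (uint64_t)(entropy * (1ll << LOG_2_PRECISION_BITS) + .5);
  printf("%.4f bits -> %llu (back: %.6f bits)\n", (double)entropy,
         (unsigned long long)fixed_point_entropy,
         (double)fixed_point_entropy / (1 << LOG_2_PRECISION_BITS));
  return 0;
}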