From b0b973c39bbbc1b6eabd05d26c2713f30b2c935f Mon Sep 17 00:00:00 2001 From: Vikas Arora Date: Fri, 19 Dec 2014 17:33:17 +0000 Subject: [PATCH] Speed up VP8LGetHistoImageSymbols for low effort (m=0) mode. - Do a lightweight entropy-based histogram combine and leave out the CPU-intensive stochastic and greedy heuristics for combining the histograms. For a 1000-image PNG corpus (m=0), this change yields a speedup of 10% in the lower quality range (1% drop in compression density) and about 5% in the higher quality range (1% drop in compression density). Following are the compression stats (before/after) for method = 0: Before After bpp/MPs bpp/MPs q=0 2.8336/16.577 2.8615/18.000 q=5 2.8336/16.504 2.8615/18.216 q=10 2.8293/16.419 2.8572/18.070 q=15 2.8242/17.582 2.8519/18.371 q=20 2.8182/16.131 2.8454/18.975 q=25 2.7924/7.670 2.8230/8.531 q=30 2.7078/6.635 2.7310/7.706 q=35 2.7028/6.203 2.7253/6.855 q=40 2.7005/6.198 2.7231/6.364 q=45 2.6989/5.570 2.7216/5.844 q=50 2.6970/5.087 2.7196/5.210 q=55 2.6963/4.589 2.7208/4.766 q=60 2.6949/4.292 2.7195/4.495 q=65 2.6940/3.970 2.7185/4.024 q=70 2.6929/3.698 2.7174/3.699 q=75 2.6919/3.427 2.7164/3.449 q=80 2.6918/3.106 2.7161/3.222 q=85 2.6909/2.856 2.7153/2.919 q=90 2.6902/2.695 2.7145/2.766 q=95 2.6881/2.499 2.7124/2.548 q=100 2.6873/2.253 2.6873/2.285 Change-Id: I0567945068f8dc7888041e93d872f9def91f50ba --- src/enc/histogram.c | 104 ++++++++++++++++++++++++++++---------------- src/enc/histogram.h | 3 +- src/enc/vp8l.c | 10 ++--- 3 files changed, 73 insertions(+), 44 deletions(-) diff --git a/src/enc/histogram.c b/src/enc/histogram.c index ac5dda24..98a6e80d 100644 --- a/src/enc/histogram.c +++ b/src/enc/histogram.c @@ -288,6 +288,14 @@ static int GetBinIdForEntropy(double min, double max, double val) { return (int)(NUM_PARTITIONS * delta / range); } +static int GetHistoBinIndexLowEffort( + const VP8LHistogram* const h, const DominantCostRange* const c) { + const int bin_id = GetBinIdForEntropy(c->literal_min_, c->literal_max_, + 
h->literal_cost_); + assert(bin_id < NUM_PARTITIONS); + return bin_id; +} + static int GetHistoBinIndex( const VP8LHistogram* const h, const DominantCostRange* const c) { const int bin_id = @@ -340,8 +348,8 @@ static void HistogramCopyAndAnalyze( // Partition histograms to different entropy bins for three dominant (literal, // red and blue) symbol costs and compute the histogram aggregate bit_cost. -static void HistogramAnalyzeEntropyBin( - VP8LHistogramSet* const image_histo, int16_t* const bin_map) { +static void HistogramAnalyzeEntropyBin(VP8LHistogramSet* const image_histo, + int16_t* const bin_map, int low_effort) { int i; VP8LHistogram** const histograms = image_histo->histograms; const int histo_size = image_histo->size; @@ -360,7 +368,9 @@ static void HistogramAnalyzeEntropyBin( for (i = 0; i < histo_size; ++i) { int num_histos; VP8LHistogram* const histo = histograms[i]; - const int16_t bin_id = (int16_t)GetHistoBinIndex(histo, &cost_range); + const int16_t bin_id = low_effort ? + (int16_t)GetHistoBinIndexLowEffort(histo, &cost_range) : + (int16_t)GetHistoBinIndex(histo, &cost_range); const int bin_offset = bin_id * bin_depth; // bin_map[n][0] for every bin 'n' maintains the counter for the number of // histograms in that bin. 
@@ -392,12 +402,12 @@ static void HistogramCompactBins(VP8LHistogramSet* const image_histo) { static VP8LHistogram* HistogramCombineEntropyBin( VP8LHistogramSet* const image_histo, VP8LHistogram* cur_combo, - int16_t* const bin_map, int bin_depth, - double combine_cost_factor) { + int16_t* const bin_map, int bin_depth, int num_bins, + double combine_cost_factor, int low_effort) { int bin_id; VP8LHistogram** const histograms = image_histo->histograms; - for (bin_id = 0; bin_id < BIN_SIZE; ++bin_id) { + for (bin_id = 0; bin_id < num_bins; ++bin_id) { const int bin_offset = bin_id * bin_depth; const int num_histos = bin_map[bin_offset]; const int idx1 = bin_map[bin_offset + 1]; @@ -405,32 +415,43 @@ static VP8LHistogram* HistogramCombineEntropyBin( int n; for (n = 2; n <= num_histos; ++n) { const int idx2 = bin_map[bin_offset + n]; - const double bit_cost_idx2 = histograms[idx2]->bit_cost_; - if (bit_cost_idx2 > 0.) { - const double bit_cost_thresh = -bit_cost_idx2 * combine_cost_factor; - const double curr_cost_diff = - HistogramAddEval(histograms[idx1], histograms[idx2], - cur_combo, bit_cost_thresh); - if (curr_cost_diff < bit_cost_thresh) { - // Try to merge two histograms only if the combo is a trivial one or - // the two candidate histograms are already non-trivial. - // For some images, 'try_combine' turns out to be false for a lot of - // histogram pairs. In that case, we fallback to combining histograms - // as usual to avoid increasing the header size. 
- const int try_combine = - (cur_combo->trivial_symbol_ != VP8L_NON_TRIVIAL_SYM) || - ((histograms[idx1]->trivial_symbol_ == VP8L_NON_TRIVIAL_SYM) && - (histograms[idx2]->trivial_symbol_ == VP8L_NON_TRIVIAL_SYM)); - const int max_combine_failures = 32; - if (try_combine || (num_combine_failures >= max_combine_failures)) { - HistogramSwap(&cur_combo, &histograms[idx1]); - histograms[idx2]->bit_cost_ = 0.; - } else { - ++num_combine_failures; + if (low_effort) { + // Merge all histograms with the same bin index, irrespective of cost of + // the merged histograms. + VP8LHistogramAdd(histograms[idx1], histograms[idx2], histograms[idx1]); + histograms[idx2]->bit_cost_ = 0.; + } else { + const double bit_cost_idx2 = histograms[idx2]->bit_cost_; + if (bit_cost_idx2 > 0.) { + const double bit_cost_thresh = -bit_cost_idx2 * combine_cost_factor; + const double curr_cost_diff = + HistogramAddEval(histograms[idx1], histograms[idx2], + cur_combo, bit_cost_thresh); + if (curr_cost_diff < bit_cost_thresh) { + // Try to merge two histograms only if the combo is a trivial one or + // the two candidate histograms are already non-trivial. + // For some images, 'try_combine' turns out to be false for a lot of + // histogram pairs. In that case, we fallback to combining + // histograms as usual to avoid increasing the header size. + const int try_combine = + (cur_combo->trivial_symbol_ != VP8L_NON_TRIVIAL_SYM) || + ((histograms[idx1]->trivial_symbol_ == VP8L_NON_TRIVIAL_SYM) && + (histograms[idx2]->trivial_symbol_ == VP8L_NON_TRIVIAL_SYM)); + const int max_combine_failures = 32; + if (try_combine || (num_combine_failures >= max_combine_failures)) { + HistogramSwap(&cur_combo, &histograms[idx1]); + histograms[idx2]->bit_cost_ = 0.; + } else { + ++num_combine_failures; + } } } } } + if (low_effort) { + // Update the bit_cost for the merged histograms (per bin index). 
+ UpdateHistogramCost(histograms[idx1]); + } } HistogramCompactBins(image_histo); return cur_combo; @@ -670,7 +691,7 @@ static int HistogramCombineGreedy(VP8LHistogramSet* const image_histo, return ok; } -static VP8LHistogram*HistogramCombineStochastic( +static VP8LHistogram* HistogramCombineStochastic( VP8LHistogramSet* const image_histo, VP8LHistogram* tmp_histo, VP8LHistogram* best_combo, @@ -797,7 +818,8 @@ static double GetCombineCostFactor(int histo_size, int quality) { int VP8LGetHistoImageSymbols(int xsize, int ysize, const VP8LBackwardRefs* const refs, - int quality, int histo_bits, int cache_bits, + int quality, int low_effort, + int histo_bits, int cache_bits, VP8LHistogramSet* const image_histo, VP8LHistogramSet* const tmp_histos, uint16_t* const histogram_symbols) { @@ -805,6 +827,7 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize, const int histo_xsize = histo_bits ? VP8LSubSampleSize(xsize, histo_bits) : 1; const int histo_ysize = histo_bits ? VP8LSubSampleSize(ysize, histo_bits) : 1; const int image_histo_raw_size = histo_xsize * histo_ysize; + const int entropy_combine_num_bins = low_effort ? NUM_PARTITIONS : BIN_SIZE; // The bin_map for every bin follows following semantics: // bin_map[n][0] = num_histo; // The number of histograms in that bin. @@ -816,14 +839,16 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize, VP8LHistogramSet* const orig_histo = VP8LAllocateHistogramSet(image_histo_raw_size, cache_bits); VP8LHistogram* cur_combo; + const int entropy_combine = (orig_histo != NULL) && (quality < 100) && + (orig_histo->size > entropy_combine_num_bins * 2); if (orig_histo == NULL) goto Error; // Don't attempt linear bin-partition heuristic for: // histograms of small sizes, as bin_map will be very sparse and; // Maximum quality (q==100), to preserve the compression gains at that level. 
- if (orig_histo->size > 2 * BIN_SIZE && quality < 100) { - const int bin_map_size = bin_depth * BIN_SIZE; + if (entropy_combine) { + const int bin_map_size = bin_depth * entropy_combine_num_bins; bin_map = (int16_t*)WebPSafeCalloc(bin_map_size, sizeof(*bin_map)); if (bin_map == NULL) goto Error; } @@ -834,17 +859,19 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize, HistogramCopyAndAnalyze(orig_histo, image_histo); cur_combo = tmp_histos->histograms[1]; // pick up working slot - if (bin_map != NULL) { + if (entropy_combine) { const double combine_cost_factor = GetCombineCostFactor(image_histo_raw_size, quality); - HistogramAnalyzeEntropyBin(orig_histo, bin_map); + HistogramAnalyzeEntropyBin(orig_histo, bin_map, low_effort); // Collapse histograms with similar entropy. - cur_combo = - HistogramCombineEntropyBin(image_histo, cur_combo, - bin_map, bin_depth, combine_cost_factor); + cur_combo = HistogramCombineEntropyBin(image_histo, cur_combo, bin_map, + bin_depth, entropy_combine_num_bins, + combine_cost_factor, low_effort); } - { + // Don't combine the histograms using stochastic and greedy heuristics for + // low-effort compression mode. + if (!low_effort || !entropy_combine) { const float x = quality / 100.f; // cubic ramp between 1 and MAX_HISTO_GREEDY: const int threshold_size = (int)(1 + (x * x * x) * (MAX_HISTO_GREEDY - 1)); @@ -857,6 +884,7 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize, } } + // TODO(vikasa): Optimize HistogramRemap for low-effort compression mode also. // Find the optimal map from original histograms to the final ones. HistogramRemap(orig_histo, image_histo, histogram_symbols); diff --git a/src/enc/histogram.h b/src/enc/histogram.h index 4649c92c..e997a9cc 100644 --- a/src/enc/histogram.h +++ b/src/enc/histogram.h @@ -101,7 +101,8 @@ static WEBP_INLINE int VP8LHistogramNumCodes(int palette_code_bits) { // Builds the histogram image. 
int VP8LGetHistoImageSymbols(int xsize, int ysize, const VP8LBackwardRefs* const refs, - int quality, int histogram_bits, int cache_bits, + int quality, int low_effort, + int histogram_bits, int cache_bits, VP8LHistogramSet* const image_in, VP8LHistogramSet* const tmp_histos, uint16_t* const histogram_symbols); diff --git a/src/enc/vp8l.c b/src/enc/vp8l.c index 2675fe6c..a7ce9d44 100644 --- a/src/enc/vp8l.c +++ b/src/enc/vp8l.c @@ -748,7 +748,7 @@ static WebPEncodingError EncodeImageInternal(VP8LBitWriter* const bw, VP8LHashChain* const hash_chain, VP8LBackwardRefs refs_array[2], int width, int height, int quality, - int* cache_bits, + int low_effort, int* cache_bits, int histogram_bits, size_t init_byte_position, int* const hdr_size, @@ -799,9 +799,9 @@ static WebPEncodingError EncodeImageInternal(VP8LBitWriter* const bw, } // Build histogram image and symbols from backward references. - if (!VP8LGetHistoImageSymbols(width, height, &refs, quality, histogram_bits, - *cache_bits, histogram_image, tmp_histos, - histogram_symbols)) { + if (!VP8LGetHistoImageSymbols(width, height, &refs, quality, low_effort, + histogram_bits, *cache_bits, histogram_image, + tmp_histos, histogram_symbols)) { err = VP8_ENC_ERROR_OUT_OF_MEMORY; goto Error; } @@ -1318,7 +1318,7 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config, // --------------------------------------------------------------------------- // Encode and write the transformed image. err = EncodeImageInternal(bw, enc->argb_, &enc->hash_chain_, enc->refs_, - enc->current_width_, height, quality, + enc->current_width_, height, quality, low_effort, &enc->cache_bits_, enc->histo_bits_, byte_position, &hdr_size, &data_size); if (err != VP8_ENC_OK) goto Error;