Speed up VP8LGetHistoImageSymbols for the low-effort (m=0) mode.

- Do a lightweight, entropy-based histogram combine and leave out the
  CPU-intensive stochastic and greedy heuristics for combining the
  histograms (a simplified sketch of this path follows below).
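
A minimal, self-contained sketch of that low-effort path, using a hypothetical
ToyHistogram stand-in instead of the real VP8LHistogram (the helper names
BinIdForEntropy, HistoBinIndexLowEffort and CombineBinLowEffort, the toy data,
and the epsilon guard are invented for illustration; NUM_PARTITIONS and the
literal_cost_/bit_cost_ fields mirror names in the diff below). Each histogram
is assigned an entropy bin from its literal cost alone, as
GetHistoBinIndexLowEffort does, and everything that lands in the same bin is
merged unconditionally, the way the low_effort branch of
HistogramCombineEntropyBin does, instead of evaluating HistogramAddEval
against a cost threshold:

#include <assert.h>
#include <stdio.h>

#define NUM_PARTITIONS 4  /* same partition-count name used in histogram.c */

/* Hypothetical stand-in for VP8LHistogram: only the fields this sketch needs
 * (the real struct carries several symbol-count arrays and cost fields). */
typedef struct {
  double literal_cost_;  /* estimated entropy of the literal symbols */
  double bit_cost_;      /* cached bit cost of the whole histogram */
  int counts_[8];        /* toy symbol counts */
} ToyHistogram;

/* Map a cost into one of NUM_PARTITIONS bins over [min, max]; the epsilon
 * keeps val == max inside the last bin. */
static int BinIdForEntropy(double min, double max, double val) {
  const double range = (max - min) + 1e-6;
  const int bin_id = (int)(NUM_PARTITIONS * (val - min) / range);
  assert(bin_id >= 0 && bin_id < NUM_PARTITIONS);
  return bin_id;
}

/* Low-effort bin index: one dominant cost (literal) only, instead of the
 * literal/red/blue partition used at higher effort. */
static int HistoBinIndexLowEffort(const ToyHistogram* const h,
                                  double literal_min, double literal_max) {
  return BinIdForEntropy(literal_min, literal_max, h->literal_cost_);
}

/* Merge every histogram that landed in the same bin into the bin's first
 * histogram, unconditionally: no HistogramAddEval, no cost threshold.
 * Absorbed histograms get bit_cost_ = 0 so a compaction pass can drop them;
 * the real code then recomputes the survivor's cost via UpdateHistogramCost,
 * which this sketch omits. */
static void CombineBinLowEffort(ToyHistogram* histograms[],
                                const int bin_members[], int num_in_bin) {
  int n, k;
  ToyHistogram* const dst = histograms[bin_members[0]];
  for (n = 1; n < num_in_bin; ++n) {
    ToyHistogram* const src = histograms[bin_members[n]];
    for (k = 0; k < 8; ++k) dst->counts_[k] += src->counts_[k];
    src->bit_cost_ = 0.;
  }
}

int main(void) {
  ToyHistogram h[4] = {
    { 1.0, 10., { 1, 2, 3, 4, 0, 0, 0, 0 } },
    { 1.1, 11., { 2, 2, 2, 2, 0, 0, 0, 0 } },
    { 7.9, 30., { 0, 0, 0, 0, 5, 5, 5, 5 } },
    { 8.0, 31., { 0, 0, 0, 0, 4, 4, 4, 4 } },
  };
  ToyHistogram* histograms[4] = { &h[0], &h[1], &h[2], &h[3] };
  /* Literal-cost range over all histograms (DominantCostRange tracks this
   * during analysis in the real code). */
  const double lit_min = 1.0, lit_max = 8.0;
  int bins[NUM_PARTITIONS][4];
  int bin_sizes[NUM_PARTITIONS] = { 0 };
  int i;
  for (i = 0; i < 4; ++i) {
    const int b = HistoBinIndexLowEffort(histograms[i], lit_min, lit_max);
    bins[b][bin_sizes[b]++] = i;
  }
  for (i = 0; i < NUM_PARTITIONS; ++i) {
    if (bin_sizes[i] > 1) CombineBinLowEffort(histograms, bins[i], bin_sizes[i]);
  }
  for (i = 0; i < 4; ++i) {  /* merged-away histograms now report cost 0 */
    printf("histogram %d: bit_cost_ = %.1f\n", i, histograms[i]->bit_cost_);
  }
  return 0;
}

At higher effort levels the same bin traversal keeps the cost check, so only
merges that do not inflate the estimated bit cost are accepted; the low-effort
branch trades that precision for speed, which matches the ~1% density loss
reported in the stats below.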

For a 1000-image PNG corpus (m=0), this change yields a speedup of about 10%
in the lower quality range and about 5% in the higher quality range, with
roughly a 1% drop in compression density in both cases. The compression stats
(before/after) for method = 0 are:
         Before           After
         bpp/MP/s         bpp/MP/s
q=0      2.8336/16.577    2.8615/18.000
q=5      2.8336/16.504    2.8615/18.216
q=10     2.8293/16.419    2.8572/18.070
q=15     2.8242/17.582    2.8519/18.371
q=20     2.8182/16.131    2.8454/18.975
q=25     2.7924/7.670     2.8230/8.531
q=30     2.7078/6.635     2.7310/7.706
q=35     2.7028/6.203     2.7253/6.855
q=40     2.7005/6.198     2.7231/6.364
q=45     2.6989/5.570     2.7216/5.844
q=50     2.6970/5.087     2.7196/5.210
q=55     2.6963/4.589     2.7208/4.766
q=60     2.6949/4.292     2.7195/4.495
q=65     2.6940/3.970     2.7185/4.024
q=70     2.6929/3.698     2.7174/3.699
q=75     2.6919/3.427     2.7164/3.449
q=80     2.6918/3.106     2.7161/3.222
q=85     2.6909/2.856     2.7153/2.919
q=90     2.6902/2.695     2.7145/2.766
q=95     2.6881/2.499     2.7124/2.548
q=100    2.6873/2.253     2.6873/2.285
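
For example, at q=0 the density goes from 2.8336 to 2.8615 bpp
(2.8615 / 2.8336 ≈ 1.010, about a 1% larger output), while throughput goes
from 16.577 to 18.000 MP/s (18.000 / 16.577 ≈ 1.086, roughly a 9% speedup).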

Change-Id: I0567945068f8dc7888041e93d872f9def91f50ba
Vikas Arora 2014-12-19 17:33:17 +00:00 committed by James Zern
parent 67f601cd46
commit b0b973c39b
3 changed files with 73 additions and 44 deletions


@@ -288,6 +288,14 @@ static int GetBinIdForEntropy(double min, double max, double val) {
return (int)(NUM_PARTITIONS * delta / range);
}
static int GetHistoBinIndexLowEffort(
const VP8LHistogram* const h, const DominantCostRange* const c) {
const int bin_id = GetBinIdForEntropy(c->literal_min_, c->literal_max_,
h->literal_cost_);
assert(bin_id < NUM_PARTITIONS);
return bin_id;
}
static int GetHistoBinIndex(
const VP8LHistogram* const h, const DominantCostRange* const c) {
const int bin_id =
@@ -340,8 +348,8 @@ static void HistogramCopyAndAnalyze(
// Partition histograms to different entropy bins for three dominant (literal,
// red and blue) symbol costs and compute the histogram aggregate bit_cost.
static void HistogramAnalyzeEntropyBin(
VP8LHistogramSet* const image_histo, int16_t* const bin_map) {
static void HistogramAnalyzeEntropyBin(VP8LHistogramSet* const image_histo,
int16_t* const bin_map, int low_effort) {
int i;
VP8LHistogram** const histograms = image_histo->histograms;
const int histo_size = image_histo->size;
@@ -360,7 +368,9 @@ static void HistogramAnalyzeEntropyBin(
for (i = 0; i < histo_size; ++i) {
int num_histos;
VP8LHistogram* const histo = histograms[i];
const int16_t bin_id = (int16_t)GetHistoBinIndex(histo, &cost_range);
const int16_t bin_id = low_effort ?
(int16_t)GetHistoBinIndexLowEffort(histo, &cost_range) :
(int16_t)GetHistoBinIndex(histo, &cost_range);
const int bin_offset = bin_id * bin_depth;
// bin_map[n][0] for every bin 'n' maintains the counter for the number of
// histograms in that bin.
@@ -392,12 +402,12 @@ static void HistogramCompactBins(VP8LHistogramSet* const image_histo) {
static VP8LHistogram* HistogramCombineEntropyBin(
VP8LHistogramSet* const image_histo,
VP8LHistogram* cur_combo,
int16_t* const bin_map, int bin_depth,
double combine_cost_factor) {
int16_t* const bin_map, int bin_depth, int num_bins,
double combine_cost_factor, int low_effort) {
int bin_id;
VP8LHistogram** const histograms = image_histo->histograms;
for (bin_id = 0; bin_id < BIN_SIZE; ++bin_id) {
for (bin_id = 0; bin_id < num_bins; ++bin_id) {
const int bin_offset = bin_id * bin_depth;
const int num_histos = bin_map[bin_offset];
const int idx1 = bin_map[bin_offset + 1];
@@ -405,32 +415,43 @@ static VP8LHistogram* HistogramCombineEntropyBin(
int n;
for (n = 2; n <= num_histos; ++n) {
const int idx2 = bin_map[bin_offset + n];
const double bit_cost_idx2 = histograms[idx2]->bit_cost_;
if (bit_cost_idx2 > 0.) {
const double bit_cost_thresh = -bit_cost_idx2 * combine_cost_factor;
const double curr_cost_diff =
HistogramAddEval(histograms[idx1], histograms[idx2],
cur_combo, bit_cost_thresh);
if (curr_cost_diff < bit_cost_thresh) {
// Try to merge two histograms only if the combo is a trivial one or
// the two candidate histograms are already non-trivial.
// For some images, 'try_combine' turns out to be false for a lot of
// histogram pairs. In that case, we fallback to combining histograms
// as usual to avoid increasing the header size.
const int try_combine =
(cur_combo->trivial_symbol_ != VP8L_NON_TRIVIAL_SYM) ||
((histograms[idx1]->trivial_symbol_ == VP8L_NON_TRIVIAL_SYM) &&
(histograms[idx2]->trivial_symbol_ == VP8L_NON_TRIVIAL_SYM));
const int max_combine_failures = 32;
if (try_combine || (num_combine_failures >= max_combine_failures)) {
HistogramSwap(&cur_combo, &histograms[idx1]);
histograms[idx2]->bit_cost_ = 0.;
} else {
++num_combine_failures;
if (low_effort) {
// Merge all histograms with the same bin index, irrespective of cost of
// the merged histograms.
VP8LHistogramAdd(histograms[idx1], histograms[idx2], histograms[idx1]);
histograms[idx2]->bit_cost_ = 0.;
} else {
const double bit_cost_idx2 = histograms[idx2]->bit_cost_;
if (bit_cost_idx2 > 0.) {
const double bit_cost_thresh = -bit_cost_idx2 * combine_cost_factor;
const double curr_cost_diff =
HistogramAddEval(histograms[idx1], histograms[idx2],
cur_combo, bit_cost_thresh);
if (curr_cost_diff < bit_cost_thresh) {
// Try to merge two histograms only if the combo is a trivial one or
// the two candidate histograms are already non-trivial.
// For some images, 'try_combine' turns out to be false for a lot of
// histogram pairs. In that case, we fallback to combining
// histograms as usual to avoid increasing the header size.
const int try_combine =
(cur_combo->trivial_symbol_ != VP8L_NON_TRIVIAL_SYM) ||
((histograms[idx1]->trivial_symbol_ == VP8L_NON_TRIVIAL_SYM) &&
(histograms[idx2]->trivial_symbol_ == VP8L_NON_TRIVIAL_SYM));
const int max_combine_failures = 32;
if (try_combine || (num_combine_failures >= max_combine_failures)) {
HistogramSwap(&cur_combo, &histograms[idx1]);
histograms[idx2]->bit_cost_ = 0.;
} else {
++num_combine_failures;
}
}
}
}
}
if (low_effort) {
// Update the bit_cost for the merged histograms (per bin index).
UpdateHistogramCost(histograms[idx1]);
}
}
HistogramCompactBins(image_histo);
return cur_combo;
@@ -670,7 +691,7 @@ static int HistogramCombineGreedy(VP8LHistogramSet* const image_histo,
return ok;
}
static VP8LHistogram*HistogramCombineStochastic(
static VP8LHistogram* HistogramCombineStochastic(
VP8LHistogramSet* const image_histo,
VP8LHistogram* tmp_histo,
VP8LHistogram* best_combo,
@@ -797,7 +818,8 @@ static double GetCombineCostFactor(int histo_size, int quality) {
int VP8LGetHistoImageSymbols(int xsize, int ysize,
const VP8LBackwardRefs* const refs,
int quality, int histo_bits, int cache_bits,
int quality, int low_effort,
int histo_bits, int cache_bits,
VP8LHistogramSet* const image_histo,
VP8LHistogramSet* const tmp_histos,
uint16_t* const histogram_symbols) {
@@ -805,6 +827,7 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize,
const int histo_xsize = histo_bits ? VP8LSubSampleSize(xsize, histo_bits) : 1;
const int histo_ysize = histo_bits ? VP8LSubSampleSize(ysize, histo_bits) : 1;
const int image_histo_raw_size = histo_xsize * histo_ysize;
const int entropy_combine_num_bins = low_effort ? NUM_PARTITIONS : BIN_SIZE;
// The bin_map for every bin follows following semantics:
// bin_map[n][0] = num_histo; // The number of histograms in that bin.
@@ -816,14 +839,16 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize,
VP8LHistogramSet* const orig_histo =
VP8LAllocateHistogramSet(image_histo_raw_size, cache_bits);
VP8LHistogram* cur_combo;
const int entropy_combine =
(orig_histo->size > entropy_combine_num_bins * 2) && (quality < 100);
if (orig_histo == NULL) goto Error;
// Don't attempt linear bin-partition heuristic for:
// histograms of small sizes, as bin_map will be very sparse and;
// Maximum quality (q==100), to preserve the compression gains at that level.
if (orig_histo->size > 2 * BIN_SIZE && quality < 100) {
const int bin_map_size = bin_depth * BIN_SIZE;
if (entropy_combine) {
const int bin_map_size = bin_depth * entropy_combine_num_bins;
bin_map = (int16_t*)WebPSafeCalloc(bin_map_size, sizeof(*bin_map));
if (bin_map == NULL) goto Error;
}
@@ -834,17 +859,19 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize,
HistogramCopyAndAnalyze(orig_histo, image_histo);
cur_combo = tmp_histos->histograms[1]; // pick up working slot
if (bin_map != NULL) {
if (entropy_combine) {
const double combine_cost_factor =
GetCombineCostFactor(image_histo_raw_size, quality);
HistogramAnalyzeEntropyBin(orig_histo, bin_map);
HistogramAnalyzeEntropyBin(orig_histo, bin_map, low_effort);
// Collapse histograms with similar entropy.
cur_combo =
HistogramCombineEntropyBin(image_histo, cur_combo,
bin_map, bin_depth, combine_cost_factor);
cur_combo = HistogramCombineEntropyBin(image_histo, cur_combo, bin_map,
bin_depth, entropy_combine_num_bins,
combine_cost_factor, low_effort);
}
{
// Don't combine the histograms using stochastic and greedy heuristics for
// low-effort compression mode.
if (!low_effort || !entropy_combine) {
const float x = quality / 100.f;
// cubic ramp between 1 and MAX_HISTO_GREEDY:
const int threshold_size = (int)(1 + (x * x * x) * (MAX_HISTO_GREEDY - 1));
@@ -857,6 +884,7 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize,
}
}
// TODO(vikasa): Optimize HistogramRemap for low-effort compression mode also.
// Find the optimal map from original histograms to the final ones.
HistogramRemap(orig_histo, image_histo, histogram_symbols);


@@ -101,7 +101,8 @@ static WEBP_INLINE int VP8LHistogramNumCodes(int palette_code_bits) {
// Builds the histogram image.
int VP8LGetHistoImageSymbols(int xsize, int ysize,
const VP8LBackwardRefs* const refs,
int quality, int histogram_bits, int cache_bits,
int quality, int low_effort,
int histogram_bits, int cache_bits,
VP8LHistogramSet* const image_in,
VP8LHistogramSet* const tmp_histos,
uint16_t* const histogram_symbols);


@@ -748,7 +748,7 @@ static WebPEncodingError EncodeImageInternal(VP8LBitWriter* const bw,
VP8LHashChain* const hash_chain,
VP8LBackwardRefs refs_array[2],
int width, int height, int quality,
int* cache_bits,
int low_effort, int* cache_bits,
int histogram_bits,
size_t init_byte_position,
int* const hdr_size,
@@ -799,9 +799,9 @@ static WebPEncodingError EncodeImageInternal(VP8LBitWriter* const bw,
}
// Build histogram image and symbols from backward references.
if (!VP8LGetHistoImageSymbols(width, height, &refs, quality, histogram_bits,
*cache_bits, histogram_image, tmp_histos,
histogram_symbols)) {
if (!VP8LGetHistoImageSymbols(width, height, &refs, quality, low_effort,
histogram_bits, *cache_bits, histogram_image,
tmp_histos, histogram_symbols)) {
err = VP8_ENC_ERROR_OUT_OF_MEMORY;
goto Error;
}
@@ -1318,7 +1318,7 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config,
// ---------------------------------------------------------------------------
// Encode and write the transformed image.
err = EncodeImageInternal(bw, enc->argb_, &enc->hash_chain_, enc->refs_,
enc->current_width_, height, quality,
enc->current_width_, height, quality, low_effort,
&enc->cache_bits_, enc->histo_bits_, byte_position,
&hdr_size, &data_size);
if (err != VP8_ENC_OK) goto Error;
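
These hunks pass the new low_effort flag down from VP8LEncodeStream through
EncodeImageInternal to VP8LGetHistoImageSymbols, but the place where the flag
is set is outside the diff context shown here. Since the commit title ties low
effort to m=0, it presumably mirrors the method setting, along the lines of
this hypothetical snippet (not part of the diff):

/* Hypothetical: derive the flag from the encoder method; method 0 is the
 * "low effort" mode the commit title refers to. */
const int low_effort = (config->method == 0);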