Optimize and re-structure VP8LGetHistoImageSymbols

Optimize and restructure the VP8LGetHistoImageSymbols method by using a bin-hash
to merge the histograms more efficiently, instead of the randomized
heuristic of the existing HistogramCombine method.

This change speeds up lossless encoding by 40-50% (for method=4 and Q > 50)
with a 0.8% penalty in compression density. For lower methods, the speed-up is
25-30%, with a 0.4% penalty in compression density.

Change-Id: If61adadb1a041b95def6405aa1fe3b83c3cb25ce
This commit is contained in:
Vikas Arora 2014-03-13 11:34:12 -07:00
parent 068b14ac57
commit fef22704ec
2 changed files with 287 additions and 78 deletions

View File

@ -14,13 +14,20 @@
#endif #endif
#include <math.h> #include <math.h>
#include <stdio.h>
#include "./backward_references.h" #include "./backward_references.h"
#include "./histogram.h" #include "./histogram.h"
#include "../dsp/lossless.h" #include "../dsp/lossless.h"
#include "../utils/utils.h" #include "../utils/utils.h"
#define MAX_COST 1.e38
// Number of partitions for the three dominant (literal, red and blue) symbol
// costs.
#define NUM_PARTITIONS 4
// The size of the bin-hash corresponding to the three dominant costs.
#define BIN_SIZE (NUM_PARTITIONS * NUM_PARTITIONS * NUM_PARTITIONS)
static void HistogramClear(VP8LHistogram* const p) { static void HistogramClear(VP8LHistogram* const p) {
memset(p->literal_, 0, sizeof(p->literal_)); memset(p->literal_, 0, sizeof(p->literal_));
memset(p->red_, 0, sizeof(p->red_)); memset(p->red_, 0, sizeof(p->red_));
@ -243,7 +250,6 @@ static double PopulationCost(const int* const population, int length) {
static double GetCombinedEntropy(const int* const X, const int* const Y, static double GetCombinedEntropy(const int* const X, const int* const Y,
int length) { int length) {
return BitsEntropyCombined(X, Y, length) + HuffmanCostCombined(X, Y, length); return BitsEntropyCombined(X, Y, length) + HuffmanCostCombined(X, Y, length);
} }
static double ExtraCost(const int* const population, int length) { static double ExtraCost(const int* const population, int length) {
@ -326,11 +332,11 @@ static int GetCombinedHistogramEntropy(const VP8LHistogram* const a,
*cost += GetCombinedEntropy(a->blue_, b->blue_, 256); *cost += GetCombinedEntropy(a->blue_, b->blue_, 256);
if (*cost > cost_threshold) return 0; if (*cost > cost_threshold) return 0;
*cost += GetCombinedEntropy(a->distance_, b->distance_, NUM_DISTANCE_CODES); *cost += GetCombinedEntropy(a->alpha_, b->alpha_, 256);
*cost += ExtraCostCombined(a->distance_, b->distance_, NUM_DISTANCE_CODES);
if (*cost > cost_threshold) return 0; if (*cost > cost_threshold) return 0;
*cost += GetCombinedEntropy(a->alpha_, b->alpha_, 256); *cost += GetCombinedEntropy(a->distance_, b->distance_, NUM_DISTANCE_CODES);
*cost += ExtraCostCombined(a->distance_, b->distance_, NUM_DISTANCE_CODES);
if (*cost > cost_threshold) return 0; if (*cost > cost_threshold) return 0;
return 1; return 1;
@ -384,14 +390,81 @@ static double HistogramAddThresh(const VP8LHistogram* const a,
// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------
static void HistogramBuildImage(int xsize, int histo_bits, // The structure to keep track of cost range for the three dominant entropy
const VP8LBackwardRefs* const backward_refs, // symbols.
VP8LHistogramSet* const image) { // TODO(skal): Evaluate if float can be used here instead of double for
// representing the entropy costs.
// Running [min, max] bounds of the three cached per-histogram costs
// (literal_cost_, red_cost_ and blue_cost_ of VP8LHistogram).
// DominantCostRangeInit() resets every *_max_ to 0 and every *_min_ to
// MAX_COST; UpdateDominantCostRange() then widens the bounds one histogram at
// a time, and GetHistoBinIndex() uses them to map a histogram's costs to a bin.
typedef struct {
double literal_max_;
double literal_min_;
double red_max_;
double red_min_;
double blue_max_;
double blue_min_;
} DominantCostRange;
// Resets 'c' to an empty range: all lower bounds are set to MAX_COST and all
// upper bounds to 0, so that subsequent UpdateDominantCostRange() calls can
// only tighten the minimums downwards and push the maximums upwards.
static void DominantCostRangeInit(DominantCostRange* const c) {
  c->literal_min_ = c->red_min_ = c->blue_min_ = MAX_COST;
  c->literal_max_ = c->red_max_ = c->blue_max_ = 0.;
}
// Widens the bounds stored in 'c' so that they also cover the cached literal,
// red and blue costs of histogram 'h'. The cost fields of 'h' are only read,
// never recomputed, so they must be up-to-date before this call.
static void UpdateDominantCostRange(
    const VP8LHistogram* const h, DominantCostRange* const c) {
  const double literal = h->literal_cost_;
  const double red = h->red_cost_;
  const double blue = h->blue_cost_;

  if (literal > c->literal_max_) c->literal_max_ = literal;
  if (literal < c->literal_min_) c->literal_min_ = literal;

  if (red > c->red_max_) c->red_max_ = red;
  if (red < c->red_min_) c->red_min_ = red;

  if (blue > c->blue_max_) c->blue_max_ = blue;
  if (blue < c->blue_min_) c->blue_min_ = blue;
}
// Recomputes the cached cost fields of 'h' from its current symbol counts:
//   literal_cost_, red_cost_, blue_cost_  (the three "dominant" costs), and
//   bit_cost_ = literal + red + blue + alpha + distance costs.
// The alpha and distance costs are not cached individually; they only
// contribute to bit_cost_.
static void UpdateHistogramCost(VP8LHistogram* const h) {
  // Kept in double: PopulationCost() / ExtraCost() return double and bit_cost_
  // is a double, so narrowing these two terms to float would make the
  // aggregate less precise than the terms it is summed with.
  const double alpha_cost = PopulationCost(h->alpha_, 256);
  const double distance_cost =
      PopulationCost(h->distance_, NUM_DISTANCE_CODES) +
      ExtraCost(h->distance_, NUM_DISTANCE_CODES);
  const int num_codes = VP8LHistogramNumCodes(h->palette_code_bits_);
  // NOTE(review): the extra-bits term starts at literal_ + 256, presumably the
  // first length-prefix entry of the literal_ array -- confirm against the
  // VP8LHistogram layout.
  h->literal_cost_ = PopulationCost(h->literal_, num_codes) +
                     ExtraCost(h->literal_ + 256, NUM_LENGTH_CODES);
  h->red_cost_ = PopulationCost(h->red_, 256);
  h->blue_cost_ = PopulationCost(h->blue_, 256);
  h->bit_cost_ = h->literal_cost_ + h->red_cost_ + h->blue_cost_ +
                 alpha_cost + distance_cost;
}
// Maps 'val' to one of NUM_PARTITIONS equal-width partitions of [min, max].
// Returns a partition index that is always within [0, NUM_PARTITIONS - 1].
static int GetBinIdForEntropy(double min, double max, double val) {
  // The small padding keeps val == max inside the last partition and avoids a
  // division by zero when min == max.
  const double range = max - min + 1e-6;
  const double delta = val - min;
  const double scaled = NUM_PARTITIONS * delta / range;
  // Clamp before converting: the padding above is lost to rounding when
  // (max - min) is very large, and a 'val' outside [min, max] (or a NaN) would
  // otherwise produce an index outside the bin table. The negated comparison
  // also routes NaN to partition 0.
  if (!(scaled > 0.)) return 0;
  if (scaled >= NUM_PARTITIONS) return NUM_PARTITIONS - 1;
  return (int)scaled;
}
// Returns the bin index of histogram 'h', given the cost bounds 'c'.
// The literal, red and blue costs are each mapped to a partition by
// GetBinIdForEntropy() and the three partition ids are combined as the digits
// of a base-NUM_PARTITIONS number (literal is the most significant digit,
// blue the least significant one).
// TODO(vikasa): Evaluate, if there's any correlation between red & blue.
static int GetHistoBinIndex(
    const VP8LHistogram* const h, const DominantCostRange* const c) {
  const int literal_bin =
      GetBinIdForEntropy(c->literal_min_, c->literal_max_, h->literal_cost_);
  const int red_bin =
      GetBinIdForEntropy(c->red_min_, c->red_max_, h->red_cost_);
  const int blue_bin =
      GetBinIdForEntropy(c->blue_min_, c->blue_max_, h->blue_cost_);
  const int bin_id =
      (literal_bin * NUM_PARTITIONS + red_bin) * NUM_PARTITIONS + blue_bin;
  assert(bin_id < BIN_SIZE);
  return bin_id;
}
// Construct the Histogram from backward references.
static void HistogramBuild(
int xsize, int histo_bits, const VP8LBackwardRefs* const backward_refs,
VP8LHistogramSet* const init_histo) {
int i; int i;
int x = 0, y = 0; int x = 0, y = 0;
const int histo_xsize = VP8LSubSampleSize(xsize, histo_bits); const int histo_xsize = VP8LSubSampleSize(xsize, histo_bits);
VP8LHistogram** const histograms = image->histograms; VP8LHistogram** const histograms = init_histo->histograms;
assert(histo_bits > 0); assert(histo_bits > 0);
// Construct the Histo from a given backward references.
for (i = 0; i < backward_refs->size; ++i) { for (i = 0; i < backward_refs->size; ++i) {
const PixOrCopy* const v = &backward_refs->refs[i]; const PixOrCopy* const v = &backward_refs->refs[i];
const int ix = (y >> histo_bits) * histo_xsize + (x >> histo_bits); const int ix = (y >> histo_bits) * histo_xsize + (x >> histo_bits);
@ -404,6 +477,121 @@ static void HistogramBuildImage(int xsize, int histo_bits,
} }
} }
// For every histogram of 'init_histo': stores the value returned by
// VP8LHistogramEstimateBits() in its bit_cost_ field, then copies the whole
// histogram into the slot with the same index in 'histo_image'.
// 'histo_image' must provide at least init_histo->size allocated histograms.
static void HistogramAnalyze(
    VP8LHistogramSet* const init_histo, VP8LHistogramSet* const histo_image) {
  VP8LHistogram** const src = init_histo->histograms;
  VP8LHistogram** const dst = histo_image->histograms;
  const int count = init_histo->size;
  int n;
  for (n = 0; n < count; ++n) {
    src[n]->bit_cost_ = VP8LHistogramEstimateBits(src[n]);
    *dst[n] = *src[n];
  }
}
// Computes the cached costs of every histogram in 'init_histo', copies the
// histograms to 'histo_image' and distributes their indices over the entropy
// bins of 'bin_map', based on the literal, red and blue costs.
// 'bin_map' is laid out as BIN_SIZE rows of (init_histo->size + 1) entries:
// entry 0 of a row counts the histograms in that bin, entries 1..count hold
// their indices. The counters are incremented, not reset, so 'bin_map' must be
// zeroed by the caller. Nothing is done when 'bin_map' is NULL.
// NOTE(review): indices and counters are stored as int16_t; presumably the
// caller keeps init_histo->size within that range -- confirm.
static void HistogramAnalyzeBin(
    VP8LHistogramSet* const init_histo, VP8LHistogramSet* const histo_image,
    int16_t* const bin_map) {
  VP8LHistogram** const src = init_histo->histograms;
  const int count = init_histo->size;
  DominantCostRange cost_range;
  int bin_depth;
  int n;

  if (bin_map == NULL) return;

  bin_depth = init_histo->size + 1;
  DominantCostRangeInit(&cost_range);

  // First pass: refresh the costs, duplicate each histogram into
  // 'histo_image' and collect the global range of the dominant costs.
  for (n = 0; n < count; ++n) {
    UpdateHistogramCost(src[n]);
    *histo_image->histograms[n] = *src[n];
    UpdateDominantCostRange(src[n], &cost_range);
  }

  // Second pass: the range is now final, so every histogram can be hashed to
  // its bin and appended to that bin's row.
  for (n = 0; n < count; ++n) {
    const int16_t bin_id = (int16_t)GetHistoBinIndex(src[n], &cost_range);
    const int bin_offset = bin_id * bin_depth;
    // Bump the row's counter; its new value is also the slot to write to.
    const int num_histos = ++bin_map[bin_offset];
    assert(bin_offset + num_histos < bin_depth * BIN_SIZE);
    bin_map[bin_offset + num_histos] = n;
  }
}
// Packs the histograms that are still in use at the front of 'histo_image'.
// A histogram whose bit_cost_ is 0 is treated as merged away: its slot is
// either overwritten with the content of a live histogram taken from the tail
// of the set, or dropped. Every slot past the new end is set to NULL and
// histo_image->size is updated to the number of remaining slots. The relative
// order of the histograms is not preserved.
// TODO(vikasa): Evaluate if this method can be avoided by altering the code
// logic of HistogramCombineBin main loop.
static void HistogramCompactBins(VP8LHistogramSet* const histo_image) {
  VP8LHistogram** const histograms = histo_image->histograms;
  int head = 0;
  int tail = histo_image->size - 1;
  while (head < tail) {
    // Advance past the leading histograms that are still in use.
    while (head <= tail &&
           histograms[head] != NULL &&
           histograms[head]->bit_cost_ != 0.) {
      ++head;
    }
    // Discard the trailing histograms that were merged away.
    while (head <= tail && histograms[tail]->bit_cost_ == 0.) {
      histograms[tail--] = NULL;
    }
    // 'head' is a free slot and 'tail' a live histogram: move the content.
    if (head < tail) {
      assert(histograms[head] != NULL);
      assert(histograms[tail] != NULL);
      *histograms[head] = *histograms[tail];
      histograms[tail--] = NULL;
    }
  }
  histo_image->size = tail + 1;
}
// Tries to merge, bin by bin, every histogram of a bin into the first
// histogram listed for that bin, then compacts 'histo_image'.
// 'bin_map' holds BIN_SIZE rows of 'bin_depth' entries: entry 0 is the number
// of histograms in the bin, entries 1..count are their indices.
// 'histos' provides the scratch histogram that receives each trial merge.
// A merge is kept only if the value returned by HistogramAddEval() is below
// -10% of the candidate's bit_cost_; the candidate is then flagged as merged
// by zeroing its bit_cost_.
// NOTE(review): HistogramAddEval() is presumably writing the combined
// histogram into its third argument and returning the cost change -- confirm
// against its definition.
static void HistogramCombineBin(VP8LHistogramSet* const histo_image,
                                VP8LHistogram* const histos,
                                int bin_depth,
                                int16_t* const bin_map) {
  VP8LHistogram** const histograms = histo_image->histograms;
  VP8LHistogram* const merged = histos;
  int bin;
  for (bin = 0; bin < BIN_SIZE; ++bin) {
    const int16_t* const row = bin_map + bin * bin_depth;
    const int count = row[0];
    const int first = row[1];
    int k;
    for (k = 2; k <= count; ++k) {
      const int other = row[k];
      const double other_cost = histograms[other]->bit_cost_;
      double threshold;
      double cost_diff;
      if (!(other_cost > 0.)) continue;

      threshold = -other_cost * 0.1;
      cost_diff = HistogramAddEval(histograms[first], histograms[other],
                                   merged, threshold);
      if (cost_diff < threshold) {
        *histograms[first] = *merged;
        histograms[other]->bit_cost_ = 0.;
      }
    }
  }
  HistogramCompactBins(histo_image);
}
static uint32_t MyRand(uint32_t *seed) { static uint32_t MyRand(uint32_t *seed) {
*seed *= 16807U; *seed *= 16807U;
if (*seed == 0) { if (*seed == 0) {
@ -412,48 +600,45 @@ static uint32_t MyRand(uint32_t *seed) {
return *seed; return *seed;
} }
static int HistogramCombine(const VP8LHistogramSet* const in, static void HistogramCombine(VP8LHistogramSet* const histo_image,
VP8LHistogramSet* const out, int iter_mult, VP8LHistogram* const histos, int quality) {
int num_pairs, int num_tries_no_success) { int iter;
int ok = 0;
int i, iter;
uint32_t seed = 0; uint32_t seed = 0;
int tries_with_no_success = 0; int tries_with_no_success = 0;
int out_size = in->size; int histo_image_size = histo_image->size;
const int outer_iters = in->size * iter_mult; const int iter_mult = (quality < 25) ? 2 : 2 + (quality - 25) / 8;
const int outer_iters = histo_image_size * iter_mult;
const int num_pairs = histo_image_size / 2;
const int num_tries_no_success = outer_iters / 2;
const int min_cluster_size = 2; const int min_cluster_size = 2;
VP8LHistogram* const histos = (VP8LHistogram*)malloc(2 * sizeof(*histos));
VP8LHistogram* cur_combo = histos + 0; // trial merged histogram VP8LHistogram* cur_combo = histos + 0; // trial merged histogram
VP8LHistogram* best_combo = histos + 1; // best merged histogram so far VP8LHistogram* best_combo = histos + 1; // best merged histogram so far
if (histos == NULL) goto End;
// Copy histograms from in[] to out[]. // Collapse similar histograms in 'histo_image'.
assert(in->size <= out->size); for (iter = 0;
for (i = 0; i < in->size; ++i) { iter < outer_iters && histo_image_size >= min_cluster_size;
in->histograms[i]->bit_cost_ = VP8LHistogramEstimateBits(in->histograms[i]); ++iter) {
*out->histograms[i] = *in->histograms[i];
}
// Collapse similar histograms in 'out'.
for (iter = 0; iter < outer_iters && out_size >= min_cluster_size; ++iter) {
double best_cost_diff = 0.; double best_cost_diff = 0.;
int best_idx1 = -1, best_idx2 = 1; int best_idx1 = -1, best_idx2 = 1;
int j; int j;
const int num_tries = (num_pairs < out_size) ? num_pairs : out_size; const int num_tries =
(num_pairs < histo_image_size) ? num_pairs : histo_image_size;
seed += iter; seed += iter;
for (j = 0; j < num_tries; ++j) { for (j = 0; j < num_tries; ++j) {
double curr_cost_diff; double curr_cost_diff;
// Choose two histograms at random and try to combine them. // Choose two histograms at random and try to combine them.
const uint32_t idx1 = MyRand(&seed) % out_size; const uint32_t idx1 = MyRand(&seed) % histo_image_size;
const uint32_t tmp = (j & 7) + 1; const uint32_t tmp = (j & 7) + 1;
const uint32_t diff = (tmp < 3) ? tmp : MyRand(&seed) % (out_size - 1); const uint32_t diff =
const uint32_t idx2 = (idx1 + diff + 1) % out_size; (tmp < 3) ? tmp : MyRand(&seed) % (histo_image_size - 1);
const uint32_t idx2 = (idx1 + diff + 1) % histo_image_size;
if (idx1 == idx2) { if (idx1 == idx2) {
continue; continue;
} }
// Calculate cost reduction on combining. // Calculate cost reduction on combining.
curr_cost_diff = HistogramAddEval(out->histograms[idx1], curr_cost_diff = HistogramAddEval(histo_image->histograms[idx1],
out->histograms[idx2], histo_image->histograms[idx2],
cur_combo, best_cost_diff); cur_combo, best_cost_diff);
if (curr_cost_diff < best_cost_diff) { // found a better pair? if (curr_cost_diff < best_cost_diff) { // found a better pair?
{ // swap cur/best combo histograms { // swap cur/best combo histograms
@ -468,12 +653,13 @@ static int HistogramCombine(const VP8LHistogramSet* const in,
} }
if (best_idx1 >= 0) { if (best_idx1 >= 0) {
*out->histograms[best_idx1] = *best_combo; *histo_image->histograms[best_idx1] = *best_combo;
// swap best_idx2 slot with last one (which is now unused) // swap best_idx2 slot with last one (which is now unused)
--out_size; --histo_image_size;
if (best_idx2 != out_size) { if (best_idx2 != histo_image_size) {
out->histograms[best_idx2] = out->histograms[out_size]; histo_image->histograms[best_idx2] =
out->histograms[out_size] = NULL; // just for sanity check. histo_image->histograms[histo_image_size];
histo_image->histograms[histo_image_size] = NULL;
} }
tries_with_no_success = 0; tries_with_no_success = 0;
} }
@ -481,38 +667,27 @@ static int HistogramCombine(const VP8LHistogramSet* const in,
break; break;
} }
} }
out->size = out_size; histo_image->size = histo_image_size;
ok = 1;
End:
free(histos);
return ok;
} }
// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------
// Histogram refinement // Histogram refinement
// What is the bit cost of moving square_histogram from cur_symbol to candidate.
static double HistogramDistance(const VP8LHistogram* const square_histogram,
const VP8LHistogram* const candidate,
double cost_threshold) {
return HistogramAddThresh(candidate, square_histogram, cost_threshold);
}
// Find the best 'out' histogram for each of the 'in' histograms. // Find the best 'out' histogram for each of the 'in' histograms.
// Note: we assume that out[]->bit_cost_ is already up-to-date. // Note: we assume that out[]->bit_cost_ is already up-to-date.
static void HistogramRemap(const VP8LHistogramSet* const in, static void HistogramRemap(const VP8LHistogramSet* const init_histo,
const VP8LHistogramSet* const out, const VP8LHistogramSet* const histo_image,
uint16_t* const symbols) { uint16_t* const symbols) {
int i; int i;
for (i = 0; i < in->size; ++i) { for (i = 0; i < init_histo->size; ++i) {
int best_out = 0; int best_out = 0;
double best_bits = double best_bits = HistogramAddThresh(histo_image->histograms[0],
HistogramDistance(in->histograms[i], out->histograms[0], 1.e38); init_histo->histograms[i], MAX_COST);
int k; int k;
for (k = 1; k < out->size; ++k) { for (k = 1; k < histo_image->size; ++k) {
const double cur_bits = const double cur_bits = HistogramAddThresh(histo_image->histograms[k],
HistogramDistance(in->histograms[i], out->histograms[k], best_bits); init_histo->histograms[i],
best_bits);
if (cur_bits < best_bits) { if (cur_bits < best_bits) {
best_bits = cur_bits; best_bits = cur_bits;
best_out = k; best_out = k;
@ -522,45 +697,76 @@ static void HistogramRemap(const VP8LHistogramSet* const in,
} }
// Recompute each out based on raw and symbols. // Recompute each out based on raw and symbols.
for (i = 0; i < out->size; ++i) { for (i = 0; i < histo_image->size; ++i) {
HistogramClear(out->histograms[i]); HistogramClear(histo_image->histograms[i]);
} }
for (i = 0; i < in->size; ++i) {
HistogramAdd(in->histograms[i], out->histograms[symbols[i]]); for (i = 0; i < init_histo->size; ++i) {
HistogramAdd(init_histo->histograms[i],
histo_image->histograms[symbols[i]]);
} }
} }
int VP8LGetHistoImageSymbols(int xsize, int ysize, int VP8LGetHistoImageSymbols(int xsize, int ysize,
const VP8LBackwardRefs* const refs, const VP8LBackwardRefs* const refs,
int quality, int histo_bits, int cache_bits, int quality, int histo_bits, int cache_bits,
VP8LHistogramSet* const image_in, VP8LHistogramSet* const histo_image,
uint16_t* const histogram_symbols) { uint16_t* const histogram_symbols) {
int ok = 0; int ok = 0;
const int histo_xsize = histo_bits ? VP8LSubSampleSize(xsize, histo_bits) : 1; const int histo_xsize = histo_bits ? VP8LSubSampleSize(xsize, histo_bits) : 1;
const int histo_ysize = histo_bits ? VP8LSubSampleSize(ysize, histo_bits) : 1; const int histo_ysize = histo_bits ? VP8LSubSampleSize(ysize, histo_bits) : 1;
const int histo_image_raw_size = histo_xsize * histo_ysize; const int histo_image_raw_size = histo_xsize * histo_ysize;
// Heuristic params for HistogramCombine(). // The bin_map for every bin follows following semantics:
const int num_tries_no_success = 8 + (quality >> 1); // bin_map[n][0] = num_histo; // The number of histograms in that bin.
const int iter_mult = (quality < 27) ? 1 : 1 + ((quality - 27) >> 4); // bin_map[n][1] = index of first histogram in that bin;
const int num_pairs = (quality < 25) ? 10 : (5 * quality) >> 3; // bin_map[n][num_histo] = index of last histogram in that bin;
// bin_map[n][num_histo + 1] ... bin_map[n][bin_depth - 1] = un-used indices.
VP8LHistogramSet* const image_out = const int bin_depth = histo_image_raw_size + 1;
int16_t* bin_map = NULL;
VP8LHistogram* const histos = (VP8LHistogram*)malloc(2 * sizeof(*histos));
VP8LHistogramSet* const init_histo =
VP8LAllocateHistogramSet(histo_image_raw_size, cache_bits); VP8LAllocateHistogramSet(histo_image_raw_size, cache_bits);
if (image_out == NULL) return 0;
// Build histogram image. if (init_histo == NULL || histos == NULL) {
HistogramBuildImage(xsize, histo_bits, refs, image_out);
// Collapse similar histograms.
if (!HistogramCombine(image_out, image_in, iter_mult, num_pairs,
num_tries_no_success)) {
goto Error; goto Error;
} }
// Don't attempt linear bin-partition heuristic for:
// Histograms of small sizes, as bin_map will be very sparse and;
// Higher qualities (> 90), to preserve the compression gains at those
// quality settings.
if (init_histo->size > 2 * BIN_SIZE && quality < 90) {
const int bin_map_size = (uint64_t)bin_depth * BIN_SIZE;
bin_map = (int16_t*)WebPSafeCalloc(bin_map_size, sizeof(*bin_map));
if (bin_map == NULL) goto Error;
}
// Construct the Histogram from backward references.
HistogramBuild(xsize, histo_bits, refs, init_histo);
if (bin_map != NULL) {
// Partition Histograms to different entropy bins for three dominant
// (literal red and blue) symbol costs and compute the histogram aggregate
// bit_cost.
HistogramAnalyzeBin(init_histo, histo_image, bin_map);
HistogramCombineBin(histo_image, histos, bin_depth, bin_map);
} else {
// Compute the histogram aggregate bit_cost.
HistogramAnalyze(init_histo, histo_image);
}
// Collapse similar histograms.
HistogramCombine(histo_image, histos, quality);
// Find the optimal map from original histograms to the final ones. // Find the optimal map from original histograms to the final ones.
HistogramRemap(image_out, image_in, histogram_symbols); HistogramRemap(init_histo, histo_image, histogram_symbols);
ok = 1; ok = 1;
Error: Error:
free(image_out); free(bin_map);
free(init_histo);
free(histos);
return ok; return ok;
} }

View File

@ -40,6 +40,9 @@ typedef struct {
int distance_[NUM_DISTANCE_CODES]; int distance_[NUM_DISTANCE_CODES];
int palette_code_bits_; int palette_code_bits_;
double bit_cost_; // cached value of VP8LHistogramEstimateBits(this) double bit_cost_; // cached value of VP8LHistogramEstimateBits(this)
double literal_cost_; // Cached values of dominant entropy costs:
double red_cost_; // literal, red & blue.
double blue_cost_;
} VP8LHistogram; } VP8LHistogram;
// Collection of histograms with fixed capacity, allocated as one // Collection of histograms with fixed capacity, allocated as one