mirror of
https://github.com/webmproject/libwebp.git
synced 2025-07-17 06:19:54 +02:00
Reduce memory footprint for encoding WebP lossless.
Reduce calls to malloc (WebPSafeMalloc/WebPSafeCalloc) for: - Building the HashChain data structure used in creating the backward references. - Creating backward references for LZ77 or RLE coding. - Creating the Huffman tree for encoding the image. For the above-mentioned code paths, allocate memory once and re-use it subsequently. Reduce the footprint of the VP8LHistogram struct by changing the struct field 'literal_' from an array of constant size to a dynamically allocated buffer sized according to the input parameter cache_bits. Initialize the BitWriter buffer to a size corresponding to 16 bpp (2*W*H). There are some hard files that are compressed at 12 bpp or more. The realloc is costly and can be avoided for most WebP lossless images by allocating some extra memory at encoder initialization. Change-Id: I1ea8cf60df727b8eb41547901f376c9a585e6095
This commit is contained in:
committed by
Pascal Massimino
parent
1d62acf6af
commit
0b896101b4
@ -29,12 +29,35 @@
|
||||
#define BIN_SIZE (NUM_PARTITIONS * NUM_PARTITIONS * NUM_PARTITIONS)
|
||||
|
||||
static void HistogramClear(VP8LHistogram* const p) {
|
||||
memset(p->literal_, 0, sizeof(p->literal_));
|
||||
memset(p->red_, 0, sizeof(p->red_));
|
||||
memset(p->blue_, 0, sizeof(p->blue_));
|
||||
memset(p->alpha_, 0, sizeof(p->alpha_));
|
||||
memset(p->distance_, 0, sizeof(p->distance_));
|
||||
p->bit_cost_ = 0;
|
||||
int* const literal = p->literal_;
|
||||
const int cache_bits = p->palette_code_bits_;
|
||||
const uint64_t histo_size = VP8LGetHistogramSize(cache_bits);
|
||||
memset(p, 0, histo_size);
|
||||
p->palette_code_bits_ = cache_bits;
|
||||
p->literal_ = literal;
|
||||
}
|
||||
|
||||
// Copies 'src' into 'dst', both of which must use the same number of cache
// bits. VP8LGetHistogramSize() bytes are copied starting at the struct's
// address, so the struct fields and the counters stored right behind it are
// transferred in one memcpy(). 'dst' keeps its own literal_ pointer, which is
// saved before the copy and restored afterwards.
static void HistogramCopy(const VP8LHistogram* const src,
                          VP8LHistogram* const dst) {
  const int cache_bits = dst->palette_code_bits_;
  const uint64_t num_bytes = VP8LGetHistogramSize(cache_bits);
  int* const saved_literal = dst->literal_;

  assert(src->palette_code_bits_ == cache_bits);
  memcpy(dst, src, num_bytes);
  // The raw copy overwrote dst->literal_ with src's pointer; put dst's back.
  dst->literal_ = saved_literal;
}
|
||||
|
||||
int VP8LGetHistogramSize(int cache_bits) {
|
||||
const uint64_t literal_size = VP8LHistogramNumCodes(cache_bits);
|
||||
return sizeof(VP8LHistogram) + sizeof(int) * literal_size;
|
||||
}
|
||||
|
||||
// Releases a histogram by passing it to WebPSafeFree(). The struct and its
// literal_ counters come from a single allocation (see
// VP8LAllocateHistogram()), so literal_ is not freed separately.
void VP8LFreeHistogram(VP8LHistogram* const histo) {
  WebPSafeFree(histo);
}
|
||||
|
||||
// Releases a histogram set by passing the set pointer to WebPSafeFree().
// The histograms referenced by the set are not freed individually here.
void VP8LFreeHistogramSet(VP8LHistogramSet* const histo) {
  WebPSafeFree(histo);
}
|
||||
|
||||
void VP8LHistogramStoreRefs(const VP8LBackwardRefs* const refs,
|
||||
@ -60,13 +83,24 @@ void VP8LHistogramInit(VP8LHistogram* const p, int palette_code_bits) {
|
||||
HistogramClear(p);
|
||||
}
|
||||
|
||||
VP8LHistogram* VP8LAllocateHistogram(int cache_bits) {
|
||||
VP8LHistogram* histo = NULL;
|
||||
const uint64_t total_size = VP8LGetHistogramSize(cache_bits);
|
||||
uint8_t* const memory = (uint8_t*)WebPSafeMalloc(total_size, sizeof(*memory));
|
||||
if (memory == NULL) return NULL;
|
||||
histo = (VP8LHistogram*)memory;
|
||||
// literal_ won't necessary be aligned.
|
||||
histo->literal_ = (int*)(memory + sizeof(VP8LHistogram));
|
||||
VP8LHistogramInit(histo, cache_bits);
|
||||
return histo;
|
||||
}
|
||||
|
||||
VP8LHistogramSet* VP8LAllocateHistogramSet(int size, int cache_bits) {
|
||||
int i;
|
||||
VP8LHistogramSet* set;
|
||||
VP8LHistogram* bulk;
|
||||
const uint64_t total_size = sizeof(*set)
|
||||
+ (uint64_t)size * sizeof(*set->histograms)
|
||||
+ (uint64_t)size * sizeof(**set->histograms);
|
||||
+ (uint64_t)size * VP8LGetHistogramSize(cache_bits);
|
||||
uint8_t* memory = (uint8_t*)WebPSafeMalloc(total_size, sizeof(*memory));
|
||||
if (memory == NULL) return NULL;
|
||||
|
||||
@ -74,12 +108,15 @@ VP8LHistogramSet* VP8LAllocateHistogramSet(int size, int cache_bits) {
|
||||
memory += sizeof(*set);
|
||||
set->histograms = (VP8LHistogram**)memory;
|
||||
memory += size * sizeof(*set->histograms);
|
||||
bulk = (VP8LHistogram*)memory;
|
||||
set->max_size = size;
|
||||
set->size = size;
|
||||
for (i = 0; i < size; ++i) {
|
||||
set->histograms[i] = bulk + i;
|
||||
set->histograms[i] = (VP8LHistogram*)memory;
|
||||
// literal_ won't necessarily be aligned.
|
||||
set->histograms[i]->literal_ = (int*)(memory + sizeof(VP8LHistogram));
|
||||
VP8LHistogramInit(set->histograms[i], cache_bits);
|
||||
// There's no padding/alignment between successive histograms.
|
||||
memory += VP8LGetHistogramSize(cache_bits);
|
||||
}
|
||||
return set;
|
||||
}
|
||||
@ -94,12 +131,13 @@ void VP8LHistogramAddSinglePixOrCopy(VP8LHistogram* const histo,
|
||||
++histo->literal_[PixOrCopyLiteral(v, 1)];
|
||||
++histo->blue_[PixOrCopyLiteral(v, 0)];
|
||||
} else if (PixOrCopyIsCacheIdx(v)) {
|
||||
int literal_ix = 256 + NUM_LENGTH_CODES + PixOrCopyCacheIdx(v);
|
||||
int literal_ix =
|
||||
NUM_LITERAL_CODES + NUM_LENGTH_CODES + PixOrCopyCacheIdx(v);
|
||||
++histo->literal_[literal_ix];
|
||||
} else {
|
||||
int code, extra_bits;
|
||||
VP8LPrefixEncodeBits(PixOrCopyLength(v), &code, &extra_bits);
|
||||
++histo->literal_[256 + code];
|
||||
++histo->literal_[NUM_LITERAL_CODES + code];
|
||||
VP8LPrefixEncodeBits(PixOrCopyDistance(v), &code, &extra_bits);
|
||||
++histo->distance_[code];
|
||||
}
|
||||
@ -224,22 +262,22 @@ static double GetCombinedEntropy(const int* const X, const int* const Y,
|
||||
double VP8LHistogramEstimateBits(const VP8LHistogram* const p) {
|
||||
return
|
||||
PopulationCost(p->literal_, VP8LHistogramNumCodes(p->palette_code_bits_))
|
||||
+ PopulationCost(p->red_, 256)
|
||||
+ PopulationCost(p->blue_, 256)
|
||||
+ PopulationCost(p->alpha_, 256)
|
||||
+ PopulationCost(p->red_, NUM_LITERAL_CODES)
|
||||
+ PopulationCost(p->blue_, NUM_LITERAL_CODES)
|
||||
+ PopulationCost(p->alpha_, NUM_LITERAL_CODES)
|
||||
+ PopulationCost(p->distance_, NUM_DISTANCE_CODES)
|
||||
+ VP8LExtraCost(p->literal_ + 256, NUM_LENGTH_CODES)
|
||||
+ VP8LExtraCost(p->literal_ + NUM_LITERAL_CODES, NUM_LENGTH_CODES)
|
||||
+ VP8LExtraCost(p->distance_, NUM_DISTANCE_CODES);
|
||||
}
|
||||
|
||||
double VP8LHistogramEstimateBitsBulk(const VP8LHistogram* const p) {
|
||||
return
|
||||
BitsEntropy(p->literal_, VP8LHistogramNumCodes(p->palette_code_bits_))
|
||||
+ BitsEntropy(p->red_, 256)
|
||||
+ BitsEntropy(p->blue_, 256)
|
||||
+ BitsEntropy(p->alpha_, 256)
|
||||
+ BitsEntropy(p->red_, NUM_LITERAL_CODES)
|
||||
+ BitsEntropy(p->blue_, NUM_LITERAL_CODES)
|
||||
+ BitsEntropy(p->alpha_, NUM_LITERAL_CODES)
|
||||
+ BitsEntropy(p->distance_, NUM_DISTANCE_CODES)
|
||||
+ VP8LExtraCost(p->literal_ + 256, NUM_LENGTH_CODES)
|
||||
+ VP8LExtraCost(p->literal_ + NUM_LITERAL_CODES, NUM_LENGTH_CODES)
|
||||
+ VP8LExtraCost(p->distance_, NUM_DISTANCE_CODES);
|
||||
}
|
||||
|
||||
@ -250,13 +288,15 @@ double VP8LHistogramEstimateBitsBulk(const VP8LHistogram* const p) {
|
||||
static void HistogramAdd(const VP8LHistogram* const in,
|
||||
VP8LHistogram* const out) {
|
||||
int i;
|
||||
for (i = 0; i < PIX_OR_COPY_CODES_MAX; ++i) {
|
||||
int literal_size = VP8LHistogramNumCodes(out->palette_code_bits_);
|
||||
assert(in->palette_code_bits_ == out->palette_code_bits_);
|
||||
for (i = 0; i < literal_size; ++i) {
|
||||
out->literal_[i] += in->literal_[i];
|
||||
}
|
||||
for (i = 0; i < NUM_DISTANCE_CODES; ++i) {
|
||||
out->distance_[i] += in->distance_[i];
|
||||
}
|
||||
for (i = 0; i < 256; ++i) {
|
||||
for (i = 0; i < NUM_LITERAL_CODES; ++i) {
|
||||
out->red_[i] += in->red_[i];
|
||||
out->blue_[i] += in->blue_[i];
|
||||
out->alpha_[i] += in->alpha_[i];
|
||||
@ -267,22 +307,22 @@ static int GetCombinedHistogramEntropy(const VP8LHistogram* const a,
|
||||
const VP8LHistogram* const b,
|
||||
double cost_threshold,
|
||||
double* cost) {
|
||||
const int palette_code_bits =
|
||||
(a->palette_code_bits_ > b->palette_code_bits_) ? a->palette_code_bits_ :
|
||||
b->palette_code_bits_;
|
||||
const int palette_code_bits = a->palette_code_bits_;
|
||||
assert(a->palette_code_bits_ == b->palette_code_bits_);
|
||||
*cost += GetCombinedEntropy(a->literal_, b->literal_,
|
||||
VP8LHistogramNumCodes(palette_code_bits));
|
||||
*cost += VP8LExtraCostCombined(a->literal_ + 256, b->literal_ + 256,
|
||||
*cost += VP8LExtraCostCombined(a->literal_ + NUM_LITERAL_CODES,
|
||||
b->literal_ + NUM_LITERAL_CODES,
|
||||
NUM_LENGTH_CODES);
|
||||
if (*cost > cost_threshold) return 0;
|
||||
|
||||
*cost += GetCombinedEntropy(a->red_, b->red_, 256);
|
||||
*cost += GetCombinedEntropy(a->red_, b->red_, NUM_LITERAL_CODES);
|
||||
if (*cost > cost_threshold) return 0;
|
||||
|
||||
*cost += GetCombinedEntropy(a->blue_, b->blue_, 256);
|
||||
*cost += GetCombinedEntropy(a->blue_, b->blue_, NUM_LITERAL_CODES);
|
||||
if (*cost > cost_threshold) return 0;
|
||||
|
||||
*cost += GetCombinedEntropy(a->alpha_, b->alpha_, 256);
|
||||
*cost += GetCombinedEntropy(a->alpha_, b->alpha_, NUM_LITERAL_CODES);
|
||||
if (*cost > cost_threshold) return 0;
|
||||
|
||||
*cost += GetCombinedEntropy(a->distance_, b->distance_, NUM_DISTANCE_CODES);
|
||||
@ -306,22 +346,24 @@ static double HistogramAddEval(const VP8LHistogram* const a,
|
||||
double cost = 0;
|
||||
const double sum_cost = a->bit_cost_ + b->bit_cost_;
|
||||
int i;
|
||||
assert(a->palette_code_bits_ == b->palette_code_bits_);
|
||||
cost_threshold += sum_cost;
|
||||
|
||||
if (GetCombinedHistogramEntropy(a, b, cost_threshold, &cost)) {
|
||||
for (i = 0; i < PIX_OR_COPY_CODES_MAX; ++i) {
|
||||
int literal_size;
|
||||
out->palette_code_bits_ = a->palette_code_bits_;
|
||||
literal_size = VP8LHistogramNumCodes(out->palette_code_bits_);
|
||||
for (i = 0; i < literal_size; ++i) {
|
||||
out->literal_[i] = a->literal_[i] + b->literal_[i];
|
||||
}
|
||||
for (i = 0; i < NUM_DISTANCE_CODES; ++i) {
|
||||
out->distance_[i] = a->distance_[i] + b->distance_[i];
|
||||
}
|
||||
for (i = 0; i < 256; ++i) {
|
||||
for (i = 0; i < NUM_LITERAL_CODES; ++i) {
|
||||
out->red_[i] = a->red_[i] + b->red_[i];
|
||||
out->blue_[i] = a->blue_[i] + b->blue_[i];
|
||||
out->alpha_[i] = a->alpha_[i] + b->alpha_[i];
|
||||
}
|
||||
out->palette_code_bits_ = (a->palette_code_bits_ > b->palette_code_bits_) ?
|
||||
a->palette_code_bits_ : b->palette_code_bits_;
|
||||
out->bit_cost_ = cost;
|
||||
}
|
||||
|
||||
@ -374,15 +416,16 @@ static void UpdateDominantCostRange(
|
||||
}
|
||||
|
||||
static void UpdateHistogramCost(VP8LHistogram* const h) {
|
||||
const double alpha_cost = PopulationCost(h->alpha_, 256);
|
||||
const double alpha_cost = PopulationCost(h->alpha_, NUM_LITERAL_CODES);
|
||||
const double distance_cost =
|
||||
PopulationCost(h->distance_, NUM_DISTANCE_CODES) +
|
||||
VP8LExtraCost(h->distance_, NUM_DISTANCE_CODES);
|
||||
const int num_codes = VP8LHistogramNumCodes(h->palette_code_bits_);
|
||||
h->literal_cost_ = PopulationCost(h->literal_, num_codes) +
|
||||
VP8LExtraCost(h->literal_ + 256, NUM_LENGTH_CODES);
|
||||
h->red_cost_ = PopulationCost(h->red_, 256);
|
||||
h->blue_cost_ = PopulationCost(h->blue_, 256);
|
||||
VP8LExtraCost(h->literal_ + NUM_LITERAL_CODES,
|
||||
NUM_LENGTH_CODES);
|
||||
h->red_cost_ = PopulationCost(h->red_, NUM_LITERAL_CODES);
|
||||
h->blue_cost_ = PopulationCost(h->blue_, NUM_LITERAL_CODES);
|
||||
h->bit_cost_ = h->literal_cost_ + h->red_cost_ + h->blue_cost_ +
|
||||
alpha_cost + distance_cost;
|
||||
}
|
||||
@ -439,7 +482,7 @@ static void HistogramAnalyze(
|
||||
VP8LHistogram* const histo = histograms[i];
|
||||
histo->bit_cost_ = VP8LHistogramEstimateBits(histo);
|
||||
// Copy histograms from init_histo[] to histo_image[].
|
||||
*histo_image->histograms[i] = *histo;
|
||||
HistogramCopy(histo, histo_image->histograms[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@ -460,7 +503,7 @@ static void HistogramAnalyzeBin(
|
||||
VP8LHistogram* const histo = histograms[i];
|
||||
UpdateHistogramCost(histo);
|
||||
// Copy histograms from init_histo[] to histo_image[].
|
||||
*histo_image->histograms[i] = *histo;
|
||||
HistogramCopy(histo, histo_image->histograms[i]);
|
||||
UpdateDominantCostRange(histo, &cost_range);
|
||||
}
|
||||
|
||||
@ -503,7 +546,8 @@ static void HistogramCompactBins(VP8LHistogramSet* const histo_image) {
|
||||
if (start < end) {
|
||||
assert(histo_image->histograms[start] != NULL);
|
||||
assert(histo_image->histograms[end] != NULL);
|
||||
*histo_image->histograms[start] = *histo_image->histograms[end];
|
||||
HistogramCopy(histo_image->histograms[end],
|
||||
histo_image->histograms[start]);
|
||||
histo_image->histograms[end] = NULL;
|
||||
--end;
|
||||
}
|
||||
@ -533,7 +577,7 @@ static void HistogramCombineBin(VP8LHistogramSet* const histo_image,
|
||||
histo_image->histograms[idx2],
|
||||
cur_combo, bit_cost_thresh);
|
||||
if (curr_cost_diff < bit_cost_thresh) {
|
||||
*histo_image->histograms[idx1] = *cur_combo;
|
||||
HistogramCopy(cur_combo, histo_image->histograms[idx1]);
|
||||
histo_image->histograms[idx2]->bit_cost_ = 0.;
|
||||
}
|
||||
}
|
||||
@ -551,7 +595,7 @@ static uint32_t MyRand(uint32_t *seed) {
|
||||
}
|
||||
|
||||
static void HistogramCombine(VP8LHistogramSet* const histo_image,
|
||||
VP8LHistogram* const histos, int quality) {
|
||||
VP8LHistogramSet* const histos, int quality) {
|
||||
int iter;
|
||||
uint32_t seed = 0;
|
||||
int tries_with_no_success = 0;
|
||||
@ -561,8 +605,8 @@ static void HistogramCombine(VP8LHistogramSet* const histo_image,
|
||||
const int num_pairs = histo_image_size / 2;
|
||||
const int num_tries_no_success = outer_iters / 2;
|
||||
const int min_cluster_size = 2;
|
||||
VP8LHistogram* cur_combo = histos + 0; // trial merged histogram
|
||||
VP8LHistogram* best_combo = histos + 1; // best merged histogram so far
|
||||
VP8LHistogram* cur_combo = histos->histograms[0]; // trial histogram
|
||||
VP8LHistogram* best_combo = histos->histograms[1]; // best histogram so far
|
||||
|
||||
// Collapse similar histograms in 'histo_image'.
|
||||
for (iter = 0;
|
||||
@ -603,12 +647,12 @@ static void HistogramCombine(VP8LHistogramSet* const histo_image,
|
||||
}
|
||||
|
||||
if (best_idx1 >= 0) {
|
||||
*histo_image->histograms[best_idx1] = *best_combo;
|
||||
HistogramCopy(best_combo, histo_image->histograms[best_idx1]);
|
||||
// swap best_idx2 slot with last one (which is now unused)
|
||||
--histo_image_size;
|
||||
if (best_idx2 != histo_image_size) {
|
||||
histo_image->histograms[best_idx2] =
|
||||
histo_image->histograms[histo_image_size];
|
||||
HistogramCopy(histo_image->histograms[histo_image_size],
|
||||
histo_image->histograms[best_idx2]);
|
||||
histo_image->histograms[histo_image_size] = NULL;
|
||||
}
|
||||
tries_with_no_success = 0;
|
||||
@ -683,8 +727,7 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize,
|
||||
// bin_map[n][num_histo + 1] ... bin_map[n][bin_depth - 1] = un-used indices.
|
||||
const int bin_depth = histo_image_raw_size + 1;
|
||||
int16_t* bin_map = NULL;
|
||||
VP8LHistogram* const histos =
|
||||
(VP8LHistogram*)WebPSafeMalloc(2ULL, sizeof(*histos));
|
||||
VP8LHistogramSet* const histos = VP8LAllocateHistogramSet(2, cache_bits);
|
||||
VP8LHistogramSet* const init_histo =
|
||||
VP8LAllocateHistogramSet(histo_image_raw_size, cache_bits);
|
||||
|
||||
@ -709,8 +752,8 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize,
|
||||
const double combine_cost_factor =
|
||||
GetCombineCostFactor(histo_image_raw_size, quality);
|
||||
HistogramAnalyzeBin(init_histo, histo_image, bin_map);
|
||||
HistogramCombineBin(histo_image, histos, bin_depth, combine_cost_factor,
|
||||
bin_map);
|
||||
HistogramCombineBin(histo_image, histos->histograms[0],
|
||||
bin_depth, combine_cost_factor, bin_map);
|
||||
} else {
|
||||
HistogramAnalyze(init_histo, histo_image);
|
||||
}
|
||||
@ -725,7 +768,7 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize,
|
||||
|
||||
Error:
|
||||
WebPSafeFree(bin_map);
|
||||
WebPSafeFree(init_histo);
|
||||
WebPSafeFree(histos);
|
||||
VP8LFreeHistogramSet(init_histo);
|
||||
VP8LFreeHistogramSet(histos);
|
||||
return ok;
|
||||
}
|
||||
|
Reference in New Issue
Block a user