Add no-color cache configuration to the cruncher

Fix another pessimization found by the pingo image compressor.

Refactoring is necessary to make LZ77 computation
common to cache or no-cache analysis.
Slower by 1.7x instead of 2x

Change-Id: I396701ea6e88543dbfe9471eb552877f6c8ce1e3
This commit is contained in:
Vincent Rabaud
2020-06-09 18:53:56 +02:00
parent 749a8b99f7
commit f0110baec0
5 changed files with 300 additions and 215 deletions

View File

@ -11,13 +11,14 @@
//
#include <assert.h>
#include <float.h>
#include <math.h>
#include "src/enc/backward_references_enc.h"
#include "src/enc/histogram_enc.h"
#include "src/dsp/dsp.h"
#include "src/dsp/lossless.h"
#include "src/dsp/lossless_common.h"
#include "src/dsp/dsp.h"
#include "src/enc/backward_references_enc.h"
#include "src/enc/histogram_enc.h"
#include "src/utils/color_cache_utils.h"
#include "src/utils/utils.h"
@ -103,6 +104,20 @@ void VP8LBackwardRefsClear(VP8LBackwardRefs* const refs) {
}
}
// Swaps the content of two VP8LBackwardRefs.
static void BackwardRefsSwap(VP8LBackwardRefs* const refs1,
VP8LBackwardRefs* const refs2) {
const int point_to_refs1 =
(refs1->tail_ != NULL && refs1->tail_ == &refs1->refs_);
const int point_to_refs2 =
(refs2->tail_ != NULL && refs2->tail_ == &refs2->refs_);
const VP8LBackwardRefs tmp = *refs1;
*refs1 = *refs2;
*refs2 = tmp;
if (point_to_refs2) refs1->tail_ = &refs1->refs_;
if (point_to_refs1) refs2->tail_ = &refs2->refs_;
}
void VP8LBackwardRefsInit(VP8LBackwardRefs* const refs, int block_size) {
assert(refs != NULL);
memset(refs, 0, sizeof(*refs));
@ -154,6 +169,22 @@ static PixOrCopyBlock* BackwardRefsNewBlock(VP8LBackwardRefs* const refs) {
return b;
}
// Return 1 on success, 0 on error.
static int BackwardRefsClone(const VP8LBackwardRefs* const from,
VP8LBackwardRefs* const to) {
const PixOrCopyBlock* block_from = from->refs_;
VP8LClearBackwardRefs(to);
while (block_from != NULL) {
PixOrCopyBlock* const block_to = BackwardRefsNewBlock(to);
if (block_to == NULL) return 0;
memcpy(block_to->start_, block_from->start_,
block_from->size_ * sizeof(PixOrCopy));
block_to->size_ = block_from->size_;
block_from = block_from->next_;
}
return 1;
}
extern void VP8LBackwardRefsCursorAdd(VP8LBackwardRefs* const refs,
const PixOrCopy v);
void VP8LBackwardRefsCursorAdd(VP8LBackwardRefs* const refs,
@ -848,16 +879,21 @@ extern int VP8LBackwardReferencesTraceBackwards(
int xsize, int ysize, const uint32_t* const argb, int cache_bits,
const VP8LHashChain* const hash_chain,
const VP8LBackwardRefs* const refs_src, VP8LBackwardRefs* const refs_dst);
static VP8LBackwardRefs* GetBackwardReferences(
int width, int height, const uint32_t* const argb, int quality,
int lz77_types_to_try, int* const cache_bits,
const VP8LHashChain* const hash_chain, VP8LBackwardRefs* best,
VP8LBackwardRefs* worst) {
const int cache_bits_initial = *cache_bits;
double bit_cost_best = -1;
static int GetBackwardReferences(int width, int height,
const uint32_t* const argb, int quality,
int lz77_types_to_try, int cache_bits_max,
int do_no_cache,
const VP8LHashChain* const hash_chain,
VP8LBackwardRefs* const refs,
int* const cache_bits_best) {
VP8LHistogram* histo = NULL;
int lz77_type, lz77_type_best = 0;
int i, lz77_type;
// Index 0 is for a color cache, index 1 for no cache (if needed).
int lz77_types_best[2] = {0, 0};
double bit_costs_best[2] = {DBL_MAX, DBL_MAX};
VP8LHashChain hash_chain_box;
VP8LBackwardRefs* const refs_tmp = &refs[do_no_cache ? 2 : 1];
int status = 0;
memset(&hash_chain_box, 0, sizeof(hash_chain_box));
histo = VP8LAllocateHistogram(MAX_COLOR_CACHE_BITS);
@ -866,86 +902,129 @@ static VP8LBackwardRefs* GetBackwardReferences(
for (lz77_type = 1; lz77_types_to_try;
lz77_types_to_try &= ~lz77_type, lz77_type <<= 1) {
int res = 0;
double bit_cost;
int cache_bits_tmp = cache_bits_initial;
double bit_cost = 0.;
if ((lz77_types_to_try & lz77_type) == 0) continue;
switch (lz77_type) {
case kLZ77RLE:
res = BackwardReferencesRle(width, height, argb, 0, worst);
res = BackwardReferencesRle(width, height, argb, 0, refs_tmp);
break;
case kLZ77Standard:
// Compute LZ77 with no cache (0 bits), as the ideal LZ77 with a color
// cache is not that different in practice.
res = BackwardReferencesLz77(width, height, argb, 0, hash_chain, worst);
res = BackwardReferencesLz77(width, height, argb, 0, hash_chain,
refs_tmp);
break;
case kLZ77Box:
if (!VP8LHashChainInit(&hash_chain_box, width * height)) goto Error;
res = BackwardReferencesLz77Box(width, height, argb, 0, hash_chain,
&hash_chain_box, worst);
&hash_chain_box, refs_tmp);
break;
default:
assert(0);
}
if (!res) goto Error;
// Next, try with a color cache and update the references.
if (!CalculateBestCacheSize(argb, quality, worst, &cache_bits_tmp)) {
goto Error;
}
if (cache_bits_tmp > 0) {
if (!BackwardRefsWithLocalCache(argb, cache_bits_tmp, worst)) {
goto Error;
// Start with the no color cache case.
for (i = 1; i >= 0; --i) {
int cache_bits = (i == 1) ? 0 : cache_bits_max;
if (i == 1 && !do_no_cache) continue;
if (i == 0) {
// Try with a color cache.
if (!CalculateBestCacheSize(argb, quality, refs_tmp, &cache_bits)) {
goto Error;
}
if (cache_bits > 0) {
if (!BackwardRefsWithLocalCache(argb, cache_bits, refs_tmp)) {
goto Error;
}
}
}
if (i == 0 && do_no_cache && cache_bits == 0) {
// No need to re-compute bit_cost as it was computed at i == 1.
} else {
VP8LHistogramCreate(histo, refs_tmp, cache_bits);
bit_cost = VP8LHistogramEstimateBits(histo);
}
if (bit_cost < bit_costs_best[i]) {
if (i == 1) {
// Do not swap as the full cache analysis would have the wrong
// VP8LBackwardRefs to start with.
if (!BackwardRefsClone(refs_tmp, &refs[1])) goto Error;
} else {
BackwardRefsSwap(refs_tmp, &refs[0]);
}
bit_costs_best[i] = bit_cost;
lz77_types_best[i] = lz77_type;
if (i == 0) *cache_bits_best = cache_bits;
}
}
// Keep the best backward references.
VP8LHistogramCreate(histo, worst, cache_bits_tmp);
bit_cost = VP8LHistogramEstimateBits(histo);
if (lz77_type_best == 0 || bit_cost < bit_cost_best) {
VP8LBackwardRefs* const tmp = worst;
worst = best;
best = tmp;
bit_cost_best = bit_cost;
*cache_bits = cache_bits_tmp;
lz77_type_best = lz77_type;
}
}
assert(lz77_type_best > 0);
assert(lz77_types_best[0] > 0);
assert(!do_no_cache || lz77_types_best[1] > 0);
// Improve on simple LZ77 but only for high quality (TraceBackwards is
// costly).
if ((lz77_type_best == kLZ77Standard || lz77_type_best == kLZ77Box) &&
quality >= 25) {
const VP8LHashChain* const hash_chain_tmp =
(lz77_type_best == kLZ77Standard) ? hash_chain : &hash_chain_box;
if (VP8LBackwardReferencesTraceBackwards(width, height, argb, *cache_bits,
hash_chain_tmp, best, worst)) {
double bit_cost_trace;
VP8LHistogramCreate(histo, worst, *cache_bits);
bit_cost_trace = VP8LHistogramEstimateBits(histo);
if (bit_cost_trace < bit_cost_best) best = worst;
for (i = 1; i >= 0; --i) {
if (i == 1 && !do_no_cache) continue;
if ((lz77_types_best[i] == kLZ77Standard ||
lz77_types_best[i] == kLZ77Box) &&
quality >= 25) {
const VP8LHashChain* const hash_chain_tmp =
(lz77_types_best[i] == kLZ77Standard) ? hash_chain : &hash_chain_box;
const int cache_bits = (i == 1) ? 0 : *cache_bits_best;
if (VP8LBackwardReferencesTraceBackwards(width, height, argb, cache_bits,
hash_chain_tmp, &refs[i],
refs_tmp)) {
double bit_cost_trace;
VP8LHistogramCreate(histo, refs_tmp, cache_bits);
bit_cost_trace = VP8LHistogramEstimateBits(histo);
if (bit_cost_trace < bit_costs_best[i]) {
BackwardRefsSwap(refs_tmp, &refs[i]);
}
}
}
BackwardReferences2DLocality(width, &refs[i]);
if (i == 1 && lz77_types_best[0] == lz77_types_best[1] &&
*cache_bits_best == 0) {
// If the best cache size is 0 and we have the same best LZ77, just copy
// the data over and stop here.
if (!BackwardRefsClone(&refs[1], &refs[0])) goto Error;
break;
}
}
BackwardReferences2DLocality(width, best);
status = 1;
Error:
VP8LHashChainClear(&hash_chain_box);
VP8LFreeHistogram(histo);
return best;
return status;
}
VP8LBackwardRefs* VP8LGetBackwardReferences(
WebPEncodingError VP8LGetBackwardReferences(
int width, int height, const uint32_t* const argb, int quality,
int low_effort, int lz77_types_to_try, int* const cache_bits,
const VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs_tmp1,
VP8LBackwardRefs* const refs_tmp2) {
int low_effort, int lz77_types_to_try, int cache_bits_max, int do_no_cache,
const VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs,
int* const cache_bits_best) {
if (low_effort) {
return GetBackwardReferencesLowEffort(width, height, argb, cache_bits,
hash_chain, refs_tmp1);
VP8LBackwardRefs* refs_best;
*cache_bits_best = cache_bits_max;
refs_best = GetBackwardReferencesLowEffort(
width, height, argb, cache_bits_best, hash_chain, refs);
if (refs_best == NULL) return VP8_ENC_ERROR_OUT_OF_MEMORY;
// Set it in first position.
BackwardRefsSwap(refs_best, &refs[0]);
} else {
return GetBackwardReferences(width, height, argb, quality,
lz77_types_to_try, cache_bits, hash_chain,
refs_tmp1, refs_tmp2);
if (!GetBackwardReferences(width, height, argb, quality, lz77_types_to_try,
cache_bits_max, do_no_cache, hash_chain, refs,
cache_bits_best)) {
return VP8_ENC_ERROR_OUT_OF_MEMORY;
}
}
return VP8_ENC_OK;
}