Add no-color cache configuration to the cruncher

Fix another pessimization found by the pingo image compressor.

Refactoring is necessary to make LZ77 computation
common to cache or no-cache analysis.
Slower by 1.7x instead of 2x

Change-Id: I396701ea6e88543dbfe9471eb552877f6c8ce1e3
This commit is contained in:
Vincent Rabaud
2020-06-09 18:53:56 +02:00
parent 749a8b99f7
commit f0110baec0
5 changed files with 300 additions and 215 deletions

View File

@ -355,11 +355,15 @@ static int GetTransformBits(int method, int histo_bits) {
}
// Set of parameters to be used in each iteration of the cruncher.
#define CRUNCH_CONFIGS_LZ77_MAX 2
#define CRUNCH_SUBCONFIGS_MAX 2
typedef struct {
int lz77_;
int do_no_cache_;
} CrunchSubConfig;
typedef struct {
int entropy_idx_;
int lz77s_types_to_try_[CRUNCH_CONFIGS_LZ77_MAX];
int lz77s_types_to_try_size_;
CrunchSubConfig sub_configs_[CRUNCH_SUBCONFIGS_MAX];
int sub_configs_size_;
} CrunchConfig;
#define CRUNCH_CONFIGS_MAX kNumEntropyIx
@ -377,6 +381,9 @@ static int EncoderAnalyze(VP8LEncoder* const enc,
int i;
int use_palette;
int n_lz77s;
// If set to 0, analyze the cache with the computed cache value. If 1, also
// analyze with no-cache.
int do_no_cache = 0;
assert(pic != NULL && pic->argb != NULL);
use_palette =
@ -403,6 +410,7 @@ static int EncoderAnalyze(VP8LEncoder* const enc,
return 0;
}
if (method == 6 && config->quality == 100) {
do_no_cache = 1;
// Go brute force on all transforms.
*crunch_configs_size = 0;
for (i = 0; i < kNumEntropyIx; ++i) {
@ -420,14 +428,16 @@ static int EncoderAnalyze(VP8LEncoder* const enc,
}
}
// Fill in the different LZ77s.
assert(n_lz77s <= CRUNCH_CONFIGS_LZ77_MAX);
assert(n_lz77s <= CRUNCH_SUBCONFIGS_MAX);
for (i = 0; i < *crunch_configs_size; ++i) {
int j;
for (j = 0; j < n_lz77s; ++j) {
crunch_configs[i].lz77s_types_to_try_[j] =
assert(j < CRUNCH_SUBCONFIGS_MAX);
crunch_configs[i].sub_configs_[j].lz77_ =
(j == 0) ? kLZ77Standard | kLZ77RLE : kLZ77Box;
crunch_configs[i].sub_configs_[j].do_no_cache_ = do_no_cache;
}
crunch_configs[i].lz77s_types_to_try_size_ = n_lz77s;
crunch_configs[i].sub_configs_size_ = n_lz77s;
}
return 1;
}
@ -443,7 +453,7 @@ static int EncoderInit(VP8LEncoder* const enc) {
int i;
if (!VP8LHashChainInit(&enc->hash_chain_, pix_cnt)) return 0;
for (i = 0; i < 3; ++i) VP8LBackwardRefsInit(&enc->refs_[i], refs_block_size);
for (i = 0; i < 4; ++i) VP8LBackwardRefsInit(&enc->refs_[i], refs_block_size);
return 1;
}
@ -772,13 +782,10 @@ static WebPEncodingError StoreImageToBitMask(
}
// Special case of EncodeImageInternal() for cache-bits=0, histo_bits=31
static WebPEncodingError EncodeImageNoHuffman(VP8LBitWriter* const bw,
const uint32_t* const argb,
VP8LHashChain* const hash_chain,
VP8LBackwardRefs* const refs_tmp1,
VP8LBackwardRefs* const refs_tmp2,
int width, int height,
int quality, int low_effort) {
static WebPEncodingError EncodeImageNoHuffman(
VP8LBitWriter* const bw, const uint32_t* const argb,
VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs_array,
int width, int height, int quality, int low_effort) {
int i;
int max_tokens = 0;
WebPEncodingError err = VP8_ENC_OK;
@ -801,13 +808,11 @@ static WebPEncodingError EncodeImageNoHuffman(VP8LBitWriter* const bw,
err = VP8_ENC_ERROR_OUT_OF_MEMORY;
goto Error;
}
refs = VP8LGetBackwardReferences(width, height, argb, quality, 0,
kLZ77Standard | kLZ77RLE, &cache_bits,
hash_chain, refs_tmp1, refs_tmp2);
if (refs == NULL) {
err = VP8_ENC_ERROR_OUT_OF_MEMORY;
goto Error;
}
err = VP8LGetBackwardReferences(
width, height, argb, quality, /*low_effort=*/0, kLZ77Standard | kLZ77RLE,
cache_bits, /*do_no_cache=*/0, hash_chain, refs_array, &cache_bits);
if (err != VP8_ENC_OK) goto Error;
refs = &refs_array[0];
histogram_image = VP8LAllocateHistogramSet(1, cache_bits);
if (histogram_image == NULL) {
err = VP8_ENC_ERROR_OUT_OF_MEMORY;
@ -863,11 +868,11 @@ static WebPEncodingError EncodeImageNoHuffman(VP8LBitWriter* const bw,
static WebPEncodingError EncodeImageInternal(
VP8LBitWriter* const bw, const uint32_t* const argb,
VP8LHashChain* const hash_chain, VP8LBackwardRefs refs_array[3], int width,
VP8LHashChain* const hash_chain, VP8LBackwardRefs refs_array[4], int width,
int height, int quality, int low_effort, int use_cache,
const CrunchConfig* const config, int* cache_bits, int histogram_bits,
size_t init_byte_position, int* const hdr_size, int* const data_size) {
WebPEncodingError err = VP8_ENC_OK;
WebPEncodingError err = VP8_ENC_ERROR_OUT_OF_MEMORY;
const uint32_t histogram_image_xysize =
VP8LSubSampleSize(width, histogram_bits) *
VP8LSubSampleSize(height, histogram_bits);
@ -879,103 +884,103 @@ static WebPEncodingError EncodeImageInternal(
3ULL * CODE_LENGTH_CODES, sizeof(*huff_tree));
HuffmanTreeToken* tokens = NULL;
HuffmanTreeCode* huffman_codes = NULL;
VP8LBackwardRefs* refs_best;
VP8LBackwardRefs* refs_tmp;
uint16_t* const histogram_symbols =
(uint16_t*)WebPSafeMalloc(histogram_image_xysize,
sizeof(*histogram_symbols));
int lz77s_idx;
int sub_configs_idx;
int cache_bits_init, write_histogram_image;
VP8LBitWriter bw_init = *bw, bw_best;
int hdr_size_tmp;
VP8LHashChain hash_chain_histogram; // histogram image hash chain
size_t bw_size_best = ~(size_t)0;
assert(histogram_bits >= MIN_HUFFMAN_BITS);
assert(histogram_bits <= MAX_HUFFMAN_BITS);
assert(hdr_size != NULL);
assert(data_size != NULL);
if (histogram_symbols == NULL) {
err = VP8_ENC_ERROR_OUT_OF_MEMORY;
// Make sure we can allocate the different objects.
memset(&hash_chain_histogram, 0, sizeof(hash_chain_histogram));
if (huff_tree == NULL || histogram_symbols == NULL ||
!VP8LHashChainInit(&hash_chain_histogram, histogram_image_xysize) ||
!VP8LHashChainFill(hash_chain, quality, argb, width, height,
low_effort)) {
goto Error;
}
if (use_cache) {
// If the value is different from zero, it has been set during the
// palette analysis.
if (*cache_bits == 0) *cache_bits = MAX_COLOR_CACHE_BITS;
cache_bits_init = (*cache_bits == 0) ? MAX_COLOR_CACHE_BITS : *cache_bits;
} else {
*cache_bits = 0;
cache_bits_init = 0;
}
// 'best_refs' is the reference to the best backward refs and points to one
// of refs_array[0] or refs_array[1].
// Calculate backward references from ARGB image.
if (huff_tree == NULL ||
!VP8LHashChainFill(hash_chain, quality, argb, width, height,
low_effort) ||
!VP8LBitWriterInit(&bw_best, 0) ||
(config->lz77s_types_to_try_size_ > 1 &&
// If several iterations will happen, clone into bw_best.
if (!VP8LBitWriterInit(&bw_best, 0) ||
((config->sub_configs_size_ > 1 ||
config->sub_configs_[0].do_no_cache_) &&
!VP8LBitWriterClone(bw, &bw_best))) {
err = VP8_ENC_ERROR_OUT_OF_MEMORY;
goto Error;
}
for (lz77s_idx = 0; lz77s_idx < config->lz77s_types_to_try_size_;
++lz77s_idx) {
refs_best = VP8LGetBackwardReferences(
width, height, argb, quality, low_effort,
config->lz77s_types_to_try_[lz77s_idx], cache_bits, hash_chain,
&refs_array[0], &refs_array[1]);
if (refs_best == NULL) {
err = VP8_ENC_ERROR_OUT_OF_MEMORY;
goto Error;
}
// Keep the best references aside and use the other element from the first
// two as a temporary for later usage.
refs_tmp = &refs_array[refs_best == &refs_array[0] ? 1 : 0];
for (sub_configs_idx = 0; sub_configs_idx < config->sub_configs_size_;
++sub_configs_idx) {
const CrunchSubConfig* const sub_config =
&config->sub_configs_[sub_configs_idx];
int cache_bits_best, i_cache;
err = VP8LGetBackwardReferences(width, height, argb, quality, low_effort,
sub_config->lz77_, cache_bits_init,
sub_config->do_no_cache_, hash_chain,
&refs_array[0], &cache_bits_best);
if (err != VP8_ENC_OK) goto Error;
histogram_image =
VP8LAllocateHistogramSet(histogram_image_xysize, *cache_bits);
tmp_histo = VP8LAllocateHistogram(*cache_bits);
if (histogram_image == NULL || tmp_histo == NULL) {
err = VP8_ENC_ERROR_OUT_OF_MEMORY;
goto Error;
}
for (i_cache = 0; i_cache < (sub_config->do_no_cache_ ? 2 : 1); ++i_cache) {
const int cache_bits_tmp = (i_cache == 0) ? cache_bits_best : 0;
// Speed-up: no need to study the no-cache case if it was already studied
// in i_cache == 0.
if (i_cache == 1 && cache_bits_best == 0) break;
// Build histogram image and symbols from backward references.
if (!VP8LGetHistoImageSymbols(width, height, refs_best, quality, low_effort,
histogram_bits, *cache_bits, histogram_image,
tmp_histo, histogram_symbols)) {
err = VP8_ENC_ERROR_OUT_OF_MEMORY;
goto Error;
}
// Create Huffman bit lengths and codes for each histogram image.
histogram_image_size = histogram_image->size;
bit_array_size = 5 * histogram_image_size;
huffman_codes = (HuffmanTreeCode*)WebPSafeCalloc(bit_array_size,
sizeof(*huffman_codes));
// Note: some histogram_image entries may point to tmp_histos[], so the
// latter need to outlive the following call to GetHuffBitLengthsAndCodes().
if (huffman_codes == NULL ||
!GetHuffBitLengthsAndCodes(histogram_image, huffman_codes)) {
err = VP8_ENC_ERROR_OUT_OF_MEMORY;
goto Error;
}
// Free combined histograms.
VP8LFreeHistogramSet(histogram_image);
histogram_image = NULL;
// Reset the bit writer for this iteration.
VP8LBitWriterReset(&bw_init, bw);
// Free scratch histograms.
VP8LFreeHistogram(tmp_histo);
tmp_histo = NULL;
// Build histogram image and symbols from backward references.
histogram_image =
VP8LAllocateHistogramSet(histogram_image_xysize, cache_bits_tmp);
tmp_histo = VP8LAllocateHistogram(cache_bits_tmp);
if (histogram_image == NULL || tmp_histo == NULL ||
!VP8LGetHistoImageSymbols(width, height, &refs_array[i_cache],
quality, low_effort, histogram_bits,
cache_bits_tmp, histogram_image, tmp_histo,
histogram_symbols)) {
goto Error;
}
// Create Huffman bit lengths and codes for each histogram image.
histogram_image_size = histogram_image->size;
bit_array_size = 5 * histogram_image_size;
huffman_codes = (HuffmanTreeCode*)WebPSafeCalloc(bit_array_size,
sizeof(*huffman_codes));
// Note: some histogram_image entries may point to tmp_histos[], so the
// latter need to outlive the following call to
// GetHuffBitLengthsAndCodes().
if (huffman_codes == NULL ||
!GetHuffBitLengthsAndCodes(histogram_image, huffman_codes)) {
goto Error;
}
// Free combined histograms.
VP8LFreeHistogramSet(histogram_image);
histogram_image = NULL;
// Color Cache parameters.
if (*cache_bits > 0) {
VP8LPutBits(bw, 1, 1);
VP8LPutBits(bw, *cache_bits, 4);
} else {
VP8LPutBits(bw, 0, 1);
}
// Free scratch histograms.
VP8LFreeHistogram(tmp_histo);
tmp_histo = NULL;
// Huffman image + meta huffman.
{
const int write_histogram_image = (histogram_image_size > 1);
// Color Cache parameters.
if (cache_bits_tmp > 0) {
VP8LPutBits(bw, 1, 1);
VP8LPutBits(bw, cache_bits_tmp, 4);
} else {
VP8LPutBits(bw, 0, 1);
}
// Huffman image + meta huffman.
write_histogram_image = (histogram_image_size > 1);
VP8LPutBits(bw, write_histogram_image, 1);
if (write_histogram_image) {
uint32_t* const histogram_argb =
@ -983,10 +988,7 @@ static WebPEncodingError EncodeImageInternal(
sizeof(*histogram_argb));
int max_index = 0;
uint32_t i;
if (histogram_argb == NULL) {
err = VP8_ENC_ERROR_OUT_OF_MEMORY;
goto Error;
}
if (histogram_argb == NULL) goto Error;
for (i = 0; i < histogram_image_xysize; ++i) {
const int symbol_index = histogram_symbols[i] & 0xffff;
histogram_argb[i] = (symbol_index << 8);
@ -998,65 +1000,64 @@ static WebPEncodingError EncodeImageInternal(
VP8LPutBits(bw, histogram_bits - 2, 3);
err = EncodeImageNoHuffman(
bw, histogram_argb, hash_chain, refs_tmp, &refs_array[2],
bw, histogram_argb, &hash_chain_histogram, &refs_array[2],
VP8LSubSampleSize(width, histogram_bits),
VP8LSubSampleSize(height, histogram_bits), quality, low_effort);
WebPSafeFree(histogram_argb);
if (err != VP8_ENC_OK) goto Error;
}
}
// Store Huffman codes.
{
int i;
int max_tokens = 0;
// Find maximum number of symbols for the huffman tree-set.
for (i = 0; i < 5 * histogram_image_size; ++i) {
HuffmanTreeCode* const codes = &huffman_codes[i];
if (max_tokens < codes->num_symbols) {
max_tokens = codes->num_symbols;
// Store Huffman codes.
{
int i;
int max_tokens = 0;
// Find maximum number of symbols for the huffman tree-set.
for (i = 0; i < 5 * histogram_image_size; ++i) {
HuffmanTreeCode* const codes = &huffman_codes[i];
if (max_tokens < codes->num_symbols) {
max_tokens = codes->num_symbols;
}
}
tokens = (HuffmanTreeToken*)WebPSafeMalloc(max_tokens, sizeof(*tokens));
if (tokens == NULL) goto Error;
for (i = 0; i < 5 * histogram_image_size; ++i) {
HuffmanTreeCode* const codes = &huffman_codes[i];
StoreHuffmanCode(bw, huff_tree, tokens, codes);
ClearHuffmanTreeIfOnlyOneSymbol(codes);
}
}
tokens = (HuffmanTreeToken*)WebPSafeMalloc(max_tokens, sizeof(*tokens));
if (tokens == NULL) {
err = VP8_ENC_ERROR_OUT_OF_MEMORY;
goto Error;
// Store actual literals.
hdr_size_tmp = (int)(VP8LBitWriterNumBytes(bw) - init_byte_position);
err = StoreImageToBitMask(bw, width, histogram_bits, &refs_array[i_cache],
histogram_symbols, huffman_codes);
if (err != VP8_ENC_OK) goto Error;
// Keep track of the smallest image so far.
if (VP8LBitWriterNumBytes(bw) < bw_size_best) {
bw_size_best = VP8LBitWriterNumBytes(bw);
*cache_bits = cache_bits_tmp;
*hdr_size = hdr_size_tmp;
*data_size =
(int)(VP8LBitWriterNumBytes(bw) - init_byte_position - *hdr_size);
VP8LBitWriterSwap(bw, &bw_best);
}
for (i = 0; i < 5 * histogram_image_size; ++i) {
HuffmanTreeCode* const codes = &huffman_codes[i];
StoreHuffmanCode(bw, huff_tree, tokens, codes);
ClearHuffmanTreeIfOnlyOneSymbol(codes);
WebPSafeFree(tokens);
tokens = NULL;
if (huffman_codes != NULL) {
WebPSafeFree(huffman_codes->codes);
WebPSafeFree(huffman_codes);
huffman_codes = NULL;
}
}
// Store actual literals.
hdr_size_tmp = (int)(VP8LBitWriterNumBytes(bw) - init_byte_position);
err = StoreImageToBitMask(bw, width, histogram_bits, refs_best,
histogram_symbols, huffman_codes);
// Keep track of the smallest image so far.
if (lz77s_idx == 0 ||
VP8LBitWriterNumBytes(bw) < VP8LBitWriterNumBytes(&bw_best)) {
*hdr_size = hdr_size_tmp;
*data_size =
(int)(VP8LBitWriterNumBytes(bw) - init_byte_position - *hdr_size);
VP8LBitWriterSwap(bw, &bw_best);
}
// Reset the bit writer for the following iteration if any.
if (config->lz77s_types_to_try_size_ > 1) VP8LBitWriterReset(&bw_init, bw);
WebPSafeFree(tokens);
tokens = NULL;
if (huffman_codes != NULL) {
WebPSafeFree(huffman_codes->codes);
WebPSafeFree(huffman_codes);
huffman_codes = NULL;
}
}
VP8LBitWriterSwap(bw, &bw_best);
err = VP8_ENC_OK;
Error:
WebPSafeFree(tokens);
WebPSafeFree(huff_tree);
VP8LFreeHistogramSet(histogram_image);
VP8LFreeHistogram(tmp_histo);
VP8LHashChainClear(&hash_chain_histogram);
if (huffman_codes != NULL) {
WebPSafeFree(huffman_codes->codes);
WebPSafeFree(huffman_codes);
@ -1098,8 +1099,7 @@ static WebPEncodingError ApplyPredictFilter(const VP8LEncoder* const enc,
VP8LPutBits(bw, pred_bits - 2, 3);
return EncodeImageNoHuffman(
bw, enc->transform_data_, (VP8LHashChain*)&enc->hash_chain_,
(VP8LBackwardRefs*)&enc->refs_[0], // cast const away
(VP8LBackwardRefs*)&enc->refs_[1], transform_width, transform_height,
(VP8LBackwardRefs*)&enc->refs_[0], transform_width, transform_height,
quality, low_effort);
}
@ -1119,8 +1119,7 @@ static WebPEncodingError ApplyCrossColorFilter(const VP8LEncoder* const enc,
VP8LPutBits(bw, ccolor_transform_bits - 2, 3);
return EncodeImageNoHuffman(
bw, enc->transform_data_, (VP8LHashChain*)&enc->hash_chain_,
(VP8LBackwardRefs*)&enc->refs_[0], // cast const away
(VP8LBackwardRefs*)&enc->refs_[1], transform_width, transform_height,
(VP8LBackwardRefs*)&enc->refs_[0], transform_width, transform_height,
quality, low_effort);
}
@ -1467,8 +1466,8 @@ static WebPEncodingError EncodePalette(VP8LBitWriter* const bw, int low_effort,
}
tmp_palette[0] = palette[0];
return EncodeImageNoHuffman(bw, tmp_palette, &enc->hash_chain_,
&enc->refs_[0], &enc->refs_[1], palette_size, 1,
20 /* quality */, low_effort);
&enc->refs_[0], palette_size, 1, /*quality=*/20,
low_effort);
}
// -----------------------------------------------------------------------------
@ -1494,7 +1493,7 @@ static void VP8LEncoderDelete(VP8LEncoder* enc) {
if (enc != NULL) {
int i;
VP8LHashChainClear(&enc->hash_chain_);
for (i = 0; i < 3; ++i) VP8LBackwardRefsClear(&enc->refs_[i]);
for (i = 0; i < 4; ++i) VP8LBackwardRefsClear(&enc->refs_[i]);
ClearTransformBuffer(enc);
WebPSafeFree(enc);
}
@ -1544,7 +1543,7 @@ static int EncodeStreamHook(void* input, void* data2) {
int data_size = 0;
int use_delta_palette = 0;
int idx;
size_t best_size = 0;
size_t best_size = ~(size_t)0;
VP8LBitWriter bw_init = *bw, bw_best;
(void)data2;
@ -1645,7 +1644,7 @@ static int EncodeStreamHook(void* input, void* data2) {
if (err != VP8_ENC_OK) goto Error;
// If we are better than what we already have.
if (idx == 0 || VP8LBitWriterNumBytes(bw) < best_size) {
if (VP8LBitWriterNumBytes(bw) < best_size) {
best_size = VP8LBitWriterNumBytes(bw);
// Store the BitWriter.
VP8LBitWriterSwap(bw, &bw_best);
@ -1821,7 +1820,7 @@ Error:
}
#undef CRUNCH_CONFIGS_MAX
#undef CRUNCH_CONFIGS_LZ77_MAX
#undef CRUNCH_SUBCONFIGS_MAX
int VP8LEncodeImage(const WebPConfig* const config,
const WebPPicture* const picture) {