From c6882c49e3922177a9d66e7d0d9b9ce03f970834 Mon Sep 17 00:00:00 2001 From: Pascal Massimino Date: Mon, 14 May 2012 05:49:02 -0700 Subject: [PATCH] merge all tree processing into a single VP8LProcessTree() -> 0.1% size improvement because we're calling OptimizeForRLE() systematically now. Change-Id: I03bd712175728e0d46323f375134cae5a241db4b --- src/enc/vp8l.c | 153 +++---------------------------------- src/utils/huffman_encode.c | 142 +++++++++++++++++++++++++++++++++- src/utils/huffman_encode.h | 22 ++---- 3 files changed, 155 insertions(+), 162 deletions(-) diff --git a/src/enc/vp8l.c b/src/enc/vp8l.c index 402546ac..a203de1d 100644 --- a/src/enc/vp8l.c +++ b/src/enc/vp8l.c @@ -155,114 +155,6 @@ static int VP8LEncAnalyze(VP8LEncoder* const enc) { return 1; } -// ----------------------------------------------------------------------------- - -// Heuristics for selecting the stride ranges to collapse. -static int ValuesShouldBeCollapsedToStrideAverage(int a, int b) { - return abs(a - b) < 4; -} - -// Change the population counts in a way that the consequent -// Hufmann tree compression, especially its rle-part will be more -// likely to compress this data more efficiently. -// -// length contains the size of the histogram. -// data contains the population counts. -static int OptimizeHuffmanForRle(int length, int* counts) { - int stride; - int limit; - int sum; - uint8_t* good_for_rle; - // 1) Let's make the Huffman code more compatible with rle encoding. - int i; - for (; length >= 0; --length) { - if (length == 0) { - return 1; // All zeros. - } - if (counts[length - 1] != 0) { - // Now counts[0..length - 1] does not have trailing zeros. - break; - } - } - // 2) Let's mark all population counts that already can be encoded - // with an rle code. - good_for_rle = (uint8_t*)calloc(length, 1); - if (good_for_rle == NULL) { - return 0; - } - { - // Let's not spoil any of the existing good rle codes. - // Mark any seq of 0's that is longer as 5 as a good_for_rle. 
- // Mark any seq of non-0's that is longer as 7 as a good_for_rle. - int symbol = counts[0]; - int stride = 0; - for (i = 0; i < length + 1; ++i) { - if (i == length || counts[i] != symbol) { - if ((symbol == 0 && stride >= 5) || - (symbol != 0 && stride >= 7)) { - int k; - for (k = 0; k < stride; ++k) { - good_for_rle[i - k - 1] = 1; - } - } - stride = 1; - if (i != length) { - symbol = counts[i]; - } - } else { - ++stride; - } - } - } - // 3) Let's replace those population counts that lead to more rle codes. - stride = 0; - limit = counts[0]; - sum = 0; - for (i = 0; i < length + 1; ++i) { - if (i == length || good_for_rle[i] || - (i != 0 && good_for_rle[i - 1]) || - !ValuesShouldBeCollapsedToStrideAverage(counts[i], limit)) { - if (stride >= 4 || (stride >= 3 && sum == 0)) { - int k; - // The stride must end, collapse what we have, if we have enough (4). - int count = (sum + stride / 2) / stride; - if (count < 1) { - count = 1; - } - if (sum == 0) { - // Don't make an all zeros stride to be upgraded to ones. - count = 0; - } - for (k = 0; k < stride; ++k) { - // We don't want to change value at counts[i], - // that is already belonging to the next stride. Thus - 1. - counts[i - k - 1] = count; - } - } - stride = 0; - sum = 0; - if (i < length - 3) { - // All interesting strides have a count of at least 4, - // at least when non-zeros. 
- limit = (counts[i] + counts[i + 1] + - counts[i + 2] + counts[i + 3] + 2) / 4; - } else if (i < length) { - limit = counts[i]; - } else { - limit = 0; - } - } - ++stride; - if (i != length) { - sum += counts[i]; - if (stride >= 4) { - limit = (sum + stride / 2) / stride; - } - } - } - free(good_for_rle); - return 1; -} static int GetHuffBitLengthsAndCodes( const VP8LHistogramSet* const histogram_image, @@ -312,34 +204,11 @@ static int GetHuffBitLengthsAndCodes( for (i = 0; i < histogram_image_size; ++i) { HuffmanTreeCode* const codes = &huffman_codes[5 * i]; VP8LHistogram* const histo = histogram_image->histograms[i]; - const int num_literals = codes[0].num_symbols; - // For each component, optimize histogram for Huffman with RLE compression, - // and create a Huffman tree (in the form of bit lengths) for each. - ok = ok && OptimizeHuffmanForRle(num_literals, histo->literal_); - ok = ok && VP8LCreateHuffmanTree(histo->literal_, num_literals, 15, - codes[0].code_lengths); - - ok = ok && OptimizeHuffmanForRle(256, histo->red_); - ok = ok && VP8LCreateHuffmanTree(histo->red_, 256, 15, - codes[1].code_lengths); - - ok = ok && OptimizeHuffmanForRle(256, histo->blue_); - ok = ok && VP8LCreateHuffmanTree(histo->blue_, 256, 15, - codes[2].code_lengths); - - ok = ok && OptimizeHuffmanForRle(256, histo->alpha_); - ok = ok && VP8LCreateHuffmanTree(histo->alpha_, 256, 15, - codes[3].code_lengths); - - ok = ok && OptimizeHuffmanForRle(DISTANCE_CODES_MAX, histo->distance_); - ok = ok && VP8LCreateHuffmanTree(histo->distance_, DISTANCE_CODES_MAX, 15, - codes[4].code_lengths); - - // Create the actual bit codes for the bit lengths. - // TODO(vikasa): merge with each VP8LCreateHuffmanTree() ? 
- for (k = 0; k < 5; ++k) { - VP8LConvertBitDepthsToSymbols(codes + k); - } + ok = ok && VP8LCreateHuffmanTree(histo->literal_, 15, codes + 0); + ok = ok && VP8LCreateHuffmanTree(histo->red_, 15, codes + 1); + ok = ok && VP8LCreateHuffmanTree(histo->blue_, 15, codes + 2); + ok = ok && VP8LCreateHuffmanTree(histo->alpha_, 15, codes + 3); + ok = ok && VP8LCreateHuffmanTree(histo->distance_, 15, codes + 4); } End: @@ -423,6 +292,10 @@ static int StoreFullHuffmanCode(VP8LBitWriter* const bw, (HuffmanTreeToken*)malloc(bit_lengths_size * sizeof(*tokens)); if (tokens == NULL) return 0; + huffman_code.num_symbols = CODE_LENGTH_CODES; + huffman_code.code_lengths = code_length_bitdepth; + huffman_code.codes = code_length_bitdepth_symbols; + VP8LWriteBits(bw, 1, 0); num_tokens = VP8LCreateCompressedHuffmanTree(bit_lengths, bit_lengths_size, tokens, bit_lengths_size); @@ -433,17 +306,11 @@ static int StoreFullHuffmanCode(VP8LBitWriter* const bw, ++histogram[tokens[i].code]; } - if (!VP8LCreateHuffmanTree(histogram, CODE_LENGTH_CODES, - 7, code_length_bitdepth)) { + if (!VP8LCreateHuffmanTree(histogram, 7, &huffman_code)) { goto End; } } - huffman_code.num_symbols = CODE_LENGTH_CODES; - huffman_code.code_lengths = code_length_bitdepth; - huffman_code.codes = code_length_bitdepth_symbols; - - VP8LConvertBitDepthsToSymbols(&huffman_code); StoreHuffmanTreeOfHuffmanTreeToBitMask(bw, code_length_bitdepth); ClearHuffmanTreeIfOnlyOneSymbol(&huffman_code); { diff --git a/src/utils/huffman_encode.c b/src/utils/huffman_encode.c index 13d0390e..d731edf4 100644 --- a/src/utils/huffman_encode.c +++ b/src/utils/huffman_encode.c @@ -20,6 +20,112 @@ #include #include +// ----------------------------------------------------------------------------- +// Util function to optimize the symbol map for RLE coding + +// Heuristics for selecting the stride ranges to collapse. 
+static int ValuesShouldBeCollapsedToStrideAverage(int a, int b) { + return abs(a - b) < 4; +} + +// Change the population counts in a way that the consequent +// Huffman tree compression, especially its RLE-part, gives smaller output. +static int OptimizeHuffmanForRle(int length, int* const counts) { + int stride; + int limit; + int sum; + uint8_t* good_for_rle; + // 1) Let's make the Huffman code more compatible with rle encoding. + int i; + for (; length >= 0; --length) { + if (length == 0) { + return 1; // All zeros. + } + if (counts[length - 1] != 0) { + // Now counts[0..length - 1] does not have trailing zeros. + break; + } + } + // 2) Let's mark all population counts that already can be encoded + // with an rle code. + good_for_rle = (uint8_t*)calloc(length, 1); + if (good_for_rle == NULL) { + return 0; + } + { + // Let's not spoil any of the existing good rle codes. + // Mark any seq of 0's that is at least 5 long as good_for_rle. + // Mark any seq of non-0's that is at least 7 long as good_for_rle. + int symbol = counts[0]; + int stride = 0; + for (i = 0; i < length + 1; ++i) { + if (i == length || counts[i] != symbol) { + if ((symbol == 0 && stride >= 5) || + (symbol != 0 && stride >= 7)) { + int k; + for (k = 0; k < stride; ++k) { + good_for_rle[i - k - 1] = 1; + } + } + stride = 1; + if (i != length) { + symbol = counts[i]; + } + } else { + ++stride; + } + } + } + // 3) Let's replace those population counts that lead to more rle codes. + stride = 0; + limit = counts[0]; + sum = 0; + for (i = 0; i < length + 1; ++i) { + if (i == length || good_for_rle[i] || + (i != 0 && good_for_rle[i - 1]) || + !ValuesShouldBeCollapsedToStrideAverage(counts[i], limit)) { + if (stride >= 4 || (stride >= 3 && sum == 0)) { + int k; + // The stride must end, collapse what we have, if we have enough (4). + int count = (sum + stride / 2) / stride; + if (count < 1) { + count = 1; + } + if (sum == 0) { + // Don't make an all zeros stride to be upgraded to ones.
+ count = 0; + } + for (k = 0; k < stride; ++k) { + // We don't want to change value at counts[i], + // that is already belonging to the next stride. Thus - 1. + counts[i - k - 1] = count; + } + } + stride = 0; + sum = 0; + if (i < length - 3) { + // All interesting strides have a count of at least 4, + // at least when non-zeros. + limit = (counts[i] + counts[i + 1] + + counts[i + 2] + counts[i + 3] + 2) / 4; + } else if (i < length) { + limit = counts[i]; + } else { + limit = 0; + } + } + ++stride; + if (i != length) { + sum += counts[i]; + if (stride >= 4) { + limit = (sum + stride / 2) / stride; + } + } + } + free(good_for_rle); + return 1; +} + typedef struct { int total_count_; int value_; @@ -58,7 +164,13 @@ static void SetBitDepths(const HuffmanTree* const tree, } } -// This function will create a Huffman tree. +// Create an optimal Huffman tree. +// +// (histogram,histogram_size): population counts. +// tree_depth_limit: maximum bit depth (inclusive) of the codes. +// bit_depths[]: how many bits are used for the symbol. +// +// Returns 0 when an error has occurred. // // The catch here is that the tree cannot be arbitrarily deep // @@ -71,18 +183,21 @@ static void SetBitDepths(const HuffmanTree* const tree, // we are not planning to use this with extremely long blocks. // // See http://en.wikipedia.org/wiki/Huffman_coding -int VP8LCreateHuffmanTree(const int* const histogram, int histogram_size, - int tree_depth_limit, uint8_t* const bit_depths) { +static int GenerateOptimalTree(const int* const histogram, int histogram_size, + int tree_depth_limit, + uint8_t* const bit_depths) { int count_min; HuffmanTree* tree_pool; HuffmanTree* tree; int tree_size_orig = 0; int i; + for (i = 0; i < histogram_size; ++i) { if (histogram[i] != 0) { ++tree_size_orig; } } + // 3 * tree_size is enough to cover all the nodes representing a // population and all the inserted nodes combining two existing nodes.
// The tree pool needs 2 * (tree_size_orig - 1) entities, and the @@ -282,7 +397,8 @@ static uint32_t ReverseBits(int num_bits, uint32_t bits) { return retval; } -void VP8LConvertBitDepthsToSymbols(HuffmanTreeCode* const tree) { +// Get the actual bit values for a tree of bit depths. +static void ConvertBitDepthsToSymbols(HuffmanTreeCode* const tree) { // 0 bit-depth means that the symbol does not exist. int i; int len; @@ -311,4 +427,22 @@ void VP8LConvertBitDepthsToSymbols(HuffmanTreeCode* const tree) { } } +// ----------------------------------------------------------------------------- +// Main entry point + +int VP8LCreateHuffmanTree(int* const histogram, int tree_depth_limit, + HuffmanTreeCode* const tree) { + const int num_symbols = tree->num_symbols; + if (!OptimizeHuffmanForRle(num_symbols, histogram)) { + return 0; + } + if (!GenerateOptimalTree(histogram, num_symbols, + tree_depth_limit, tree->code_lengths)) { + return 0; + } + // Create the actual bit codes for the bit lengths. + ConvertBitDepthsToSymbols(tree); + return 1; +} + #endif diff --git a/src/utils/huffman_encode.h b/src/utils/huffman_encode.h index 4ae739a2..78e89c05 100644 --- a/src/utils/huffman_encode.h +++ b/src/utils/huffman_encode.h @@ -20,24 +20,15 @@ extern "C" { #endif -// Create a Huffman tree. -// -// (data,length): population counts. -// tree_limit: maximum bit depth (inclusive) of the codes. -// bit_depths[]: how many bits are used for the symbol. -// -// Returns 0 when an error has occurred. -int VP8LCreateHuffmanTree(const int* data, const int length, - const int tree_limit, uint8_t* bit_depths); - -// Turn the Huffman tree into a token sequence. -// Returns the number of tokens used. +// Struct for holding the tree header in coded form. 
typedef struct { uint8_t code; // value (0..15) or escape code (16,17,18) uint8_t extra_bits; // extra bits for escape codes } HuffmanTreeToken; -int VP8LCreateCompressedHuffmanTree(const uint8_t* const depth, int len, +// Turn the Huffman tree into a token sequence. +// Returns the number of tokens used. +int VP8LCreateCompressedHuffmanTree(const uint8_t* const depth, int depth_size, HuffmanTreeToken* tokens, int max_tokens); // Struct to represent the tree codes (depth and bits array). @@ -47,8 +38,9 @@ typedef struct { uint16_t* codes; // Symbol Codes. } HuffmanTreeCode; -// Get the actual bit values for a tree of bit depths. -void VP8LConvertBitDepthsToSymbols(HuffmanTreeCode* const tree); +// Create an optimized tree and its bit codes. 'histogram' is modified. +int VP8LCreateHuffmanTree(int* const histogram, int tree_depth_limit, + HuffmanTreeCode* const tree); #if defined(__cplusplus) || defined(c_plusplus) }