From c24f8954be8b72750317034457da2aba9643b711 Mon Sep 17 00:00:00 2001 From: Vikas Arora Date: Fri, 17 Oct 2014 09:20:02 -0700 Subject: [PATCH] Simplify and speedup Backward refs computation. Updated the VP8LGetBackwardReferences and HashChainFindCopy methods with the following: - Remove the recursive CostModelBuild. - Reuse the lz77 backward refs in CostModelBuild, instead of evaluating them again (as was done for recursion_level=0). - Consolidate the match-length logic inside the FindMatchLength method. - Remove the logic for altering best_length/val based on the 2D distance. The additional 162 value (+= 9 * 9 + 9 * 9 - y * y - x * x) can't change the best_val eval computation to choose a different curr_length, as best_val was set to 'curr_length << 16'. The following is the impact on compression speed/density at default & max quality. Overall, this speeds up compression by 5-15% (q=100 -> 75) with a slight drop (0.02-0.03%) in compression density for the non-palette images. Before After bpp/Rate(MP/s) bpp/Rate(MP/s) q=75 (def) All 1000 2.4492/1.049 MP/s 2.4498/1.230 MP/s Palette 0.2719/5.060 MP/s 0.2719/6.110 MP/s non-Palette 3.7597/0.732 MP/s 3.7607/0.840 MP/s q=100 All 1000 2.4134/0.125 MP/s 2.4142/0.131 MP/s Palette 0.2692/2.585 MP/s 0.2692/2.885 MP/s non-Palette 3.7040/0.079 MP/s 3.7053/0.083 MP/s Change-Id: I27a5eff3356d876c3e949fd32262244b25678b7a --- src/enc/backward_references.c | 121 ++++++++++++---------------------- 1 file changed, 43 insertions(+), 78 deletions(-) diff --git a/src/enc/backward_references.c b/src/enc/backward_references.c index a3c30aa0..54f08b3d 100644 --- a/src/enc/backward_references.c +++ b/src/enc/backward_references.c @@ -60,8 +60,12 @@ static int DistanceToPlaneCode(int xsize, int dist) { static WEBP_INLINE int FindMatchLength(const uint32_t* const array1, const uint32_t* const array2, - const int max_limit) { + int best_len_match, + int max_limit) { int match_len = 0; + // Before 'expensive' linear match, check if the two arrays match at 
the + // current best length index. + if (array1[best_len_match] != array2[best_len_match]) return 0; while (match_len < max_limit && array1[match_len] == array2[match_len]) { ++match_len; } @@ -242,16 +246,14 @@ static void GetParamsForHashChainFindCopy(int quality, int xsize, } static int HashChainFindCopy(const VP8LHashChain* const p, - int base_position, int xsize_signed, + int base_position, int xsize, const uint32_t* const argb, int max_len, int window_size, int iter_pos, int iter_limit, int* const distance_ptr, int* const length_ptr) { const uint32_t* const argb_start = argb + base_position; - uint64_t best_val = 0; - uint32_t best_length = 1; - uint32_t best_distance = 0; - const uint32_t xsize = (uint32_t)xsize_signed; + int best_length = 1; + int best_distance = 0; const int min_pos = (base_position > window_size) ? base_position - window_size : 0; int pos; @@ -262,45 +264,23 @@ static int HashChainFindCopy(const VP8LHashChain* const p, for (pos = p->hash_to_first_index_[GetPixPairHash64(argb_start)]; pos >= min_pos; pos = p->chain_[pos]) { - uint64_t val; - uint32_t curr_length; - uint32_t distance; - const uint32_t* const ptr1 = (argb + pos + best_length - 1); - const uint32_t* const ptr2 = (argb_start + best_length - 1); - + int curr_length; + int distance; if (iter_pos < 0) { - if (iter_pos < iter_limit || best_val >= 0xff0000) { + if (iter_pos < iter_limit || best_length >= 255) { break; } } --iter_pos; - // Before 'expensive' linear match, check if the two arrays match at the - // current best length index and also for the succeeding elements. - if (ptr1[0] != ptr2[0] || ptr1[1] != ptr2[1]) continue; - - curr_length = FindMatchLength(argb + pos, argb_start, max_len); + curr_length = FindMatchLength(argb + pos, argb_start, best_length, max_len); if (curr_length < best_length) continue; distance = (uint32_t)(base_position - pos); - val = curr_length << 16; - // Favoring 2d locality here gives savings for certain images. 
- if (distance < 9 * xsize) { - const uint32_t y = distance / xsize; - uint32_t x = distance % xsize; - if (x > (xsize >> 1)) { - x = xsize - x; - } - if (x <= 7) { - val += 9 * 9 + 9 * 9; - val -= y * y + x * x; - } - } - if (best_val < val) { - best_val = val; + if (best_length < curr_length) { best_length = curr_length; best_distance = distance; - if (curr_length >= (uint32_t)max_len) { + if (curr_length >= max_len) { break; } if ((best_distance == 1 || distance == xsize) && @@ -309,7 +289,7 @@ static int HashChainFindCopy(const VP8LHashChain* const p, } } } - *distance_ptr = (int)best_distance; + *distance_ptr = best_distance; *length_ptr = best_length; return (best_length >= MIN_LENGTH); } @@ -461,9 +441,8 @@ typedef struct { } CostModel; static int BackwardReferencesTraceBackwards( - int xsize, int ysize, int recursive_cost_model, - const uint32_t* const argb, int quality, int cache_bits, - VP8LHashChain* const hash_chain, + int xsize, int ysize, const uint32_t* const argb, int quality, + int cache_bits, VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs); static void ConvertPopulationCountTableToBitEstimates( @@ -487,28 +466,10 @@ static void ConvertPopulationCountTableToBitEstimates( } } -static int CostModelBuild(CostModel* const m, int xsize, int ysize, - int recursion_level, const uint32_t* const argb, - int quality, int cache_bits, - VP8LHashChain* const hash_chain, +static int CostModelBuild(CostModel* const m, int cache_bits, VP8LBackwardRefs* const refs) { int ok = 0; - VP8LHistogram* histo = NULL; - - ClearBackwardRefs(refs); - if (recursion_level > 0) { - if (!BackwardReferencesTraceBackwards(xsize, ysize, recursion_level - 1, - argb, quality, cache_bits, hash_chain, - refs)) { - goto Error; - } - } else { - if (!BackwardReferencesHashChain(xsize, ysize, argb, cache_bits, quality, - hash_chain, refs)) { - goto Error; - } - } - histo = VP8LAllocateHistogram(cache_bits); + VP8LHistogram* const histo = VP8LAllocateHistogram(cache_bits); if 
(histo == NULL) goto Error; VP8LHistogramCreate(histo, refs, cache_bits); @@ -558,7 +519,7 @@ static WEBP_INLINE double GetDistanceCost(const CostModel* const m, } static int BackwardReferencesHashChainDistanceOnly( - int xsize, int ysize, int recursive_cost_model, const uint32_t* const argb, + int xsize, int ysize, const uint32_t* const argb, int quality, int cache_bits, VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs, uint32_t* const dist_array) { int i; @@ -570,8 +531,6 @@ static int BackwardReferencesHashChainDistanceOnly( (float*)WebPSafeMalloc(pix_count, sizeof(*cost)); CostModel* cost_model = (CostModel*)WebPSafeMalloc(1ULL, sizeof(*cost_model)); VP8LColorCache hashers; - const double mul0 = (recursive_cost_model != 0) ? 1.0 : 0.68; - const double mul1 = (recursive_cost_model != 0) ? 1.0 : 0.82; const int min_distance_code = 2; // TODO(vikasa): tune as function of quality int window_size = WINDOW_SIZE; int iter_pos = 1; @@ -584,8 +543,7 @@ static int BackwardReferencesHashChainDistanceOnly( if (!cc_init) goto Error; } - if (!CostModelBuild(cost_model, xsize, ysize, recursive_cost_model, argb, - quality, cache_bits, hash_chain, refs)) { + if (!CostModelBuild(cost_model, cache_bits, refs)) { goto Error; } @@ -653,6 +611,8 @@ static int BackwardReferencesHashChainDistanceOnly( HashChainInsert(hash_chain, &argb[i], i); } { + const double mul0 = 0.68; + const double mul1 = 0.82; // inserting a literal pixel double cost_val = prev_cost; if (use_color_cache && VP8LColorCacheContains(&hashers, argb[i])) { @@ -773,7 +733,6 @@ Error: // Returns 1 on success. 
static int BackwardReferencesTraceBackwards(int xsize, int ysize, - int recursive_cost_model, const uint32_t* const argb, int quality, int cache_bits, VP8LHashChain* const hash_chain, @@ -788,7 +747,7 @@ static int BackwardReferencesTraceBackwards(int xsize, int ysize, if (dist_array == NULL) goto Error; if (!BackwardReferencesHashChainDistanceOnly( - xsize, ysize, recursive_cost_model, argb, quality, cache_bits, hash_chain, + xsize, ysize, argb, quality, cache_bits, hash_chain, refs, dist_array)) { goto Error; } @@ -822,23 +781,22 @@ VP8LBackwardRefs* VP8LGetBackwardReferences( int cache_bits, int use_2d_locality, VP8LHashChain* const hash_chain, VP8LBackwardRefs refs_array[2]) { int lz77_is_useful; - const int num_pix = width * height; + double bit_cost_lz77, bit_cost_rle; VP8LBackwardRefs* best = NULL; VP8LBackwardRefs* const refs_lz77 = &refs_array[0]; VP8LBackwardRefs* const refs_rle = &refs_array[1]; + VP8LHistogram* const histo = VP8LAllocateHistogram(cache_bits); + if (histo == NULL) return NULL; if (!BackwardReferencesHashChain(width, height, argb, cache_bits, quality, hash_chain, refs_lz77)) { - return NULL; + goto Error; } if (!BackwardReferencesRle(width, height, argb, refs_rle)) { - return NULL; + goto Error; } { - double bit_cost_lz77, bit_cost_rle; - VP8LHistogram* const histo = VP8LAllocateHistogram(cache_bits); - if (histo == NULL) return NULL; // Evaluate LZ77 coding. VP8LHistogramCreate(histo, refs_lz77, cache_bits); bit_cost_lz77 = VP8LHistogramEstimateBits(histo); @@ -847,7 +805,6 @@ VP8LBackwardRefs* VP8LGetBackwardReferences( bit_cost_rle = VP8LHistogramEstimateBits(histo); // Decide if LZ77 is useful. lz77_is_useful = (bit_cost_lz77 < bit_cost_rle); - VP8LFreeHistogram(histo); } // Choose appropriate backward reference. 
@@ -856,23 +813,31 @@ VP8LBackwardRefs* VP8LGetBackwardReferences( const int try_lz77_trace_backwards = (quality >= 25); best = refs_lz77; // default guess: lz77 is better if (try_lz77_trace_backwards) { - // Set recursion level for large images using a color cache. - const int recursion_level = - (num_pix < 320 * 200) && (cache_bits > 0) ? 1 : 0; VP8LBackwardRefs* const refs_trace = &refs_array[1]; - ClearBackwardRefs(refs_trace); - if (BackwardReferencesTraceBackwards(width, height, recursion_level, argb, - quality, cache_bits, hash_chain, + if (!VP8LBackwardRefsCopy(refs_lz77, refs_trace)) { + best = NULL; + goto Error; + } + if (BackwardReferencesTraceBackwards(width, height, argb, quality, + cache_bits, hash_chain, refs_trace)) { + double bit_cost_trace; + // Evaluate LZ77 coding. + VP8LHistogramCreate(histo, refs_trace, cache_bits); + bit_cost_trace = VP8LHistogramEstimateBits(histo); + if (bit_cost_trace < bit_cost_lz77) { best = refs_trace; } } + } } else { best = refs_rle; } if (use_2d_locality) BackwardReferences2DLocality(width, best); + Error: + VP8LFreeHistogram(histo); return best; }