From 77bdddf01617890abe400b01f36564b19e66d7ff Mon Sep 17 00:00:00 2001 From: Vikas Arora Date: Thu, 30 Oct 2014 10:53:15 -0700 Subject: [PATCH] Speed up BackwardReferences Speed up BackwardReferencesHashChainDistanceOnly method by: 1.) Remove for loop for shortmax code path. 2.) Execute the shortmax code path after regular call to HashChainFindCopy, only if HashChainFindCopy() returns length > 2 (MIN_LENGTH). 3.) Also for shortmax, call method HashChainFindOffset (for length = 2), instead of expensive method HashChainFindCopy(). 4.) Handling first pixel (i==0) outside main loop and removing one if condition (i > 0) per pixel. 5.) Handle the last pixel outside the main 'for' loop. Overall compression speedup observed is around 5% (+/- noise). Change-Id: Ifa30c4035f8d26e6e43e3c4881244d777961c22b --- src/enc/backward_references.c | 156 +++++++++++++++++++--------------- 1 file changed, 88 insertions(+), 68 deletions(-) diff --git a/src/enc/backward_references.c b/src/enc/backward_references.c index 337b58d1..7d7e9a9f 100644 --- a/src/enc/backward_references.c +++ b/src/enc/backward_references.c @@ -536,6 +536,33 @@ static WEBP_INLINE double GetDistanceCost(const CostModel* const m, return m->distance_[code] + extra_bits; } +static void AddSingleLiteral(const uint32_t* const argb, + VP8LHashChain* const hash_chain, + VP8LColorCache* const hashers, + const CostModel* const cost_model, + int idx, int is_last, int use_color_cache, + double prev_cost, float* const cost, + uint32_t* const dist_array) { + double cost_val = prev_cost; + const uint32_t color = argb[0]; + if (!is_last) { + HashChainInsert(hash_chain, argb, idx); + } + if (use_color_cache && VP8LColorCacheContains(hashers, color)) { + const double mul0 = 0.68; + const int ix = VP8LColorCacheGetIndex(hashers, color); + cost_val += GetCacheCost(cost_model, ix) * mul0; + } else { + const double mul1 = 0.82; + if (use_color_cache) VP8LColorCacheInsert(hashers, color); + cost_val += GetLiteralCost(cost_model, color) * mul1; + } + if (cost[idx] > cost_val) { + cost[idx] = (float)cost_val; + dist_array[idx] = 1; // only one is inserted. + } +} + static int BackwardReferencesHashChainDistanceOnly( int xsize, int ysize, const uint32_t* const argb, int quality, int cache_bits, VP8LHashChain* const hash_chain, @@ -554,7 +581,7 @@ static int BackwardReferencesHashChainDistanceOnly( CostModel* const cost_model = (CostModel*)WebPSafeMalloc(1ULL, cost_model_size); VP8LColorCache hashers; - const int min_distance_code = 2; // TODO(vikasa): tune as function of quality + const int min_distance_code = 2; int window_size = WINDOW_SIZE; int iter_pos = 1; int iter_limit = -1; @@ -579,82 +606,75 @@ static int BackwardReferencesHashChainDistanceOnly( GetParamsForHashChainFindCopy(quality, xsize, cache_bits, &window_size, &iter_pos, &iter_limit); HashChainInit(hash_chain); - for (i = 0; i < pix_count; ++i) { - double prev_cost = 0.0; - int shortmax; - if (i > 0) { - prev_cost = cost[i - 1]; - } - for (shortmax = 0; shortmax < 2; ++shortmax) { - int offset = 0; - int len = 0; - if (i < pix_count - 1) { // FindCopy reads pixels at [i] and [i + 1]. - int max_len = shortmax ? 2 : pix_count - i; - HashChainFindCopy(hash_chain, i, xsize, argb, max_len, - window_size, iter_pos, iter_limit, - &offset, &len); + // Add first pixel as literal. + AddSingleLiteral(argb + 0, hash_chain, &hashers, cost_model, 0, 0, + use_color_cache, 0.0, cost, dist_array); + for (i = 1; i < pix_count - 1; ++i) { + int offset = 0; + int len = 0; + double prev_cost = cost[i - 1]; + const int max_len = pix_count - i; + HashChainFindCopy(hash_chain, i, xsize, argb, max_len, window_size, + iter_pos, iter_limit, &offset, &len); + if (len >= MIN_LENGTH) { + const int code = DistanceToPlaneCode(xsize, offset); + const double distance_cost = + prev_cost + GetDistanceCost(cost_model, code); + int k; + for (k = 1; k < len; ++k) { + const double cost_val = distance_cost + GetLengthCost(cost_model, k); + if (cost[i + k] > cost_val) { + cost[i + k] = (float)cost_val; + dist_array[i + k] = k + 1; + } } - if (len >= MIN_LENGTH) { - const int code = DistanceToPlaneCode(xsize, offset); - const double distance_cost = - prev_cost + GetDistanceCost(cost_model, code); - int k; - for (k = 1; k < len; ++k) { - const double cost_val = distance_cost + GetLengthCost(cost_model, k); - if (cost[i + k] > cost_val) { - cost[i + k] = (float)cost_val; - dist_array[i + k] = k + 1; + // This if is for speedup only. It roughly doubles the speed, and + // makes compression worse by .1 %. + if (len >= 128 && code <= min_distance_code) { + // Long copy for short distances, let's skip the middle + // lookups for better copies. + // 1) insert the hashes. + if (use_color_cache) { + for (k = 0; k < len; ++k) { + VP8LColorCacheInsert(&hashers, argb[i + k]); } } - // This if is for speedup only. It roughly doubles the speed, and - // makes compression worse by .1 %. - if (len >= 128 && code <= min_distance_code) { - // Long copy for short distances, let's skip the middle - // lookups for better copies. - // 1) insert the hashes. - if (use_color_cache) { - for (k = 0; k < len; ++k) { - VP8LColorCacheInsert(&hashers, argb[i + k]); - } + // 2) Add to the hash_chain (but cannot add the last pixel) + { + const int last = (len + i < pix_count - 1) ? len + i + : pix_count - 1; + for (k = i; k < last; ++k) { + HashChainInsert(hash_chain, &argb[k], k); } - // 2) Add to the hash_chain (but cannot add the last pixel) - { - const int last = (len + i < pix_count - 1) ? len + i - : pix_count - 1; - for (k = i; k < last; ++k) { - HashChainInsert(hash_chain, &argb[k], k); - } - } - // 3) jump. - i += len - 1; // for loop does ++i, thus -1 here. - goto next_symbol; + } + // 3) jump. + i += len - 1; // for loop does ++i, thus -1 here. + goto next_symbol; + } + if (len != MIN_LENGTH) { + int code_min_length; + double cost_total; + HashChainFindOffset(hash_chain, i, argb, MIN_LENGTH, window_size, + &offset); + code_min_length = DistanceToPlaneCode(xsize, offset); + cost_total = prev_cost + + GetDistanceCost(cost_model, code_min_length) + + GetLengthCost(cost_model, 1); + if (cost[i + 1] > cost_total) { + cost[i + 1] = (float)cost_total; + dist_array[i + 1] = 2; } } } - if (i < pix_count - 1) { - HashChainInsert(hash_chain, &argb[i], i); - } - { - const double mul0 = 0.68; - const double mul1 = 0.82; - // inserting a literal pixel - double cost_val = prev_cost; - if (use_color_cache && VP8LColorCacheContains(&hashers, argb[i])) { - const int ix = VP8LColorCacheGetIndex(&hashers, argb[i]); - cost_val += GetCacheCost(cost_model, ix) * mul0; - } else { - if (use_color_cache) VP8LColorCacheInsert(&hashers, argb[i]); - cost_val += GetLiteralCost(cost_model, argb[i]) * mul1; - } - if (cost[i] > cost_val) { - cost[i] = (float)cost_val; - dist_array[i] = 1; // only one is inserted. - } - } + AddSingleLiteral(argb + i, hash_chain, &hashers, cost_model, i, 0, + use_color_cache, prev_cost, cost, dist_array); next_symbol: ; } - // Last pixel still to do, it can only be a single step if not reached - // through cheaper means already. + // Handle the last pixel. + if (i == (pix_count - 1)) { + AddSingleLiteral(argb + i, hash_chain, &hashers, cost_model, i, 1, + use_color_cache, cost[pix_count - 2], cost, dist_array); + } ok = !refs->error_; Error: if (cc_init) VP8LColorCacheClear(&hashers);