From 498d4dd634f052f9e7cde7f328ee96278a577c66 Mon Sep 17 00:00:00 2001 From: Vikas Arora Date: Wed, 8 May 2013 17:19:04 -0700 Subject: [PATCH] WebP-Lossless encoding improvements. Lossy (with Alpha) image compression gets 2.3X speedup. Compressing lossless images is 20%-40% faster now. Change-Id: I41f0225838b48ae5c60b1effd1b0de72fecb3ae6 (cherry picked from commit 8eae188a62bdd2dfef2266cfd34617a421c3266f) --- src/dsp/lossless.c | 21 +++++++++ src/dsp/lossless.h | 3 ++ src/enc/alpha.c | 65 +++++++++++++++++++++------ src/enc/backward_references.c | 76 +++++++++++++++---------------- src/enc/vp8l.c | 84 ++++++++++++++++++----------------- src/utils/filters.c | 4 +- 6 files changed, 159 insertions(+), 94 deletions(-) diff --git a/src/dsp/lossless.c b/src/dsp/lossless.c index 080b3e63..c015b7ad 100644 --- a/src/dsp/lossless.c +++ b/src/dsp/lossless.c @@ -1325,6 +1325,27 @@ void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels, } } +// Bundles multiple (1, 2, 4 or 8) pixels into a single pixel. 
+void VP8LBundleColorMap(const uint8_t* const row, int width, + int xbits, uint32_t* const dst) { + int x; + if (xbits > 0) { + const int bit_depth = 1 << (3 - xbits); + const int mask = (1 << xbits) - 1; + uint32_t code = 0xff000000; + for (x = 0; x < width; ++x) { + const int xsub = x & mask; + if (xsub == 0) { + code = 0xff000000; + } + code |= row[x] << (8 + bit_depth * xsub); + dst[x >> xbits] = code; + } + } else { + for (x = 0; x < width; ++x) dst[x] = 0xff000000 | (row[x] << 8); + } +} + //------------------------------------------------------------------------------ #if defined(__cplusplus) || defined(c_plusplus) diff --git a/src/dsp/lossless.h b/src/dsp/lossless.h index 0ac4ecb8..6742bcc8 100644 --- a/src/dsp/lossless.h +++ b/src/dsp/lossless.h @@ -83,6 +83,9 @@ static WEBP_INLINE uint32_t VP8LSubPixels(uint32_t a, uint32_t b) { return (alpha_and_green & 0xff00ff00u) | (red_and_blue & 0x00ff00ffu); } +void VP8LBundleColorMap(const uint8_t* const row, int width, + int xbits, uint32_t* const dst); + //------------------------------------------------------------------------------ #if defined(__cplusplus) || defined(c_plusplus) diff --git a/src/enc/alpha.c b/src/enc/alpha.c index aadf88fe..60391c11 100644 --- a/src/enc/alpha.c +++ b/src/enc/alpha.c @@ -80,7 +80,7 @@ static int EncodeLossless(const uint8_t* const data, int width, int height, config.lossless = 1; config.method = effort_level; // impact is very small // Set a moderate default quality setting for alpha. 
- config.quality = 5.f * effort_level; + config.quality = 10.f * effort_level; assert(config.quality >= 0 && config.quality <= 100.f); ok = VP8LBitWriterInit(&tmp_bw, (width * height) >> 3); @@ -156,6 +156,25 @@ static void CopyPlane(const uint8_t* src, int src_stride, } } +static int GetNumColors(const uint8_t* data, int width, int height, + int stride) { + int j; + int colors = 0; + uint8_t color[256] = { 0 }; + + for (j = 0; j < height; ++j) { + int i; + const uint8_t* const p = data + j * stride; + for (i = 0; i < width; ++i) { + color[p[i]] = 1; + } + } + for (j = 0; j < 256; ++j) { + if (color[j] > 0) ++colors; + } + return colors; +} + static int EncodeAlpha(VP8Encoder* const enc, int quality, int method, int filter, int effort_level, @@ -207,18 +226,32 @@ static int EncodeAlpha(VP8Encoder* const enc, VP8BitWriter bw; int test_filter; uint8_t* filtered_alpha = NULL; + int try_filter_none = (effort_level > 3); - // We always test WEBP_FILTER_NONE first. - ok = EncodeAlphaInternal(quant_alpha, width, height, - method, WEBP_FILTER_NONE, reduce_levels, - effort_level, NULL, &bw, pic->stats); - if (!ok) { - VP8BitWriterWipeOut(&bw); - goto End; + if (filter == WEBP_FILTER_FAST) { // Quick estimate of the best candidate. + const int kMinColorsForFilterNone = 16; + const int kMaxColorsForFilterNone = 192; + const int num_colors = GetNumColors(quant_alpha, width, height, width); + // For low number of colors, NONE yields better compression. + filter = (num_colors <= kMinColorsForFilterNone) ? WEBP_FILTER_NONE : + EstimateBestFilter(quant_alpha, width, height, width); + // For large number of colors, try FILTER_NONE in addition to the best + // filter as well. + if (num_colors > kMaxColorsForFilterNone) { + try_filter_none = 1; + } } - if (filter == WEBP_FILTER_FAST) { // Quick estimate of a second candidate? - filter = EstimateBestFilter(quant_alpha, width, height, width); + // Test for WEBP_FILTER_NONE for higher effort levels. 
+ if (try_filter_none || filter == WEBP_FILTER_NONE) { + ok = EncodeAlphaInternal(quant_alpha, width, height, + method, WEBP_FILTER_NONE, reduce_levels, + effort_level, NULL, &bw, pic->stats); + + if (!ok) { + VP8BitWriterWipeOut(&bw); + goto End; + } } // Stop? if (filter == WEBP_FILTER_NONE) { @@ -234,11 +267,14 @@ static int EncodeAlpha(VP8Encoder* const enc, // Try the other mode(s). { WebPAuxStats best_stats; - size_t best_score = VP8BitWriterSize(&bw); + size_t best_score = try_filter_none ? + VP8BitWriterSize(&bw) : (size_t)~0U; + int wipe_tmp_bw = try_filter_none; memset(&best_stats, 0, sizeof(best_stats)); // prevent spurious warning if (pic->stats != NULL) best_stats = *pic->stats; - for (test_filter = WEBP_FILTER_HORIZONTAL; + for (test_filter = + try_filter_none ? WEBP_FILTER_HORIZONTAL : WEBP_FILTER_NONE; ok && (test_filter <= WEBP_FILTER_GRADIENT); ++test_filter) { VP8BitWriter tmp_bw; @@ -262,7 +298,10 @@ static int EncodeAlpha(VP8Encoder* const enc, } else { VP8BitWriterWipeOut(&bw); } - VP8BitWriterWipeOut(&tmp_bw); + if (wipe_tmp_bw) { + VP8BitWriterWipeOut(&tmp_bw); + } + wipe_tmp_bw = 1; // For next filter trial for WEBP_FILTER_BEST. } if (pic->stats != NULL) *pic->stats = best_stats; } diff --git a/src/enc/backward_references.c b/src/enc/backward_references.c index cf027875..67dd7e94 100644 --- a/src/enc/backward_references.c +++ b/src/enc/backward_references.c @@ -142,9 +142,10 @@ static void HashChainInsert(HashChain* const p, } static void GetParamsForHashChainFindCopy(int quality, int xsize, - int* window_size, int* iter_pos, - int* iter_limit) { + int cache_bits, int* window_size, + int* iter_pos, int* iter_limit) { const int iter_mult = (quality < 27) ? 1 : 1 + ((quality - 27) >> 4); + const int iter_neg = -iter_mult * (quality >> 1); // Limit the backward-ref window size for lower qualities. const int max_window_size = (quality > 50) ? WINDOW_SIZE : (quality > 25) ? 
(xsize << 8) @@ -152,77 +153,74 @@ static void GetParamsForHashChainFindCopy(int quality, int xsize, assert(xsize > 0); *window_size = (max_window_size > WINDOW_SIZE) ? WINDOW_SIZE : max_window_size; - *iter_pos = 5 + (quality >> 3); - *iter_limit = -quality * iter_mult; + *iter_pos = 8 + (quality >> 3); + // For lower entropy images, the rigorous search loop in HashChainFindCopy + // can be relaxed. + *iter_limit = (cache_bits > 0) ? iter_neg : iter_neg / 2; } static int HashChainFindCopy(const HashChain* const p, - int base_position, int xsize, + int base_position, int xsize_signed, const uint32_t* const argb, int maxlen, int window_size, int iter_pos, int iter_limit, int* const distance_ptr, int* const length_ptr) { - const uint64_t hash_code = GetPixPairHash64(&argb[base_position]); - int prev_length = 0; - int64_t best_val = 0; - int best_length = 0; - int best_distance = 0; const uint32_t* const argb_start = argb + base_position; + uint64_t best_val = 0; + uint32_t best_length = 1; + uint32_t best_distance = 0; + const uint32_t xsize = (uint32_t)xsize_signed; const int min_pos = (base_position > window_size) ? 
base_position - window_size : 0; int pos; - assert(xsize > 0); - for (pos = p->hash_to_first_index_[hash_code]; + for (pos = p->hash_to_first_index_[GetPixPairHash64(argb_start)]; pos >= min_pos; pos = p->chain_[pos]) { - int64_t val; - int curr_length; + uint64_t val; + uint32_t curr_length; + uint32_t distance; if (iter_pos < 0) { if (iter_pos < iter_limit || best_val >= 0xff0000) { break; } } --iter_pos; - if (best_length != 0 && - argb[pos + best_length - 1] != argb_start[best_length - 1]) { + if (argb[pos + best_length - 1] != argb_start[best_length - 1]) { continue; } curr_length = FindMatchLength(argb + pos, argb_start, maxlen); - if (curr_length < prev_length) { + if (curr_length < best_length) { continue; } - val = 65536 * curr_length; + distance = (uint32_t)(base_position - pos); + val = curr_length << 16; // Favoring 2d locality here gives savings for certain images. - if (base_position - pos < 9 * xsize) { - const int y = (base_position - pos) / xsize; - int x = (base_position - pos) % xsize; - if (x > xsize / 2) { + if (distance < 9 * xsize) { + const uint32_t y = distance / xsize; + uint32_t x = distance % xsize; + if (x > (xsize >> 1)) { x = xsize - x; } - if (x <= 7 && x >= -8) { + if (x <= 7) { + val += 9 * 9 + 9 * 9; val -= y * y + x * x; - } else { - val -= 9 * 9 + 9 * 9; } - } else { - val -= 9 * 9 + 9 * 9; } if (best_val < val) { - prev_length = curr_length; best_val = val; best_length = curr_length; - best_distance = base_position - pos; + best_distance = distance; if (curr_length >= MAX_LENGTH) { break; } - if ((best_distance == 1 || best_distance == xsize) && + if ((best_distance == 1 || distance == xsize) && best_length >= 128) { break; } } } - *distance_ptr = best_distance; + *distance_ptr = (int)best_distance; *length_ptr = best_length; return (best_length >= MIN_LENGTH); } @@ -284,8 +282,8 @@ static int BackwardReferencesHashChain(int xsize, int ysize, if (!HashChainInit(hash_chain, pix_count)) goto Error; refs->size = 0; - 
GetParamsForHashChainFindCopy(quality, xsize, &window_size, &iter_pos, - &iter_limit); + GetParamsForHashChainFindCopy(quality, xsize, cache_bits, + &window_size, &iter_pos, &iter_limit); for (i = 0; i < pix_count; ) { // Alternative#1: Code the pixels starting at 'i' using backward reference. int offset = 0; @@ -510,8 +508,8 @@ static int BackwardReferencesHashChainDistanceOnly( // We loop one pixel at a time, but store all currently best points to // non-processed locations from this point. dist_array[0] = 0; - GetParamsForHashChainFindCopy(quality, xsize, &window_size, &iter_pos, - &iter_limit); + GetParamsForHashChainFindCopy(quality, xsize, cache_bits, + &window_size, &iter_pos, &iter_limit); for (i = 0; i < pix_count; ++i) { double prev_cost = 0.0; int shortmax; @@ -645,8 +643,8 @@ static int BackwardReferencesHashChainFollowChosenPath( } refs->size = 0; - GetParamsForHashChainFindCopy(quality, xsize, &window_size, &iter_pos, - &iter_limit); + GetParamsForHashChainFindCopy(quality, xsize, cache_bits, + &window_size, &iter_pos, &iter_limit); for (ix = 0; ix < chosen_path_size; ++ix, ++size) { int offset = 0; int len = 0; @@ -785,7 +783,9 @@ int VP8LGetBackwardReferences(int width, int height, *best = refs_lz77; // default guess: lz77 is better VP8LClearBackwardRefs(&refs_rle); if (try_lz77_trace_backwards) { - const int recursion_level = (num_pix < 320 * 200) ? 1 : 0; + // Set recursion level for large images using a color cache. + const int recursion_level = + (num_pix < 320 * 200) && (cache_bits > 0) ? 1 : 0; VP8LBackwardRefs refs_trace; if (!VP8LBackwardRefsAlloc(&refs_trace, num_pix)) { goto End; diff --git a/src/enc/vp8l.c b/src/enc/vp8l.c index 5077167b..8af544ff 100644 --- a/src/enc/vp8l.c +++ b/src/enc/vp8l.c @@ -811,27 +811,6 @@ static WebPEncodingError AllocateTransformBuffer(VP8LEncoder* const enc, return err; } -// Bundles multiple (1, 2, 4 or 8) pixels into a single pixel. 
-static void BundleColorMap(const uint8_t* const row, int width, - int xbits, uint32_t* const dst) { - int x; - if (xbits > 0) { - const int bit_depth = 1 << (3 - xbits); - const int mask = (1 << xbits) - 1; - uint32_t code = 0xff000000; - for (x = 0; x < width; ++x) { - const int xsub = x & mask; - if (xsub == 0) { - code = 0xff000000; - } - code |= row[x] << (8 + bit_depth * xsub); - dst[x >> xbits] = code; - } - } else { - for (x = 0; x < width; ++x) dst[x] = 0xff000000 | (row[x] << 8); - } -} - // Note: Expects "enc->palette_" to be set properly. // Also, "enc->palette_" will be modified after this call and should not be used // later. @@ -848,6 +827,7 @@ static WebPEncodingError ApplyPalette(VP8LBitWriter* const bw, const int palette_size = enc->palette_size_; uint8_t* row = NULL; int xbits; + int is_alpha = 1; // Replace each input pixel by corresponding palette index. // This is done line by line. @@ -864,19 +844,43 @@ static WebPEncodingError ApplyPalette(VP8LBitWriter* const bw, row = WebPSafeMalloc((uint64_t)width, sizeof(*row)); if (row == NULL) return VP8_ENC_ERROR_OUT_OF_MEMORY; - for (y = 0; y < height; ++y) { - for (x = 0; x < width; ++x) { - const uint32_t pix = src[x]; - for (i = 0; i < palette_size; ++i) { - if (pix == palette[i]) { - row[x] = i; - break; + for (i = 0; i < palette_size; ++i) { + if ((palette[i] & 0x00ff00ffu) != 0) { + is_alpha = 0; + break; + } + } + + if (is_alpha) { + int inv_palette[MAX_PALETTE_SIZE] = { 0 }; + for (i = 0; i < palette_size; ++i) { + const int color = (palette[i] >> 8) & 0xff; + inv_palette[color] = i; + } + for (y = 0; y < height; ++y) { + for (x = 0; x < width; ++x) { + const int color = (src[x] >> 8) & 0xff; + row[x] = inv_palette[color]; + } + VP8LBundleColorMap(row, width, xbits, dst); + src += pic->argb_stride; + dst += enc->current_width_; + } + } else { + for (y = 0; y < height; ++y) { + for (x = 0; x < width; ++x) { + const uint32_t pix = src[x]; + for (i = 0; i < palette_size; ++i) { + if (pix == 
palette[i]) { + row[x] = i; + break; + } } } + VP8LBundleColorMap(row, width, xbits, dst); + src += pic->argb_stride; + dst += enc->current_width_; } - BundleColorMap(row, width, xbits, dst); - src += pic->argb_stride; - dst += enc->current_width_; } // Save palette to bitstream. @@ -899,13 +903,10 @@ static WebPEncodingError ApplyPalette(VP8LBitWriter* const bw, // ----------------------------------------------------------------------------- -static int GetHistoBits(const WebPConfig* const config, - const WebPPicture* const pic) { - const int width = pic->width; - const int height = pic->height; +static int GetHistoBits(int method, int use_palette, int width, int height) { const uint64_t hist_size = sizeof(VP8LHistogram); // Make tile size a function of encoding method (Range: 0 to 6). - int histo_bits = 7 - config->method; + int histo_bits = (use_palette ? 9 : 7) - method; while (1) { const uint64_t huff_image_size = VP8LSubSampleSize(width, histo_bits) * VP8LSubSampleSize(height, histo_bits) * @@ -917,13 +918,14 @@ static int GetHistoBits(const WebPConfig* const config, (histo_bits > MAX_HUFFMAN_BITS) ? MAX_HUFFMAN_BITS : histo_bits; } -static void InitEncParams(VP8LEncoder* const enc) { +static void FinishEncParams(VP8LEncoder* const enc) { const WebPConfig* const config = enc->config_; - const WebPPicture* const picture = enc->pic_; + const WebPPicture* const pic = enc->pic_; const int method = config->method; const float quality = config->quality; + const int use_palette = enc->use_palette_; enc->transform_bits_ = (method < 4) ? 5 : (method > 4) ? 3 : 4; - enc->histo_bits_ = GetHistoBits(config, picture); + enc->histo_bits_ = GetHistoBits(method, use_palette, pic->width, pic->height); enc->cache_bits_ = (quality <= 25.f) ? 
0 : 7; } @@ -965,8 +967,6 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config, goto Error; } - InitEncParams(enc); - // --------------------------------------------------------------------------- // Analyze image (entropy, num_palettes etc) @@ -975,6 +975,8 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config, goto Error; } + FinishEncParams(enc); + if (enc->use_palette_) { err = ApplyPalette(bw, enc, quality); if (err != VP8_ENC_OK) goto Error; diff --git a/src/utils/filters.c b/src/utils/filters.c index 9486355a..ad847746 100644 --- a/src/utils/filters.c +++ b/src/utils/filters.c @@ -154,8 +154,7 @@ static void GradientUnfilter(int width, int height, int stride, uint8_t* data) { #undef SANITY_CHECK // ----------------------------------------------------------------------------- -// Quick estimate of a potentially interesting filter mode to try, in addition -// to the default NONE. +// Quick estimate of a potentially interesting filter mode to try. #define SMAX 16 #define SDIFF(a, b) (abs((a) - (b)) >> 4) // Scoring diff, in [0..SMAX) @@ -165,6 +164,7 @@ WEBP_FILTER_TYPE EstimateBestFilter(const uint8_t* data, int i, j; int bins[WEBP_FILTER_LAST][SMAX]; memset(bins, 0, sizeof(bins)); + // We only sample every other pixel. That's enough. for (j = 2; j < height - 1; j += 2) { const uint8_t* const p = data + j * stride;