WebP-Lossless encoding improvements.

Lossy (with Alpha) image compression gets 2.3X speedup.
Compressing lossless images is 20%-40% faster now.

Change-Id: I41f0225838b48ae5c60b1effd1b0de72fecb3ae6
(cherry picked from commit 8eae188a62)
This commit is contained in:
Vikas Arora 2013-05-08 17:19:04 -07:00 committed by James Zern
parent 26e7244221
commit 498d4dd634
6 changed files with 159 additions and 94 deletions

View File

@ -1325,6 +1325,27 @@ void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels,
} }
} }
// Packs the 8-bit entries of 'row' into 32-bit words written to 'dst'.
// Each group of (1 << xbits) consecutive entries shares one output word;
// entry number 'slot' of a group is OR-ed in at bit offset
// 8 + slot * (1 << (3 - xbits)). The top byte of every output word is 0xff.
// With xbits <= 0 there is no grouping: dst[x] = 0xff000000 | (row[x] << 8).
void VP8LBundleColorMap(const uint8_t* const row, int width,
                        int xbits, uint32_t* const dst) {
  const uint32_t kTopByte = 0xff000000;
  int pos;

  if (xbits <= 0) {
    // One entry per output word.
    for (pos = 0; pos < width; ++pos) {
      dst[pos] = kTopByte | (row[pos] << 8);
    }
    return;
  }

  {
    const int bits_per_entry = 1 << (3 - xbits);
    const int slot_mask = (1 << xbits) - 1;
    uint32_t packed = kTopByte;
    for (pos = 0; pos < width; ++pos) {
      const int slot = pos & slot_mask;
      // A new group starts whenever the slot index wraps back to zero.
      packed = (slot == 0) ? kTopByte : packed;
      packed |= row[pos] << (8 + bits_per_entry * slot);
      // Stored on every step so a trailing, partially filled group is kept.
      dst[pos >> xbits] = packed;
    }
  }
}
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
#if defined(__cplusplus) || defined(c_plusplus) #if defined(__cplusplus) || defined(c_plusplus)

View File

@ -83,6 +83,9 @@ static WEBP_INLINE uint32_t VP8LSubPixels(uint32_t a, uint32_t b) {
return (alpha_and_green & 0xff00ff00u) | (red_and_blue & 0x00ff00ffu); return (alpha_and_green & 0xff00ff00u) | (red_and_blue & 0x00ff00ffu);
} }
// Bundles multiple (1, 2, 4 or 8) pixels into a single pixel.
// Reads 'width' 8-bit entries from 'row' and writes the packed 32-bit words
// to 'dst'; (1 << xbits) consecutive entries share one output word.
void VP8LBundleColorMap(const uint8_t* const row, int width,
int xbits, uint32_t* const dst);
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
#if defined(__cplusplus) || defined(c_plusplus) #if defined(__cplusplus) || defined(c_plusplus)

View File

@ -80,7 +80,7 @@ static int EncodeLossless(const uint8_t* const data, int width, int height,
config.lossless = 1; config.lossless = 1;
config.method = effort_level; // impact is very small config.method = effort_level; // impact is very small
// Set a moderate default quality setting for alpha. // Set a moderate default quality setting for alpha.
config.quality = 5.f * effort_level; config.quality = 10.f * effort_level;
assert(config.quality >= 0 && config.quality <= 100.f); assert(config.quality >= 0 && config.quality <= 100.f);
ok = VP8LBitWriterInit(&tmp_bw, (width * height) >> 3); ok = VP8LBitWriterInit(&tmp_bw, (width * height) >> 3);
@ -156,6 +156,25 @@ static void CopyPlane(const uint8_t* src, int src_stride,
} }
} }
// Returns how many distinct byte values occur in the 'width' x 'height'
// plane starting at 'data', where consecutive rows are 'stride' bytes apart.
static int GetNumColors(const uint8_t* data, int width, int height,
                        int stride) {
  uint8_t seen[256] = { 0 };
  int num_distinct = 0;
  int y;
  for (y = 0; y < height; ++y) {
    const uint8_t* const line = data + y * stride;
    int x;
    for (x = 0; x < width; ++x) {
      const uint8_t value = line[x];
      // Count a value only the first time it is encountered.
      if (!seen[value]) {
        seen[value] = 1;
        ++num_distinct;
      }
    }
  }
  return num_distinct;
}
static int EncodeAlpha(VP8Encoder* const enc, static int EncodeAlpha(VP8Encoder* const enc,
int quality, int method, int filter, int quality, int method, int filter,
int effort_level, int effort_level,
@ -207,18 +226,32 @@ static int EncodeAlpha(VP8Encoder* const enc,
VP8BitWriter bw; VP8BitWriter bw;
int test_filter; int test_filter;
uint8_t* filtered_alpha = NULL; uint8_t* filtered_alpha = NULL;
int try_filter_none = (effort_level > 3);
// We always test WEBP_FILTER_NONE first. if (filter == WEBP_FILTER_FAST) { // Quick estimate of the best candidate.
ok = EncodeAlphaInternal(quant_alpha, width, height, const int kMinColorsForFilterNone = 16;
method, WEBP_FILTER_NONE, reduce_levels, const int kMaxColorsForFilterNone = 192;
effort_level, NULL, &bw, pic->stats); const int num_colors = GetNumColors(quant_alpha, width, height, width);
if (!ok) { // For low number of colors, NONE yields better compression.
VP8BitWriterWipeOut(&bw); filter = (num_colors <= kMinColorsForFilterNone) ? WEBP_FILTER_NONE :
goto End; EstimateBestFilter(quant_alpha, width, height, width);
// For large number of colors, try FILTER_NONE in addition to the best
// filter as well.
if (num_colors > kMaxColorsForFilterNone) {
try_filter_none = 1;
}
} }
if (filter == WEBP_FILTER_FAST) { // Quick estimate of a second candidate? // Test for WEBP_FILTER_NONE for higher effort levels.
filter = EstimateBestFilter(quant_alpha, width, height, width); if (try_filter_none || filter == WEBP_FILTER_NONE) {
ok = EncodeAlphaInternal(quant_alpha, width, height,
method, WEBP_FILTER_NONE, reduce_levels,
effort_level, NULL, &bw, pic->stats);
if (!ok) {
VP8BitWriterWipeOut(&bw);
goto End;
}
} }
// Stop? // Stop?
if (filter == WEBP_FILTER_NONE) { if (filter == WEBP_FILTER_NONE) {
@ -234,11 +267,14 @@ static int EncodeAlpha(VP8Encoder* const enc,
// Try the other mode(s). // Try the other mode(s).
{ {
WebPAuxStats best_stats; WebPAuxStats best_stats;
size_t best_score = VP8BitWriterSize(&bw); size_t best_score = try_filter_none ?
VP8BitWriterSize(&bw) : (size_t)~0U;
int wipe_tmp_bw = try_filter_none;
memset(&best_stats, 0, sizeof(best_stats)); // prevent spurious warning memset(&best_stats, 0, sizeof(best_stats)); // prevent spurious warning
if (pic->stats != NULL) best_stats = *pic->stats; if (pic->stats != NULL) best_stats = *pic->stats;
for (test_filter = WEBP_FILTER_HORIZONTAL; for (test_filter =
try_filter_none ? WEBP_FILTER_HORIZONTAL : WEBP_FILTER_NONE;
ok && (test_filter <= WEBP_FILTER_GRADIENT); ok && (test_filter <= WEBP_FILTER_GRADIENT);
++test_filter) { ++test_filter) {
VP8BitWriter tmp_bw; VP8BitWriter tmp_bw;
@ -262,7 +298,10 @@ static int EncodeAlpha(VP8Encoder* const enc,
} else { } else {
VP8BitWriterWipeOut(&bw); VP8BitWriterWipeOut(&bw);
} }
VP8BitWriterWipeOut(&tmp_bw); if (wipe_tmp_bw) {
VP8BitWriterWipeOut(&tmp_bw);
}
wipe_tmp_bw = 1; // For next filter trial for WEBP_FILTER_BEST.
} }
if (pic->stats != NULL) *pic->stats = best_stats; if (pic->stats != NULL) *pic->stats = best_stats;
} }

View File

@ -142,9 +142,10 @@ static void HashChainInsert(HashChain* const p,
} }
static void GetParamsForHashChainFindCopy(int quality, int xsize, static void GetParamsForHashChainFindCopy(int quality, int xsize,
int* window_size, int* iter_pos, int cache_bits, int* window_size,
int* iter_limit) { int* iter_pos, int* iter_limit) {
const int iter_mult = (quality < 27) ? 1 : 1 + ((quality - 27) >> 4); const int iter_mult = (quality < 27) ? 1 : 1 + ((quality - 27) >> 4);
const int iter_neg = -iter_mult * (quality >> 1);
// Limit the backward-ref window size for lower qualities. // Limit the backward-ref window size for lower qualities.
const int max_window_size = (quality > 50) ? WINDOW_SIZE const int max_window_size = (quality > 50) ? WINDOW_SIZE
: (quality > 25) ? (xsize << 8) : (quality > 25) ? (xsize << 8)
@ -152,77 +153,74 @@ static void GetParamsForHashChainFindCopy(int quality, int xsize,
assert(xsize > 0); assert(xsize > 0);
*window_size = (max_window_size > WINDOW_SIZE) ? WINDOW_SIZE *window_size = (max_window_size > WINDOW_SIZE) ? WINDOW_SIZE
: max_window_size; : max_window_size;
*iter_pos = 5 + (quality >> 3); *iter_pos = 8 + (quality >> 3);
*iter_limit = -quality * iter_mult; // For lower entropy images, the rigorous search loop in HashChainFindCopy
// can be relaxed.
*iter_limit = (cache_bits > 0) ? iter_neg : iter_neg / 2;
} }
static int HashChainFindCopy(const HashChain* const p, static int HashChainFindCopy(const HashChain* const p,
int base_position, int xsize, int base_position, int xsize_signed,
const uint32_t* const argb, int maxlen, const uint32_t* const argb, int maxlen,
int window_size, int iter_pos, int iter_limit, int window_size, int iter_pos, int iter_limit,
int* const distance_ptr, int* const distance_ptr,
int* const length_ptr) { int* const length_ptr) {
const uint64_t hash_code = GetPixPairHash64(&argb[base_position]);
int prev_length = 0;
int64_t best_val = 0;
int best_length = 0;
int best_distance = 0;
const uint32_t* const argb_start = argb + base_position; const uint32_t* const argb_start = argb + base_position;
uint64_t best_val = 0;
uint32_t best_length = 1;
uint32_t best_distance = 0;
const uint32_t xsize = (uint32_t)xsize_signed;
const int min_pos = const int min_pos =
(base_position > window_size) ? base_position - window_size : 0; (base_position > window_size) ? base_position - window_size : 0;
int pos; int pos;
assert(xsize > 0); assert(xsize > 0);
for (pos = p->hash_to_first_index_[hash_code]; for (pos = p->hash_to_first_index_[GetPixPairHash64(argb_start)];
pos >= min_pos; pos >= min_pos;
pos = p->chain_[pos]) { pos = p->chain_[pos]) {
int64_t val; uint64_t val;
int curr_length; uint32_t curr_length;
uint32_t distance;
if (iter_pos < 0) { if (iter_pos < 0) {
if (iter_pos < iter_limit || best_val >= 0xff0000) { if (iter_pos < iter_limit || best_val >= 0xff0000) {
break; break;
} }
} }
--iter_pos; --iter_pos;
if (best_length != 0 && if (argb[pos + best_length - 1] != argb_start[best_length - 1]) {
argb[pos + best_length - 1] != argb_start[best_length - 1]) {
continue; continue;
} }
curr_length = FindMatchLength(argb + pos, argb_start, maxlen); curr_length = FindMatchLength(argb + pos, argb_start, maxlen);
if (curr_length < prev_length) { if (curr_length < best_length) {
continue; continue;
} }
val = 65536 * curr_length; distance = (uint32_t)(base_position - pos);
val = curr_length << 16;
// Favoring 2d locality here gives savings for certain images. // Favoring 2d locality here gives savings for certain images.
if (base_position - pos < 9 * xsize) { if (distance < 9 * xsize) {
const int y = (base_position - pos) / xsize; const uint32_t y = distance / xsize;
int x = (base_position - pos) % xsize; uint32_t x = distance % xsize;
if (x > xsize / 2) { if (x > (xsize >> 1)) {
x = xsize - x; x = xsize - x;
} }
if (x <= 7 && x >= -8) { if (x <= 7) {
val += 9 * 9 + 9 * 9;
val -= y * y + x * x; val -= y * y + x * x;
} else {
val -= 9 * 9 + 9 * 9;
} }
} else {
val -= 9 * 9 + 9 * 9;
} }
if (best_val < val) { if (best_val < val) {
prev_length = curr_length;
best_val = val; best_val = val;
best_length = curr_length; best_length = curr_length;
best_distance = base_position - pos; best_distance = distance;
if (curr_length >= MAX_LENGTH) { if (curr_length >= MAX_LENGTH) {
break; break;
} }
if ((best_distance == 1 || best_distance == xsize) && if ((best_distance == 1 || distance == xsize) &&
best_length >= 128) { best_length >= 128) {
break; break;
} }
} }
} }
*distance_ptr = best_distance; *distance_ptr = (int)best_distance;
*length_ptr = best_length; *length_ptr = best_length;
return (best_length >= MIN_LENGTH); return (best_length >= MIN_LENGTH);
} }
@ -284,8 +282,8 @@ static int BackwardReferencesHashChain(int xsize, int ysize,
if (!HashChainInit(hash_chain, pix_count)) goto Error; if (!HashChainInit(hash_chain, pix_count)) goto Error;
refs->size = 0; refs->size = 0;
GetParamsForHashChainFindCopy(quality, xsize, &window_size, &iter_pos, GetParamsForHashChainFindCopy(quality, xsize, cache_bits,
&iter_limit); &window_size, &iter_pos, &iter_limit);
for (i = 0; i < pix_count; ) { for (i = 0; i < pix_count; ) {
// Alternative#1: Code the pixels starting at 'i' using backward reference. // Alternative#1: Code the pixels starting at 'i' using backward reference.
int offset = 0; int offset = 0;
@ -510,8 +508,8 @@ static int BackwardReferencesHashChainDistanceOnly(
// We loop one pixel at a time, but store all currently best points to // We loop one pixel at a time, but store all currently best points to
// non-processed locations from this point. // non-processed locations from this point.
dist_array[0] = 0; dist_array[0] = 0;
GetParamsForHashChainFindCopy(quality, xsize, &window_size, &iter_pos, GetParamsForHashChainFindCopy(quality, xsize, cache_bits,
&iter_limit); &window_size, &iter_pos, &iter_limit);
for (i = 0; i < pix_count; ++i) { for (i = 0; i < pix_count; ++i) {
double prev_cost = 0.0; double prev_cost = 0.0;
int shortmax; int shortmax;
@ -645,8 +643,8 @@ static int BackwardReferencesHashChainFollowChosenPath(
} }
refs->size = 0; refs->size = 0;
GetParamsForHashChainFindCopy(quality, xsize, &window_size, &iter_pos, GetParamsForHashChainFindCopy(quality, xsize, cache_bits,
&iter_limit); &window_size, &iter_pos, &iter_limit);
for (ix = 0; ix < chosen_path_size; ++ix, ++size) { for (ix = 0; ix < chosen_path_size; ++ix, ++size) {
int offset = 0; int offset = 0;
int len = 0; int len = 0;
@ -785,7 +783,9 @@ int VP8LGetBackwardReferences(int width, int height,
*best = refs_lz77; // default guess: lz77 is better *best = refs_lz77; // default guess: lz77 is better
VP8LClearBackwardRefs(&refs_rle); VP8LClearBackwardRefs(&refs_rle);
if (try_lz77_trace_backwards) { if (try_lz77_trace_backwards) {
const int recursion_level = (num_pix < 320 * 200) ? 1 : 0; // Set recursion level for large images using a color cache.
const int recursion_level =
(num_pix < 320 * 200) && (cache_bits > 0) ? 1 : 0;
VP8LBackwardRefs refs_trace; VP8LBackwardRefs refs_trace;
if (!VP8LBackwardRefsAlloc(&refs_trace, num_pix)) { if (!VP8LBackwardRefsAlloc(&refs_trace, num_pix)) {
goto End; goto End;

View File

@ -811,27 +811,6 @@ static WebPEncodingError AllocateTransformBuffer(VP8LEncoder* const enc,
return err; return err;
} }
// Bundles 1, 2, 4 or 8 entries of 'row' (for xbits = 0, 1, 2, 3) into each
// 32-bit word of 'dst'. Entries are OR-ed in starting at bit 8, each one
// (1 << (3 - xbits)) bits above the previous; the top byte is always 0xff.
static void BundleColorMap(const uint8_t* const row, int width,
                           int xbits, uint32_t* const dst) {
  int i = 0;
  if (xbits > 0) {
    const int step = 1 << (3 - xbits);
    const int group_mask = (1 << xbits) - 1;
    uint32_t word = 0xff000000;
    while (i < width) {
      const int sub = i & group_mask;
      // Restart from an empty word at the first entry of every group.
      if (!sub) word = 0xff000000;
      word |= row[i] << (8 + step * sub);
      dst[i >> xbits] = word;
      ++i;
    }
  } else {
    // No bundling: one entry per output word.
    while (i < width) {
      dst[i] = 0xff000000 | (row[i] << 8);
      ++i;
    }
  }
}
// Note: Expects "enc->palette_" to be set properly. // Note: Expects "enc->palette_" to be set properly.
// Also, "enc->palette_" will be modified after this call and should not be used // Also, "enc->palette_" will be modified after this call and should not be used
// later. // later.
@ -848,6 +827,7 @@ static WebPEncodingError ApplyPalette(VP8LBitWriter* const bw,
const int palette_size = enc->palette_size_; const int palette_size = enc->palette_size_;
uint8_t* row = NULL; uint8_t* row = NULL;
int xbits; int xbits;
int is_alpha = 1;
// Replace each input pixel by corresponding palette index. // Replace each input pixel by corresponding palette index.
// This is done line by line. // This is done line by line.
@ -864,19 +844,43 @@ static WebPEncodingError ApplyPalette(VP8LBitWriter* const bw,
row = WebPSafeMalloc((uint64_t)width, sizeof(*row)); row = WebPSafeMalloc((uint64_t)width, sizeof(*row));
if (row == NULL) return VP8_ENC_ERROR_OUT_OF_MEMORY; if (row == NULL) return VP8_ENC_ERROR_OUT_OF_MEMORY;
for (y = 0; y < height; ++y) { for (i = 0; i < palette_size; ++i) {
for (x = 0; x < width; ++x) { if ((palette[i] & 0x00ff00ffu) != 0) {
const uint32_t pix = src[x]; is_alpha = 0;
for (i = 0; i < palette_size; ++i) { break;
if (pix == palette[i]) { }
row[x] = i; }
break;
if (is_alpha) {
int inv_palette[MAX_PALETTE_SIZE] = { 0 };
for (i = 0; i < palette_size; ++i) {
const int color = (palette[i] >> 8) & 0xff;
inv_palette[color] = i;
}
for (y = 0; y < height; ++y) {
for (x = 0; x < width; ++x) {
const int color = (src[x] >> 8) & 0xff;
row[x] = inv_palette[color];
}
VP8LBundleColorMap(row, width, xbits, dst);
src += pic->argb_stride;
dst += enc->current_width_;
}
} else {
for (y = 0; y < height; ++y) {
for (x = 0; x < width; ++x) {
const uint32_t pix = src[x];
for (i = 0; i < palette_size; ++i) {
if (pix == palette[i]) {
row[x] = i;
break;
}
} }
} }
VP8LBundleColorMap(row, width, xbits, dst);
src += pic->argb_stride;
dst += enc->current_width_;
} }
BundleColorMap(row, width, xbits, dst);
src += pic->argb_stride;
dst += enc->current_width_;
} }
// Save palette to bitstream. // Save palette to bitstream.
@ -899,13 +903,10 @@ static WebPEncodingError ApplyPalette(VP8LBitWriter* const bw,
// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------
static int GetHistoBits(const WebPConfig* const config, static int GetHistoBits(int method, int use_palette, int width, int height) {
const WebPPicture* const pic) {
const int width = pic->width;
const int height = pic->height;
const uint64_t hist_size = sizeof(VP8LHistogram); const uint64_t hist_size = sizeof(VP8LHistogram);
// Make tile size a function of encoding method (Range: 0 to 6). // Make tile size a function of encoding method (Range: 0 to 6).
int histo_bits = 7 - config->method; int histo_bits = (use_palette ? 9 : 7) - method;
while (1) { while (1) {
const uint64_t huff_image_size = VP8LSubSampleSize(width, histo_bits) * const uint64_t huff_image_size = VP8LSubSampleSize(width, histo_bits) *
VP8LSubSampleSize(height, histo_bits) * VP8LSubSampleSize(height, histo_bits) *
@ -917,13 +918,14 @@ static int GetHistoBits(const WebPConfig* const config,
(histo_bits > MAX_HUFFMAN_BITS) ? MAX_HUFFMAN_BITS : histo_bits; (histo_bits > MAX_HUFFMAN_BITS) ? MAX_HUFFMAN_BITS : histo_bits;
} }
static void InitEncParams(VP8LEncoder* const enc) { static void FinishEncParams(VP8LEncoder* const enc) {
const WebPConfig* const config = enc->config_; const WebPConfig* const config = enc->config_;
const WebPPicture* const picture = enc->pic_; const WebPPicture* const pic = enc->pic_;
const int method = config->method; const int method = config->method;
const float quality = config->quality; const float quality = config->quality;
const int use_palette = enc->use_palette_;
enc->transform_bits_ = (method < 4) ? 5 : (method > 4) ? 3 : 4; enc->transform_bits_ = (method < 4) ? 5 : (method > 4) ? 3 : 4;
enc->histo_bits_ = GetHistoBits(config, picture); enc->histo_bits_ = GetHistoBits(method, use_palette, pic->width, pic->height);
enc->cache_bits_ = (quality <= 25.f) ? 0 : 7; enc->cache_bits_ = (quality <= 25.f) ? 0 : 7;
} }
@ -965,8 +967,6 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config,
goto Error; goto Error;
} }
InitEncParams(enc);
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
// Analyze image (entropy, num_palettes etc) // Analyze image (entropy, num_palettes etc)
@ -975,6 +975,8 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config,
goto Error; goto Error;
} }
FinishEncParams(enc);
if (enc->use_palette_) { if (enc->use_palette_) {
err = ApplyPalette(bw, enc, quality); err = ApplyPalette(bw, enc, quality);
if (err != VP8_ENC_OK) goto Error; if (err != VP8_ENC_OK) goto Error;

View File

@ -154,8 +154,7 @@ static void GradientUnfilter(int width, int height, int stride, uint8_t* data) {
#undef SANITY_CHECK #undef SANITY_CHECK
// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------
// Quick estimate of a potentially interesting filter mode to try, in addition // Quick estimate of a potentially interesting filter mode to try.
// to the default NONE.
#define SMAX 16 #define SMAX 16
#define SDIFF(a, b) (abs((a) - (b)) >> 4) // Scoring diff, in [0..SMAX) #define SDIFF(a, b) (abs((a) - (b)) >> 4) // Scoring diff, in [0..SMAX)
@ -165,6 +164,7 @@ WEBP_FILTER_TYPE EstimateBestFilter(const uint8_t* data,
int i, j; int i, j;
int bins[WEBP_FILTER_LAST][SMAX]; int bins[WEBP_FILTER_LAST][SMAX];
memset(bins, 0, sizeof(bins)); memset(bins, 0, sizeof(bins));
// We only sample every other pixels. That's enough. // We only sample every other pixels. That's enough.
for (j = 2; j < height - 1; j += 2) { for (j = 2; j < height - 1; j += 2) {
const uint8_t* const p = data + j * stride; const uint8_t* const p = data + j * stride;