Merge "1.5x-2x faster encoding for method 3 and up"

This commit is contained in:
James Zern 2013-03-11 17:14:18 -07:00 committed by Gerrit Code Review
commit 5a21d96741
10 changed files with 346 additions and 184 deletions

1
NEWS
View File

@ -2,6 +2,7 @@
* WebPINewRGB/WebPINewYUVA accept being passed a NULL output buffer * WebPINewRGB/WebPINewYUVA accept being passed a NULL output buffer
and will perform auto-allocation. and will perform auto-allocation.
* default filter option is now '-strong -f 60' * default filter option is now '-strong -f 60'
* encoding speed-up for lossy methods 3 to 6
- 10/30/12: version 0.2.1 - 10/30/12: version 0.2.1
* Various security related fixes * Various security related fixes

1
README
View File

@ -159,6 +159,7 @@ options:
-crop <x> <y> <w> <h> .. crop picture with the given rectangle -crop <x> <y> <w> <h> .. crop picture with the given rectangle
-resize <w> <h> ........ resize picture (after any cropping) -resize <w> <h> ........ resize picture (after any cropping)
-mt .................... use multi-threading if available -mt .................... use multi-threading if available
-low_memory ............ reduce memory usage (slower encoding)
-map <int> ............. print map of extra info. -map <int> ............. print map of extra info.
-print_psnr ............ prints averaged PSNR distortion. -print_psnr ............ prints averaged PSNR distortion.
-print_ssim ............ prints averaged SSIM distortion. -print_ssim ............ prints averaged SSIM distortion.

View File

@ -229,6 +229,7 @@ static void PrintExtraInfoLossless(const WebPPicture* const pic,
} }
static void PrintExtraInfoLossy(const WebPPicture* const pic, int short_output, static void PrintExtraInfoLossy(const WebPPicture* const pic, int short_output,
int full_details,
const char* const file_name) { const char* const file_name) {
const WebPAuxStats* const stats = pic->stats; const WebPAuxStats* const stats = pic->stats;
if (short_output) { if (short_output) {
@ -270,23 +271,27 @@ static void PrintExtraInfoLossy(const WebPPicture* const pic, int short_output,
fprintf(stderr, " Residuals bytes " fprintf(stderr, " Residuals bytes "
"|segment 1|segment 2|segment 3" "|segment 1|segment 2|segment 3"
"|segment 4| total\n"); "|segment 4| total\n");
if (full_details) {
fprintf(stderr, " intra4-coeffs: "); fprintf(stderr, " intra4-coeffs: ");
PrintByteCount(stats->residual_bytes[0], stats->coded_size, totals); PrintByteCount(stats->residual_bytes[0], stats->coded_size, totals);
fprintf(stderr, " intra16-coeffs: "); fprintf(stderr, " intra16-coeffs: ");
PrintByteCount(stats->residual_bytes[1], stats->coded_size, totals); PrintByteCount(stats->residual_bytes[1], stats->coded_size, totals);
fprintf(stderr, " chroma coeffs: "); fprintf(stderr, " chroma coeffs: ");
PrintByteCount(stats->residual_bytes[2], stats->coded_size, totals); PrintByteCount(stats->residual_bytes[2], stats->coded_size, totals);
}
fprintf(stderr, " macroblocks: "); fprintf(stderr, " macroblocks: ");
PrintPercents(stats->segment_size, total); PrintPercents(stats->segment_size, total);
fprintf(stderr, " quantizer: "); fprintf(stderr, " quantizer: ");
PrintValues(stats->segment_quant); PrintValues(stats->segment_quant);
fprintf(stderr, " filter level: "); fprintf(stderr, " filter level: ");
PrintValues(stats->segment_level); PrintValues(stats->segment_level);
if (full_details) {
fprintf(stderr, "------------------+---------"); fprintf(stderr, "------------------+---------");
fprintf(stderr, "+---------+---------+---------+-----------------\n"); fprintf(stderr, "+---------+---------+---------+-----------------\n");
fprintf(stderr, " segments total: "); fprintf(stderr, " segments total: ");
PrintByteCount(totals, stats->coded_size, NULL); PrintByteCount(totals, stats->coded_size, NULL);
} }
}
if (stats->lossless_size > 0) { if (stats->lossless_size > 0) {
PrintFullLosslessInfo(stats, "alpha"); PrintFullLosslessInfo(stats, "alpha");
} }
@ -547,6 +552,7 @@ static void HelpLong(void) {
printf(" -crop <x> <y> <w> <h> .. crop picture with the given rectangle\n"); printf(" -crop <x> <y> <w> <h> .. crop picture with the given rectangle\n");
printf(" -resize <w> <h> ........ resize picture (after any cropping)\n"); printf(" -resize <w> <h> ........ resize picture (after any cropping)\n");
printf(" -mt .................... use multi-threading if available\n"); printf(" -mt .................... use multi-threading if available\n");
printf(" -low_memory ............ reduce memory usage (slower encoding)\n");
#ifdef WEBP_EXPERIMENTAL_FEATURES #ifdef WEBP_EXPERIMENTAL_FEATURES
printf(" -444 / -422 / -gray ..... Change colorspace\n"); printf(" -444 / -422 / -gray ..... Change colorspace\n");
#endif #endif
@ -727,6 +733,8 @@ int main(int argc, const char *argv[]) {
config.emulate_jpeg_size = 1; config.emulate_jpeg_size = 1;
} else if (!strcmp(argv[c], "-mt")) { } else if (!strcmp(argv[c], "-mt")) {
++config.thread_level; // increase thread level ++config.thread_level; // increase thread level
} else if (!strcmp(argv[c], "-low_memory")) {
config.low_memory = 1;
} else if (!strcmp(argv[c], "-strong")) { } else if (!strcmp(argv[c], "-strong")) {
config.filter_type = 1; config.filter_type = 1;
} else if (!strcmp(argv[c], "-nostrong")) { } else if (!strcmp(argv[c], "-nostrong")) {
@ -980,7 +988,7 @@ int main(int argc, const char *argv[]) {
if (config.lossless) { if (config.lossless) {
PrintExtraInfoLossless(&picture, short_output, in_file); PrintExtraInfoLossless(&picture, short_output, in_file);
} else { } else {
PrintExtraInfoLossy(&picture, short_output, in_file); PrintExtraInfoLossy(&picture, short_output, config.low_memory, in_file);
} }
} }
if (!quiet && !short_output && print_distortion >= 0) { // print distortion if (!quiet && !short_output && print_distortion >= 0) { // print distortion

View File

@ -1,5 +1,5 @@
.\" Hey, EMACS: -*- nroff -*- .\" Hey, EMACS: -*- nroff -*-
.TH CWEBP 1 "February 28, 2013" .TH CWEBP 1 "March 8, 2013"
.SH NAME .SH NAME
cwebp \- compress an image file to a WebP file cwebp \- compress an image file to a WebP file
.SH SYNOPSIS .SH SYNOPSIS
@ -86,6 +86,15 @@ with less visual distortion.
Use multi-threading for encoding, if possible. This option is only effective Use multi-threading for encoding, if possible. This option is only effective
when using lossy compression on a source with a transparency channel. when using lossy compression on a source with a transparency channel.
.TP .TP
.B \-low_memory
Reduce the memory usage of lossy encoding; the saving is typically around four
times the compressed size. This will make the encoding slower and the output slightly
different in size and distortion. This flag is only effective for methods
3 and up, and is off by default. Note that leaving this flag off will have
some side effects on the bitstream: it forces certain bitstream features
like number of partitions (forced to 1). Note that a more detailed report
of bitstream size is printed by \fBcwebp\fP when using this option.
.TP
.B \-af .B \-af
Turns auto-filter on. This algorithm will spend additional time optimizing Turns auto-filter on. This algorithm will spend additional time optimizing
the filtering strength to reach a well-balanced quality. the filtering strength to reach a well-balanced quality.
@ -108,6 +117,8 @@ Disable strong filtering (if filtering is being used thanks to the
.BI \-segments " int .BI \-segments " int
Change the number of partitions to use during the segmentation of the Change the number of partitions to use during the segmentation of the
sns algorithm. Segments should be in range 1 to 4. Default value is 4. sns algorithm. Segments should be in range 1 to 4. Default value is 4.
This option has no effect for methods 3 and up, unless \fB\-low_memory\fP
is used.
.TP .TP
.BI \-partition_limit " int .BI \-partition_limit " int
Degrade quality by limiting the number of bits used by some macroblocks. Degrade quality by limiting the number of bits used by some macroblocks.

View File

@ -48,6 +48,7 @@ int WebPConfigInitInternal(WebPConfig* config,
config->image_hint = WEBP_HINT_DEFAULT; config->image_hint = WEBP_HINT_DEFAULT;
config->emulate_jpeg_size = 0; config->emulate_jpeg_size = 0;
config->thread_level = 0; config->thread_level = 0;
config->low_memory = 0;
// TODO(skal): tune. // TODO(skal): tune.
switch (preset) { switch (preset) {
@ -128,6 +129,8 @@ int WebPValidateConfig(const WebPConfig* config) {
return 0; return 0;
if (config->thread_level < 0 || config->thread_level > 1) if (config->thread_level < 0 || config->thread_level > 1)
return 0; return 0;
if (config->low_memory < 0 || config->low_memory > 1)
return 0;
return 1; return 1;
} }

View File

@ -121,7 +121,7 @@ static int RecordCoeffs(int ctx, const VP8Residual* const res) {
} }
while (n <= res->last) { while (n <= res->last) {
int v; int v;
Record(1, s + 0); Record(1, s + 0); // order of record doesn't matter
while ((v = res->coeffs[n++]) == 0) { while ((v = res->coeffs[n++]) == 0) {
Record(0, s + 1); Record(0, s + 1);
s = res->stats[VP8EncBands[n]][0]; s = res->stats[VP8EncBands[n]][0];
@ -175,8 +175,7 @@ static int BranchCost(int nb, int total, int proba) {
return nb * VP8BitCost(1, proba) + (total - nb) * VP8BitCost(0, proba); return nb * VP8BitCost(1, proba) + (total - nb) * VP8BitCost(0, proba);
} }
static int FinalizeTokenProbas(VP8Encoder* const enc) { static int FinalizeTokenProbas(VP8Proba* const proba) {
VP8Proba* const proba = &enc->proba_;
int has_changed = 0; int has_changed = 0;
int size = 0; int size = 0;
int t, b, c, p; int t, b, c, p;
@ -464,8 +463,7 @@ static int PutCoeffs(VP8BitWriter* const bw, int ctx, const VP8Residual* res) {
return 1; return 1;
} }
static void CodeResiduals(VP8BitWriter* const bw, static void CodeResiduals(VP8BitWriter* const bw, VP8EncIterator* const it,
VP8EncIterator* const it,
const VP8ModeScore* const rd) { const VP8ModeScore* const rd) {
int x, y, ch; int x, y, ch;
VP8Residual res; VP8Residual res;
@ -565,7 +563,7 @@ static void RecordResiduals(VP8EncIterator* const it,
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
// Token buffer // Token buffer
#ifdef USE_TOKEN_BUFFER #if !defined(DISABLE_TOKEN_BUFFER)
static void RecordTokens(VP8EncIterator* const it, const VP8ModeScore* const rd, static void RecordTokens(VP8EncIterator* const it, const VP8ModeScore* const rd,
VP8TBuffer* const tokens) { VP8TBuffer* const tokens) {
@ -575,11 +573,13 @@ static void RecordTokens(VP8EncIterator* const it, const VP8ModeScore* const rd,
VP8IteratorNzToBytes(it); VP8IteratorNzToBytes(it);
if (it->mb_->type_ == 1) { // i16x16 if (it->mb_->type_ == 1) { // i16x16
const int ctx = it->top_nz_[8] + it->left_nz_[8];
InitResidual(0, 1, enc, &res); InitResidual(0, 1, enc, &res);
SetResidualCoeffs(rd->y_dc_levels, &res); SetResidualCoeffs(rd->y_dc_levels, &res);
// TODO(skal): FIX -> it->top_nz_[8] = it->left_nz_[8] = it->top_nz_[8] = it->left_nz_[8] =
VP8RecordCoeffTokens(it->top_nz_[8] + it->left_nz_[8], VP8RecordCoeffTokens(ctx, 1,
res.first, res.last, res.coeffs, tokens); res.first, res.last, res.coeffs, tokens);
RecordCoeffs(ctx, &res);
InitResidual(1, 0, enc, &res); InitResidual(1, 0, enc, &res);
} else { } else {
InitResidual(0, 3, enc, &res); InitResidual(0, 3, enc, &res);
@ -591,7 +591,9 @@ static void RecordTokens(VP8EncIterator* const it, const VP8ModeScore* const rd,
const int ctx = it->top_nz_[x] + it->left_nz_[y]; const int ctx = it->top_nz_[x] + it->left_nz_[y];
SetResidualCoeffs(rd->y_ac_levels[x + y * 4], &res); SetResidualCoeffs(rd->y_ac_levels[x + y * 4], &res);
it->top_nz_[x] = it->left_nz_[y] = it->top_nz_[x] = it->left_nz_[y] =
VP8RecordCoeffTokens(ctx, res.first, res.last, res.coeffs, tokens); VP8RecordCoeffTokens(ctx, res.coeff_type,
res.first, res.last, res.coeffs, tokens);
RecordCoeffs(ctx, &res);
} }
} }
@ -603,13 +605,16 @@ static void RecordTokens(VP8EncIterator* const it, const VP8ModeScore* const rd,
const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y]; const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y];
SetResidualCoeffs(rd->uv_levels[ch * 2 + x + y * 2], &res); SetResidualCoeffs(rd->uv_levels[ch * 2 + x + y * 2], &res);
it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] = it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] =
VP8RecordCoeffTokens(ctx, res.first, res.last, res.coeffs, tokens); VP8RecordCoeffTokens(ctx, 2,
res.first, res.last, res.coeffs, tokens);
RecordCoeffs(ctx, &res);
} }
} }
} }
VP8IteratorBytesToNz(it);
} }
#endif // USE_TOKEN_BUFFER #endif // !DISABLE_TOKEN_BUFFER
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
// ExtraInfo map / Debug function // ExtraInfo map / Debug function
@ -679,99 +684,13 @@ static void StoreSideInfo(const VP8EncIterator* const it) {
} }
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
// Main loops // StatLoop(): only collect statistics (number of skips, token usage, ...).
// // This is used for deciding optimal probabilities. It also modifies the
// VP8EncLoop(): does the final bitstream coding. // quantizer value if some target (size, PSNR) was specified.
// Clears the non-zero predictor state of the current macroblock once it has
// been coded as skipped.
static void ResetAfterSkip(VP8EncIterator* const it) {
  const int is_type1 = (it->mb_->type_ == 1);
  if (!is_type1) {
    // Keep only bit 24 (the dc_nz bit); every other predictor bit is dropped.
    *it->nz_ &= (1 << 24);
    return;
  }
  // type_ == 1: wipe all the predictors, including left_nz_[8].
  *it->nz_ = 0;
  it->left_nz_[8] = 0;
}
// Final bitstream coding loop: every macroblock is decimated and either its
// residuals are written to the bit-writer or its predictors are reset (skip).
// Returns the final status; on failure the bit-writers are freed.
int VP8EncLoop(VP8Encoder* const enc) {
  const int avg_bytes_per_mb = 5;   // TODO: have a kTable[quality/10]
  const int part_size =
      enc->mb_w_ * enc->mb_h_ * avg_bytes_per_mb / enc->num_parts_;
  const int dont_use_skip = !enc->proba_.use_skip_proba_;
  const VP8RDLevel rd_opt = enc->rd_opt_level_;
  VP8EncIterator it;
  VP8ModeScore info;
  int ok = 1;
  int part;

  // Set up one bit-writer per partition.
  for (part = 0; part < enc->num_parts_; ++part) {
    VP8BitWriterInit(enc->parts_ + part, part_size);
  }

  ResetStats(enc);
  ResetSSE(enc);

  VP8IteratorInit(enc, &it);
  VP8InitFilter(&it);
  do {
    int is_skipped;
    VP8IteratorImport(&it);
    // Warning! order is important: VP8Decimate() must run first, and only
    // *then* is the skip decision (if any) acted upon.
    is_skipped = VP8Decimate(&it, &info, rd_opt);
    if (is_skipped && !dont_use_skip) {
      ResetAfterSkip(&it);   // reset predictors after a skip
    } else {
      CodeResiduals(it.bw_, &it, &info);
    }
#ifdef WEBP_EXPERIMENTAL_FEATURES
    if (enc->use_layer_) {
      VP8EncCodeLayerBlock(&it);
    }
#endif
    StoreSideInfo(&it);
    VP8StoreFilterStats(&it);
    VP8IteratorExport(&it);
    ok = VP8IteratorProgress(&it, 20);
  } while (ok && VP8IteratorNext(&it, it.yuv_out_));

  if (ok) {
    // Close the partitions and pick up any error they recorded.
    for (part = 0; part < enc->num_parts_; ++part) {
      VP8BitWriterFinish(enc->parts_ + part);
      ok &= !enc->parts_[part].error_;
    }
  }

  if (!ok) {
    // Something bad happened -> need to do some memory cleanup.
    VP8EncFreeBitWriters(enc);
    return ok;
  }

  // All good. Finish up.
  if (enc->pic_->stats) {
    // Convert the per-segment bit counts to (rounded-up) byte counts.
    int type, seg;
    for (type = 0; type <= 2; ++type) {
      for (seg = 0; seg < NUM_MB_SEGMENTS; ++seg) {
        enc->residual_bytes_[type][seg] =
            (int)((it.bit_count_[seg][type] + 7) >> 3);
      }
    }
  }
  VP8AdjustFilterStrength(&it);   // ...and store filter stats.
  return ok;
}
//------------------------------------------------------------------------------
// VP8StatLoop(): only collect statistics (number of skips, token usage, ...)
// This is used for deciding optimal probabilities. It also
// modifies the quantizer value if some target (size, PSNR)
// was specified.
#define kHeaderSizeEstimate (15 + 20 + 10) // TODO: fix better #define kHeaderSizeEstimate (15 + 20 + 10) // TODO: fix better
static int OneStatPass(VP8Encoder* const enc, float q, VP8RDLevel rd_opt, static void SetLoopParams(VP8Encoder* const enc, float q) {
int nb_mbs, float* const PSNR, int percent_delta) {
VP8EncIterator it;
uint64_t size = 0;
uint64_t distortion = 0;
const uint64_t pixel_count = nb_mbs * 384;
// Make sure the quality parameter is inside valid bounds // Make sure the quality parameter is inside valid bounds
if (q < 0.) { if (q < 0.) {
q = 0; q = 0;
@ -785,6 +704,18 @@ static int OneStatPass(VP8Encoder* const enc, float q, VP8RDLevel rd_opt,
ResetStats(enc); ResetStats(enc);
ResetTokenStats(enc); ResetTokenStats(enc);
ResetSSE(enc);
}
static int OneStatPass(VP8Encoder* const enc, float q, VP8RDLevel rd_opt,
int nb_mbs, float* const PSNR, int percent_delta) {
VP8EncIterator it;
uint64_t size = 0;
uint64_t distortion = 0;
const uint64_t pixel_count = nb_mbs * 384;
SetLoopParams(enc, q);
VP8IteratorInit(enc, &it); VP8IteratorInit(enc, &it);
do { do {
VP8ModeScore info; VP8ModeScore info;
@ -800,7 +731,7 @@ static int OneStatPass(VP8Encoder* const enc, float q, VP8RDLevel rd_opt,
return 0; return 0;
} while (VP8IteratorNext(&it, it.yuv_out_) && --nb_mbs > 0); } while (VP8IteratorNext(&it, it.yuv_out_) && --nb_mbs > 0);
size += FinalizeSkipProba(enc); size += FinalizeSkipProba(enc);
size += FinalizeTokenProbas(enc); size += FinalizeTokenProbas(&enc->proba_);
size += enc->segment_hdr_.size_; size += enc->segment_hdr_.size_;
size = ((size + 1024) >> 11) + kHeaderSizeEstimate; size = ((size + 1024) >> 11) + kHeaderSizeEstimate;
@ -813,10 +744,9 @@ static int OneStatPass(VP8Encoder* const enc, float q, VP8RDLevel rd_opt,
// successive refinement increments. // successive refinement increments.
static const int dqs[] = { 20, 15, 10, 8, 6, 4, 2, 1, 0 }; static const int dqs[] = { 20, 15, 10, 8, 6, 4, 2, 1, 0 };
int VP8StatLoop(VP8Encoder* const enc) { static int StatLoop(VP8Encoder* const enc) {
const int method = enc->method_; const int method = enc->method_;
const int do_search = const int do_search = enc->do_search_;
(enc->config_->target_size > 0 || enc->config_->target_PSNR > 0);
const int fast_probe = ((method == 0 || method == 3) && !do_search); const int fast_probe = ((method == 0 || method == 3) && !do_search);
float q = enc->config_->quality; float q = enc->config_->quality;
const int max_passes = enc->config_->pass; const int max_passes = enc->config_->pass;
@ -868,9 +798,172 @@ int VP8StatLoop(VP8Encoder* const enc) {
} }
} }
} }
VP8CalculateLevelCosts(&enc->proba_); // finalize costs
return WebPReportProgress(enc->pic_, final_percent, &enc->percent_); return WebPReportProgress(enc->pic_, final_percent, &enc->percent_);
} }
//------------------------------------------------------------------------------
// Main loops
//
// Estimated bytes per macroblock, indexed by (base_quant_ >> 4).
static const int kAverageBytesPerMB[8] = { 50, 24, 16, 9, 7, 5, 3, 2 };

// Initializes one bit-writer per partition, with an initial size derived
// from the macroblock count and the kAverageBytesPerMB[] estimate.
// Returns the status of the last VP8BitWriterInit() call (non-zero on
// success). If an initialization fails, the bit-writers are released
// before returning 0.
static int PreLoopInitialize(VP8Encoder* const enc) {
  const int bytes_per_mb = kAverageBytesPerMB[enc->base_quant_ >> 4];
  const int part_size =
      enc->mb_w_ * enc->mb_h_ * bytes_per_mb / enc->num_parts_;
  int status = 1;
  int idx = 0;
  // Stop at the first bit-writer that fails to initialize.
  while (status && idx < enc->num_parts_) {
    status = VP8BitWriterInit(enc->parts_ + idx, part_size);
    ++idx;
  }
  if (!status) {
    VP8EncFreeBitWriters(enc);   // malloc error occurred
  }
  return status;
}
// Common epilogue of the coding loops. When 'ok' is non-zero the partitions
// are finished and checked for errors, the residual byte counters are
// filled in (only if the picture carries a stats structure) and the filter
// strength is adjusted. When 'ok' is, or becomes, zero the bit-writers are
// freed instead. Returns the resulting status.
static int PostLoopFinalize(VP8EncIterator* const it, int ok) {
  VP8Encoder* const enc = it->enc_;

  if (ok) {
    // Finalize the partitions, check for extra errors.
    int part;
    for (part = 0; part < enc->num_parts_; ++part) {
      VP8BitWriterFinish(enc->parts_ + part);
      ok &= !enc->parts_[part].error_;
    }
  }

  if (!ok) {
    // Something bad happened -> need to do some memory cleanup.
    VP8EncFreeBitWriters(enc);
    return ok;
  }

  // All good. Finish up.
  if (enc->pic_->stats) {
    // Finalize byte counters: bit counts are rounded up to whole bytes.
    int type, seg;
    for (type = 0; type <= 2; ++type) {
      for (seg = 0; seg < NUM_MB_SEGMENTS; ++seg) {
        enc->residual_bytes_[type][seg] =
            (int)((it->bit_count_[seg][type] + 7) >> 3);
      }
    }
  }
  VP8AdjustFilterStrength(it);   // ...and store filter stats.
  return ok;
}
//------------------------------------------------------------------------------
// VP8EncLoop(): does the final bitstream coding.
// Resets the non-zero predictors of the current macroblock after it has
// been skipped.
static void ResetAfterSkip(VP8EncIterator* const it) {
  if (it->mb_->type_ != 1) {
    // Only the dc_nz bit (bit 24) survives.
    *it->nz_ &= (1 << 24);
    return;
  }
  // type_ == 1: reset all predictors.
  *it->nz_ = 0;
  it->left_nz_[8] = 0;
}
// Final bitstream coding. The bit-writers are set up first, then the
// statistics loop is run, and finally every macroblock is decimated and
// either coded or marked as skipped. Returns the status produced by
// PostLoopFinalize(), or 0 if the bit-writers could not be initialized.
int VP8EncLoop(VP8Encoder* const enc) {
  VP8EncIterator it;
  int ok;

  if (!PreLoopInitialize(enc)) return 0;

  // Stats-collection loop.
  // NOTE(review): the value returned by StatLoop() is not examined here.
  StatLoop(enc);

  VP8IteratorInit(enc, &it);
  VP8InitFilter(&it);
  do {
    VP8ModeScore info;
    const VP8RDLevel rd_opt = enc->rd_opt_level_;
    const int use_skip = enc->proba_.use_skip_proba_;
    int is_skipped;

    VP8IteratorImport(&it);
    // Warning! order is important: VP8Decimate() must be called first, and
    // only *then* is the skip decision (if there is one) acted upon.
    is_skipped = VP8Decimate(&it, &info, rd_opt);
    if (is_skipped && use_skip) {
      ResetAfterSkip(&it);   // reset predictors after a skip
    } else {
      CodeResiduals(it.bw_, &it, &info);
    }
#ifdef WEBP_EXPERIMENTAL_FEATURES
    if (enc->use_layer_) {
      VP8EncCodeLayerBlock(&it);
    }
#endif
    StoreSideInfo(&it);
    VP8StoreFilterStats(&it);
    VP8IteratorExport(&it);
    ok = VP8IteratorProgress(&it, 20);
  } while (ok && VP8IteratorNext(&it, it.yuv_out_));

  return PostLoopFinalize(&it, ok);
}
//------------------------------------------------------------------------------
// Single pass using Token Buffer.
#if !defined(DISABLE_TOKEN_BUFFER)

// Single-pass coding loop using the token buffer: each macroblock is
// decimated and its tokens are recorded into enc->tokens_. The token
// probabilities and level costs are refreshed periodically during the
// pass. Once the loop is over, the recorded tokens are emitted into the
// first partition using the final probabilities.
// Returns the status produced by PostLoopFinalize(), or 0 if the
// bit-writers could not be initialized.
int VP8EncTokenLoop(VP8Encoder* const enc) {
  // refresh the proba 8 times per pass
  const int refresh_period = (enc->mb_w_ * enc->mb_h_) >> 3;
  int mbs_before_refresh = refresh_period;
  VP8Proba* const proba = &enc->proba_;
  const VP8RDLevel rd_opt = enc->rd_opt_level_;
  VP8EncIterator it;
  int ok;

  // Preconditions of this code path.
  assert(enc->num_parts_ == 1);
  assert(enc->use_tokens_);
  assert(proba->use_skip_proba_ == 0);
  assert(rd_opt >= RD_OPT_BASIC);  // otherwise, token-buffer won't be useful
  assert(!enc->do_search_);        // TODO(skal): handle pass and dichotomy

  SetLoopParams(enc, enc->config_->quality);

  ok = PreLoopInitialize(enc);
  if (!ok) return 0;

  VP8IteratorInit(enc, &it);
  VP8InitFilter(&it);
  do {
    VP8ModeScore info;
    VP8IteratorImport(&it);
    mbs_before_refresh -= 1;
    if (mbs_before_refresh < 0) {
      FinalizeTokenProbas(proba);
      VP8CalculateLevelCosts(proba);   // refresh cost tables for rd-opt
      mbs_before_refresh = refresh_period;
    }
    VP8Decimate(&it, &info, rd_opt);
    RecordTokens(&it, &info, &enc->tokens_);
#ifdef WEBP_EXPERIMENTAL_FEATURES
    if (enc->use_layer_) {
      VP8EncCodeLayerBlock(&it);
    }
#endif
    StoreSideInfo(&it);
    VP8StoreFilterStats(&it);
    VP8IteratorExport(&it);
    ok = VP8IteratorProgress(&it, 20);
  } while (ok && VP8IteratorNext(&it, it.yuv_out_));

  // Progress is only reported if the loop itself succeeded; tokens are only
  // emitted if that report succeeded too.
  if (ok &&
      WebPReportProgress(enc->pic_, enc->percent_ + 20, &enc->percent_)) {
    FinalizeTokenProbas(proba);
    ok = VP8EmitTokens(&enc->tokens_, enc->parts_ + 0,
                       (const uint8_t*)proba->coeffs_, 1);
  } else {
    ok = 0;
  }

  return PostLoopFinalize(&it, ok);
}

#else

// Placeholder used when the token buffer is compiled out: always fails.
int VP8EncTokenLoop(VP8Encoder* const enc) {
  (void)enc;
  return 0;   // we shouldn't be here.
}

#endif    // DISABLE_TOKEN_BUFFER
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
#if defined(__cplusplus) || defined(c_plusplus) #if defined(__cplusplus) || defined(c_plusplus)

View File

@ -11,6 +11,7 @@
// or a later-to-be-determined after statistics have been collected. // or a later-to-be-determined after statistics have been collected.
// For dynamic probability, we just record the slot id (idx) for the probability // For dynamic probability, we just record the slot id (idx) for the probability
// value in the final probability array (uint8_t* probas in VP8EmitTokens). // value in the final probability array (uint8_t* probas in VP8EmitTokens).
//
// Author: Skal (pascal.massimino@gmail.com) // Author: Skal (pascal.massimino@gmail.com)
#include <assert.h> #include <assert.h>
@ -19,12 +20,15 @@
#include "./vp8enci.h" #include "./vp8enci.h"
#if defined(__cplusplus) || defined(c_plusplus) #if defined(__cplusplus) || defined(c_plusplus)
extern "C" { extern "C" {
#endif #endif
#define MAX_NUM_TOKEN 2048 // max number of token per page #if !defined(DISABLE_TOKEN_BUFFER)
// we use pages to reduce the number of memcpy()
#define MAX_NUM_TOKEN 8192 // max number of token per page
#define FIXED_PROBA_BIT (1u << 14)
struct VP8Tokens { struct VP8Tokens {
uint16_t tokens_[MAX_NUM_TOKEN]; // bit#15: bit uint16_t tokens_[MAX_NUM_TOKEN]; // bit#15: bit
@ -35,8 +39,6 @@ struct VP8Tokens {
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
#ifdef USE_TOKEN_BUFFER
void VP8TBufferInit(VP8TBuffer* const b) { void VP8TBufferInit(VP8TBuffer* const b) {
b->tokens_ = NULL; b->tokens_ = NULL;
b->pages_ = NULL; b->pages_ = NULL;
@ -73,32 +75,36 @@ static int TBufferNewPage(VP8TBuffer* const b) {
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
#define TOKEN_ID(b, ctx, p) ((p) + NUM_PROBAS * ((ctx) + (b) * NUM_CTX)) #define TOKEN_ID(t, b, ctx, p) \
((p) + NUM_PROBAS * ((ctx) + NUM_CTX * ((b) + NUM_BANDS * (t))))
static WEBP_INLINE int VP8AddToken(VP8TBuffer* const b, static WEBP_INLINE int AddToken(VP8TBuffer* const b,
int bit, int proba_idx) { int bit, uint32_t proba_idx) {
assert(proba_idx < (1 << 14)); assert(proba_idx < FIXED_PROBA_BIT);
assert(bit == 0 || bit == 1);
if (b->left_ > 0 || TBufferNewPage(b)) { if (b->left_ > 0 || TBufferNewPage(b)) {
const int slot = --b->left_; const int slot = --b->left_;
b->tokens_[slot] = ((!bit) << 15) | proba_idx; b->tokens_[slot] = (bit << 15) | proba_idx;
} }
return bit; return bit;
} }
static WEBP_INLINE void VP8AddConstantToken(VP8TBuffer* const b, static WEBP_INLINE void AddConstantToken(VP8TBuffer* const b,
int bit, int proba) { int bit, int proba) {
assert(proba < 256); assert(proba < 256);
assert(bit == 0 || bit == 1);
if (b->left_ > 0 || TBufferNewPage(b)) { if (b->left_ > 0 || TBufferNewPage(b)) {
const int slot = --b->left_; const int slot = --b->left_;
b->tokens_[slot] = (bit << 15) | (1 << 14) | proba; b->tokens_[slot] = (bit << 15) | FIXED_PROBA_BIT | proba;
} }
} }
int VP8RecordCoeffTokens(int ctx, int first, int last, int VP8RecordCoeffTokens(int ctx, int coeff_type, int first, int last,
const int16_t* const coeffs, VP8TBuffer* tokens) { const int16_t* const coeffs,
VP8TBuffer* const tokens) {
int n = first; int n = first;
int b = VP8EncBands[n]; uint32_t base_id = TOKEN_ID(coeff_type, n, ctx, 0);
if (!VP8AddToken(tokens, last >= 0, TOKEN_ID(b, ctx, 0))) { if (!AddToken(tokens, last >= 0, base_id + 0)) {
return 0; return 0;
} }
@ -106,64 +112,62 @@ int VP8RecordCoeffTokens(int ctx, int first, int last,
const int c = coeffs[n++]; const int c = coeffs[n++];
const int sign = c < 0; const int sign = c < 0;
int v = sign ? -c : c; int v = sign ? -c : c;
const int base_id = TOKEN_ID(b, ctx, 0); if (!AddToken(tokens, v != 0, base_id + 1)) {
if (!VP8AddToken(tokens, v != 0, base_id + 1)) {
b = VP8EncBands[n];
ctx = 0; ctx = 0;
base_id = TOKEN_ID(coeff_type, VP8EncBands[n], ctx, 0);
continue; continue;
} }
if (!VP8AddToken(tokens, v > 1, base_id + 2)) { if (!AddToken(tokens, v > 1, base_id + 2)) {
b = VP8EncBands[n];
ctx = 1; ctx = 1;
} else { } else {
if (!VP8AddToken(tokens, v > 4, base_id + 3)) { if (!AddToken(tokens, v > 4, base_id + 3)) {
if (VP8AddToken(tokens, v != 2, base_id + 4)) if (AddToken(tokens, v != 2, base_id + 4))
VP8AddToken(tokens, v == 4, base_id + 5); AddToken(tokens, v == 4, base_id + 5);
} else if (!VP8AddToken(tokens, v > 10, base_id + 6)) { } else if (!AddToken(tokens, v > 10, base_id + 6)) {
if (!VP8AddToken(tokens, v > 6, base_id + 7)) { if (!AddToken(tokens, v > 6, base_id + 7)) {
VP8AddConstantToken(tokens, v == 6, 159); AddConstantToken(tokens, v == 6, 159);
} else { } else {
VP8AddConstantToken(tokens, v >= 9, 165); AddConstantToken(tokens, v >= 9, 165);
VP8AddConstantToken(tokens, !(v & 1), 145); AddConstantToken(tokens, !(v & 1), 145);
} }
} else { } else {
int mask; int mask;
const uint8_t* tab; const uint8_t* tab;
if (v < 3 + (8 << 1)) { // VP8Cat3 (3b) if (v < 3 + (8 << 1)) { // VP8Cat3 (3b)
VP8AddToken(tokens, 0, base_id + 8); AddToken(tokens, 0, base_id + 8);
VP8AddToken(tokens, 0, base_id + 9); AddToken(tokens, 0, base_id + 9);
v -= 3 + (8 << 0); v -= 3 + (8 << 0);
mask = 1 << 2; mask = 1 << 2;
tab = VP8Cat3; tab = VP8Cat3;
} else if (v < 3 + (8 << 2)) { // VP8Cat4 (4b) } else if (v < 3 + (8 << 2)) { // VP8Cat4 (4b)
VP8AddToken(tokens, 0, base_id + 8); AddToken(tokens, 0, base_id + 8);
VP8AddToken(tokens, 1, base_id + 9); AddToken(tokens, 1, base_id + 9);
v -= 3 + (8 << 1); v -= 3 + (8 << 1);
mask = 1 << 3; mask = 1 << 3;
tab = VP8Cat4; tab = VP8Cat4;
} else if (v < 3 + (8 << 3)) { // VP8Cat5 (5b) } else if (v < 3 + (8 << 3)) { // VP8Cat5 (5b)
VP8AddToken(tokens, 1, base_id + 8); AddToken(tokens, 1, base_id + 8);
VP8AddToken(tokens, 0, base_id + 10); AddToken(tokens, 0, base_id + 10);
v -= 3 + (8 << 2); v -= 3 + (8 << 2);
mask = 1 << 4; mask = 1 << 4;
tab = VP8Cat5; tab = VP8Cat5;
} else { // VP8Cat6 (11b) } else { // VP8Cat6 (11b)
VP8AddToken(tokens, 1, base_id + 8); AddToken(tokens, 1, base_id + 8);
VP8AddToken(tokens, 1, base_id + 10); AddToken(tokens, 1, base_id + 10);
v -= 3 + (8 << 3); v -= 3 + (8 << 3);
mask = 1 << 10; mask = 1 << 10;
tab = VP8Cat6; tab = VP8Cat6;
} }
while (mask) { while (mask) {
VP8AddConstantToken(tokens, !!(v & mask), *tab++); AddConstantToken(tokens, !!(v & mask), *tab++);
mask >>= 1; mask >>= 1;
} }
} }
ctx = 2; ctx = 2;
} }
b = VP8EncBands[n]; AddConstantToken(tokens, sign, 128);
VP8AddConstantToken(tokens, sign, 128); base_id = TOKEN_ID(coeff_type, VP8EncBands[n], ctx, 0);
if (n == 16 || !VP8AddToken(tokens, n <= last, TOKEN_ID(b, ctx, 0))) { if (n == 16 || !AddToken(tokens, n <= last, base_id + 0)) {
return 1; // EOB return 1; // EOB
} }
} }
@ -173,6 +177,9 @@ int VP8RecordCoeffTokens(int ctx, int first, int last,
#undef TOKEN_ID #undef TOKEN_ID
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
// This function works, but isn't currently used. Saved for later.
#if 0
static void Record(int bit, proba_t* const stats) { static void Record(int bit, proba_t* const stats) {
proba_t p = *stats; proba_t p = *stats;
@ -191,7 +198,7 @@ void VP8TokenToStats(const VP8TBuffer* const b, proba_t* const stats) {
int n = MAX_NUM_TOKEN; int n = MAX_NUM_TOKEN;
while (n-- > N) { while (n-- > N) {
const uint16_t token = p->tokens_[n]; const uint16_t token = p->tokens_[n];
if (!(token & (1 << 14))) { if (!(token & FIXED_PROBA_BIT)) {
Record((token >> 15) & 1, stats + (token & 0x3fffu)); Record((token >> 15) & 1, stats + (token & 0x3fffu));
} }
} }
@ -199,7 +206,12 @@ void VP8TokenToStats(const VP8TBuffer* const b, proba_t* const stats) {
} }
} }
int VP8EmitTokens(const VP8TBuffer* const b, VP8BitWriter* const bw, #endif // 0
//------------------------------------------------------------------------------
// Final coding pass, with known probabilities
int VP8EmitTokens(VP8TBuffer* const b, VP8BitWriter* const bw,
const uint8_t* const probas, int final_pass) { const uint8_t* const probas, int final_pass) {
const VP8Tokens* p = b->pages_; const VP8Tokens* p = b->pages_;
(void)final_pass; (void)final_pass;
@ -210,19 +222,23 @@ int VP8EmitTokens(const VP8TBuffer* const b, VP8BitWriter* const bw,
int n = MAX_NUM_TOKEN; int n = MAX_NUM_TOKEN;
while (n-- > N) { while (n-- > N) {
const uint16_t token = p->tokens_[n]; const uint16_t token = p->tokens_[n];
if (token & (1 << 14)) { const int bit = (token >> 15) & 1;
VP8PutBit(bw, (token >> 15) & 1, token & 0x3fffu); // constant proba if (token & FIXED_PROBA_BIT) {
VP8PutBit(bw, bit, token & 0xffu); // constant proba
} else { } else {
VP8PutBit(bw, (token >> 15) & 1, probas[token & 0x3fffu]); VP8PutBit(bw, bit, probas[token & 0x3fffu]);
} }
} }
if (final_pass) free((void*)p);
p = next; p = next;
} }
if (final_pass) b->pages_ = NULL;
return 1; return 1;
} }
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
#else
#else // DISABLE_TOKEN_BUFFER
void VP8TBufferInit(VP8TBuffer* const b) { void VP8TBufferInit(VP8TBuffer* const b) {
(void)b; (void)b;
@ -231,7 +247,7 @@ void VP8TBufferClear(VP8TBuffer* const b) {
(void)b; (void)b;
} }
#endif // USE_TOKEN_BUFFER #endif // !DISABLE_TOKEN_BUFFER
#if defined(__cplusplus) || defined(c_plusplus) #if defined(__cplusplus) || defined(c_plusplus)
} // extern "C" } // extern "C"

View File

@ -175,6 +175,9 @@ struct VP8Histogram {
int distribution[MAX_COEFF_THRESH + 1]; int distribution[MAX_COEFF_THRESH + 1];
}; };
// Uncomment the following to remove token-buffer code:
// #define DISABLE_TOKEN_BUFFER
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
// Headers // Headers
@ -327,12 +330,10 @@ void VP8SetSegment(const VP8EncIterator* const it, int segment);
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
// Paginated token buffer // Paginated token buffer
// WIP:#define USE_TOKEN_BUFFER
typedef struct VP8Tokens VP8Tokens; // struct details in token.c typedef struct VP8Tokens VP8Tokens; // struct details in token.c
typedef struct { typedef struct {
#ifdef USE_TOKEN_BUFFER #if !defined(DISABLE_TOKEN_BUFFER)
VP8Tokens* pages_; // first page VP8Tokens* pages_; // first page
VP8Tokens** last_page_; // last page VP8Tokens** last_page_; // last page
uint16_t* tokens_; // set to (*last_page_)->tokens_ uint16_t* tokens_; // set to (*last_page_)->tokens_
@ -344,15 +345,22 @@ typedef struct {
void VP8TBufferInit(VP8TBuffer* const b); // initialize an empty buffer void VP8TBufferInit(VP8TBuffer* const b); // initialize an empty buffer
void VP8TBufferClear(VP8TBuffer* const b); // de-allocate pages memory void VP8TBufferClear(VP8TBuffer* const b); // de-allocate pages memory
#ifdef USE_TOKEN_BUFFER #if !defined(DISABLE_TOKEN_BUFFER)
int VP8EmitTokens(const VP8TBuffer* const b, VP8BitWriter* const bw, // Finalizes bitstream when probabilities are known.
// Deletes the allocated token memory if final_pass is true.
int VP8EmitTokens(VP8TBuffer* const b, VP8BitWriter* const bw,
const uint8_t* const probas, int final_pass); const uint8_t* const probas, int final_pass);
int VP8RecordCoeffTokens(int ctx, int first, int last,
const int16_t* const coeffs, VP8TBuffer* tokens); // record the coding of coefficients without knowing the probabilities yet
int VP8RecordCoeffTokens(int ctx, int coeff_type, int first, int last,
const int16_t* const coeffs,
VP8TBuffer* const tokens);
// unused for now
void VP8TokenToStats(const VP8TBuffer* const b, proba_t* const stats); void VP8TokenToStats(const VP8TBuffer* const b, proba_t* const stats);
#endif // USE_TOKEN_BUFFER #endif // !DISABLE_TOKEN_BUFFER
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
// VP8Encoder // VP8Encoder
@ -377,12 +385,10 @@ struct VP8Encoder {
// per-partition boolean decoders. // per-partition boolean decoders.
VP8BitWriter bw_; // part0 VP8BitWriter bw_; // part0
VP8BitWriter parts_[MAX_NUM_PARTITIONS]; // token partitions VP8BitWriter parts_[MAX_NUM_PARTITIONS]; // token partitions
VP8TBuffer tokens_; // token buffer
int percent_; // for progress int percent_; // for progress
int use_tokens_; // if true, use Token buffer
VP8TBuffer tokens_; // token buffer
// transparency blob // transparency blob
int has_alpha_; int has_alpha_;
uint8_t* alpha_data_; // non-NULL if transparency is present uint8_t* alpha_data_; // non-NULL if transparency is present
@ -419,6 +425,8 @@ struct VP8Encoder {
VP8RDLevel rd_opt_level_; // Deduced from method_. VP8RDLevel rd_opt_level_; // Deduced from method_.
int max_i4_header_bits_; // partition #0 safeness factor int max_i4_header_bits_; // partition #0 safeness factor
int thread_level_; // derived from config->thread_level int thread_level_; // derived from config->thread_level
int do_search_; // derived from config->target_XXX
int use_tokens_; // if true, use token buffer
// Memory // Memory
VP8MBInfo* mb_info_; // contextual macroblock infos (mb_w_ + 1) VP8MBInfo* mb_info_; // contextual macroblock infos (mb_w_ + 1)
@ -480,7 +488,7 @@ int VP8GetCostLuma4(VP8EncIterator* const it, const int16_t levels[16]);
int VP8GetCostUV(VP8EncIterator* const it, const VP8ModeScore* const rd); int VP8GetCostUV(VP8EncIterator* const it, const VP8ModeScore* const rd);
// Main coding calls // Main coding calls
int VP8EncLoop(VP8Encoder* const enc); int VP8EncLoop(VP8Encoder* const enc);
int VP8StatLoop(VP8Encoder* const enc); int VP8EncTokenLoop(VP8Encoder* const enc);
// in webpenc.c // in webpenc.c
// Assign an error code to a picture. Return false for convenience. // Assign an error code to a picture. Return false for convenience.

View File

@ -109,14 +109,17 @@ static void ResetBoundaryPredictions(VP8Encoder* const enc) {
//-------------------+---+---+---+---+---+---+---+ //-------------------+---+---+---+---+---+---+---+
// rd-opt i4/16 | | | ~ | x | x | x | x | // rd-opt i4/16 | | | ~ | x | x | x | x |
//-------------------+---+---+---+---+---+---+---+ //-------------------+---+---+---+---+---+---+---+
// token buffer (opt)| | | | x | x | x | x |
//-------------------+---+---+---+---+---+---+---+
// Trellis | | | | | | x |Ful| // Trellis | | | | | | x |Ful|
//-------------------+---+---+---+---+---+---+---+ //-------------------+---+---+---+---+---+---+---+
// full-SNS | | | | | x | x | x | // full-SNS | | | | | x | x | x |
//-------------------+---+---+---+---+---+---+---+ //-------------------+---+---+---+---+---+---+---+
static void MapConfigToTools(VP8Encoder* const enc) { static void MapConfigToTools(VP8Encoder* const enc) {
const int method = enc->config_->method; const WebPConfig* const config = enc->config_;
const int limit = 100 - enc->config_->partition_limit; const int method = config->method;
const int limit = 100 - config->partition_limit;
enc->method_ = method; enc->method_ = method;
enc->rd_opt_level_ = (method >= 6) ? RD_OPT_TRELLIS_ALL enc->rd_opt_level_ = (method >= 6) ? RD_OPT_TRELLIS_ALL
: (method >= 5) ? RD_OPT_TRELLIS : (method >= 5) ? RD_OPT_TRELLIS
@ -126,7 +129,17 @@ static void MapConfigToTools(VP8Encoder* const enc) {
256 * 16 * 16 * // upper bound: up to 16bit per 4x4 block 256 * 16 * 16 * // upper bound: up to 16bit per 4x4 block
(limit * limit) / (100 * 100); // ... modulated with a quadratic curve. (limit * limit) / (100 * 100); // ... modulated with a quadratic curve.
enc->thread_level_ = enc->config_->thread_level; enc->thread_level_ = config->thread_level;
enc->do_search_ = (config->target_size > 0 || config->target_PSNR > 0);
if (!config->low_memory) {
#if !defined(DISABLE_TOKEN_BUFFER)
enc->use_tokens_ = (method >= 3) && !enc->do_search_;
#endif
if (enc->use_tokens_) {
enc->num_parts_ = 1; // doesn't work with multi-partition
}
}
} }
// Memory scaling with dimensions: // Memory scaling with dimensions:
@ -265,6 +278,7 @@ static VP8Encoder* InitVP8Encoder(const WebPConfig* const config,
VP8EncInitLayer(enc); VP8EncInitLayer(enc);
#endif #endif
VP8TBufferInit(&enc->tokens_);
return enc; return enc;
} }
@ -275,6 +289,7 @@ static int DeleteVP8Encoder(VP8Encoder* enc) {
#ifdef WEBP_EXPERIMENTAL_FEATURES #ifdef WEBP_EXPERIMENTAL_FEATURES
VP8EncDeleteLayer(enc); VP8EncDeleteLayer(enc);
#endif #endif
VP8TBufferClear(&enc->tokens_);
free(enc); free(enc);
} }
return ok; return ok;
@ -373,11 +388,16 @@ int WebPEncode(const WebPConfig* config, WebPPicture* pic) {
// Analysis is done, proceed to actual coding. // Analysis is done, proceed to actual coding.
ok = ok && VP8EncStartAlpha(enc); // possibly done in parallel ok = ok && VP8EncStartAlpha(enc); // possibly done in parallel
ok = ok && VP8StatLoop(enc) && VP8EncLoop(enc); if (!enc->use_tokens_) {
ok = VP8EncLoop(enc);
} else {
ok = VP8EncTokenLoop(enc);
}
ok = ok && VP8EncFinishAlpha(enc); ok = ok && VP8EncFinishAlpha(enc);
#ifdef WEBP_EXPERIMENTAL_FEATURES #ifdef WEBP_EXPERIMENTAL_FEATURES
ok = ok && VP8EncFinishLayer(enc); ok = ok && VP8EncFinishLayer(enc);
#endif #endif
ok = ok && VP8EncWrite(enc); ok = ok && VP8EncWrite(enc);
StoreStats(enc); StoreStats(enc);
if (!ok) { if (!ok) {

View File

@ -18,7 +18,7 @@
extern "C" { extern "C" {
#endif #endif
#define WEBP_ENCODER_ABI_VERSION 0x0200 // MAJOR(8b) + MINOR(8b) #define WEBP_ENCODER_ABI_VERSION 0x0201 // MAJOR(8b) + MINOR(8b)
#if !(defined(__cplusplus) || defined(c_plusplus)) #if !(defined(__cplusplus) || defined(c_plusplus))
typedef enum WebPImageHint WebPImageHint; typedef enum WebPImageHint WebPImageHint;
@ -126,8 +126,9 @@ struct WebPConfig {
// JPEG compression. Generally, the output size will // JPEG compression. Generally, the output size will
// be similar but the degradation will be lower. // be similar but the degradation will be lower.
int thread_level; // If non-zero, try and use multi-threaded encoding. int thread_level; // If non-zero, try and use multi-threaded encoding.
int low_memory; // If set, reduce memory usage (but increase CPU use).
uint32_t pad[6]; // padding for later use uint32_t pad[5]; // padding for later use
}; };
// Enumerate some predefined settings for WebPConfig, depending on the type // Enumerate some predefined settings for WebPConfig, depending on the type