mirror of
https://github.com/webmproject/libwebp.git
synced 2024-11-20 04:18:26 +01:00
rebalance method tools (-m) for methods [0..4]
(methods 5 and 6 are still untouched).

Methods #0 and #1 got much faster.
Method #2 gets vastly improved in quality.
Method #3 is noticeably faster, for slightly lower quality.
Method #4 (default) is 10-20% faster for comparable quality.

+ update the internal doc about the methods' tools.

Example of speed difference (time to encode picture):

Method | Before | After
-m 0   | 1.272s | 0.517s
-m 1   | 1.295s | 0.623s
-m 2   | 2.217s | 0.834s
-m 3   | 2.816s | 2.243s
-m 4   | 3.235s | 3.014s
-m 5   | 3.668s | 3.654s
-m 6   | 8.296s | 8.235s

Change-Id: Ic41fda5de65066b3a6586cb8ae1ebb0206d47fe0
This commit is contained in:
parent
5189957e07
commit
58ca6f65b7
@ -223,14 +223,18 @@ static void AssignSegments(VP8Encoder* const enc,
|
|||||||
// susceptibility and set best modes for this macroblock.
|
// susceptibility and set best modes for this macroblock.
|
||||||
// Segment assignment is done later.
|
// Segment assignment is done later.
|
||||||
|
|
||||||
// Number of modes to inspect for alpha_ evaluation. For high-quality settings,
|
// Number of modes to inspect for alpha_ evaluation. For high-quality settings
|
||||||
// we don't need to test all the possible modes during the analysis phase.
|
// (method >= FAST_ANALYSIS_METHOD) we don't need to test all the possible modes
|
||||||
|
// during the analysis phase.
|
||||||
|
#define FAST_ANALYSIS_METHOD 4 // method above which we do partial analysis
|
||||||
#define MAX_INTRA16_MODE 2
|
#define MAX_INTRA16_MODE 2
|
||||||
#define MAX_INTRA4_MODE 2
|
#define MAX_INTRA4_MODE 2
|
||||||
#define MAX_UV_MODE 2
|
#define MAX_UV_MODE 2
|
||||||
|
|
||||||
static int MBAnalyzeBestIntra16Mode(VP8EncIterator* const it) {
|
static int MBAnalyzeBestIntra16Mode(VP8EncIterator* const it) {
|
||||||
const int max_mode = (it->enc_->method_ >= 3) ? MAX_INTRA16_MODE : 4;
|
const int max_mode =
|
||||||
|
(it->enc_->method_ >= FAST_ANALYSIS_METHOD) ? MAX_INTRA16_MODE
|
||||||
|
: NUM_PRED_MODES;
|
||||||
int mode;
|
int mode;
|
||||||
int best_alpha = DEFAULT_ALPHA;
|
int best_alpha = DEFAULT_ALPHA;
|
||||||
int best_mode = 0;
|
int best_mode = 0;
|
||||||
@ -256,7 +260,9 @@ static int MBAnalyzeBestIntra16Mode(VP8EncIterator* const it) {
|
|||||||
static int MBAnalyzeBestIntra4Mode(VP8EncIterator* const it,
|
static int MBAnalyzeBestIntra4Mode(VP8EncIterator* const it,
|
||||||
int best_alpha) {
|
int best_alpha) {
|
||||||
uint8_t modes[16];
|
uint8_t modes[16];
|
||||||
const int max_mode = (it->enc_->method_ >= 3) ? MAX_INTRA4_MODE : NUM_BMODES;
|
const int max_mode =
|
||||||
|
(it->enc_->method_ >= FAST_ANALYSIS_METHOD) ? MAX_INTRA4_MODE
|
||||||
|
: NUM_BMODES;
|
||||||
int i4_alpha;
|
int i4_alpha;
|
||||||
VP8Histogram total_histo = { { 0 } };
|
VP8Histogram total_histo = { { 0 } };
|
||||||
int cur_histo = 0;
|
int cur_histo = 0;
|
||||||
@ -298,7 +304,9 @@ static int MBAnalyzeBestIntra4Mode(VP8EncIterator* const it,
|
|||||||
static int MBAnalyzeBestUVMode(VP8EncIterator* const it) {
|
static int MBAnalyzeBestUVMode(VP8EncIterator* const it) {
|
||||||
int best_alpha = DEFAULT_ALPHA;
|
int best_alpha = DEFAULT_ALPHA;
|
||||||
int best_mode = 0;
|
int best_mode = 0;
|
||||||
const int max_mode = (it->enc_->method_ >= 3) ? MAX_UV_MODE : 4;
|
const int max_mode =
|
||||||
|
(it->enc_->method_ >= FAST_ANALYSIS_METHOD) ? MAX_UV_MODE
|
||||||
|
: NUM_PRED_MODES;
|
||||||
int mode;
|
int mode;
|
||||||
VP8MakeChroma8Preds(it);
|
VP8MakeChroma8Preds(it);
|
||||||
for (mode = 0; mode < max_mode; ++mode) {
|
for (mode = 0; mode < max_mode; ++mode) {
|
||||||
@ -328,7 +336,7 @@ static void MBAnalyze(VP8EncIterator* const it,
|
|||||||
VP8SetSegment(it, 0); // default segment, spec-wise.
|
VP8SetSegment(it, 0); // default segment, spec-wise.
|
||||||
|
|
||||||
best_alpha = MBAnalyzeBestIntra16Mode(it);
|
best_alpha = MBAnalyzeBestIntra16Mode(it);
|
||||||
if (enc->method_ != 3) {
|
if (enc->method_ >= 5) {
|
||||||
// We go and make a fast decision for intra4/intra16.
|
// We go and make a fast decision for intra4/intra16.
|
||||||
// It's usually not a good and definitive pick, but helps seeding the stats
|
// It's usually not a good and definitive pick, but helps seeding the stats
|
||||||
// about level bit-cost.
|
// about level bit-cost.
|
||||||
@ -383,7 +391,7 @@ int VP8EncAnalyze(VP8Encoder* const enc) {
|
|||||||
const int do_segments =
|
const int do_segments =
|
||||||
enc->config_->emulate_jpeg_size || // We need the complexity evaluation.
|
enc->config_->emulate_jpeg_size || // We need the complexity evaluation.
|
||||||
(enc->segment_hdr_.num_segments_ > 1) ||
|
(enc->segment_hdr_.num_segments_ > 1) ||
|
||||||
(enc->method_ <= 2); // for methods 0,1,2, we need preds_[] to be filled.
|
(enc->method_ == 0); // for method 0, we need preds_[] to be filled.
|
||||||
enc->alpha_ = 0;
|
enc->alpha_ = 0;
|
||||||
enc->uv_alpha_ = 0;
|
enc->uv_alpha_ = 0;
|
||||||
if (do_segments) {
|
if (do_segments) {
|
||||||
|
@ -811,9 +811,10 @@ static int OneStatPass(VP8Encoder* const enc, float q, VP8RDLevel rd_opt,
|
|||||||
static const int dqs[] = { 20, 15, 10, 8, 6, 4, 2, 1, 0 };
|
static const int dqs[] = { 20, 15, 10, 8, 6, 4, 2, 1, 0 };
|
||||||
|
|
||||||
int VP8StatLoop(VP8Encoder* const enc) {
|
int VP8StatLoop(VP8Encoder* const enc) {
|
||||||
|
const int method = enc->method_;
|
||||||
const int do_search =
|
const int do_search =
|
||||||
(enc->config_->target_size > 0 || enc->config_->target_PSNR > 0);
|
(enc->config_->target_size > 0 || enc->config_->target_PSNR > 0);
|
||||||
const int fast_probe = (enc->method_ < 2 && !do_search);
|
const int fast_probe = ((method == 0 || method == 3) && !do_search);
|
||||||
float q = enc->config_->quality;
|
float q = enc->config_->quality;
|
||||||
const int max_passes = enc->config_->pass;
|
const int max_passes = enc->config_->pass;
|
||||||
const int task_percent = 20;
|
const int task_percent = 20;
|
||||||
@ -824,12 +825,18 @@ int VP8StatLoop(VP8Encoder* const enc) {
|
|||||||
|
|
||||||
// Fast mode: quick analysis pass over few mbs. Better than nothing.
|
// Fast mode: quick analysis pass over few mbs. Better than nothing.
|
||||||
nb_mbs = enc->mb_w_ * enc->mb_h_;
|
nb_mbs = enc->mb_w_ * enc->mb_h_;
|
||||||
if (fast_probe && nb_mbs > 100) nb_mbs = 100;
|
if (fast_probe) {
|
||||||
|
if (method == 3) { // we need more stats for method 3 to be reliable.
|
||||||
|
nb_mbs = (nb_mbs > 200) ? nb_mbs >> 1 : 100;
|
||||||
|
} else {
|
||||||
|
nb_mbs = (nb_mbs > 200) ? nb_mbs >> 2 : 50;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// No target size: just do several pass without changing 'q'
|
// No target size: just do several pass without changing 'q'
|
||||||
if (!do_search) {
|
if (!do_search) {
|
||||||
for (pass = 0; pass < max_passes; ++pass) {
|
for (pass = 0; pass < max_passes; ++pass) {
|
||||||
const VP8RDLevel rd_opt = (enc->method_ > 2) ? RD_OPT_BASIC : RD_OPT_NONE;
|
const VP8RDLevel rd_opt = (method >= 3) ? RD_OPT_BASIC : RD_OPT_NONE;
|
||||||
if (!OneStatPass(enc, q, rd_opt, nb_mbs, NULL, percent_per_pass)) {
|
if (!OneStatPass(enc, q, rd_opt, nb_mbs, NULL, percent_per_pass)) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -27,6 +27,8 @@
|
|||||||
#define SNS_TO_DQ 0.9 // Scaling constant between the sns value and the QP
|
#define SNS_TO_DQ 0.9 // Scaling constant between the sns value and the QP
|
||||||
// power-law modulation. Must be strictly less than 1.
|
// power-law modulation. Must be strictly less than 1.
|
||||||
|
|
||||||
|
#define I4_PENALTY 4000 // Rate-penalty for quick i4/i16 decision
|
||||||
|
|
||||||
#define MULT_8B(a, b) (((a) * (b) + 128) >> 8)
|
#define MULT_8B(a, b) (((a) * (b) + 128) >> 8)
|
||||||
|
|
||||||
#if defined(__cplusplus) || defined(c_plusplus)
|
#if defined(__cplusplus) || defined(c_plusplus)
|
||||||
@ -773,7 +775,7 @@ static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* const rd) {
|
|||||||
int mode;
|
int mode;
|
||||||
|
|
||||||
rd->mode_i16 = -1;
|
rd->mode_i16 = -1;
|
||||||
for (mode = 0; mode < 4; ++mode) {
|
for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
|
||||||
uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF; // scratch buffer
|
uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF; // scratch buffer
|
||||||
int nz;
|
int nz;
|
||||||
|
|
||||||
@ -902,7 +904,7 @@ static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) {
|
|||||||
|
|
||||||
rd->mode_uv = -1;
|
rd->mode_uv = -1;
|
||||||
InitScore(&rd_best);
|
InitScore(&rd_best);
|
||||||
for (mode = 0; mode < 4; ++mode) {
|
for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
|
||||||
VP8ModeScore rd_uv;
|
VP8ModeScore rd_uv;
|
||||||
|
|
||||||
// Reconstruct
|
// Reconstruct
|
||||||
@ -931,10 +933,10 @@ static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) {
|
|||||||
|
|
||||||
static void SimpleQuantize(VP8EncIterator* const it, VP8ModeScore* const rd) {
|
static void SimpleQuantize(VP8EncIterator* const it, VP8ModeScore* const rd) {
|
||||||
const VP8Encoder* const enc = it->enc_;
|
const VP8Encoder* const enc = it->enc_;
|
||||||
const int i16 = (it->mb_->type_ == 1);
|
const int is_i16 = (it->mb_->type_ == 1);
|
||||||
int nz = 0;
|
int nz = 0;
|
||||||
|
|
||||||
if (i16) {
|
if (is_i16) {
|
||||||
nz = ReconstructIntra16(it, rd, it->yuv_out_ + Y_OFF, it->preds_[0]);
|
nz = ReconstructIntra16(it, rd, it->yuv_out_ + Y_OFF, it->preds_[0]);
|
||||||
} else {
|
} else {
|
||||||
VP8IteratorStartI4(it);
|
VP8IteratorStartI4(it);
|
||||||
@ -953,12 +955,66 @@ static void SimpleQuantize(VP8EncIterator* const it, VP8ModeScore* const rd) {
|
|||||||
rd->nz = nz;
|
rd->nz = nz;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Refine intra16/intra4 sub-modes based on distortion only (not rate).
|
||||||
|
static void DistoRefine(VP8EncIterator* const it, int try_both_i4_i16) {
|
||||||
|
const int is_i16 = (it->mb_->type_ == 1);
|
||||||
|
score_t best_score = MAX_COST;
|
||||||
|
|
||||||
|
if (try_both_i4_i16 || is_i16) {
|
||||||
|
int mode;
|
||||||
|
int best_mode = -1;
|
||||||
|
for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
|
||||||
|
const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode];
|
||||||
|
const uint8_t* const src = it->yuv_in_ + Y_OFF;
|
||||||
|
const score_t score = VP8SSE16x16(src, ref);
|
||||||
|
if (score < best_score) {
|
||||||
|
best_mode = mode;
|
||||||
|
best_score = score;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
VP8SetIntra16Mode(it, best_mode);
|
||||||
|
}
|
||||||
|
if (try_both_i4_i16 || !is_i16) {
|
||||||
|
uint8_t modes_i4[16];
|
||||||
|
// We don't evaluate the rate here, but just account for it through a
|
||||||
|
// constant penalty (i4 mode usually needs more bits compared to i16).
|
||||||
|
score_t score_i4 = (score_t)I4_PENALTY;
|
||||||
|
|
||||||
|
VP8IteratorStartI4(it);
|
||||||
|
do {
|
||||||
|
int mode;
|
||||||
|
int best_sub_mode = -1;
|
||||||
|
score_t best_sub_score = MAX_COST;
|
||||||
|
const uint8_t* const src = it->yuv_in_ + Y_OFF + VP8Scan[it->i4_];
|
||||||
|
|
||||||
|
// TODO(skal): we don't really need the prediction pixels here,
|
||||||
|
// but just the distortion against 'src'.
|
||||||
|
VP8MakeIntra4Preds(it);
|
||||||
|
for (mode = 0; mode < NUM_BMODES; ++mode) {
|
||||||
|
const uint8_t* const ref = it->yuv_p_ + VP8I4ModeOffsets[mode];
|
||||||
|
const score_t score = VP8SSE4x4(src, ref);
|
||||||
|
if (score < best_sub_score) {
|
||||||
|
best_sub_mode = mode;
|
||||||
|
best_sub_score = score;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
modes_i4[it->i4_] = best_sub_mode;
|
||||||
|
score_i4 += best_sub_score;
|
||||||
|
if (score_i4 >= best_score) break;
|
||||||
|
} while (VP8IteratorRotateI4(it, it->yuv_in_ + Y_OFF));
|
||||||
|
if (score_i4 < best_score) {
|
||||||
|
VP8SetIntra4Mode(it, modes_i4);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
// Entry point
|
// Entry point
|
||||||
|
|
||||||
int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd,
|
int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd,
|
||||||
VP8RDLevel rd_opt) {
|
VP8RDLevel rd_opt) {
|
||||||
int is_skipped;
|
int is_skipped;
|
||||||
|
const int method = it->enc_->method_;
|
||||||
|
|
||||||
InitScore(rd);
|
InitScore(rd);
|
||||||
|
|
||||||
@ -970,7 +1026,7 @@ int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd,
|
|||||||
if (rd_opt > RD_OPT_NONE) {
|
if (rd_opt > RD_OPT_NONE) {
|
||||||
it->do_trellis_ = (rd_opt >= RD_OPT_TRELLIS_ALL);
|
it->do_trellis_ = (rd_opt >= RD_OPT_TRELLIS_ALL);
|
||||||
PickBestIntra16(it, rd);
|
PickBestIntra16(it, rd);
|
||||||
if (it->enc_->method_ >= 2) {
|
if (method >= 2) {
|
||||||
PickBestIntra4(it, rd);
|
PickBestIntra4(it, rd);
|
||||||
}
|
}
|
||||||
PickBestUV(it, rd);
|
PickBestUV(it, rd);
|
||||||
@ -979,8 +1035,9 @@ int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd,
|
|||||||
SimpleQuantize(it, rd);
|
SimpleQuantize(it, rd);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// TODO: for method_ == 2, pick the best intra4/intra16 based on SSE
|
// For method == 2, pick the best intra4/intra16 based on SSE (~tad slower).
|
||||||
it->do_trellis_ = (it->enc_->method_ == 2);
|
// For method <= 1, we refine intra4 or intra16 (but don't re-examine mode).
|
||||||
|
DistoRefine(it, (method >= 2));
|
||||||
SimpleQuantize(it, rd);
|
SimpleQuantize(it, rd);
|
||||||
}
|
}
|
||||||
is_skipped = (rd->nz == 0);
|
is_skipped = (rd->nz == 0);
|
||||||
|
@ -44,7 +44,8 @@ enum { B_DC_PRED = 0, // 4x4 modes
|
|||||||
|
|
||||||
// Luma16 or UV modes
|
// Luma16 or UV modes
|
||||||
DC_PRED = B_DC_PRED, V_PRED = B_VE_PRED,
|
DC_PRED = B_DC_PRED, V_PRED = B_VE_PRED,
|
||||||
H_PRED = B_HE_PRED, TM_PRED = B_TM_PRED
|
H_PRED = B_HE_PRED, TM_PRED = B_TM_PRED,
|
||||||
|
NUM_PRED_MODES = 4
|
||||||
};
|
};
|
||||||
|
|
||||||
enum { NUM_MB_SEGMENTS = 4,
|
enum { NUM_MB_SEGMENTS = 4,
|
||||||
|
@ -93,22 +93,26 @@ static void ResetBoundaryPredictions(VP8Encoder* const enc) {
|
|||||||
enc->nz_[-1] = 0; // constant
|
enc->nz_[-1] = 0; // constant
|
||||||
}
|
}
|
||||||
|
|
||||||
// Map configured quality level to coding tools used.
|
// Mapping from config->method_ to coding tools used.
|
||||||
//-------------+---+---+---+---+---+---+
|
//-------------------+---+---+---+---+---+---+---+
|
||||||
// Quality | 0 | 1 | 2 | 3 | 4 | 5 +
|
// Method | 0 | 1 | 2 | 3 |(4)| 5 | 6 |
|
||||||
//-------------+---+---+---+---+---+---+
|
//-------------------+---+---+---+---+---+---+---+
|
||||||
// dynamic prob| ~ | x | x | x | x | x |
|
// fast probe | x | | | x | | | |
|
||||||
//-------------+---+---+---+---+---+---+
|
//-------------------+---+---+---+---+---+---+---+
|
||||||
// rd-opt modes| | | x | x | x | x |
|
// dynamic proba | ~ | x | x | x | x | x | x |
|
||||||
//-------------+---+---+---+---+---+---+
|
//-------------------+---+---+---+---+---+---+---+
|
||||||
// fast i4/i16 | x | x | | | | |
|
// fast mode analysis| | | | | x | x | x |
|
||||||
//-------------+---+---+---+---+---+---+
|
//-------------------+---+---+---+---+---+---+---+
|
||||||
// rd-opt i4/16| | | x | x | x | x |
|
// basic rd-opt | | | | x | x | x | x |
|
||||||
//-------------+---+---+---+---+---+---+
|
//-------------------+---+---+---+---+---+---+---+
|
||||||
// Trellis | | x | | | x | x |
|
// disto-score i4/16 | | | x | | | | |
|
||||||
//-------------+---+---+---+---+---+---+
|
//-------------------+---+---+---+---+---+---+---+
|
||||||
// full-SNS | | | | | | x |
|
// rd-opt i4/16 | | | ~ | x | x | x | x |
|
||||||
//-------------+---+---+---+---+---+---+
|
//-------------------+---+---+---+---+---+---+---+
|
||||||
|
// Trellis | | | | | | x |Ful|
|
||||||
|
//-------------------+---+---+---+---+---+---+---+
|
||||||
|
// full-SNS | | | | | x | x | x |
|
||||||
|
//-------------------+---+---+---+---+---+---+---+
|
||||||
|
|
||||||
static void MapConfigToTools(VP8Encoder* const enc) {
|
static void MapConfigToTools(VP8Encoder* const enc) {
|
||||||
const int method = enc->config_->method;
|
const int method = enc->config_->method;
|
||||||
|
Loading…
Reference in New Issue
Block a user