Merge "speed-up trellis quant (~5-10% overall speed-up)"

This commit is contained in:
James Zern 2014-02-27 14:34:01 -08:00 committed by Gerrit Code Review
commit 3cb8406262

View File

@ -395,7 +395,7 @@ void VP8SetSegmentParams(VP8Encoder* const enc, float quality) {
dq_uv_ac = clip(dq_uv_ac, MIN_DQ_UV, MAX_DQ_UV); dq_uv_ac = clip(dq_uv_ac, MIN_DQ_UV, MAX_DQ_UV);
// We also boost the dc-uv-quant a little, based on sns-strength, since // We also boost the dc-uv-quant a little, based on sns-strength, since
// U/V channels are quite more reactive to high quants (flat DC-blocks // U/V channels are quite more reactive to high quants (flat DC-blocks
// tend to appear, and are displeasant). // tend to appear, and are unpleasant).
dq_uv_dc = -4 * enc->config_->sns_strength / 100; dq_uv_dc = -4 * enc->config_->sns_strength / 100;
dq_uv_dc = clip(dq_uv_dc, -15, 15); // 4bit-signed max allowed dq_uv_dc = clip(dq_uv_dc, -15, 15); // 4bit-signed max allowed
@ -522,7 +522,7 @@ typedef struct {
int sign; // sign of coeff_i int sign; // sign of coeff_i
score_t cost; // bit cost score_t cost; // bit cost
score_t error; // distortion = sum of (|coeff_i| - level_i * Q_i)^2 score_t error; // distortion = sum of (|coeff_i| - level_i * Q_i)^2
int ctx; // context (only depends on 'level'. Could be spared.) const uint16_t* costs; // shortcut to cost tables
} Node; } Node;
// If a coefficient was quantized to a value Q (using a neutral bias), // If a coefficient was quantized to a value Q (using a neutral bias),
@ -554,9 +554,8 @@ static int TrellisQuantizeBlock(const VP8EncIterator* const it,
Node nodes[17][NUM_NODES]; Node nodes[17][NUM_NODES];
int best_path[3] = {-1, -1, -1}; // store best-last/best-level/best-previous int best_path[3] = {-1, -1, -1}; // store best-last/best-level/best-previous
score_t best_score; score_t best_score;
int best_node;
int last = first - 1; int last = first - 1;
int n, m, p, nz; int n, m, p;
{ {
score_t cost; score_t cost;
@ -567,7 +566,7 @@ static int TrellisQuantizeBlock(const VP8EncIterator* const it,
// compute maximal distortion. // compute maximal distortion.
max_error = 0; max_error = 0;
for (n = first; n < 16; ++n) { for (n = first; n < 16; ++n) {
const int j = kZigzag[n]; const int j = kZigzag[n];
const int err = in[j] * in[j]; const int err = in[j] * in[j];
max_error += kWeightTrellis[j] * err; max_error += kWeightTrellis[j] * err;
if (err > thresh) last = n; if (err > thresh) last = n;
@ -585,13 +584,13 @@ static int TrellisQuantizeBlock(const VP8EncIterator* const it,
for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) { for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) {
NODE(n, m).cost = (ctx0 == 0) ? VP8BitCost(1, last_proba) : 0; NODE(n, m).cost = (ctx0 == 0) ? VP8BitCost(1, last_proba) : 0;
NODE(n, m).error = max_error; NODE(n, m).error = max_error;
NODE(n, m).ctx = ctx0; NODE(n, m).costs = costs[VP8EncBands[first]][ctx0];
} }
} }
// traverse trellis. // traverse trellis.
for (n = first; n <= last; ++n) { for (n = first; n <= last; ++n) {
const int j = kZigzag[n]; const int j = kZigzag[n];
const uint32_t Q = mtx->q_[j]; const uint32_t Q = mtx->q_[j];
const uint32_t iQ = mtx->iq_[j]; const uint32_t iQ = mtx->iq_[j];
const uint32_t B = BIAS(0x00); // neutral bias const uint32_t B = BIAS(0x00); // neutral bias
@ -608,30 +607,30 @@ static int TrellisQuantizeBlock(const VP8EncIterator* const it,
int delta_error, new_error; int delta_error, new_error;
score_t cur_score = MAX_COST; score_t cur_score = MAX_COST;
int level = level0 + m; int level = level0 + m;
const int ctx = (level > 2) ? 2 : level;
const int band = VP8EncBands[n + 1];
int last_pos_cost; // extra cost if last coeff's position is < 15 int last_pos_cost; // extra cost if last coeff's position is < 15
cur->sign = sign; cur->sign = sign;
cur->level = level; cur->level = level;
cur->ctx = (level == 0) ? 0 : (level == 1) ? 1 : 2; cur->costs = costs[band][ctx];
if (level > MAX_LEVEL || level < 0) { // node is dead? if (level > MAX_LEVEL || level < 0) { // node is dead?
cur->cost = MAX_COST; cur->cost = MAX_COST;
continue; continue;
} }
last_pos_cost = last_pos_cost = (n < 15) ? VP8BitCost(0, probas[band][ctx][0])
(n < 15) ? VP8BitCost(0, probas[VP8EncBands[n + 1]][cur->ctx][0]) : 0;
: 0;
// Compute delta_error = how much coding this level will // Compute delta_error = how much coding this level will
// subtract as distortion to max_error // subtract as distortion to max_error
new_error = coeff0 - level * Q; new_error = coeff0 - level * Q;
delta_error = delta_error =
kWeightTrellis[j] * (coeff0 * coeff0 - new_error * new_error); kWeightTrellis[j] * (coeff0 * coeff0 - new_error * new_error);
// Inspect all possible non-dead predecessors. Retain only the best one. // Inspect all possible non-dead predecessors. Retain only the best one.
for (p = -MIN_DELTA; p <= MAX_DELTA; ++p) { for (p = -MIN_DELTA; p <= MAX_DELTA; ++p) {
const Node* const prev = &NODE(n - 1, p); const Node* const prev = &NODE(n - 1, p);
const int prev_ctx = prev->ctx; const uint16_t* const tcost = prev->costs;
const uint16_t* const tcost = costs[VP8EncBands[n]][prev_ctx];
const score_t total_error = prev->error - delta_error; const score_t total_error = prev->error - delta_error;
score_t cost, score; score_t cost, score;
@ -672,23 +671,25 @@ static int TrellisQuantizeBlock(const VP8EncIterator* const it,
return 0; // skip! return 0; // skip!
} }
// Unwind the best path. {
// Note: best-prev on terminal node is not necessarily equal to the // Unwind the best path.
// best_prev for non-terminal. So we patch best_path[2] in. // Note: best-prev on terminal node is not necessarily equal to the
n = best_path[0]; // best_prev for non-terminal. So we patch best_path[2] in.
best_node = best_path[1]; int nz = 0;
NODE(n, best_node).prev = best_path[2]; // force best-prev for terminal int best_node = best_path[1];
nz = 0; n = best_path[0];
NODE(n, best_node).prev = best_path[2]; // force best-prev for terminal
for (; n >= first; --n) { for (; n >= first; --n) {
const Node* const node = &NODE(n, best_node); const Node* const node = &NODE(n, best_node);
const int j = kZigzag[n]; const int j = kZigzag[n];
out[n] = node->sign ? -node->level : node->level; out[n] = node->sign ? -node->level : node->level;
nz |= (node->level != 0); nz |= node->level;
in[j] = out[n] * mtx->q_[j]; in[j] = out[n] * mtx->q_[j];
best_node = node->prev; best_node = node->prev;
}
return (nz != 0);
} }
return nz;
} }
#undef NODE #undef NODE