Merge "speed-up trellis quant (~5-10% overall speed-up)"

This commit is contained in:
James Zern 2014-02-27 14:34:01 -08:00 committed by Gerrit Code Review
commit 3cb8406262

View File

@ -395,7 +395,7 @@ void VP8SetSegmentParams(VP8Encoder* const enc, float quality) {
dq_uv_ac = clip(dq_uv_ac, MIN_DQ_UV, MAX_DQ_UV);
// We also boost the dc-uv-quant a little, based on sns-strength, since
// U/V channels are quite more reactive to high quants (flat DC-blocks
// tend to appear, and are displeasant).
// tend to appear, and are unpleasant).
dq_uv_dc = -4 * enc->config_->sns_strength / 100;
dq_uv_dc = clip(dq_uv_dc, -15, 15); // 4bit-signed max allowed
@ -522,7 +522,7 @@ typedef struct {
int sign; // sign of coeff_i
score_t cost; // bit cost
score_t error; // distortion = sum of (|coeff_i| - level_i * Q_i)^2
int ctx; // context (only depends on 'level'. Could be spared.)
const uint16_t* costs; // shortcut to cost tables
} Node;
// If a coefficient was quantized to a value Q (using a neutral bias),
@ -554,9 +554,8 @@ static int TrellisQuantizeBlock(const VP8EncIterator* const it,
Node nodes[17][NUM_NODES];
int best_path[3] = {-1, -1, -1}; // store best-last/best-level/best-previous
score_t best_score;
int best_node;
int last = first - 1;
int n, m, p, nz;
int n, m, p;
{
score_t cost;
@ -585,7 +584,7 @@ static int TrellisQuantizeBlock(const VP8EncIterator* const it,
for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) {
NODE(n, m).cost = (ctx0 == 0) ? VP8BitCost(1, last_proba) : 0;
NODE(n, m).error = max_error;
NODE(n, m).ctx = ctx0;
NODE(n, m).costs = costs[VP8EncBands[first]][ctx0];
}
}
@ -608,17 +607,18 @@ static int TrellisQuantizeBlock(const VP8EncIterator* const it,
int delta_error, new_error;
score_t cur_score = MAX_COST;
int level = level0 + m;
const int ctx = (level > 2) ? 2 : level;
const int band = VP8EncBands[n + 1];
int last_pos_cost; // extra cost if last coeff's position is < 15
cur->sign = sign;
cur->level = level;
cur->ctx = (level == 0) ? 0 : (level == 1) ? 1 : 2;
cur->costs = costs[band][ctx];
if (level > MAX_LEVEL || level < 0) { // node is dead?
cur->cost = MAX_COST;
continue;
}
last_pos_cost =
(n < 15) ? VP8BitCost(0, probas[VP8EncBands[n + 1]][cur->ctx][0])
last_pos_cost = (n < 15) ? VP8BitCost(0, probas[band][ctx][0])
: 0;
// Compute delta_error = how much coding this level will
@ -630,8 +630,7 @@ static int TrellisQuantizeBlock(const VP8EncIterator* const it,
// Inspect all possible non-dead predecessors. Retain only the best one.
for (p = -MIN_DELTA; p <= MAX_DELTA; ++p) {
const Node* const prev = &NODE(n - 1, p);
const int prev_ctx = prev->ctx;
const uint16_t* const tcost = costs[VP8EncBands[n]][prev_ctx];
const uint16_t* const tcost = prev->costs;
const score_t total_error = prev->error - delta_error;
score_t cost, score;
@ -672,23 +671,25 @@ static int TrellisQuantizeBlock(const VP8EncIterator* const it,
return 0; // skip!
}
{
// Unwind the best path.
// Note: best-prev on terminal node is not necessarily equal to the
// best_prev for non-terminal. So we patch best_path[2] in.
int nz = 0;
int best_node = best_path[1];
n = best_path[0];
best_node = best_path[1];
NODE(n, best_node).prev = best_path[2]; // force best-prev for terminal
nz = 0;
for (; n >= first; --n) {
const Node* const node = &NODE(n, best_node);
const int j = kZigzag[n];
out[n] = node->sign ? -node->level : node->level;
nz |= (node->level != 0);
nz |= node->level;
in[j] = out[n] * mtx->q_[j];
best_node = node->prev;
}
return nz;
return (nz != 0);
}
}
#undef NODE