mirror of
https://github.com/webmproject/libwebp.git
synced 2024-11-20 04:18:26 +01:00
lossy encoding: ~3% speed-up
Incorporate the non-last cost in the per-level cost table. Also: correct the trellis-quant cost evaluation at nodes (output is a little bit different now). Method 6 is ~4% faster. Change-Id: Ic48bd6d33f9193838216e7dc3a9f9c5508a1fbe8
This commit is contained in:
parent
e8605e9625
commit
390c8b316d
@ -360,9 +360,10 @@ void VP8CalculateLevelCosts(VP8Proba* const proba) {
|
|||||||
for (ctx = 0; ctx < NUM_CTX; ++ctx) {
|
for (ctx = 0; ctx < NUM_CTX; ++ctx) {
|
||||||
const uint8_t* const p = proba->coeffs_[ctype][band][ctx];
|
const uint8_t* const p = proba->coeffs_[ctype][band][ctx];
|
||||||
uint16_t* const table = proba->level_cost_[ctype][band][ctx];
|
uint16_t* const table = proba->level_cost_[ctype][band][ctx];
|
||||||
const int cost_base = VP8BitCost(1, p[1]);
|
const int cost0 = (ctx > 0) ? VP8BitCost(1, p[0]) : 0;
|
||||||
|
const int cost_base = VP8BitCost(1, p[1]) + cost0;
|
||||||
int v;
|
int v;
|
||||||
table[0] = VP8BitCost(0, p[1]);
|
table[0] = VP8BitCost(0, p[1]) + cost0;
|
||||||
for (v = 1; v <= MAX_VARIABLE_LEVEL; ++v) {
|
for (v = 1; v <= MAX_VARIABLE_LEVEL; ++v) {
|
||||||
table[v] = cost_base + VariableLevelCost(v, p);
|
table[v] = cost_base + VariableLevelCost(v, p);
|
||||||
}
|
}
|
||||||
|
@ -199,8 +199,9 @@ static int RecordCoeffs(int ctx, const VP8Residual* const res) {
|
|||||||
Record((v >= 3 + (8 << 3)), s + 10);
|
Record((v >= 3 + (8 << 3)), s + 10);
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
if (v > MAX_VARIABLE_LEVEL)
|
if (v > MAX_VARIABLE_LEVEL) {
|
||||||
v = MAX_VARIABLE_LEVEL;
|
v = MAX_VARIABLE_LEVEL;
|
||||||
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
const int bits = VP8LevelCodes[v - 1][1];
|
const int bits = VP8LevelCodes[v - 1][1];
|
||||||
@ -339,22 +340,22 @@ static void SetResidualCoeffs(const int16_t* const coeffs,
|
|||||||
static int GetResidualCost(int ctx0, const VP8Residual* const res) {
|
static int GetResidualCost(int ctx0, const VP8Residual* const res) {
|
||||||
int n = res->first;
|
int n = res->first;
|
||||||
// should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 1
|
// should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 1
|
||||||
int p0 = res->prob[n][ctx0][0];
|
const int p0 = res->prob[n][ctx0][0];
|
||||||
const uint16_t* t = res->cost[n][ctx0];
|
const uint16_t* t = res->cost[n][ctx0];
|
||||||
int cost;
|
// bit_cost(1, p0) is already incorporated in t[] tables, but only if ctx != 0
|
||||||
|
// (as required by the syntax). For ctx0 == 0, we need to add it here or it'll
|
||||||
|
// be missing during the loop.
|
||||||
|
int cost = (ctx0 == 0) ? VP8BitCost(1, p0) : 0;
|
||||||
|
|
||||||
if (res->last < 0) {
|
if (res->last < 0) {
|
||||||
return VP8BitCost(0, p0);
|
return VP8BitCost(0, p0);
|
||||||
}
|
}
|
||||||
cost = VP8BitCost(1, p0);
|
|
||||||
for (; n < res->last; ++n) {
|
for (; n < res->last; ++n) {
|
||||||
const int v = abs(res->coeffs[n]);
|
const int v = abs(res->coeffs[n]);
|
||||||
const int b = VP8EncBands[n + 1];
|
const int b = VP8EncBands[n + 1];
|
||||||
const int ctx = (v >= 2) ? 2 : v;
|
const int ctx = (v >= 2) ? 2 : v;
|
||||||
cost += VP8LevelCost(t, v);
|
cost += VP8LevelCost(t, v);
|
||||||
t = res->cost[b][ctx];
|
t = res->cost[b][ctx];
|
||||||
// the masking trick is faster than "if (v) cost += ..." with clang
|
|
||||||
cost += (v ? ~0U : 0) & VP8BitCost(1, res->prob[b][ctx][0]);
|
|
||||||
}
|
}
|
||||||
// Last coefficient is always non-zero
|
// Last coefficient is always non-zero
|
||||||
{
|
{
|
||||||
|
@ -548,7 +548,7 @@ static int TrellisQuantizeBlock(const VP8EncIterator* const it,
|
|||||||
int ctx0, int coeff_type,
|
int ctx0, int coeff_type,
|
||||||
const VP8Matrix* const mtx,
|
const VP8Matrix* const mtx,
|
||||||
int lambda) {
|
int lambda) {
|
||||||
ProbaArray* const last_costs = it->enc_->proba_.coeffs_[coeff_type];
|
ProbaArray* const probas = it->enc_->proba_.coeffs_[coeff_type];
|
||||||
CostArray* const costs = it->enc_->proba_.level_cost_[coeff_type];
|
CostArray* const costs = it->enc_->proba_.level_cost_[coeff_type];
|
||||||
const int first = (coeff_type == 0) ? 1 : 0;
|
const int first = (coeff_type == 0) ? 1 : 0;
|
||||||
Node nodes[17][NUM_NODES];
|
Node nodes[17][NUM_NODES];
|
||||||
@ -562,7 +562,7 @@ static int TrellisQuantizeBlock(const VP8EncIterator* const it,
|
|||||||
score_t cost;
|
score_t cost;
|
||||||
score_t max_error;
|
score_t max_error;
|
||||||
const int thresh = mtx->q_[1] * mtx->q_[1] / 4;
|
const int thresh = mtx->q_[1] * mtx->q_[1] / 4;
|
||||||
const int last_proba = last_costs[VP8EncBands[first]][ctx0][0];
|
const int last_proba = probas[VP8EncBands[first]][ctx0][0];
|
||||||
|
|
||||||
// compute maximal distortion.
|
// compute maximal distortion.
|
||||||
max_error = 0;
|
max_error = 0;
|
||||||
@ -583,7 +583,7 @@ static int TrellisQuantizeBlock(const VP8EncIterator* const it,
|
|||||||
// initialize source node.
|
// initialize source node.
|
||||||
n = first - 1;
|
n = first - 1;
|
||||||
for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) {
|
for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) {
|
||||||
NODE(n, m).cost = 0;
|
NODE(n, m).cost = (ctx0 == 0) ? VP8BitCost(1, last_proba) : 0;
|
||||||
NODE(n, m).error = max_error;
|
NODE(n, m).error = max_error;
|
||||||
NODE(n, m).ctx = ctx0;
|
NODE(n, m).ctx = ctx0;
|
||||||
}
|
}
|
||||||
@ -608,7 +608,7 @@ static int TrellisQuantizeBlock(const VP8EncIterator* const it,
|
|||||||
int delta_error, new_error;
|
int delta_error, new_error;
|
||||||
score_t cur_score = MAX_COST;
|
score_t cur_score = MAX_COST;
|
||||||
int level = level0 + m;
|
int level = level0 + m;
|
||||||
int last_proba;
|
int last_pos_cost; // extra cost if last coeff's position is < 15
|
||||||
|
|
||||||
cur->sign = sign;
|
cur->sign = sign;
|
||||||
cur->level = level;
|
cur->level = level;
|
||||||
@ -617,7 +617,9 @@ static int TrellisQuantizeBlock(const VP8EncIterator* const it,
|
|||||||
cur->cost = MAX_COST;
|
cur->cost = MAX_COST;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
last_proba = last_costs[VP8EncBands[n + 1]][cur->ctx][0];
|
last_pos_cost =
|
||||||
|
(n < 15) ? VP8BitCost(0, probas[VP8EncBands[n + 1]][cur->ctx][0])
|
||||||
|
: 0;
|
||||||
|
|
||||||
// Compute delta_error = how much coding this level will
|
// Compute delta_error = how much coding this level will
|
||||||
// subtract as distortion to max_error
|
// subtract as distortion to max_error
|
||||||
@ -631,20 +633,16 @@ static int TrellisQuantizeBlock(const VP8EncIterator* const it,
|
|||||||
const int prev_ctx = prev->ctx;
|
const int prev_ctx = prev->ctx;
|
||||||
const uint16_t* const tcost = costs[VP8EncBands[n]][prev_ctx];
|
const uint16_t* const tcost = costs[VP8EncBands[n]][prev_ctx];
|
||||||
const score_t total_error = prev->error - delta_error;
|
const score_t total_error = prev->error - delta_error;
|
||||||
score_t cost, base_cost, score;
|
score_t cost, score;
|
||||||
|
|
||||||
if (prev->cost >= MAX_COST) { // dead node?
|
if (prev->cost >= MAX_COST) { // dead node?
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Base cost of both terminal/non-terminal
|
// Base cost of both terminal/non-terminal
|
||||||
base_cost = prev->cost + VP8LevelCost(tcost, level);
|
cost = prev->cost + VP8LevelCost(tcost, level);
|
||||||
|
|
||||||
// Examine node assuming it's a non-terminal one.
|
// Examine node assuming it's a non-terminal one.
|
||||||
cost = base_cost;
|
|
||||||
if (level && n < 15) {
|
|
||||||
cost += VP8BitCost(1, last_proba);
|
|
||||||
}
|
|
||||||
score = RDScoreTrellis(lambda, cost, total_error);
|
score = RDScoreTrellis(lambda, cost, total_error);
|
||||||
if (score < cur_score) {
|
if (score < cur_score) {
|
||||||
cur_score = score;
|
cur_score = score;
|
||||||
@ -655,9 +653,7 @@ static int TrellisQuantizeBlock(const VP8EncIterator* const it,
|
|||||||
|
|
||||||
// Now, record best terminal node (and thus best entry in the graph).
|
// Now, record best terminal node (and thus best entry in the graph).
|
||||||
if (level) {
|
if (level) {
|
||||||
cost = base_cost;
|
score = RDScoreTrellis(lambda, cost + last_pos_cost, total_error);
|
||||||
if (n < 15) cost += VP8BitCost(0, last_proba);
|
|
||||||
score = RDScoreTrellis(lambda, cost, total_error);
|
|
||||||
if (score < best_score) {
|
if (score < best_score) {
|
||||||
best_score = score;
|
best_score = score;
|
||||||
best_path[0] = n; // best eob position
|
best_path[0] = n; // best eob position
|
||||||
|
Loading…
Reference in New Issue
Block a user