mirror of
https://github.com/webmproject/libwebp.git
synced 2024-12-27 06:08:21 +01:00
1-2% faster encoding by removing an indirection in GetResidualCost()
The MIPS code for cost has not been updated yet, which is why I am keeping Residual::*cost around for now. It should be removed in favor of *costs later. Change-Id: Id1d09a8c37ea8c5b34ad5eb8811d6a3ec6c4d89f
This commit is contained in:
parent
eddb7e70be
commit
2382050748
@ -323,7 +323,8 @@ static int GetResidualCost(int ctx0, const VP8Residual* const res) {
|
||||
int n = res->first;
|
||||
// should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 1
|
||||
const int p0 = res->prob[n][ctx0][0];
|
||||
const uint16_t* t = res->cost[n][ctx0];
|
||||
CostArrayPtr const costs = res->costs;
|
||||
const uint16_t* t = costs[n][ctx0];
|
||||
// bit_cost(1, p0) is already incorporated in t[] tables, but only if ctx != 0
|
||||
// (as required by the syntax). For ctx0 == 0, we need to add it here or it'll
|
||||
// be missing during the loop.
|
||||
@ -334,10 +335,9 @@ static int GetResidualCost(int ctx0, const VP8Residual* const res) {
|
||||
}
|
||||
for (; n < res->last; ++n) {
|
||||
const int v = abs(res->coeffs[n]);
|
||||
const int b = VP8EncBands[n + 1];
|
||||
const int ctx = (v >= 2) ? 2 : v;
|
||||
cost += VP8LevelCost(t, v);
|
||||
t = res->cost[b][ctx];
|
||||
t = costs[n + 1][ctx];
|
||||
}
|
||||
// Last coefficient is always non-zero
|
||||
{
|
||||
|
@ -51,7 +51,8 @@ static int GetResidualCostSSE2(int ctx0, const VP8Residual* const res) {
|
||||
int n = res->first;
|
||||
// should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 1
|
||||
const int p0 = res->prob[n][ctx0][0];
|
||||
const uint16_t* t = res->cost[n][ctx0];
|
||||
CostArrayPtr const costs = res->costs;
|
||||
const uint16_t* t = costs[n][ctx0];
|
||||
// bit_cost(1, p0) is already incorporated in t[] tables, but only if ctx != 0
|
||||
// (as required by the syntax). For ctx0 == 0, we need to add it here or it'll
|
||||
// be missing during the loop.
|
||||
@ -87,9 +88,8 @@ static int GetResidualCostSSE2(int ctx0, const VP8Residual* const res) {
|
||||
const int ctx = ctxs[n];
|
||||
const int level = levels[n];
|
||||
const int flevel = abs_levels[n]; // full level
|
||||
const int b = VP8EncBands[n + 1];
|
||||
cost += VP8LevelFixedCosts[flevel] + t[level]; // simplified VP8LevelCost()
|
||||
t = res->cost[b][ctx];
|
||||
t = costs[n + 1][ctx];
|
||||
}
|
||||
// Last coefficient is always non-zero
|
||||
{
|
||||
|
@ -63,6 +63,7 @@ void VP8CalculateLevelCosts(VP8Proba* const proba) {
|
||||
if (!proba->dirty_) return; // nothing to do.
|
||||
|
||||
for (ctype = 0; ctype < NUM_TYPES; ++ctype) {
|
||||
int n;
|
||||
for (band = 0; band < NUM_BANDS; ++band) {
|
||||
for (ctx = 0; ctx < NUM_CTX; ++ctx) {
|
||||
const uint8_t* const p = proba->coeffs_[ctype][band][ctx];
|
||||
@ -78,6 +79,12 @@ void VP8CalculateLevelCosts(VP8Proba* const proba) {
|
||||
// actually constant.
|
||||
}
|
||||
}
|
||||
for (n = 0; n < 16; ++n) { // replicate bands. We don't need to sentinel.
|
||||
for (ctx = 0; ctx < NUM_CTX; ++ctx) {
|
||||
proba->remapped_costs_[ctype][n][ctx] =
|
||||
proba->level_cost_[ctype][VP8EncBands[n]][ctx];
|
||||
}
|
||||
}
|
||||
}
|
||||
proba->dirty_ = 0;
|
||||
}
|
||||
@ -202,6 +209,7 @@ void VP8InitResidual(int first, int coeff_type,
|
||||
res->prob = enc->proba_.coeffs_[coeff_type];
|
||||
res->stats = enc->proba_.stats_[coeff_type];
|
||||
res->cost = enc->proba_.level_cost_[coeff_type];
|
||||
res->costs = enc->proba_.remapped_costs_[coeff_type];
|
||||
res->first = first;
|
||||
}
|
||||
|
||||
|
@ -31,9 +31,10 @@ struct VP8Residual {
|
||||
const int16_t* coeffs;
|
||||
|
||||
int coeff_type;
|
||||
ProbaArray* prob;
|
||||
StatsArray* stats;
|
||||
CostArray* cost;
|
||||
ProbaArray* prob;
|
||||
StatsArray* stats;
|
||||
CostArray* cost; // TODO(skal): remove in favor of *costs
|
||||
CostArrayPtr costs;
|
||||
};
|
||||
|
||||
void VP8InitResidual(int first, int coeff_type,
|
||||
|
@ -550,7 +550,8 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc,
|
||||
const VP8Matrix* const mtx,
|
||||
int lambda) {
|
||||
const ProbaArray* const probas = enc->proba_.coeffs_[coeff_type];
|
||||
const CostArray* const costs = enc->proba_.level_cost_[coeff_type];
|
||||
CostArrayPtr const costs =
|
||||
(CostArrayPtr)enc->proba_.remapped_costs_[coeff_type];
|
||||
const int first = (coeff_type == 0) ? 1 : 0;
|
||||
Node nodes[16][NUM_NODES];
|
||||
ScoreState score_states[2][NUM_NODES];
|
||||
@ -587,7 +588,7 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc,
|
||||
for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) {
|
||||
const score_t rate = (ctx0 == 0) ? VP8BitCost(1, last_proba) : 0;
|
||||
ss_cur[m].score = RDScoreTrellis(lambda, rate, 0);
|
||||
ss_cur[m].costs = costs[VP8EncBands[first]][ctx0];
|
||||
ss_cur[m].costs = costs[first][ctx0];
|
||||
}
|
||||
}
|
||||
|
||||
@ -621,7 +622,7 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc,
|
||||
int best_prev = 0; // default, in case
|
||||
|
||||
ss_cur[m].score = MAX_COST;
|
||||
ss_cur[m].costs = costs[band][ctx];
|
||||
ss_cur[m].costs = costs[n + 1][ctx];
|
||||
if (level > MAX_LEVEL || level < 0) { // node is dead?
|
||||
continue;
|
||||
}
|
||||
|
@ -151,6 +151,8 @@ typedef uint32_t proba_t; // 16b + 16b
|
||||
typedef uint8_t ProbaArray[NUM_CTX][NUM_PROBAS];
|
||||
typedef proba_t StatsArray[NUM_CTX][NUM_PROBAS];
|
||||
typedef uint16_t CostArray[NUM_CTX][MAX_VARIABLE_LEVEL + 1];
|
||||
// Pointer to an array of NUM_CTX 'const uint16_t*' entries, so that
// costs[n][ctx] yields a 'const uint16_t*' cost table. A CostArrayMap
// (array of [16][NUM_CTX] such pointers) can be cast to this type.
typedef const uint16_t* (*CostArrayPtr)[NUM_CTX];   // for easy casting
|
||||
// Table of [16][NUM_CTX] pointers to uint16_t cost data, indexed by
// coefficient position n (0..15) and context rather than by band.
// VP8CalculateLevelCosts() points entry [n][ctx] at
// level_cost_[ctype][VP8EncBands[n]][ctx].
typedef const uint16_t* CostArrayMap[16][NUM_CTX];
|
||||
typedef double LFStats[NUM_MB_SEGMENTS][MAX_LF_LEVELS]; // filter stats
|
||||
|
||||
typedef struct VP8Encoder VP8Encoder;
|
||||
@ -170,6 +172,7 @@ typedef struct {
|
||||
ProbaArray coeffs_[NUM_TYPES][NUM_BANDS]; // 1056 bytes
|
||||
StatsArray stats_[NUM_TYPES][NUM_BANDS]; // 4224 bytes
|
||||
CostArray level_cost_[NUM_TYPES][NUM_BANDS]; // 13056 bytes
|
||||
CostArrayMap remapped_costs_[NUM_TYPES]; // 1536 bytes
|
||||
int dirty_; // if true, need to call VP8CalculateLevelCosts()
|
||||
int use_skip_proba_; // Note: we always use skip_proba for now.
|
||||
int nb_skip_; // number of skipped blocks
|
||||
|
Loading…
Reference in New Issue
Block a user