1-2% faster encoding by removing an indirection in GetResidualCost()

The MIPS code for cost is not updated yet, which is why I keep Residual::*cost
around for now. It should be removed in favor of *costs later.

Change-Id: Id1d09a8c37ea8c5b34ad5eb8811d6a3ec6c4d89f
This commit is contained in:
Pascal Massimino 2015-02-19 08:44:35 +01:00
parent eddb7e70be
commit 2382050748
6 changed files with 25 additions and 12 deletions

View File

@ -323,7 +323,8 @@ static int GetResidualCost(int ctx0, const VP8Residual* const res) {
int n = res->first;
// should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 1
const int p0 = res->prob[n][ctx0][0];
const uint16_t* t = res->cost[n][ctx0];
CostArrayPtr const costs = res->costs;
const uint16_t* t = costs[n][ctx0];
// bit_cost(1, p0) is already incorporated in t[] tables, but only if ctx != 0
// (as required by the syntax). For ctx0 == 0, we need to add it here or it'll
// be missing during the loop.
@ -334,10 +335,9 @@ static int GetResidualCost(int ctx0, const VP8Residual* const res) {
}
for (; n < res->last; ++n) {
const int v = abs(res->coeffs[n]);
const int b = VP8EncBands[n + 1];
const int ctx = (v >= 2) ? 2 : v;
cost += VP8LevelCost(t, v);
t = res->cost[b][ctx];
t = costs[n + 1][ctx];
}
// Last coefficient is always non-zero
{

View File

@ -51,7 +51,8 @@ static int GetResidualCostSSE2(int ctx0, const VP8Residual* const res) {
int n = res->first;
// should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 1
const int p0 = res->prob[n][ctx0][0];
const uint16_t* t = res->cost[n][ctx0];
CostArrayPtr const costs = res->costs;
const uint16_t* t = costs[n][ctx0];
// bit_cost(1, p0) is already incorporated in t[] tables, but only if ctx != 0
// (as required by the syntax). For ctx0 == 0, we need to add it here or it'll
// be missing during the loop.
@ -87,9 +88,8 @@ static int GetResidualCostSSE2(int ctx0, const VP8Residual* const res) {
const int ctx = ctxs[n];
const int level = levels[n];
const int flevel = abs_levels[n]; // full level
const int b = VP8EncBands[n + 1];
cost += VP8LevelFixedCosts[flevel] + t[level]; // simplified VP8LevelCost()
t = res->cost[b][ctx];
t = costs[n + 1][ctx];
}
// Last coefficient is always non-zero
{

View File

@ -63,6 +63,7 @@ void VP8CalculateLevelCosts(VP8Proba* const proba) {
if (!proba->dirty_) return; // nothing to do.
for (ctype = 0; ctype < NUM_TYPES; ++ctype) {
int n;
for (band = 0; band < NUM_BANDS; ++band) {
for (ctx = 0; ctx < NUM_CTX; ++ctx) {
const uint8_t* const p = proba->coeffs_[ctype][band][ctx];
@ -78,6 +79,12 @@ void VP8CalculateLevelCosts(VP8Proba* const proba) {
// actually constant.
}
}
for (n = 0; n < 16; ++n) { // replicate bands. We don't need to sentinel.
for (ctx = 0; ctx < NUM_CTX; ++ctx) {
proba->remapped_costs_[ctype][n][ctx] =
proba->level_cost_[ctype][VP8EncBands[n]][ctx];
}
}
}
proba->dirty_ = 0;
}
@ -202,6 +209,7 @@ void VP8InitResidual(int first, int coeff_type,
res->prob = enc->proba_.coeffs_[coeff_type];
res->stats = enc->proba_.stats_[coeff_type];
res->cost = enc->proba_.level_cost_[coeff_type];
res->costs = enc->proba_.remapped_costs_[coeff_type];
res->first = first;
}

View File

@ -31,9 +31,10 @@ struct VP8Residual {
const int16_t* coeffs;
int coeff_type;
ProbaArray* prob;
StatsArray* stats;
CostArray* cost;
ProbaArray* prob;
StatsArray* stats;
CostArray* cost; // TODO(skal): remove in favor of *costs
CostArrayPtr costs;
};
void VP8InitResidual(int first, int coeff_type,

View File

@ -550,7 +550,8 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc,
const VP8Matrix* const mtx,
int lambda) {
const ProbaArray* const probas = enc->proba_.coeffs_[coeff_type];
const CostArray* const costs = enc->proba_.level_cost_[coeff_type];
CostArrayPtr const costs =
(CostArrayPtr)enc->proba_.remapped_costs_[coeff_type];
const int first = (coeff_type == 0) ? 1 : 0;
Node nodes[16][NUM_NODES];
ScoreState score_states[2][NUM_NODES];
@ -587,7 +588,7 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc,
for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) {
const score_t rate = (ctx0 == 0) ? VP8BitCost(1, last_proba) : 0;
ss_cur[m].score = RDScoreTrellis(lambda, rate, 0);
ss_cur[m].costs = costs[VP8EncBands[first]][ctx0];
ss_cur[m].costs = costs[first][ctx0];
}
}
@ -621,7 +622,7 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc,
int best_prev = 0; // default, in case
ss_cur[m].score = MAX_COST;
ss_cur[m].costs = costs[band][ctx];
ss_cur[m].costs = costs[n + 1][ctx];
if (level > MAX_LEVEL || level < 0) { // node is dead?
continue;
}

View File

@ -151,6 +151,8 @@ typedef uint32_t proba_t; // 16b + 16b
typedef uint8_t ProbaArray[NUM_CTX][NUM_PROBAS];
typedef proba_t StatsArray[NUM_CTX][NUM_PROBAS];
typedef uint16_t CostArray[NUM_CTX][MAX_VARIABLE_LEVEL + 1];
typedef const uint16_t* (*CostArrayPtr)[NUM_CTX]; // for easy casting
typedef const uint16_t* CostArrayMap[16][NUM_CTX];
typedef double LFStats[NUM_MB_SEGMENTS][MAX_LF_LEVELS]; // filter stats
typedef struct VP8Encoder VP8Encoder;
@ -170,6 +172,7 @@ typedef struct {
ProbaArray coeffs_[NUM_TYPES][NUM_BANDS]; // 1056 bytes
StatsArray stats_[NUM_TYPES][NUM_BANDS]; // 4224 bytes
CostArray level_cost_[NUM_TYPES][NUM_BANDS]; // 13056 bytes
CostArrayMap remapped_costs_[NUM_TYPES]; // 1536 bytes
int dirty_; // if true, need to call VP8CalculateLevelCosts()
int use_skip_proba_; // Note: we always use skip_proba for now.
int nb_skip_; // number of skipped blocks