mirror of
https://github.com/webmproject/libwebp.git
synced 2024-12-27 06:08:21 +01:00
1-2% faster encoding by removing an indirection in GetResidualCost()
The MIPS code for cost is not updated yet, which is why I keep Residual::*cost around for now. It should be removed in favor of *costs later. Change-Id: Id1d09a8c37ea8c5b34ad5eb8811d6a3ec6c4d89f
This commit is contained in:
parent
eddb7e70be
commit
2382050748
@ -323,7 +323,8 @@ static int GetResidualCost(int ctx0, const VP8Residual* const res) {
|
|||||||
int n = res->first;
|
int n = res->first;
|
||||||
// should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 1
|
// should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 1
|
||||||
const int p0 = res->prob[n][ctx0][0];
|
const int p0 = res->prob[n][ctx0][0];
|
||||||
const uint16_t* t = res->cost[n][ctx0];
|
CostArrayPtr const costs = res->costs;
|
||||||
|
const uint16_t* t = costs[n][ctx0];
|
||||||
// bit_cost(1, p0) is already incorporated in t[] tables, but only if ctx != 0
|
// bit_cost(1, p0) is already incorporated in t[] tables, but only if ctx != 0
|
||||||
// (as required by the syntax). For ctx0 == 0, we need to add it here or it'll
|
// (as required by the syntax). For ctx0 == 0, we need to add it here or it'll
|
||||||
// be missing during the loop.
|
// be missing during the loop.
|
||||||
@ -334,10 +335,9 @@ static int GetResidualCost(int ctx0, const VP8Residual* const res) {
|
|||||||
}
|
}
|
||||||
for (; n < res->last; ++n) {
|
for (; n < res->last; ++n) {
|
||||||
const int v = abs(res->coeffs[n]);
|
const int v = abs(res->coeffs[n]);
|
||||||
const int b = VP8EncBands[n + 1];
|
|
||||||
const int ctx = (v >= 2) ? 2 : v;
|
const int ctx = (v >= 2) ? 2 : v;
|
||||||
cost += VP8LevelCost(t, v);
|
cost += VP8LevelCost(t, v);
|
||||||
t = res->cost[b][ctx];
|
t = costs[n + 1][ctx];
|
||||||
}
|
}
|
||||||
// Last coefficient is always non-zero
|
// Last coefficient is always non-zero
|
||||||
{
|
{
|
||||||
|
@ -51,7 +51,8 @@ static int GetResidualCostSSE2(int ctx0, const VP8Residual* const res) {
|
|||||||
int n = res->first;
|
int n = res->first;
|
||||||
// should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 1
|
// should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 1
|
||||||
const int p0 = res->prob[n][ctx0][0];
|
const int p0 = res->prob[n][ctx0][0];
|
||||||
const uint16_t* t = res->cost[n][ctx0];
|
CostArrayPtr const costs = res->costs;
|
||||||
|
const uint16_t* t = costs[n][ctx0];
|
||||||
// bit_cost(1, p0) is already incorporated in t[] tables, but only if ctx != 0
|
// bit_cost(1, p0) is already incorporated in t[] tables, but only if ctx != 0
|
||||||
// (as required by the syntax). For ctx0 == 0, we need to add it here or it'll
|
// (as required by the syntax). For ctx0 == 0, we need to add it here or it'll
|
||||||
// be missing during the loop.
|
// be missing during the loop.
|
||||||
@ -87,9 +88,8 @@ static int GetResidualCostSSE2(int ctx0, const VP8Residual* const res) {
|
|||||||
const int ctx = ctxs[n];
|
const int ctx = ctxs[n];
|
||||||
const int level = levels[n];
|
const int level = levels[n];
|
||||||
const int flevel = abs_levels[n]; // full level
|
const int flevel = abs_levels[n]; // full level
|
||||||
const int b = VP8EncBands[n + 1];
|
|
||||||
cost += VP8LevelFixedCosts[flevel] + t[level]; // simplified VP8LevelCost()
|
cost += VP8LevelFixedCosts[flevel] + t[level]; // simplified VP8LevelCost()
|
||||||
t = res->cost[b][ctx];
|
t = costs[n + 1][ctx];
|
||||||
}
|
}
|
||||||
// Last coefficient is always non-zero
|
// Last coefficient is always non-zero
|
||||||
{
|
{
|
||||||
|
@ -63,6 +63,7 @@ void VP8CalculateLevelCosts(VP8Proba* const proba) {
|
|||||||
if (!proba->dirty_) return; // nothing to do.
|
if (!proba->dirty_) return; // nothing to do.
|
||||||
|
|
||||||
for (ctype = 0; ctype < NUM_TYPES; ++ctype) {
|
for (ctype = 0; ctype < NUM_TYPES; ++ctype) {
|
||||||
|
int n;
|
||||||
for (band = 0; band < NUM_BANDS; ++band) {
|
for (band = 0; band < NUM_BANDS; ++band) {
|
||||||
for (ctx = 0; ctx < NUM_CTX; ++ctx) {
|
for (ctx = 0; ctx < NUM_CTX; ++ctx) {
|
||||||
const uint8_t* const p = proba->coeffs_[ctype][band][ctx];
|
const uint8_t* const p = proba->coeffs_[ctype][band][ctx];
|
||||||
@ -78,6 +79,12 @@ void VP8CalculateLevelCosts(VP8Proba* const proba) {
|
|||||||
// actually constant.
|
// actually constant.
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
for (n = 0; n < 16; ++n) { // replicate bands. We don't need to sentinel.
|
||||||
|
for (ctx = 0; ctx < NUM_CTX; ++ctx) {
|
||||||
|
proba->remapped_costs_[ctype][n][ctx] =
|
||||||
|
proba->level_cost_[ctype][VP8EncBands[n]][ctx];
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
proba->dirty_ = 0;
|
proba->dirty_ = 0;
|
||||||
}
|
}
|
||||||
@ -202,6 +209,7 @@ void VP8InitResidual(int first, int coeff_type,
|
|||||||
res->prob = enc->proba_.coeffs_[coeff_type];
|
res->prob = enc->proba_.coeffs_[coeff_type];
|
||||||
res->stats = enc->proba_.stats_[coeff_type];
|
res->stats = enc->proba_.stats_[coeff_type];
|
||||||
res->cost = enc->proba_.level_cost_[coeff_type];
|
res->cost = enc->proba_.level_cost_[coeff_type];
|
||||||
|
res->costs = enc->proba_.remapped_costs_[coeff_type];
|
||||||
res->first = first;
|
res->first = first;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -31,9 +31,10 @@ struct VP8Residual {
|
|||||||
const int16_t* coeffs;
|
const int16_t* coeffs;
|
||||||
|
|
||||||
int coeff_type;
|
int coeff_type;
|
||||||
ProbaArray* prob;
|
ProbaArray* prob;
|
||||||
StatsArray* stats;
|
StatsArray* stats;
|
||||||
CostArray* cost;
|
CostArray* cost; // TODO(skal): remove in favor of *costs
|
||||||
|
CostArrayPtr costs;
|
||||||
};
|
};
|
||||||
|
|
||||||
void VP8InitResidual(int first, int coeff_type,
|
void VP8InitResidual(int first, int coeff_type,
|
||||||
|
@ -550,7 +550,8 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc,
|
|||||||
const VP8Matrix* const mtx,
|
const VP8Matrix* const mtx,
|
||||||
int lambda) {
|
int lambda) {
|
||||||
const ProbaArray* const probas = enc->proba_.coeffs_[coeff_type];
|
const ProbaArray* const probas = enc->proba_.coeffs_[coeff_type];
|
||||||
const CostArray* const costs = enc->proba_.level_cost_[coeff_type];
|
CostArrayPtr const costs =
|
||||||
|
(CostArrayPtr)enc->proba_.remapped_costs_[coeff_type];
|
||||||
const int first = (coeff_type == 0) ? 1 : 0;
|
const int first = (coeff_type == 0) ? 1 : 0;
|
||||||
Node nodes[16][NUM_NODES];
|
Node nodes[16][NUM_NODES];
|
||||||
ScoreState score_states[2][NUM_NODES];
|
ScoreState score_states[2][NUM_NODES];
|
||||||
@ -587,7 +588,7 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc,
|
|||||||
for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) {
|
for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) {
|
||||||
const score_t rate = (ctx0 == 0) ? VP8BitCost(1, last_proba) : 0;
|
const score_t rate = (ctx0 == 0) ? VP8BitCost(1, last_proba) : 0;
|
||||||
ss_cur[m].score = RDScoreTrellis(lambda, rate, 0);
|
ss_cur[m].score = RDScoreTrellis(lambda, rate, 0);
|
||||||
ss_cur[m].costs = costs[VP8EncBands[first]][ctx0];
|
ss_cur[m].costs = costs[first][ctx0];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -621,7 +622,7 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc,
|
|||||||
int best_prev = 0; // default, in case
|
int best_prev = 0; // default, in case
|
||||||
|
|
||||||
ss_cur[m].score = MAX_COST;
|
ss_cur[m].score = MAX_COST;
|
||||||
ss_cur[m].costs = costs[band][ctx];
|
ss_cur[m].costs = costs[n + 1][ctx];
|
||||||
if (level > MAX_LEVEL || level < 0) { // node is dead?
|
if (level > MAX_LEVEL || level < 0) { // node is dead?
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
@ -151,6 +151,8 @@ typedef uint32_t proba_t; // 16b + 16b
|
|||||||
typedef uint8_t ProbaArray[NUM_CTX][NUM_PROBAS];
|
typedef uint8_t ProbaArray[NUM_CTX][NUM_PROBAS];
|
||||||
typedef proba_t StatsArray[NUM_CTX][NUM_PROBAS];
|
typedef proba_t StatsArray[NUM_CTX][NUM_PROBAS];
|
||||||
typedef uint16_t CostArray[NUM_CTX][MAX_VARIABLE_LEVEL + 1];
|
typedef uint16_t CostArray[NUM_CTX][MAX_VARIABLE_LEVEL + 1];
|
||||||
|
typedef const uint16_t* (*CostArrayPtr)[NUM_CTX]; // for easy casting
|
||||||
|
typedef const uint16_t* CostArrayMap[16][NUM_CTX];
|
||||||
typedef double LFStats[NUM_MB_SEGMENTS][MAX_LF_LEVELS]; // filter stats
|
typedef double LFStats[NUM_MB_SEGMENTS][MAX_LF_LEVELS]; // filter stats
|
||||||
|
|
||||||
typedef struct VP8Encoder VP8Encoder;
|
typedef struct VP8Encoder VP8Encoder;
|
||||||
@ -170,6 +172,7 @@ typedef struct {
|
|||||||
ProbaArray coeffs_[NUM_TYPES][NUM_BANDS]; // 1056 bytes
|
ProbaArray coeffs_[NUM_TYPES][NUM_BANDS]; // 1056 bytes
|
||||||
StatsArray stats_[NUM_TYPES][NUM_BANDS]; // 4224 bytes
|
StatsArray stats_[NUM_TYPES][NUM_BANDS]; // 4224 bytes
|
||||||
CostArray level_cost_[NUM_TYPES][NUM_BANDS]; // 13056 bytes
|
CostArray level_cost_[NUM_TYPES][NUM_BANDS]; // 13056 bytes
|
||||||
|
CostArrayMap remapped_costs_[NUM_TYPES]; // 1536 bytes
|
||||||
int dirty_; // if true, need to call VP8CalculateLevelCosts()
|
int dirty_; // if true, need to call VP8CalculateLevelCosts()
|
||||||
int use_skip_proba_; // Note: we always use skip_proba for now.
|
int use_skip_proba_; // Note: we always use skip_proba for now.
|
||||||
int nb_skip_; // number of skipped blocks
|
int nb_skip_; // number of skipped blocks
|
||||||
|
Loading…
Reference in New Issue
Block a user