Use 16-bit counters for recording probability counts

This proved to be OK, even for large pictures, provided one
takes care of overflow: when an overflow is about to occur, the
counters are renormalized (divided by 2).
Overall, this saves ~12k of memory.

Change-Id: I2ba21a407964fe1a34c352371cba15166e0c4548
This commit is contained in:
Pascal Massimino 2012-01-23 17:50:58 -08:00
parent 7f23678da0
commit 0de3096b3a
3 changed files with 46 additions and 36 deletions

View File

@ -26,18 +26,12 @@ static WEBP_INLINE int VP8BitCost(int bit, uint8_t proba) {
return !bit ? VP8EntropyCost[proba] : VP8EntropyCost[255 - proba]; return !bit ? VP8EntropyCost[proba] : VP8EntropyCost[255 - proba];
} }
// Cost of coding 'nb' 1's and 'total-nb' 0's using 'proba' probability.
static WEBP_INLINE uint64_t VP8BranchCost(uint64_t nb, uint64_t total,
uint8_t proba) {
return nb * VP8BitCost(1, proba) + (total - nb) * VP8BitCost(0, proba);
}
// Level cost calculations // Level cost calculations
extern const uint16_t VP8LevelCodes[MAX_VARIABLE_LEVEL][2]; extern const uint16_t VP8LevelCodes[MAX_VARIABLE_LEVEL][2];
void VP8CalculateLevelCosts(VP8Proba* const proba); void VP8CalculateLevelCosts(VP8Proba* const proba);
static WEBP_INLINE int VP8LevelCost(const uint16_t* const table, int level) { static WEBP_INLINE int VP8LevelCost(const uint16_t* const table, int level) {
return VP8LevelFixedCosts[level] return VP8LevelFixedCosts[level]
+ table[level > MAX_VARIABLE_LEVEL ? MAX_VARIABLE_LEVEL : level]; + table[(level > MAX_VARIABLE_LEVEL) ? MAX_VARIABLE_LEVEL : level];
} }
// Mode costs // Mode costs

View File

@ -9,6 +9,7 @@
// //
// Author: Skal (pascal.massimino@gmail.com) // Author: Skal (pascal.massimino@gmail.com)
#include <assert.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <math.h> #include <math.h>
@ -132,9 +133,14 @@ static void ResetTokenStats(VP8Encoder* const enc) {
} }
// Record proba context used // Record proba context used
static int Record(int bit, uint64_t* const stats) { static int Record(int bit, proba_t* const stats) {
stats[0] += bit; proba_t p = *stats;
stats[1] += 1; if (p >= 0xffff0000u) { // an overflow is inbound.
p = ((p + 1u) >> 1) & 0x7fff7fffu; // -> divide the stats by 2.
}
// record bit count (lower 16 bits) and increment total count (upper 16 bits).
p += 0x00010000u + bit;
*stats = p;
return bit; return bit;
} }
@ -145,33 +151,33 @@ static int Record(int bit, uint64_t* const stats) {
// Note: no need to record the fixed probas. // Note: no need to record the fixed probas.
static int RecordCoeffs(int ctx, VP8Residual* res) { static int RecordCoeffs(int ctx, VP8Residual* res) {
int n = res->first; int n = res->first;
uint64_t (*s)[2] = res->stats[VP8EncBands[n]][ctx]; proba_t *s = res->stats[VP8EncBands[n]][ctx];
if (res->last < 0) { if (res->last < 0) {
Record(0, s[0]); Record(0, s + 0);
return 0; return 0;
} }
while (n <= res->last) { while (n <= res->last) {
int v; int v;
Record(1, s[0]); Record(1, s + 0);
while ((v = res->coeffs[n++]) == 0) { while ((v = res->coeffs[n++]) == 0) {
Record(0, s[1]); Record(0, s + 1);
s = res->stats[VP8EncBands[n]][0]; s = res->stats[VP8EncBands[n]][0];
} }
Record(1, s[1]); Record(1, s + 1);
if (!Record(2u < (unsigned int)(v + 1), s[2])) { // v = -1 or 1 if (!Record(2u < (unsigned int)(v + 1), s + 2)) { // v = -1 or 1
s = res->stats[VP8EncBands[n]][1]; s = res->stats[VP8EncBands[n]][1];
} else { } else {
v = abs(v); v = abs(v);
#if !defined(USE_LEVEL_CODE_TABLE) #if !defined(USE_LEVEL_CODE_TABLE)
if (!Record(v > 4, s[3])) { if (!Record(v > 4, s + 3)) {
if (Record(v != 2, s[4])) if (Record(v != 2, s + 4))
Record(v == 4, s[5]); Record(v == 4, s + 5);
} else if (!Record(v > 10, s[6])) { } else if (!Record(v > 10, s + 6)) {
Record(v > 6, s[7]); Record(v > 6, s + 7);
} else if (!Record((v >= 3 + (8 << 2)), s[8])) { } else if (!Record((v >= 3 + (8 << 2)), s + 8)) {
Record((v >= 3 + (8 << 1)), s[9]); Record((v >= 3 + (8 << 1)), s + 9);
} else { } else {
Record((v >= 3 + (8 << 3)), s[10]); Record((v >= 3 + (8 << 3)), s + 10);
} }
#else #else
if (v > MAX_VARIABLE_LEVEL) if (v > MAX_VARIABLE_LEVEL)
@ -183,21 +189,27 @@ static int RecordCoeffs(int ctx, VP8Residual* res) {
int i; int i;
for (i = 0; (pattern >>= 1) != 0; ++i) { for (i = 0; (pattern >>= 1) != 0; ++i) {
const int mask = 2 << i; const int mask = 2 << i;
if (pattern & 1) Record(!!(bits & mask), s[3 + i]); if (pattern & 1) Record(!!(bits & mask), s + 3 + i);
} }
} }
#endif #endif
s = res->stats[VP8EncBands[n]][2]; s = res->stats[VP8EncBands[n]][2];
} }
} }
if (n < 16) Record(0, s[0]); if (n < 16) Record(0, s + 0);
return 1; return 1;
} }
// Collect statistics and deduce probabilities for next coding pass. // Collect statistics and deduce probabilities for next coding pass.
// Return the total bit-cost for coding the probability updates. // Return the total bit-cost for coding the probability updates.
static int CalcTokenProba(uint64_t nb, uint64_t total) { static int CalcTokenProba(int nb, int total) {
return (int)(nb ? ((total - nb) * 255 + total / 2) / total : 255); assert(nb <= total);
return nb ? (255 - nb * 255 / total) : 255;
}
// Cost of coding 'nb' 1's and 'total-nb' 0's using 'proba' probability.
static int BranchCost(int nb, int total, int proba) {
return nb * VP8BitCost(1, proba) + (total - nb) * VP8BitCost(0, proba);
} }
static int FinalizeTokenProbas(VP8Encoder* const enc) { static int FinalizeTokenProbas(VP8Encoder* const enc) {
@ -208,14 +220,17 @@ static int FinalizeTokenProbas(VP8Encoder* const enc) {
for (b = 0; b < NUM_BANDS; ++b) { for (b = 0; b < NUM_BANDS; ++b) {
for (c = 0; c < NUM_CTX; ++c) { for (c = 0; c < NUM_CTX; ++c) {
for (p = 0; p < NUM_PROBAS; ++p) { for (p = 0; p < NUM_PROBAS; ++p) {
const uint64_t* const cnt = proba->stats_[t][b][c][p]; const proba_t stats = proba->stats_[t][b][c][p];
const int nb = (stats >> 0) & 0xffff;
const int total = (stats >> 16) & 0xffff;
const int update_proba = VP8CoeffsUpdateProba[t][b][c][p]; const int update_proba = VP8CoeffsUpdateProba[t][b][c][p];
const int old_p = VP8CoeffsProba0[t][b][c][p]; const int old_p = VP8CoeffsProba0[t][b][c][p];
const int new_p = CalcTokenProba(cnt[0], cnt[1]); const int new_p = CalcTokenProba(nb, total);
const uint64_t old_cost = VP8BranchCost(cnt[0], cnt[1], old_p) const int old_cost = BranchCost(nb, total, old_p)
+ VP8BitCost(0, update_proba); + VP8BitCost(0, update_proba);
const uint64_t new_cost = VP8BranchCost(cnt[0], cnt[1], new_p) const int new_cost = BranchCost(nb, total, new_p)
+ VP8BitCost(1, update_proba) + 8 * 256; + VP8BitCost(1, update_proba)
+ 8 * 256;
const int use_new_p = (old_cost > new_cost); const int use_new_p = (old_cost > new_cost);
size += VP8BitCost(use_new_p, update_proba); size += VP8BitCost(use_new_p, update_proba);
if (use_new_p) { // only use proba that seem meaningful enough. if (use_new_p) { // only use proba that seem meaningful enough.

View File

@ -165,8 +165,9 @@ extern const uint8_t VP8Zigzag[16];
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
// Headers // Headers
typedef uint32_t proba_t; // 16b + 16b
typedef uint8_t ProbaArray[NUM_CTX][NUM_PROBAS]; typedef uint8_t ProbaArray[NUM_CTX][NUM_PROBAS];
typedef uint64_t StatsArray[NUM_CTX][NUM_PROBAS][2]; typedef proba_t StatsArray[NUM_CTX][NUM_PROBAS];
typedef uint16_t CostArray[NUM_CTX][MAX_VARIABLE_LEVEL + 1]; typedef uint16_t CostArray[NUM_CTX][MAX_VARIABLE_LEVEL + 1];
typedef double LFStats[NUM_MB_SEGMENTS][MAX_LF_LEVELS]; // filter stats typedef double LFStats[NUM_MB_SEGMENTS][MAX_LF_LEVELS]; // filter stats
@ -185,7 +186,7 @@ typedef struct {
uint8_t segments_[3]; // probabilities for segment tree uint8_t segments_[3]; // probabilities for segment tree
uint8_t skip_proba_; // final probability of being skipped. uint8_t skip_proba_; // final probability of being skipped.
ProbaArray coeffs_[NUM_TYPES][NUM_BANDS]; // 924 bytes ProbaArray coeffs_[NUM_TYPES][NUM_BANDS]; // 924 bytes
StatsArray stats_[NUM_TYPES][NUM_BANDS]; // 7.4k StatsArray stats_[NUM_TYPES][NUM_BANDS]; // 4224 bytes
CostArray level_cost_[NUM_TYPES][NUM_BANDS]; // 11.4k CostArray level_cost_[NUM_TYPES][NUM_BANDS]; // 11.4k
int use_skip_proba_; // Note: we always use skip_proba for now. int use_skip_proba_; // Note: we always use skip_proba for now.
int nb_skip_; // number of skipped blocks int nb_skip_; // number of skipped blocks