From 0de3096b3af27d8b7ca7af2f9666ff8e157128e6 Mon Sep 17 00:00:00 2001 From: Pascal Massimino Date: Mon, 23 Jan 2012 17:50:58 -0800 Subject: [PATCH] use 16bit counters for recording proba counts This proved to be OK, even for large pictures, provided one takes care of overflow. When an overflow is bound to occur, the counters are renormalized. Overall, this shaves ~12k of memory. Change-Id: I2ba21a407964fe1a34c352371cba15166e0c4548 --- src/enc/cost.h | 8 +----- src/enc/frame.c | 69 ++++++++++++++++++++++++++++------------------- src/enc/vp8enci.h | 5 ++-- 3 files changed, 46 insertions(+), 36 deletions(-) diff --git a/src/enc/cost.h b/src/enc/cost.h index 4f55e2cc..09b75b69 100644 --- a/src/enc/cost.h +++ b/src/enc/cost.h @@ -26,18 +26,12 @@ static WEBP_INLINE int VP8BitCost(int bit, uint8_t proba) { return !bit ? VP8EntropyCost[proba] : VP8EntropyCost[255 - proba]; } -// Cost of coding 'nb' 1's and 'total-nb' 0's using 'proba' probability. -static WEBP_INLINE uint64_t VP8BranchCost(uint64_t nb, uint64_t total, - uint8_t proba) { - return nb * VP8BitCost(1, proba) + (total - nb) * VP8BitCost(0, proba); -} - // Level cost calculations extern const uint16_t VP8LevelCodes[MAX_VARIABLE_LEVEL][2]; void VP8CalculateLevelCosts(VP8Proba* const proba); static WEBP_INLINE int VP8LevelCost(const uint16_t* const table, int level) { return VP8LevelFixedCosts[level] - + table[level > MAX_VARIABLE_LEVEL ? MAX_VARIABLE_LEVEL : level]; + + table[(level > MAX_VARIABLE_LEVEL) ? 
MAX_VARIABLE_LEVEL : level]; } // Mode costs diff --git a/src/enc/frame.c b/src/enc/frame.c index f9fbdf2d..615aa960 100644 --- a/src/enc/frame.c +++ b/src/enc/frame.c @@ -9,6 +9,7 @@ // // Author: Skal (pascal.massimino@gmail.com) +#include #include #include #include @@ -132,9 +133,14 @@ static void ResetTokenStats(VP8Encoder* const enc) { } // Record proba context used -static int Record(int bit, uint64_t* const stats) { - stats[0] += bit; - stats[1] += 1; +static int Record(int bit, proba_t* const stats) { + proba_t p = *stats; + if (p >= 0xffff0000u) { // an overflow is inbound. + p = ((p + 1u) >> 1) & 0x7fff7fffu; // -> divide the stats by 2. + } + // record bit count (lower 16 bits) and increment total count (upper 16 bits). + p += 0x00010000u + bit; + *stats = p; return bit; } @@ -145,33 +151,33 @@ static int Record(int bit, uint64_t* const stats) { // Note: no need to record the fixed probas. static int RecordCoeffs(int ctx, VP8Residual* res) { int n = res->first; - uint64_t (*s)[2] = res->stats[VP8EncBands[n]][ctx]; + proba_t *s = res->stats[VP8EncBands[n]][ctx]; if (res->last < 0) { - Record(0, s[0]); + Record(0, s + 0); return 0; } while (n <= res->last) { int v; - Record(1, s[0]); + Record(1, s + 0); while ((v = res->coeffs[n++]) == 0) { - Record(0, s[1]); + Record(0, s + 1); s = res->stats[VP8EncBands[n]][0]; } - Record(1, s[1]); - if (!Record(2u < (unsigned int)(v + 1), s[2])) { // v = -1 or 1 + Record(1, s + 1); + if (!Record(2u < (unsigned int)(v + 1), s + 2)) { // v = -1 or 1 s = res->stats[VP8EncBands[n]][1]; } else { v = abs(v); #if !defined(USE_LEVEL_CODE_TABLE) - if (!Record(v > 4, s[3])) { - if (Record(v != 2, s[4])) - Record(v == 4, s[5]); - } else if (!Record(v > 10, s[6])) { - Record(v > 6, s[7]); - } else if (!Record((v >= 3 + (8 << 2)), s[8])) { - Record((v >= 3 + (8 << 1)), s[9]); + if (!Record(v > 4, s + 3)) { + if (Record(v != 2, s + 4)) + Record(v == 4, s + 5); + } else if (!Record(v > 10, s + 6)) { + Record(v > 6, s + 7); + } else if 
(!Record((v >= 3 + (8 << 2)), s + 8)) { + Record((v >= 3 + (8 << 1)), s + 9); } else { - Record((v >= 3 + (8 << 3)), s[10]); + Record((v >= 3 + (8 << 3)), s + 10); } #else if (v > MAX_VARIABLE_LEVEL) @@ -183,21 +189,27 @@ static int RecordCoeffs(int ctx, VP8Residual* res) { int i; for (i = 0; (pattern >>= 1) != 0; ++i) { const int mask = 2 << i; - if (pattern & 1) Record(!!(bits & mask), s[3 + i]); + if (pattern & 1) Record(!!(bits & mask), s + 3 + i); } } #endif s = res->stats[VP8EncBands[n]][2]; } } - if (n < 16) Record(0, s[0]); + if (n < 16) Record(0, s + 0); return 1; } // Collect statistics and deduce probabilities for next coding pass. // Return the total bit-cost for coding the probability updates. -static int CalcTokenProba(uint64_t nb, uint64_t total) { - return (int)(nb ? ((total - nb) * 255 + total / 2) / total : 255); +static int CalcTokenProba(int nb, int total) { + assert(nb <= total); + return nb ? (255 - nb * 255 / total) : 255; +} + +// Cost of coding 'nb' 1's and 'total-nb' 0's using 'proba' probability. 
+static int BranchCost(int nb, int total, int proba) { + return nb * VP8BitCost(1, proba) + (total - nb) * VP8BitCost(0, proba); } static int FinalizeTokenProbas(VP8Encoder* const enc) { @@ -208,14 +220,17 @@ static int FinalizeTokenProbas(VP8Encoder* const enc) { for (b = 0; b < NUM_BANDS; ++b) { for (c = 0; c < NUM_CTX; ++c) { for (p = 0; p < NUM_PROBAS; ++p) { - const uint64_t* const cnt = proba->stats_[t][b][c][p]; + const proba_t stats = proba->stats_[t][b][c][p]; + const int nb = (stats >> 0) & 0xffff; + const int total = (stats >> 16) & 0xffff; const int update_proba = VP8CoeffsUpdateProba[t][b][c][p]; const int old_p = VP8CoeffsProba0[t][b][c][p]; - const int new_p = CalcTokenProba(cnt[0], cnt[1]); - const uint64_t old_cost = VP8BranchCost(cnt[0], cnt[1], old_p) - + VP8BitCost(0, update_proba); - const uint64_t new_cost = VP8BranchCost(cnt[0], cnt[1], new_p) - + VP8BitCost(1, update_proba) + 8 * 256; + const int new_p = CalcTokenProba(nb, total); + const int old_cost = BranchCost(nb, total, old_p) + + VP8BitCost(0, update_proba); + const int new_cost = BranchCost(nb, total, new_p) + + VP8BitCost(1, update_proba) + + 8 * 256; const int use_new_p = (old_cost > new_cost); size += VP8BitCost(use_new_p, update_proba); if (use_new_p) { // only use proba that seem meaningful enough. 
diff --git a/src/enc/vp8enci.h b/src/enc/vp8enci.h index 57e16cf4..ade1a81e 100644 --- a/src/enc/vp8enci.h +++ b/src/enc/vp8enci.h @@ -165,8 +165,9 @@ extern const uint8_t VP8Zigzag[16]; //------------------------------------------------------------------------------ // Headers +typedef uint32_t proba_t; // 16b + 16b typedef uint8_t ProbaArray[NUM_CTX][NUM_PROBAS]; -typedef uint64_t StatsArray[NUM_CTX][NUM_PROBAS][2]; +typedef proba_t StatsArray[NUM_CTX][NUM_PROBAS]; typedef uint16_t CostArray[NUM_CTX][MAX_VARIABLE_LEVEL + 1]; typedef double LFStats[NUM_MB_SEGMENTS][MAX_LF_LEVELS]; // filter stats @@ -185,7 +186,7 @@ typedef struct { uint8_t segments_[3]; // probabilities for segment tree uint8_t skip_proba_; // final probability of being skipped. ProbaArray coeffs_[NUM_TYPES][NUM_BANDS]; // 924 bytes - StatsArray stats_[NUM_TYPES][NUM_BANDS]; // 7.4k + StatsArray stats_[NUM_TYPES][NUM_BANDS]; // 4224 bytes CostArray level_cost_[NUM_TYPES][NUM_BANDS]; // 11.4k int use_skip_proba_; // Note: we always use skip_proba for now. int nb_skip_; // number of skipped blocks