diff --git a/src/dsp/lossless.h b/src/dsp/lossless.h index 45f7b97c..e350e4ca 100644 --- a/src/dsp/lossless.h +++ b/src/dsp/lossless.h @@ -185,8 +185,8 @@ extern VP8LPredictorAddSubFunc VP8LPredictorsSub_C[16]; typedef uint32_t (*VP8LCostFunc)(const uint32_t* population, int length); typedef uint32_t (*VP8LCostCombinedFunc)(const uint32_t* X, const uint32_t* Y, int length); -typedef float (*VP8LCombinedShannonEntropyFunc)(const uint32_t X[256], - const uint32_t Y[256]); +typedef uint64_t (*VP8LCombinedShannonEntropyFunc)(const uint32_t X[256], + const uint32_t Y[256]); extern VP8LCostFunc VP8LExtraCost; extern VP8LCostCombinedFunc VP8LExtraCostCombined; @@ -198,7 +198,7 @@ typedef struct { // small struct to hold counters } VP8LStreaks; typedef struct { // small struct to hold bit entropy results - float entropy; // entropy + uint64_t entropy; // entropy uint32_t sum; // sum of the population int nonzeros; // number of non-zero elements in the population uint32_t max_val; // maximum value in the population diff --git a/src/dsp/lossless_common.h b/src/dsp/lossless_common.h index 54abbdbd..33f2c4dc 100644 --- a/src/dsp/lossless_common.h +++ b/src/dsp/lossless_common.h @@ -82,15 +82,12 @@ static WEBP_INLINE int VP8LNearLosslessBits(int near_lossless_quality) { #define LOG_2_RECIPROCAL 1.44269504088896338700465094007086 // LOG_2_RECIPROCAL * (1 << LOG_2_PRECISION_BITS) #define LOG_2_RECIPROCAL_FIXED_DOUBLE 12102203.161561485379934310913085937500 -#define LOG_2_RECIPROCAL_FIXED 12102203 +#define LOG_2_RECIPROCAL_FIXED ((uint64_t)12102203) #define LOG_LOOKUP_IDX_MAX 256 extern const uint32_t kLog2Table[LOG_LOOKUP_IDX_MAX]; -extern const float kSLog2Table[LOG_LOOKUP_IDX_MAX]; -// TODO(vrabaud) remove this table once VP8LFastSLog2 is switched to fixed -// point. -extern const float kLog2fTable[LOG_LOOKUP_IDX_MAX]; +extern const uint64_t kSLog2Table[LOG_LOOKUP_IDX_MAX]; typedef uint32_t (*VP8LFastLog2SlowFunc)(uint32_t v); -typedef float (*VP8LFastSLog2SlowFunc)(uint32_t v); +typedef uint64_t (*VP8LFastSLog2SlowFunc)(uint32_t v); extern VP8LFastLog2SlowFunc VP8LFastLog2Slow; extern VP8LFastSLog2SlowFunc VP8LFastSLog2Slow; @@ -99,7 +96,7 @@ static WEBP_INLINE uint32_t VP8LFastLog2(uint32_t v) { return (v < LOG_LOOKUP_IDX_MAX) ? kLog2Table[v] : VP8LFastLog2Slow(v); } // Fast calculation of v * log2(v) for integer input. -static WEBP_INLINE float VP8LFastSLog2(uint32_t v) { +static WEBP_INLINE uint64_t VP8LFastSLog2(uint32_t v) { return (v < LOG_LOOKUP_IDX_MAX) ? kSLog2Table[v] : VP8LFastSLog2Slow(v); } diff --git a/src/dsp/lossless_enc.c b/src/dsp/lossless_enc.c index c0663f1f..6dfc653e 100644 --- a/src/dsp/lossless_enc.c +++ b/src/dsp/lossless_enc.c @@ -24,138 +24,6 @@ #include "src/dsp/lossless_common.h" #include "src/dsp/yuv.h" -// lookup table for small values of log2(int) -const float kLog2fTable[LOG_LOOKUP_IDX_MAX] = { - 0.0000000000000000f, 0.0000000000000000f, - 1.0000000000000000f, 1.5849625007211560f, - 2.0000000000000000f, 2.3219280948873621f, - 2.5849625007211560f, 2.8073549220576041f, - 3.0000000000000000f, 3.1699250014423121f, - 3.3219280948873621f, 3.4594316186372973f, - 3.5849625007211560f, 3.7004397181410921f, - 3.8073549220576041f, 3.9068905956085187f, - 4.0000000000000000f, 4.0874628412503390f, - 4.1699250014423121f, 4.2479275134435852f, - 4.3219280948873626f, 4.3923174227787606f, - 4.4594316186372973f, 4.5235619560570130f, - 4.5849625007211560f, 4.6438561897747243f, - 4.7004397181410917f, 4.7548875021634682f, - 4.8073549220576037f, 4.8579809951275718f, - 4.9068905956085187f, 4.9541963103868749f, - 5.0000000000000000f, 5.0443941193584533f, - 5.0874628412503390f, 5.1292830169449663f, - 5.1699250014423121f, 5.2094533656289501f, - 5.2479275134435852f, 5.2854022188622487f, - 5.3219280948873626f, 5.3575520046180837f, - 5.3923174227787606f, 5.4262647547020979f, - 5.4594316186372973f, 5.4918530963296747f, - 5.5235619560570130f, 5.5545888516776376f, - 5.5849625007211560f, 5.6147098441152083f, - 5.6438561897747243f, 5.6724253419714951f, - 5.7004397181410917f, 5.7279204545631987f, - 5.7548875021634682f, 5.7813597135246599f, - 5.8073549220576037f, 5.8328900141647412f, - 5.8579809951275718f, 5.8826430493618415f, - 5.9068905956085187f, 5.9307373375628866f, - 5.9541963103868749f, 5.9772799234999167f, - 6.0000000000000000f, 6.0223678130284543f, - 6.0443941193584533f, 6.0660891904577720f, - 6.0874628412503390f, 6.1085244567781691f, - 6.1292830169449663f, 6.1497471195046822f, - 6.1699250014423121f, 6.1898245588800175f, - 6.2094533656289501f, 6.2288186904958804f, - 6.2479275134435852f, 6.2667865406949010f, - 6.2854022188622487f, 6.3037807481771030f, - 6.3219280948873626f, 6.3398500028846243f, - 6.3575520046180837f, 6.3750394313469245f, - 6.3923174227787606f, 6.4093909361377017f, - 6.4262647547020979f, 6.4429434958487279f, - 6.4594316186372973f, 6.4757334309663976f, - 6.4918530963296747f, 6.5077946401986963f, - 6.5235619560570130f, 6.5391588111080309f, - 6.5545888516776376f, 6.5698556083309478f, - 6.5849625007211560f, 6.5999128421871278f, - 6.6147098441152083f, 6.6293566200796094f, - 6.6438561897747243f, 6.6582114827517946f, - 6.6724253419714951f, 6.6865005271832185f, - 6.7004397181410917f, 6.7142455176661224f, - 6.7279204545631987f, 6.7414669864011464f, - 6.7548875021634682f, 6.7681843247769259f, - 6.7813597135246599f, 6.7944158663501061f, - 6.8073549220576037f, 6.8201789624151878f, - 6.8328900141647412f, 6.8454900509443747f, - 6.8579809951275718f, 6.8703647195834047f, - 6.8826430493618415f, 6.8948177633079437f, - 6.9068905956085187f, 6.9188632372745946f, - 6.9307373375628866f, 6.9425145053392398f, - 6.9541963103868749f, 6.9657842846620869f, - 6.9772799234999167f, 6.9886846867721654f, - 7.0000000000000000f, 7.0112272554232539f, - 7.0223678130284543f, 7.0334230015374501f, - 7.0443941193584533f, 7.0552824355011898f, - 7.0660891904577720f, 7.0768155970508308f, - 7.0874628412503390f, 7.0980320829605263f, - 7.1085244567781691f, 7.1189410727235076f, - 7.1292830169449663f, 7.1395513523987936f, - 7.1497471195046822f, 7.1598713367783890f, - 7.1699250014423121f, 7.1799090900149344f, - 7.1898245588800175f, 7.1996723448363644f, - 7.2094533656289501f, 7.2191685204621611f, - 7.2288186904958804f, 7.2384047393250785f, - 7.2479275134435852f, 7.2573878426926521f, - 7.2667865406949010f, 7.2761244052742375f, - 7.2854022188622487f, 7.2946207488916270f, - 7.3037807481771030f, 7.3128829552843557f, - 7.3219280948873626f, 7.3309168781146167f, - 7.3398500028846243f, 7.3487281542310771f, - 7.3575520046180837f, 7.3663222142458160f, - 7.3750394313469245f, 7.3837042924740519f, - 7.3923174227787606f, 7.4008794362821843f, - 7.4093909361377017f, 7.4178525148858982f, - 7.4262647547020979f, 7.4346282276367245f, - 7.4429434958487279f, 7.4512111118323289f, - 7.4594316186372973f, 7.4676055500829976f, - 7.4757334309663976f, 7.4838157772642563f, - 7.4918530963296747f, 7.4998458870832056f, - 7.5077946401986963f, 7.5156998382840427f, - 7.5235619560570130f, 7.5313814605163118f, - 7.5391588111080309f, 7.5468944598876364f, - 7.5545888516776376f, 7.5622424242210728f, - 7.5698556083309478f, 7.5774288280357486f, - 7.5849625007211560f, 7.5924570372680806f, - 7.5999128421871278f, 7.6073303137496104f, - 7.6147098441152083f, 7.6220518194563764f, - 7.6293566200796094f, 7.6366246205436487f, - 7.6438561897747243f, 7.6510516911789281f, - 7.6582114827517946f, 7.6653359171851764f, - 7.6724253419714951f, 7.6794800995054464f, - 7.6865005271832185f, 7.6934869574993252f, - 7.7004397181410917f, 7.7073591320808825f, - 7.7142455176661224f, 7.7210991887071855f, - 7.7279204545631987f, 7.7347096202258383f, - 7.7414669864011464f, 7.7481928495894605f, - 7.7548875021634682f, 7.7615512324444795f, - 7.7681843247769259f, 7.7747870596011736f, - 7.7813597135246599f, 7.7879025593914317f, - 7.7944158663501061f, 7.8008998999203047f, - 7.8073549220576037f, 7.8137811912170374f, - 7.8201789624151878f, 7.8265484872909150f, - 7.8328900141647412f, 7.8392037880969436f, - 7.8454900509443747f, 7.8517490414160571f, - 7.8579809951275718f, 7.8641861446542797f, - 7.8703647195834047f, 7.8765169465649993f, - 7.8826430493618415f, 7.8887432488982591f, - 7.8948177633079437f, 7.9008668079807486f, - 7.9068905956085187f, 7.9128893362299619f, - 7.9188632372745946f, 7.9248125036057812f, - 7.9307373375628866f, 7.9366379390025709f, - 7.9425145053392398f, 7.9483672315846778f, - 7.9541963103868749f, 7.9600019320680805f, - 7.9657842846620869f, 7.9715435539507719f, - 7.9772799234999167f, 7.9829935746943103f, - 7.9886846867721654f, 7.9943534368588577f -}; - // lookup table for small values of log2(int) * (1 << LOG_2_PRECISION_BITS). // Obtained in Python with: // a = [ str(round((1<<23)*math.log2(i))) if i else "0" for i in range(256)] @@ -201,71 +69,78 @@ const uint32_t kLog2Table[LOG_LOOKUP_IDX_MAX] = { 66918274, 66966204, 67013944, 67061497 }; -const float kSLog2Table[LOG_LOOKUP_IDX_MAX] = { - 0.00000000f, 0.00000000f, 2.00000000f, 4.75488750f, - 8.00000000f, 11.60964047f, 15.50977500f, 19.65148445f, - 24.00000000f, 28.52932501f, 33.21928095f, 38.05374781f, - 43.01955001f, 48.10571634f, 53.30296891f, 58.60335893f, - 64.00000000f, 69.48686830f, 75.05865003f, 80.71062276f, - 86.43856190f, 92.23866588f, 98.10749561f, 104.04192499f, - 110.03910002f, 116.09640474f, 122.21143267f, 128.38196256f, - 134.60593782f, 140.88144886f, 147.20671787f, 153.58008562f, - 160.00000000f, 166.46500594f, 172.97373660f, 179.52490559f, - 186.11730005f, 192.74977453f, 199.42124551f, 206.13068654f, - 212.87712380f, 219.65963219f, 226.47733176f, 233.32938445f, - 240.21499122f, 247.13338933f, 254.08384998f, 261.06567603f, - 268.07820003f, 275.12078236f, 282.19280949f, 289.29369244f, - 296.42286534f, 303.57978409f, 310.76392512f, 317.97478424f, - 325.21187564f, 332.47473081f, 339.76289772f, 347.07593991f, - 354.41343574f, 361.77497759f, 369.16017124f, 376.56863518f, - 384.00000000f, 391.45390785f, 398.93001188f, 406.42797576f, - 413.94747321f, 421.48818752f, 429.04981119f, 436.63204548f, - 444.23460010f, 451.85719280f, 459.49954906f, 467.16140179f, - 474.84249102f, 482.54256363f, 490.26137307f, 497.99867911f, - 505.75424759f, 513.52785023f, 521.31926438f, 529.12827280f, - 536.95466351f, 544.79822957f, 552.65876890f, 560.53608414f, - 568.42998244f, 576.34027536f, 584.26677867f, 592.20931226f, - 600.16769996f, 608.14176943f, 616.13135206f, 624.13628279f, - 632.15640007f, 640.19154569f, 648.24156472f, 656.30630539f, - 664.38561898f, 672.47935976f, 680.58738488f, 688.70955430f, - 696.84573069f, 704.99577935f, 713.15956818f, 721.33696754f, - 729.52785023f, 737.73209140f, 745.94956849f, 754.18016116f, - 762.42375127f, 770.68022275f, 778.94946161f, 787.23135586f, - 795.52579543f, 803.83267219f, 812.15187982f, 820.48331383f, - 828.82687147f, 837.18245171f, 845.54995518f, 853.92928416f, - 862.32034249f, 870.72303558f, 879.13727036f, 887.56295522f, - 896.00000000f, 904.44831595f, 912.90781569f, 921.37841320f, - 929.86002376f, 938.35256392f, 946.85595152f, 955.37010560f, - 963.89494641f, 972.43039537f, 980.97637504f, 989.53280911f, - 998.09962237f, 1006.67674069f, 1015.26409097f, 1023.86160116f, - 1032.46920021f, 1041.08681805f, 1049.71438560f, 1058.35183469f, - 1066.99909811f, 1075.65610955f, 1084.32280357f, 1092.99911564f, - 1101.68498204f, 1110.38033993f, 1119.08512727f, 1127.79928282f, - 1136.52274614f, 1145.25545758f, 1153.99735821f, 1162.74838989f, - 1171.50849518f, 1180.27761738f, 1189.05570047f, 1197.84268914f, - 1206.63852876f, 1215.44316535f, 1224.25654560f, 1233.07861684f, - 1241.90932703f, 1250.74862473f, 1259.59645914f, 1268.45278005f, - 1277.31753781f, 1286.19068338f, 1295.07216828f, 1303.96194457f, - 1312.85996488f, 1321.76618236f, 1330.68055071f, 1339.60302413f, - 1348.53355734f, 1357.47210556f, 1366.41862452f, 1375.37307041f, - 1384.33539991f, 1393.30557020f, 1402.28353887f, 1411.26926400f, - 1420.26270412f, 1429.26381818f, 1438.27256558f, 1447.28890615f, - 1456.31280014f, 1465.34420819f, 1474.38309138f, 1483.42941118f, - 1492.48312945f, 1501.54420843f, 1510.61261078f, 1519.68829949f, - 1528.77123795f, 1537.86138993f, 1546.95871952f, 1556.06319119f, - 1565.17476976f, 1574.29342040f, 1583.41910860f, 1592.55180020f, - 1601.69146137f, 1610.83805860f, 1619.99155871f, 1629.15192882f, - 1638.31913637f, 1647.49314911f, 1656.67393509f, 1665.86146266f, - 1675.05570047f, 1684.25661744f, 1693.46418280f, 1702.67836605f, - 1711.89913698f, 1721.12646563f, 1730.36032233f, 1739.60067768f, - 1748.84750254f, 1758.10076802f, 1767.36044551f, 1776.62650662f, - 1785.89892323f, 1795.17766747f, 1804.46271172f, 1813.75402857f, - 1823.05159087f, 1832.35537170f, 1841.66534438f, 1850.98148244f, - 1860.30375965f, 1869.63214999f, 1878.96662767f, 1888.30716711f, - 1897.65374295f, 1907.00633003f, 1916.36490342f, 1925.72943838f, - 1935.09991037f, 1944.47629506f, 1953.85856831f, 1963.24670620f, - 1972.64068498f, 1982.04048108f, 1991.44607117f, 2000.85743204f, - 2010.27454072f, 2019.69737440f, 2029.12591044f, 2038.56012640f +// lookup table for small values of int*log2(int) * (1 << LOG_2_PRECISION_BITS). +// Obtained in Python with: +// a=[ "%d"%i if i<(1<<32) else "%dull"%i +// for i in [ round((1<= LOG_LOOKUP_IDX_MAX); if (v < APPROX_LOG_WITH_CORRECTION_MAX) { + const uint64_t orig_v = v; + uint64_t correction; #if !defined(WEBP_HAVE_SLOW_CLZ_CTZ) // use clz if available - const int log_cnt = BitsLog2Floor(v) - 7; + const uint64_t log_cnt = BitsLog2Floor(v) - 7; const uint32_t y = 1 << log_cnt; - int correction = 0; - const float v_f = (float)v; - const uint32_t orig_v = v; v >>= log_cnt; #else - int log_cnt = 0; + uint64_t log_cnt = 0; uint32_t y = 1; - int correction = 0; - const float v_f = (float)v; - const uint32_t orig_v = v; do { ++log_cnt; v = v >> 1; @@ -399,11 +270,11 @@ static float FastSLog2Slow_C(uint32_t v) { // log2(Xf) = log2(floor(Xf)) + log2(1 + (v % y) / v) // The correction factor: log(1 + d) ~ d; for very small d values, so // log2(1 + (v % y) / v) ~ LOG_2_RECIPROCAL * (v % y)/v - // LOG_2_RECIPROCAL ~ 23/16 - correction = (23 * (orig_v & (y - 1))) >> 4; - return v_f * (kLog2fTable[v] + log_cnt) + correction; + correction = LOG_2_RECIPROCAL_FIXED * (orig_v & (y - 1)); + return orig_v * (kLog2Table[v] + (log_cnt << LOG_2_PRECISION_BITS)) + + correction; } else { - return (float)(LOG_2_RECIPROCAL * v * log((double)v)); + return (uint64_t)(LOG_2_RECIPROCAL_FIXED_DOUBLE * v * log((double)v) + .5); } } @@ -430,8 +301,7 @@ static uint32_t FastLog2Slow_C(uint32_t v) { if (orig_v >= APPROX_LOG_MAX) { // Since the division is still expensive, add this correction factor only // for large values of 'v'. - const uint64_t correction = - (uint64_t)LOG_2_RECIPROCAL_FIXED * (orig_v & (y - 1)); + const uint64_t correction = LOG_2_RECIPROCAL_FIXED * (orig_v & (y - 1)); log_2 += (uint32_t)DivRound(correction, orig_v); } return log_2; @@ -444,30 +314,30 @@ static uint32_t FastLog2Slow_C(uint32_t v) { // Methods to calculate Entropy (Shannon). // Compute the combined Shanon's entropy for distribution {X} and {X+Y} -static float CombinedShannonEntropy_C(const uint32_t X[256], - const uint32_t Y[256]) { +static uint64_t CombinedShannonEntropy_C(const uint32_t X[256], + const uint32_t Y[256]) { int i; - float retval = 0.f; + uint64_t retval = 0; uint32_t sumX = 0, sumXY = 0; for (i = 0; i < 256; ++i) { const uint32_t x = X[i]; if (x != 0) { const uint32_t xy = x + Y[i]; sumX += x; - retval -= VP8LFastSLog2(x); + retval += VP8LFastSLog2(x); sumXY += xy; - retval -= VP8LFastSLog2(xy); + retval += VP8LFastSLog2(xy); } else if (Y[i] != 0) { sumXY += Y[i]; - retval -= VP8LFastSLog2(Y[i]); + retval += VP8LFastSLog2(Y[i]); } } - retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY); + retval = VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY) - retval; return retval; } void VP8LBitEntropyInit(VP8LBitEntropy* const entropy) { - entropy->entropy = 0.; + entropy->entropy = 0; entropy->sum = 0; entropy->nonzeros = 0; entropy->max_val = 0; @@ -485,13 +355,13 @@ void VP8LBitsEntropyUnrefined(const uint32_t* const array, int n, entropy->sum += array[i]; entropy->nonzero_code = i; ++entropy->nonzeros; - entropy->entropy -= VP8LFastSLog2(array[i]); + entropy->entropy += VP8LFastSLog2(array[i]); if (entropy->max_val < array[i]) { entropy->max_val = array[i]; } } } - entropy->entropy += VP8LFastSLog2(entropy->sum); + entropy->entropy = VP8LFastSLog2(entropy->sum) - entropy->entropy; } static WEBP_INLINE void GetEntropyUnrefinedHelper( @@ -504,7 +374,7 @@ static WEBP_INLINE void GetEntropyUnrefinedHelper( bit_entropy->sum += (*val_prev) * streak; bit_entropy->nonzeros += streak; bit_entropy->nonzero_code = *i_prev; - bit_entropy->entropy -= VP8LFastSLog2(*val_prev) * streak; + bit_entropy->entropy += VP8LFastSLog2(*val_prev) * streak; if (bit_entropy->max_val < *val_prev) { bit_entropy->max_val = *val_prev; } @@ -536,7 +406,7 @@ static void GetEntropyUnrefined_C(const uint32_t X[], int length, } GetEntropyUnrefinedHelper(0, i, &x_prev, &i_prev, bit_entropy, stats); - bit_entropy->entropy += VP8LFastSLog2(bit_entropy->sum); + bit_entropy->entropy = VP8LFastSLog2(bit_entropy->sum) - bit_entropy->entropy; } static void GetCombinedEntropyUnrefined_C(const uint32_t X[], @@ -559,7 +429,7 @@ static void GetCombinedEntropyUnrefined_C(const uint32_t X[], } GetEntropyUnrefinedHelper(0, i, &xy_prev, &i_prev, bit_entropy, stats); - bit_entropy->entropy += VP8LFastSLog2(bit_entropy->sum); + bit_entropy->entropy = VP8LFastSLog2(bit_entropy->sum) - bit_entropy->entropy; } //------------------------------------------------------------------------------ diff --git a/src/dsp/lossless_enc_mips32.c b/src/dsp/lossless_enc_mips32.c index f69e00e7..58529f9a 100644 --- a/src/dsp/lossless_enc_mips32.c +++ b/src/dsp/lossless_enc_mips32.c @@ -23,12 +23,12 @@ #include #include -static float FastSLog2Slow_MIPS32(uint32_t v) { +static uint64_t FastSLog2Slow_MIPS32(uint32_t v) { assert(v >= LOG_LOOKUP_IDX_MAX); if (v < APPROX_LOG_WITH_CORRECTION_MAX) { - uint32_t log_cnt, y, correction; + uint32_t log_cnt, y; + uint64_t correction; const int c24 = 24; - const float v_f = (float)v; uint32_t temp; // Xf = 256 = 2^8 @@ -49,13 +49,14 @@ static float FastSLog2Slow_MIPS32(uint32_t v) { // log2(Xf) = log2(floor(Xf)) + log2(1 + (v % y) / v) // The correction factor: log(1 + d) ~ d; for very small d values, so // log2(1 + (v % y) / v) ~ LOG_2_RECIPROCAL * (v % y)/v - // LOG_2_RECIPROCAL ~ 23/16 // (v % y) = (v % 2^log_cnt) = v & (2^log_cnt - 1) - correction = (23 * (v & (y - 1))) >> 4; - return v_f * (kLog2fTable[temp] + log_cnt) + correction; + correction = LOG_2_RECIPROCAL_FIXED * (v & (y - 1)); + return (uint64_t)v * (kLog2Table[temp] + + ((uint64_t)log_cnt << LOG_2_PRECISION_BITS)) + + correction; } else { - return (float)(LOG_2_RECIPROCAL * v * log((double)v)); + return (uint64_t)(LOG_2_RECIPROCAL_FIXED_DOUBLE * v * log((double)v) + .5); } } @@ -82,8 +83,7 @@ static uint32_t FastLog2Slow_MIPS32(uint32_t v) { if (v >= APPROX_LOG_MAX) { // Since the division is still expensive, add this correction factor only // for large values of 'v'. - const uint64_t correction = - (uint64_t)LOG_2_RECIPROCAL_FIXED * (v & (y - 1)); + const uint64_t correction = LOG_2_RECIPROCAL_FIXED * (v & (y - 1)); log_2 += (uint32_t)DivRound(correction, v); } return log_2; @@ -227,7 +227,7 @@ static WEBP_INLINE void GetEntropyUnrefinedHelper( bit_entropy->sum += (*val_prev) * streak; bit_entropy->nonzeros += streak; bit_entropy->nonzero_code = *i_prev; - bit_entropy->entropy -= VP8LFastSLog2(*val_prev) * streak; + bit_entropy->entropy += VP8LFastSLog2(*val_prev) * streak; if (bit_entropy->max_val < *val_prev) { bit_entropy->max_val = *val_prev; } @@ -259,7 +259,7 @@ static void GetEntropyUnrefined_MIPS32(const uint32_t X[], int length, } GetEntropyUnrefinedHelper(0, i, &x_prev, &i_prev, bit_entropy, stats); - bit_entropy->entropy += VP8LFastSLog2(bit_entropy->sum); + bit_entropy->entropy = VP8LFastSLog2(bit_entropy->sum) - bit_entropy->entropy; } static void GetCombinedEntropyUnrefined_MIPS32(const uint32_t X[], @@ -282,7 +282,7 @@ static void GetCombinedEntropyUnrefined_MIPS32(const uint32_t X[], } GetEntropyUnrefinedHelper(0, i, &xy_prev, &i_prev, entropy, stats); - entropy->entropy += VP8LFastSLog2(entropy->sum); + entropy->entropy = VP8LFastSLog2(entropy->sum) - entropy->entropy; } #define ASM_START \ diff --git a/src/dsp/lossless_enc_sse2.c b/src/dsp/lossless_enc_sse2.c index e8c71a30..0d9dbc7d 100644 --- a/src/dsp/lossless_enc_sse2.c +++ b/src/dsp/lossless_enc_sse2.c @@ -237,10 +237,10 @@ static void AddVectorEq_SSE2(const uint32_t* a, uint32_t* out, int size) { // when compared to -noasm. #if !(defined(WEBP_HAVE_SLOW_CLZ_CTZ) || defined(__i386__) || defined(_M_IX86)) -static float CombinedShannonEntropy_SSE2(const uint32_t X[256], - const uint32_t Y[256]) { +static uint64_t CombinedShannonEntropy_SSE2(const uint32_t X[256], + const uint32_t Y[256]) { int i; - float retval = 0.f; + uint64_t retval = 0; uint32_t sumX = 0, sumXY = 0; const __m128i zero = _mm_setzero_si128(); @@ -265,15 +265,15 @@ static float CombinedShannonEntropy_SSE2(const uint32_t X[256], if ((mx >> j) & 1) { const int x = X[i + j]; sumXY += x; - retval -= VP8LFastSLog2(x); + retval += VP8LFastSLog2(x); } xy = X[i + j] + Y[i + j]; sumX += xy; - retval -= VP8LFastSLog2(xy); + retval += VP8LFastSLog2(xy); my &= my - 1; } } - retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY); + retval = VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY) - retval; return retval; } diff --git a/src/enc/histogram_enc.c b/src/enc/histogram_enc.c index f9459357..3584a6fb 100644 --- a/src/enc/histogram_enc.c +++ b/src/enc/histogram_enc.c @@ -229,8 +229,6 @@ void VP8LHistogramAddSinglePixOrCopy(VP8LHistogram* const histo, static WEBP_INLINE uint64_t BitsEntropyRefine(const VP8LBitEntropy* entropy) { uint64_t mix; - const uint64_t fixed_point_entropy = - (uint64_t)(entropy->entropy * (1ll << LOG_2_PRECISION_BITS) + .5); if (entropy->nonzeros < 5) { if (entropy->nonzeros <= 1) { return 0; @@ -240,7 +238,7 @@ static WEBP_INLINE uint64_t BitsEntropyRefine(const VP8LBitEntropy* entropy) { // distributions of these are combined. if (entropy->nonzeros == 2) { return DivRound(99 * ((uint64_t)entropy->sum << LOG_2_PRECISION_BITS) + - fixed_point_entropy, + entropy->entropy, 100); } // No matter what the entropy says, we cannot be better than min_limit @@ -260,8 +258,8 @@ static WEBP_INLINE uint64_t BitsEntropyRefine(const VP8LBitEntropy* entropy) { uint64_t min_limit = (uint64_t)(2 * entropy->sum - entropy->max_val) << LOG_2_PRECISION_BITS; min_limit = - DivRound(mix * min_limit + (1000 - mix) * fixed_point_entropy, 1000); - return (fixed_point_entropy < min_limit) ? min_limit : fixed_point_entropy; + DivRound(mix * min_limit + (1000 - mix) * entropy->entropy, 1000); + return (entropy->entropy < min_limit) ? min_limit : entropy->entropy; } } diff --git a/src/enc/predictor_enc.c b/src/enc/predictor_enc.c index a3d0d760..9cfb70ec 100644 --- a/src/enc/predictor_enc.c +++ b/src/enc/predictor_enc.c @@ -57,7 +57,9 @@ static float PredictionCostSpatialHistogram( // Compute the new cost if 'tile' is added to 'accumulate' but also add the // cost of the current histogram to guide the spatial predictor selection. // Basically, favor low entropy, locally and globally. - retval += VP8LCombinedShannonEntropy(&tile[i * 256], &accumulated[i * 256]); + retval += (float)VP8LCombinedShannonEntropy(&tile[i * 256], + &accumulated[i * 256]) / + (1ll << LOG_2_PRECISION_BITS); } // Favor keeping the areas locally similar. if (mode == left_mode) retval -= kSpatialPredictorBias; @@ -541,7 +543,8 @@ static float PredictionCostCrossColor(const uint32_t accumulated[256], // Favor low entropy, locally and globally. // Favor small absolute values for PredictionCostSpatial static const float kExpValue = 2.4f; - return VP8LCombinedShannonEntropy(counts, accumulated) + + return (float)VP8LCombinedShannonEntropy(counts, accumulated) / + (1ll << LOG_2_PRECISION_BITS) + PredictionCostBias(counts, 3, kExpValue); }