Convert VP8LFastSLog2 to fixed point

Speedups: 1% with '-lossless', 2% with '-lossless -q 100 -m6'

Change-Id: I1d79ea8e3e9e4bac7bcea4d7cbcc1bd56273988e
This commit is contained in:
Vincent Rabaud 2024-07-09 15:44:18 +02:00
parent 66408c2c7c
commit fb444b692b
7 changed files with 128 additions and 260 deletions

View File

@ -185,8 +185,8 @@ extern VP8LPredictorAddSubFunc VP8LPredictorsSub_C[16];
typedef uint32_t (*VP8LCostFunc)(const uint32_t* population, int length); typedef uint32_t (*VP8LCostFunc)(const uint32_t* population, int length);
typedef uint32_t (*VP8LCostCombinedFunc)(const uint32_t* X, const uint32_t* Y, typedef uint32_t (*VP8LCostCombinedFunc)(const uint32_t* X, const uint32_t* Y,
int length); int length);
typedef float (*VP8LCombinedShannonEntropyFunc)(const uint32_t X[256], typedef uint64_t (*VP8LCombinedShannonEntropyFunc)(const uint32_t X[256],
const uint32_t Y[256]); const uint32_t Y[256]);
extern VP8LCostFunc VP8LExtraCost; extern VP8LCostFunc VP8LExtraCost;
extern VP8LCostCombinedFunc VP8LExtraCostCombined; extern VP8LCostCombinedFunc VP8LExtraCostCombined;
@ -198,7 +198,7 @@ typedef struct { // small struct to hold counters
} VP8LStreaks; } VP8LStreaks;
typedef struct { // small struct to hold bit entropy results typedef struct { // small struct to hold bit entropy results
float entropy; // entropy uint64_t entropy; // entropy
uint32_t sum; // sum of the population uint32_t sum; // sum of the population
int nonzeros; // number of non-zero elements in the population int nonzeros; // number of non-zero elements in the population
uint32_t max_val; // maximum value in the population uint32_t max_val; // maximum value in the population

View File

@ -82,15 +82,12 @@ static WEBP_INLINE int VP8LNearLosslessBits(int near_lossless_quality) {
#define LOG_2_RECIPROCAL 1.44269504088896338700465094007086 #define LOG_2_RECIPROCAL 1.44269504088896338700465094007086
// LOG_2_RECIPROCAL * (1 << LOG_2_PRECISION_BITS) // LOG_2_RECIPROCAL * (1 << LOG_2_PRECISION_BITS)
#define LOG_2_RECIPROCAL_FIXED_DOUBLE 12102203.161561485379934310913085937500 #define LOG_2_RECIPROCAL_FIXED_DOUBLE 12102203.161561485379934310913085937500
#define LOG_2_RECIPROCAL_FIXED 12102203 #define LOG_2_RECIPROCAL_FIXED ((uint64_t)12102203)
#define LOG_LOOKUP_IDX_MAX 256 #define LOG_LOOKUP_IDX_MAX 256
extern const uint32_t kLog2Table[LOG_LOOKUP_IDX_MAX]; extern const uint32_t kLog2Table[LOG_LOOKUP_IDX_MAX];
extern const float kSLog2Table[LOG_LOOKUP_IDX_MAX]; extern const uint64_t kSLog2Table[LOG_LOOKUP_IDX_MAX];
// TODO(vrabaud) remove this table once VP8LFastSLog2 is switched to fixed
// point.
extern const float kLog2fTable[LOG_LOOKUP_IDX_MAX];
typedef uint32_t (*VP8LFastLog2SlowFunc)(uint32_t v); typedef uint32_t (*VP8LFastLog2SlowFunc)(uint32_t v);
typedef float (*VP8LFastSLog2SlowFunc)(uint32_t v); typedef uint64_t (*VP8LFastSLog2SlowFunc)(uint32_t v);
extern VP8LFastLog2SlowFunc VP8LFastLog2Slow; extern VP8LFastLog2SlowFunc VP8LFastLog2Slow;
extern VP8LFastSLog2SlowFunc VP8LFastSLog2Slow; extern VP8LFastSLog2SlowFunc VP8LFastSLog2Slow;
@ -99,7 +96,7 @@ static WEBP_INLINE uint32_t VP8LFastLog2(uint32_t v) {
return (v < LOG_LOOKUP_IDX_MAX) ? kLog2Table[v] : VP8LFastLog2Slow(v); return (v < LOG_LOOKUP_IDX_MAX) ? kLog2Table[v] : VP8LFastLog2Slow(v);
} }
// Fast calculation of v * log2(v) for integer input. // Fast calculation of v * log2(v) for integer input.
static WEBP_INLINE float VP8LFastSLog2(uint32_t v) { static WEBP_INLINE uint64_t VP8LFastSLog2(uint32_t v) {
return (v < LOG_LOOKUP_IDX_MAX) ? kSLog2Table[v] : VP8LFastSLog2Slow(v); return (v < LOG_LOOKUP_IDX_MAX) ? kSLog2Table[v] : VP8LFastSLog2Slow(v);
} }

View File

@ -24,138 +24,6 @@
#include "src/dsp/lossless_common.h" #include "src/dsp/lossless_common.h"
#include "src/dsp/yuv.h" #include "src/dsp/yuv.h"
// lookup table for small values of log2(int)
const float kLog2fTable[LOG_LOOKUP_IDX_MAX] = {
0.0000000000000000f, 0.0000000000000000f,
1.0000000000000000f, 1.5849625007211560f,
2.0000000000000000f, 2.3219280948873621f,
2.5849625007211560f, 2.8073549220576041f,
3.0000000000000000f, 3.1699250014423121f,
3.3219280948873621f, 3.4594316186372973f,
3.5849625007211560f, 3.7004397181410921f,
3.8073549220576041f, 3.9068905956085187f,
4.0000000000000000f, 4.0874628412503390f,
4.1699250014423121f, 4.2479275134435852f,
4.3219280948873626f, 4.3923174227787606f,
4.4594316186372973f, 4.5235619560570130f,
4.5849625007211560f, 4.6438561897747243f,
4.7004397181410917f, 4.7548875021634682f,
4.8073549220576037f, 4.8579809951275718f,
4.9068905956085187f, 4.9541963103868749f,
5.0000000000000000f, 5.0443941193584533f,
5.0874628412503390f, 5.1292830169449663f,
5.1699250014423121f, 5.2094533656289501f,
5.2479275134435852f, 5.2854022188622487f,
5.3219280948873626f, 5.3575520046180837f,
5.3923174227787606f, 5.4262647547020979f,
5.4594316186372973f, 5.4918530963296747f,
5.5235619560570130f, 5.5545888516776376f,
5.5849625007211560f, 5.6147098441152083f,
5.6438561897747243f, 5.6724253419714951f,
5.7004397181410917f, 5.7279204545631987f,
5.7548875021634682f, 5.7813597135246599f,
5.8073549220576037f, 5.8328900141647412f,
5.8579809951275718f, 5.8826430493618415f,
5.9068905956085187f, 5.9307373375628866f,
5.9541963103868749f, 5.9772799234999167f,
6.0000000000000000f, 6.0223678130284543f,
6.0443941193584533f, 6.0660891904577720f,
6.0874628412503390f, 6.1085244567781691f,
6.1292830169449663f, 6.1497471195046822f,
6.1699250014423121f, 6.1898245588800175f,
6.2094533656289501f, 6.2288186904958804f,
6.2479275134435852f, 6.2667865406949010f,
6.2854022188622487f, 6.3037807481771030f,
6.3219280948873626f, 6.3398500028846243f,
6.3575520046180837f, 6.3750394313469245f,
6.3923174227787606f, 6.4093909361377017f,
6.4262647547020979f, 6.4429434958487279f,
6.4594316186372973f, 6.4757334309663976f,
6.4918530963296747f, 6.5077946401986963f,
6.5235619560570130f, 6.5391588111080309f,
6.5545888516776376f, 6.5698556083309478f,
6.5849625007211560f, 6.5999128421871278f,
6.6147098441152083f, 6.6293566200796094f,
6.6438561897747243f, 6.6582114827517946f,
6.6724253419714951f, 6.6865005271832185f,
6.7004397181410917f, 6.7142455176661224f,
6.7279204545631987f, 6.7414669864011464f,
6.7548875021634682f, 6.7681843247769259f,
6.7813597135246599f, 6.7944158663501061f,
6.8073549220576037f, 6.8201789624151878f,
6.8328900141647412f, 6.8454900509443747f,
6.8579809951275718f, 6.8703647195834047f,
6.8826430493618415f, 6.8948177633079437f,
6.9068905956085187f, 6.9188632372745946f,
6.9307373375628866f, 6.9425145053392398f,
6.9541963103868749f, 6.9657842846620869f,
6.9772799234999167f, 6.9886846867721654f,
7.0000000000000000f, 7.0112272554232539f,
7.0223678130284543f, 7.0334230015374501f,
7.0443941193584533f, 7.0552824355011898f,
7.0660891904577720f, 7.0768155970508308f,
7.0874628412503390f, 7.0980320829605263f,
7.1085244567781691f, 7.1189410727235076f,
7.1292830169449663f, 7.1395513523987936f,
7.1497471195046822f, 7.1598713367783890f,
7.1699250014423121f, 7.1799090900149344f,
7.1898245588800175f, 7.1996723448363644f,
7.2094533656289501f, 7.2191685204621611f,
7.2288186904958804f, 7.2384047393250785f,
7.2479275134435852f, 7.2573878426926521f,
7.2667865406949010f, 7.2761244052742375f,
7.2854022188622487f, 7.2946207488916270f,
7.3037807481771030f, 7.3128829552843557f,
7.3219280948873626f, 7.3309168781146167f,
7.3398500028846243f, 7.3487281542310771f,
7.3575520046180837f, 7.3663222142458160f,
7.3750394313469245f, 7.3837042924740519f,
7.3923174227787606f, 7.4008794362821843f,
7.4093909361377017f, 7.4178525148858982f,
7.4262647547020979f, 7.4346282276367245f,
7.4429434958487279f, 7.4512111118323289f,
7.4594316186372973f, 7.4676055500829976f,
7.4757334309663976f, 7.4838157772642563f,
7.4918530963296747f, 7.4998458870832056f,
7.5077946401986963f, 7.5156998382840427f,
7.5235619560570130f, 7.5313814605163118f,
7.5391588111080309f, 7.5468944598876364f,
7.5545888516776376f, 7.5622424242210728f,
7.5698556083309478f, 7.5774288280357486f,
7.5849625007211560f, 7.5924570372680806f,
7.5999128421871278f, 7.6073303137496104f,
7.6147098441152083f, 7.6220518194563764f,
7.6293566200796094f, 7.6366246205436487f,
7.6438561897747243f, 7.6510516911789281f,
7.6582114827517946f, 7.6653359171851764f,
7.6724253419714951f, 7.6794800995054464f,
7.6865005271832185f, 7.6934869574993252f,
7.7004397181410917f, 7.7073591320808825f,
7.7142455176661224f, 7.7210991887071855f,
7.7279204545631987f, 7.7347096202258383f,
7.7414669864011464f, 7.7481928495894605f,
7.7548875021634682f, 7.7615512324444795f,
7.7681843247769259f, 7.7747870596011736f,
7.7813597135246599f, 7.7879025593914317f,
7.7944158663501061f, 7.8008998999203047f,
7.8073549220576037f, 7.8137811912170374f,
7.8201789624151878f, 7.8265484872909150f,
7.8328900141647412f, 7.8392037880969436f,
7.8454900509443747f, 7.8517490414160571f,
7.8579809951275718f, 7.8641861446542797f,
7.8703647195834047f, 7.8765169465649993f,
7.8826430493618415f, 7.8887432488982591f,
7.8948177633079437f, 7.9008668079807486f,
7.9068905956085187f, 7.9128893362299619f,
7.9188632372745946f, 7.9248125036057812f,
7.9307373375628866f, 7.9366379390025709f,
7.9425145053392398f, 7.9483672315846778f,
7.9541963103868749f, 7.9600019320680805f,
7.9657842846620869f, 7.9715435539507719f,
7.9772799234999167f, 7.9829935746943103f,
7.9886846867721654f, 7.9943534368588577f
};
// lookup table for small values of log2(int) * (1 << LOG_2_PRECISION_BITS). // lookup table for small values of log2(int) * (1 << LOG_2_PRECISION_BITS).
// Obtained in Python with: // Obtained in Python with:
// a = [ str(round((1<<23)*math.log2(i))) if i else "0" for i in range(256)] // a = [ str(round((1<<23)*math.log2(i))) if i else "0" for i in range(256)]
@ -201,71 +69,78 @@ const uint32_t kLog2Table[LOG_LOOKUP_IDX_MAX] = {
66918274, 66966204, 67013944, 67061497 66918274, 66966204, 67013944, 67061497
}; };
const float kSLog2Table[LOG_LOOKUP_IDX_MAX] = { // lookup table for small values of int*log2(int) * (1 << LOG_2_PRECISION_BITS).
0.00000000f, 0.00000000f, 2.00000000f, 4.75488750f, // Obtained in Python with:
8.00000000f, 11.60964047f, 15.50977500f, 19.65148445f, // a=[ "%d"%i if i<(1<<32) else "%dull"%i
24.00000000f, 28.52932501f, 33.21928095f, 38.05374781f, // for i in [ round((1<<LOG_2_PRECISION_BITS)*math.log2(i)*i) if i
43.01955001f, 48.10571634f, 53.30296891f, 58.60335893f, // else 0 for i in range(256)]]
64.00000000f, 69.48686830f, 75.05865003f, 80.71062276f, // print(',\n '.join([','.join(v) for v in batched([i.rjust(15)
86.43856190f, 92.23866588f, 98.10749561f, 104.04192499f, // for i in a],4)]))
110.03910002f, 116.09640474f, 122.21143267f, 128.38196256f, const uint64_t kSLog2Table[LOG_LOOKUP_IDX_MAX] = {
134.60593782f, 140.88144886f, 147.20671787f, 153.58008562f, 0, 0, 16777216, 39886887,
160.00000000f, 166.46500594f, 172.97373660f, 179.52490559f, 67108864, 97388723, 130105423, 164848600,
186.11730005f, 192.74977453f, 199.42124551f, 206.13068654f, 201326592, 239321324, 278663526, 319217973,
212.87712380f, 219.65963219f, 226.47733176f, 233.32938445f, 360874141, 403539997, 447137711, 491600606,
240.21499122f, 247.13338933f, 254.08384998f, 261.06567603f, 536870912, 582898099, 629637592, 677049776,
268.07820003f, 275.12078236f, 282.19280949f, 289.29369244f, 725099212, 773754010, 822985323, 872766924,
296.42286534f, 303.57978409f, 310.76392512f, 317.97478424f, 923074875, 973887230, 1025183802, 1076945958,
325.21187564f, 332.47473081f, 339.76289772f, 347.07593991f, 1129156447, 1181799249, 1234859451, 1288323135,
354.41343574f, 361.77497759f, 369.16017124f, 376.56863518f, 1342177280, 1396409681, 1451008871, 1505964059,
384.00000000f, 391.45390785f, 398.93001188f, 406.42797576f, 1561265072, 1616902301, 1672866655, 1729149526,
413.94747321f, 421.48818752f, 429.04981119f, 436.63204548f, 1785742744, 1842638548, 1899829557, 1957308741,
444.23460010f, 451.85719280f, 459.49954906f, 467.16140179f, 2015069397, 2073105127, 2131409817, 2189977618ull,
474.84249102f, 482.54256363f, 490.26137307f, 497.99867911f, 2248802933ull, 2307880396ull, 2367204859ull, 2426771383ull,
505.75424759f, 513.52785023f, 521.31926438f, 529.12827280f, 2486575220ull, 2546611805ull, 2606876748ull, 2667365819ull,
536.95466351f, 544.79822957f, 552.65876890f, 560.53608414f, 2728074942ull, 2789000187ull, 2850137762ull, 2911484006ull,
568.42998244f, 576.34027536f, 584.26677867f, 592.20931226f, 2973035382ull, 3034788471ull, 3096739966ull, 3158886666ull,
600.16769996f, 608.14176943f, 616.13135206f, 624.13628279f, 3221225472ull, 3283753383ull, 3346467489ull, 3409364969ull,
632.15640007f, 640.19154569f, 648.24156472f, 656.30630539f, 3472443085ull, 3535699182ull, 3599130679ull, 3662735070ull,
664.38561898f, 672.47935976f, 680.58738488f, 688.70955430f, 3726509920ull, 3790452862ull, 3854561593ull, 3918833872ull,
696.84573069f, 704.99577935f, 713.15956818f, 721.33696754f, 3983267519ull, 4047860410ull, 4112610476ull, 4177515704ull,
729.52785023f, 737.73209140f, 745.94956849f, 754.18016116f, 4242574127ull, 4307783833ull, 4373142952ull, 4438649662ull,
762.42375127f, 770.68022275f, 778.94946161f, 787.23135586f, 4504302186ull, 4570098787ull, 4636037770ull, 4702117480ull,
795.52579543f, 803.83267219f, 812.15187982f, 820.48331383f, 4768336298ull, 4834692645ull, 4901184974ull, 4967811774ull,
828.82687147f, 837.18245171f, 845.54995518f, 853.92928416f, 5034571569ull, 5101462912ull, 5168484389ull, 5235634615ull,
862.32034249f, 870.72303558f, 879.13727036f, 887.56295522f, 5302912235ull, 5370315922ull, 5437844376ull, 5505496324ull,
896.00000000f, 904.44831595f, 912.90781569f, 921.37841320f, 5573270518ull, 5641165737ull, 5709180782ull, 5777314477ull,
929.86002376f, 938.35256392f, 946.85595152f, 955.37010560f, 5845565671ull, 5913933235ull, 5982416059ull, 6051013057ull,
963.89494641f, 972.43039537f, 980.97637504f, 989.53280911f, 6119723161ull, 6188545324ull, 6257478518ull, 6326521733ull,
998.09962237f, 1006.67674069f, 1015.26409097f, 1023.86160116f, 6395673979ull, 6464934282ull, 6534301685ull, 6603775250ull,
1032.46920021f, 1041.08681805f, 1049.71438560f, 1058.35183469f, 6673354052ull, 6743037185ull, 6812823756ull, 6882712890ull,
1066.99909811f, 1075.65610955f, 1084.32280357f, 1092.99911564f, 6952703725ull, 7022795412ull, 7092987118ull, 7163278025ull,
1101.68498204f, 1110.38033993f, 1119.08512727f, 1127.79928282f, 7233667324ull, 7304154222ull, 7374737939ull, 7445417707ull,
1136.52274614f, 1145.25545758f, 1153.99735821f, 1162.74838989f, 7516192768ull, 7587062379ull, 7658025806ull, 7729082328ull,
1171.50849518f, 1180.27761738f, 1189.05570047f, 1197.84268914f, 7800231234ull, 7871471825ull, 7942803410ull, 8014225311ull,
1206.63852876f, 1215.44316535f, 1224.25654560f, 1233.07861684f, 8085736859ull, 8157337394ull, 8229026267ull, 8300802839ull,
1241.90932703f, 1250.74862473f, 1259.59645914f, 1268.45278005f, 8372666477ull, 8444616560ull, 8516652476ull, 8588773618ull,
1277.31753781f, 1286.19068338f, 1295.07216828f, 1303.96194457f, 8660979393ull, 8733269211ull, 8805642493ull, 8878098667ull,
1312.85996488f, 1321.76618236f, 1330.68055071f, 1339.60302413f, 8950637170ull, 9023257446ull, 9095958945ull, 9168741125ull,
1348.53355734f, 1357.47210556f, 1366.41862452f, 1375.37307041f, 9241603454ull, 9314545403ull, 9387566451ull, 9460666086ull,
1384.33539991f, 1393.30557020f, 1402.28353887f, 1411.26926400f, 9533843800ull, 9607099093ull, 9680431471ull, 9753840445ull,
1420.26270412f, 1429.26381818f, 1438.27256558f, 1447.28890615f, 9827325535ull, 9900886263ull, 9974522161ull, 10048232765ull,
1456.31280014f, 1465.34420819f, 1474.38309138f, 1483.42941118f, 10122017615ull, 10195876260ull, 10269808253ull, 10343813150ull,
1492.48312945f, 1501.54420843f, 1510.61261078f, 1519.68829949f, 10417890516ull, 10492039919ull, 10566260934ull, 10640553138ull,
1528.77123795f, 1537.86138993f, 1546.95871952f, 1556.06319119f, 10714916116ull, 10789349456ull, 10863852751ull, 10938425600ull,
1565.17476976f, 1574.29342040f, 1583.41910860f, 1592.55180020f, 11013067604ull, 11087778372ull, 11162557513ull, 11237404645ull,
1601.69146137f, 1610.83805860f, 1619.99155871f, 1629.15192882f, 11312319387ull, 11387301364ull, 11462350205ull, 11537465541ull,
1638.31913637f, 1647.49314911f, 1656.67393509f, 1665.86146266f, 11612647010ull, 11687894253ull, 11763206912ull, 11838584638ull,
1675.05570047f, 1684.25661744f, 1693.46418280f, 1702.67836605f, 11914027082ull, 11989533899ull, 12065104750ull, 12140739296ull,
1711.89913698f, 1721.12646563f, 1730.36032233f, 1739.60067768f, 12216437206ull, 12292198148ull, 12368021795ull, 12443907826ull,
1748.84750254f, 1758.10076802f, 1767.36044551f, 1776.62650662f, 12519855920ull, 12595865759ull, 12671937032ull, 12748069427ull,
1785.89892323f, 1795.17766747f, 1804.46271172f, 1813.75402857f, 12824262637ull, 12900516358ull, 12976830290ull, 13053204134ull,
1823.05159087f, 1832.35537170f, 1841.66534438f, 1850.98148244f, 13129637595ull, 13206130381ull, 13282682202ull, 13359292772ull,
1860.30375965f, 1869.63214999f, 1878.96662767f, 1888.30716711f, 13435961806ull, 13512689025ull, 13589474149ull, 13666316903ull,
1897.65374295f, 1907.00633003f, 1916.36490342f, 1925.72943838f, 13743217014ull, 13820174211ull, 13897188225ull, 13974258793ull,
1935.09991037f, 1944.47629506f, 1953.85856831f, 1963.24670620f, 14051385649ull, 14128568535ull, 14205807192ull, 14283101363ull,
1972.64068498f, 1982.04048108f, 1991.44607117f, 2000.85743204f, 14360450796ull, 14437855239ull, 14515314443ull, 14592828162ull,
2010.27454072f, 2019.69737440f, 2029.12591044f, 2038.56012640f 14670396151ull, 14748018167ull, 14825693972ull, 14903423326ull,
14981205995ull, 15059041743ull, 15136930339ull, 15214871554ull,
15292865160ull, 15370910930ull, 15449008641ull, 15527158071ull,
15605359001ull, 15683611210ull, 15761914485ull, 15840268608ull,
15918673369ull, 15997128556ull, 16075633960ull, 16154189373ull,
16232794589ull, 16311449405ull, 16390153617ull, 16468907026ull,
16547709431ull, 16626560636ull, 16705460444ull, 16784408661ull,
16863405094ull, 16942449552ull, 17021541845ull, 17100681785ull
}; };
const VP8LPrefixCode kPrefixEncodeCode[PREFIX_LOOKUP_IDX_MAX] = { const VP8LPrefixCode kPrefixEncodeCode[PREFIX_LOOKUP_IDX_MAX] = {
@ -371,23 +246,19 @@ const uint8_t kPrefixEncodeExtraBitsValue[PREFIX_LOOKUP_IDX_MAX] = {
112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126
}; };
static float FastSLog2Slow_C(uint32_t v) { static uint64_t FastSLog2Slow_C(uint32_t v) {
assert(v >= LOG_LOOKUP_IDX_MAX); assert(v >= LOG_LOOKUP_IDX_MAX);
if (v < APPROX_LOG_WITH_CORRECTION_MAX) { if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
const uint64_t orig_v = v;
uint64_t correction;
#if !defined(WEBP_HAVE_SLOW_CLZ_CTZ) #if !defined(WEBP_HAVE_SLOW_CLZ_CTZ)
// use clz if available // use clz if available
const int log_cnt = BitsLog2Floor(v) - 7; const uint64_t log_cnt = BitsLog2Floor(v) - 7;
const uint32_t y = 1 << log_cnt; const uint32_t y = 1 << log_cnt;
int correction = 0;
const float v_f = (float)v;
const uint32_t orig_v = v;
v >>= log_cnt; v >>= log_cnt;
#else #else
int log_cnt = 0; uint64_t log_cnt = 0;
uint32_t y = 1; uint32_t y = 1;
int correction = 0;
const float v_f = (float)v;
const uint32_t orig_v = v;
do { do {
++log_cnt; ++log_cnt;
v = v >> 1; v = v >> 1;
@ -399,11 +270,11 @@ static float FastSLog2Slow_C(uint32_t v) {
// log2(Xf) = log2(floor(Xf)) + log2(1 + (v % y) / v) // log2(Xf) = log2(floor(Xf)) + log2(1 + (v % y) / v)
// The correction factor: log(1 + d) ~ d for very small d values, so // The correction factor: log(1 + d) ~ d for very small d values, so
// log2(1 + (v % y) / v) ~ LOG_2_RECIPROCAL * (v % y)/v // log2(1 + (v % y) / v) ~ LOG_2_RECIPROCAL * (v % y)/v
// LOG_2_RECIPROCAL ~ 23/16 correction = LOG_2_RECIPROCAL_FIXED * (orig_v & (y - 1));
correction = (23 * (orig_v & (y - 1))) >> 4; return orig_v * (kLog2Table[v] + (log_cnt << LOG_2_PRECISION_BITS)) +
return v_f * (kLog2fTable[v] + log_cnt) + correction; correction;
} else { } else {
return (float)(LOG_2_RECIPROCAL * v * log((double)v)); return (uint64_t)(LOG_2_RECIPROCAL_FIXED_DOUBLE * v * log((double)v) + .5);
} }
} }
@ -430,8 +301,7 @@ static uint32_t FastLog2Slow_C(uint32_t v) {
if (orig_v >= APPROX_LOG_MAX) { if (orig_v >= APPROX_LOG_MAX) {
// Since the division is still expensive, add this correction factor only // Since the division is still expensive, add this correction factor only
// for large values of 'v'. // for large values of 'v'.
const uint64_t correction = const uint64_t correction = LOG_2_RECIPROCAL_FIXED * (orig_v & (y - 1));
(uint64_t)LOG_2_RECIPROCAL_FIXED * (orig_v & (y - 1));
log_2 += (uint32_t)DivRound(correction, orig_v); log_2 += (uint32_t)DivRound(correction, orig_v);
} }
return log_2; return log_2;
@ -444,30 +314,30 @@ static uint32_t FastLog2Slow_C(uint32_t v) {
// Methods to calculate Entropy (Shannon). // Methods to calculate Entropy (Shannon).
// Compute the combined Shannon's entropy for distribution {X} and {X+Y} // Compute the combined Shannon's entropy for distribution {X} and {X+Y}
static float CombinedShannonEntropy_C(const uint32_t X[256], static uint64_t CombinedShannonEntropy_C(const uint32_t X[256],
const uint32_t Y[256]) { const uint32_t Y[256]) {
int i; int i;
float retval = 0.f; uint64_t retval = 0;
uint32_t sumX = 0, sumXY = 0; uint32_t sumX = 0, sumXY = 0;
for (i = 0; i < 256; ++i) { for (i = 0; i < 256; ++i) {
const uint32_t x = X[i]; const uint32_t x = X[i];
if (x != 0) { if (x != 0) {
const uint32_t xy = x + Y[i]; const uint32_t xy = x + Y[i];
sumX += x; sumX += x;
retval -= VP8LFastSLog2(x); retval += VP8LFastSLog2(x);
sumXY += xy; sumXY += xy;
retval -= VP8LFastSLog2(xy); retval += VP8LFastSLog2(xy);
} else if (Y[i] != 0) { } else if (Y[i] != 0) {
sumXY += Y[i]; sumXY += Y[i];
retval -= VP8LFastSLog2(Y[i]); retval += VP8LFastSLog2(Y[i]);
} }
} }
retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY); retval = VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY) - retval;
return retval; return retval;
} }
void VP8LBitEntropyInit(VP8LBitEntropy* const entropy) { void VP8LBitEntropyInit(VP8LBitEntropy* const entropy) {
entropy->entropy = 0.; entropy->entropy = 0;
entropy->sum = 0; entropy->sum = 0;
entropy->nonzeros = 0; entropy->nonzeros = 0;
entropy->max_val = 0; entropy->max_val = 0;
@ -485,13 +355,13 @@ void VP8LBitsEntropyUnrefined(const uint32_t* const array, int n,
entropy->sum += array[i]; entropy->sum += array[i];
entropy->nonzero_code = i; entropy->nonzero_code = i;
++entropy->nonzeros; ++entropy->nonzeros;
entropy->entropy -= VP8LFastSLog2(array[i]); entropy->entropy += VP8LFastSLog2(array[i]);
if (entropy->max_val < array[i]) { if (entropy->max_val < array[i]) {
entropy->max_val = array[i]; entropy->max_val = array[i];
} }
} }
} }
entropy->entropy += VP8LFastSLog2(entropy->sum); entropy->entropy = VP8LFastSLog2(entropy->sum) - entropy->entropy;
} }
static WEBP_INLINE void GetEntropyUnrefinedHelper( static WEBP_INLINE void GetEntropyUnrefinedHelper(
@ -504,7 +374,7 @@ static WEBP_INLINE void GetEntropyUnrefinedHelper(
bit_entropy->sum += (*val_prev) * streak; bit_entropy->sum += (*val_prev) * streak;
bit_entropy->nonzeros += streak; bit_entropy->nonzeros += streak;
bit_entropy->nonzero_code = *i_prev; bit_entropy->nonzero_code = *i_prev;
bit_entropy->entropy -= VP8LFastSLog2(*val_prev) * streak; bit_entropy->entropy += VP8LFastSLog2(*val_prev) * streak;
if (bit_entropy->max_val < *val_prev) { if (bit_entropy->max_val < *val_prev) {
bit_entropy->max_val = *val_prev; bit_entropy->max_val = *val_prev;
} }
@ -536,7 +406,7 @@ static void GetEntropyUnrefined_C(const uint32_t X[], int length,
} }
GetEntropyUnrefinedHelper(0, i, &x_prev, &i_prev, bit_entropy, stats); GetEntropyUnrefinedHelper(0, i, &x_prev, &i_prev, bit_entropy, stats);
bit_entropy->entropy += VP8LFastSLog2(bit_entropy->sum); bit_entropy->entropy = VP8LFastSLog2(bit_entropy->sum) - bit_entropy->entropy;
} }
static void GetCombinedEntropyUnrefined_C(const uint32_t X[], static void GetCombinedEntropyUnrefined_C(const uint32_t X[],
@ -559,7 +429,7 @@ static void GetCombinedEntropyUnrefined_C(const uint32_t X[],
} }
GetEntropyUnrefinedHelper(0, i, &xy_prev, &i_prev, bit_entropy, stats); GetEntropyUnrefinedHelper(0, i, &xy_prev, &i_prev, bit_entropy, stats);
bit_entropy->entropy += VP8LFastSLog2(bit_entropy->sum); bit_entropy->entropy = VP8LFastSLog2(bit_entropy->sum) - bit_entropy->entropy;
} }
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------

View File

@ -23,12 +23,12 @@
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
static float FastSLog2Slow_MIPS32(uint32_t v) { static uint64_t FastSLog2Slow_MIPS32(uint32_t v) {
assert(v >= LOG_LOOKUP_IDX_MAX); assert(v >= LOG_LOOKUP_IDX_MAX);
if (v < APPROX_LOG_WITH_CORRECTION_MAX) { if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
uint32_t log_cnt, y, correction; uint32_t log_cnt, y;
uint64_t correction;
const int c24 = 24; const int c24 = 24;
const float v_f = (float)v;
uint32_t temp; uint32_t temp;
// Xf = 256 = 2^8 // Xf = 256 = 2^8
@ -49,13 +49,14 @@ static float FastSLog2Slow_MIPS32(uint32_t v) {
// log2(Xf) = log2(floor(Xf)) + log2(1 + (v % y) / v) // log2(Xf) = log2(floor(Xf)) + log2(1 + (v % y) / v)
// The correction factor: log(1 + d) ~ d for very small d values, so // The correction factor: log(1 + d) ~ d for very small d values, so
// log2(1 + (v % y) / v) ~ LOG_2_RECIPROCAL * (v % y)/v // log2(1 + (v % y) / v) ~ LOG_2_RECIPROCAL * (v % y)/v
// LOG_2_RECIPROCAL ~ 23/16
// (v % y) = (v % 2^log_cnt) = v & (2^log_cnt - 1) // (v % y) = (v % 2^log_cnt) = v & (2^log_cnt - 1)
correction = (23 * (v & (y - 1))) >> 4; correction = LOG_2_RECIPROCAL_FIXED * (v & (y - 1));
return v_f * (kLog2fTable[temp] + log_cnt) + correction; return (uint64_t)v * (kLog2Table[temp] +
((uint64_t)log_cnt << LOG_2_PRECISION_BITS)) +
correction;
} else { } else {
return (float)(LOG_2_RECIPROCAL * v * log((double)v)); return (uint64_t)(LOG_2_RECIPROCAL_FIXED_DOUBLE * v * log((double)v) + .5);
} }
} }
@ -82,8 +83,7 @@ static uint32_t FastLog2Slow_MIPS32(uint32_t v) {
if (v >= APPROX_LOG_MAX) { if (v >= APPROX_LOG_MAX) {
// Since the division is still expensive, add this correction factor only // Since the division is still expensive, add this correction factor only
// for large values of 'v'. // for large values of 'v'.
const uint64_t correction = const uint64_t correction = LOG_2_RECIPROCAL_FIXED * (v & (y - 1));
(uint64_t)LOG_2_RECIPROCAL_FIXED * (v & (y - 1));
log_2 += (uint32_t)DivRound(correction, v); log_2 += (uint32_t)DivRound(correction, v);
} }
return log_2; return log_2;
@ -227,7 +227,7 @@ static WEBP_INLINE void GetEntropyUnrefinedHelper(
bit_entropy->sum += (*val_prev) * streak; bit_entropy->sum += (*val_prev) * streak;
bit_entropy->nonzeros += streak; bit_entropy->nonzeros += streak;
bit_entropy->nonzero_code = *i_prev; bit_entropy->nonzero_code = *i_prev;
bit_entropy->entropy -= VP8LFastSLog2(*val_prev) * streak; bit_entropy->entropy += VP8LFastSLog2(*val_prev) * streak;
if (bit_entropy->max_val < *val_prev) { if (bit_entropy->max_val < *val_prev) {
bit_entropy->max_val = *val_prev; bit_entropy->max_val = *val_prev;
} }
@ -259,7 +259,7 @@ static void GetEntropyUnrefined_MIPS32(const uint32_t X[], int length,
} }
GetEntropyUnrefinedHelper(0, i, &x_prev, &i_prev, bit_entropy, stats); GetEntropyUnrefinedHelper(0, i, &x_prev, &i_prev, bit_entropy, stats);
bit_entropy->entropy += VP8LFastSLog2(bit_entropy->sum); bit_entropy->entropy = VP8LFastSLog2(bit_entropy->sum) - bit_entropy->entropy;
} }
static void GetCombinedEntropyUnrefined_MIPS32(const uint32_t X[], static void GetCombinedEntropyUnrefined_MIPS32(const uint32_t X[],
@ -282,7 +282,7 @@ static void GetCombinedEntropyUnrefined_MIPS32(const uint32_t X[],
} }
GetEntropyUnrefinedHelper(0, i, &xy_prev, &i_prev, entropy, stats); GetEntropyUnrefinedHelper(0, i, &xy_prev, &i_prev, entropy, stats);
entropy->entropy += VP8LFastSLog2(entropy->sum); entropy->entropy = VP8LFastSLog2(entropy->sum) - entropy->entropy;
} }
#define ASM_START \ #define ASM_START \

View File

@ -237,10 +237,10 @@ static void AddVectorEq_SSE2(const uint32_t* a, uint32_t* out, int size) {
// when compared to -noasm. // when compared to -noasm.
#if !(defined(WEBP_HAVE_SLOW_CLZ_CTZ) || defined(__i386__) || defined(_M_IX86)) #if !(defined(WEBP_HAVE_SLOW_CLZ_CTZ) || defined(__i386__) || defined(_M_IX86))
static float CombinedShannonEntropy_SSE2(const uint32_t X[256], static uint64_t CombinedShannonEntropy_SSE2(const uint32_t X[256],
const uint32_t Y[256]) { const uint32_t Y[256]) {
int i; int i;
float retval = 0.f; uint64_t retval = 0;
uint32_t sumX = 0, sumXY = 0; uint32_t sumX = 0, sumXY = 0;
const __m128i zero = _mm_setzero_si128(); const __m128i zero = _mm_setzero_si128();
@ -265,15 +265,15 @@ static float CombinedShannonEntropy_SSE2(const uint32_t X[256],
if ((mx >> j) & 1) { if ((mx >> j) & 1) {
const int x = X[i + j]; const int x = X[i + j];
sumXY += x; sumXY += x;
retval -= VP8LFastSLog2(x); retval += VP8LFastSLog2(x);
} }
xy = X[i + j] + Y[i + j]; xy = X[i + j] + Y[i + j];
sumX += xy; sumX += xy;
retval -= VP8LFastSLog2(xy); retval += VP8LFastSLog2(xy);
my &= my - 1; my &= my - 1;
} }
} }
retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY); retval = VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY) - retval;
return retval; return retval;
} }

View File

@ -229,8 +229,6 @@ void VP8LHistogramAddSinglePixOrCopy(VP8LHistogram* const histo,
static WEBP_INLINE uint64_t BitsEntropyRefine(const VP8LBitEntropy* entropy) { static WEBP_INLINE uint64_t BitsEntropyRefine(const VP8LBitEntropy* entropy) {
uint64_t mix; uint64_t mix;
const uint64_t fixed_point_entropy =
(uint64_t)(entropy->entropy * (1ll << LOG_2_PRECISION_BITS) + .5);
if (entropy->nonzeros < 5) { if (entropy->nonzeros < 5) {
if (entropy->nonzeros <= 1) { if (entropy->nonzeros <= 1) {
return 0; return 0;
@ -240,7 +238,7 @@ static WEBP_INLINE uint64_t BitsEntropyRefine(const VP8LBitEntropy* entropy) {
// distributions of these are combined. // distributions of these are combined.
if (entropy->nonzeros == 2) { if (entropy->nonzeros == 2) {
return DivRound(99 * ((uint64_t)entropy->sum << LOG_2_PRECISION_BITS) + return DivRound(99 * ((uint64_t)entropy->sum << LOG_2_PRECISION_BITS) +
fixed_point_entropy, entropy->entropy,
100); 100);
} }
// No matter what the entropy says, we cannot be better than min_limit // No matter what the entropy says, we cannot be better than min_limit
@ -260,8 +258,8 @@ static WEBP_INLINE uint64_t BitsEntropyRefine(const VP8LBitEntropy* entropy) {
uint64_t min_limit = (uint64_t)(2 * entropy->sum - entropy->max_val) uint64_t min_limit = (uint64_t)(2 * entropy->sum - entropy->max_val)
<< LOG_2_PRECISION_BITS; << LOG_2_PRECISION_BITS;
min_limit = min_limit =
DivRound(mix * min_limit + (1000 - mix) * fixed_point_entropy, 1000); DivRound(mix * min_limit + (1000 - mix) * entropy->entropy, 1000);
return (fixed_point_entropy < min_limit) ? min_limit : fixed_point_entropy; return (entropy->entropy < min_limit) ? min_limit : entropy->entropy;
} }
} }

View File

@ -57,7 +57,9 @@ static float PredictionCostSpatialHistogram(
// Compute the new cost if 'tile' is added to 'accumulate' but also add the // Compute the new cost if 'tile' is added to 'accumulate' but also add the
// cost of the current histogram to guide the spatial predictor selection. // cost of the current histogram to guide the spatial predictor selection.
// Basically, favor low entropy, locally and globally. // Basically, favor low entropy, locally and globally.
retval += VP8LCombinedShannonEntropy(&tile[i * 256], &accumulated[i * 256]); retval += (float)VP8LCombinedShannonEntropy(&tile[i * 256],
&accumulated[i * 256]) /
(1ll << LOG_2_PRECISION_BITS);
} }
// Favor keeping the areas locally similar. // Favor keeping the areas locally similar.
if (mode == left_mode) retval -= kSpatialPredictorBias; if (mode == left_mode) retval -= kSpatialPredictorBias;
@ -541,7 +543,8 @@ static float PredictionCostCrossColor(const uint32_t accumulated[256],
// Favor low entropy, locally and globally. // Favor low entropy, locally and globally.
// Favor small absolute values for PredictionCostSpatial // Favor small absolute values for PredictionCostSpatial
static const float kExpValue = 2.4f; static const float kExpValue = 2.4f;
return VP8LCombinedShannonEntropy(counts, accumulated) + return (float)VP8LCombinedShannonEntropy(counts, accumulated) /
(1ll << LOG_2_PRECISION_BITS) +
PredictionCostBias(counts, 3, kExpValue); PredictionCostBias(counts, 3, kExpValue);
} }