mirror of
				https://github.com/webmproject/libwebp.git
				synced 2025-10-31 18:35:41 +01:00 
			
		
		
		
	Convert VP8LFastLog2 to fixed point
The lossless encoding speed-ups are:
- up to 1% with default parameters
- up to 4% in cruncher mode: -q 100 -m 6

Change-Id: Id92d4bad0b0a2c28c8aa9ff5280eea5717017f30
This commit is contained in:
		| @@ -73,16 +73,29 @@ static WEBP_INLINE int VP8LNearLosslessBits(int near_lossless_quality) { | ||||
| // Keeping a high threshold for now. | ||||
| #define APPROX_LOG_WITH_CORRECTION_MAX  65536 | ||||
| #define APPROX_LOG_MAX                   4096 | ||||
| // VP8LFastLog2 and VP8LFastSLog2 are used on elements from image histograms. | ||||
| // The histogram values cannot exceed the maximum number of pixels, which | ||||
| // is (1 << 14) * (1 << 14). Therefore S * log(S) < (1 << 33). | ||||
| // No more than 32 bits of precision should be chosen. | ||||
| // To match the original float implementation, 23 bits of precision are used. | ||||
| #define LOG_2_PRECISION_BITS 23 | ||||
| #define LOG_2_RECIPROCAL 1.44269504088896338700465094007086 | ||||
| // LOG_2_RECIPROCAL * (1 << LOG_2_PRECISION_BITS) | ||||
| #define LOG_2_RECIPROCAL_FIXED_DOUBLE 12102203.161561485379934310913085937500 | ||||
| #define LOG_2_RECIPROCAL_FIXED 12102203 | ||||
| #define LOG_LOOKUP_IDX_MAX 256 | ||||
| extern const float kLog2Table[LOG_LOOKUP_IDX_MAX]; | ||||
| extern const uint32_t kLog2Table[LOG_LOOKUP_IDX_MAX]; | ||||
| extern const float kSLog2Table[LOG_LOOKUP_IDX_MAX]; | ||||
| typedef float (*VP8LFastLog2SlowFunc)(uint32_t v); | ||||
| // TODO(vrabaud) remove this table once VP8LFastSLog2 is switched to fixed | ||||
| // point. | ||||
| extern const float kLog2fTable[LOG_LOOKUP_IDX_MAX]; | ||||
| typedef uint32_t (*VP8LFastLog2SlowFunc)(uint32_t v); | ||||
| typedef float (*VP8LFastSLog2SlowFunc)(uint32_t v); | ||||
|  | ||||
| extern VP8LFastLog2SlowFunc VP8LFastLog2Slow; | ||||
| extern VP8LFastLog2SlowFunc VP8LFastSLog2Slow; | ||||
| extern VP8LFastSLog2SlowFunc VP8LFastSLog2Slow; | ||||
|  | ||||
| static WEBP_INLINE float VP8LFastLog2(uint32_t v) { | ||||
| static WEBP_INLINE uint32_t VP8LFastLog2(uint32_t v) { | ||||
|   return (v < LOG_LOOKUP_IDX_MAX) ? kLog2Table[v] : VP8LFastLog2Slow(v); | ||||
| } | ||||
| // Fast calculation of v * log2(v) for integer input. | ||||
| @@ -90,6 +103,14 @@ static WEBP_INLINE float VP8LFastSLog2(uint32_t v) { | ||||
|   return (v < LOG_LOOKUP_IDX_MAX) ? kSLog2Table[v] : VP8LFastSLog2Slow(v); | ||||
| } | ||||
|  | ||||
// Returns 'v' shifted right by 'shift' bits, rounded to nearest.
// Half of (1 << shift) is added to 'v' before shifting, so exact halves round
// up. For shift == 0 the added term is 0 and 'v' is returned unchanged.
// 'shift' must be smaller than 64: the expression shifts a 64-bit constant by
// 'shift', which is undefined in C for counts >= the type width.
| static WEBP_INLINE uint64_t RightShiftRound(uint64_t v, uint32_t shift) { | ||||
|   return (v + (1ull << shift >> 1)) >> shift; | ||||
| } | ||||
|  | ||||
// Returns a / b rounded to the nearest integer instead of truncated.
// When 'a' and 'b' have the same sign, b / 2 is added to 'a' before the
// division; when the signs differ, b / 2 is subtracted. Either way the
// numerator is pushed away from zero so that C's truncating division yields
// the rounded quotient. 'b' must be non-zero.
| static WEBP_INLINE int64_t DivRound(int64_t a, int64_t b) { | ||||
|   return ((a < 0) == (b < 0)) ? ((a + b / 2) / b) : ((a - b / 2) / b); | ||||
| } | ||||
|  | ||||
| // ----------------------------------------------------------------------------- | ||||
| // PrefixEncode() | ||||
|  | ||||
|   | ||||
| @@ -25,7 +25,7 @@ | ||||
| #include "src/dsp/yuv.h" | ||||
|  | ||||
| // lookup table for small values of log2(int) | ||||
| const float kLog2Table[LOG_LOOKUP_IDX_MAX] = { | ||||
| const float kLog2fTable[LOG_LOOKUP_IDX_MAX] = { | ||||
|   0.0000000000000000f, 0.0000000000000000f, | ||||
|   1.0000000000000000f, 1.5849625007211560f, | ||||
|   2.0000000000000000f, 2.3219280948873621f, | ||||
| @@ -156,6 +156,51 @@ const float kLog2Table[LOG_LOOKUP_IDX_MAX] = { | ||||
|   7.9886846867721654f, 7.9943534368588577f | ||||
| }; | ||||
|  | ||||
| // lookup table for small values of log2(int) * (1 << LOG_2_PRECISION_BITS). | ||||
| // Obtained in Python with: | ||||
| // a = [ str(round((1<<23)*math.log2(i))) if i else "0" for i in range(256)] | ||||
| // print(',\n'.join(['  '+','.join(v) | ||||
| //       for v in batched([i.rjust(9) for i in a],7)])) | ||||
| const uint32_t kLog2Table[LOG_LOOKUP_IDX_MAX] = { | ||||
|          0,        0,  8388608, 13295629, 16777216, 19477745, 21684237, | ||||
|   23549800, 25165824, 26591258, 27866353, 29019816, 30072845, 31041538, | ||||
|   31938408, 32773374, 33554432, 34288123, 34979866, 35634199, 36254961, | ||||
|   36845429, 37408424, 37946388, 38461453, 38955489, 39430146, 39886887, | ||||
|   40327016, 40751698, 41161982, 41558811, 41943040, 42315445, 42676731, | ||||
|   43027545, 43368474, 43700062, 44022807, 44337167, 44643569, 44942404, | ||||
|   45234037, 45518808, 45797032, 46069003, 46334996, 46595268, 46850061, | ||||
|   47099600, 47344097, 47583753, 47818754, 48049279, 48275495, 48497560, | ||||
|   48715624, 48929828, 49140306, 49347187, 49550590, 49750631, 49947419, | ||||
|   50141058, 50331648, 50519283, 50704053, 50886044, 51065339, 51242017, | ||||
|   51416153, 51587818, 51757082, 51924012, 52088670, 52251118, 52411415, | ||||
|   52569616, 52725775, 52879946, 53032177, 53182516, 53331012, 53477707, | ||||
|   53622645, 53765868, 53907416, 54047327, 54185640, 54322389, 54457611, | ||||
|   54591338, 54723604, 54854440, 54983876, 55111943, 55238669, 55364082, | ||||
|   55488208, 55611074, 55732705, 55853126, 55972361, 56090432, 56207362, | ||||
|   56323174, 56437887, 56551524, 56664103, 56775645, 56886168, 56995691, | ||||
|   57104232, 57211808, 57318436, 57424133, 57528914, 57632796, 57735795, | ||||
|   57837923, 57939198, 58039632, 58139239, 58238033, 58336027, 58433234, | ||||
|   58529666, 58625336, 58720256, 58814437, 58907891, 59000628, 59092661, | ||||
|   59183999, 59274652, 59364632, 59453947, 59542609, 59630625, 59718006, | ||||
|   59804761, 59890898, 59976426, 60061354, 60145690, 60229443, 60312620, | ||||
|   60395229, 60477278, 60558775, 60639726, 60720140, 60800023, 60879382, | ||||
|   60958224, 61036555, 61114383, 61191714, 61268554, 61344908, 61420785, | ||||
|   61496188, 61571124, 61645600, 61719620, 61793189, 61866315, 61939001, | ||||
|   62011253, 62083076, 62154476, 62225457, 62296024, 62366182, 62435935, | ||||
|   62505289, 62574248, 62642816, 62710997, 62778797, 62846219, 62913267, | ||||
|   62979946, 63046260, 63112212, 63177807, 63243048, 63307939, 63372484, | ||||
|   63436687, 63500551, 63564080, 63627277, 63690146, 63752690, 63814912, | ||||
|   63876816, 63938405, 63999682, 64060650, 64121313, 64181673, 64241734, | ||||
|   64301498, 64360969, 64420148, 64479040, 64537646, 64595970, 64654014, | ||||
|   64711782, 64769274, 64826495, 64883447, 64940132, 64996553, 65052711, | ||||
|   65108611, 65164253, 65219641, 65274776, 65329662, 65384299, 65438691, | ||||
|   65492840, 65546747, 65600416, 65653847, 65707044, 65760008, 65812741, | ||||
|   65865245, 65917522, 65969575, 66021404, 66073013, 66124403, 66175575, | ||||
|   66226531, 66277275, 66327806, 66378127, 66428240, 66478146, 66527847, | ||||
|   66577345, 66626641, 66675737, 66724635, 66773336, 66821842, 66870154, | ||||
|   66918274, 66966204, 67013944, 67061497 | ||||
| }; | ||||
|  | ||||
| const float kSLog2Table[LOG_LOOKUP_IDX_MAX] = { | ||||
|   0.00000000f,    0.00000000f,  2.00000000f,   4.75488750f, | ||||
|   8.00000000f,   11.60964047f,  15.50977500f,  19.65148445f, | ||||
| @@ -356,43 +401,42 @@ static float FastSLog2Slow_C(uint32_t v) { | ||||
|     // log2(1 + (v % y) / v) ~ LOG_2_RECIPROCAL * (v % y)/v | ||||
|     // LOG_2_RECIPROCAL ~ 23/16 | ||||
|     correction = (23 * (orig_v & (y - 1))) >> 4; | ||||
|     return v_f * (kLog2Table[v] + log_cnt) + correction; | ||||
|     return v_f * (kLog2fTable[v] + log_cnt) + correction; | ||||
|   } else { | ||||
|     return (float)(LOG_2_RECIPROCAL * v * log((double)v)); | ||||
|   } | ||||
| } | ||||
|  | ||||
| static float FastLog2Slow_C(uint32_t v) { | ||||
| static uint32_t FastLog2Slow_C(uint32_t v) { | ||||
|   assert(v >= LOG_LOOKUP_IDX_MAX); | ||||
|   if (v < APPROX_LOG_WITH_CORRECTION_MAX) { | ||||
|     const uint32_t orig_v = v; | ||||
|     uint32_t log_2; | ||||
| #if !defined(WEBP_HAVE_SLOW_CLZ_CTZ) | ||||
|     // use clz if available | ||||
|     const int log_cnt = BitsLog2Floor(v) - 7; | ||||
|     const uint32_t log_cnt = BitsLog2Floor(v) - 7; | ||||
|     const uint32_t y = 1 << log_cnt; | ||||
|     const uint32_t orig_v = v; | ||||
|     double log_2; | ||||
|     v >>= log_cnt; | ||||
| #else | ||||
|     int log_cnt = 0; | ||||
|     uint32_t log_cnt = 0; | ||||
|     uint32_t y = 1; | ||||
|     const uint32_t orig_v = v; | ||||
|     double log_2; | ||||
|     do { | ||||
|       ++log_cnt; | ||||
|       v = v >> 1; | ||||
|       y = y << 1; | ||||
|     } while (v >= LOG_LOOKUP_IDX_MAX); | ||||
| #endif | ||||
|     log_2 = kLog2Table[v] + log_cnt; | ||||
|     log_2 = kLog2Table[v] + (log_cnt << LOG_2_PRECISION_BITS); | ||||
|     if (orig_v >= APPROX_LOG_MAX) { | ||||
|       // Since the division is still expensive, add this correction factor only | ||||
|       // for large values of 'v'. | ||||
|       const int correction = (23 * (orig_v & (y - 1))) >> 4; | ||||
|       log_2 += (double)correction / orig_v; | ||||
|       const uint64_t correction = | ||||
|           (uint64_t)LOG_2_RECIPROCAL_FIXED * (orig_v & (y - 1)); | ||||
|       log_2 += (uint32_t)DivRound(correction, orig_v); | ||||
|     } | ||||
|     return (float)log_2; | ||||
|     return log_2; | ||||
|   } else { | ||||
|     return (float)(LOG_2_RECIPROCAL * log((double)v)); | ||||
|     return (uint32_t)(LOG_2_RECIPROCAL_FIXED_DOUBLE * log((double)v) + .5); | ||||
|   } | ||||
| } | ||||
|  | ||||
| @@ -779,7 +823,7 @@ VP8LCollectColorBlueTransformsFunc VP8LCollectColorBlueTransforms; | ||||
| VP8LCollectColorRedTransformsFunc VP8LCollectColorRedTransforms; | ||||
|  | ||||
| VP8LFastLog2SlowFunc VP8LFastLog2Slow; | ||||
| VP8LFastLog2SlowFunc VP8LFastSLog2Slow; | ||||
| VP8LFastSLog2SlowFunc VP8LFastSLog2Slow; | ||||
|  | ||||
| VP8LCostFunc VP8LExtraCost; | ||||
| VP8LCostCombinedFunc VP8LExtraCostCombined; | ||||
|   | ||||
| @@ -53,18 +53,18 @@ static float FastSLog2Slow_MIPS32(uint32_t v) { | ||||
|  | ||||
|     // (v % y) = (v % 2^log_cnt) = v & (2^log_cnt - 1) | ||||
|     correction = (23 * (v & (y - 1))) >> 4; | ||||
|     return v_f * (kLog2Table[temp] + log_cnt) + correction; | ||||
|     return v_f * (kLog2fTable[temp] + log_cnt) + correction; | ||||
|   } else { | ||||
|     return (float)(LOG_2_RECIPROCAL * v * log((double)v)); | ||||
|   } | ||||
| } | ||||
|  | ||||
| static float FastLog2Slow_MIPS32(uint32_t v) { | ||||
| static uint32_t FastLog2Slow_MIPS32(uint32_t v) { | ||||
|   assert(v >= LOG_LOOKUP_IDX_MAX); | ||||
|   if (v < APPROX_LOG_WITH_CORRECTION_MAX) { | ||||
|     uint32_t log_cnt, y; | ||||
|     const int c24 = 24; | ||||
|     double log_2; | ||||
|     uint32_t log_2; | ||||
|     uint32_t temp; | ||||
|  | ||||
|     __asm__ volatile( | ||||
| @@ -78,17 +78,17 @@ static float FastLog2Slow_MIPS32(uint32_t v) { | ||||
|       : [c24]"r"(c24), [v]"r"(v) | ||||
|     ); | ||||
|  | ||||
|     log_2 = kLog2Table[temp] + log_cnt; | ||||
|     log_2 = kLog2Table[temp] + (log_cnt << LOG_2_PRECISION_BITS); | ||||
|     if (v >= APPROX_LOG_MAX) { | ||||
|       // Since the division is still expensive, add this correction factor only | ||||
|       // for large values of 'v'. | ||||
|  | ||||
|       const uint32_t correction = (23 * (v & (y - 1))) >> 4; | ||||
|       log_2 += (double)correction / v; | ||||
|       const uint64_t correction = | ||||
|           (uint64_t)LOG_2_RECIPROCAL_FIXED * (v & (y - 1)); | ||||
|       log_2 += (uint32_t)DivRound(correction, v); | ||||
|     } | ||||
|     return (float)log_2; | ||||
|     return log_2; | ||||
|   } else { | ||||
|     return (float)(LOG_2_RECIPROCAL * log((double)v)); | ||||
|     return (uint32_t)(LOG_2_RECIPROCAL_FIXED_DOUBLE * log((double)v) + .5); | ||||
|   } | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -15,7 +15,7 @@ | ||||
| // | ||||
|  | ||||
| #include <assert.h> | ||||
| #include <float.h> | ||||
| #include <string.h> | ||||
|  | ||||
| #include "src/dsp/lossless_common.h" | ||||
| #include "src/enc/backward_references_enc.h" | ||||
| @@ -31,15 +31,15 @@ extern void VP8LBackwardRefsCursorAdd(VP8LBackwardRefs* const refs, | ||||
|                                       const PixOrCopy v); | ||||
|  | ||||
| typedef struct { | ||||
|   float alpha_[VALUES_IN_BYTE]; | ||||
|   float red_[VALUES_IN_BYTE]; | ||||
|   float blue_[VALUES_IN_BYTE]; | ||||
|   float distance_[NUM_DISTANCE_CODES]; | ||||
|   float* literal_; | ||||
|   uint32_t alpha_[VALUES_IN_BYTE]; | ||||
|   uint32_t red_[VALUES_IN_BYTE]; | ||||
|   uint32_t blue_[VALUES_IN_BYTE]; | ||||
|   uint32_t distance_[NUM_DISTANCE_CODES]; | ||||
|   uint32_t* literal_; | ||||
| } CostModel; | ||||
|  | ||||
| static void ConvertPopulationCountTableToBitEstimates( | ||||
|     int num_symbols, const uint32_t population_counts[], float output[]) { | ||||
|     int num_symbols, const uint32_t population_counts[], uint32_t output[]) { | ||||
|   uint32_t sum = 0; | ||||
|   int nonzeros = 0; | ||||
|   int i; | ||||
| @@ -52,7 +52,7 @@ static void ConvertPopulationCountTableToBitEstimates( | ||||
|   if (nonzeros <= 1) { | ||||
|     memset(output, 0, num_symbols * sizeof(*output)); | ||||
|   } else { | ||||
|     const float logsum = VP8LFastLog2(sum); | ||||
|     const uint32_t logsum = VP8LFastLog2(sum); | ||||
|     for (i = 0; i < num_symbols; ++i) { | ||||
|       output[i] = logsum - VP8LFastLog2(population_counts[i]); | ||||
|     } | ||||
| @@ -93,47 +93,47 @@ static int CostModelBuild(CostModel* const m, int xsize, int cache_bits, | ||||
|   return ok; | ||||
| } | ||||
|  | ||||
| static WEBP_INLINE float GetLiteralCost(const CostModel* const m, uint32_t v) { | ||||
|   return m->alpha_[v >> 24] + | ||||
|          m->red_[(v >> 16) & 0xff] + | ||||
|          m->literal_[(v >> 8) & 0xff] + | ||||
|          m->blue_[v & 0xff]; | ||||
| static WEBP_INLINE int64_t GetLiteralCost(const CostModel* const m, | ||||
|                                           uint32_t v) { | ||||
|   return (int64_t)m->alpha_[v >> 24] + m->red_[(v >> 16) & 0xff] + | ||||
|          m->literal_[(v >> 8) & 0xff] + m->blue_[v & 0xff]; | ||||
| } | ||||
|  | ||||
| static WEBP_INLINE float GetCacheCost(const CostModel* const m, uint32_t idx) { | ||||
| static WEBP_INLINE int64_t GetCacheCost(const CostModel* const m, | ||||
|                                         uint32_t idx) { | ||||
|   const int literal_idx = VALUES_IN_BYTE + NUM_LENGTH_CODES + idx; | ||||
|   return m->literal_[literal_idx]; | ||||
|   return (int64_t)m->literal_[literal_idx]; | ||||
| } | ||||
|  | ||||
| static WEBP_INLINE float GetLengthCost(const CostModel* const m, | ||||
|                                        uint32_t length) { | ||||
| static WEBP_INLINE int64_t GetLengthCost(const CostModel* const m, | ||||
|                                          uint32_t length) { | ||||
|   int code, extra_bits; | ||||
|   VP8LPrefixEncodeBits(length, &code, &extra_bits); | ||||
|   return m->literal_[VALUES_IN_BYTE + code] + extra_bits; | ||||
|   return (int64_t)m->literal_[VALUES_IN_BYTE + code] + | ||||
|          ((int64_t)extra_bits << LOG_2_PRECISION_BITS); | ||||
| } | ||||
|  | ||||
| static WEBP_INLINE float GetDistanceCost(const CostModel* const m, | ||||
|                                          uint32_t distance) { | ||||
| static WEBP_INLINE int64_t GetDistanceCost(const CostModel* const m, | ||||
|                                            uint32_t distance) { | ||||
|   int code, extra_bits; | ||||
|   VP8LPrefixEncodeBits(distance, &code, &extra_bits); | ||||
|   return m->distance_[code] + extra_bits; | ||||
|   return (int64_t)m->distance_[code] + | ||||
|          ((int64_t)extra_bits << LOG_2_PRECISION_BITS); | ||||
| } | ||||
|  | ||||
| static WEBP_INLINE void AddSingleLiteralWithCostModel( | ||||
|     const uint32_t* const argb, VP8LColorCache* const hashers, | ||||
|     const CostModel* const cost_model, int idx, int use_color_cache, | ||||
|     float prev_cost, float* const cost, uint16_t* const dist_array) { | ||||
|   float cost_val = prev_cost; | ||||
|     int64_t prev_cost, int64_t* const cost, uint16_t* const dist_array) { | ||||
|   int64_t cost_val = prev_cost; | ||||
|   const uint32_t color = argb[idx]; | ||||
|   const int ix = use_color_cache ? VP8LColorCacheContains(hashers, color) : -1; | ||||
|   if (ix >= 0) { | ||||
|     // use_color_cache is true and hashers contains color | ||||
|     const float mul0 = 0.68f; | ||||
|     cost_val += GetCacheCost(cost_model, ix) * mul0; | ||||
|     cost_val += DivRound(GetCacheCost(cost_model, ix) * 68, 100); | ||||
|   } else { | ||||
|     const float mul1 = 0.82f; | ||||
|     if (use_color_cache) VP8LColorCacheInsert(hashers, color); | ||||
|     cost_val += GetLiteralCost(cost_model, color) * mul1; | ||||
|     cost_val += DivRound(GetLiteralCost(cost_model, color) * 82, 100); | ||||
|   } | ||||
|   if (cost[idx] > cost_val) { | ||||
|     cost[idx] = cost_val; | ||||
| @@ -163,7 +163,7 @@ static WEBP_INLINE void AddSingleLiteralWithCostModel( | ||||
| // therefore no overlapping intervals. | ||||
| typedef struct CostInterval CostInterval; | ||||
| struct CostInterval { | ||||
|   float cost_; | ||||
|   int64_t cost_; | ||||
|   int start_; | ||||
|   int end_; | ||||
|   int index_; | ||||
| @@ -173,7 +173,7 @@ struct CostInterval { | ||||
|  | ||||
| // The GetLengthCost(cost_model, k) are cached in a CostCacheInterval. | ||||
| typedef struct { | ||||
|   float cost_; | ||||
|   int64_t cost_; | ||||
|   int start_; | ||||
|   int end_;       // Exclusive. | ||||
| } CostCacheInterval; | ||||
| @@ -188,8 +188,9 @@ typedef struct { | ||||
|   int count_;  // The number of stored intervals. | ||||
|   CostCacheInterval* cache_intervals_; | ||||
|   size_t cache_intervals_size_; | ||||
|   float cost_cache_[MAX_LENGTH];  // Contains the GetLengthCost(cost_model, k). | ||||
|   float* costs_; | ||||
|   // Contains the GetLengthCost(cost_model, k). | ||||
|   int64_t cost_cache_[MAX_LENGTH]; | ||||
|   int64_t* costs_; | ||||
|   uint16_t* dist_array_; | ||||
|   // Most of the time, we only need few intervals -> use a free-list, to avoid | ||||
|   // fragmentation with small allocs in most common cases. | ||||
| @@ -298,7 +299,7 @@ static int CostManagerInit(CostManager* const manager, | ||||
|     cur->end_ = 1; | ||||
|     cur->cost_ = manager->cost_cache_[0]; | ||||
|     for (i = 1; i < cost_cache_size; ++i) { | ||||
|       const float cost_val = manager->cost_cache_[i]; | ||||
|       const int64_t cost_val = manager->cost_cache_[i]; | ||||
|       if (cost_val != cur->cost_) { | ||||
|         ++cur; | ||||
|         // Initialize an interval. | ||||
| @@ -311,13 +312,17 @@ static int CostManagerInit(CostManager* const manager, | ||||
|            manager->cache_intervals_size_); | ||||
|   } | ||||
|  | ||||
|   manager->costs_ = (float*)WebPSafeMalloc(pix_count, sizeof(*manager->costs_)); | ||||
|   manager->costs_ = | ||||
|       (int64_t*)WebPSafeMalloc(pix_count, sizeof(*manager->costs_)); | ||||
|   if (manager->costs_ == NULL) { | ||||
|     CostManagerClear(manager); | ||||
|     return 0; | ||||
|   } | ||||
|   // Set the initial costs_ high for every pixel as we will keep the minimum. | ||||
|   for (i = 0; i < pix_count; ++i) manager->costs_[i] = FLT_MAX; | ||||
|   // Set the initial costs_ to INT64_MAX for every pixel as we will keep the | ||||
|   // minimum. | ||||
|   for (i = 0; i < pix_count; ++i) { | ||||
|     manager->costs_[i] = (int64_t)((1ull << 63) - 1); | ||||
|   } | ||||
|  | ||||
|   return 1; | ||||
| } | ||||
| @@ -325,7 +330,7 @@ static int CostManagerInit(CostManager* const manager, | ||||
| // Given the cost and the position that define an interval, update the cost at | ||||
| // pixel 'i' if it is smaller than the previously computed value. | ||||
| static WEBP_INLINE void UpdateCost(CostManager* const manager, int i, | ||||
|                                    int position, float cost) { | ||||
|                                    int position, int64_t cost) { | ||||
|   const int k = i - position; | ||||
|   assert(k >= 0 && k < MAX_LENGTH); | ||||
|  | ||||
| @@ -339,7 +344,7 @@ static WEBP_INLINE void UpdateCost(CostManager* const manager, int i, | ||||
| // all the pixels between 'start' and 'end' excluded. | ||||
| static WEBP_INLINE void UpdateCostPerInterval(CostManager* const manager, | ||||
|                                               int start, int end, int position, | ||||
|                                               float cost) { | ||||
|                                               int64_t cost) { | ||||
|   int i; | ||||
|   for (i = start; i < end; ++i) UpdateCost(manager, i, position, cost); | ||||
| } | ||||
| @@ -424,7 +429,7 @@ static WEBP_INLINE void PositionOrphanInterval(CostManager* const manager, | ||||
| // interval_in as a hint. The intervals are sorted by start_ value. | ||||
| static WEBP_INLINE void InsertInterval(CostManager* const manager, | ||||
|                                        CostInterval* const interval_in, | ||||
|                                        float cost, int position, int start, | ||||
|                                        int64_t cost, int position, int start, | ||||
|                                        int end) { | ||||
|   CostInterval* interval_new; | ||||
|  | ||||
| @@ -463,7 +468,7 @@ static WEBP_INLINE void InsertInterval(CostManager* const manager, | ||||
| // If handling the interval or one of its subintervals becomes too heavy, its | ||||
| // contribution is added to the costs right away. | ||||
| static WEBP_INLINE void PushInterval(CostManager* const manager, | ||||
|                                      float distance_cost, int position, | ||||
|                                      int64_t distance_cost, int position, | ||||
|                                      int len) { | ||||
|   size_t i; | ||||
|   CostInterval* interval = manager->head_; | ||||
| @@ -478,7 +483,7 @@ static WEBP_INLINE void PushInterval(CostManager* const manager, | ||||
|     int j; | ||||
|     for (j = position; j < position + len; ++j) { | ||||
|       const int k = j - position; | ||||
|       float cost_tmp; | ||||
|       int64_t cost_tmp; | ||||
|       assert(k >= 0 && k < MAX_LENGTH); | ||||
|       cost_tmp = distance_cost + manager->cost_cache_[k]; | ||||
|  | ||||
| @@ -498,7 +503,7 @@ static WEBP_INLINE void PushInterval(CostManager* const manager, | ||||
|     const int end = position + (cost_cache_intervals[i].end_ > len | ||||
|                                  ? len | ||||
|                                  : cost_cache_intervals[i].end_); | ||||
|     const float cost = distance_cost + cost_cache_intervals[i].cost_; | ||||
|     const int64_t cost = distance_cost + cost_cache_intervals[i].cost_; | ||||
|  | ||||
|     for (; interval != NULL && interval->start_ < end; | ||||
|          interval = interval_next) { | ||||
| @@ -576,7 +581,7 @@ static int BackwardReferencesHashChainDistanceOnly( | ||||
|   const int pix_count = xsize * ysize; | ||||
|   const int use_color_cache = (cache_bits > 0); | ||||
|   const size_t literal_array_size = | ||||
|       sizeof(float) * (VP8LHistogramNumCodes(cache_bits)); | ||||
|       sizeof(*((CostModel*)NULL)->literal_) * VP8LHistogramNumCodes(cache_bits); | ||||
|   const size_t cost_model_size = sizeof(CostModel) + literal_array_size; | ||||
|   CostModel* const cost_model = | ||||
|       (CostModel*)WebPSafeCalloc(1ULL, cost_model_size); | ||||
| @@ -584,13 +589,13 @@ static int BackwardReferencesHashChainDistanceOnly( | ||||
|   CostManager* cost_manager = | ||||
|       (CostManager*)WebPSafeCalloc(1ULL, sizeof(*cost_manager)); | ||||
|   int offset_prev = -1, len_prev = -1; | ||||
|   float offset_cost = -1.f; | ||||
|   int64_t offset_cost = -1; | ||||
|   int first_offset_is_constant = -1;  // initialized with 'impossible' value | ||||
|   int reach = 0; | ||||
|  | ||||
|   if (cost_model == NULL || cost_manager == NULL) goto Error; | ||||
|  | ||||
|   cost_model->literal_ = (float*)(cost_model + 1); | ||||
|   cost_model->literal_ = (uint32_t*)(cost_model + 1); | ||||
|   if (use_color_cache) { | ||||
|     cc_init = VP8LColorCacheInit(&hashers, cache_bits); | ||||
|     if (!cc_init) goto Error; | ||||
| @@ -608,11 +613,12 @@ static int BackwardReferencesHashChainDistanceOnly( | ||||
|   // non-processed locations from this point. | ||||
|   dist_array[0] = 0; | ||||
|   // Add first pixel as literal. | ||||
|   AddSingleLiteralWithCostModel(argb, &hashers, cost_model, 0, use_color_cache, | ||||
|                                 0.f, cost_manager->costs_, dist_array); | ||||
|   AddSingleLiteralWithCostModel(argb, &hashers, cost_model, /*idx=*/0, | ||||
|                                 use_color_cache, /*prev_cost=*/0, | ||||
|                                 cost_manager->costs_, dist_array); | ||||
|  | ||||
|   for (i = 1; i < pix_count; ++i) { | ||||
|     const float prev_cost = cost_manager->costs_[i - 1]; | ||||
|     const int64_t prev_cost = cost_manager->costs_[i - 1]; | ||||
|     int offset, len; | ||||
|     VP8LHashChainFindCopy(hash_chain, i, &offset, &len); | ||||
|  | ||||
|   | ||||
| @@ -180,14 +180,16 @@ static int AnalyzeEntropy(const uint32_t* argb, | ||||
|       // When including transforms, there is an overhead in bits from | ||||
|       // storing them. This overhead is small but matters for small images. | ||||
|       // For spatial, there are 14 transformations. | ||||
|       entropy[kSpatial] += VP8LSubSampleSize(width, transform_bits) * | ||||
|                            VP8LSubSampleSize(height, transform_bits) * | ||||
|                            VP8LFastLog2(14); | ||||
|       entropy[kSpatial] += RightShiftRound( | ||||
|           (uint64_t)VP8LSubSampleSize(width, transform_bits) * | ||||
|               VP8LSubSampleSize(height, transform_bits) * VP8LFastLog2(14), | ||||
|           LOG_2_PRECISION_BITS); | ||||
|       // For color transforms: 24 as only 3 channels are considered in a | ||||
|       // ColorTransformElement. | ||||
|       entropy[kSpatialSubGreen] += VP8LSubSampleSize(width, transform_bits) * | ||||
|                                    VP8LSubSampleSize(height, transform_bits) * | ||||
|                                    VP8LFastLog2(24); | ||||
|       entropy[kSpatialSubGreen] += RightShiftRound( | ||||
|           (uint64_t)VP8LSubSampleSize(width, transform_bits) * | ||||
|               VP8LSubSampleSize(height, transform_bits) * VP8LFastLog2(24), | ||||
|           LOG_2_PRECISION_BITS); | ||||
|       // For palettes, add the cost of storing the palette. | ||||
|       // We empirically estimate the cost of a compressed entry as 8 bits. | ||||
|       // The palette is differential-coded when compressed hence a much | ||||
|   | ||||
		Reference in New Issue
	
	Block a user