mirror of
				https://github.com/webmproject/libwebp.git
				synced 2025-10-31 02:15:42 +01:00 
			
		
		
		
	Provide an SSE2 implementation of CombinedShannonEntropy.
CombinedShannonEntropy takes 30% of the time in lossless compression. This implementation speeds up the overall process by 2 to 3%.
Change-Id: I04a71743284c38814fd0726034d51a02b1b6ba8f
This commit is contained in:
		| @@ -199,9 +199,12 @@ static WEBP_INLINE float VP8LFastSLog2(uint32_t v) { | ||||
| typedef double (*VP8LCostFunc)(const uint32_t* population, int length); | ||||
| typedef double (*VP8LCostCombinedFunc)(const uint32_t* X, const uint32_t* Y, | ||||
|                                        int length); | ||||
| typedef float (*VP8LCombinedShannonEntropyFunc)(const int X[256], | ||||
|                                                 const int Y[256]); | ||||
|  | ||||
| extern VP8LCostFunc VP8LExtraCost; | ||||
| extern VP8LCostCombinedFunc VP8LExtraCostCombined; | ||||
| extern VP8LCombinedShannonEntropyFunc VP8LCombinedShannonEntropy; | ||||
|  | ||||
| typedef struct {        // small struct to hold counters | ||||
|   int counts[2];        // index: 0=zero streak, 1=non-zero streak | ||||
|   | ||||
| @@ -413,15 +413,15 @@ static float CombinedShannonEntropy(const int X[256], const int Y[256]) { | ||||
|   int sumX = 0, sumXY = 0; | ||||
|   for (i = 0; i < 256; ++i) { | ||||
|     const int x = X[i]; | ||||
|     const int xy = x + Y[i]; | ||||
|     if (x != 0) { | ||||
|       const int xy = x + Y[i]; | ||||
|       sumX += x; | ||||
|       retval -= VP8LFastSLog2(x); | ||||
|       sumXY += xy; | ||||
|       retval -= VP8LFastSLog2(xy); | ||||
|     } else if (xy != 0) { | ||||
|       sumXY += xy; | ||||
|       retval -= VP8LFastSLog2(xy); | ||||
|     } else if (Y[i] != 0) { | ||||
|       sumXY += Y[i]; | ||||
|       retval -= VP8LFastSLog2(Y[i]); | ||||
|     } | ||||
|   } | ||||
|   retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY); | ||||
| @@ -435,7 +435,7 @@ static float PredictionCostSpatialHistogram(const int accumulated[4][256], | ||||
|   for (i = 0; i < 4; ++i) { | ||||
|     const double kExpValue = 0.94; | ||||
|     retval += PredictionCostSpatial(tile[i], 1, kExpValue); | ||||
|     retval += CombinedShannonEntropy(tile[i], accumulated[i]); | ||||
|     retval += VP8LCombinedShannonEntropy(tile[i], accumulated[i]); | ||||
|   } | ||||
|   return (float)retval; | ||||
| } | ||||
| @@ -894,7 +894,7 @@ static float PredictionCostCrossColor(const int accumulated[256], | ||||
|   // Favor low entropy, locally and globally. | ||||
|   // Favor small absolute values for PredictionCostSpatial | ||||
|   static const double kExpValue = 2.4; | ||||
|   return CombinedShannonEntropy(counts, accumulated) + | ||||
|   return VP8LCombinedShannonEntropy(counts, accumulated) + | ||||
|          PredictionCostSpatial(counts, 3, kExpValue); | ||||
| } | ||||
|  | ||||
| @@ -1269,6 +1269,7 @@ VP8LFastLog2SlowFunc VP8LFastSLog2Slow; | ||||
|  | ||||
| VP8LCostFunc VP8LExtraCost; | ||||
| VP8LCostCombinedFunc VP8LExtraCostCombined; | ||||
| VP8LCombinedShannonEntropyFunc VP8LCombinedShannonEntropy; | ||||
|  | ||||
| VP8LCostCountFunc VP8LHuffmanCostCount; | ||||
|  | ||||
| @@ -1300,6 +1301,7 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInit(void) { | ||||
|  | ||||
|   VP8LExtraCost = ExtraCost; | ||||
|   VP8LExtraCostCombined = ExtraCostCombined; | ||||
|   VP8LCombinedShannonEntropy = CombinedShannonEntropy; | ||||
|  | ||||
|   VP8LHuffmanCostCount = HuffmanCostCount; | ||||
|  | ||||
|   | ||||
| @@ -250,6 +250,80 @@ static void HistogramAdd(const VP8LHistogram* const a, | ||||
|   } | ||||
| } | ||||
|  | ||||
| //------------------------------------------------------------------------------ | ||||
| // Entropy | ||||
|  | ||||
| // Checks whether the X or Y contribution is worth computing and adding. | ||||
| // Used in loop unrolling. | ||||
| #define ANALYZE_X_OR_Y(x_or_y, j)                                   \ | ||||
|   do {                                                              \ | ||||
|     if (x_or_y[i + j] != 0) retval -= VP8LFastSLog2(x_or_y[i + j]); \ | ||||
|   } while (0) | ||||
|  | ||||
| // Checks whether the X + Y contribution is worth computing and adding. | ||||
| // Used in loop unrolling. | ||||
| #define ANALYZE_XY(j)                  \ | ||||
|   do {                                 \ | ||||
|     if (tmp[j] != 0) {                 \ | ||||
|       retval -= VP8LFastSLog2(tmp[j]); \ | ||||
|       ANALYZE_X_OR_Y(X, j);            \ | ||||
|     }                                  \ | ||||
|   } while (0) | ||||
|  | ||||
| static float CombinedShannonEntropy(const int X[256], const int Y[256]) { | ||||
|   int i; | ||||
|   double retval = 0.; | ||||
|   int sumX, sumXY; | ||||
|   int32_t tmp[4]; | ||||
|   __m128i zero = _mm_setzero_si128(); | ||||
|   // Sums up X + Y, 4 ints at a time (and will merge it at the end for sumXY). | ||||
|   __m128i sumXY_128 = zero; | ||||
|   __m128i sumX_128 = zero; | ||||
|  | ||||
|   for (i = 0; i < 256; i += 4) { | ||||
|     const __m128i x = _mm_loadu_si128((const __m128i*)(X + i)); | ||||
|     const __m128i y = _mm_loadu_si128((const __m128i*)(Y + i)); | ||||
|  | ||||
|     // Check if any X is non-zero: this actually provides a speedup as X is | ||||
|     // usually sparse. | ||||
|     if (_mm_movemask_epi8(_mm_cmpeq_epi32(x, zero)) != 0xFFFF) { | ||||
|       const __m128i xy_128 = _mm_add_epi32(x, y); | ||||
|       sumXY_128 = _mm_add_epi32(sumXY_128, xy_128); | ||||
|  | ||||
|       sumX_128 = _mm_add_epi32(sumX_128, x); | ||||
|  | ||||
|       // Analyze the different X + Y. | ||||
|       _mm_storeu_si128((__m128i*)tmp, xy_128); | ||||
|  | ||||
|       ANALYZE_XY(0); | ||||
|       ANALYZE_XY(1); | ||||
|       ANALYZE_XY(2); | ||||
|       ANALYZE_XY(3); | ||||
|     } else { | ||||
|       // X is fully 0, so only deal with Y. | ||||
|       sumXY_128 = _mm_add_epi32(sumXY_128, y); | ||||
|  | ||||
|       ANALYZE_X_OR_Y(Y, 0); | ||||
|       ANALYZE_X_OR_Y(Y, 1); | ||||
|       ANALYZE_X_OR_Y(Y, 2); | ||||
|       ANALYZE_X_OR_Y(Y, 3); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   // Sum up sumX_128 to get sumX. | ||||
|   _mm_storeu_si128((__m128i*)tmp, sumX_128); | ||||
|   sumX = tmp[3] + tmp[2] + tmp[1] + tmp[0]; | ||||
|  | ||||
|   // Sum up sumXY_128 to get sumXY. | ||||
|   _mm_storeu_si128((__m128i*)tmp, sumXY_128); | ||||
|   sumXY = tmp[3] + tmp[2] + tmp[1] + tmp[0]; | ||||
|  | ||||
|   retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY); | ||||
|   return (float)retval; | ||||
| } | ||||
| #undef ANALYZE_X_OR_Y | ||||
| #undef ANALYZE_XY | ||||
|  | ||||
| //------------------------------------------------------------------------------ | ||||
| // Entry point | ||||
|  | ||||
| @@ -261,6 +335,7 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitSSE2(void) { | ||||
|   VP8LCollectColorBlueTransforms = CollectColorBlueTransforms; | ||||
|   VP8LCollectColorRedTransforms = CollectColorRedTransforms; | ||||
|   VP8LHistogramAdd = HistogramAdd; | ||||
|   VP8LCombinedShannonEntropy = CombinedShannonEntropy; | ||||
| } | ||||
|  | ||||
| #else  // !WEBP_USE_SSE2 | ||||
|   | ||||
		Reference in New Issue
	
	Block a user