mirror of
https://github.com/webmproject/libwebp.git
synced 2025-01-30 00:22:55 +01:00
Provide an SSE2 implementation of CombinedShannonEntropy.
CombinedShannonEntropy takes about 30% of the time spent in lossless compression. This implementation speeds up the overall process by 2 to 3%.
Change-Id: I04a71743284c38814fd0726034d51a02b1b6ba8f
This commit is contained in:
parent
04507dc91f
commit
2835089d6a
@ -199,9 +199,12 @@ static WEBP_INLINE float VP8LFastSLog2(uint32_t v) {
|
|||||||
typedef double (*VP8LCostFunc)(const uint32_t* population, int length);
|
typedef double (*VP8LCostFunc)(const uint32_t* population, int length);
|
||||||
typedef double (*VP8LCostCombinedFunc)(const uint32_t* X, const uint32_t* Y,
|
typedef double (*VP8LCostCombinedFunc)(const uint32_t* X, const uint32_t* Y,
|
||||||
int length);
|
int length);
|
||||||
|
typedef float (*VP8LCombinedShannonEntropyFunc)(const int X[256],
|
||||||
|
const int Y[256]);
|
||||||
|
|
||||||
extern VP8LCostFunc VP8LExtraCost;
|
extern VP8LCostFunc VP8LExtraCost;
|
||||||
extern VP8LCostCombinedFunc VP8LExtraCostCombined;
|
extern VP8LCostCombinedFunc VP8LExtraCostCombined;
|
||||||
|
extern VP8LCombinedShannonEntropyFunc VP8LCombinedShannonEntropy;
|
||||||
|
|
||||||
typedef struct { // small struct to hold counters
|
typedef struct { // small struct to hold counters
|
||||||
int counts[2]; // index: 0=zero streak, 1=non-zero streak
|
int counts[2]; // index: 0=zero streak, 1=non-zero streak
|
||||||
|
@ -413,15 +413,15 @@ static float CombinedShannonEntropy(const int X[256], const int Y[256]) {
|
|||||||
int sumX = 0, sumXY = 0;
|
int sumX = 0, sumXY = 0;
|
||||||
for (i = 0; i < 256; ++i) {
|
for (i = 0; i < 256; ++i) {
|
||||||
const int x = X[i];
|
const int x = X[i];
|
||||||
const int xy = x + Y[i];
|
|
||||||
if (x != 0) {
|
if (x != 0) {
|
||||||
|
const int xy = x + Y[i];
|
||||||
sumX += x;
|
sumX += x;
|
||||||
retval -= VP8LFastSLog2(x);
|
retval -= VP8LFastSLog2(x);
|
||||||
sumXY += xy;
|
sumXY += xy;
|
||||||
retval -= VP8LFastSLog2(xy);
|
retval -= VP8LFastSLog2(xy);
|
||||||
} else if (xy != 0) {
|
} else if (Y[i] != 0) {
|
||||||
sumXY += xy;
|
sumXY += Y[i];
|
||||||
retval -= VP8LFastSLog2(xy);
|
retval -= VP8LFastSLog2(Y[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY);
|
retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY);
|
||||||
@ -435,7 +435,7 @@ static float PredictionCostSpatialHistogram(const int accumulated[4][256],
|
|||||||
for (i = 0; i < 4; ++i) {
|
for (i = 0; i < 4; ++i) {
|
||||||
const double kExpValue = 0.94;
|
const double kExpValue = 0.94;
|
||||||
retval += PredictionCostSpatial(tile[i], 1, kExpValue);
|
retval += PredictionCostSpatial(tile[i], 1, kExpValue);
|
||||||
retval += CombinedShannonEntropy(tile[i], accumulated[i]);
|
retval += VP8LCombinedShannonEntropy(tile[i], accumulated[i]);
|
||||||
}
|
}
|
||||||
return (float)retval;
|
return (float)retval;
|
||||||
}
|
}
|
||||||
@ -894,7 +894,7 @@ static float PredictionCostCrossColor(const int accumulated[256],
|
|||||||
// Favor low entropy, locally and globally.
|
// Favor low entropy, locally and globally.
|
||||||
// Favor small absolute values for PredictionCostSpatial
|
// Favor small absolute values for PredictionCostSpatial
|
||||||
static const double kExpValue = 2.4;
|
static const double kExpValue = 2.4;
|
||||||
return CombinedShannonEntropy(counts, accumulated) +
|
return VP8LCombinedShannonEntropy(counts, accumulated) +
|
||||||
PredictionCostSpatial(counts, 3, kExpValue);
|
PredictionCostSpatial(counts, 3, kExpValue);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1269,6 +1269,7 @@ VP8LFastLog2SlowFunc VP8LFastSLog2Slow;
|
|||||||
|
|
||||||
VP8LCostFunc VP8LExtraCost;
|
VP8LCostFunc VP8LExtraCost;
|
||||||
VP8LCostCombinedFunc VP8LExtraCostCombined;
|
VP8LCostCombinedFunc VP8LExtraCostCombined;
|
||||||
|
VP8LCombinedShannonEntropyFunc VP8LCombinedShannonEntropy;
|
||||||
|
|
||||||
VP8LCostCountFunc VP8LHuffmanCostCount;
|
VP8LCostCountFunc VP8LHuffmanCostCount;
|
||||||
|
|
||||||
@ -1300,6 +1301,7 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInit(void) {
|
|||||||
|
|
||||||
VP8LExtraCost = ExtraCost;
|
VP8LExtraCost = ExtraCost;
|
||||||
VP8LExtraCostCombined = ExtraCostCombined;
|
VP8LExtraCostCombined = ExtraCostCombined;
|
||||||
|
VP8LCombinedShannonEntropy = CombinedShannonEntropy;
|
||||||
|
|
||||||
VP8LHuffmanCostCount = HuffmanCostCount;
|
VP8LHuffmanCostCount = HuffmanCostCount;
|
||||||
|
|
||||||
|
@ -250,6 +250,80 @@ static void HistogramAdd(const VP8LHistogram* const a,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//------------------------------------------------------------------------------
|
||||||
|
// Entropy
|
||||||
|
|
||||||
|
// Checks whether the X or Y contribution is worth computing and adding.
|
||||||
|
// Used in loop unrolling.
|
||||||
|
#define ANALYZE_X_OR_Y(x_or_y, j) \
|
||||||
|
do { \
|
||||||
|
if (x_or_y[i + j] != 0) retval -= VP8LFastSLog2(x_or_y[i + j]); \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
// Checks whether the X + Y contribution is worth computing and adding.
|
||||||
|
// Used in loop unrolling.
|
||||||
|
#define ANALYZE_XY(j) \
|
||||||
|
do { \
|
||||||
|
if (tmp[j] != 0) { \
|
||||||
|
retval -= VP8LFastSLog2(tmp[j]); \
|
||||||
|
ANALYZE_X_OR_Y(X, j); \
|
||||||
|
} \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
static float CombinedShannonEntropy(const int X[256], const int Y[256]) {
|
||||||
|
int i;
|
||||||
|
double retval = 0.;
|
||||||
|
int sumX, sumXY;
|
||||||
|
int32_t tmp[4];
|
||||||
|
__m128i zero = _mm_setzero_si128();
|
||||||
|
// Sums up X + Y, 4 ints at a time (and will merge it at the end for sumXY).
|
||||||
|
__m128i sumXY_128 = zero;
|
||||||
|
__m128i sumX_128 = zero;
|
||||||
|
|
||||||
|
for (i = 0; i < 256; i += 4) {
|
||||||
|
const __m128i x = _mm_loadu_si128((const __m128i*)(X + i));
|
||||||
|
const __m128i y = _mm_loadu_si128((const __m128i*)(Y + i));
|
||||||
|
|
||||||
|
// Check if any X is non-zero: this actually provides a speedup as X is
|
||||||
|
// usually sparse.
|
||||||
|
if (_mm_movemask_epi8(_mm_cmpeq_epi32(x, zero)) != 0xFFFF) {
|
||||||
|
const __m128i xy_128 = _mm_add_epi32(x, y);
|
||||||
|
sumXY_128 = _mm_add_epi32(sumXY_128, xy_128);
|
||||||
|
|
||||||
|
sumX_128 = _mm_add_epi32(sumX_128, x);
|
||||||
|
|
||||||
|
// Analyze the different X + Y.
|
||||||
|
_mm_storeu_si128((__m128i*)tmp, xy_128);
|
||||||
|
|
||||||
|
ANALYZE_XY(0);
|
||||||
|
ANALYZE_XY(1);
|
||||||
|
ANALYZE_XY(2);
|
||||||
|
ANALYZE_XY(3);
|
||||||
|
} else {
|
||||||
|
// X is fully 0, so only deal with Y.
|
||||||
|
sumXY_128 = _mm_add_epi32(sumXY_128, y);
|
||||||
|
|
||||||
|
ANALYZE_X_OR_Y(Y, 0);
|
||||||
|
ANALYZE_X_OR_Y(Y, 1);
|
||||||
|
ANALYZE_X_OR_Y(Y, 2);
|
||||||
|
ANALYZE_X_OR_Y(Y, 3);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sum up sumX_128 to get sumX.
|
||||||
|
_mm_storeu_si128((__m128i*)tmp, sumX_128);
|
||||||
|
sumX = tmp[3] + tmp[2] + tmp[1] + tmp[0];
|
||||||
|
|
||||||
|
// Sum up sumXY_128 to get sumXY.
|
||||||
|
_mm_storeu_si128((__m128i*)tmp, sumXY_128);
|
||||||
|
sumXY = tmp[3] + tmp[2] + tmp[1] + tmp[0];
|
||||||
|
|
||||||
|
retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY);
|
||||||
|
return (float)retval;
|
||||||
|
}
|
||||||
|
#undef ANALYZE_X_OR_Y
|
||||||
|
#undef ANALYZE_XY
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
// Entry point
|
// Entry point
|
||||||
|
|
||||||
@ -261,6 +335,7 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitSSE2(void) {
|
|||||||
VP8LCollectColorBlueTransforms = CollectColorBlueTransforms;
|
VP8LCollectColorBlueTransforms = CollectColorBlueTransforms;
|
||||||
VP8LCollectColorRedTransforms = CollectColorRedTransforms;
|
VP8LCollectColorRedTransforms = CollectColorRedTransforms;
|
||||||
VP8LHistogramAdd = HistogramAdd;
|
VP8LHistogramAdd = HistogramAdd;
|
||||||
|
VP8LCombinedShannonEntropy = CombinedShannonEntropy;
|
||||||
}
|
}
|
||||||
|
|
||||||
#else // !WEBP_USE_SSE2
|
#else // !WEBP_USE_SSE2
|
||||||
|
Loading…
x
Reference in New Issue
Block a user