mirror of
https://github.com/webmproject/libwebp.git
synced 2024-12-26 13:48:21 +01:00
SSE optimization for vector mismatch.
Change-Id: I564b822033b59d86635230f29ed6197e306a2c4f
This commit is contained in:
parent
7db53831a9
commit
8ce975ac82
@ -262,6 +262,11 @@ extern VP8LHistogramAddFunc VP8LHistogramAdd;
|
||||
// -----------------------------------------------------------------------------
|
||||
// PrefixEncode()
|
||||
|
||||
typedef int (*VP8LVectorMismatchFunc)(const uint32_t* const array1,
|
||||
const uint32_t* const array2, int length);
|
||||
// Returns the first index at which array1 and array2 differ, or 'length' if
// their first 'length' elements are all equal.
|
||||
extern VP8LVectorMismatchFunc VP8LVectorMismatch;
|
||||
|
||||
static WEBP_INLINE int VP8LBitsLog2Ceiling(uint32_t n) {
|
||||
const int log_floor = BitsLog2Floor(n);
|
||||
if (n == (n & ~(n - 1))) // zero or a power of two.
|
||||
|
@ -1053,6 +1053,17 @@ void VP8LColorSpaceTransform(int width, int height, int bits, int quality,
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
// Plain-C vector comparison: walks both arrays from the start and returns the
// index of the first position at which they hold different values. If the
// first 'length' entries are identical (or 'length' is not positive), the
// scan stops at the bound and that position is returned instead.
static int VectorMismatch(const uint32_t* const array1,
                          const uint32_t* const array2, int length) {
  int i;

  for (i = 0; i < length; ++i) {
    if (array1[i] != array2[i]) break;  // first differing entry found
  }
  return i;
}
|
||||
|
||||
// Bundles multiple (1, 2, 4 or 8) pixels into a single pixel.
|
||||
void VP8LBundleColorMap(const uint8_t* const row, int width,
|
||||
int xbits, uint32_t* const dst) {
|
||||
@ -1149,6 +1160,8 @@ GetEntropyUnrefinedHelperFunc VP8LGetEntropyUnrefinedHelper;
|
||||
|
||||
VP8LHistogramAddFunc VP8LHistogramAdd;
|
||||
|
||||
VP8LVectorMismatchFunc VP8LVectorMismatch;
|
||||
|
||||
extern void VP8LEncDspInitSSE2(void);
|
||||
extern void VP8LEncDspInitSSE41(void);
|
||||
extern void VP8LEncDspInitNEON(void);
|
||||
@ -1181,6 +1194,8 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInit(void) {
|
||||
|
||||
VP8LHistogramAdd = HistogramAdd;
|
||||
|
||||
VP8LVectorMismatch = VectorMismatch;
|
||||
|
||||
// If defined, use CPUInfo() to overwrite some pointers with faster versions.
|
||||
if (VP8GetCPUInfo != NULL) {
|
||||
#if defined(WEBP_USE_SSE2)
|
||||
|
@ -324,6 +324,57 @@ static float CombinedShannonEntropy(const int X[256], const int Y[256]) {
|
||||
#undef ANALYZE_X_OR_Y
|
||||
#undef ANALYZE_XY
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
// SSE2 vector comparison: returns the index of the first position at which
// array1 and array2 hold different values, or 'length' when their first
// 'length' entries are all equal. Entries are compared four at a time with
// 128-bit unaligned loads; a scalar loop then pinpoints the exact index (and
// handles whatever the vector code did not cover).
static int VectorMismatch(const uint32_t* const array1,
                          const uint32_t* const array2, int length) {
  int pos = 0;

  if (length < 12) {
    // Short input: test at most the first two groups of four entries, each
    // only when the group lies entirely inside the arrays.
    if (length >= 4) {
      const __m128i lo1 = _mm_loadu_si128((const __m128i*)(array1 + 0));
      const __m128i lo2 = _mm_loadu_si128((const __m128i*)(array2 + 0));
      if (_mm_movemask_epi8(_mm_cmpeq_epi32(lo1, lo2)) == 0xffff) {
        pos = 4;
        if (length >= 8) {
          const __m128i hi1 = _mm_loadu_si128((const __m128i*)(array1 + 4));
          const __m128i hi2 = _mm_loadu_si128((const __m128i*)(array2 + 4));
          if (_mm_movemask_epi8(_mm_cmpeq_epi32(hi1, hi2)) == 0xffff) {
            pos = 8;
          }
        }
      }
    }
  } else {
    // Long input: the loop is unrolled twice and the next group of four is
    // always loaded before the current comparison is tested. Both tricks give
    // a speedup of about 10% for this function. Also, max_limit can be
    // MAX_LENGTH=4096 at most.
    // Each pass reads entries up to index pos + 11, which is why the loop is
    // only entered with 12 <= length and only repeated while
    // pos + 12 < length.
    __m128i cur1 = _mm_loadu_si128((const __m128i*)(array1 + 0));
    __m128i cur2 = _mm_loadu_si128((const __m128i*)(array2 + 0));
    for (;;) {
      const __m128i eq_first = _mm_cmpeq_epi32(cur1, cur2);
      const __m128i next1 =
          _mm_loadu_si128((const __m128i*)(array1 + pos + 4));
      const __m128i next2 =
          _mm_loadu_si128((const __m128i*)(array2 + pos + 4));
      __m128i eq_second;
      // All 16 byte-mask bits set means the four 32-bit lanes are equal.
      if (_mm_movemask_epi8(eq_first) != 0xffff) break;
      pos += 4;

      eq_second = _mm_cmpeq_epi32(next1, next2);
      cur1 = _mm_loadu_si128((const __m128i*)(array1 + pos + 4));
      cur2 = _mm_loadu_si128((const __m128i*)(array2 + pos + 4));
      if (_mm_movemask_epi8(eq_second) != 0xffff) break;
      pos += 4;

      if (pos + 12 >= length) break;
    }
  }

  // Scalar tail: locate the exact mismatch inside the failing group, or
  // finish the entries the vector code did not examine.
  for (; pos < length; ++pos) {
    if (array1[pos] != array2[pos]) break;
  }
  return pos;
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Entry point
|
||||
|
||||
@ -336,6 +387,7 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitSSE2(void) {
|
||||
VP8LCollectColorRedTransforms = CollectColorRedTransforms;
|
||||
VP8LHistogramAdd = HistogramAdd;
|
||||
VP8LCombinedShannonEntropy = CombinedShannonEntropy;
|
||||
VP8LVectorMismatch = VectorMismatch;
|
||||
}
|
||||
|
||||
#else // !WEBP_USE_SSE2
|
||||
|
@ -57,32 +57,19 @@ static int DistanceToPlaneCode(int xsize, int dist) {
|
||||
return dist + 120;
|
||||
}
|
||||
|
||||
// Returns the exact index where array1 and array2 are different if this
|
||||
// index is strictly greater than best_len_match. Otherwise, it returns 0.
|
||||
// Returns the exact index where array1 and array2 are different. For an index
|
||||
// less than or equal to best_len_match, the return value just has to be strictly
|
||||
// less than best_len_match. The current behavior is to return 0 if this index
|
||||
// is best_len_match, and the index itself otherwise.
|
||||
// If the arrays do not differ within the first max_limit elements, it returns
// max_limit.
|
||||
static WEBP_INLINE int FindMatchLength(const uint32_t* const array1,
|
||||
const uint32_t* const array2,
|
||||
int best_len_match,
|
||||
int max_limit) {
|
||||
int match_len;
|
||||
|
||||
int best_len_match, int max_limit) {
|
||||
// Before 'expensive' linear match, check if the two arrays match at the
|
||||
// current best length index.
|
||||
if (array1[best_len_match] != array2[best_len_match]) return 0;
|
||||
|
||||
#if defined(WEBP_USE_SSE2)
|
||||
// Check if anything is different up to best_len_match excluded.
|
||||
// memcmp seems to be slower on ARM so it is disabled for now.
|
||||
if (memcmp(array1, array2, best_len_match * sizeof(*array1))) return 0;
|
||||
match_len = best_len_match + 1;
|
||||
#else
|
||||
match_len = 0;
|
||||
#endif
|
||||
|
||||
while (match_len < max_limit && array1[match_len] == array2[match_len]) {
|
||||
++match_len;
|
||||
}
|
||||
return match_len;
|
||||
return VP8LVectorMismatch(array1, array2, max_limit);
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
Loading…
Reference in New Issue
Block a user