mirror of
https://github.com/webmproject/libwebp.git
synced 2025-07-16 22:09:57 +02:00
4-5% faster encoding using SSE2 for GetResidualCost
new file: cost_sse2.c Change-Id: I4896c07f5ff2443ef743f4435fe2758d95a672ed
This commit is contained in:
@ -914,33 +914,6 @@ static int Quantize2Blocks(int16_t in[32], int16_t out[32],
|
||||
return nz;
|
||||
}
|
||||
|
||||
// Forward declaration.
|
||||
void VP8SetResidualCoeffsSSE2(const int16_t* const coeffs,
|
||||
VP8Residual* const res);
|
||||
|
||||
void VP8SetResidualCoeffsSSE2(const int16_t* const coeffs,
|
||||
VP8Residual* const res) {
|
||||
const __m128i c0 = _mm_loadu_si128((const __m128i*)coeffs);
|
||||
const __m128i c1 = _mm_loadu_si128((const __m128i*)(coeffs + 8));
|
||||
// Use SSE to compare 8 values with a single instruction.
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i m0 = _mm_cmpeq_epi16(c0, zero);
|
||||
const __m128i m1 = _mm_cmpeq_epi16(c1, zero);
|
||||
// Get the comparison results as a bitmask, consisting of two times 16 bits:
|
||||
// two identical bits for each result. Concatenate both bitmasks to get a
|
||||
// single 32 bit value. Negate the mask to get the position of entries that
|
||||
// are not equal to zero. We don't need to mask out least significant bits
|
||||
// according to res->first, since coeffs[0] is 0 if res->first > 0
|
||||
const uint32_t mask =
|
||||
~(((uint32_t)_mm_movemask_epi8(m1) << 16) | _mm_movemask_epi8(m0));
|
||||
// The position of the most significant non-zero bit indicates the position of
|
||||
// the last non-zero value. Divide the result by two because __movemask_epi8
|
||||
// operates on 8 bit values instead of 16 bit values.
|
||||
assert(res->first == 0 || coeffs[0] == 0);
|
||||
res->last = mask ? (BitsLog2Floor(mask) >> 1) : -1;
|
||||
res->coeffs = coeffs;
|
||||
}
|
||||
|
||||
#endif // WEBP_USE_SSE2
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
@ -965,4 +938,3 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInitSSE2(void) {
|
||||
VP8TDisto16x16 = Disto16x16;
|
||||
#endif // WEBP_USE_SSE2
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user