mirror of
https://github.com/webmproject/libwebp.git
synced 2025-01-26 22:52:55 +01:00
QuantizeBlock SSE2 Optimization:
Another store-to-load forwarding block was detected, originating in the function FTransform. FTransform saves its output data as four separate stores of 8 bytes each. When this data is later loaded by the QuantizeBlock function in a single 16-byte chunk, the load cannot be forwarded from the narrower stores, which causes a store-to-load forwarding block. The fix was made in the FTransform function, where each pair of consecutive 8-byte values is merged into one 16-byte register and then stored to memory. This fix gives a ~21% function-level gain and a 1.6% user-level gain. Change-Id: Idc27c307d5083f3ebe206d3ca19059e5bd465992
This commit is contained in:
parent
2bc0dc3edc
commit
27bfeee43a
@ -445,10 +445,10 @@ static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
|
||||
// -> f1 = f1 + 1 - (a3 == 0)
|
||||
const __m128i g1 = _mm_add_epi16(f1, _mm_cmpeq_epi16(a32, zero));
|
||||
|
||||
_mm_storel_epi64((__m128i*)&out[ 0], d0);
|
||||
_mm_storel_epi64((__m128i*)&out[ 4], g1);
|
||||
_mm_storel_epi64((__m128i*)&out[ 8], d2);
|
||||
_mm_storel_epi64((__m128i*)&out[12], f3);
|
||||
const __m128i d0_g1 = _mm_unpacklo_epi64(d0, g1);
|
||||
const __m128i d2_f3 = _mm_unpacklo_epi64(d2, f3);
|
||||
_mm_storeu_si128((__m128i*)&out[0], d0_g1);
|
||||
_mm_storeu_si128((__m128i*)&out[8], d2_f3);
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user