mirror of
https://github.com/webmproject/libwebp.git
synced 2024-12-28 14:38:21 +01:00
SSE2 variants of Subtract-Green: Rectify loop condition
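When exactly 4 pixels are left, they should be processed with SSE2: the loop condition `i + 4 < num_pixels` stopped one iteration early, so it is changed to `i + 4 <= num_pixels`. Decoding is marginally faster (~0.4%). Encoding speed: no observable difference. Change-Id: I3cf21c07145a560ff795451e65e64faf148d5c3e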
This commit is contained in:
parent
daccbf400d
commit
4fd7c82e6a
@@ -123,7 +123,7 @@ static uint32_t Predictor13(uint32_t left, const uint32_t* const top) {
|
||||
static void SubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixels) {
|
||||
const __m128i mask = _mm_set1_epi32(0x0000ff00);
|
||||
int i;
|
||||
for (i = 0; i + 4 < num_pixels; i += 4) {
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
const __m128i in = _mm_loadu_si128((__m128i*)&argb_data[i]);
|
||||
const __m128i in_00g0 = _mm_and_si128(in, mask); // 00g0|00g0|...
|
||||
const __m128i in_0g00 = _mm_slli_epi32(in_00g0, 8); // 0g00|0g00|...
|
||||
@@ -139,7 +139,7 @@ static void SubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixels) {
|
||||
static void AddGreenToBlueAndRed(uint32_t* argb_data, int num_pixels) {
|
||||
const __m128i mask = _mm_set1_epi32(0x0000ff00);
|
||||
int i;
|
||||
for (i = 0; i + 4 < num_pixels; i += 4) {
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
const __m128i in = _mm_loadu_si128((__m128i*)&argb_data[i]);
|
||||
const __m128i in_00g0 = _mm_and_si128(in, mask); // 00g0|00g0|...
|
||||
const __m128i in_0g00 = _mm_slli_epi32(in_00g0, 8); // 0g00|0g00|...
|
||||
|
Loading…
Reference in New Issue
Block a user