Merge "SSE2: slightly faster (~5%) AddGreenToBlueAndRed()"

This commit is contained in:
Pascal Massimino 2015-06-24 09:37:44 +00:00 committed by Gerrit Code Review
commit abcb012841

View File

@@ -156,15 +156,13 @@ static uint32_t Predictor13(uint32_t left, const uint32_t* const top) {
// Subtract-Green Transform // Subtract-Green Transform
static void AddGreenToBlueAndRed(uint32_t* argb_data, int num_pixels) { static void AddGreenToBlueAndRed(uint32_t* argb_data, int num_pixels) {
const __m128i mask = _mm_set1_epi32(0x0000ff00);
int i; int i;
for (i = 0; i + 4 <= num_pixels; i += 4) { for (i = 0; i + 4 <= num_pixels; i += 4) {
const __m128i in = _mm_loadu_si128((__m128i*)&argb_data[i]); const __m128i in = _mm_loadu_si128((__m128i*)&argb_data[i]); // argb
const __m128i in_00g0 = _mm_and_si128(in, mask); // 00g0|00g0|... const __m128i A = _mm_srli_epi16(in, 8); // 0 a 0 g
const __m128i in_0g00 = _mm_slli_epi32(in_00g0, 8); // 0g00|0g00|... const __m128i B = _mm_shufflelo_epi16(A, _MM_SHUFFLE(2, 2, 0, 0));
const __m128i in_000g = _mm_srli_epi32(in_00g0, 8); // 000g|000g|... const __m128i C = _mm_shufflehi_epi16(B, _MM_SHUFFLE(2, 2, 0, 0)); // 0g0g
const __m128i in_0g0g = _mm_or_si128(in_0g00, in_000g); const __m128i out = _mm_add_epi8(in, C);
const __m128i out = _mm_add_epi8(in, in_0g0g);
_mm_storeu_si128((__m128i*)&argb_data[i], out); _mm_storeu_si128((__m128i*)&argb_data[i], out);
} }
// fallthrough and finish off with plain-C // fallthrough and finish off with plain-C