Implement ExtractGreen_SSE2

Change-Id: I74f50e0c01603a640aa8cf7c9658d477e696ea8a
This commit is contained in:
Vincent Rabaud 2023-06-19 10:58:07 +02:00
parent fe80fbbd6b
commit 64819c7cf3

View File

@ -144,6 +144,46 @@ static int ExtractAlpha_SSE2(const uint8_t* WEBP_RESTRICT argb, int argb_stride,
return (alpha_and == 0xff);
}
static void ExtractGreen_SSE2(const uint32_t* WEBP_RESTRICT argb,
uint8_t* WEBP_RESTRICT alpha, int size) {
int i;
const __m128i mask = _mm_set1_epi32(0xff);
const __m128i* src = (const __m128i*)argb;
for (i = 0; i + 16 <= size; i += 16, src += 4) {
const __m128i a0 = _mm_loadu_si128(src + 0);
const __m128i a1 = _mm_loadu_si128(src + 1);
const __m128i a2 = _mm_loadu_si128(src + 2);
const __m128i a3 = _mm_loadu_si128(src + 3);
const __m128i b0 = _mm_srli_epi32(a0, 8);
const __m128i b1 = _mm_srli_epi32(a1, 8);
const __m128i b2 = _mm_srli_epi32(a2, 8);
const __m128i b3 = _mm_srli_epi32(a3, 8);
const __m128i c0 = _mm_and_si128(b0, mask);
const __m128i c1 = _mm_and_si128(b1, mask);
const __m128i c2 = _mm_and_si128(b2, mask);
const __m128i c3 = _mm_and_si128(b3, mask);
const __m128i d0 = _mm_packs_epi32(c0, c1);
const __m128i d1 = _mm_packs_epi32(c2, c3);
const __m128i e = _mm_packus_epi16(d0, d1);
// store
_mm_storeu_si128((__m128i*)&alpha[i], e);
}
if (i + 8 <= size) {
const __m128i a0 = _mm_loadu_si128(src + 0);
const __m128i a1 = _mm_loadu_si128(src + 1);
const __m128i b0 = _mm_srli_epi32(a0, 8);
const __m128i b1 = _mm_srli_epi32(a1, 8);
const __m128i c0 = _mm_and_si128(b0, mask);
const __m128i c1 = _mm_and_si128(b1, mask);
const __m128i d = _mm_packs_epi32(c0, c1);
const __m128i e = _mm_packus_epi16(d, d);
_mm_storel_epi64((__m128i*)&alpha[i], e);
i += 8;
}
for (; i < size; ++i) alpha[i] = argb[i] >> 8;
}
//------------------------------------------------------------------------------
// Non-dither premultiplied modes
@ -354,6 +394,7 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessingSSE2(void) {
WebPDispatchAlpha = DispatchAlpha_SSE2;
WebPDispatchAlphaToGreen = DispatchAlphaToGreen_SSE2;
WebPExtractAlpha = ExtractAlpha_SSE2;
WebPExtractGreen = ExtractGreen_SSE2;
WebPHasAlpha8b = HasAlpha8b_SSE2;
WebPHasAlpha32b = HasAlpha32b_SSE2;