SSE4.1 implementation of some alpha-processing functions

DispatchAlpha* functions are hard to speed up, compared to SSE2.
ExtractAlpha sees a ~15% speed-up though.

Change-Id: I8715c2defecbc832f469eed7e6ffd012146b52de
This commit is contained in:
Pascal Massimino
2015-06-19 14:23:38 +02:00
committed by James Zern
parent 7f9c98f21d
commit bfc300c7ff
6 changed files with 102 additions and 0 deletions

View File

@ -345,6 +345,7 @@ int (*WebPExtractAlpha)(const uint8_t*, int, int, int, uint8_t*, int);
extern void WebPInitAlphaProcessingMIPSdspR2(void);
extern void WebPInitAlphaProcessingSSE2(void);
extern void WebPInitAlphaProcessingSSE41(void);
static volatile VP8CPUInfo alpha_processing_last_cpuinfo_used =
(VP8CPUInfo)&alpha_processing_last_cpuinfo_used;
@ -365,6 +366,11 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessing(void) {
#if defined(WEBP_USE_SSE2)
if (VP8GetCPUInfo(kSSE2)) {
WebPInitAlphaProcessingSSE2();
#if defined(WEBP_USE_SSE41)
if (VP8GetCPUInfo(kSSE4_1)) {
WebPInitAlphaProcessingSSE41();
}
#endif
}
#endif
#if defined(WEBP_USE_MIPS_DSP_R2)