From b94cee98fbad26c421ca79082b3b5db9b4580fea Mon Sep 17 00:00:00 2001
From: James Zern
Date: Mon, 20 Nov 2017 20:34:05 -0800
Subject: [PATCH] dec_sse2: remove HE8uv_SSE2

with gcc-4.8, clang-4.0.1/5 this is no faster (actually up to 2x slower)
than the code generated for memset (0x01010... * dst[-1]). shuffles in
sse4 recover a bit, but performance is still down.

Change-Id: Ie85e8353f8ede559d0b05a1d388787fd18ecc80f
---
 src/dsp/dec_sse2.c | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/src/dsp/dec_sse2.c b/src/dsp/dec_sse2.c
index c96e8e96..b3840faf 100644
--- a/src/dsp/dec_sse2.c
+++ b/src/dsp/dec_sse2.c
@@ -1127,15 +1127,6 @@ static void VE8uv_SSE2(uint8_t* dst) { // vertical
   }
 }

-static void HE8uv_SSE2(uint8_t* dst) { // horizontal
-  int j;
-  for (j = 0; j < 8; ++j) {
-    const __m128i values = _mm_set1_epi8(dst[-1]);
-    _mm_storel_epi64((__m128i*)dst, values);
-    dst += BPS;
-  }
-}
-
 // helper for chroma-DC predictions
 static WEBP_INLINE void Put8x8uv_SSE2(uint8_t v, uint8_t* dst) {
   int j;
@@ -1224,7 +1215,6 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8DspInitSSE2(void) {
   VP8PredChroma8[0] = DC8uv_SSE2;
   VP8PredChroma8[1] = TM8uv_SSE2;
   VP8PredChroma8[2] = VE8uv_SSE2;
-  VP8PredChroma8[3] = HE8uv_SSE2;
   VP8PredChroma8[4] = DC8uvNoTop_SSE2;
   VP8PredChroma8[5] = DC8uvNoLeft_SSE2;
   VP8PredChroma8[6] = DC8uvNoTopLeft_SSE2;