lossless*sse2: improve non-const 16-bit vector creation

Use _mm_set1_epi32 instead of _mm_set_epi16 when building vectors from
non-constant 16-bit values; this reduces the number of shifts and ORs.

Change-Id: Ie2cb2ab815f642855d03c6f3001223bcac4bd35c
This commit is contained in:
James Zern
2018-02-17 17:59:20 -08:00
parent 551948e45f
commit 8043504f95
2 changed files with 15 additions and 25 deletions

View File

@@ -453,14 +453,11 @@ static void TransformColorInverse_SSE2(const VP8LMultipliers* const m,
int num_pixels, uint32_t* dst) {
// sign-extended multiplying constants, pre-shifted by 5.
#define CST(X) (((int16_t)(m->X << 8)) >> 5) // sign-extend
const __m128i mults_rb = _mm_set_epi16(
CST(green_to_red_), CST(green_to_blue_),
CST(green_to_red_), CST(green_to_blue_),
CST(green_to_red_), CST(green_to_blue_),
CST(green_to_red_), CST(green_to_blue_));
const __m128i mults_b2 = _mm_set_epi16(
CST(red_to_blue_), 0, CST(red_to_blue_), 0,
CST(red_to_blue_), 0, CST(red_to_blue_), 0);
#define MK_CST_16(HI, LO) \
_mm_set1_epi32((int)(((uint32_t)(HI) << 16) | ((LO) & 0xffff)))
const __m128i mults_rb = MK_CST_16(CST(green_to_red_), CST(green_to_blue_));
const __m128i mults_b2 = MK_CST_16(CST(red_to_blue_), 0);
#undef MK_CST_16
#undef CST
const __m128i mask_ag = _mm_set1_epi32(0xff00ff00); // alpha-green masks
int i;