Merge "wasm: Replace 5 __builtin_shufflevector()" into portable-intrinsics

2025-08-30 07:42:27 +02:00 · 2017-07-11 22:57:32 +00:00
parent e83df9d208 3387fb6fa6
commit 09bcd9a397
1 changed files with 5 additions and 12 deletions
--- a/src/dsp/dec_wasm.c
+++ b/src/dsp/dec_wasm.c
@@ -104,10 +104,8 @@ static WEBP_INLINE int16x8 _mulhi_int16x8(const int16x8 in, const int32x4 k) {
  const int16x8 zero = (int16x8){0, 0, 0, 0, 0, 0, 0, 0};
  const int32x4 sixteen = (int32x4){16, 16, 16, 16};
  // Put in upper 16 bits so we can preserve the sign
-  const int32x4 in_lo =
+  const int32x4 in_lo = (int32x4)_unpacklo_epi16(zero, in);
-      (int32x4)__builtin_shufflevector(in, zero, 8, 0, 8, 1, 8, 2, 8, 3);
+  const int32x4 in_hi = (int32x4)_unpackhi_epi16(zero, in);
-  const int32x4 in_hi =
-      (int32x4)__builtin_shufflevector(in, zero, 8, 4, 8, 5, 8, 6, 8, 7);
  const int32x4 _lo = (in_lo >> sixteen) * k;
  const int32x4 _hi = (in_hi >> sixteen) * k;
  // only keep the upper 16 bits
@@ -621,12 +619,8 @@ static WEBP_INLINE void DoFilter6(int8x16* const p2, int8x16* const p1,
    const int16x8 m = not_hev & *mask;
    const int16x8 f = a & m;
-    const int16x8 f_lo =
+    const int16x8 f_lo = (int16x8)_unpacklo_epi8(zero, f);
-        (int16x8)__builtin_shufflevector((int8x16)f, zero, 16, 0, 16, 1, 16, 2,
+    const int16x8 f_hi = (int16x8)_unpackhi_epi8(zero, f);
-                                         16, 3, 16, 4, 16, 5, 16, 6, 16, 7);
-    const int16x8 f_hi = (int16x8)__builtin_shufflevector(
-        (int8x16)f, zero, 16, 8, 16, 9, 16, 10, 16, 11, 16, 12, 16, 13, 16, 14,
-        16, 15);
    const int16x8 f9_lo = _mulhi_int16x8(f_lo, k9);  // Filter (lo) * 9
    const int16x8 f9_hi = _mulhi_int16x8(f_hi, k9);  // Filter (hi) * 9
@@ -1250,8 +1244,7 @@ static void TrueMotion(uint8_t* dst, uint32_t size) {
          (uint8x16)int16x8_to_uint8x16_sat(base + top_base_0);
      const uint8x16 out_1 =
          (uint8x16)int16x8_to_uint8x16_sat(base + top_base_1);
-      const uint8x16 out = (uint8x16)__builtin_shufflevector(
+      const uint8x16 out = _unpacklo_epi64(out_0, out_1);
-          out_0, out_1, 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23);
      memcpy(dst, &out, 16);
    }
  }