mirror of
				https://github.com/webmproject/libwebp.git
				synced 2025-10-31 18:35:41 +01:00 
			
		
		
		
	wasm: Enable neon add,sub with sat builtins
For performance testing.

BUG=webp:352
Change-Id: I3fab48ee610437ac07dd603208972edf17c6f50b
This commit is contained in:
		| @@ -332,10 +332,18 @@ static void Transform(const int16_t* in, uint8_t* dst, int do_two) { | |||||||
| // For testing purposes. | // For testing purposes. | ||||||
| // #define ENABLE_X86_BUILTIN_ADDSUB_SAT | // #define ENABLE_X86_BUILTIN_ADDSUB_SAT | ||||||
|  |  | ||||||
|  | #if defined(__aarch64__) | ||||||
|  | // For now, this only works for aarch64 | ||||||
|  | // TODO(slavarnway): Find a workaround for the ICE when using clang-3.9.1 | ||||||
|  | #define ENABLE_NEON_BUILTIN_ADDSUB_SAT | ||||||
|  | #endif | ||||||
|  |  | ||||||
| static WEBP_INLINE uint8x16 uint8x16_add_sat(const uint8x16 a, | static WEBP_INLINE uint8x16 uint8x16_add_sat(const uint8x16 a, | ||||||
|                                              const uint8x16 b) { |                                              const uint8x16 b) { | ||||||
| #ifdef ENABLE_X86_BUILTIN_ADDSUB_SAT | #if defined(ENABLE_X86_BUILTIN_ADDSUB_SAT) | ||||||
|   return (uint8x16)__builtin_ia32_paddusb128(a, b); |   return (uint8x16)__builtin_ia32_paddusb128(a, b); | ||||||
|  | #elif defined(ENABLE_NEON_BUILTIN_ADDSUB_SAT) | ||||||
|  |   return (uint8x16)__builtin_neon_vqaddq_v(a, b, 48); | ||||||
| #else | #else | ||||||
|   // Generic implementation for non-x86 |   // Generic implementation for non-x86 | ||||||
|   const uint8x16 zero = splat_uint8(0); |   const uint8x16 zero = splat_uint8(0); | ||||||
| @@ -352,8 +360,10 @@ static WEBP_INLINE uint8x16 uint8x16_add_sat(const uint8x16 a, | |||||||
| } | } | ||||||
|  |  | ||||||
| static WEBP_INLINE int8x16 int8x16_add_sat(const int8x16 a, const int8x16 b) { | static WEBP_INLINE int8x16 int8x16_add_sat(const int8x16 a, const int8x16 b) { | ||||||
| #ifdef ENABLE_X86_BUILTIN_ADDSUB_SAT | #if defined(ENABLE_X86_BUILTIN_ADDSUB_SAT) | ||||||
|   return (int8x16)__builtin_ia32_paddsb128(a, b); |   return (int8x16)__builtin_ia32_paddsb128(a, b); | ||||||
|  | #elif defined(ENABLE_NEON_BUILTIN_ADDSUB_SAT) | ||||||
|  |   return (int8x16)__builtin_neon_vqaddq_v(a, b, 32); | ||||||
| #else | #else | ||||||
|   // Generic implementation for non-x86 |   // Generic implementation for non-x86 | ||||||
|   const int8x16 zero = splat_uint8(0); |   const int8x16 zero = splat_uint8(0); | ||||||
| @@ -372,8 +382,10 @@ static WEBP_INLINE int8x16 int8x16_add_sat(const int8x16 a, const int8x16 b) { | |||||||
|  |  | ||||||
| static WEBP_INLINE uint8x16 uint8x16_sub_sat(const uint8x16 a, | static WEBP_INLINE uint8x16 uint8x16_sub_sat(const uint8x16 a, | ||||||
|                                              const uint8x16 b) { |                                              const uint8x16 b) { | ||||||
| #ifdef ENABLE_X86_BUILTIN_ADDSUB_SAT | #if defined(ENABLE_X86_BUILTIN_ADDSUB_SAT) | ||||||
|   return (uint8x16)__builtin_ia32_psubusb128(a, b); |   return (uint8x16)__builtin_ia32_psubusb128(a, b); | ||||||
|  | #elif defined(ENABLE_NEON_BUILTIN_ADDSUB_SAT) | ||||||
|  |   return  (int8x16)__builtin_neon_vqsubq_v(a, b, 48); | ||||||
| #else | #else | ||||||
|   // Generic implementation for non-x86 |   // Generic implementation for non-x86 | ||||||
|   const uint8x16 zero = splat_uint8(0); |   const uint8x16 zero = splat_uint8(0); | ||||||
| @@ -390,8 +402,10 @@ static WEBP_INLINE uint8x16 uint8x16_sub_sat(const uint8x16 a, | |||||||
| } | } | ||||||
|  |  | ||||||
| static WEBP_INLINE int8x16 int8x16_sub_sat(const int8x16 a, const int8x16 b) { | static WEBP_INLINE int8x16 int8x16_sub_sat(const int8x16 a, const int8x16 b) { | ||||||
| #ifdef ENABLE_X86_BUILTIN_ADDSUB_SAT | #if defined(ENABLE_X86_BUILTIN_ADDSUB_SAT) | ||||||
|   return (int8x16)__builtin_ia32_psubsb128(a, b); |   return (int8x16)__builtin_ia32_psubsb128(a, b); | ||||||
|  | #elif defined(ENABLE_NEON_BUILTIN_ADDSUB_SAT) | ||||||
|  |   return  (int8x16)__builtin_neon_vqsubq_v(a, b, 32); | ||||||
| #else | #else | ||||||
|   // Generic implementation for non-x86 |   // Generic implementation for non-x86 | ||||||
|   const int8x16 zero = splat_uint8(0); |   const int8x16 zero = splat_uint8(0); | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user