mirror of
https://github.com/webmproject/libwebp.git
synced 2025-07-13 06:24:27 +02:00
cpu.h: add WEBP_AARCH64
and define it to true for __aarch64__ and Win Arm64 + Visual Studio. Microsoft's compiler (cl.exe) does not define __aarch64__, but relies on _M_ARM64 & _M_ARM64EC Bug: b/277254922 Change-Id: I20e4fa07a4031599db69e3d7ba9050345315ef51
This commit is contained in:
@ -29,7 +29,7 @@ static void SetResidualCoeffs_NEON(const int16_t* const coeffs,
|
||||
const uint8x16_t eob = vcombine_u8(vqmovn_u16(eob_0), vqmovn_u16(eob_1));
|
||||
const uint8x16_t masked = vandq_u8(eob, vld1q_u8(position));
|
||||
|
||||
#ifdef __aarch64__
|
||||
#if WEBP_AARCH64
|
||||
res->last = vmaxvq_u8(masked) - 1;
|
||||
#else
|
||||
const uint8x8_t eob_8x8 = vmax_u8(vget_low_u8(masked), vget_high_u8(masked));
|
||||
@ -43,7 +43,7 @@ static void SetResidualCoeffs_NEON(const int16_t* const coeffs,
|
||||
|
||||
vst1_lane_s32(&res->last, vreinterpret_s32_u32(eob_32x2), 0);
|
||||
--res->last;
|
||||
#endif // __aarch64__
|
||||
#endif // WEBP_AARCH64
|
||||
|
||||
res->coeffs = coeffs;
|
||||
}
|
||||
|
@ -105,6 +105,12 @@
|
||||
#define WEBP_USE_INTRINSICS
|
||||
#endif
|
||||
|
||||
#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
|
||||
#define WEBP_AARCH64 1
|
||||
#else
|
||||
#define WEBP_AARCH64 0
|
||||
#endif
|
||||
|
||||
#if defined(WEBP_USE_NEON) && !defined(WEBP_HAVE_NEON)
|
||||
#define WEBP_HAVE_NEON
|
||||
#endif
|
||||
@ -134,8 +140,7 @@
|
||||
#define WEBP_NEON_OMIT_C_CODE 0
|
||||
#endif
|
||||
|
||||
#if !(LOCAL_CLANG_PREREQ(3, 8) || LOCAL_GCC_PREREQ(4, 8) || \
|
||||
defined(__aarch64__))
|
||||
#if !(LOCAL_CLANG_PREREQ(3, 8) || LOCAL_GCC_PREREQ(4, 8) || WEBP_AARCH64)
|
||||
#define WEBP_NEON_WORK_AROUND_GCC 1
|
||||
#else
|
||||
#define WEBP_NEON_WORK_AROUND_GCC 0
|
||||
|
@ -1428,7 +1428,7 @@ static WEBP_INLINE void DC8_NEON(uint8_t* dst, int do_top, int do_left) {
|
||||
|
||||
if (do_top) {
|
||||
const uint8x8_t A = vld1_u8(dst - BPS); // top row
|
||||
#if defined(__aarch64__)
|
||||
#if WEBP_AARCH64
|
||||
const uint16_t p2 = vaddlv_u8(A);
|
||||
sum_top = vdupq_n_u16(p2);
|
||||
#else
|
||||
@ -1511,7 +1511,7 @@ static WEBP_INLINE void DC16_NEON(uint8_t* dst, int do_top, int do_left) {
|
||||
|
||||
if (do_top) {
|
||||
const uint8x16_t A = vld1q_u8(dst - BPS); // top row
|
||||
#if defined(__aarch64__)
|
||||
#if WEBP_AARCH64
|
||||
const uint16_t p3 = vaddlvq_u8(A);
|
||||
sum_top = vdupq_n_u16(p3);
|
||||
#else
|
||||
|
@ -764,7 +764,7 @@ static WEBP_INLINE void AccumulateSSE16_NEON(const uint8_t* const a,
|
||||
|
||||
// Horizontal sum of all four uint32_t values in 'sum'.
|
||||
static int SumToInt_NEON(uint32x4_t sum) {
|
||||
#if defined(__aarch64__)
|
||||
#if WEBP_AARCH64
|
||||
return (int)vaddvq_u32(sum);
|
||||
#else
|
||||
const uint64x2_t sum2 = vpaddlq_u32(sum);
|
||||
@ -865,7 +865,7 @@ static int QuantizeBlock_NEON(int16_t in[16], int16_t out[16],
|
||||
uint8x8x4_t shuffles;
|
||||
// vtbl?_u8 are marked unavailable for iOS arm64 with Xcode < 6.3, use
|
||||
// non-standard versions there.
|
||||
#if defined(__APPLE__) && defined(__aarch64__) && \
|
||||
#if defined(__APPLE__) && WEBP_AARCH64 && \
|
||||
defined(__apple_build_version__) && (__apple_build_version__< 6020037)
|
||||
uint8x16x2_t all_out;
|
||||
INIT_VECTOR2(all_out, vreinterpretq_u8_s16(out0), vreinterpretq_u8_s16(out1));
|
||||
|
@ -25,7 +25,7 @@
|
||||
|
||||
// vtbl?_u8 are marked unavailable for iOS arm64 with Xcode < 6.3, use
|
||||
// non-standard versions there.
|
||||
#if defined(__APPLE__) && defined(__aarch64__) && \
|
||||
#if defined(__APPLE__) && WEBP_AARCH64 && \
|
||||
defined(__apple_build_version__) && (__apple_build_version__< 6020037)
|
||||
#define USE_VTBLQ
|
||||
#endif
|
||||
|
@ -498,7 +498,7 @@ static void PredictorAdd13_NEON(const uint32_t* in, const uint32_t* upper,
|
||||
|
||||
// vtbl?_u8 are marked unavailable for iOS arm64 with Xcode < 6.3, use
|
||||
// non-standard versions there.
|
||||
#if defined(__APPLE__) && defined(__aarch64__) && \
|
||||
#if defined(__APPLE__) && WEBP_AARCH64 && \
|
||||
defined(__apple_build_version__) && (__apple_build_version__< 6020037)
|
||||
#define USE_VTBLQ
|
||||
#endif
|
||||
|
@ -21,7 +21,7 @@
|
||||
// Right now, some intrinsics functions seem slower, so we disable them
|
||||
// everywhere except newer clang/gcc or aarch64 where the inline assembly is
|
||||
// incompatible.
|
||||
#if LOCAL_CLANG_PREREQ(3,8) || LOCAL_GCC_PREREQ(4,9) || defined(__aarch64__)
|
||||
#if LOCAL_CLANG_PREREQ(3,8) || LOCAL_GCC_PREREQ(4,9) || WEBP_AARCH64
|
||||
#define WEBP_USE_INTRINSICS // use intrinsics when possible
|
||||
#endif
|
||||
|
||||
@ -46,7 +46,7 @@
|
||||
// if using intrinsics, this flag avoids some functions that make gcc-4.6.3
|
||||
// crash ("internal compiler error: in immed_double_const, at emit-rtl.").
|
||||
// (probably similar to gcc.gnu.org/bugzilla/show_bug.cgi?id=48183)
|
||||
#if !(LOCAL_CLANG_PREREQ(3,8) || LOCAL_GCC_PREREQ(4,8) || defined(__aarch64__))
|
||||
#if !(LOCAL_CLANG_PREREQ(3,8) || LOCAL_GCC_PREREQ(4,8) || WEBP_AARCH64)
|
||||
#define WORK_AROUND_GCC
|
||||
#endif
|
||||
|
||||
|
@ -22,7 +22,7 @@
|
||||
#define IsFlat IsFlat_NEON
|
||||
|
||||
static uint32_t horizontal_add_uint32x4(const uint32x4_t a) {
|
||||
#if defined(__aarch64__)
|
||||
#if WEBP_AARCH64
|
||||
return vaddvq_u32(a);
|
||||
#else
|
||||
const uint64x2_t b = vpaddlq_u32(a);
|
||||
|
Reference in New Issue
Block a user