enc_neon: enable intrinsics-only functions

CollectHistogram / SSE* / QuantizeBlock have no inline-assembly equivalents,
so enable them where possible. Use USE_INTRINSICS to control the borderline
cases; that macro is left undefined for now.

Change-Id: I62235bc4ddb8aa0769d1ce18a90e0d7da1e18155
This commit is contained in:
James Zern 2014-04-26 12:57:29 -07:00
parent f937e01261
commit 42b35e086b

View File

@ -15,7 +15,7 @@
#if defined(WEBP_USE_NEON) #if defined(WEBP_USE_NEON)
#define USE_INTRINSICS // use intrinsics when possible // #define USE_INTRINSICS // use intrinsics when possible
#include <assert.h> #include <assert.h>
#include <arm_neon.h> #include <arm_neon.h>
@ -36,7 +36,7 @@ static const int16_t kC2 = 17734; // half of kC2, actually. See comment above.
// (with gcc-4.6). So we disable it for now. Later, it'll be conditional to // (with gcc-4.6). So we disable it for now. Later, it'll be conditional to
// USE_INTRINSICS define. // USE_INTRINSICS define.
// With gcc-4.8, it's a little faster speed than inlined-assembly. // With gcc-4.8, it's a little faster speed than inlined-assembly.
#if 0 // defined(USE_INTRINSICS) #if defined(USE_INTRINSICS)
// Treats 'v' as an uint8x8_t and zero extends to an int16x8_t. // Treats 'v' as an uint8x8_t and zero extends to an int16x8_t.
static WEBP_INLINE int16x8_t ConvertU8ToS16(uint32x2_t v) { static WEBP_INLINE int16x8_t ConvertU8ToS16(uint32x2_t v) {
@ -264,7 +264,7 @@ static uint8x16_t Load4x4(const uint8_t* src) {
// Forward transform. // Forward transform.
#if 0 // #ifdef USE_INTRINSICS #if defined(USE_INTRINSICS)
static WEBP_INLINE void Transpose4x4_S16(const int16x4_t A, const int16x4_t B, static WEBP_INLINE void Transpose4x4_S16(const int16x4_t A, const int16x4_t B,
const int16x4_t C, const int16x4_t D, const int16x4_t C, const int16x4_t D,
@ -583,7 +583,7 @@ static void FTransformWHT(const int16_t* src, int16_t* out) {
// (with gcc-4.6). So we disable it for now. Later, it'll be conditional to // (with gcc-4.6). So we disable it for now. Later, it'll be conditional to
// USE_INTRINSICS define. // USE_INTRINSICS define.
// With gcc-4.8, it's only slightly slower than the inlined. // With gcc-4.8, it's only slightly slower than the inlined.
#if 0 // #ifdef USE_INTRINSICS #if defined(USE_INTRINSICS)
// Zero extend an uint16x4_t 'v' to an int32x4_t. // Zero extend an uint16x4_t 'v' to an int32x4_t.
static WEBP_INLINE int32x4_t ConvertU16ToS32(uint16x4_t v) { static WEBP_INLINE int32x4_t ConvertU16ToS32(uint16x4_t v) {
@ -909,9 +909,6 @@ static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
return D; return D;
} }
#if defined(USE_INTRINSICS)
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
static void CollectHistogram(const uint8_t* ref, const uint8_t* pred, static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
@ -1002,13 +999,11 @@ static int SSE4x4(const uint8_t* a, const uint8_t* b) {
return SumToInt(vpaddlq_u16(prod)); return SumToInt(vpaddlq_u16(prod));
} }
#endif // USE_INTRINSICS
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
// Compilation with gcc-4.6.x is problematic for now. Disable this function // Compilation with gcc-4.6.x is problematic for now. Disable this function
// in this case. // in this case.
#if (__GNUC__ <= 4 && __GNUC_MINOR__ < 8) #if !LOCAL_GCC_PREREQ(4,8)
#define SKIP_QUANTIZE #define SKIP_QUANTIZE
#endif #endif
@ -1088,15 +1083,11 @@ void VP8EncDspInitNEON(void) {
VP8TDisto4x4 = Disto4x4; VP8TDisto4x4 = Disto4x4;
VP8TDisto16x16 = Disto16x16; VP8TDisto16x16 = Disto16x16;
#if defined(USE_INTRINSICS)
VP8CollectHistogram = CollectHistogram; VP8CollectHistogram = CollectHistogram;
VP8SSE16x16 = SSE16x16; VP8SSE16x16 = SSE16x16;
VP8SSE16x8 = SSE16x8; VP8SSE16x8 = SSE16x8;
VP8SSE8x8 = SSE8x8; VP8SSE8x8 = SSE8x8;
VP8SSE4x4 = SSE4x4; VP8SSE4x4 = SSE4x4;
#else
(void)Load4x4; // to avoid a warning
#endif
#if !defined(SKIP_QUANTIZE) #if !defined(SKIP_QUANTIZE)
VP8EncQuantizeBlock = QuantizeBlock; VP8EncQuantizeBlock = QuantizeBlock;
#endif #endif