mirror of
https://github.com/webmproject/libwebp.git
synced 2024-12-27 22:28:22 +01:00
lossless_neon: enable subtract green for aarch64
similar to:
1ba61b0
enable NEON intrinsics in aarch64 builds
vtbl1_u8 is available everywhere except in Xcode-based iOS arm64 builds;
use vtbl1q_u8 there.
Performance varies with the input; a 1-3% speedup on encode was observed.
Change-Id: Ifec35b37eb856acfcf69ed7f16fa078cd40b7034
This commit is contained in:
parent
72831f6b28
commit
416e1cea9b
@ -259,20 +259,44 @@ static uint32_t Predictor13(uint32_t left, const uint32_t* const top) {
|
|||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
// Subtract-Green Transform
|
// Subtract-Green Transform
|
||||||
|
|
||||||
// vtbl? are unavailable in iOS/arm64 builds.
|
// vtbl?_u8 are marked unavailable for iOS arm64, use wider versions there.
|
||||||
#if !defined(__aarch64__)
|
#if defined(__APPLE__) && defined(__aarch64__) && \
|
||||||
|
defined(__apple_build_version__)
|
||||||
|
#define USE_VTBLQ
|
||||||
|
#endif
|
||||||
|
|
||||||
// 255 = byte will be zero'd
|
#ifdef USE_VTBLQ
|
||||||
|
// 255 = byte will be zeroed
|
||||||
|
static const uint8_t kGreenShuffle[16] = {
|
||||||
|
1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255
|
||||||
|
};
|
||||||
|
|
||||||
|
static WEBP_INLINE uint8x16_t DoGreenShuffle(const uint8x16_t argb,
|
||||||
|
const uint8x16_t shuffle) {
|
||||||
|
return vcombine_u8(vtbl1q_u8(argb, vget_low_u8(shuffle)),
|
||||||
|
vtbl1q_u8(argb, vget_high_u8(shuffle)));
|
||||||
|
}
|
||||||
|
#else // !USE_VTBLQ
|
||||||
|
// 255 = byte will be zeroed
|
||||||
static const uint8_t kGreenShuffle[8] = { 1, 255, 1, 255, 5, 255, 5, 255 };
|
static const uint8_t kGreenShuffle[8] = { 1, 255, 1, 255, 5, 255, 5, 255 };
|
||||||
|
|
||||||
|
static WEBP_INLINE uint8x16_t DoGreenShuffle(const uint8x16_t argb,
|
||||||
|
const uint8x8_t shuffle) {
|
||||||
|
return vcombine_u8(vtbl1_u8(vget_low_u8(argb), shuffle),
|
||||||
|
vtbl1_u8(vget_high_u8(argb), shuffle));
|
||||||
|
}
|
||||||
|
#endif // USE_VTBLQ
|
||||||
|
|
||||||
static void SubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixels) {
|
static void SubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixels) {
|
||||||
const uint32_t* const end = argb_data + (num_pixels & ~3);
|
const uint32_t* const end = argb_data + (num_pixels & ~3);
|
||||||
|
#ifdef USE_VTBLQ
|
||||||
|
const uint8x16_t shuffle = vld1q_u8(kGreenShuffle);
|
||||||
|
#else
|
||||||
const uint8x8_t shuffle = vld1_u8(kGreenShuffle);
|
const uint8x8_t shuffle = vld1_u8(kGreenShuffle);
|
||||||
|
#endif
|
||||||
for (; argb_data < end; argb_data += 4) {
|
for (; argb_data < end; argb_data += 4) {
|
||||||
const uint8x16_t argb = vld1q_u8((uint8_t*)argb_data);
|
const uint8x16_t argb = vld1q_u8((uint8_t*)argb_data);
|
||||||
const uint8x16_t greens =
|
const uint8x16_t greens = DoGreenShuffle(argb, shuffle);
|
||||||
vcombine_u8(vtbl1_u8(vget_low_u8(argb), shuffle),
|
|
||||||
vtbl1_u8(vget_high_u8(argb), shuffle));
|
|
||||||
vst1q_u8((uint8_t*)argb_data, vsubq_u8(argb, greens));
|
vst1q_u8((uint8_t*)argb_data, vsubq_u8(argb, greens));
|
||||||
}
|
}
|
||||||
// fallthrough and finish off with plain-C
|
// fallthrough and finish off with plain-C
|
||||||
@ -281,19 +305,21 @@ static void SubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixels) {
|
|||||||
|
|
||||||
static void AddGreenToBlueAndRed(uint32_t* argb_data, int num_pixels) {
|
static void AddGreenToBlueAndRed(uint32_t* argb_data, int num_pixels) {
|
||||||
const uint32_t* const end = argb_data + (num_pixels & ~3);
|
const uint32_t* const end = argb_data + (num_pixels & ~3);
|
||||||
|
#ifdef USE_VTBLQ
|
||||||
|
const uint8x16_t shuffle = vld1q_u8(kGreenShuffle);
|
||||||
|
#else
|
||||||
const uint8x8_t shuffle = vld1_u8(kGreenShuffle);
|
const uint8x8_t shuffle = vld1_u8(kGreenShuffle);
|
||||||
|
#endif
|
||||||
for (; argb_data < end; argb_data += 4) {
|
for (; argb_data < end; argb_data += 4) {
|
||||||
const uint8x16_t argb = vld1q_u8((uint8_t*)argb_data);
|
const uint8x16_t argb = vld1q_u8((uint8_t*)argb_data);
|
||||||
const uint8x16_t greens =
|
const uint8x16_t greens = DoGreenShuffle(argb, shuffle);
|
||||||
vcombine_u8(vtbl1_u8(vget_low_u8(argb), shuffle),
|
|
||||||
vtbl1_u8(vget_high_u8(argb), shuffle));
|
|
||||||
vst1q_u8((uint8_t*)argb_data, vaddq_u8(argb, greens));
|
vst1q_u8((uint8_t*)argb_data, vaddq_u8(argb, greens));
|
||||||
}
|
}
|
||||||
// fallthrough and finish off with plain-C
|
// fallthrough and finish off with plain-C
|
||||||
VP8LAddGreenToBlueAndRed_C(argb_data, num_pixels & 3);
|
VP8LAddGreenToBlueAndRed_C(argb_data, num_pixels & 3);
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // !__aarch64__
|
#undef USE_VTBLQ
|
||||||
|
|
||||||
#endif // WEBP_USE_INTRINSICS
|
#endif // WEBP_USE_INTRINSICS
|
||||||
|
|
||||||
@ -320,11 +346,9 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInitNEON(void) {
|
|||||||
VP8LPredictors[12] = Predictor12;
|
VP8LPredictors[12] = Predictor12;
|
||||||
VP8LPredictors[13] = Predictor13;
|
VP8LPredictors[13] = Predictor13;
|
||||||
|
|
||||||
#if !defined(__aarch64__)
|
|
||||||
VP8LSubtractGreenFromBlueAndRed = SubtractGreenFromBlueAndRed;
|
VP8LSubtractGreenFromBlueAndRed = SubtractGreenFromBlueAndRed;
|
||||||
VP8LAddGreenToBlueAndRed = AddGreenToBlueAndRed;
|
VP8LAddGreenToBlueAndRed = AddGreenToBlueAndRed;
|
||||||
#endif
|
#endif
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif // WEBP_USE_NEON
|
#endif // WEBP_USE_NEON
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user