Merge "lossless_neon: enable subtract green for aarch64"

This commit is contained in:
James Zern 2015-02-02 16:01:40 -08:00 committed by Gerrit Code Review
commit b7de794622

View File

@ -259,20 +259,44 @@ static uint32_t Predictor13(uint32_t left, const uint32_t* const top) {
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
// Subtract-Green Transform // Subtract-Green Transform
// vtbl? are unavailable in iOS/arm64 builds. // vtbl?_u8 are marked unavailable for iOS arm64, use wider versions there.
#if !defined(__aarch64__) #if defined(__APPLE__) && defined(__aarch64__) && \
defined(__apple_build_version__)
#define USE_VTBLQ
#endif
// 255 = byte will be zero'd #ifdef USE_VTBLQ
// 255 = byte will be zeroed
static const uint8_t kGreenShuffle[16] = {
1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255
};
static WEBP_INLINE uint8x16_t DoGreenShuffle(const uint8x16_t argb,
const uint8x16_t shuffle) {
return vcombine_u8(vtbl1q_u8(argb, vget_low_u8(shuffle)),
vtbl1q_u8(argb, vget_high_u8(shuffle)));
}
#else // !USE_VTBLQ
// 255 = byte will be zeroed
static const uint8_t kGreenShuffle[8] = { 1, 255, 1, 255, 5, 255, 5, 255 }; static const uint8_t kGreenShuffle[8] = { 1, 255, 1, 255, 5, 255, 5, 255 };
static WEBP_INLINE uint8x16_t DoGreenShuffle(const uint8x16_t argb,
const uint8x8_t shuffle) {
return vcombine_u8(vtbl1_u8(vget_low_u8(argb), shuffle),
vtbl1_u8(vget_high_u8(argb), shuffle));
}
#endif // USE_VTBLQ
static void SubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixels) { static void SubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixels) {
const uint32_t* const end = argb_data + (num_pixels & ~3); const uint32_t* const end = argb_data + (num_pixels & ~3);
#ifdef USE_VTBLQ
const uint8x16_t shuffle = vld1q_u8(kGreenShuffle);
#else
const uint8x8_t shuffle = vld1_u8(kGreenShuffle); const uint8x8_t shuffle = vld1_u8(kGreenShuffle);
#endif
for (; argb_data < end; argb_data += 4) { for (; argb_data < end; argb_data += 4) {
const uint8x16_t argb = vld1q_u8((uint8_t*)argb_data); const uint8x16_t argb = vld1q_u8((uint8_t*)argb_data);
const uint8x16_t greens = const uint8x16_t greens = DoGreenShuffle(argb, shuffle);
vcombine_u8(vtbl1_u8(vget_low_u8(argb), shuffle),
vtbl1_u8(vget_high_u8(argb), shuffle));
vst1q_u8((uint8_t*)argb_data, vsubq_u8(argb, greens)); vst1q_u8((uint8_t*)argb_data, vsubq_u8(argb, greens));
} }
// fallthrough and finish off with plain-C // fallthrough and finish off with plain-C
@ -281,19 +305,21 @@ static void SubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixels) {
static void AddGreenToBlueAndRed(uint32_t* argb_data, int num_pixels) { static void AddGreenToBlueAndRed(uint32_t* argb_data, int num_pixels) {
const uint32_t* const end = argb_data + (num_pixels & ~3); const uint32_t* const end = argb_data + (num_pixels & ~3);
#ifdef USE_VTBLQ
const uint8x16_t shuffle = vld1q_u8(kGreenShuffle);
#else
const uint8x8_t shuffle = vld1_u8(kGreenShuffle); const uint8x8_t shuffle = vld1_u8(kGreenShuffle);
#endif
for (; argb_data < end; argb_data += 4) { for (; argb_data < end; argb_data += 4) {
const uint8x16_t argb = vld1q_u8((uint8_t*)argb_data); const uint8x16_t argb = vld1q_u8((uint8_t*)argb_data);
const uint8x16_t greens = const uint8x16_t greens = DoGreenShuffle(argb, shuffle);
vcombine_u8(vtbl1_u8(vget_low_u8(argb), shuffle),
vtbl1_u8(vget_high_u8(argb), shuffle));
vst1q_u8((uint8_t*)argb_data, vaddq_u8(argb, greens)); vst1q_u8((uint8_t*)argb_data, vaddq_u8(argb, greens));
} }
// fallthrough and finish off with plain-C // fallthrough and finish off with plain-C
VP8LAddGreenToBlueAndRed_C(argb_data, num_pixels & 3); VP8LAddGreenToBlueAndRed_C(argb_data, num_pixels & 3);
} }
#endif // !__aarch64__ #undef USE_VTBLQ
#endif // WEBP_USE_INTRINSICS #endif // WEBP_USE_INTRINSICS
@ -320,11 +346,9 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInitNEON(void) {
VP8LPredictors[12] = Predictor12; VP8LPredictors[12] = Predictor12;
VP8LPredictors[13] = Predictor13; VP8LPredictors[13] = Predictor13;
#if !defined(__aarch64__)
VP8LSubtractGreenFromBlueAndRed = SubtractGreenFromBlueAndRed; VP8LSubtractGreenFromBlueAndRed = SubtractGreenFromBlueAndRed;
VP8LAddGreenToBlueAndRed = AddGreenToBlueAndRed; VP8LAddGreenToBlueAndRed = AddGreenToBlueAndRed;
#endif #endif
#endif
#endif // WEBP_USE_NEON #endif // WEBP_USE_NEON
} }