mirror of
https://github.com/webmproject/libwebp.git
synced 2024-12-28 14:38:21 +01:00
Merge changes I6eac17e5,I32d2b514
* changes:
  dec_neon: add TM8uv
  dsp: initialize VP8PredChroma8 in VP8DspInit()
This commit is contained in:
commit
f399d30764
@ -463,10 +463,7 @@ const VP8PredFunc VP8PredLuma16[NUM_B_DC_MODES] = {
|
|||||||
DC16NoTop, DC16NoLeft, DC16NoTopLeft
|
DC16NoTop, DC16NoLeft, DC16NoTopLeft
|
||||||
};
|
};
|
||||||
|
|
||||||
const VP8PredFunc VP8PredChroma8[NUM_B_DC_MODES] = {
|
// Chroma predictor table, indexed by prediction mode. Deliberately not
// 'const': its entries are assigned at runtime in VP8DspInit().
VP8PredFunc VP8PredChroma8[NUM_B_DC_MODES];
|
||||||
DC8uv, TM8uv, VE8uv, HE8uv,
|
|
||||||
DC8uvNoTop, DC8uvNoLeft, DC8uvNoTopLeft
|
|
||||||
};
|
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
// Edge filtering functions
|
// Edge filtering functions
|
||||||
@ -721,6 +718,14 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8DspInit(void) {
|
|||||||
VP8PredLuma4[8] = HD4;
|
VP8PredLuma4[8] = HD4;
|
||||||
VP8PredLuma4[9] = HU4;
|
VP8PredLuma4[9] = HU4;
|
||||||
|
|
||||||
|
VP8PredChroma8[0] = DC8uv;
|
||||||
|
VP8PredChroma8[1] = TM8uv;
|
||||||
|
VP8PredChroma8[2] = VE8uv;
|
||||||
|
VP8PredChroma8[3] = HE8uv;
|
||||||
|
VP8PredChroma8[4] = DC8uvNoTop;
|
||||||
|
VP8PredChroma8[5] = DC8uvNoLeft;
|
||||||
|
VP8PredChroma8[6] = DC8uvNoTopLeft;
|
||||||
|
|
||||||
// If defined, use CPUInfo() to overwrite some pointers with faster versions.
|
// If defined, use CPUInfo() to overwrite some pointers with faster versions.
|
||||||
if (VP8GetCPUInfo != NULL) {
|
if (VP8GetCPUInfo != NULL) {
|
||||||
#if defined(WEBP_USE_SSE2)
|
#if defined(WEBP_USE_SSE2)
|
||||||
|
@ -1281,11 +1281,15 @@ static void DC4(uint8_t* dst) { // DC
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void TM4(uint8_t* dst) { // TrueMotion
|
// TrueMotion (4x4 + 8x8)
|
||||||
|
static WEBP_INLINE void TrueMotion(uint8_t* dst, int size) {
|
||||||
const uint8x8_t TL = vdup_n_u8(dst[-BPS - 1]); // top-left pixel 'A[-1]'
|
const uint8x8_t TL = vdup_n_u8(dst[-BPS - 1]); // top-left pixel 'A[-1]'
|
||||||
const uint8x8_t T = vld1_u8(dst - BPS); // top row 'A[0..3]'
|
const uint8x8_t T = vld1_u8(dst - BPS); // top row 'A[0..3]'
|
||||||
const int16x8_t d = vreinterpretq_s16_u16(vsubl_u8(T, TL)); // A[c] - A[-1]
|
const int16x8_t d = vreinterpretq_s16_u16(vsubl_u8(T, TL)); // A[c] - A[-1]
|
||||||
const int16x8_t l0 = ConvertU8ToS16(vld1_u8(dst + 0 * BPS - 1)); // left edge
|
int y;
|
||||||
|
for (y = 0; y < size; y += 4) {
|
||||||
|
// left edge
|
||||||
|
const int16x8_t l0 = ConvertU8ToS16(vld1_u8(dst + 0 * BPS - 1));
|
||||||
const int16x8_t l1 = ConvertU8ToS16(vld1_u8(dst + 1 * BPS - 1));
|
const int16x8_t l1 = ConvertU8ToS16(vld1_u8(dst + 1 * BPS - 1));
|
||||||
const int16x8_t l2 = ConvertU8ToS16(vld1_u8(dst + 2 * BPS - 1));
|
const int16x8_t l2 = ConvertU8ToS16(vld1_u8(dst + 2 * BPS - 1));
|
||||||
const int16x8_t l3 = ConvertU8ToS16(vld1_u8(dst + 3 * BPS - 1));
|
const int16x8_t l3 = ConvertU8ToS16(vld1_u8(dst + 3 * BPS - 1));
|
||||||
@ -1302,12 +1306,23 @@ static void TM4(uint8_t* dst) { // TrueMotion
|
|||||||
const uint32x2_t r1_u32 = vreinterpret_u32_u8(vqmovun_s16(r1));
|
const uint32x2_t r1_u32 = vreinterpret_u32_u8(vqmovun_s16(r1));
|
||||||
const uint32x2_t r2_u32 = vreinterpret_u32_u8(vqmovun_s16(r2));
|
const uint32x2_t r2_u32 = vreinterpret_u32_u8(vqmovun_s16(r2));
|
||||||
const uint32x2_t r3_u32 = vreinterpret_u32_u8(vqmovun_s16(r3));
|
const uint32x2_t r3_u32 = vreinterpret_u32_u8(vqmovun_s16(r3));
|
||||||
|
if (size == 4) {
|
||||||
vst1_lane_u32((uint32_t*)(dst + 0 * BPS), r0_u32, 0);
|
vst1_lane_u32((uint32_t*)(dst + 0 * BPS), r0_u32, 0);
|
||||||
vst1_lane_u32((uint32_t*)(dst + 1 * BPS), r1_u32, 0);
|
vst1_lane_u32((uint32_t*)(dst + 1 * BPS), r1_u32, 0);
|
||||||
vst1_lane_u32((uint32_t*)(dst + 2 * BPS), r2_u32, 0);
|
vst1_lane_u32((uint32_t*)(dst + 2 * BPS), r2_u32, 0);
|
||||||
vst1_lane_u32((uint32_t*)(dst + 3 * BPS), r3_u32, 0);
|
vst1_lane_u32((uint32_t*)(dst + 3 * BPS), r3_u32, 0);
|
||||||
|
} else {
|
||||||
|
vst1_u32((uint32_t*)(dst + 0 * BPS), r0_u32);
|
||||||
|
vst1_u32((uint32_t*)(dst + 1 * BPS), r1_u32);
|
||||||
|
vst1_u32((uint32_t*)(dst + 2 * BPS), r2_u32);
|
||||||
|
vst1_u32((uint32_t*)(dst + 3 * BPS), r3_u32);
|
||||||
|
}
|
||||||
|
dst += 4 * BPS;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 4x4 TrueMotion predictor: forwards to the shared TrueMotion() helper with
// size 4. 'dst' is the block to predict; it is passed through unchanged.
// The call is a plain statement rather than 'return TrueMotion(...)':
// returning an expression from a void function is not valid ISO C.
static void TM4(uint8_t* dst) { TrueMotion(dst, 4); }
|
||||||
|
|
||||||
static void VE4(uint8_t* dst) { // vertical
|
static void VE4(uint8_t* dst) { // vertical
|
||||||
// NB: avoid vld1_u64 here as an alignment hint may be added -> SIGBUS.
|
// NB: avoid vld1_u64 here as an alignment hint may be added -> SIGBUS.
|
||||||
const uint64x1_t A0 = vreinterpret_u64_u8(vld1_u8(dst - BPS - 1)); // top row
|
const uint64x1_t A0 = vreinterpret_u64_u8(vld1_u8(dst - BPS - 1)); // top row
|
||||||
@ -1371,6 +1386,11 @@ static void LD4(uint8_t* dst) { // Down-left
|
|||||||
vst1_lane_u32((uint32_t*)(dst + 3 * BPS), r3, 0);
|
vst1_lane_u32((uint32_t*)(dst + 3 * BPS), r3, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//------------------------------------------------------------------------------
|
||||||
|
// Chroma
|
||||||
|
|
||||||
|
// 8x8 TrueMotion predictor: forwards to the shared TrueMotion() helper with
// size 8. 'dst' is the block to predict; it is passed through unchanged.
// The call is a plain statement rather than 'return TrueMotion(...)':
// returning an expression from a void function is not valid ISO C.
static void TM8uv(uint8_t* dst) { TrueMotion(dst, 8); }
|
||||||
|
|
||||||
#endif // WEBP_USE_NEON
|
#endif // WEBP_USE_NEON
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
@ -1407,5 +1427,7 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8DspInitNEON(void) {
|
|||||||
VP8PredLuma4[2] = VE4;
|
VP8PredLuma4[2] = VE4;
|
||||||
VP8PredLuma4[4] = RD4;
|
VP8PredLuma4[4] = RD4;
|
||||||
VP8PredLuma4[6] = LD4;
|
VP8PredLuma4[6] = LD4;
|
||||||
|
|
||||||
|
VP8PredChroma8[1] = TM8uv;
|
||||||
#endif // WEBP_USE_NEON
|
#endif // WEBP_USE_NEON
|
||||||
}
|
}
|
||||||
|
@ -176,7 +176,7 @@ extern VP8WHT VP8TransformWHT;
|
|||||||
// assumed accessible when needed.
|
// assumed accessible when needed.
|
||||||
typedef void (*VP8PredFunc)(uint8_t* dst);
|
typedef void (*VP8PredFunc)(uint8_t* dst);
|
||||||
extern const VP8PredFunc VP8PredLuma16[/* NUM_B_DC_MODES */];
|
extern const VP8PredFunc VP8PredLuma16[/* NUM_B_DC_MODES */];
|
||||||
extern const VP8PredFunc VP8PredChroma8[/* NUM_B_DC_MODES */];
|
extern VP8PredFunc VP8PredChroma8[/* NUM_B_DC_MODES */];
|
||||||
extern VP8PredFunc VP8PredLuma4[/* NUM_BMODES */];
|
extern VP8PredFunc VP8PredLuma4[/* NUM_BMODES */];
|
||||||
|
|
||||||
// clipping tables (for filtering)
|
// clipping tables (for filtering)
|
||||||
|
Loading…
Reference in New Issue
Block a user