mirror of
				https://github.com/webmproject/libwebp.git
				synced 2025-10-31 10:25:46 +01:00 
			
		
		
		
	dec_neon: add LD4 intra predictor
based on SSE2 version, ~55% faster

Change-Id: I782282ffc31dcf238890b3ba0decccf1d793dad0
This commit is contained in:
		| @@ -1277,6 +1277,25 @@ static void VE4(uint8_t* dst) {    // vertical | |||||||
|   } |   } | ||||||
| } | } | ||||||
|  |  | ||||||
|  | static void LD4(uint8_t* dst) {    // Down-left | ||||||
|  |   // Note using the same shift trick as VE4() is slower here. | ||||||
|  |   const uint8x8_t ABCDEFGH = vld1_u8(dst - BPS + 0); | ||||||
|  |   const uint8x8_t BCDEFGH0 = vld1_u8(dst - BPS + 1); | ||||||
|  |   const uint8x8_t CDEFGH00 = vld1_u8(dst - BPS + 2); | ||||||
|  |   const uint8x8_t CDEFGHH0 = vset_lane_u8(dst[-BPS + 7], CDEFGH00, 6); | ||||||
|  |   const uint8x8_t avg1 = vhadd_u8(ABCDEFGH, CDEFGHH0); | ||||||
|  |   const uint8x8_t avg2 = vrhadd_u8(avg1, BCDEFGH0); | ||||||
|  |   const uint64x1_t avg2_u64 = vreinterpret_u64_u8(avg2); | ||||||
|  |   const uint32x2_t r0 = vreinterpret_u32_u8(avg2); | ||||||
|  |   const uint32x2_t r1 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 8)); | ||||||
|  |   const uint32x2_t r2 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 16)); | ||||||
|  |   const uint32x2_t r3 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 24)); | ||||||
|  |   vst1_lane_u32((uint32_t*)(dst + 0 * BPS), r0, 0); | ||||||
|  |   vst1_lane_u32((uint32_t*)(dst + 1 * BPS), r1, 0); | ||||||
|  |   vst1_lane_u32((uint32_t*)(dst + 2 * BPS), r2, 0); | ||||||
|  |   vst1_lane_u32((uint32_t*)(dst + 3 * BPS), r3, 0); | ||||||
|  | } | ||||||
|  |  | ||||||
| #endif   // WEBP_USE_NEON | #endif   // WEBP_USE_NEON | ||||||
|  |  | ||||||
| //------------------------------------------------------------------------------ | //------------------------------------------------------------------------------ | ||||||
| @@ -1309,5 +1328,6 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8DspInitNEON(void) { | |||||||
|   VP8SimpleHFilter16i = SimpleHFilter16i; |   VP8SimpleHFilter16i = SimpleHFilter16i; | ||||||
|  |  | ||||||
|   VP8PredLuma4[2] = VE4; |   VP8PredLuma4[2] = VE4; | ||||||
|  |   VP8PredLuma4[6] = LD4; | ||||||
| #endif   // WEBP_USE_NEON | #endif   // WEBP_USE_NEON | ||||||
| } | } | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user