dsp/upsampling*: use WEBP_RESTRICT qualifier

This gives better vectorization in the C code, fewer instructions in NEON,
and some code reordering / better register usage in SSE2/SSE4 with
NDK r27 / gcc-13 / clang-16.

This only affects non-vector pointers; any vector pointers are left as a
follow-up.

Change-Id: Ib29980f778ad3dbb952178ad8dee39b8673c4ff8
This commit is contained in:
James Zern
2024-08-16 19:02:19 -07:00
parent 35915b389e
commit 23bbafbeb8
7 changed files with 137 additions and 76 deletions

View File

@@ -90,8 +90,9 @@
} while (0)
// Turn the macro into a function for reducing code-size when non-critical
static void Upsample32Pixels_SSE41(const uint8_t r1[], const uint8_t r2[],
uint8_t* const out) {
static void Upsample32Pixels_SSE41(const uint8_t* WEBP_RESTRICT const r1,
const uint8_t* WEBP_RESTRICT const r2,
uint8_t* WEBP_RESTRICT const out) {
UPSAMPLE_32PIXELS(r1, r2, out);
}
@@ -116,10 +117,14 @@ static void Upsample32Pixels_SSE41(const uint8_t r1[], const uint8_t r2[],
} while (0)
#define SSE4_UPSAMPLE_FUNC(FUNC_NAME, FUNC, XSTEP) \
static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \
const uint8_t* top_u, const uint8_t* top_v, \
const uint8_t* cur_u, const uint8_t* cur_v, \
uint8_t* top_dst, uint8_t* bottom_dst, int len) { \
static void FUNC_NAME(const uint8_t* WEBP_RESTRICT top_y, \
const uint8_t* WEBP_RESTRICT bottom_y, \
const uint8_t* WEBP_RESTRICT top_u, \
const uint8_t* WEBP_RESTRICT top_v, \
const uint8_t* WEBP_RESTRICT cur_u, \
const uint8_t* WEBP_RESTRICT cur_v, \
uint8_t* WEBP_RESTRICT top_dst, \
uint8_t* WEBP_RESTRICT bottom_dst, int len) { \
int uv_pos, pos; \
/* 16byte-aligned array to cache reconstructed u and v */ \
uint8_t uv_buf[14 * 32 + 15] = { 0 }; \
@@ -202,10 +207,14 @@ extern WebPYUV444Converter WebPYUV444Converters[/* MODE_LAST */];
extern void WebPInitYUV444ConvertersSSE41(void);
#define YUV444_FUNC(FUNC_NAME, CALL, CALL_C, XSTEP) \
extern void CALL_C(const uint8_t* y, const uint8_t* u, const uint8_t* v, \
uint8_t* dst, int len); \
static void FUNC_NAME(const uint8_t* y, const uint8_t* u, const uint8_t* v, \
uint8_t* dst, int len) { \
extern void CALL_C(const uint8_t* WEBP_RESTRICT y, \
const uint8_t* WEBP_RESTRICT u, \
const uint8_t* WEBP_RESTRICT v, \
uint8_t* WEBP_RESTRICT dst, int len); \
static void FUNC_NAME(const uint8_t* WEBP_RESTRICT y, \
const uint8_t* WEBP_RESTRICT u, \
const uint8_t* WEBP_RESTRICT v, \
uint8_t* WEBP_RESTRICT dst, int len) { \
int i; \
const int max_len = len & ~31; \
for (i = 0; i < max_len; i += 32) { \