From 35915b389eab7fa08c6db4feeb52aa0ff2dbde03 Mon Sep 17 00:00:00 2001 From: James Zern Date: Fri, 16 Aug 2024 16:55:00 -0700 Subject: [PATCH] dsp/rescaler*: use WEBP_RESTRICT qualifier Some improvement in the C code. No changes in NEON or SSE2 w/ndk r27/gcc-13/clang-16. This only affects non-vector pointers; any vector pointers are left as a follow up. Change-Id: I2316122db893f48f0afda90a147c83cac7f07526 --- src/dsp/dsp.h | 20 ++++++++++++-------- src/dsp/rescaler.c | 11 ++++++----- src/dsp/rescaler_mips32.c | 8 ++++---- src/dsp/rescaler_msa.c | 27 ++++++++++++++------------- src/dsp/rescaler_neon.c | 4 ++-- src/dsp/rescaler_sse2.c | 18 ++++++++---------- 6 files changed, 46 insertions(+), 42 deletions(-) diff --git a/src/dsp/dsp.h b/src/dsp/dsp.h index 30a1d43f..b7c2acf0 100644 --- a/src/dsp/dsp.h +++ b/src/dsp/dsp.h @@ -366,8 +366,9 @@ struct WebPRescaler; // Import a row of data and save its contribution in the rescaler. // 'channel' denotes the channel number to be imported. 'Expand' corresponds to // the wrk->x_expand case. Otherwise, 'Shrink' is to be used. -typedef void (*WebPRescalerImportRowFunc)(struct WebPRescaler* const wrk, - const uint8_t* src); +typedef void (*WebPRescalerImportRowFunc)( + struct WebPRescaler* WEBP_RESTRICT const wrk, + const uint8_t* WEBP_RESTRICT src); extern WebPRescalerImportRowFunc WebPRescalerImportRowExpand; extern WebPRescalerImportRowFunc WebPRescalerImportRowShrink; @@ -380,16 +381,19 @@ extern WebPRescalerExportRowFunc WebPRescalerExportRowExpand; extern WebPRescalerExportRowFunc WebPRescalerExportRowShrink; // Plain-C implementation, as fall-back. -extern void WebPRescalerImportRowExpand_C(struct WebPRescaler* const wrk, - const uint8_t* src); -extern void WebPRescalerImportRowShrink_C(struct WebPRescaler* const wrk, - const uint8_t* src); +extern void WebPRescalerImportRowExpand_C( + struct WebPRescaler* WEBP_RESTRICT const wrk, + const uint8_t* WEBP_RESTRICT src); +extern void WebPRescalerImportRowShrink_C( + struct WebPRescaler* WEBP_RESTRICT const wrk, + const uint8_t* WEBP_RESTRICT src); extern void WebPRescalerExportRowExpand_C(struct WebPRescaler* const wrk); extern void WebPRescalerExportRowShrink_C(struct WebPRescaler* const wrk); // Main entry calls: -extern void WebPRescalerImportRow(struct WebPRescaler* const wrk, - const uint8_t* src); +extern void WebPRescalerImportRow( + struct WebPRescaler* WEBP_RESTRICT const wrk, + const uint8_t* WEBP_RESTRICT src); // Export one row (starting at x_out position) from rescaler. extern void WebPRescalerExportRow(struct WebPRescaler* const wrk); diff --git a/src/dsp/rescaler.c b/src/dsp/rescaler.c index 325d8be1..a96ca669 100644 --- a/src/dsp/rescaler.c +++ b/src/dsp/rescaler.c @@ -26,8 +26,8 @@ //------------------------------------------------------------------------------ // Row import -void WebPRescalerImportRowExpand_C(WebPRescaler* const wrk, - const uint8_t* src) { +void WebPRescalerImportRowExpand_C(WebPRescaler* WEBP_RESTRICT const wrk, + const uint8_t* WEBP_RESTRICT src) { const int x_stride = wrk->num_channels; const int x_out_max = wrk->dst_width * wrk->num_channels; int channel; @@ -59,8 +59,8 @@ void WebPRescalerImportRowExpand_C(WebPRescaler* const wrk, } } -void WebPRescalerImportRowShrink_C(WebPRescaler* const wrk, - const uint8_t* src) { +void WebPRescalerImportRowShrink_C(WebPRescaler* WEBP_RESTRICT const wrk, + const uint8_t* WEBP_RESTRICT src) { const int x_stride = wrk->num_channels; const int x_out_max = wrk->dst_width * wrk->num_channels; int channel; @@ -158,7 +158,8 @@ void WebPRescalerExportRowShrink_C(WebPRescaler* const wrk) { //------------------------------------------------------------------------------ // Main entry calls -void WebPRescalerImportRow(WebPRescaler* const wrk, const uint8_t* src) { +void WebPRescalerImportRow(WebPRescaler* WEBP_RESTRICT const wrk, + const uint8_t* WEBP_RESTRICT src) { assert(!WebPRescalerInputDone(wrk)); if (!wrk->x_expand) { WebPRescalerImportRowShrink(wrk, src); diff --git a/src/dsp/rescaler_mips32.c b/src/dsp/rescaler_mips32.c index 61f63c61..b5168caa 100644 --- a/src/dsp/rescaler_mips32.c +++ b/src/dsp/rescaler_mips32.c @@ -21,8 +21,8 @@ //------------------------------------------------------------------------------ // Row import -static void ImportRowShrink_MIPS32(WebPRescaler* const wrk, - const uint8_t* src) { +static void ImportRowShrink_MIPS32(WebPRescaler* WEBP_RESTRICT const wrk, + const uint8_t* WEBP_RESTRICT src) { const int x_stride = wrk->num_channels; const int x_out_max = wrk->dst_width * wrk->num_channels; const int fx_scale = wrk->fx_scale; @@ -81,8 +81,8 @@ static void ImportRowShrink_MIPS32(WebPRescaler* const wrk, } } -static void ImportRowExpand_MIPS32(WebPRescaler* const wrk, - const uint8_t* src) { +static void ImportRowExpand_MIPS32(WebPRescaler* WEBP_RESTRICT const wrk, + const uint8_t* WEBP_RESTRICT src) { const int x_stride = wrk->num_channels; const int x_out_max = wrk->dst_width * wrk->num_channels; const int x_add = wrk->x_add; diff --git a/src/dsp/rescaler_msa.c b/src/dsp/rescaler_msa.c index 256dbdd4..954d0fdf 100644 --- a/src/dsp/rescaler_msa.c +++ b/src/dsp/rescaler_msa.c @@ -114,9 +114,9 @@ dst = __msa_copy_s_w((v4i32)t0, 0); \ } while (0) -static WEBP_INLINE void ExportRowExpand_0(const uint32_t* frow, uint8_t* dst, - int length, - WebPRescaler* const wrk) { +static WEBP_INLINE void ExportRowExpand_0( + const uint32_t* WEBP_RESTRICT frow, uint8_t* WEBP_RESTRICT dst, int length, + WebPRescaler* WEBP_RESTRICT const wrk) { const v4u32 scale = (v4u32)__msa_fill_w(wrk->fy_scale); const v4u32 shift = (v4u32)__msa_fill_w(WEBP_RESCALER_RFIX); const v4i32 zero = { 0 }; @@ -171,9 +171,10 @@ static WEBP_INLINE void ExportRowExpand_0(const uint32_t* frow, uint8_t* dst, } } -static WEBP_INLINE void ExportRowExpand_1(const uint32_t* frow, uint32_t* irow, - uint8_t* dst, int length, - WebPRescaler* const wrk) { +static WEBP_INLINE void ExportRowExpand_1( + const uint32_t* WEBP_RESTRICT frow, uint32_t* WEBP_RESTRICT irow, + uint8_t* WEBP_RESTRICT dst, int length, + WebPRescaler* WEBP_RESTRICT const wrk) { const uint32_t B = WEBP_RESCALER_FRAC(-wrk->y_accum, wrk->y_sub); const uint32_t A = (uint32_t)(WEBP_RESCALER_ONE - B); const v4i32 B1 = __msa_fill_w(B); @@ -262,10 +263,10 @@ static void RescalerExportRowExpand_MIPSdspR2(WebPRescaler* const wrk) { } #if 0 // disabled for now. TODO(skal): make match the C-code -static WEBP_INLINE void ExportRowShrink_0(const uint32_t* frow, uint32_t* irow, - uint8_t* dst, int length, - const uint32_t yscale, - WebPRescaler* const wrk) { +static WEBP_INLINE void ExportRowShrink_0( + const uint32_t* WEBP_RESTRICT frow, uint32_t* WEBP_RESTRICT irow, + uint8_t* WEBP_RESTRICT dst, int length, const uint32_t yscale, + WebPRescaler* WEBP_RESTRICT const wrk) { const v4u32 y_scale = (v4u32)__msa_fill_w(yscale); const v4u32 fxyscale = (v4u32)__msa_fill_w(wrk->fxy_scale); const v4u32 shiftval = (v4u32)__msa_fill_w(WEBP_RESCALER_RFIX); @@ -348,9 +349,9 @@ static WEBP_INLINE void ExportRowShrink_0(const uint32_t* frow, uint32_t* irow, } } -static WEBP_INLINE void ExportRowShrink_1(uint32_t* irow, uint8_t* dst, - int length, - WebPRescaler* const wrk) { +static WEBP_INLINE void ExportRowShrink_1( + uint32_t* WEBP_RESTRICT irow, uint8_t* WEBP_RESTRICT dst, int length, + WebPRescaler* WEBP_RESTRICT const wrk) { const v4u32 scale = (v4u32)__msa_fill_w(wrk->fxy_scale); const v4u32 shift = (v4u32)__msa_fill_w(WEBP_RESCALER_RFIX); const v4i32 zero = { 0 }; diff --git a/src/dsp/rescaler_neon.c b/src/dsp/rescaler_neon.c index 957a92db..ab4ddc00 100644 --- a/src/dsp/rescaler_neon.c +++ b/src/dsp/rescaler_neon.c @@ -45,8 +45,8 @@ #error "MULT_FIX/WEBP_RESCALER_RFIX need some more work" #endif -static uint32x4_t Interpolate_NEON(const rescaler_t* const frow, - const rescaler_t* const irow, +static uint32x4_t Interpolate_NEON(const rescaler_t* WEBP_RESTRICT const frow, + const rescaler_t* WEBP_RESTRICT const irow, uint32_t A, uint32_t B) { LOAD_32x4(frow, A0); LOAD_32x4(irow, B0); diff --git a/src/dsp/rescaler_sse2.c b/src/dsp/rescaler_sse2.c index 3f18e94e..e898e2ac 100644 --- a/src/dsp/rescaler_sse2.c +++ b/src/dsp/rescaler_sse2.c @@ -43,8 +43,8 @@ static void LoadEightPixels_SSE2(const uint8_t* const src, __m128i* out) { *out = _mm_unpacklo_epi8(A, zero); } -static void RescalerImportRowExpand_SSE2(WebPRescaler* const wrk, - const uint8_t* src) { +static void RescalerImportRowExpand_SSE2(WebPRescaler* WEBP_RESTRICT const wrk, + const uint8_t* WEBP_RESTRICT src) { rescaler_t* frow = wrk->frow; const rescaler_t* const frow_end = frow + wrk->dst_width * wrk->num_channels; const int x_add = wrk->x_add; @@ -109,8 +109,8 @@ static void RescalerImportRowExpand_SSE2(WebPRescaler* const wrk, assert(accum == 0); } -static void RescalerImportRowShrink_SSE2(WebPRescaler* const wrk, - const uint8_t* src) { +static void RescalerImportRowShrink_SSE2(WebPRescaler* WEBP_RESTRICT const wrk, + const uint8_t* WEBP_RESTRICT src) { const int x_sub = wrk->x_sub; int accum = 0; const __m128i zero = _mm_setzero_si128(); @@ -168,12 +168,10 @@ static void RescalerImportRowShrink_SSE2(WebPRescaler* const wrk, // Row export // load *src as epi64, multiply by mult and store result in [out0 ... out3] -static WEBP_INLINE void LoadDispatchAndMult_SSE2(const rescaler_t* const src, - const __m128i* const mult, - __m128i* const out0, - __m128i* const out1, - __m128i* const out2, - __m128i* const out3) { +static WEBP_INLINE void LoadDispatchAndMult_SSE2( + const rescaler_t* WEBP_RESTRICT const src, const __m128i* const mult, + __m128i* const out0, __m128i* const out1, __m128i* const out2, + __m128i* const out3) { const __m128i A0 = _mm_loadu_si128((const __m128i*)(src + 0)); const __m128i A1 = _mm_loadu_si128((const __m128i*)(src + 4)); const __m128i A2 = _mm_srli_epi64(A0, 32);