dsp/dec*: use WEBP_RESTRICT qualifier

A minor improvement for arm targets with ndk r27/gcc-13 in H/VFilter8 (a couple fewer moves w/aarch64) and much better vectorization of DitherCombine8x8_C in most targets. This only affects non-vector pointers; any vector pointers are left as a follow up. Change-Id: I03e73e6d6404261bb8408a9ae76a4b6ef142f8f0
2025-07-21 00:09:52 +02:00 · 2021-07-03 17:52:50 -07:00
parent 02eac8a741
commit 201894ef24
8 changed files with 121 additions and 82 deletions
--- a/src/dsp/dec_sse2.c
+++ b/src/dsp/dec_sse2.c
@ -30,7 +30,8 @@
 //------------------------------------------------------------------------------
 // Transforms (Paragraph 14.4)

-static void Transform_SSE2(const int16_t* in, uint8_t* dst, int do_two) {
+static void Transform_SSE2(const int16_t* WEBP_RESTRICT in,
+                           uint8_t* WEBP_RESTRICT dst, int do_two) {
  // This implementation makes use of 16-bit fixed point versions of two
  // multiply constants:
  //    K1 = sqrt(2) * cos (pi/8) ~= 85627 / 2^16
@ -197,7 +198,8 @@ static void Transform_SSE2(const int16_t* in, uint8_t* dst, int do_two) {

 #if (USE_TRANSFORM_AC3 == 1)

-static void TransformAC3_SSE2(const int16_t* in, uint8_t* dst) {
+static void TransformAC3_SSE2(const int16_t* WEBP_RESTRICT in,
+                              uint8_t* WEBP_RESTRICT dst) {
  const __m128i A = _mm_set1_epi16(in[0] + 4);
  const __m128i c4 = _mm_set1_epi16(WEBP_TRANSFORM_AC3_MUL2(in[4]));
  const __m128i d4 = _mm_set1_epi16(WEBP_TRANSFORM_AC3_MUL1(in[4]));
@ -792,8 +794,8 @@ static void HFilter16i_SSE2(uint8_t* p, int stride,
 }

 // 8-pixels wide variant, for chroma filtering
-static void VFilter8_SSE2(uint8_t* u, uint8_t* v, int stride,
-                          int thresh, int ithresh, int hev_thresh) {
+static void VFilter8_SSE2(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
+                          int stride, int thresh, int ithresh, int hev_thresh) {
  __m128i mask;
  __m128i t1, p2, p1, p0, q0, q1, q2;

@ -817,8 +819,8 @@ static void VFilter8_SSE2(uint8_t* u, uint8_t* v, int stride,
  STOREUV(q2, u, v, 2 * stride);
 }

-static void HFilter8_SSE2(uint8_t* u, uint8_t* v, int stride,
-                          int thresh, int ithresh, int hev_thresh) {
+static void HFilter8_SSE2(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
+                          int stride, int thresh, int ithresh, int hev_thresh) {
  __m128i mask;
  __m128i p3, p2, p1, p0, q0, q1, q2, q3;

@ -837,7 +839,8 @@ static void HFilter8_SSE2(uint8_t* u, uint8_t* v, int stride,
  Store16x4_SSE2(&q0, &q1, &q2, &q3, u, v, stride);
 }

-static void VFilter8i_SSE2(uint8_t* u, uint8_t* v, int stride,
+static void VFilter8i_SSE2(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
+                           int stride,
                           int thresh, int ithresh, int hev_thresh) {
  __m128i mask;
  __m128i t1, t2, p1, p0, q0, q1;
@ -863,7 +866,8 @@ static void VFilter8i_SSE2(uint8_t* u, uint8_t* v, int stride,
  STOREUV(q1, u, v, 1 * stride);
 }

-static void HFilter8i_SSE2(uint8_t* u, uint8_t* v, int stride,
+static void HFilter8i_SSE2(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
+                           int stride,
                           int thresh, int ithresh, int hev_thresh) {
  __m128i mask;
  __m128i t1, t2, p1, p0, q0, q1;