dsp/lossless*: use WEBP_RESTRICT qualifier

lossless_enc: better vectorization, most benefits seen in AddVector/Eq
              w/ndk r27/gcc-13/clang-16
lossless: minor reordering and some improvement to PredictorAdd5_SSE2
          w/gcc-13

This only affects non-vector pointers; any vector pointers are left as a
follow up.

Change-Id: I2356e314f391ee2f2c71f00bc6ee10097d3881e7
This commit is contained in:
James Zern
2024-08-16 16:51:11 -07:00
parent 04d4b4f387
commit a32b436bd5
14 changed files with 213 additions and 171 deletions

View File

@ -182,13 +182,13 @@ uint32_t VP8LPredictor13_C(const uint32_t* const left,
}
static void PredictorAdd0_C(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* out) {
int num_pixels, uint32_t* WEBP_RESTRICT out) {
int x;
(void)upper;
for (x = 0; x < num_pixels; ++x) out[x] = VP8LAddPixels(in[x], ARGB_BLACK);
}
static void PredictorAdd1_C(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* out) {
int num_pixels, uint32_t* WEBP_RESTRICT out) {
int i;
uint32_t left = out[-1];
(void)upper;
@ -441,8 +441,8 @@ static int is_big_endian(void) {
return (tmp.b[0] != 1);
}
void VP8LConvertBGRAToRGB_C(const uint32_t* src,
int num_pixels, uint8_t* dst) {
void VP8LConvertBGRAToRGB_C(const uint32_t* WEBP_RESTRICT src,
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
const uint32_t* const src_end = src + num_pixels;
while (src < src_end) {
const uint32_t argb = *src++;
@ -452,8 +452,8 @@ void VP8LConvertBGRAToRGB_C(const uint32_t* src,
}
}
void VP8LConvertBGRAToRGBA_C(const uint32_t* src,
int num_pixels, uint8_t* dst) {
void VP8LConvertBGRAToRGBA_C(const uint32_t* WEBP_RESTRICT src,
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
const uint32_t* const src_end = src + num_pixels;
while (src < src_end) {
const uint32_t argb = *src++;
@ -464,8 +464,8 @@ void VP8LConvertBGRAToRGBA_C(const uint32_t* src,
}
}
void VP8LConvertBGRAToRGBA4444_C(const uint32_t* src,
int num_pixels, uint8_t* dst) {
void VP8LConvertBGRAToRGBA4444_C(const uint32_t* WEBP_RESTRICT src,
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
const uint32_t* const src_end = src + num_pixels;
while (src < src_end) {
const uint32_t argb = *src++;
@ -481,8 +481,8 @@ void VP8LConvertBGRAToRGBA4444_C(const uint32_t* src,
}
}
void VP8LConvertBGRAToRGB565_C(const uint32_t* src,
int num_pixels, uint8_t* dst) {
void VP8LConvertBGRAToRGB565_C(const uint32_t* WEBP_RESTRICT src,
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
const uint32_t* const src_end = src + num_pixels;
while (src < src_end) {
const uint32_t argb = *src++;
@ -498,8 +498,8 @@ void VP8LConvertBGRAToRGB565_C(const uint32_t* src,
}
}
void VP8LConvertBGRAToBGR_C(const uint32_t* src,
int num_pixels, uint8_t* dst) {
void VP8LConvertBGRAToBGR_C(const uint32_t* WEBP_RESTRICT src,
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
const uint32_t* const src_end = src + num_pixels;
while (src < src_end) {
const uint32_t argb = *src++;
@ -509,8 +509,8 @@ void VP8LConvertBGRAToBGR_C(const uint32_t* src,
}
}
static void CopyOrSwap(const uint32_t* src, int num_pixels, uint8_t* dst,
int swap_on_big_endian) {
static void CopyOrSwap(const uint32_t* WEBP_RESTRICT src, int num_pixels,
uint8_t* WEBP_RESTRICT dst, int swap_on_big_endian) {
if (is_big_endian() == swap_on_big_endian) {
const uint32_t* const src_end = src + num_pixels;
while (src < src_end) {