dsp/lossless*: use WEBP_RESTRICT qualifier

lossless_enc: better vectorization; most benefits seen in
              AddVector/AddVectorEq w/ndk r27/gcc-13/clang-16
lossless: minor reordering and some improvement to PredictorAdd5_SSE2
          w/gcc-13

This only affects non-vector pointers; any vector pointers are left as a
follow-up.

Change-Id: I2356e314f391ee2f2c71f00bc6ee10097d3881e7
This commit is contained in:
James Zern
2024-08-16 16:51:11 -07:00
parent 04d4b4f387
commit a32b436bd5
14 changed files with 213 additions and 171 deletions

View File

@@ -359,8 +359,8 @@ void VP8LBitEntropyInit(VP8LBitEntropy* const entropy) {
entropy->nonzero_code = VP8L_NON_TRIVIAL_SYM;
}
void VP8LBitsEntropyUnrefined(const uint32_t* const array, int n,
VP8LBitEntropy* const entropy) {
void VP8LBitsEntropyUnrefined(const uint32_t* WEBP_RESTRICT const array, int n,
VP8LBitEntropy* WEBP_RESTRICT const entropy) {
int i;
VP8LBitEntropyInit(entropy);
@@ -380,8 +380,10 @@ void VP8LBitsEntropyUnrefined(const uint32_t* const array, int n,
}
static WEBP_INLINE void GetEntropyUnrefinedHelper(
uint32_t val, int i, uint32_t* const val_prev, int* const i_prev,
VP8LBitEntropy* const bit_entropy, VP8LStreaks* const stats) {
uint32_t val, int i, uint32_t* WEBP_RESTRICT const val_prev,
int* WEBP_RESTRICT const i_prev,
VP8LBitEntropy* WEBP_RESTRICT const bit_entropy,
VP8LStreaks* WEBP_RESTRICT const stats) {
const int streak = i - *i_prev;
// Gather info for the bit entropy.
@@ -403,9 +405,10 @@ static WEBP_INLINE void GetEntropyUnrefinedHelper(
*i_prev = i;
}
static void GetEntropyUnrefined_C(const uint32_t X[], int length,
VP8LBitEntropy* const bit_entropy,
VP8LStreaks* const stats) {
static void GetEntropyUnrefined_C(
const uint32_t X[], int length,
VP8LBitEntropy* WEBP_RESTRICT const bit_entropy,
VP8LStreaks* WEBP_RESTRICT const stats) {
int i;
int i_prev = 0;
uint32_t x_prev = X[0];
@@ -424,11 +427,10 @@ static void GetEntropyUnrefined_C(const uint32_t X[], int length,
bit_entropy->entropy = VP8LFastSLog2(bit_entropy->sum) - bit_entropy->entropy;
}
static void GetCombinedEntropyUnrefined_C(const uint32_t X[],
const uint32_t Y[],
int length,
VP8LBitEntropy* const bit_entropy,
VP8LStreaks* const stats) {
static void GetCombinedEntropyUnrefined_C(
const uint32_t X[], const uint32_t Y[], int length,
VP8LBitEntropy* WEBP_RESTRICT const bit_entropy,
VP8LStreaks* WEBP_RESTRICT const stats) {
int i = 1;
int i_prev = 0;
uint32_t xy_prev = X[0] + Y[0];
@@ -468,8 +470,8 @@ static WEBP_INLINE int8_t U32ToS8(uint32_t v) {
return (int8_t)(v & 0xff);
}
void VP8LTransformColor_C(const VP8LMultipliers* const m, uint32_t* data,
int num_pixels) {
void VP8LTransformColor_C(const VP8LMultipliers* WEBP_RESTRICT const m,
uint32_t* WEBP_RESTRICT data, int num_pixels) {
int i;
for (i = 0; i < num_pixels; ++i) {
const uint32_t argb = data[i];
@@ -505,7 +507,8 @@ static WEBP_INLINE uint8_t TransformColorBlue(uint8_t green_to_blue,
return (new_blue & 0xff);
}
void VP8LCollectColorRedTransforms_C(const uint32_t* argb, int stride,
void VP8LCollectColorRedTransforms_C(const uint32_t* WEBP_RESTRICT argb,
int stride,
int tile_width, int tile_height,
int green_to_red, uint32_t histo[]) {
while (tile_height-- > 0) {
@@ -517,7 +520,8 @@ void VP8LCollectColorRedTransforms_C(const uint32_t* argb, int stride,
}
}
void VP8LCollectColorBlueTransforms_C(const uint32_t* argb, int stride,
void VP8LCollectColorBlueTransforms_C(const uint32_t* WEBP_RESTRICT argb,
int stride,
int tile_width, int tile_height,
int green_to_blue, int red_to_blue,
uint32_t histo[]) {
@@ -544,8 +548,8 @@ static int VectorMismatch_C(const uint32_t* const array1,
}
// Bundles multiple (1, 2, 4 or 8) pixels into a single pixel.
void VP8LBundleColorMap_C(const uint8_t* const row, int width, int xbits,
uint32_t* dst) {
void VP8LBundleColorMap_C(const uint8_t* WEBP_RESTRICT const row,
int width, int xbits, uint32_t* WEBP_RESTRICT dst) {
int x;
if (xbits > 0) {
const int bit_depth = 1 << (3 - xbits);
@@ -576,7 +580,8 @@ static uint32_t ExtraCost_C(const uint32_t* population, int length) {
return cost;
}
static uint32_t ExtraCostCombined_C(const uint32_t* X, const uint32_t* Y,
static uint32_t ExtraCostCombined_C(const uint32_t* WEBP_RESTRICT X,
const uint32_t* WEBP_RESTRICT Y,
int length) {
int i;
uint32_t cost = X[4] + Y[4] + X[5] + Y[5];
@@ -591,13 +596,15 @@ static uint32_t ExtraCostCombined_C(const uint32_t* X, const uint32_t* Y,
//------------------------------------------------------------------------------
static void AddVector_C(const uint32_t* a, const uint32_t* b, uint32_t* out,
int size) {
static void AddVector_C(const uint32_t* WEBP_RESTRICT a,
const uint32_t* WEBP_RESTRICT b,
uint32_t* WEBP_RESTRICT out, int size) {
int i;
for (i = 0; i < size; ++i) out[i] = a[i] + b[i];
}
static void AddVectorEq_C(const uint32_t* a, uint32_t* out, int size) {
static void AddVectorEq_C(const uint32_t* WEBP_RESTRICT a,
uint32_t* WEBP_RESTRICT out, int size) {
int i;
for (i = 0; i < size; ++i) out[i] += a[i];
}
@@ -626,8 +633,9 @@ static void AddVectorEq_C(const uint32_t* a, uint32_t* out, int size) {
} \
} while (0)
void VP8LHistogramAdd(const VP8LHistogram* const a,
const VP8LHistogram* const b, VP8LHistogram* const out) {
void VP8LHistogramAdd(const VP8LHistogram* WEBP_RESTRICT const a,
const VP8LHistogram* WEBP_RESTRICT const b,
VP8LHistogram* WEBP_RESTRICT const out) {
int i;
const int literal_size = VP8LHistogramNumCodes(a->palette_code_bits_);
assert(a->palette_code_bits_ == b->palette_code_bits_);
@@ -657,14 +665,14 @@ void VP8LHistogramAdd(const VP8LHistogram* const a,
// Image transforms.
static void PredictorSub0_C(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* out) {
int num_pixels, uint32_t* WEBP_RESTRICT out) {
int i;
for (i = 0; i < num_pixels; ++i) out[i] = VP8LSubPixels(in[i], ARGB_BLACK);
(void)upper;
}
static void PredictorSub1_C(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* out) {
int num_pixels, uint32_t* WEBP_RESTRICT out) {
int i;
for (i = 0; i < num_pixels; ++i) out[i] = VP8LSubPixels(in[i], in[i - 1]);
(void)upper;
@@ -675,7 +683,8 @@ static void PredictorSub1_C(const uint32_t* in, const uint32_t* upper,
#define GENERATE_PREDICTOR_SUB(PREDICTOR_I) \
static void PredictorSub##PREDICTOR_I##_C(const uint32_t* in, \
const uint32_t* upper, \
int num_pixels, uint32_t* out) { \
int num_pixels, \
uint32_t* WEBP_RESTRICT out) { \
int x; \
assert(upper != NULL); \
for (x = 0; x < num_pixels; ++x) { \