diff --git a/src/dsp/dsp.h b/src/dsp/dsp.h index 13df7dfa..30a1d43f 100644 --- a/src/dsp/dsp.h +++ b/src/dsp/dsp.h @@ -131,9 +131,10 @@ typedef struct { int max_value; int last_non_zero; } VP8Histogram; -typedef void (*VP8CHisto)(const uint8_t* ref, const uint8_t* pred, +typedef void (*VP8CHisto)(const uint8_t* WEBP_RESTRICT ref, + const uint8_t* WEBP_RESTRICT pred, int start_block, int end_block, - VP8Histogram* const histo); + VP8Histogram* WEBP_RESTRICT const histo); extern VP8CHisto VP8CollectHistogram; // General-purpose util function to help VP8CollectHistogram(). void VP8SetHistogramData(const int distribution[MAX_COEFF_THRESH + 1], diff --git a/src/dsp/lossless.c b/src/dsp/lossless.c index e7cbfd9e..a02443f1 100644 --- a/src/dsp/lossless.c +++ b/src/dsp/lossless.c @@ -182,13 +182,13 @@ uint32_t VP8LPredictor13_C(const uint32_t* const left, } static void PredictorAdd0_C(const uint32_t* in, const uint32_t* upper, - int num_pixels, uint32_t* out) { + int num_pixels, uint32_t* WEBP_RESTRICT out) { int x; (void)upper; for (x = 0; x < num_pixels; ++x) out[x] = VP8LAddPixels(in[x], ARGB_BLACK); } static void PredictorAdd1_C(const uint32_t* in, const uint32_t* upper, - int num_pixels, uint32_t* out) { + int num_pixels, uint32_t* WEBP_RESTRICT out) { int i; uint32_t left = out[-1]; (void)upper; @@ -441,8 +441,8 @@ static int is_big_endian(void) { return (tmp.b[0] != 1); } -void VP8LConvertBGRAToRGB_C(const uint32_t* src, - int num_pixels, uint8_t* dst) { +void VP8LConvertBGRAToRGB_C(const uint32_t* WEBP_RESTRICT src, + int num_pixels, uint8_t* WEBP_RESTRICT dst) { const uint32_t* const src_end = src + num_pixels; while (src < src_end) { const uint32_t argb = *src++; @@ -452,8 +452,8 @@ void VP8LConvertBGRAToRGB_C(const uint32_t* src, } } -void VP8LConvertBGRAToRGBA_C(const uint32_t* src, - int num_pixels, uint8_t* dst) { +void VP8LConvertBGRAToRGBA_C(const uint32_t* WEBP_RESTRICT src, + int num_pixels, uint8_t* WEBP_RESTRICT dst) { const uint32_t* const src_end = src + num_pixels; while (src < src_end) { const uint32_t argb = *src++; @@ -464,8 +464,8 @@ void VP8LConvertBGRAToRGBA_C(const uint32_t* src, } } -void VP8LConvertBGRAToRGBA4444_C(const uint32_t* src, - int num_pixels, uint8_t* dst) { +void VP8LConvertBGRAToRGBA4444_C(const uint32_t* WEBP_RESTRICT src, + int num_pixels, uint8_t* WEBP_RESTRICT dst) { const uint32_t* const src_end = src + num_pixels; while (src < src_end) { const uint32_t argb = *src++; @@ -481,8 +481,8 @@ void VP8LConvertBGRAToRGBA4444_C(const uint32_t* src, } } -void VP8LConvertBGRAToRGB565_C(const uint32_t* src, - int num_pixels, uint8_t* dst) { +void VP8LConvertBGRAToRGB565_C(const uint32_t* WEBP_RESTRICT src, + int num_pixels, uint8_t* WEBP_RESTRICT dst) { const uint32_t* const src_end = src + num_pixels; while (src < src_end) { const uint32_t argb = *src++; @@ -498,8 +498,8 @@ void VP8LConvertBGRAToRGB565_C(const uint32_t* src, } } -void VP8LConvertBGRAToBGR_C(const uint32_t* src, - int num_pixels, uint8_t* dst) { +void VP8LConvertBGRAToBGR_C(const uint32_t* WEBP_RESTRICT src, + int num_pixels, uint8_t* WEBP_RESTRICT dst) { const uint32_t* const src_end = src + num_pixels; while (src < src_end) { const uint32_t argb = *src++; @@ -509,8 +509,8 @@ void VP8LConvertBGRAToBGR_C(const uint32_t* src, } } -static void CopyOrSwap(const uint32_t* src, int num_pixels, uint8_t* dst, - int swap_on_big_endian) { +static void CopyOrSwap(const uint32_t* WEBP_RESTRICT src, int num_pixels, + uint8_t* WEBP_RESTRICT dst, int swap_on_big_endian) { if (is_big_endian() == swap_on_big_endian) { const uint32_t* const src_end = src + num_pixels; while (src < src_end) { diff --git a/src/dsp/lossless.h b/src/dsp/lossless.h index 53278cda..a72e3b2a 100644 --- a/src/dsp/lossless.h +++ b/src/dsp/lossless.h @@ -18,6 +18,7 @@ #include "src/webp/types.h" #include "src/webp/decode.h" +#include "src/dsp/dsp.h" #include "src/enc/histogram_enc.h" #include "src/utils/utils.h" @@ -60,7 +61,7 @@ uint32_t VP8LPredictor13_C(const uint32_t* const left, // These Add/Sub function expects upper[-1] and out[-1] to be readable. typedef void (*VP8LPredictorAddSubFunc)(const uint32_t* in, const uint32_t* upper, int num_pixels, - uint32_t* out); + uint32_t* WEBP_RESTRICT out); extern VP8LPredictorAddSubFunc VP8LPredictorsAdd[16]; extern VP8LPredictorAddSubFunc VP8LPredictorsAdd_C[16]; @@ -91,8 +92,8 @@ void VP8LInverseTransform(const struct VP8LTransform* const transform, const uint32_t* const in, uint32_t* const out); // Color space conversion. -typedef void (*VP8LConvertFunc)(const uint32_t* src, int num_pixels, - uint8_t* dst); +typedef void (*VP8LConvertFunc)(const uint32_t* WEBP_RESTRICT src, + int num_pixels, uint8_t* WEBP_RESTRICT dst); extern VP8LConvertFunc VP8LConvertBGRAToRGB; extern VP8LConvertFunc VP8LConvertBGRAToRGBA; extern VP8LConvertFunc VP8LConvertBGRAToRGBA4444; @@ -145,29 +146,33 @@ void VP8LDspInit(void); typedef void (*VP8LProcessEncBlueAndRedFunc)(uint32_t* dst, int num_pixels); extern VP8LProcessEncBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed; -typedef void (*VP8LTransformColorFunc)(const VP8LMultipliers* const m, - uint32_t* dst, int num_pixels); +typedef void (*VP8LTransformColorFunc)( + const VP8LMultipliers* WEBP_RESTRICT const m, uint32_t* WEBP_RESTRICT dst, + int num_pixels); extern VP8LTransformColorFunc VP8LTransformColor; typedef void (*VP8LCollectColorBlueTransformsFunc)( - const uint32_t* argb, int stride, + const uint32_t* WEBP_RESTRICT argb, int stride, int tile_width, int tile_height, int green_to_blue, int red_to_blue, uint32_t histo[]); extern VP8LCollectColorBlueTransformsFunc VP8LCollectColorBlueTransforms; typedef void (*VP8LCollectColorRedTransformsFunc)( - const uint32_t* argb, int stride, + const uint32_t* WEBP_RESTRICT argb, int stride, int tile_width, int tile_height, int green_to_red, uint32_t histo[]); extern VP8LCollectColorRedTransformsFunc VP8LCollectColorRedTransforms; // Expose some C-only fallback functions -void VP8LTransformColor_C(const VP8LMultipliers* const m, - uint32_t* data, int num_pixels); -void VP8LSubtractGreenFromBlueAndRed_C(uint32_t* argb_data, int num_pixels); -void VP8LCollectColorRedTransforms_C(const uint32_t* argb, int stride, +void VP8LTransformColor_C(const VP8LMultipliers* WEBP_RESTRICT const m, + uint32_t* WEBP_RESTRICT data, int num_pixels); +void VP8LSubtractGreenFromBlueAndRed_C(uint32_t* WEBP_RESTRICT argb_data, + int num_pixels); +void VP8LCollectColorRedTransforms_C(const uint32_t* WEBP_RESTRICT argb, + int stride, int tile_width, int tile_height, int green_to_red, uint32_t histo[]); -void VP8LCollectColorBlueTransforms_C(const uint32_t* argb, int stride, +void VP8LCollectColorBlueTransforms_C(const uint32_t* WEBP_RESTRICT argb, + int stride, int tile_width, int tile_height, int green_to_blue, int red_to_blue, uint32_t histo[]); @@ -179,7 +184,8 @@ extern VP8LPredictorAddSubFunc VP8LPredictorsSub_C[16]; // Huffman-cost related functions. typedef uint32_t (*VP8LCostFunc)(const uint32_t* population, int length); -typedef uint32_t (*VP8LCostCombinedFunc)(const uint32_t* X, const uint32_t* Y, +typedef uint32_t (*VP8LCostCombinedFunc)(const uint32_t* WEBP_RESTRICT X, + const uint32_t* WEBP_RESTRICT Y, int length); typedef uint64_t (*VP8LCombinedShannonEntropyFunc)(const uint32_t X[256], const uint32_t Y[256]); @@ -210,26 +216,30 @@ void VP8LBitEntropyInit(VP8LBitEntropy* const entropy); // codec specific heuristics. typedef void (*VP8LGetCombinedEntropyUnrefinedFunc)( const uint32_t X[], const uint32_t Y[], int length, - VP8LBitEntropy* const bit_entropy, VP8LStreaks* const stats); + VP8LBitEntropy* WEBP_RESTRICT const bit_entropy, + VP8LStreaks* WEBP_RESTRICT const stats); extern VP8LGetCombinedEntropyUnrefinedFunc VP8LGetCombinedEntropyUnrefined; // Get the entropy for the distribution 'X'. -typedef void (*VP8LGetEntropyUnrefinedFunc)(const uint32_t X[], int length, - VP8LBitEntropy* const bit_entropy, - VP8LStreaks* const stats); +typedef void (*VP8LGetEntropyUnrefinedFunc)( + const uint32_t X[], int length, + VP8LBitEntropy* WEBP_RESTRICT const bit_entropy, + VP8LStreaks* WEBP_RESTRICT const stats); extern VP8LGetEntropyUnrefinedFunc VP8LGetEntropyUnrefined; void VP8LBitsEntropyUnrefined(const uint32_t* const array, int n, VP8LBitEntropy* const entropy); -typedef void (*VP8LAddVectorFunc)(const uint32_t* a, const uint32_t* b, - uint32_t* out, int size); +typedef void (*VP8LAddVectorFunc)(const uint32_t* WEBP_RESTRICT a, + const uint32_t* WEBP_RESTRICT b, + uint32_t* WEBP_RESTRICT out, int size); extern VP8LAddVectorFunc VP8LAddVector; -typedef void (*VP8LAddVectorEqFunc)(const uint32_t* a, uint32_t* out, int size); +typedef void (*VP8LAddVectorEqFunc)(const uint32_t* WEBP_RESTRICT a, + uint32_t* WEBP_RESTRICT out, int size); extern VP8LAddVectorEqFunc VP8LAddVectorEq; -void VP8LHistogramAdd(const VP8LHistogram* const a, - const VP8LHistogram* const b, - VP8LHistogram* const out); +void VP8LHistogramAdd(const VP8LHistogram* WEBP_RESTRICT const a, + const VP8LHistogram* WEBP_RESTRICT const b, + VP8LHistogram* WEBP_RESTRICT const out); // ----------------------------------------------------------------------------- // PrefixEncode() @@ -239,11 +249,12 @@ typedef int (*VP8LVectorMismatchFunc)(const uint32_t* const array1, // Returns the first index where array1 and array2 are different. extern VP8LVectorMismatchFunc VP8LVectorMismatch; -typedef void (*VP8LBundleColorMapFunc)(const uint8_t* const row, int width, - int xbits, uint32_t* dst); +typedef void (*VP8LBundleColorMapFunc)(const uint8_t* WEBP_RESTRICT const row, + int width, int xbits, + uint32_t* WEBP_RESTRICT dst); extern VP8LBundleColorMapFunc VP8LBundleColorMap; -void VP8LBundleColorMap_C(const uint8_t* const row, int width, int xbits, - uint32_t* dst); +void VP8LBundleColorMap_C(const uint8_t* WEBP_RESTRICT const row, + int width, int xbits, uint32_t* WEBP_RESTRICT dst); // Must be called before calling any of the above methods. void VP8LEncDspInit(void); diff --git a/src/dsp/lossless_common.h b/src/dsp/lossless_common.h index 33f2c4dc..66eadf1f 100644 --- a/src/dsp/lossless_common.h +++ b/src/dsp/lossless_common.h @@ -194,15 +194,15 @@ uint32_t VP8LSubPixels(uint32_t a, uint32_t b) { // The predictor is added to the output pixel (which // is therefore considered as a residual) to get the final prediction. -#define GENERATE_PREDICTOR_ADD(PREDICTOR, PREDICTOR_ADD) \ -static void PREDICTOR_ADD(const uint32_t* in, const uint32_t* upper, \ - int num_pixels, uint32_t* out) { \ - int x; \ - assert(upper != NULL); \ - for (x = 0; x < num_pixels; ++x) { \ - const uint32_t pred = (PREDICTOR)(&out[x - 1], upper + x); \ - out[x] = VP8LAddPixels(in[x], pred); \ - } \ +#define GENERATE_PREDICTOR_ADD(PREDICTOR, PREDICTOR_ADD) \ +static void PREDICTOR_ADD(const uint32_t* in, const uint32_t* upper, \ + int num_pixels, uint32_t* WEBP_RESTRICT out) { \ + int x; \ + assert(upper != NULL); \ + for (x = 0; x < num_pixels; ++x) { \ + const uint32_t pred = (PREDICTOR)(&out[x - 1], upper + x); \ + out[x] = VP8LAddPixels(in[x], pred); \ + } \ } #ifdef __cplusplus diff --git a/src/dsp/lossless_enc.c b/src/dsp/lossless_enc.c index 6ba1b373..7e621a71 100644 --- a/src/dsp/lossless_enc.c +++ b/src/dsp/lossless_enc.c @@ -359,8 +359,8 @@ void VP8LBitEntropyInit(VP8LBitEntropy* const entropy) { entropy->nonzero_code = VP8L_NON_TRIVIAL_SYM; } -void VP8LBitsEntropyUnrefined(const uint32_t* const array, int n, - VP8LBitEntropy* const entropy) { +void VP8LBitsEntropyUnrefined(const uint32_t* WEBP_RESTRICT const array, int n, + VP8LBitEntropy* WEBP_RESTRICT const entropy) { int i; VP8LBitEntropyInit(entropy); @@ -380,8 +380,10 @@ void VP8LBitsEntropyUnrefined(const uint32_t* const array, int n, } static WEBP_INLINE void GetEntropyUnrefinedHelper( - uint32_t val, int i, uint32_t* const val_prev, int* const i_prev, - VP8LBitEntropy* const bit_entropy, VP8LStreaks* const stats) { + uint32_t val, int i, uint32_t* WEBP_RESTRICT const val_prev, + int* WEBP_RESTRICT const i_prev, + VP8LBitEntropy* WEBP_RESTRICT const bit_entropy, + VP8LStreaks* WEBP_RESTRICT const stats) { const int streak = i - *i_prev; // Gather info for the bit entropy. @@ -403,9 +405,10 @@ static WEBP_INLINE void GetEntropyUnrefinedHelper( *i_prev = i; } -static void GetEntropyUnrefined_C(const uint32_t X[], int length, - VP8LBitEntropy* const bit_entropy, - VP8LStreaks* const stats) { +static void GetEntropyUnrefined_C( + const uint32_t X[], int length, + VP8LBitEntropy* WEBP_RESTRICT const bit_entropy, + VP8LStreaks* WEBP_RESTRICT const stats) { int i; int i_prev = 0; uint32_t x_prev = X[0]; @@ -424,11 +427,10 @@ static void GetEntropyUnrefined_C(const uint32_t X[], int length, bit_entropy->entropy = VP8LFastSLog2(bit_entropy->sum) - bit_entropy->entropy; } -static void GetCombinedEntropyUnrefined_C(const uint32_t X[], - const uint32_t Y[], - int length, - VP8LBitEntropy* const bit_entropy, - VP8LStreaks* const stats) { +static void GetCombinedEntropyUnrefined_C( + const uint32_t X[], const uint32_t Y[], int length, + VP8LBitEntropy* WEBP_RESTRICT const bit_entropy, + VP8LStreaks* WEBP_RESTRICT const stats) { int i = 1; int i_prev = 0; uint32_t xy_prev = X[0] + Y[0]; @@ -468,8 +470,8 @@ static WEBP_INLINE int8_t U32ToS8(uint32_t v) { return (int8_t)(v & 0xff); } -void VP8LTransformColor_C(const VP8LMultipliers* const m, uint32_t* data, - int num_pixels) { +void VP8LTransformColor_C(const VP8LMultipliers* WEBP_RESTRICT const m, + uint32_t* WEBP_RESTRICT data, int num_pixels) { int i; for (i = 0; i < num_pixels; ++i) { const uint32_t argb = data[i]; @@ -505,7 +507,8 @@ static WEBP_INLINE uint8_t TransformColorBlue(uint8_t green_to_blue, return (new_blue & 0xff); } -void VP8LCollectColorRedTransforms_C(const uint32_t* argb, int stride, +void VP8LCollectColorRedTransforms_C(const uint32_t* WEBP_RESTRICT argb, + int stride, int tile_width, int tile_height, int green_to_red, uint32_t histo[]) { while (tile_height-- > 0) { @@ -517,7 +520,8 @@ void VP8LCollectColorRedTransforms_C(const uint32_t* argb, int stride, } } -void VP8LCollectColorBlueTransforms_C(const uint32_t* argb, int stride, +void VP8LCollectColorBlueTransforms_C(const uint32_t* WEBP_RESTRICT argb, + int stride, int tile_width, int tile_height, int green_to_blue, int red_to_blue, uint32_t histo[]) { @@ -544,8 +548,8 @@ static int VectorMismatch_C(const uint32_t* const array1, } // Bundles multiple (1, 2, 4 or 8) pixels into a single pixel. -void VP8LBundleColorMap_C(const uint8_t* const row, int width, int xbits, - uint32_t* dst) { +void VP8LBundleColorMap_C(const uint8_t* WEBP_RESTRICT const row, + int width, int xbits, uint32_t* WEBP_RESTRICT dst) { int x; if (xbits > 0) { const int bit_depth = 1 << (3 - xbits); @@ -576,7 +580,8 @@ static uint32_t ExtraCost_C(const uint32_t* population, int length) { return cost; } -static uint32_t ExtraCostCombined_C(const uint32_t* X, const uint32_t* Y, +static uint32_t ExtraCostCombined_C(const uint32_t* WEBP_RESTRICT X, + const uint32_t* WEBP_RESTRICT Y, int length) { int i; uint32_t cost = X[4] + Y[4] + X[5] + Y[5]; @@ -591,13 +596,15 @@ static uint32_t ExtraCostCombined_C(const uint32_t* X, const uint32_t* Y, //------------------------------------------------------------------------------ -static void AddVector_C(const uint32_t* a, const uint32_t* b, uint32_t* out, - int size) { +static void AddVector_C(const uint32_t* WEBP_RESTRICT a, + const uint32_t* WEBP_RESTRICT b, + uint32_t* WEBP_RESTRICT out, int size) { int i; for (i = 0; i < size; ++i) out[i] = a[i] + b[i]; } -static void AddVectorEq_C(const uint32_t* a, uint32_t* out, int size) { +static void AddVectorEq_C(const uint32_t* WEBP_RESTRICT a, + uint32_t* WEBP_RESTRICT out, int size) { int i; for (i = 0; i < size; ++i) out[i] += a[i]; } @@ -626,8 +633,9 @@ static void AddVectorEq_C(const uint32_t* a, uint32_t* out, int size) { } \ } while (0) -void VP8LHistogramAdd(const VP8LHistogram* const a, - const VP8LHistogram* const b, VP8LHistogram* const out) { +void VP8LHistogramAdd(const VP8LHistogram* WEBP_RESTRICT const a, + const VP8LHistogram* WEBP_RESTRICT const b, + VP8LHistogram* WEBP_RESTRICT const out) { int i; const int literal_size = VP8LHistogramNumCodes(a->palette_code_bits_); assert(a->palette_code_bits_ == b->palette_code_bits_); @@ -657,14 +665,14 @@ void VP8LHistogramAdd(const VP8LHistogram* const a, // Image transforms. static void PredictorSub0_C(const uint32_t* in, const uint32_t* upper, - int num_pixels, uint32_t* out) { + int num_pixels, uint32_t* WEBP_RESTRICT out) { int i; for (i = 0; i < num_pixels; ++i) out[i] = VP8LSubPixels(in[i], ARGB_BLACK); (void)upper; } static void PredictorSub1_C(const uint32_t* in, const uint32_t* upper, - int num_pixels, uint32_t* out) { + int num_pixels, uint32_t* WEBP_RESTRICT out) { int i; for (i = 0; i < num_pixels; ++i) out[i] = VP8LSubPixels(in[i], in[i - 1]); (void)upper; @@ -675,7 +683,8 @@ static void PredictorSub1_C(const uint32_t* in, const uint32_t* upper, #define GENERATE_PREDICTOR_SUB(PREDICTOR_I) \ static void PredictorSub##PREDICTOR_I##_C(const uint32_t* in, \ const uint32_t* upper, \ - int num_pixels, uint32_t* out) { \ + int num_pixels, \ + uint32_t* WEBP_RESTRICT out) { \ int x; \ assert(upper != NULL); \ for (x = 0; x < num_pixels; ++x) { \ diff --git a/src/dsp/lossless_enc_mips32.c b/src/dsp/lossless_enc_mips32.c index 58529f9a..8e9d7358 100644 --- a/src/dsp/lossless_enc_mips32.c +++ b/src/dsp/lossless_enc_mips32.c @@ -149,8 +149,9 @@ static uint32_t ExtraCost_MIPS32(const uint32_t* const population, int length) { // pY += 2; // } // return cost; -static uint32_t ExtraCostCombined_MIPS32(const uint32_t* const X, - const uint32_t* const Y, int length) { +static uint32_t ExtraCostCombined_MIPS32(const uint32_t* WEBP_RESTRICT const X, + const uint32_t* WEBP_RESTRICT const Y, + int length) { int i, temp0, temp1, temp2, temp3; const uint32_t* pX = &X[4]; const uint32_t* pY = &Y[4]; @@ -215,8 +216,10 @@ static uint32_t ExtraCostCombined_MIPS32(const uint32_t* const X, // Returns the various RLE counts static WEBP_INLINE void GetEntropyUnrefinedHelper( - uint32_t val, int i, uint32_t* const val_prev, int* const i_prev, - VP8LBitEntropy* const bit_entropy, VP8LStreaks* const stats) { + uint32_t val, int i, uint32_t* WEBP_RESTRICT const val_prev, + int* WEBP_RESTRICT const i_prev, + VP8LBitEntropy* WEBP_RESTRICT const bit_entropy, + VP8LStreaks* WEBP_RESTRICT const stats) { int* const pstreaks = &stats->streaks[0][0]; int* const pcnts = &stats->counts[0]; int temp0, temp1, temp2, temp3; @@ -241,9 +244,10 @@ static WEBP_INLINE void GetEntropyUnrefinedHelper( *i_prev = i; } -static void GetEntropyUnrefined_MIPS32(const uint32_t X[], int length, - VP8LBitEntropy* const bit_entropy, - VP8LStreaks* const stats) { +static void GetEntropyUnrefined_MIPS32( + const uint32_t X[], int length, + VP8LBitEntropy* WEBP_RESTRICT const bit_entropy, + VP8LStreaks* WEBP_RESTRICT const stats) { int i; int i_prev = 0; uint32_t x_prev = X[0]; @@ -262,11 +266,10 @@ static void GetEntropyUnrefined_MIPS32(const uint32_t X[], int length, bit_entropy->entropy = VP8LFastSLog2(bit_entropy->sum) - bit_entropy->entropy; } -static void GetCombinedEntropyUnrefined_MIPS32(const uint32_t X[], - const uint32_t Y[], - int length, - VP8LBitEntropy* const entropy, - VP8LStreaks* const stats) { +static void GetCombinedEntropyUnrefined_MIPS32( + const uint32_t X[], const uint32_t Y[], int length, + VP8LBitEntropy* WEBP_RESTRICT const entropy, + VP8LStreaks* WEBP_RESTRICT const stats) { int i = 1; int i_prev = 0; uint32_t xy_prev = X[0] + Y[0]; @@ -344,8 +347,9 @@ static void GetCombinedEntropyUnrefined_MIPS32(const uint32_t X[], ASM_END_COMMON_0 \ ASM_END_COMMON_1 -static void AddVector_MIPS32(const uint32_t* pa, const uint32_t* pb, - uint32_t* pout, int size) { +static void AddVector_MIPS32(const uint32_t* WEBP_RESTRICT pa, + const uint32_t* WEBP_RESTRICT pb, + uint32_t* WEBP_RESTRICT pout, int size) { uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7; const int end = ((size) / 4) * 4; const uint32_t* const LoopEnd = pa + end; @@ -356,7 +360,8 @@ static void AddVector_MIPS32(const uint32_t* pa, const uint32_t* pb, for (i = 0; i < size - end; ++i) pout[i] = pa[i] + pb[i]; } -static void AddVectorEq_MIPS32(const uint32_t* pa, uint32_t* pout, int size) { +static void AddVectorEq_MIPS32(const uint32_t* WEBP_RESTRICT pa, + uint32_t* WEBP_RESTRICT pout, int size) { uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7; const int end = ((size) / 4) * 4; const uint32_t* const LoopEnd = pa + end; diff --git a/src/dsp/lossless_enc_mips_dsp_r2.c b/src/dsp/lossless_enc_mips_dsp_r2.c index 6eaab0af..e10b8f7e 100644 --- a/src/dsp/lossless_enc_mips_dsp_r2.c +++ b/src/dsp/lossless_enc_mips_dsp_r2.c @@ -78,8 +78,9 @@ static WEBP_INLINE uint32_t ColorTransformDelta(int8_t color_pred, return (uint32_t)((int)(color_pred) * color) >> 5; } -static void TransformColor_MIPSdspR2(const VP8LMultipliers* const m, - uint32_t* data, int num_pixels) { +static void TransformColor_MIPSdspR2( + const VP8LMultipliers* WEBP_RESTRICT const m, uint32_t* WEBP_RESTRICT data, + int num_pixels) { int temp0, temp1, temp2, temp3, temp4, temp5; uint32_t argb, argb1, new_red, new_red1; const uint32_t G_to_R = m->green_to_red_; @@ -172,7 +173,8 @@ static WEBP_INLINE uint8_t TransformColorBlue(uint8_t green_to_blue, } static void CollectColorBlueTransforms_MIPSdspR2( - const uint32_t* argb, int stride, int tile_width, int tile_height, + const uint32_t* WEBP_RESTRICT argb, int stride, + int tile_width, int tile_height, int green_to_blue, int red_to_blue, uint32_t histo[]) { const int rtb = (red_to_blue << 16) | (red_to_blue & 0xffff); const int gtb = (green_to_blue << 16) | (green_to_blue & 0xffff); @@ -221,11 +223,9 @@ static WEBP_INLINE uint8_t TransformColorRed(uint8_t green_to_red, return (new_red & 0xff); } -static void CollectColorRedTransforms_MIPSdspR2(const uint32_t* argb, - int stride, int tile_width, - int tile_height, - int green_to_red, - uint32_t histo[]) { +static void CollectColorRedTransforms_MIPSdspR2( + const uint32_t* WEBP_RESTRICT argb, int stride, + int tile_width, int tile_height, int green_to_red, uint32_t histo[]) { const int gtr = (green_to_red << 16) | (green_to_red & 0xffff); while (tile_height-- > 0) { int x; diff --git a/src/dsp/lossless_enc_msa.c b/src/dsp/lossless_enc_msa.c index 600dddfb..6d835ab7 100644 --- a/src/dsp/lossless_enc_msa.c +++ b/src/dsp/lossless_enc_msa.c @@ -48,8 +48,8 @@ dst = VSHF_UB(src, t0, mask1); \ } while (0) -static void TransformColor_MSA(const VP8LMultipliers* const m, uint32_t* data, - int num_pixels) { +static void TransformColor_MSA(const VP8LMultipliers* WEBP_RESTRICT const m, + uint32_t* WEBP_RESTRICT data, int num_pixels) { v16u8 src0, dst0; const v16i8 g2br = (v16i8)__msa_fill_w(m->green_to_blue_ | (m->green_to_red_ << 16)); diff --git a/src/dsp/lossless_enc_neon.c b/src/dsp/lossless_enc_neon.c index e32c7961..838204a7 100644 --- a/src/dsp/lossless_enc_neon.c +++ b/src/dsp/lossless_enc_neon.c @@ -72,8 +72,9 @@ static void SubtractGreenFromBlueAndRed_NEON(uint32_t* argb_data, //------------------------------------------------------------------------------ // Color Transform -static void TransformColor_NEON(const VP8LMultipliers* const m, - uint32_t* argb_data, int num_pixels) { +static void TransformColor_NEON(const VP8LMultipliers* WEBP_RESTRICT const m, + uint32_t* WEBP_RESTRICT argb_data, + int num_pixels) { // sign-extended multiplying constants, pre-shifted by 6. #define CST(X) (((int16_t)(m->X << 8)) >> 6) const int16_t rb[8] = { diff --git a/src/dsp/lossless_enc_sse2.c b/src/dsp/lossless_enc_sse2.c index 530acc37..f6706dd5 100644 --- a/src/dsp/lossless_enc_sse2.c +++ b/src/dsp/lossless_enc_sse2.c @@ -49,8 +49,9 @@ static void SubtractGreenFromBlueAndRed_SSE2(uint32_t* argb_data, #define MK_CST_16(HI, LO) \ _mm_set1_epi32((int)(((uint32_t)(HI) << 16) | ((LO) & 0xffff))) -static void TransformColor_SSE2(const VP8LMultipliers* const m, - uint32_t* argb_data, int num_pixels) { +static void TransformColor_SSE2(const VP8LMultipliers* WEBP_RESTRICT const m, + uint32_t* WEBP_RESTRICT argb_data, + int num_pixels) { const __m128i mults_rb = MK_CST_16(CST_5b(m->green_to_red_), CST_5b(m->green_to_blue_)); const __m128i mults_b2 = MK_CST_16(CST_5b(m->red_to_blue_), 0); @@ -79,7 +80,8 @@ static void TransformColor_SSE2(const VP8LMultipliers* const m, //------------------------------------------------------------------------------ #define SPAN 8 -static void CollectColorBlueTransforms_SSE2(const uint32_t* argb, int stride, +static void CollectColorBlueTransforms_SSE2(const uint32_t* WEBP_RESTRICT argb, + int stride, int tile_width, int tile_height, int green_to_blue, int red_to_blue, uint32_t histo[]) { @@ -126,7 +128,8 @@ static void CollectColorBlueTransforms_SSE2(const uint32_t* argb, int stride, } } -static void CollectColorRedTransforms_SSE2(const uint32_t* argb, int stride, +static void CollectColorRedTransforms_SSE2(const uint32_t* WEBP_RESTRICT argb, + int stride, int tile_width, int tile_height, int green_to_red, uint32_t histo[]) { const __m128i mults_g = MK_CST_16(0, CST_5b(green_to_red)); @@ -173,8 +176,9 @@ static void CollectColorRedTransforms_SSE2(const uint32_t* argb, int stride, // Note we are adding uint32_t's as *signed* int32's (using _mm_add_epi32). But // that's ok since the histogram values are less than 1<<28 (max picture size). #define LINE_SIZE 16 // 8 or 16 -static void AddVector_SSE2(const uint32_t* a, const uint32_t* b, uint32_t* out, - int size) { +static void AddVector_SSE2(const uint32_t* WEBP_RESTRICT a, + const uint32_t* WEBP_RESTRICT b, + uint32_t* WEBP_RESTRICT out, int size) { int i; for (i = 0; i + LINE_SIZE <= size; i += LINE_SIZE) { const __m128i a0 = _mm_loadu_si128((const __m128i*)&a[i + 0]); @@ -201,7 +205,8 @@ static void AddVector_SSE2(const uint32_t* a, const uint32_t* b, uint32_t* out, } } -static void AddVectorEq_SSE2(const uint32_t* a, uint32_t* out, int size) { +static void AddVectorEq_SSE2(const uint32_t* WEBP_RESTRICT a, + uint32_t* WEBP_RESTRICT out, int size) { int i; for (i = 0; i + LINE_SIZE <= size; i += LINE_SIZE) { const __m128i a0 = _mm_loadu_si128((const __m128i*)&a[i + 0]); @@ -333,8 +338,9 @@ static int VectorMismatch_SSE2(const uint32_t* const array1, } // Bundles multiple (1, 2, 4 or 8) pixels into a single pixel. -static void BundleColorMap_SSE2(const uint8_t* const row, int width, int xbits, - uint32_t* dst) { +static void BundleColorMap_SSE2(const uint8_t* WEBP_RESTRICT const row, + int width, int xbits, + uint32_t* WEBP_RESTRICT dst) { int x; assert(xbits >= 0); assert(xbits <= 3); @@ -423,7 +429,7 @@ static WEBP_INLINE void Average2_m128i(const __m128i* const a0, // Predictor0: ARGB_BLACK. static void PredictorSub0_SSE2(const uint32_t* in, const uint32_t* upper, - int num_pixels, uint32_t* out) { + int num_pixels, uint32_t* WEBP_RESTRICT out) { int i; const __m128i black = _mm_set1_epi32((int)ARGB_BLACK); for (i = 0; i + 4 <= num_pixels; i += 4) { @@ -440,7 +446,8 @@ static void PredictorSub0_SSE2(const uint32_t* in, const uint32_t* upper, #define GENERATE_PREDICTOR_1(X, IN) \ static void PredictorSub##X##_SSE2(const uint32_t* const in, \ const uint32_t* const upper, \ - int num_pixels, uint32_t* const out) { \ + int num_pixels, \ + uint32_t* WEBP_RESTRICT const out) { \ int i; \ for (i = 0; i + 4 <= num_pixels; i += 4) { \ const __m128i src = _mm_loadu_si128((const __m128i*)&in[i]); \ @@ -462,7 +469,7 @@ GENERATE_PREDICTOR_1(4, upper[i - 1]) // Predictor4: TL // Predictor5: avg2(avg2(L, TR), T) static void PredictorSub5_SSE2(const uint32_t* in, const uint32_t* upper, - int num_pixels, uint32_t* out) { + int num_pixels, uint32_t* WEBP_RESTRICT out) { int i; for (i = 0; i + 4 <= num_pixels; i += 4) { const __m128i L = _mm_loadu_si128((const __m128i*)&in[i - 1]); @@ -482,7 +489,8 @@ static void PredictorSub5_SSE2(const uint32_t* in, const uint32_t* upper, #define GENERATE_PREDICTOR_2(X, A, B) \ static void PredictorSub##X##_SSE2(const uint32_t* in, const uint32_t* upper, \ - int num_pixels, uint32_t* out) { \ + int num_pixels, \ + uint32_t* WEBP_RESTRICT out) { \ int i; \ for (i = 0; i + 4 <= num_pixels; i += 4) { \ const __m128i tA = _mm_loadu_si128((const __m128i*)&(A)); \ @@ -506,7 +514,7 @@ GENERATE_PREDICTOR_2(9, upper[i], upper[i + 1]) // Predictor9: average(T, TR) // Predictor10: avg(avg(L,TL), avg(T, TR)). static void PredictorSub10_SSE2(const uint32_t* in, const uint32_t* upper, - int num_pixels, uint32_t* out) { + int num_pixels, uint32_t* WEBP_RESTRICT out) { int i; for (i = 0; i + 4 <= num_pixels; i += 4) { const __m128i L = _mm_loadu_si128((const __m128i*)&in[i - 1]); @@ -541,7 +549,7 @@ static void GetSumAbsDiff32_SSE2(const __m128i* const A, const __m128i* const B, } static void PredictorSub11_SSE2(const uint32_t* in, const uint32_t* upper, - int num_pixels, uint32_t* out) { + int num_pixels, uint32_t* WEBP_RESTRICT out) { int i; for (i = 0; i + 4 <= num_pixels; i += 4) { const __m128i L = _mm_loadu_si128((const __m128i*)&in[i - 1]); @@ -567,7 +575,7 @@ static void PredictorSub11_SSE2(const uint32_t* in, const uint32_t* upper, // Predictor12: ClampedSubSubtractFull. static void PredictorSub12_SSE2(const uint32_t* in, const uint32_t* upper, - int num_pixels, uint32_t* out) { + int num_pixels, uint32_t* WEBP_RESTRICT out) { int i; const __m128i zero = _mm_setzero_si128(); for (i = 0; i + 4 <= num_pixels; i += 4) { @@ -596,7 +604,7 @@ static void PredictorSub12_SSE2(const uint32_t* in, const uint32_t* upper, // Predictors13: ClampedAddSubtractHalf static void PredictorSub13_SSE2(const uint32_t* in, const uint32_t* upper, - int num_pixels, uint32_t* out) { + int num_pixels, uint32_t* WEBP_RESTRICT out) { int i; const __m128i zero = _mm_setzero_si128(); for (i = 0; i + 2 <= num_pixels; i += 2) { diff --git a/src/dsp/lossless_enc_sse41.c b/src/dsp/lossless_enc_sse41.c index 9a0dcf9b..87ed056f 100644 --- a/src/dsp/lossless_enc_sse41.c +++ b/src/dsp/lossless_enc_sse41.c @@ -44,8 +44,9 @@ static uint32_t ExtraCost_SSE41(const uint32_t* const a, int length) { return HorizontalSum_SSE41(cost); } -static uint32_t ExtraCostCombined_SSE41(const uint32_t* const a, - const uint32_t* const b, int length) { +static uint32_t ExtraCostCombined_SSE41(const uint32_t* WEBP_RESTRICT const a, + const uint32_t* WEBP_RESTRICT const b, + int length) { int i; __m128i cost = _mm_add_epi32(_mm_set_epi32(2 * a[7], 2 * a[6], a[5], a[4]), _mm_set_epi32(2 * b[7], 2 * b[6], b[5], b[4])); @@ -95,7 +96,8 @@ static void SubtractGreenFromBlueAndRed_SSE41(uint32_t* argb_data, #define MK_CST_16(HI, LO) \ _mm_set1_epi32((int)(((uint32_t)(HI) << 16) | ((LO) & 0xffff))) -static void CollectColorBlueTransforms_SSE41(const uint32_t* argb, int stride, +static void CollectColorBlueTransforms_SSE41(const uint32_t* WEBP_RESTRICT argb, + int stride, int tile_width, int tile_height, int green_to_blue, int red_to_blue, uint32_t histo[]) { @@ -141,7 +143,8 @@ static void CollectColorBlueTransforms_SSE41(const uint32_t* argb, int stride, } } -static void CollectColorRedTransforms_SSE41(const uint32_t* argb, int stride, +static void CollectColorRedTransforms_SSE41(const uint32_t* WEBP_RESTRICT argb, + int stride, int tile_width, int tile_height, int green_to_red, uint32_t histo[]) { diff --git a/src/dsp/lossless_neon.c b/src/dsp/lossless_neon.c index e9960db3..93f41cef 100644 --- a/src/dsp/lossless_neon.c +++ b/src/dsp/lossless_neon.c @@ -26,8 +26,8 @@ #if !defined(WORK_AROUND_GCC) // gcc 4.6.0 had some trouble (NDK-r9) with this code. We only use it for // gcc-4.8.x at least. -static void ConvertBGRAToRGBA_NEON(const uint32_t* src, - int num_pixels, uint8_t* dst) { +static void ConvertBGRAToRGBA_NEON(const uint32_t* WEBP_RESTRICT src, + int num_pixels, uint8_t* WEBP_RESTRICT dst) { const uint32_t* const end = src + (num_pixels & ~15); for (; src < end; src += 16) { uint8x16x4_t pixel = vld4q_u8((uint8_t*)src); @@ -41,8 +41,8 @@ static void ConvertBGRAToRGBA_NEON(const uint32_t* src, VP8LConvertBGRAToRGBA_C(src, num_pixels & 15, dst); // left-overs } -static void ConvertBGRAToBGR_NEON(const uint32_t* src, - int num_pixels, uint8_t* dst) { +static void ConvertBGRAToBGR_NEON(const uint32_t* WEBP_RESTRICT src, + int num_pixels, uint8_t* WEBP_RESTRICT dst) { const uint32_t* const end = src + (num_pixels & ~15); for (; src < end; src += 16) { const uint8x16x4_t pixel = vld4q_u8((uint8_t*)src); @@ -53,8 +53,8 @@ static void ConvertBGRAToBGR_NEON(const uint32_t* src, VP8LConvertBGRAToBGR_C(src, num_pixels & 15, dst); // left-overs } -static void ConvertBGRAToRGB_NEON(const uint32_t* src, - int num_pixels, uint8_t* dst) { +static void ConvertBGRAToRGB_NEON(const uint32_t* WEBP_RESTRICT src, + int num_pixels, uint8_t* WEBP_RESTRICT dst) { const uint32_t* const end = src + (num_pixels & ~15); for (; src < end; src += 16) { const uint8x16x4_t pixel = vld4q_u8((uint8_t*)src); @@ -71,8 +71,8 @@ static void ConvertBGRAToRGB_NEON(const uint32_t* src, static const uint8_t kRGBAShuffle[8] = { 2, 1, 0, 3, 6, 5, 4, 7 }; -static void ConvertBGRAToRGBA_NEON(const uint32_t* src, - int num_pixels, uint8_t* dst) { +static void ConvertBGRAToRGBA_NEON(const uint32_t* WEBP_RESTRICT src, + int num_pixels, uint8_t* WEBP_RESTRICT dst) { const uint32_t* const end = src + (num_pixels & ~1); const uint8x8_t shuffle = vld1_u8(kRGBAShuffle); for (; src < end; src += 2) { @@ -89,8 +89,8 @@ static const uint8_t kBGRShuffle[3][8] = { { 21, 22, 24, 25, 26, 28, 29, 30 } }; -static void ConvertBGRAToBGR_NEON(const uint32_t* src, - int num_pixels, uint8_t* dst) { +static void ConvertBGRAToBGR_NEON(const uint32_t* WEBP_RESTRICT src, + int num_pixels, uint8_t* WEBP_RESTRICT dst) { const uint32_t* const end = src + (num_pixels & ~7); const uint8x8_t shuffle0 = vld1_u8(kBGRShuffle[0]); const uint8x8_t shuffle1 = vld1_u8(kBGRShuffle[1]); @@ -116,8 +116,8 @@ static const uint8_t kRGBShuffle[3][8] = { { 21, 20, 26, 25, 24, 30, 29, 28 } }; -static void ConvertBGRAToRGB_NEON(const uint32_t* src, - int num_pixels, uint8_t* dst) { +static void ConvertBGRAToRGB_NEON(const uint32_t* WEBP_RESTRICT src, + int num_pixels, uint8_t* WEBP_RESTRICT dst) { const uint32_t* const end = src + (num_pixels & ~7); const uint8x8_t shuffle0 = vld1_u8(kRGBShuffle[0]); const uint8x8_t shuffle1 = vld1_u8(kRGBShuffle[1]); @@ -209,7 +209,7 @@ static uint32_t Predictor13_NEON(const uint32_t* const left, // Predictor0: ARGB_BLACK. static void PredictorAdd0_NEON(const uint32_t* in, const uint32_t* upper, - int num_pixels, uint32_t* out) { + int num_pixels, uint32_t* WEBP_RESTRICT out) { int i; const uint8x16_t black = vreinterpretq_u8_u32(vdupq_n_u32(ARGB_BLACK)); for (i = 0; i + 4 <= num_pixels; i += 4) { @@ -222,7 +222,7 @@ static void PredictorAdd0_NEON(const uint32_t* in, const uint32_t* upper, // Predictor1: left. static void PredictorAdd1_NEON(const uint32_t* in, const uint32_t* upper, - int num_pixels, uint32_t* out) { + int num_pixels, uint32_t* WEBP_RESTRICT out) { int i; const uint8x16_t zero = LOADQ_U32_AS_U8(0); for (i = 0; i + 4 <= num_pixels; i += 4) { @@ -248,7 +248,7 @@ static void PredictorAdd1_NEON(const uint32_t* in, const uint32_t* upper, #define GENERATE_PREDICTOR_1(X, IN) \ static void PredictorAdd##X##_NEON(const uint32_t* in, \ const uint32_t* upper, int num_pixels, \ - uint32_t* out) { \ + uint32_t* WEBP_RESTRICT out) { \ int i; \ for (i = 0; i + 4 <= num_pixels; i += 4) { \ const uint8x16_t src = LOADQ_U32P_AS_U8(&in[i]); \ @@ -276,7 +276,7 @@ GENERATE_PREDICTOR_1(4, upper[i - 1]) } while (0) static void PredictorAdd5_NEON(const uint32_t* in, const uint32_t* upper, - int num_pixels, uint32_t* out) { + int num_pixels, uint32_t* WEBP_RESTRICT out) { int i; uint8x16_t L = LOADQ_U32_AS_U8(out[-1]); for (i = 0; i + 4 <= num_pixels; i += 4) { @@ -301,7 +301,7 @@ static void PredictorAdd5_NEON(const uint32_t* in, const uint32_t* upper, // Predictor6: average(left, TL) static void PredictorAdd6_NEON(const uint32_t* in, const uint32_t* upper, - int num_pixels, uint32_t* out) { + int num_pixels, uint32_t* WEBP_RESTRICT out) { int i; uint8x16_t L = LOADQ_U32_AS_U8(out[-1]); for (i = 0; i + 4 <= num_pixels; i += 4) { @@ -317,7 +317,7 @@ static void PredictorAdd6_NEON(const uint32_t* in, const uint32_t* upper, // Predictor7: average(left, T) static void PredictorAdd7_NEON(const uint32_t* in, const uint32_t* upper, - int num_pixels, uint32_t* out) { + int num_pixels, uint32_t* WEBP_RESTRICT out) { int i; uint8x16_t L = LOADQ_U32_AS_U8(out[-1]); for (i = 0; i + 4 <= num_pixels; i += 4) { @@ -335,7 +335,7 @@ static void PredictorAdd7_NEON(const uint32_t* in, const uint32_t* upper, #define GENERATE_PREDICTOR_2(X, IN) \ static void PredictorAdd##X##_NEON(const uint32_t* in, \ const uint32_t* upper, int num_pixels, \ - uint32_t* out) { \ + uint32_t* WEBP_RESTRICT out) { \ int i; \ for (i = 0; i + 4 <= num_pixels; i += 4) { \ const uint8x16_t src = LOADQ_U32P_AS_U8(&in[i]); \ @@ -363,7 +363,7 @@ GENERATE_PREDICTOR_2(9, upper[i + 1]) } while (0) static void PredictorAdd10_NEON(const uint32_t* in, const uint32_t* upper, - int num_pixels, uint32_t* out) { + int num_pixels, uint32_t* WEBP_RESTRICT out) { int i; uint8x16_t L = LOADQ_U32_AS_U8(out[-1]); for (i = 0; i + 4 <= num_pixels; i += 4) { @@ -394,7 +394,7 @@ static void PredictorAdd10_NEON(const uint32_t* in, const uint32_t* upper, } while (0) static void PredictorAdd11_NEON(const uint32_t* in, const uint32_t* upper, - int num_pixels, uint32_t* out) { + int num_pixels, uint32_t* WEBP_RESTRICT out) { int i; uint8x16_t L = LOADQ_U32_AS_U8(out[-1]); for (i = 0; i + 4 <= num_pixels; i += 4) { @@ -427,7 +427,7 @@ static void PredictorAdd11_NEON(const uint32_t* in, const uint32_t* upper, } while (0) static void PredictorAdd12_NEON(const uint32_t* in, const uint32_t* upper, - int num_pixels, uint32_t* out) { + int num_pixels, uint32_t* WEBP_RESTRICT out) { int i; uint16x8_t L = vmovl_u8(LOAD_U32_AS_U8(out[-1])); for (i = 0; i + 4 <= num_pixels; i += 4) { @@ -468,7 +468,7 @@ static void PredictorAdd12_NEON(const uint32_t* in, const uint32_t* upper, } while (0) static void PredictorAdd13_NEON(const uint32_t* in, const uint32_t* upper, - int num_pixels, uint32_t* out) { + int num_pixels, uint32_t* WEBP_RESTRICT out) { int i; uint8x16_t L = LOADQ_U32_AS_U8(out[-1]); for (i = 0; i + 4 <= num_pixels; i += 4) { diff --git a/src/dsp/lossless_sse2.c b/src/dsp/lossless_sse2.c index 4b6a532c..5b68d1cf 100644 --- a/src/dsp/lossless_sse2.c +++ b/src/dsp/lossless_sse2.c @@ -186,7 +186,7 @@ static uint32_t Predictor13_SSE2(const uint32_t* const left, // Predictor0: ARGB_BLACK. static void PredictorAdd0_SSE2(const uint32_t* in, const uint32_t* upper, - int num_pixels, uint32_t* out) { + int num_pixels, uint32_t* WEBP_RESTRICT out) { int i; const __m128i black = _mm_set1_epi32((int)ARGB_BLACK); for (i = 0; i + 4 <= num_pixels; i += 4) { @@ -202,7 +202,7 @@ static void PredictorAdd0_SSE2(const uint32_t* in, const uint32_t* upper, // Predictor1: left. static void PredictorAdd1_SSE2(const uint32_t* in, const uint32_t* upper, - int num_pixels, uint32_t* out) { + int num_pixels, uint32_t* WEBP_RESTRICT out) { int i; __m128i prev = _mm_set1_epi32((int)out[-1]); for (i = 0; i + 4 <= num_pixels; i += 4) { @@ -230,7 +230,8 @@ static void PredictorAdd1_SSE2(const uint32_t* in, const uint32_t* upper, // per 8 bit channel. #define GENERATE_PREDICTOR_1(X, IN) \ static void PredictorAdd##X##_SSE2(const uint32_t* in, const uint32_t* upper, \ - int num_pixels, uint32_t* out) { \ + int num_pixels, \ + uint32_t* WEBP_RESTRICT out) { \ int i; \ for (i = 0; i + 4 <= num_pixels; i += 4) { \ const __m128i src = _mm_loadu_si128((const __m128i*)&in[i]); \ @@ -259,7 +260,8 @@ GENERATE_PREDICTOR_ADD(Predictor7_SSE2, PredictorAdd7_SSE2) #define GENERATE_PREDICTOR_2(X, IN) \ static void PredictorAdd##X##_SSE2(const uint32_t* in, const uint32_t* upper, \ - int num_pixels, uint32_t* out) { \ + int num_pixels, \ + uint32_t* WEBP_RESTRICT out) { \ int i; \ for (i = 0; i + 4 <= num_pixels; i += 4) { \ const __m128i Tother = _mm_loadu_si128((const __m128i*)&(IN)); \ @@ -297,7 +299,7 @@ GENERATE_PREDICTOR_2(9, upper[i + 1]) } while (0) static void PredictorAdd10_SSE2(const uint32_t* in, const uint32_t* upper, - int num_pixels, uint32_t* out) { + int num_pixels, uint32_t* WEBP_RESTRICT out) { int i; __m128i L = _mm_cvtsi32_si128((int)out[-1]); for (i = 0; i + 4 <= num_pixels; i += 4) { @@ -344,7 +346,7 @@ static void PredictorAdd10_SSE2(const uint32_t* in, const uint32_t* upper, } while (0) static void PredictorAdd11_SSE2(const uint32_t* in, const uint32_t* upper, - int num_pixels, uint32_t* out) { + int num_pixels, uint32_t* WEBP_RESTRICT out) { int i; __m128i pa; __m128i L = _mm_cvtsi32_si128((int)out[-1]); @@ -395,7 +397,7 @@ static void PredictorAdd11_SSE2(const uint32_t* in, const uint32_t* upper, } while (0) static void PredictorAdd12_SSE2(const uint32_t* in, const uint32_t* upper, - int num_pixels, uint32_t* out) { + int num_pixels, uint32_t* WEBP_RESTRICT out) { int i; const __m128i zero = _mm_setzero_si128(); const __m128i L8 = _mm_cvtsi32_si128((int)out[-1]); @@ -490,8 +492,8 @@ static void TransformColorInverse_SSE2(const VP8LMultipliers* const m, //------------------------------------------------------------------------------ // Color-space conversion functions -static void ConvertBGRAToRGB_SSE2(const uint32_t* src, int num_pixels, - uint8_t* dst) { +static void ConvertBGRAToRGB_SSE2(const uint32_t* WEBP_RESTRICT src, + int num_pixels, uint8_t* WEBP_RESTRICT dst) { const __m128i* in = (const __m128i*)src; __m128i* out = (__m128i*)dst; @@ -526,8 +528,8 @@ static void ConvertBGRAToRGB_SSE2(const uint32_t* src, int num_pixels, } } -static void ConvertBGRAToRGBA_SSE2(const uint32_t* src, - int num_pixels, uint8_t* dst) { +static void ConvertBGRAToRGBA_SSE2(const uint32_t* WEBP_RESTRICT src, + int num_pixels, uint8_t* WEBP_RESTRICT dst) { const __m128i red_blue_mask = _mm_set1_epi32(0x00ff00ff); const __m128i* in = (const __m128i*)src; __m128i* out = (__m128i*)dst; @@ -554,8 +556,9 @@ static void ConvertBGRAToRGBA_SSE2(const uint32_t* src, } } -static void ConvertBGRAToRGBA4444_SSE2(const uint32_t* src, - int num_pixels, uint8_t* dst) { +static void ConvertBGRAToRGBA4444_SSE2(const uint32_t* WEBP_RESTRICT src, + int num_pixels, + uint8_t* WEBP_RESTRICT dst) { const __m128i mask_0x0f = _mm_set1_epi8(0x0f); const __m128i mask_0xf0 = _mm_set1_epi8((char)0xf0); const __m128i* in = (const __m128i*)src; @@ -590,8 +593,9 @@ static void ConvertBGRAToRGBA4444_SSE2(const uint32_t* src, } } -static void ConvertBGRAToRGB565_SSE2(const uint32_t* src, - int num_pixels, uint8_t* dst) { +static void ConvertBGRAToRGB565_SSE2(const uint32_t* WEBP_RESTRICT src, + int num_pixels, + uint8_t* WEBP_RESTRICT dst) { const __m128i mask_0xe0 = _mm_set1_epi8((char)0xe0); const __m128i mask_0xf8 = _mm_set1_epi8((char)0xf8); const __m128i mask_0x07 = _mm_set1_epi8(0x07); @@ -631,8 +635,8 @@ static void ConvertBGRAToRGB565_SSE2(const uint32_t* src, } } -static void ConvertBGRAToBGR_SSE2(const uint32_t* src, - int num_pixels, uint8_t* dst) { +static void ConvertBGRAToBGR_SSE2(const uint32_t* WEBP_RESTRICT src, + int num_pixels, uint8_t* WEBP_RESTRICT dst) { const __m128i mask_l = _mm_set_epi32(0, 0x00ffffff, 0, 0x00ffffff); const __m128i mask_h = _mm_set_epi32(0x00ffffff, 0, 0x00ffffff, 0); const __m128i* in = (const __m128i*)src; diff --git a/src/dsp/lossless_sse41.c b/src/dsp/lossless_sse41.c index bb7ce761..a2d19144 100644 --- a/src/dsp/lossless_sse41.c +++ b/src/dsp/lossless_sse41.c @@ -77,8 +77,8 @@ static void TransformColorInverse_SSE41(const VP8LMultipliers* const m, } \ } while (0) -static void ConvertBGRAToRGB_SSE41(const uint32_t* src, int num_pixels, - uint8_t* dst) { +static void ConvertBGRAToRGB_SSE41(const uint32_t* WEBP_RESTRICT src, + int num_pixels, uint8_t* WEBP_RESTRICT dst) { const __m128i* in = (const __m128i*)src; __m128i* out = (__m128i*)dst; const __m128i perm0 = _mm_setr_epi8(2, 1, 0, 6, 5, 4, 10, 9, @@ -95,8 +95,8 @@ static void ConvertBGRAToRGB_SSE41(const uint32_t* src, int num_pixels, } } -static void ConvertBGRAToBGR_SSE41(const uint32_t* src, - int num_pixels, uint8_t* dst) { +static void ConvertBGRAToBGR_SSE41(const uint32_t* WEBP_RESTRICT src, + int num_pixels, uint8_t* WEBP_RESTRICT dst) { const __m128i* in = (const __m128i*)src; __m128i* out = (__m128i*)dst; const __m128i perm0 = _mm_setr_epi8(0, 1, 2, 4, 5, 6, 8, 9, 10,