dsp/lossless*: use WEBP_RESTRICT qualifier

lossless_enc: better vectorization, most benefits seen in AddVector/Eq
              w/ndk r27/gcc-13/clang-16
lossless: minor reordering and some improvement to PredictorAdd5_SSE2
          w/gcc-13

This only affects non-vector pointers; any vector pointers are left as a
follow up.

Change-Id: I2356e314f391ee2f2c71f00bc6ee10097d3881e7
This commit is contained in:
James Zern 2024-08-16 16:51:11 -07:00
parent 04d4b4f387
commit a32b436bd5
14 changed files with 213 additions and 171 deletions

View File

@ -131,9 +131,10 @@ typedef struct {
int max_value;
int last_non_zero;
} VP8Histogram;
typedef void (*VP8CHisto)(const uint8_t* ref, const uint8_t* pred,
typedef void (*VP8CHisto)(const uint8_t* WEBP_RESTRICT ref,
const uint8_t* WEBP_RESTRICT pred,
int start_block, int end_block,
VP8Histogram* const histo);
VP8Histogram* WEBP_RESTRICT const histo);
extern VP8CHisto VP8CollectHistogram;
// General-purpose util function to help VP8CollectHistogram().
void VP8SetHistogramData(const int distribution[MAX_COEFF_THRESH + 1],

View File

@ -182,13 +182,13 @@ uint32_t VP8LPredictor13_C(const uint32_t* const left,
}
static void PredictorAdd0_C(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* out) {
int num_pixels, uint32_t* WEBP_RESTRICT out) {
int x;
(void)upper;
for (x = 0; x < num_pixels; ++x) out[x] = VP8LAddPixels(in[x], ARGB_BLACK);
}
static void PredictorAdd1_C(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* out) {
int num_pixels, uint32_t* WEBP_RESTRICT out) {
int i;
uint32_t left = out[-1];
(void)upper;
@ -441,8 +441,8 @@ static int is_big_endian(void) {
return (tmp.b[0] != 1);
}
void VP8LConvertBGRAToRGB_C(const uint32_t* src,
int num_pixels, uint8_t* dst) {
void VP8LConvertBGRAToRGB_C(const uint32_t* WEBP_RESTRICT src,
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
const uint32_t* const src_end = src + num_pixels;
while (src < src_end) {
const uint32_t argb = *src++;
@ -452,8 +452,8 @@ void VP8LConvertBGRAToRGB_C(const uint32_t* src,
}
}
void VP8LConvertBGRAToRGBA_C(const uint32_t* src,
int num_pixels, uint8_t* dst) {
void VP8LConvertBGRAToRGBA_C(const uint32_t* WEBP_RESTRICT src,
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
const uint32_t* const src_end = src + num_pixels;
while (src < src_end) {
const uint32_t argb = *src++;
@ -464,8 +464,8 @@ void VP8LConvertBGRAToRGBA_C(const uint32_t* src,
}
}
void VP8LConvertBGRAToRGBA4444_C(const uint32_t* src,
int num_pixels, uint8_t* dst) {
void VP8LConvertBGRAToRGBA4444_C(const uint32_t* WEBP_RESTRICT src,
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
const uint32_t* const src_end = src + num_pixels;
while (src < src_end) {
const uint32_t argb = *src++;
@ -481,8 +481,8 @@ void VP8LConvertBGRAToRGBA4444_C(const uint32_t* src,
}
}
void VP8LConvertBGRAToRGB565_C(const uint32_t* src,
int num_pixels, uint8_t* dst) {
void VP8LConvertBGRAToRGB565_C(const uint32_t* WEBP_RESTRICT src,
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
const uint32_t* const src_end = src + num_pixels;
while (src < src_end) {
const uint32_t argb = *src++;
@ -498,8 +498,8 @@ void VP8LConvertBGRAToRGB565_C(const uint32_t* src,
}
}
void VP8LConvertBGRAToBGR_C(const uint32_t* src,
int num_pixels, uint8_t* dst) {
void VP8LConvertBGRAToBGR_C(const uint32_t* WEBP_RESTRICT src,
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
const uint32_t* const src_end = src + num_pixels;
while (src < src_end) {
const uint32_t argb = *src++;
@ -509,8 +509,8 @@ void VP8LConvertBGRAToBGR_C(const uint32_t* src,
}
}
static void CopyOrSwap(const uint32_t* src, int num_pixels, uint8_t* dst,
int swap_on_big_endian) {
static void CopyOrSwap(const uint32_t* WEBP_RESTRICT src, int num_pixels,
uint8_t* WEBP_RESTRICT dst, int swap_on_big_endian) {
if (is_big_endian() == swap_on_big_endian) {
const uint32_t* const src_end = src + num_pixels;
while (src < src_end) {

View File

@ -18,6 +18,7 @@
#include "src/webp/types.h"
#include "src/webp/decode.h"
#include "src/dsp/dsp.h"
#include "src/enc/histogram_enc.h"
#include "src/utils/utils.h"
@ -60,7 +61,7 @@ uint32_t VP8LPredictor13_C(const uint32_t* const left,
// These Add/Sub function expects upper[-1] and out[-1] to be readable.
typedef void (*VP8LPredictorAddSubFunc)(const uint32_t* in,
const uint32_t* upper, int num_pixels,
uint32_t* out);
uint32_t* WEBP_RESTRICT out);
extern VP8LPredictorAddSubFunc VP8LPredictorsAdd[16];
extern VP8LPredictorAddSubFunc VP8LPredictorsAdd_C[16];
@ -91,8 +92,8 @@ void VP8LInverseTransform(const struct VP8LTransform* const transform,
const uint32_t* const in, uint32_t* const out);
// Color space conversion.
typedef void (*VP8LConvertFunc)(const uint32_t* src, int num_pixels,
uint8_t* dst);
typedef void (*VP8LConvertFunc)(const uint32_t* WEBP_RESTRICT src,
int num_pixels, uint8_t* WEBP_RESTRICT dst);
extern VP8LConvertFunc VP8LConvertBGRAToRGB;
extern VP8LConvertFunc VP8LConvertBGRAToRGBA;
extern VP8LConvertFunc VP8LConvertBGRAToRGBA4444;
@ -145,29 +146,33 @@ void VP8LDspInit(void);
typedef void (*VP8LProcessEncBlueAndRedFunc)(uint32_t* dst, int num_pixels);
extern VP8LProcessEncBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed;
typedef void (*VP8LTransformColorFunc)(const VP8LMultipliers* const m,
uint32_t* dst, int num_pixels);
typedef void (*VP8LTransformColorFunc)(
const VP8LMultipliers* WEBP_RESTRICT const m, uint32_t* WEBP_RESTRICT dst,
int num_pixels);
extern VP8LTransformColorFunc VP8LTransformColor;
typedef void (*VP8LCollectColorBlueTransformsFunc)(
const uint32_t* argb, int stride,
const uint32_t* WEBP_RESTRICT argb, int stride,
int tile_width, int tile_height,
int green_to_blue, int red_to_blue, uint32_t histo[]);
extern VP8LCollectColorBlueTransformsFunc VP8LCollectColorBlueTransforms;
typedef void (*VP8LCollectColorRedTransformsFunc)(
const uint32_t* argb, int stride,
const uint32_t* WEBP_RESTRICT argb, int stride,
int tile_width, int tile_height,
int green_to_red, uint32_t histo[]);
extern VP8LCollectColorRedTransformsFunc VP8LCollectColorRedTransforms;
// Expose some C-only fallback functions
void VP8LTransformColor_C(const VP8LMultipliers* const m,
uint32_t* data, int num_pixels);
void VP8LSubtractGreenFromBlueAndRed_C(uint32_t* argb_data, int num_pixels);
void VP8LCollectColorRedTransforms_C(const uint32_t* argb, int stride,
void VP8LTransformColor_C(const VP8LMultipliers* WEBP_RESTRICT const m,
uint32_t* WEBP_RESTRICT data, int num_pixels);
void VP8LSubtractGreenFromBlueAndRed_C(uint32_t* WEBP_RESTRICT argb_data,
int num_pixels);
void VP8LCollectColorRedTransforms_C(const uint32_t* WEBP_RESTRICT argb,
int stride,
int tile_width, int tile_height,
int green_to_red, uint32_t histo[]);
void VP8LCollectColorBlueTransforms_C(const uint32_t* argb, int stride,
void VP8LCollectColorBlueTransforms_C(const uint32_t* WEBP_RESTRICT argb,
int stride,
int tile_width, int tile_height,
int green_to_blue, int red_to_blue,
uint32_t histo[]);
@ -179,7 +184,8 @@ extern VP8LPredictorAddSubFunc VP8LPredictorsSub_C[16];
// Huffman-cost related functions.
typedef uint32_t (*VP8LCostFunc)(const uint32_t* population, int length);
typedef uint32_t (*VP8LCostCombinedFunc)(const uint32_t* X, const uint32_t* Y,
typedef uint32_t (*VP8LCostCombinedFunc)(const uint32_t* WEBP_RESTRICT X,
const uint32_t* WEBP_RESTRICT Y,
int length);
typedef uint64_t (*VP8LCombinedShannonEntropyFunc)(const uint32_t X[256],
const uint32_t Y[256]);
@ -210,26 +216,30 @@ void VP8LBitEntropyInit(VP8LBitEntropy* const entropy);
// codec specific heuristics.
typedef void (*VP8LGetCombinedEntropyUnrefinedFunc)(
const uint32_t X[], const uint32_t Y[], int length,
VP8LBitEntropy* const bit_entropy, VP8LStreaks* const stats);
VP8LBitEntropy* WEBP_RESTRICT const bit_entropy,
VP8LStreaks* WEBP_RESTRICT const stats);
extern VP8LGetCombinedEntropyUnrefinedFunc VP8LGetCombinedEntropyUnrefined;
// Get the entropy for the distribution 'X'.
typedef void (*VP8LGetEntropyUnrefinedFunc)(const uint32_t X[], int length,
VP8LBitEntropy* const bit_entropy,
VP8LStreaks* const stats);
typedef void (*VP8LGetEntropyUnrefinedFunc)(
const uint32_t X[], int length,
VP8LBitEntropy* WEBP_RESTRICT const bit_entropy,
VP8LStreaks* WEBP_RESTRICT const stats);
extern VP8LGetEntropyUnrefinedFunc VP8LGetEntropyUnrefined;
void VP8LBitsEntropyUnrefined(const uint32_t* const array, int n,
VP8LBitEntropy* const entropy);
typedef void (*VP8LAddVectorFunc)(const uint32_t* a, const uint32_t* b,
uint32_t* out, int size);
typedef void (*VP8LAddVectorFunc)(const uint32_t* WEBP_RESTRICT a,
const uint32_t* WEBP_RESTRICT b,
uint32_t* WEBP_RESTRICT out, int size);
extern VP8LAddVectorFunc VP8LAddVector;
typedef void (*VP8LAddVectorEqFunc)(const uint32_t* a, uint32_t* out, int size);
typedef void (*VP8LAddVectorEqFunc)(const uint32_t* WEBP_RESTRICT a,
uint32_t* WEBP_RESTRICT out, int size);
extern VP8LAddVectorEqFunc VP8LAddVectorEq;
void VP8LHistogramAdd(const VP8LHistogram* const a,
const VP8LHistogram* const b,
VP8LHistogram* const out);
void VP8LHistogramAdd(const VP8LHistogram* WEBP_RESTRICT const a,
const VP8LHistogram* WEBP_RESTRICT const b,
VP8LHistogram* WEBP_RESTRICT const out);
// -----------------------------------------------------------------------------
// PrefixEncode()
@ -239,11 +249,12 @@ typedef int (*VP8LVectorMismatchFunc)(const uint32_t* const array1,
// Returns the first index where array1 and array2 are different.
extern VP8LVectorMismatchFunc VP8LVectorMismatch;
typedef void (*VP8LBundleColorMapFunc)(const uint8_t* const row, int width,
int xbits, uint32_t* dst);
typedef void (*VP8LBundleColorMapFunc)(const uint8_t* WEBP_RESTRICT const row,
int width, int xbits,
uint32_t* WEBP_RESTRICT dst);
extern VP8LBundleColorMapFunc VP8LBundleColorMap;
void VP8LBundleColorMap_C(const uint8_t* const row, int width, int xbits,
uint32_t* dst);
void VP8LBundleColorMap_C(const uint8_t* WEBP_RESTRICT const row,
int width, int xbits, uint32_t* WEBP_RESTRICT dst);
// Must be called before calling any of the above methods.
void VP8LEncDspInit(void);

View File

@ -194,15 +194,15 @@ uint32_t VP8LSubPixels(uint32_t a, uint32_t b) {
// The predictor is added to the output pixel (which
// is therefore considered as a residual) to get the final prediction.
#define GENERATE_PREDICTOR_ADD(PREDICTOR, PREDICTOR_ADD) \
static void PREDICTOR_ADD(const uint32_t* in, const uint32_t* upper, \
int num_pixels, uint32_t* out) { \
int x; \
assert(upper != NULL); \
for (x = 0; x < num_pixels; ++x) { \
const uint32_t pred = (PREDICTOR)(&out[x - 1], upper + x); \
out[x] = VP8LAddPixels(in[x], pred); \
} \
#define GENERATE_PREDICTOR_ADD(PREDICTOR, PREDICTOR_ADD) \
static void PREDICTOR_ADD(const uint32_t* in, const uint32_t* upper, \
int num_pixels, uint32_t* WEBP_RESTRICT out) { \
int x; \
assert(upper != NULL); \
for (x = 0; x < num_pixels; ++x) { \
const uint32_t pred = (PREDICTOR)(&out[x - 1], upper + x); \
out[x] = VP8LAddPixels(in[x], pred); \
} \
}
#ifdef __cplusplus

View File

@ -359,8 +359,8 @@ void VP8LBitEntropyInit(VP8LBitEntropy* const entropy) {
entropy->nonzero_code = VP8L_NON_TRIVIAL_SYM;
}
void VP8LBitsEntropyUnrefined(const uint32_t* const array, int n,
VP8LBitEntropy* const entropy) {
void VP8LBitsEntropyUnrefined(const uint32_t* WEBP_RESTRICT const array, int n,
VP8LBitEntropy* WEBP_RESTRICT const entropy) {
int i;
VP8LBitEntropyInit(entropy);
@ -380,8 +380,10 @@ void VP8LBitsEntropyUnrefined(const uint32_t* const array, int n,
}
static WEBP_INLINE void GetEntropyUnrefinedHelper(
uint32_t val, int i, uint32_t* const val_prev, int* const i_prev,
VP8LBitEntropy* const bit_entropy, VP8LStreaks* const stats) {
uint32_t val, int i, uint32_t* WEBP_RESTRICT const val_prev,
int* WEBP_RESTRICT const i_prev,
VP8LBitEntropy* WEBP_RESTRICT const bit_entropy,
VP8LStreaks* WEBP_RESTRICT const stats) {
const int streak = i - *i_prev;
// Gather info for the bit entropy.
@ -403,9 +405,10 @@ static WEBP_INLINE void GetEntropyUnrefinedHelper(
*i_prev = i;
}
static void GetEntropyUnrefined_C(const uint32_t X[], int length,
VP8LBitEntropy* const bit_entropy,
VP8LStreaks* const stats) {
static void GetEntropyUnrefined_C(
const uint32_t X[], int length,
VP8LBitEntropy* WEBP_RESTRICT const bit_entropy,
VP8LStreaks* WEBP_RESTRICT const stats) {
int i;
int i_prev = 0;
uint32_t x_prev = X[0];
@ -424,11 +427,10 @@ static void GetEntropyUnrefined_C(const uint32_t X[], int length,
bit_entropy->entropy = VP8LFastSLog2(bit_entropy->sum) - bit_entropy->entropy;
}
static void GetCombinedEntropyUnrefined_C(const uint32_t X[],
const uint32_t Y[],
int length,
VP8LBitEntropy* const bit_entropy,
VP8LStreaks* const stats) {
static void GetCombinedEntropyUnrefined_C(
const uint32_t X[], const uint32_t Y[], int length,
VP8LBitEntropy* WEBP_RESTRICT const bit_entropy,
VP8LStreaks* WEBP_RESTRICT const stats) {
int i = 1;
int i_prev = 0;
uint32_t xy_prev = X[0] + Y[0];
@ -468,8 +470,8 @@ static WEBP_INLINE int8_t U32ToS8(uint32_t v) {
return (int8_t)(v & 0xff);
}
void VP8LTransformColor_C(const VP8LMultipliers* const m, uint32_t* data,
int num_pixels) {
void VP8LTransformColor_C(const VP8LMultipliers* WEBP_RESTRICT const m,
uint32_t* WEBP_RESTRICT data, int num_pixels) {
int i;
for (i = 0; i < num_pixels; ++i) {
const uint32_t argb = data[i];
@ -505,7 +507,8 @@ static WEBP_INLINE uint8_t TransformColorBlue(uint8_t green_to_blue,
return (new_blue & 0xff);
}
void VP8LCollectColorRedTransforms_C(const uint32_t* argb, int stride,
void VP8LCollectColorRedTransforms_C(const uint32_t* WEBP_RESTRICT argb,
int stride,
int tile_width, int tile_height,
int green_to_red, uint32_t histo[]) {
while (tile_height-- > 0) {
@ -517,7 +520,8 @@ void VP8LCollectColorRedTransforms_C(const uint32_t* argb, int stride,
}
}
void VP8LCollectColorBlueTransforms_C(const uint32_t* argb, int stride,
void VP8LCollectColorBlueTransforms_C(const uint32_t* WEBP_RESTRICT argb,
int stride,
int tile_width, int tile_height,
int green_to_blue, int red_to_blue,
uint32_t histo[]) {
@ -544,8 +548,8 @@ static int VectorMismatch_C(const uint32_t* const array1,
}
// Bundles multiple (1, 2, 4 or 8) pixels into a single pixel.
void VP8LBundleColorMap_C(const uint8_t* const row, int width, int xbits,
uint32_t* dst) {
void VP8LBundleColorMap_C(const uint8_t* WEBP_RESTRICT const row,
int width, int xbits, uint32_t* WEBP_RESTRICT dst) {
int x;
if (xbits > 0) {
const int bit_depth = 1 << (3 - xbits);
@ -576,7 +580,8 @@ static uint32_t ExtraCost_C(const uint32_t* population, int length) {
return cost;
}
static uint32_t ExtraCostCombined_C(const uint32_t* X, const uint32_t* Y,
static uint32_t ExtraCostCombined_C(const uint32_t* WEBP_RESTRICT X,
const uint32_t* WEBP_RESTRICT Y,
int length) {
int i;
uint32_t cost = X[4] + Y[4] + X[5] + Y[5];
@ -591,13 +596,15 @@ static uint32_t ExtraCostCombined_C(const uint32_t* X, const uint32_t* Y,
//------------------------------------------------------------------------------
static void AddVector_C(const uint32_t* a, const uint32_t* b, uint32_t* out,
int size) {
static void AddVector_C(const uint32_t* WEBP_RESTRICT a,
const uint32_t* WEBP_RESTRICT b,
uint32_t* WEBP_RESTRICT out, int size) {
int i;
for (i = 0; i < size; ++i) out[i] = a[i] + b[i];
}
static void AddVectorEq_C(const uint32_t* a, uint32_t* out, int size) {
static void AddVectorEq_C(const uint32_t* WEBP_RESTRICT a,
uint32_t* WEBP_RESTRICT out, int size) {
int i;
for (i = 0; i < size; ++i) out[i] += a[i];
}
@ -626,8 +633,9 @@ static void AddVectorEq_C(const uint32_t* a, uint32_t* out, int size) {
} \
} while (0)
void VP8LHistogramAdd(const VP8LHistogram* const a,
const VP8LHistogram* const b, VP8LHistogram* const out) {
void VP8LHistogramAdd(const VP8LHistogram* WEBP_RESTRICT const a,
const VP8LHistogram* WEBP_RESTRICT const b,
VP8LHistogram* WEBP_RESTRICT const out) {
int i;
const int literal_size = VP8LHistogramNumCodes(a->palette_code_bits_);
assert(a->palette_code_bits_ == b->palette_code_bits_);
@ -657,14 +665,14 @@ void VP8LHistogramAdd(const VP8LHistogram* const a,
// Image transforms.
static void PredictorSub0_C(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* out) {
int num_pixels, uint32_t* WEBP_RESTRICT out) {
int i;
for (i = 0; i < num_pixels; ++i) out[i] = VP8LSubPixels(in[i], ARGB_BLACK);
(void)upper;
}
static void PredictorSub1_C(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* out) {
int num_pixels, uint32_t* WEBP_RESTRICT out) {
int i;
for (i = 0; i < num_pixels; ++i) out[i] = VP8LSubPixels(in[i], in[i - 1]);
(void)upper;
@ -675,7 +683,8 @@ static void PredictorSub1_C(const uint32_t* in, const uint32_t* upper,
#define GENERATE_PREDICTOR_SUB(PREDICTOR_I) \
static void PredictorSub##PREDICTOR_I##_C(const uint32_t* in, \
const uint32_t* upper, \
int num_pixels, uint32_t* out) { \
int num_pixels, \
uint32_t* WEBP_RESTRICT out) { \
int x; \
assert(upper != NULL); \
for (x = 0; x < num_pixels; ++x) { \

View File

@ -149,8 +149,9 @@ static uint32_t ExtraCost_MIPS32(const uint32_t* const population, int length) {
// pY += 2;
// }
// return cost;
static uint32_t ExtraCostCombined_MIPS32(const uint32_t* const X,
const uint32_t* const Y, int length) {
static uint32_t ExtraCostCombined_MIPS32(const uint32_t* WEBP_RESTRICT const X,
const uint32_t* WEBP_RESTRICT const Y,
int length) {
int i, temp0, temp1, temp2, temp3;
const uint32_t* pX = &X[4];
const uint32_t* pY = &Y[4];
@ -215,8 +216,10 @@ static uint32_t ExtraCostCombined_MIPS32(const uint32_t* const X,
// Returns the various RLE counts
static WEBP_INLINE void GetEntropyUnrefinedHelper(
uint32_t val, int i, uint32_t* const val_prev, int* const i_prev,
VP8LBitEntropy* const bit_entropy, VP8LStreaks* const stats) {
uint32_t val, int i, uint32_t* WEBP_RESTRICT const val_prev,
int* WEBP_RESTRICT const i_prev,
VP8LBitEntropy* WEBP_RESTRICT const bit_entropy,
VP8LStreaks* WEBP_RESTRICT const stats) {
int* const pstreaks = &stats->streaks[0][0];
int* const pcnts = &stats->counts[0];
int temp0, temp1, temp2, temp3;
@ -241,9 +244,10 @@ static WEBP_INLINE void GetEntropyUnrefinedHelper(
*i_prev = i;
}
static void GetEntropyUnrefined_MIPS32(const uint32_t X[], int length,
VP8LBitEntropy* const bit_entropy,
VP8LStreaks* const stats) {
static void GetEntropyUnrefined_MIPS32(
const uint32_t X[], int length,
VP8LBitEntropy* WEBP_RESTRICT const bit_entropy,
VP8LStreaks* WEBP_RESTRICT const stats) {
int i;
int i_prev = 0;
uint32_t x_prev = X[0];
@ -262,11 +266,10 @@ static void GetEntropyUnrefined_MIPS32(const uint32_t X[], int length,
bit_entropy->entropy = VP8LFastSLog2(bit_entropy->sum) - bit_entropy->entropy;
}
static void GetCombinedEntropyUnrefined_MIPS32(const uint32_t X[],
const uint32_t Y[],
int length,
VP8LBitEntropy* const entropy,
VP8LStreaks* const stats) {
static void GetCombinedEntropyUnrefined_MIPS32(
const uint32_t X[], const uint32_t Y[], int length,
VP8LBitEntropy* WEBP_RESTRICT const entropy,
VP8LStreaks* WEBP_RESTRICT const stats) {
int i = 1;
int i_prev = 0;
uint32_t xy_prev = X[0] + Y[0];
@ -344,8 +347,9 @@ static void GetCombinedEntropyUnrefined_MIPS32(const uint32_t X[],
ASM_END_COMMON_0 \
ASM_END_COMMON_1
static void AddVector_MIPS32(const uint32_t* pa, const uint32_t* pb,
uint32_t* pout, int size) {
static void AddVector_MIPS32(const uint32_t* WEBP_RESTRICT pa,
const uint32_t* WEBP_RESTRICT pb,
uint32_t* WEBP_RESTRICT pout, int size) {
uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
const int end = ((size) / 4) * 4;
const uint32_t* const LoopEnd = pa + end;
@ -356,7 +360,8 @@ static void AddVector_MIPS32(const uint32_t* pa, const uint32_t* pb,
for (i = 0; i < size - end; ++i) pout[i] = pa[i] + pb[i];
}
static void AddVectorEq_MIPS32(const uint32_t* pa, uint32_t* pout, int size) {
static void AddVectorEq_MIPS32(const uint32_t* WEBP_RESTRICT pa,
uint32_t* WEBP_RESTRICT pout, int size) {
uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
const int end = ((size) / 4) * 4;
const uint32_t* const LoopEnd = pa + end;

View File

@ -78,8 +78,9 @@ static WEBP_INLINE uint32_t ColorTransformDelta(int8_t color_pred,
return (uint32_t)((int)(color_pred) * color) >> 5;
}
static void TransformColor_MIPSdspR2(const VP8LMultipliers* const m,
uint32_t* data, int num_pixels) {
static void TransformColor_MIPSdspR2(
const VP8LMultipliers* WEBP_RESTRICT const m, uint32_t* WEBP_RESTRICT data,
int num_pixels) {
int temp0, temp1, temp2, temp3, temp4, temp5;
uint32_t argb, argb1, new_red, new_red1;
const uint32_t G_to_R = m->green_to_red_;
@ -172,7 +173,8 @@ static WEBP_INLINE uint8_t TransformColorBlue(uint8_t green_to_blue,
}
static void CollectColorBlueTransforms_MIPSdspR2(
const uint32_t* argb, int stride, int tile_width, int tile_height,
const uint32_t* WEBP_RESTRICT argb, int stride,
int tile_width, int tile_height,
int green_to_blue, int red_to_blue, uint32_t histo[]) {
const int rtb = (red_to_blue << 16) | (red_to_blue & 0xffff);
const int gtb = (green_to_blue << 16) | (green_to_blue & 0xffff);
@ -221,11 +223,9 @@ static WEBP_INLINE uint8_t TransformColorRed(uint8_t green_to_red,
return (new_red & 0xff);
}
static void CollectColorRedTransforms_MIPSdspR2(const uint32_t* argb,
int stride, int tile_width,
int tile_height,
int green_to_red,
uint32_t histo[]) {
static void CollectColorRedTransforms_MIPSdspR2(
const uint32_t* WEBP_RESTRICT argb, int stride,
int tile_width, int tile_height, int green_to_red, uint32_t histo[]) {
const int gtr = (green_to_red << 16) | (green_to_red & 0xffff);
while (tile_height-- > 0) {
int x;

View File

@ -48,8 +48,8 @@
dst = VSHF_UB(src, t0, mask1); \
} while (0)
static void TransformColor_MSA(const VP8LMultipliers* const m, uint32_t* data,
int num_pixels) {
static void TransformColor_MSA(const VP8LMultipliers* WEBP_RESTRICT const m,
uint32_t* WEBP_RESTRICT data, int num_pixels) {
v16u8 src0, dst0;
const v16i8 g2br = (v16i8)__msa_fill_w(m->green_to_blue_ |
(m->green_to_red_ << 16));

View File

@ -72,8 +72,9 @@ static void SubtractGreenFromBlueAndRed_NEON(uint32_t* argb_data,
//------------------------------------------------------------------------------
// Color Transform
static void TransformColor_NEON(const VP8LMultipliers* const m,
uint32_t* argb_data, int num_pixels) {
static void TransformColor_NEON(const VP8LMultipliers* WEBP_RESTRICT const m,
uint32_t* WEBP_RESTRICT argb_data,
int num_pixels) {
// sign-extended multiplying constants, pre-shifted by 6.
#define CST(X) (((int16_t)(m->X << 8)) >> 6)
const int16_t rb[8] = {

View File

@ -49,8 +49,9 @@ static void SubtractGreenFromBlueAndRed_SSE2(uint32_t* argb_data,
#define MK_CST_16(HI, LO) \
_mm_set1_epi32((int)(((uint32_t)(HI) << 16) | ((LO) & 0xffff)))
static void TransformColor_SSE2(const VP8LMultipliers* const m,
uint32_t* argb_data, int num_pixels) {
static void TransformColor_SSE2(const VP8LMultipliers* WEBP_RESTRICT const m,
uint32_t* WEBP_RESTRICT argb_data,
int num_pixels) {
const __m128i mults_rb = MK_CST_16(CST_5b(m->green_to_red_),
CST_5b(m->green_to_blue_));
const __m128i mults_b2 = MK_CST_16(CST_5b(m->red_to_blue_), 0);
@ -79,7 +80,8 @@ static void TransformColor_SSE2(const VP8LMultipliers* const m,
//------------------------------------------------------------------------------
#define SPAN 8
static void CollectColorBlueTransforms_SSE2(const uint32_t* argb, int stride,
static void CollectColorBlueTransforms_SSE2(const uint32_t* WEBP_RESTRICT argb,
int stride,
int tile_width, int tile_height,
int green_to_blue, int red_to_blue,
uint32_t histo[]) {
@ -126,7 +128,8 @@ static void CollectColorBlueTransforms_SSE2(const uint32_t* argb, int stride,
}
}
static void CollectColorRedTransforms_SSE2(const uint32_t* argb, int stride,
static void CollectColorRedTransforms_SSE2(const uint32_t* WEBP_RESTRICT argb,
int stride,
int tile_width, int tile_height,
int green_to_red, uint32_t histo[]) {
const __m128i mults_g = MK_CST_16(0, CST_5b(green_to_red));
@ -173,8 +176,9 @@ static void CollectColorRedTransforms_SSE2(const uint32_t* argb, int stride,
// Note we are adding uint32_t's as *signed* int32's (using _mm_add_epi32). But
// that's ok since the histogram values are less than 1<<28 (max picture size).
#define LINE_SIZE 16 // 8 or 16
static void AddVector_SSE2(const uint32_t* a, const uint32_t* b, uint32_t* out,
int size) {
static void AddVector_SSE2(const uint32_t* WEBP_RESTRICT a,
const uint32_t* WEBP_RESTRICT b,
uint32_t* WEBP_RESTRICT out, int size) {
int i;
for (i = 0; i + LINE_SIZE <= size; i += LINE_SIZE) {
const __m128i a0 = _mm_loadu_si128((const __m128i*)&a[i + 0]);
@ -201,7 +205,8 @@ static void AddVector_SSE2(const uint32_t* a, const uint32_t* b, uint32_t* out,
}
}
static void AddVectorEq_SSE2(const uint32_t* a, uint32_t* out, int size) {
static void AddVectorEq_SSE2(const uint32_t* WEBP_RESTRICT a,
uint32_t* WEBP_RESTRICT out, int size) {
int i;
for (i = 0; i + LINE_SIZE <= size; i += LINE_SIZE) {
const __m128i a0 = _mm_loadu_si128((const __m128i*)&a[i + 0]);
@ -333,8 +338,9 @@ static int VectorMismatch_SSE2(const uint32_t* const array1,
}
// Bundles multiple (1, 2, 4 or 8) pixels into a single pixel.
static void BundleColorMap_SSE2(const uint8_t* const row, int width, int xbits,
uint32_t* dst) {
static void BundleColorMap_SSE2(const uint8_t* WEBP_RESTRICT const row,
int width, int xbits,
uint32_t* WEBP_RESTRICT dst) {
int x;
assert(xbits >= 0);
assert(xbits <= 3);
@ -423,7 +429,7 @@ static WEBP_INLINE void Average2_m128i(const __m128i* const a0,
// Predictor0: ARGB_BLACK.
static void PredictorSub0_SSE2(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* out) {
int num_pixels, uint32_t* WEBP_RESTRICT out) {
int i;
const __m128i black = _mm_set1_epi32((int)ARGB_BLACK);
for (i = 0; i + 4 <= num_pixels; i += 4) {
@ -440,7 +446,8 @@ static void PredictorSub0_SSE2(const uint32_t* in, const uint32_t* upper,
#define GENERATE_PREDICTOR_1(X, IN) \
static void PredictorSub##X##_SSE2(const uint32_t* const in, \
const uint32_t* const upper, \
int num_pixels, uint32_t* const out) { \
int num_pixels, \
uint32_t* WEBP_RESTRICT const out) { \
int i; \
for (i = 0; i + 4 <= num_pixels; i += 4) { \
const __m128i src = _mm_loadu_si128((const __m128i*)&in[i]); \
@ -462,7 +469,7 @@ GENERATE_PREDICTOR_1(4, upper[i - 1]) // Predictor4: TL
// Predictor5: avg2(avg2(L, TR), T)
static void PredictorSub5_SSE2(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* out) {
int num_pixels, uint32_t* WEBP_RESTRICT out) {
int i;
for (i = 0; i + 4 <= num_pixels; i += 4) {
const __m128i L = _mm_loadu_si128((const __m128i*)&in[i - 1]);
@ -482,7 +489,8 @@ static void PredictorSub5_SSE2(const uint32_t* in, const uint32_t* upper,
#define GENERATE_PREDICTOR_2(X, A, B) \
static void PredictorSub##X##_SSE2(const uint32_t* in, const uint32_t* upper, \
int num_pixels, uint32_t* out) { \
int num_pixels, \
uint32_t* WEBP_RESTRICT out) { \
int i; \
for (i = 0; i + 4 <= num_pixels; i += 4) { \
const __m128i tA = _mm_loadu_si128((const __m128i*)&(A)); \
@ -506,7 +514,7 @@ GENERATE_PREDICTOR_2(9, upper[i], upper[i + 1]) // Predictor9: average(T, TR)
// Predictor10: avg(avg(L,TL), avg(T, TR)).
static void PredictorSub10_SSE2(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* out) {
int num_pixels, uint32_t* WEBP_RESTRICT out) {
int i;
for (i = 0; i + 4 <= num_pixels; i += 4) {
const __m128i L = _mm_loadu_si128((const __m128i*)&in[i - 1]);
@ -541,7 +549,7 @@ static void GetSumAbsDiff32_SSE2(const __m128i* const A, const __m128i* const B,
}
static void PredictorSub11_SSE2(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* out) {
int num_pixels, uint32_t* WEBP_RESTRICT out) {
int i;
for (i = 0; i + 4 <= num_pixels; i += 4) {
const __m128i L = _mm_loadu_si128((const __m128i*)&in[i - 1]);
@ -567,7 +575,7 @@ static void PredictorSub11_SSE2(const uint32_t* in, const uint32_t* upper,
// Predictor12: ClampedSubSubtractFull.
static void PredictorSub12_SSE2(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* out) {
int num_pixels, uint32_t* WEBP_RESTRICT out) {
int i;
const __m128i zero = _mm_setzero_si128();
for (i = 0; i + 4 <= num_pixels; i += 4) {
@ -596,7 +604,7 @@ static void PredictorSub12_SSE2(const uint32_t* in, const uint32_t* upper,
// Predictors13: ClampedAddSubtractHalf
static void PredictorSub13_SSE2(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* out) {
int num_pixels, uint32_t* WEBP_RESTRICT out) {
int i;
const __m128i zero = _mm_setzero_si128();
for (i = 0; i + 2 <= num_pixels; i += 2) {

View File

@ -44,8 +44,9 @@ static uint32_t ExtraCost_SSE41(const uint32_t* const a, int length) {
return HorizontalSum_SSE41(cost);
}
static uint32_t ExtraCostCombined_SSE41(const uint32_t* const a,
const uint32_t* const b, int length) {
static uint32_t ExtraCostCombined_SSE41(const uint32_t* WEBP_RESTRICT const a,
const uint32_t* WEBP_RESTRICT const b,
int length) {
int i;
__m128i cost = _mm_add_epi32(_mm_set_epi32(2 * a[7], 2 * a[6], a[5], a[4]),
_mm_set_epi32(2 * b[7], 2 * b[6], b[5], b[4]));
@ -95,7 +96,8 @@ static void SubtractGreenFromBlueAndRed_SSE41(uint32_t* argb_data,
#define MK_CST_16(HI, LO) \
_mm_set1_epi32((int)(((uint32_t)(HI) << 16) | ((LO) & 0xffff)))
static void CollectColorBlueTransforms_SSE41(const uint32_t* argb, int stride,
static void CollectColorBlueTransforms_SSE41(const uint32_t* WEBP_RESTRICT argb,
int stride,
int tile_width, int tile_height,
int green_to_blue, int red_to_blue,
uint32_t histo[]) {
@ -141,7 +143,8 @@ static void CollectColorBlueTransforms_SSE41(const uint32_t* argb, int stride,
}
}
static void CollectColorRedTransforms_SSE41(const uint32_t* argb, int stride,
static void CollectColorRedTransforms_SSE41(const uint32_t* WEBP_RESTRICT argb,
int stride,
int tile_width, int tile_height,
int green_to_red,
uint32_t histo[]) {

View File

@ -26,8 +26,8 @@
#if !defined(WORK_AROUND_GCC)
// gcc 4.6.0 had some trouble (NDK-r9) with this code. We only use it for
// gcc-4.8.x at least.
static void ConvertBGRAToRGBA_NEON(const uint32_t* src,
int num_pixels, uint8_t* dst) {
static void ConvertBGRAToRGBA_NEON(const uint32_t* WEBP_RESTRICT src,
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
const uint32_t* const end = src + (num_pixels & ~15);
for (; src < end; src += 16) {
uint8x16x4_t pixel = vld4q_u8((uint8_t*)src);
@ -41,8 +41,8 @@ static void ConvertBGRAToRGBA_NEON(const uint32_t* src,
VP8LConvertBGRAToRGBA_C(src, num_pixels & 15, dst); // left-overs
}
static void ConvertBGRAToBGR_NEON(const uint32_t* src,
int num_pixels, uint8_t* dst) {
static void ConvertBGRAToBGR_NEON(const uint32_t* WEBP_RESTRICT src,
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
const uint32_t* const end = src + (num_pixels & ~15);
for (; src < end; src += 16) {
const uint8x16x4_t pixel = vld4q_u8((uint8_t*)src);
@ -53,8 +53,8 @@ static void ConvertBGRAToBGR_NEON(const uint32_t* src,
VP8LConvertBGRAToBGR_C(src, num_pixels & 15, dst); // left-overs
}
static void ConvertBGRAToRGB_NEON(const uint32_t* src,
int num_pixels, uint8_t* dst) {
static void ConvertBGRAToRGB_NEON(const uint32_t* WEBP_RESTRICT src,
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
const uint32_t* const end = src + (num_pixels & ~15);
for (; src < end; src += 16) {
const uint8x16x4_t pixel = vld4q_u8((uint8_t*)src);
@ -71,8 +71,8 @@ static void ConvertBGRAToRGB_NEON(const uint32_t* src,
static const uint8_t kRGBAShuffle[8] = { 2, 1, 0, 3, 6, 5, 4, 7 };
static void ConvertBGRAToRGBA_NEON(const uint32_t* src,
int num_pixels, uint8_t* dst) {
static void ConvertBGRAToRGBA_NEON(const uint32_t* WEBP_RESTRICT src,
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
const uint32_t* const end = src + (num_pixels & ~1);
const uint8x8_t shuffle = vld1_u8(kRGBAShuffle);
for (; src < end; src += 2) {
@ -89,8 +89,8 @@ static const uint8_t kBGRShuffle[3][8] = {
{ 21, 22, 24, 25, 26, 28, 29, 30 }
};
static void ConvertBGRAToBGR_NEON(const uint32_t* src,
int num_pixels, uint8_t* dst) {
static void ConvertBGRAToBGR_NEON(const uint32_t* WEBP_RESTRICT src,
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
const uint32_t* const end = src + (num_pixels & ~7);
const uint8x8_t shuffle0 = vld1_u8(kBGRShuffle[0]);
const uint8x8_t shuffle1 = vld1_u8(kBGRShuffle[1]);
@ -116,8 +116,8 @@ static const uint8_t kRGBShuffle[3][8] = {
{ 21, 20, 26, 25, 24, 30, 29, 28 }
};
static void ConvertBGRAToRGB_NEON(const uint32_t* src,
int num_pixels, uint8_t* dst) {
static void ConvertBGRAToRGB_NEON(const uint32_t* WEBP_RESTRICT src,
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
const uint32_t* const end = src + (num_pixels & ~7);
const uint8x8_t shuffle0 = vld1_u8(kRGBShuffle[0]);
const uint8x8_t shuffle1 = vld1_u8(kRGBShuffle[1]);
@ -209,7 +209,7 @@ static uint32_t Predictor13_NEON(const uint32_t* const left,
// Predictor0: ARGB_BLACK.
static void PredictorAdd0_NEON(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* out) {
int num_pixels, uint32_t* WEBP_RESTRICT out) {
int i;
const uint8x16_t black = vreinterpretq_u8_u32(vdupq_n_u32(ARGB_BLACK));
for (i = 0; i + 4 <= num_pixels; i += 4) {
@ -222,7 +222,7 @@ static void PredictorAdd0_NEON(const uint32_t* in, const uint32_t* upper,
// Predictor1: left.
static void PredictorAdd1_NEON(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* out) {
int num_pixels, uint32_t* WEBP_RESTRICT out) {
int i;
const uint8x16_t zero = LOADQ_U32_AS_U8(0);
for (i = 0; i + 4 <= num_pixels; i += 4) {
@ -248,7 +248,7 @@ static void PredictorAdd1_NEON(const uint32_t* in, const uint32_t* upper,
#define GENERATE_PREDICTOR_1(X, IN) \
static void PredictorAdd##X##_NEON(const uint32_t* in, \
const uint32_t* upper, int num_pixels, \
uint32_t* out) { \
uint32_t* WEBP_RESTRICT out) { \
int i; \
for (i = 0; i + 4 <= num_pixels; i += 4) { \
const uint8x16_t src = LOADQ_U32P_AS_U8(&in[i]); \
@ -276,7 +276,7 @@ GENERATE_PREDICTOR_1(4, upper[i - 1])
} while (0)
static void PredictorAdd5_NEON(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* out) {
int num_pixels, uint32_t* WEBP_RESTRICT out) {
int i;
uint8x16_t L = LOADQ_U32_AS_U8(out[-1]);
for (i = 0; i + 4 <= num_pixels; i += 4) {
@ -301,7 +301,7 @@ static void PredictorAdd5_NEON(const uint32_t* in, const uint32_t* upper,
// Predictor6: average(left, TL)
static void PredictorAdd6_NEON(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* out) {
int num_pixels, uint32_t* WEBP_RESTRICT out) {
int i;
uint8x16_t L = LOADQ_U32_AS_U8(out[-1]);
for (i = 0; i + 4 <= num_pixels; i += 4) {
@ -317,7 +317,7 @@ static void PredictorAdd6_NEON(const uint32_t* in, const uint32_t* upper,
// Predictor7: average(left, T)
static void PredictorAdd7_NEON(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* out) {
int num_pixels, uint32_t* WEBP_RESTRICT out) {
int i;
uint8x16_t L = LOADQ_U32_AS_U8(out[-1]);
for (i = 0; i + 4 <= num_pixels; i += 4) {
@ -335,7 +335,7 @@ static void PredictorAdd7_NEON(const uint32_t* in, const uint32_t* upper,
#define GENERATE_PREDICTOR_2(X, IN) \
static void PredictorAdd##X##_NEON(const uint32_t* in, \
const uint32_t* upper, int num_pixels, \
uint32_t* out) { \
uint32_t* WEBP_RESTRICT out) { \
int i; \
for (i = 0; i + 4 <= num_pixels; i += 4) { \
const uint8x16_t src = LOADQ_U32P_AS_U8(&in[i]); \
@ -363,7 +363,7 @@ GENERATE_PREDICTOR_2(9, upper[i + 1])
} while (0)
static void PredictorAdd10_NEON(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* out) {
int num_pixels, uint32_t* WEBP_RESTRICT out) {
int i;
uint8x16_t L = LOADQ_U32_AS_U8(out[-1]);
for (i = 0; i + 4 <= num_pixels; i += 4) {
@ -394,7 +394,7 @@ static void PredictorAdd10_NEON(const uint32_t* in, const uint32_t* upper,
} while (0)
static void PredictorAdd11_NEON(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* out) {
int num_pixels, uint32_t* WEBP_RESTRICT out) {
int i;
uint8x16_t L = LOADQ_U32_AS_U8(out[-1]);
for (i = 0; i + 4 <= num_pixels; i += 4) {
@ -427,7 +427,7 @@ static void PredictorAdd11_NEON(const uint32_t* in, const uint32_t* upper,
} while (0)
static void PredictorAdd12_NEON(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* out) {
int num_pixels, uint32_t* WEBP_RESTRICT out) {
int i;
uint16x8_t L = vmovl_u8(LOAD_U32_AS_U8(out[-1]));
for (i = 0; i + 4 <= num_pixels; i += 4) {
@ -468,7 +468,7 @@ static void PredictorAdd12_NEON(const uint32_t* in, const uint32_t* upper,
} while (0)
static void PredictorAdd13_NEON(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* out) {
int num_pixels, uint32_t* WEBP_RESTRICT out) {
int i;
uint8x16_t L = LOADQ_U32_AS_U8(out[-1]);
for (i = 0; i + 4 <= num_pixels; i += 4) {

View File

@ -186,7 +186,7 @@ static uint32_t Predictor13_SSE2(const uint32_t* const left,
// Predictor0: ARGB_BLACK.
static void PredictorAdd0_SSE2(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* out) {
int num_pixels, uint32_t* WEBP_RESTRICT out) {
int i;
const __m128i black = _mm_set1_epi32((int)ARGB_BLACK);
for (i = 0; i + 4 <= num_pixels; i += 4) {
@ -202,7 +202,7 @@ static void PredictorAdd0_SSE2(const uint32_t* in, const uint32_t* upper,
// Predictor1: left.
static void PredictorAdd1_SSE2(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* out) {
int num_pixels, uint32_t* WEBP_RESTRICT out) {
int i;
__m128i prev = _mm_set1_epi32((int)out[-1]);
for (i = 0; i + 4 <= num_pixels; i += 4) {
@ -230,7 +230,8 @@ static void PredictorAdd1_SSE2(const uint32_t* in, const uint32_t* upper,
// per 8 bit channel.
#define GENERATE_PREDICTOR_1(X, IN) \
static void PredictorAdd##X##_SSE2(const uint32_t* in, const uint32_t* upper, \
int num_pixels, uint32_t* out) { \
int num_pixels, \
uint32_t* WEBP_RESTRICT out) { \
int i; \
for (i = 0; i + 4 <= num_pixels; i += 4) { \
const __m128i src = _mm_loadu_si128((const __m128i*)&in[i]); \
@ -259,7 +260,8 @@ GENERATE_PREDICTOR_ADD(Predictor7_SSE2, PredictorAdd7_SSE2)
#define GENERATE_PREDICTOR_2(X, IN) \
static void PredictorAdd##X##_SSE2(const uint32_t* in, const uint32_t* upper, \
int num_pixels, uint32_t* out) { \
int num_pixels, \
uint32_t* WEBP_RESTRICT out) { \
int i; \
for (i = 0; i + 4 <= num_pixels; i += 4) { \
const __m128i Tother = _mm_loadu_si128((const __m128i*)&(IN)); \
@ -297,7 +299,7 @@ GENERATE_PREDICTOR_2(9, upper[i + 1])
} while (0)
static void PredictorAdd10_SSE2(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* out) {
int num_pixels, uint32_t* WEBP_RESTRICT out) {
int i;
__m128i L = _mm_cvtsi32_si128((int)out[-1]);
for (i = 0; i + 4 <= num_pixels; i += 4) {
@ -344,7 +346,7 @@ static void PredictorAdd10_SSE2(const uint32_t* in, const uint32_t* upper,
} while (0)
static void PredictorAdd11_SSE2(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* out) {
int num_pixels, uint32_t* WEBP_RESTRICT out) {
int i;
__m128i pa;
__m128i L = _mm_cvtsi32_si128((int)out[-1]);
@ -395,7 +397,7 @@ static void PredictorAdd11_SSE2(const uint32_t* in, const uint32_t* upper,
} while (0)
static void PredictorAdd12_SSE2(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* out) {
int num_pixels, uint32_t* WEBP_RESTRICT out) {
int i;
const __m128i zero = _mm_setzero_si128();
const __m128i L8 = _mm_cvtsi32_si128((int)out[-1]);
@ -490,8 +492,8 @@ static void TransformColorInverse_SSE2(const VP8LMultipliers* const m,
//------------------------------------------------------------------------------
// Color-space conversion functions
static void ConvertBGRAToRGB_SSE2(const uint32_t* src, int num_pixels,
uint8_t* dst) {
static void ConvertBGRAToRGB_SSE2(const uint32_t* WEBP_RESTRICT src,
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
const __m128i* in = (const __m128i*)src;
__m128i* out = (__m128i*)dst;
@ -526,8 +528,8 @@ static void ConvertBGRAToRGB_SSE2(const uint32_t* src, int num_pixels,
}
}
static void ConvertBGRAToRGBA_SSE2(const uint32_t* src,
int num_pixels, uint8_t* dst) {
static void ConvertBGRAToRGBA_SSE2(const uint32_t* WEBP_RESTRICT src,
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
const __m128i red_blue_mask = _mm_set1_epi32(0x00ff00ff);
const __m128i* in = (const __m128i*)src;
__m128i* out = (__m128i*)dst;
@ -554,8 +556,9 @@ static void ConvertBGRAToRGBA_SSE2(const uint32_t* src,
}
}
static void ConvertBGRAToRGBA4444_SSE2(const uint32_t* src,
int num_pixels, uint8_t* dst) {
static void ConvertBGRAToRGBA4444_SSE2(const uint32_t* WEBP_RESTRICT src,
int num_pixels,
uint8_t* WEBP_RESTRICT dst) {
const __m128i mask_0x0f = _mm_set1_epi8(0x0f);
const __m128i mask_0xf0 = _mm_set1_epi8((char)0xf0);
const __m128i* in = (const __m128i*)src;
@ -590,8 +593,9 @@ static void ConvertBGRAToRGBA4444_SSE2(const uint32_t* src,
}
}
static void ConvertBGRAToRGB565_SSE2(const uint32_t* src,
int num_pixels, uint8_t* dst) {
static void ConvertBGRAToRGB565_SSE2(const uint32_t* WEBP_RESTRICT src,
int num_pixels,
uint8_t* WEBP_RESTRICT dst) {
const __m128i mask_0xe0 = _mm_set1_epi8((char)0xe0);
const __m128i mask_0xf8 = _mm_set1_epi8((char)0xf8);
const __m128i mask_0x07 = _mm_set1_epi8(0x07);
@ -631,8 +635,8 @@ static void ConvertBGRAToRGB565_SSE2(const uint32_t* src,
}
}
static void ConvertBGRAToBGR_SSE2(const uint32_t* src,
int num_pixels, uint8_t* dst) {
static void ConvertBGRAToBGR_SSE2(const uint32_t* WEBP_RESTRICT src,
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
const __m128i mask_l = _mm_set_epi32(0, 0x00ffffff, 0, 0x00ffffff);
const __m128i mask_h = _mm_set_epi32(0x00ffffff, 0, 0x00ffffff, 0);
const __m128i* in = (const __m128i*)src;

View File

@ -77,8 +77,8 @@ static void TransformColorInverse_SSE41(const VP8LMultipliers* const m,
} \
} while (0)
static void ConvertBGRAToRGB_SSE41(const uint32_t* src, int num_pixels,
uint8_t* dst) {
static void ConvertBGRAToRGB_SSE41(const uint32_t* WEBP_RESTRICT src,
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
const __m128i* in = (const __m128i*)src;
__m128i* out = (__m128i*)dst;
const __m128i perm0 = _mm_setr_epi8(2, 1, 0, 6, 5, 4, 10, 9,
@ -95,8 +95,8 @@ static void ConvertBGRAToRGB_SSE41(const uint32_t* src, int num_pixels,
}
}
static void ConvertBGRAToBGR_SSE41(const uint32_t* src,
int num_pixels, uint8_t* dst) {
static void ConvertBGRAToBGR_SSE41(const uint32_t* WEBP_RESTRICT src,
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
const __m128i* in = (const __m128i*)src;
__m128i* out = (__m128i*)dst;
const __m128i perm0 = _mm_setr_epi8(0, 1, 2, 4, 5, 6, 8, 9, 10,