mirror of
https://github.com/webmproject/libwebp.git
synced 2024-11-20 12:28:26 +01:00
enc_sse2: drop SSE2 suffix from local functions
Change-Id: I5d61605a9d410761d50b689b046114f0ab3ba24e
This commit is contained in:
parent
d038e6193b
commit
2ca42a4fb7
@ -52,9 +52,9 @@ static void PrintReg(const __m128i r, const char* const name, int size) {
|
|||||||
// Compute susceptibility based on DCT-coeff histograms:
|
// Compute susceptibility based on DCT-coeff histograms:
|
||||||
// the higher, the "easier" the macroblock is to compress.
|
// the higher, the "easier" the macroblock is to compress.
|
||||||
|
|
||||||
static void CollectHistogramSSE2(const uint8_t* ref, const uint8_t* pred,
|
static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
|
||||||
int start_block, int end_block,
|
int start_block, int end_block,
|
||||||
VP8Histogram* const histo) {
|
VP8Histogram* const histo) {
|
||||||
const __m128i max_coeff_thresh = _mm_set1_epi16(MAX_COEFF_THRESH);
|
const __m128i max_coeff_thresh = _mm_set1_epi16(MAX_COEFF_THRESH);
|
||||||
int j;
|
int j;
|
||||||
for (j = start_block; j < end_block; ++j) {
|
for (j = start_block; j < end_block; ++j) {
|
||||||
@ -98,8 +98,8 @@ static void CollectHistogramSSE2(const uint8_t* ref, const uint8_t* pred,
|
|||||||
// Transforms (Paragraph 14.4)
|
// Transforms (Paragraph 14.4)
|
||||||
|
|
||||||
// Does one or two inverse transforms.
|
// Does one or two inverse transforms.
|
||||||
static void ITransformSSE2(const uint8_t* ref, const int16_t* in, uint8_t* dst,
|
static void ITransform(const uint8_t* ref, const int16_t* in, uint8_t* dst,
|
||||||
int do_two) {
|
int do_two) {
|
||||||
// This implementation makes use of 16-bit fixed point versions of two
|
// This implementation makes use of 16-bit fixed point versions of two
|
||||||
// multiply constants:
|
// multiply constants:
|
||||||
// K1 = sqrt(2) * cos (pi/8) ~= 85627 / 2^16
|
// K1 = sqrt(2) * cos (pi/8) ~= 85627 / 2^16
|
||||||
@ -318,8 +318,7 @@ static void ITransformSSE2(const uint8_t* ref, const int16_t* in, uint8_t* dst,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void FTransformSSE2(const uint8_t* src, const uint8_t* ref,
|
static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
|
||||||
int16_t* out) {
|
|
||||||
const __m128i zero = _mm_setzero_si128();
|
const __m128i zero = _mm_setzero_si128();
|
||||||
const __m128i seven = _mm_set1_epi16(7);
|
const __m128i seven = _mm_set1_epi16(7);
|
||||||
const __m128i k937 = _mm_set1_epi32(937);
|
const __m128i k937 = _mm_set1_epi32(937);
|
||||||
@ -451,7 +450,7 @@ static void FTransformSSE2(const uint8_t* src, const uint8_t* ref,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void FTransformWHTSSE2(const int16_t* in, int16_t* out) {
|
static void FTransformWHT(const int16_t* in, int16_t* out) {
|
||||||
int32_t tmp[16];
|
int32_t tmp[16];
|
||||||
int i;
|
int i;
|
||||||
for (i = 0; i < 4; ++i, in += 64) {
|
for (i = 0; i < 4; ++i, in += 64) {
|
||||||
@ -487,8 +486,8 @@ static void FTransformWHTSSE2(const int16_t* in, int16_t* out) {
|
|||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
// Metric
|
// Metric
|
||||||
|
|
||||||
static int SSE_Nx4SSE2(const uint8_t* a, const uint8_t* b,
|
static int SSE_Nx4(const uint8_t* a, const uint8_t* b,
|
||||||
int num_quads, int do_16) {
|
int num_quads, int do_16) {
|
||||||
const __m128i zero = _mm_setzero_si128();
|
const __m128i zero = _mm_setzero_si128();
|
||||||
__m128i sum1 = zero;
|
__m128i sum1 = zero;
|
||||||
__m128i sum2 = zero;
|
__m128i sum2 = zero;
|
||||||
@ -565,19 +564,19 @@ static int SSE_Nx4SSE2(const uint8_t* a, const uint8_t* b,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static int SSE16x16SSE2(const uint8_t* a, const uint8_t* b) {
|
static int SSE16x16(const uint8_t* a, const uint8_t* b) {
|
||||||
return SSE_Nx4SSE2(a, b, 4, 1);
|
return SSE_Nx4(a, b, 4, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int SSE16x8SSE2(const uint8_t* a, const uint8_t* b) {
|
static int SSE16x8(const uint8_t* a, const uint8_t* b) {
|
||||||
return SSE_Nx4SSE2(a, b, 2, 1);
|
return SSE_Nx4(a, b, 2, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int SSE8x8SSE2(const uint8_t* a, const uint8_t* b) {
|
static int SSE8x8(const uint8_t* a, const uint8_t* b) {
|
||||||
return SSE_Nx4SSE2(a, b, 2, 0);
|
return SSE_Nx4(a, b, 2, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int SSE4x4SSE2(const uint8_t* a, const uint8_t* b) {
|
static int SSE4x4(const uint8_t* a, const uint8_t* b) {
|
||||||
const __m128i zero = _mm_setzero_si128();
|
const __m128i zero = _mm_setzero_si128();
|
||||||
|
|
||||||
// Load values. Note that we read 8 pixels instead of 4,
|
// Load values. Note that we read 8 pixels instead of 4,
|
||||||
@ -634,8 +633,8 @@ static int SSE4x4SSE2(const uint8_t* a, const uint8_t* b) {
|
|||||||
// Hadamard transform
|
// Hadamard transform
|
||||||
// Returns the difference between the weighted sum of the absolute value of
|
// Returns the difference between the weighted sum of the absolute value of
|
||||||
// transformed coefficients.
|
// transformed coefficients.
|
||||||
static int TTransformSSE2(const uint8_t* inA, const uint8_t* inB,
|
static int TTransform(const uint8_t* inA, const uint8_t* inB,
|
||||||
const uint16_t* const w) {
|
const uint16_t* const w) {
|
||||||
int32_t sum[4];
|
int32_t sum[4];
|
||||||
__m128i tmp_0, tmp_1, tmp_2, tmp_3;
|
__m128i tmp_0, tmp_1, tmp_2, tmp_3;
|
||||||
const __m128i zero = _mm_setzero_si128();
|
const __m128i zero = _mm_setzero_si128();
|
||||||
@ -782,19 +781,19 @@ static int TTransformSSE2(const uint8_t* inA, const uint8_t* inB,
|
|||||||
return sum[0] + sum[1] + sum[2] + sum[3];
|
return sum[0] + sum[1] + sum[2] + sum[3];
|
||||||
}
|
}
|
||||||
|
|
||||||
static int Disto4x4SSE2(const uint8_t* const a, const uint8_t* const b,
|
static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
|
||||||
const uint16_t* const w) {
|
const uint16_t* const w) {
|
||||||
const int diff_sum = TTransformSSE2(a, b, w);
|
const int diff_sum = TTransform(a, b, w);
|
||||||
return abs(diff_sum) >> 5;
|
return abs(diff_sum) >> 5;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int Disto16x16SSE2(const uint8_t* const a, const uint8_t* const b,
|
static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
|
||||||
const uint16_t* const w) {
|
const uint16_t* const w) {
|
||||||
int D = 0;
|
int D = 0;
|
||||||
int x, y;
|
int x, y;
|
||||||
for (y = 0; y < 16 * BPS; y += 4 * BPS) {
|
for (y = 0; y < 16 * BPS; y += 4 * BPS) {
|
||||||
for (x = 0; x < 16; x += 4) {
|
for (x = 0; x < 16; x += 4) {
|
||||||
D += Disto4x4SSE2(a + x + y, b + x + y, w);
|
D += Disto4x4(a + x + y, b + x + y, w);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return D;
|
return D;
|
||||||
@ -805,10 +804,10 @@ static int Disto16x16SSE2(const uint8_t* const a, const uint8_t* const b,
|
|||||||
//
|
//
|
||||||
|
|
||||||
#define QFIX2 0
|
#define QFIX2 0
|
||||||
static WEBP_INLINE int QuantizeBlock(int16_t in[16], int16_t out[16],
|
static WEBP_INLINE int DoQuantizeBlock(int16_t in[16], int16_t out[16],
|
||||||
int n, int shift,
|
int n, int shift,
|
||||||
const uint16_t* const sharpen,
|
const uint16_t* const sharpen,
|
||||||
const VP8Matrix* const mtx) {
|
const VP8Matrix* const mtx) {
|
||||||
const __m128i max_coeff_2047 = _mm_set1_epi16(MAX_LEVEL);
|
const __m128i max_coeff_2047 = _mm_set1_epi16(MAX_LEVEL);
|
||||||
const __m128i zero = _mm_setzero_si128();
|
const __m128i zero = _mm_setzero_si128();
|
||||||
__m128i coeff0, coeff8;
|
__m128i coeff0, coeff8;
|
||||||
@ -921,14 +920,14 @@ static WEBP_INLINE int QuantizeBlock(int16_t in[16], int16_t out[16],
|
|||||||
return (_mm_movemask_epi8(_mm_cmpeq_epi8(packed_out, zero)) != 0xffff);
|
return (_mm_movemask_epi8(_mm_cmpeq_epi8(packed_out, zero)) != 0xffff);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int QuantizeBlockSSE2(int16_t in[16], int16_t out[16],
|
static int QuantizeBlock(int16_t in[16], int16_t out[16],
|
||||||
int n, const VP8Matrix* const mtx) {
|
int n, const VP8Matrix* const mtx) {
|
||||||
return QuantizeBlock(in, out, n, 0, &mtx->sharpen_[0], mtx);
|
return DoQuantizeBlock(in, out, n, 0, &mtx->sharpen_[0], mtx);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int QuantizeBlockWHTSSE2(int16_t in[16], int16_t out[16],
|
static int QuantizeBlockWHT(int16_t in[16], int16_t out[16],
|
||||||
const VP8Matrix* const mtx) {
|
const VP8Matrix* const mtx) {
|
||||||
return QuantizeBlock(in, out, 0, 0, &mtx->sharpen_[0], mtx);
|
return DoQuantizeBlock(in, out, 0, 0, &mtx->sharpen_[0], mtx);
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // WEBP_USE_SSE2
|
#endif // WEBP_USE_SSE2
|
||||||
@ -940,18 +939,18 @@ extern void VP8EncDspInitSSE2(void);
|
|||||||
|
|
||||||
void VP8EncDspInitSSE2(void) {
|
void VP8EncDspInitSSE2(void) {
|
||||||
#if defined(WEBP_USE_SSE2)
|
#if defined(WEBP_USE_SSE2)
|
||||||
VP8CollectHistogram = CollectHistogramSSE2;
|
VP8CollectHistogram = CollectHistogram;
|
||||||
VP8EncQuantizeBlock = QuantizeBlockSSE2;
|
VP8EncQuantizeBlock = QuantizeBlock;
|
||||||
VP8EncQuantizeBlockWHT = QuantizeBlockWHTSSE2;
|
VP8EncQuantizeBlockWHT = QuantizeBlockWHT;
|
||||||
VP8ITransform = ITransformSSE2;
|
VP8ITransform = ITransform;
|
||||||
VP8FTransform = FTransformSSE2;
|
VP8FTransform = FTransform;
|
||||||
VP8FTransformWHT = FTransformWHTSSE2;
|
VP8FTransformWHT = FTransformWHT;
|
||||||
VP8SSE16x16 = SSE16x16SSE2;
|
VP8SSE16x16 = SSE16x16;
|
||||||
VP8SSE16x8 = SSE16x8SSE2;
|
VP8SSE16x8 = SSE16x8;
|
||||||
VP8SSE8x8 = SSE8x8SSE2;
|
VP8SSE8x8 = SSE8x8;
|
||||||
VP8SSE4x4 = SSE4x4SSE2;
|
VP8SSE4x4 = SSE4x4;
|
||||||
VP8TDisto4x4 = Disto4x4SSE2;
|
VP8TDisto4x4 = Disto4x4;
|
||||||
VP8TDisto16x16 = Disto16x16SSE2;
|
VP8TDisto16x16 = Disto16x16;
|
||||||
#endif // WEBP_USE_SSE2
|
#endif // WEBP_USE_SSE2
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user