mirror of
https://github.com/webmproject/libwebp.git
synced 2024-12-27 22:28:22 +01:00
Compare commits
No commits in common. "fdb229ea3a910e31af10e3547df489906d71f789" and "169dfbf9310808b9cbd05662f7bfee97f5e86582" have entirely different histories.
fdb229ea3a
...
169dfbf931
@ -354,8 +354,8 @@ static int GetResidualCost_C(int ctx0, const VP8Residual* const res) {
|
||||
return cost;
|
||||
}
|
||||
|
||||
static void SetResidualCoeffs_C(const int16_t* WEBP_RESTRICT const coeffs,
|
||||
VP8Residual* WEBP_RESTRICT const res) {
|
||||
static void SetResidualCoeffs_C(const int16_t* const coeffs,
|
||||
VP8Residual* const res) {
|
||||
int n;
|
||||
res->last = -1;
|
||||
assert(res->first == 0 || coeffs[0] == 0);
|
||||
|
@ -96,8 +96,8 @@ static int GetResidualCost_MIPS32(int ctx0, const VP8Residual* const res) {
|
||||
return cost;
|
||||
}
|
||||
|
||||
static void SetResidualCoeffs_MIPS32(const int16_t* WEBP_RESTRICT const coeffs,
|
||||
VP8Residual* WEBP_RESTRICT const res) {
|
||||
static void SetResidualCoeffs_MIPS32(const int16_t* const coeffs,
|
||||
VP8Residual* const res) {
|
||||
const int16_t* p_coeffs = (int16_t*)coeffs;
|
||||
int temp0, temp1, temp2, n, n1;
|
||||
assert(res->first == 0 || coeffs[0] == 0);
|
||||
|
@ -19,8 +19,8 @@
|
||||
static const uint8_t position[16] = { 1, 2, 3, 4, 5, 6, 7, 8,
|
||||
9, 10, 11, 12, 13, 14, 15, 16 };
|
||||
|
||||
static void SetResidualCoeffs_NEON(const int16_t* WEBP_RESTRICT const coeffs,
|
||||
VP8Residual* WEBP_RESTRICT const res) {
|
||||
static void SetResidualCoeffs_NEON(const int16_t* const coeffs,
|
||||
VP8Residual* const res) {
|
||||
const int16x8_t minus_one = vdupq_n_s16(-1);
|
||||
const int16x8_t coeffs_0 = vld1q_s16(coeffs);
|
||||
const int16x8_t coeffs_1 = vld1q_s16(coeffs + 8);
|
||||
|
@ -22,8 +22,8 @@
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static void SetResidualCoeffs_SSE2(const int16_t* WEBP_RESTRICT const coeffs,
|
||||
VP8Residual* WEBP_RESTRICT const res) {
|
||||
static void SetResidualCoeffs_SSE2(const int16_t* const coeffs,
|
||||
VP8Residual* const res) {
|
||||
const __m128i c0 = _mm_loadu_si128((const __m128i*)(coeffs + 0));
|
||||
const __m128i c1 = _mm_loadu_si128((const __m128i*)(coeffs + 8));
|
||||
// Use SSE2 to compare 16 values with a single instruction.
|
||||
|
@ -38,8 +38,7 @@ static WEBP_INLINE uint8_t clip_8b(int v) {
|
||||
} while (0)
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void TransformOne_C(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
static void TransformOne_C(const int16_t* in, uint8_t* dst) {
|
||||
int C[4 * 4], *tmp;
|
||||
int i;
|
||||
tmp = C;
|
||||
@ -83,8 +82,7 @@ static void TransformOne_C(const int16_t* WEBP_RESTRICT in,
|
||||
}
|
||||
|
||||
// Simplified transform when only in[0], in[1] and in[4] are non-zero
|
||||
static void TransformAC3_C(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
static void TransformAC3_C(const int16_t* in, uint8_t* dst) {
|
||||
const int a = in[0] + 4;
|
||||
const int c4 = WEBP_TRANSFORM_AC3_MUL2(in[4]);
|
||||
const int d4 = WEBP_TRANSFORM_AC3_MUL1(in[4]);
|
||||
@ -97,8 +95,7 @@ static void TransformAC3_C(const int16_t* WEBP_RESTRICT in,
|
||||
}
|
||||
#undef STORE2
|
||||
|
||||
static void TransformTwo_C(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst, int do_two) {
|
||||
static void TransformTwo_C(const int16_t* in, uint8_t* dst, int do_two) {
|
||||
TransformOne_C(in, dst);
|
||||
if (do_two) {
|
||||
TransformOne_C(in + 16, dst + 4);
|
||||
@ -106,15 +103,13 @@ static void TransformTwo_C(const int16_t* WEBP_RESTRICT in,
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
static void TransformUV_C(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
static void TransformUV_C(const int16_t* in, uint8_t* dst) {
|
||||
VP8Transform(in + 0 * 16, dst, 1);
|
||||
VP8Transform(in + 2 * 16, dst + 4 * BPS, 1);
|
||||
}
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void TransformDC_C(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
static void TransformDC_C(const int16_t* in, uint8_t* dst) {
|
||||
const int DC = in[0] + 4;
|
||||
int i, j;
|
||||
for (j = 0; j < 4; ++j) {
|
||||
@ -125,8 +120,7 @@ static void TransformDC_C(const int16_t* WEBP_RESTRICT in,
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
static void TransformDCUV_C(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
static void TransformDCUV_C(const int16_t* in, uint8_t* dst) {
|
||||
if (in[0 * 16]) VP8TransformDC(in + 0 * 16, dst);
|
||||
if (in[1 * 16]) VP8TransformDC(in + 1 * 16, dst + 4);
|
||||
if (in[2 * 16]) VP8TransformDC(in + 2 * 16, dst + 4 * BPS);
|
||||
@ -139,8 +133,7 @@ static void TransformDCUV_C(const int16_t* WEBP_RESTRICT in,
|
||||
// Paragraph 14.3
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void TransformWHT_C(const int16_t* WEBP_RESTRICT in,
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
static void TransformWHT_C(const int16_t* in, int16_t* out) {
|
||||
int tmp[16];
|
||||
int i;
|
||||
for (i = 0; i < 4; ++i) {
|
||||
@ -168,7 +161,7 @@ static void TransformWHT_C(const int16_t* WEBP_RESTRICT in,
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
VP8WHT VP8TransformWHT;
|
||||
void (*VP8TransformWHT)(const int16_t* in, int16_t* out);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Intra predictions
|
||||
@ -668,32 +661,32 @@ static void HFilter16i_C(uint8_t* p, int stride,
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
// 8-pixels wide variant, for chroma filtering
|
||||
static void VFilter8_C(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride, int thresh, int ithresh, int hev_thresh) {
|
||||
static void VFilter8_C(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop26_C(u, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop26_C(v, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
static void HFilter8_C(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride, int thresh, int ithresh, int hev_thresh) {
|
||||
static void HFilter8_C(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop26_C(u, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop26_C(v, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void VFilter8i_C(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride, int thresh, int ithresh, int hev_thresh) {
|
||||
static void VFilter8i_C(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop24_C(u + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop24_C(v + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
|
||||
static void HFilter8i_C(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride, int thresh, int ithresh, int hev_thresh) {
|
||||
static void HFilter8i_C(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop24_C(u + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop24_C(v + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
@ -701,8 +694,8 @@ static void HFilter8i_C(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static void DitherCombine8x8_C(const uint8_t* WEBP_RESTRICT dither,
|
||||
uint8_t* WEBP_RESTRICT dst, int dst_stride) {
|
||||
static void DitherCombine8x8_C(const uint8_t* dither, uint8_t* dst,
|
||||
int dst_stride) {
|
||||
int i, j;
|
||||
for (j = 0; j < 8; ++j) {
|
||||
for (i = 0; i < 8; ++i) {
|
||||
@ -737,8 +730,8 @@ VP8SimpleFilterFunc VP8SimpleHFilter16;
|
||||
VP8SimpleFilterFunc VP8SimpleVFilter16i;
|
||||
VP8SimpleFilterFunc VP8SimpleHFilter16i;
|
||||
|
||||
void (*VP8DitherCombine8x8)(const uint8_t* WEBP_RESTRICT dither,
|
||||
uint8_t* WEBP_RESTRICT dst, int dst_stride);
|
||||
void (*VP8DitherCombine8x8)(const uint8_t* dither, uint8_t* dst,
|
||||
int dst_stride);
|
||||
|
||||
extern VP8CPUInfo VP8GetCPUInfo;
|
||||
extern void VP8DspInitSSE2(void);
|
||||
|
@ -133,26 +133,26 @@ static void HFilter16(uint8_t* p, int stride,
|
||||
}
|
||||
|
||||
// 8-pixels wide variant, for chroma filtering
|
||||
static void VFilter8(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride, int thresh, int ithresh, int hev_thresh) {
|
||||
static void VFilter8(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop26(u, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop26(v, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
|
||||
static void HFilter8(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride, int thresh, int ithresh, int hev_thresh) {
|
||||
static void HFilter8(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop26(u, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop26(v, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
|
||||
static void VFilter8i(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride, int thresh, int ithresh, int hev_thresh) {
|
||||
static void VFilter8i(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop24(u + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop24(v + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
|
||||
static void HFilter8i(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride, int thresh, int ithresh, int hev_thresh) {
|
||||
static void HFilter8i(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop24(u + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop24(v + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
@ -215,8 +215,7 @@ static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) {
|
||||
}
|
||||
}
|
||||
|
||||
static void TransformOne(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
static void TransformOne(const int16_t* in, uint8_t* dst) {
|
||||
int temp0, temp1, temp2, temp3, temp4;
|
||||
int temp5, temp6, temp7, temp8, temp9;
|
||||
int temp10, temp11, temp12, temp13, temp14;
|
||||
@ -533,8 +532,7 @@ static void TransformOne(const int16_t* WEBP_RESTRICT in,
|
||||
);
|
||||
}
|
||||
|
||||
static void TransformTwo(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst, int do_two) {
|
||||
static void TransformTwo(const int16_t* in, uint8_t* dst, int do_two) {
|
||||
TransformOne(in, dst);
|
||||
if (do_two) {
|
||||
TransformOne(in + 16, dst + 4);
|
||||
|
@ -21,8 +21,7 @@
|
||||
static const int kC1 = WEBP_TRANSFORM_AC3_C1;
|
||||
static const int kC2 = WEBP_TRANSFORM_AC3_C2;
|
||||
|
||||
static void TransformDC(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
static void TransformDC(const int16_t* in, uint8_t* dst) {
|
||||
int temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10;
|
||||
|
||||
__asm__ volatile (
|
||||
@ -46,8 +45,7 @@ static void TransformDC(const int16_t* WEBP_RESTRICT in,
|
||||
);
|
||||
}
|
||||
|
||||
static void TransformAC3(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
static void TransformAC3(const int16_t* in, uint8_t* dst) {
|
||||
const int a = in[0] + 4;
|
||||
int c4 = WEBP_TRANSFORM_AC3_MUL2(in[4]);
|
||||
const int d4 = WEBP_TRANSFORM_AC3_MUL1(in[4]);
|
||||
@ -83,8 +81,7 @@ static void TransformAC3(const int16_t* WEBP_RESTRICT in,
|
||||
);
|
||||
}
|
||||
|
||||
static void TransformOne(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
static void TransformOne(const int16_t* in, uint8_t* dst) {
|
||||
int temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9;
|
||||
int temp10, temp11, temp12, temp13, temp14, temp15, temp16, temp17, temp18;
|
||||
|
||||
@ -151,8 +148,7 @@ static void TransformOne(const int16_t* WEBP_RESTRICT in,
|
||||
);
|
||||
}
|
||||
|
||||
static void TransformTwo(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst, int do_two) {
|
||||
static void TransformTwo(const int16_t* in, uint8_t* dst, int do_two) {
|
||||
TransformOne(in, dst);
|
||||
if (do_two) {
|
||||
TransformOne(in + 16, dst + 4);
|
||||
@ -438,14 +434,14 @@ static void HFilter16(uint8_t* p, int stride,
|
||||
}
|
||||
|
||||
// 8-pixels wide variant, for chroma filtering
|
||||
static void VFilter8(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride, int thresh, int ithresh, int hev_thresh) {
|
||||
static void VFilter8(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop26(u, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop26(v, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
|
||||
static void HFilter8(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride, int thresh, int ithresh, int hev_thresh) {
|
||||
static void HFilter8(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop26(u, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop26(v, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
@ -469,14 +465,14 @@ static void HFilter16i(uint8_t* p, int stride,
|
||||
}
|
||||
}
|
||||
|
||||
static void VFilter8i(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride, int thresh, int ithresh, int hev_thresh) {
|
||||
static void VFilter8i(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop24(u + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop24(v + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
|
||||
static void HFilter8i(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride, int thresh, int ithresh, int hev_thresh) {
|
||||
static void HFilter8i(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
FilterLoop24(u + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
FilterLoop24(v + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
|
||||
}
|
||||
|
@ -38,8 +38,7 @@
|
||||
BUTTERFLY_4(a1_m, b1_m, c1_m, d1_m, out0, out1, out2, out3); \
|
||||
}
|
||||
|
||||
static void TransformOne(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
static void TransformOne(const int16_t* in, uint8_t* dst) {
|
||||
v8i16 input0, input1;
|
||||
v4i32 in0, in1, in2, in3, hz0, hz1, hz2, hz3, vt0, vt1, vt2, vt3;
|
||||
v4i32 res0, res1, res2, res3;
|
||||
@ -66,16 +65,14 @@ static void TransformOne(const int16_t* WEBP_RESTRICT in,
|
||||
ST4x4_UB(res0, res0, 3, 2, 1, 0, dst, BPS);
|
||||
}
|
||||
|
||||
static void TransformTwo(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst, int do_two) {
|
||||
static void TransformTwo(const int16_t* in, uint8_t* dst, int do_two) {
|
||||
TransformOne(in, dst);
|
||||
if (do_two) {
|
||||
TransformOne(in + 16, dst + 4);
|
||||
}
|
||||
}
|
||||
|
||||
static void TransformWHT(const int16_t* WEBP_RESTRICT in,
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
static void TransformWHT(const int16_t* in, int16_t* out) {
|
||||
v8i16 input0, input1;
|
||||
const v8i16 mask0 = { 0, 1, 2, 3, 8, 9, 10, 11 };
|
||||
const v8i16 mask1 = { 4, 5, 6, 7, 12, 13, 14, 15 };
|
||||
@ -117,15 +114,13 @@ static void TransformWHT(const int16_t* WEBP_RESTRICT in,
|
||||
out[240] = __msa_copy_s_h(out1, 7);
|
||||
}
|
||||
|
||||
static void TransformDC(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
static void TransformDC(const int16_t* in, uint8_t* dst) {
|
||||
const int DC = (in[0] + 4) >> 3;
|
||||
const v8i16 tmp0 = __msa_fill_h(DC);
|
||||
ADDBLK_ST4x4_UB(tmp0, tmp0, tmp0, tmp0, dst, BPS);
|
||||
}
|
||||
|
||||
static void TransformAC3(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
static void TransformAC3(const int16_t* in, uint8_t* dst) {
|
||||
const int a = in[0] + 4;
|
||||
const int c4 = WEBP_TRANSFORM_AC3_MUL2(in[4]);
|
||||
const int d4 = WEBP_TRANSFORM_AC3_MUL1(in[4]);
|
||||
@ -480,8 +475,8 @@ static void HFilter16i(uint8_t* src_y, int stride,
|
||||
}
|
||||
|
||||
// 8-pixels wide variants, for chroma filtering
|
||||
static void VFilter8(uint8_t* WEBP_RESTRICT src_u, uint8_t* WEBP_RESTRICT src_v,
|
||||
int stride, int b_limit_in, int limit_in, int thresh_in) {
|
||||
static void VFilter8(uint8_t* src_u, uint8_t* src_v, int stride,
|
||||
int b_limit_in, int limit_in, int thresh_in) {
|
||||
uint8_t* ptmp_src_u = src_u - 4 * stride;
|
||||
uint8_t* ptmp_src_v = src_v - 4 * stride;
|
||||
uint64_t p2_d, p1_d, p0_d, q0_d, q1_d, q2_d;
|
||||
@ -525,8 +520,8 @@ static void VFilter8(uint8_t* WEBP_RESTRICT src_u, uint8_t* WEBP_RESTRICT src_v,
|
||||
SD(q2_d, ptmp_src_v);
|
||||
}
|
||||
|
||||
static void HFilter8(uint8_t* WEBP_RESTRICT src_u, uint8_t* WEBP_RESTRICT src_v,
|
||||
int stride, int b_limit_in, int limit_in, int thresh_in) {
|
||||
static void HFilter8(uint8_t* src_u, uint8_t* src_v, int stride,
|
||||
int b_limit_in, int limit_in, int thresh_in) {
|
||||
uint8_t* ptmp_src_u = src_u - 4;
|
||||
uint8_t* ptmp_src_v = src_v - 4;
|
||||
v16u8 p3, p2, p1, p0, q3, q2, q1, q0, mask, hev;
|
||||
@ -561,8 +556,7 @@ static void HFilter8(uint8_t* WEBP_RESTRICT src_u, uint8_t* WEBP_RESTRICT src_v,
|
||||
ST6x4_UB(tmp7, 0, tmp5, 4, ptmp_src_v, stride);
|
||||
}
|
||||
|
||||
static void VFilter8i(uint8_t* WEBP_RESTRICT src_u,
|
||||
uint8_t* WEBP_RESTRICT src_v, int stride,
|
||||
static void VFilter8i(uint8_t* src_u, uint8_t* src_v, int stride,
|
||||
int b_limit_in, int limit_in, int thresh_in) {
|
||||
uint64_t p1_d, p0_d, q0_d, q1_d;
|
||||
v16u8 p3, p2, p1, p0, q3, q2, q1, q0, mask, hev;
|
||||
@ -593,8 +587,7 @@ static void VFilter8i(uint8_t* WEBP_RESTRICT src_u,
|
||||
SD4(q1_d, q0_d, p0_d, p1_d, src_v, -stride);
|
||||
}
|
||||
|
||||
static void HFilter8i(uint8_t* WEBP_RESTRICT src_u,
|
||||
uint8_t* WEBP_RESTRICT src_v, int stride,
|
||||
static void HFilter8i(uint8_t* src_u, uint8_t* src_v, int stride,
|
||||
int b_limit_in, int limit_in, int thresh_in) {
|
||||
v16u8 p3, p2, p1, p0, q3, q2, q1, q0, mask, hev;
|
||||
v16u8 row0, row1, row2, row3, row4, row5, row6, row7, row8;
|
||||
|
@ -916,8 +916,8 @@ static void HFilter16i_NEON(uint8_t* p, int stride,
|
||||
#endif // !WORK_AROUND_GCC
|
||||
|
||||
// 8-pixels wide variant, for chroma filtering
|
||||
static void VFilter8_NEON(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride, int thresh, int ithresh, int hev_thresh) {
|
||||
static void VFilter8_NEON(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
|
||||
Load8x8x2_NEON(u, v, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
|
||||
{
|
||||
@ -932,8 +932,7 @@ static void VFilter8_NEON(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
Store8x2x2_NEON(oq1, oq2, u + 2 * stride, v + 2 * stride, stride);
|
||||
}
|
||||
}
|
||||
static void VFilter8i_NEON(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride,
|
||||
static void VFilter8i_NEON(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
|
||||
u += 4 * stride;
|
||||
@ -950,8 +949,8 @@ static void VFilter8i_NEON(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
}
|
||||
|
||||
#if !defined(WORK_AROUND_GCC)
|
||||
static void HFilter8_NEON(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride, int thresh, int ithresh, int hev_thresh) {
|
||||
static void HFilter8_NEON(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
|
||||
Load8x8x2T_NEON(u, v, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
|
||||
{
|
||||
@ -965,8 +964,7 @@ static void HFilter8_NEON(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
}
|
||||
}
|
||||
|
||||
static void HFilter8i_NEON(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride,
|
||||
static void HFilter8i_NEON(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
|
||||
u += 4;
|
||||
@ -1043,8 +1041,7 @@ static WEBP_INLINE void TransformPass_NEON(int16x8x2_t* const rows) {
|
||||
Transpose8x2_NEON(E0, E1, rows);
|
||||
}
|
||||
|
||||
static void TransformOne_NEON(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
static void TransformOne_NEON(const int16_t* in, uint8_t* dst) {
|
||||
int16x8x2_t rows;
|
||||
INIT_VECTOR2(rows, vld1q_s16(in + 0), vld1q_s16(in + 8));
|
||||
TransformPass_NEON(&rows);
|
||||
@ -1054,8 +1051,7 @@ static void TransformOne_NEON(const int16_t* WEBP_RESTRICT in,
|
||||
|
||||
#else
|
||||
|
||||
static void TransformOne_NEON(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
static void TransformOne_NEON(const int16_t* in, uint8_t* dst) {
|
||||
const int kBPS = BPS;
|
||||
// kC1, kC2. Padded because vld1.16 loads 8 bytes
|
||||
const int16_t constants[4] = { kC1, kC2, 0, 0 };
|
||||
@ -1188,16 +1184,14 @@ static void TransformOne_NEON(const int16_t* WEBP_RESTRICT in,
|
||||
|
||||
#endif // WEBP_USE_INTRINSICS
|
||||
|
||||
static void TransformTwo_NEON(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst, int do_two) {
|
||||
static void TransformTwo_NEON(const int16_t* in, uint8_t* dst, int do_two) {
|
||||
TransformOne_NEON(in, dst);
|
||||
if (do_two) {
|
||||
TransformOne_NEON(in + 16, dst + 4);
|
||||
}
|
||||
}
|
||||
|
||||
static void TransformDC_NEON(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
static void TransformDC_NEON(const int16_t* in, uint8_t* dst) {
|
||||
const int16x8_t DC = vdupq_n_s16(in[0]);
|
||||
Add4x4_NEON(DC, DC, dst);
|
||||
}
|
||||
@ -1211,8 +1205,7 @@ static void TransformDC_NEON(const int16_t* WEBP_RESTRICT in,
|
||||
*dst = vgetq_lane_s32(rows.val[3], col); (dst) += 16; \
|
||||
} while (0)
|
||||
|
||||
static void TransformWHT_NEON(const int16_t* WEBP_RESTRICT in,
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
static void TransformWHT_NEON(const int16_t* in, int16_t* out) {
|
||||
int32x4x4_t tmp;
|
||||
|
||||
{
|
||||
@ -1263,8 +1256,7 @@ static void TransformWHT_NEON(const int16_t* WEBP_RESTRICT in,
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static void TransformAC3_NEON(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
static void TransformAC3_NEON(const int16_t* in, uint8_t* dst) {
|
||||
const int16x4_t A = vld1_dup_s16(in);
|
||||
const int16x4_t c4 = vdup_n_s16(WEBP_TRANSFORM_AC3_MUL2(in[4]));
|
||||
const int16x4_t d4 = vdup_n_s16(WEBP_TRANSFORM_AC3_MUL1(in[4]));
|
||||
|
@ -30,8 +30,7 @@
|
||||
//------------------------------------------------------------------------------
|
||||
// Transforms (Paragraph 14.4)
|
||||
|
||||
static void Transform_SSE2(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst, int do_two) {
|
||||
static void Transform_SSE2(const int16_t* in, uint8_t* dst, int do_two) {
|
||||
// This implementation makes use of 16-bit fixed point versions of two
|
||||
// multiply constants:
|
||||
// K1 = sqrt(2) * cos (pi/8) ~= 85627 / 2^16
|
||||
@ -198,8 +197,7 @@ static void Transform_SSE2(const int16_t* WEBP_RESTRICT in,
|
||||
|
||||
#if (USE_TRANSFORM_AC3 == 1)
|
||||
|
||||
static void TransformAC3_SSE2(const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
static void TransformAC3_SSE2(const int16_t* in, uint8_t* dst) {
|
||||
const __m128i A = _mm_set1_epi16(in[0] + 4);
|
||||
const __m128i c4 = _mm_set1_epi16(WEBP_TRANSFORM_AC3_MUL2(in[4]));
|
||||
const __m128i d4 = _mm_set1_epi16(WEBP_TRANSFORM_AC3_MUL1(in[4]));
|
||||
@ -794,8 +792,8 @@ static void HFilter16i_SSE2(uint8_t* p, int stride,
|
||||
}
|
||||
|
||||
// 8-pixels wide variant, for chroma filtering
|
||||
static void VFilter8_SSE2(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride, int thresh, int ithresh, int hev_thresh) {
|
||||
static void VFilter8_SSE2(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
__m128i mask;
|
||||
__m128i t1, p2, p1, p0, q0, q1, q2;
|
||||
|
||||
@ -819,8 +817,8 @@ static void VFilter8_SSE2(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
STOREUV(q2, u, v, 2 * stride);
|
||||
}
|
||||
|
||||
static void HFilter8_SSE2(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride, int thresh, int ithresh, int hev_thresh) {
|
||||
static void HFilter8_SSE2(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
__m128i mask;
|
||||
__m128i p3, p2, p1, p0, q0, q1, q2, q3;
|
||||
|
||||
@ -839,8 +837,7 @@ static void HFilter8_SSE2(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
Store16x4_SSE2(&q0, &q1, &q2, &q3, u, v, stride);
|
||||
}
|
||||
|
||||
static void VFilter8i_SSE2(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride,
|
||||
static void VFilter8i_SSE2(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
__m128i mask;
|
||||
__m128i t1, t2, p1, p0, q0, q1;
|
||||
@ -866,8 +863,7 @@ static void VFilter8i_SSE2(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
STOREUV(q1, u, v, 1 * stride);
|
||||
}
|
||||
|
||||
static void HFilter8i_SSE2(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int stride,
|
||||
static void HFilter8i_SSE2(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_thresh) {
|
||||
__m128i mask;
|
||||
__m128i t1, t2, p1, p0, q0, q1;
|
||||
|
153
src/dsp/dsp.h
153
src/dsp/dsp.h
@ -60,66 +60,53 @@ extern "C" {
|
||||
// Transforms
|
||||
// VP8Idct: Does one of two inverse transforms. If do_two is set, the transforms
|
||||
// will be done for (ref, in, dst) and (ref + 4, in + 16, dst + 4).
|
||||
typedef void (*VP8Idct)(const uint8_t* WEBP_RESTRICT ref,
|
||||
const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst, int do_two);
|
||||
typedef void (*VP8Fdct)(const uint8_t* WEBP_RESTRICT src,
|
||||
const uint8_t* WEBP_RESTRICT ref,
|
||||
int16_t* WEBP_RESTRICT out);
|
||||
typedef void (*VP8WHT)(const int16_t* WEBP_RESTRICT in,
|
||||
int16_t* WEBP_RESTRICT out);
|
||||
typedef void (*VP8Idct)(const uint8_t* ref, const int16_t* in, uint8_t* dst,
|
||||
int do_two);
|
||||
typedef void (*VP8Fdct)(const uint8_t* src, const uint8_t* ref, int16_t* out);
|
||||
typedef void (*VP8WHT)(const int16_t* in, int16_t* out);
|
||||
extern VP8Idct VP8ITransform;
|
||||
extern VP8Fdct VP8FTransform;
|
||||
extern VP8Fdct VP8FTransform2; // performs two transforms at a time
|
||||
extern VP8WHT VP8FTransformWHT;
|
||||
// Predictions
|
||||
// *dst is the destination block. *top and *left can be NULL.
|
||||
typedef void (*VP8IntraPreds)(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top);
|
||||
typedef void (*VP8Intra4Preds)(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top);
|
||||
typedef void (*VP8IntraPreds)(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top);
|
||||
typedef void (*VP8Intra4Preds)(uint8_t* dst, const uint8_t* top);
|
||||
extern VP8Intra4Preds VP8EncPredLuma4;
|
||||
extern VP8IntraPreds VP8EncPredLuma16;
|
||||
extern VP8IntraPreds VP8EncPredChroma8;
|
||||
|
||||
typedef int (*VP8Metric)(const uint8_t* WEBP_RESTRICT pix,
|
||||
const uint8_t* WEBP_RESTRICT ref);
|
||||
typedef int (*VP8Metric)(const uint8_t* pix, const uint8_t* ref);
|
||||
extern VP8Metric VP8SSE16x16, VP8SSE16x8, VP8SSE8x8, VP8SSE4x4;
|
||||
typedef int (*VP8WMetric)(const uint8_t* WEBP_RESTRICT pix,
|
||||
const uint8_t* WEBP_RESTRICT ref,
|
||||
const uint16_t* WEBP_RESTRICT const weights);
|
||||
typedef int (*VP8WMetric)(const uint8_t* pix, const uint8_t* ref,
|
||||
const uint16_t* const weights);
|
||||
// The weights for VP8TDisto4x4 and VP8TDisto16x16 contain a row-major
|
||||
// 4 by 4 symmetric matrix.
|
||||
extern VP8WMetric VP8TDisto4x4, VP8TDisto16x16;
|
||||
|
||||
// Compute the average (DC) of four 4x4 blocks.
|
||||
// Each sub-4x4 block #i sum is stored in dc[i].
|
||||
typedef void (*VP8MeanMetric)(const uint8_t* WEBP_RESTRICT ref,
|
||||
uint32_t dc[4]);
|
||||
typedef void (*VP8MeanMetric)(const uint8_t* ref, uint32_t dc[4]);
|
||||
extern VP8MeanMetric VP8Mean16x4;
|
||||
|
||||
typedef void (*VP8BlockCopy)(const uint8_t* WEBP_RESTRICT src,
|
||||
uint8_t* WEBP_RESTRICT dst);
|
||||
typedef void (*VP8BlockCopy)(const uint8_t* src, uint8_t* dst);
|
||||
extern VP8BlockCopy VP8Copy4x4;
|
||||
extern VP8BlockCopy VP8Copy16x8;
|
||||
// Quantization
|
||||
struct VP8Matrix; // forward declaration
|
||||
typedef int (*VP8QuantizeBlock)(
|
||||
int16_t in[16], int16_t out[16],
|
||||
const struct VP8Matrix* WEBP_RESTRICT const mtx);
|
||||
typedef int (*VP8QuantizeBlock)(int16_t in[16], int16_t out[16],
|
||||
const struct VP8Matrix* const mtx);
|
||||
// Same as VP8QuantizeBlock, but quantizes two consecutive blocks.
|
||||
typedef int (*VP8Quantize2Blocks)(
|
||||
int16_t in[32], int16_t out[32],
|
||||
const struct VP8Matrix* WEBP_RESTRICT const mtx);
|
||||
typedef int (*VP8Quantize2Blocks)(int16_t in[32], int16_t out[32],
|
||||
const struct VP8Matrix* const mtx);
|
||||
|
||||
extern VP8QuantizeBlock VP8EncQuantizeBlock;
|
||||
extern VP8Quantize2Blocks VP8EncQuantize2Blocks;
|
||||
|
||||
// specific to 2nd transform:
|
||||
typedef int (*VP8QuantizeBlockWHT)(
|
||||
int16_t in[16], int16_t out[16],
|
||||
const struct VP8Matrix* WEBP_RESTRICT const mtx);
|
||||
typedef int (*VP8QuantizeBlockWHT)(int16_t in[16], int16_t out[16],
|
||||
const struct VP8Matrix* const mtx);
|
||||
extern VP8QuantizeBlockWHT VP8EncQuantizeBlockWHT;
|
||||
|
||||
extern const int VP8DspScan[16 + 4 + 4];
|
||||
@ -131,10 +118,9 @@ typedef struct {
|
||||
int max_value;
|
||||
int last_non_zero;
|
||||
} VP8Histogram;
|
||||
typedef void (*VP8CHisto)(const uint8_t* WEBP_RESTRICT ref,
|
||||
const uint8_t* WEBP_RESTRICT pred,
|
||||
typedef void (*VP8CHisto)(const uint8_t* ref, const uint8_t* pred,
|
||||
int start_block, int end_block,
|
||||
VP8Histogram* WEBP_RESTRICT const histo);
|
||||
VP8Histogram* const histo);
|
||||
extern VP8CHisto VP8CollectHistogram;
|
||||
// General-purpose util function to help VP8CollectHistogram().
|
||||
void VP8SetHistogramData(const int distribution[MAX_COEFF_THRESH + 1],
|
||||
@ -152,9 +138,8 @@ extern const uint16_t VP8LevelFixedCosts[2047 /*MAX_LEVEL*/ + 1];
|
||||
extern const uint8_t VP8EncBands[16 + 1];
|
||||
|
||||
struct VP8Residual;
|
||||
typedef void (*VP8SetResidualCoeffsFunc)(
|
||||
const int16_t* WEBP_RESTRICT const coeffs,
|
||||
struct VP8Residual* WEBP_RESTRICT const res);
|
||||
typedef void (*VP8SetResidualCoeffsFunc)(const int16_t* const coeffs,
|
||||
struct VP8Residual* const res);
|
||||
extern VP8SetResidualCoeffsFunc VP8SetResidualCoeffs;
|
||||
|
||||
// Cost calculation function.
|
||||
@ -208,11 +193,9 @@ void VP8SSIMDspInit(void);
|
||||
//------------------------------------------------------------------------------
|
||||
// Decoding
|
||||
|
||||
typedef void (*VP8DecIdct)(const int16_t* WEBP_RESTRICT coeffs,
|
||||
uint8_t* WEBP_RESTRICT dst);
|
||||
typedef void (*VP8DecIdct)(const int16_t* coeffs, uint8_t* dst);
|
||||
// when doing two transforms, coeffs is actually int16_t[2][16].
|
||||
typedef void (*VP8DecIdct2)(const int16_t* WEBP_RESTRICT coeffs,
|
||||
uint8_t* WEBP_RESTRICT dst, int do_two);
|
||||
typedef void (*VP8DecIdct2)(const int16_t* coeffs, uint8_t* dst, int do_two);
|
||||
extern VP8DecIdct2 VP8Transform;
|
||||
extern VP8DecIdct VP8TransformAC3;
|
||||
extern VP8DecIdct VP8TransformUV;
|
||||
@ -250,8 +233,7 @@ extern VP8SimpleFilterFunc VP8SimpleHFilter16i;
|
||||
// regular filter (on both macroblock edges and inner edges)
|
||||
typedef void (*VP8LumaFilterFunc)(uint8_t* luma, int stride,
|
||||
int thresh, int ithresh, int hev_t);
|
||||
typedef void (*VP8ChromaFilterFunc)(uint8_t* WEBP_RESTRICT u,
|
||||
uint8_t* WEBP_RESTRICT v, int stride,
|
||||
typedef void (*VP8ChromaFilterFunc)(uint8_t* u, uint8_t* v, int stride,
|
||||
int thresh, int ithresh, int hev_t);
|
||||
// on outer edge
|
||||
extern VP8LumaFilterFunc VP8VFilter16;
|
||||
@ -271,8 +253,8 @@ extern VP8ChromaFilterFunc VP8HFilter8i;
|
||||
#define VP8_DITHER_DESCALE_ROUNDER (1 << (VP8_DITHER_DESCALE - 1))
|
||||
#define VP8_DITHER_AMP_BITS 7
|
||||
#define VP8_DITHER_AMP_CENTER (1 << VP8_DITHER_AMP_BITS)
|
||||
extern void (*VP8DitherCombine8x8)(const uint8_t* WEBP_RESTRICT dither,
|
||||
uint8_t* WEBP_RESTRICT dst, int dst_stride);
|
||||
extern void (*VP8DitherCombine8x8)(const uint8_t* dither, uint8_t* dst,
|
||||
int dst_stride);
|
||||
|
||||
// must be called before anything using the above
|
||||
void VP8DspInit(void);
|
||||
@ -285,10 +267,10 @@ void VP8DspInit(void);
|
||||
// Convert a pair of y/u/v lines together to the output rgb/a colorspace.
|
||||
// bottom_y can be NULL if only one line of output is needed (at top/bottom).
|
||||
typedef void (*WebPUpsampleLinePairFunc)(
|
||||
const uint8_t* WEBP_RESTRICT top_y, const uint8_t* WEBP_RESTRICT bottom_y,
|
||||
const uint8_t* WEBP_RESTRICT top_u, const uint8_t* WEBP_RESTRICT top_v,
|
||||
const uint8_t* WEBP_RESTRICT cur_u, const uint8_t* WEBP_RESTRICT cur_v,
|
||||
uint8_t* WEBP_RESTRICT top_dst, uint8_t* WEBP_RESTRICT bottom_dst, int len);
|
||||
const uint8_t* top_y, const uint8_t* bottom_y,
|
||||
const uint8_t* top_u, const uint8_t* top_v,
|
||||
const uint8_t* cur_u, const uint8_t* cur_v,
|
||||
uint8_t* top_dst, uint8_t* bottom_dst, int len);
|
||||
|
||||
#ifdef FANCY_UPSAMPLING
|
||||
|
||||
@ -298,15 +280,13 @@ extern WebPUpsampleLinePairFunc WebPUpsamplers[/* MODE_LAST */];
|
||||
#endif // FANCY_UPSAMPLING
|
||||
|
||||
// Per-row point-sampling methods.
|
||||
typedef void (*WebPSamplerRowFunc)(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst, int len);
|
||||
typedef void (*WebPSamplerRowFunc)(const uint8_t* y,
|
||||
const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst, int len);
|
||||
// Generic function to apply 'WebPSamplerRowFunc' to the whole plane:
|
||||
void WebPSamplerProcessPlane(const uint8_t* WEBP_RESTRICT y, int y_stride,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v, int uv_stride,
|
||||
uint8_t* WEBP_RESTRICT dst, int dst_stride,
|
||||
void WebPSamplerProcessPlane(const uint8_t* y, int y_stride,
|
||||
const uint8_t* u, const uint8_t* v, int uv_stride,
|
||||
uint8_t* dst, int dst_stride,
|
||||
int width, int height, WebPSamplerRowFunc func);
|
||||
|
||||
// Sampling functions to convert rows of YUV to RGB(A)
|
||||
@ -318,10 +298,9 @@ extern WebPSamplerRowFunc WebPSamplers[/* MODE_LAST */];
|
||||
WebPUpsampleLinePairFunc WebPGetLinePairConverter(int alpha_is_last);
|
||||
|
||||
// YUV444->RGB converters
|
||||
typedef void (*WebPYUV444Converter)(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst, int len);
|
||||
typedef void (*WebPYUV444Converter)(const uint8_t* y,
|
||||
const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst, int len);
|
||||
|
||||
extern WebPYUV444Converter WebPYUV444Converters[/* MODE_LAST */];
|
||||
|
||||
@ -337,35 +316,26 @@ void WebPInitYUV444Converters(void);
|
||||
// ARGB -> YUV converters
|
||||
|
||||
// Convert ARGB samples to luma Y.
|
||||
extern void (*WebPConvertARGBToY)(const uint32_t* WEBP_RESTRICT argb,
|
||||
uint8_t* WEBP_RESTRICT y, int width);
|
||||
extern void (*WebPConvertARGBToY)(const uint32_t* argb, uint8_t* y, int width);
|
||||
// Convert ARGB samples to U/V with downsampling. do_store should be '1' for
|
||||
// even lines and '0' for odd ones. 'src_width' is the original width, not
|
||||
// the U/V one.
|
||||
extern void (*WebPConvertARGBToUV)(const uint32_t* WEBP_RESTRICT argb,
|
||||
uint8_t* WEBP_RESTRICT u,
|
||||
uint8_t* WEBP_RESTRICT v,
|
||||
extern void (*WebPConvertARGBToUV)(const uint32_t* argb, uint8_t* u, uint8_t* v,
|
||||
int src_width, int do_store);
|
||||
|
||||
// Convert a row of accumulated (four-values) of rgba32 toward U/V
|
||||
extern void (*WebPConvertRGBA32ToUV)(const uint16_t* WEBP_RESTRICT rgb,
|
||||
uint8_t* WEBP_RESTRICT u,
|
||||
uint8_t* WEBP_RESTRICT v, int width);
|
||||
extern void (*WebPConvertRGBA32ToUV)(const uint16_t* rgb,
|
||||
uint8_t* u, uint8_t* v, int width);
|
||||
|
||||
// Convert RGB or BGR to Y
|
||||
extern void (*WebPConvertRGB24ToY)(const uint8_t* WEBP_RESTRICT rgb,
|
||||
uint8_t* WEBP_RESTRICT y, int width);
|
||||
extern void (*WebPConvertBGR24ToY)(const uint8_t* WEBP_RESTRICT bgr,
|
||||
uint8_t* WEBP_RESTRICT y, int width);
|
||||
extern void (*WebPConvertRGB24ToY)(const uint8_t* rgb, uint8_t* y, int width);
|
||||
extern void (*WebPConvertBGR24ToY)(const uint8_t* bgr, uint8_t* y, int width);
|
||||
|
||||
// used for plain-C fallback.
|
||||
extern void WebPConvertARGBToUV_C(const uint32_t* WEBP_RESTRICT argb,
|
||||
uint8_t* WEBP_RESTRICT u,
|
||||
uint8_t* WEBP_RESTRICT v,
|
||||
extern void WebPConvertARGBToUV_C(const uint32_t* argb, uint8_t* u, uint8_t* v,
|
||||
int src_width, int do_store);
|
||||
extern void WebPConvertRGBA32ToUV_C(const uint16_t* WEBP_RESTRICT rgb,
|
||||
uint8_t* WEBP_RESTRICT u,
|
||||
uint8_t* WEBP_RESTRICT v, int width);
|
||||
extern void WebPConvertRGBA32ToUV_C(const uint16_t* rgb,
|
||||
uint8_t* u, uint8_t* v, int width);
|
||||
|
||||
// Must be called before using the above.
|
||||
void WebPInitConvertARGBToYUV(void);
|
||||
@ -378,9 +348,8 @@ struct WebPRescaler;
|
||||
// Import a row of data and save its contribution in the rescaler.
|
||||
// 'channel' denotes the channel number to be imported. 'Expand' corresponds to
|
||||
// the wrk->x_expand case. Otherwise, 'Shrink' is to be used.
|
||||
typedef void (*WebPRescalerImportRowFunc)(
|
||||
struct WebPRescaler* WEBP_RESTRICT const wrk,
|
||||
const uint8_t* WEBP_RESTRICT src);
|
||||
typedef void (*WebPRescalerImportRowFunc)(struct WebPRescaler* const wrk,
|
||||
const uint8_t* src);
|
||||
|
||||
extern WebPRescalerImportRowFunc WebPRescalerImportRowExpand;
|
||||
extern WebPRescalerImportRowFunc WebPRescalerImportRowShrink;
|
||||
@ -393,19 +362,16 @@ extern WebPRescalerExportRowFunc WebPRescalerExportRowExpand;
|
||||
extern WebPRescalerExportRowFunc WebPRescalerExportRowShrink;
|
||||
|
||||
// Plain-C implementation, as fall-back.
|
||||
extern void WebPRescalerImportRowExpand_C(
|
||||
struct WebPRescaler* WEBP_RESTRICT const wrk,
|
||||
const uint8_t* WEBP_RESTRICT src);
|
||||
extern void WebPRescalerImportRowShrink_C(
|
||||
struct WebPRescaler* WEBP_RESTRICT const wrk,
|
||||
const uint8_t* WEBP_RESTRICT src);
|
||||
extern void WebPRescalerImportRowExpand_C(struct WebPRescaler* const wrk,
|
||||
const uint8_t* src);
|
||||
extern void WebPRescalerImportRowShrink_C(struct WebPRescaler* const wrk,
|
||||
const uint8_t* src);
|
||||
extern void WebPRescalerExportRowExpand_C(struct WebPRescaler* const wrk);
|
||||
extern void WebPRescalerExportRowShrink_C(struct WebPRescaler* const wrk);
|
||||
|
||||
// Main entry calls:
|
||||
extern void WebPRescalerImportRow(
|
||||
struct WebPRescaler* WEBP_RESTRICT const wrk,
|
||||
const uint8_t* WEBP_RESTRICT src);
|
||||
extern void WebPRescalerImportRow(struct WebPRescaler* const wrk,
|
||||
const uint8_t* src);
|
||||
// Export one row (starting at x_out position) from rescaler.
|
||||
extern void WebPRescalerExportRow(struct WebPRescaler* const wrk);
|
||||
|
||||
@ -514,9 +480,8 @@ typedef enum { // Filter types.
|
||||
WEBP_FILTER_FAST
|
||||
} WEBP_FILTER_TYPE;
|
||||
|
||||
typedef void (*WebPFilterFunc)(const uint8_t* WEBP_RESTRICT in,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT out);
|
||||
typedef void (*WebPFilterFunc)(const uint8_t* in, int width, int height,
|
||||
int stride, uint8_t* out);
|
||||
// In-place un-filtering.
|
||||
// Warning! 'prev_line' pointer can be equal to 'cur_line' or 'preds'.
|
||||
typedef void (*WebPUnfilterFunc)(const uint8_t* prev_line, const uint8_t* preds,
|
||||
|
124
src/dsp/enc.c
124
src/dsp/enc.c
@ -59,10 +59,9 @@ void VP8SetHistogramData(const int distribution[MAX_COEFF_THRESH + 1],
|
||||
}
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void CollectHistogram_C(const uint8_t* WEBP_RESTRICT ref,
|
||||
const uint8_t* WEBP_RESTRICT pred,
|
||||
static void CollectHistogram_C(const uint8_t* ref, const uint8_t* pred,
|
||||
int start_block, int end_block,
|
||||
VP8Histogram* WEBP_RESTRICT const histo) {
|
||||
VP8Histogram* const histo) {
|
||||
int j;
|
||||
int distribution[MAX_COEFF_THRESH + 1] = { 0 };
|
||||
for (j = start_block; j < end_block; ++j) {
|
||||
@ -110,9 +109,8 @@ static WEBP_TSAN_IGNORE_FUNCTION void InitTables(void) {
|
||||
#define STORE(x, y, v) \
|
||||
dst[(x) + (y) * BPS] = clip_8b(ref[(x) + (y) * BPS] + ((v) >> 3))
|
||||
|
||||
static WEBP_INLINE void ITransformOne(const uint8_t* WEBP_RESTRICT ref,
|
||||
const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
|
||||
uint8_t* dst) {
|
||||
int C[4 * 4], *tmp;
|
||||
int i;
|
||||
tmp = C;
|
||||
@ -148,9 +146,7 @@ static WEBP_INLINE void ITransformOne(const uint8_t* WEBP_RESTRICT ref,
|
||||
}
|
||||
}
|
||||
|
||||
static void ITransform_C(const uint8_t* WEBP_RESTRICT ref,
|
||||
const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst,
|
||||
static void ITransform_C(const uint8_t* ref, const int16_t* in, uint8_t* dst,
|
||||
int do_two) {
|
||||
ITransformOne(ref, in, dst);
|
||||
if (do_two) {
|
||||
@ -158,9 +154,7 @@ static void ITransform_C(const uint8_t* WEBP_RESTRICT ref,
|
||||
}
|
||||
}
|
||||
|
||||
static void FTransform_C(const uint8_t* WEBP_RESTRICT src,
|
||||
const uint8_t* WEBP_RESTRICT ref,
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
static void FTransform_C(const uint8_t* src, const uint8_t* ref, int16_t* out) {
|
||||
int i;
|
||||
int tmp[16];
|
||||
for (i = 0; i < 4; ++i, src += BPS, ref += BPS) {
|
||||
@ -190,16 +184,14 @@ static void FTransform_C(const uint8_t* WEBP_RESTRICT src,
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
static void FTransform2_C(const uint8_t* WEBP_RESTRICT src,
|
||||
const uint8_t* WEBP_RESTRICT ref,
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
static void FTransform2_C(const uint8_t* src, const uint8_t* ref,
|
||||
int16_t* out) {
|
||||
VP8FTransform(src, ref, out);
|
||||
VP8FTransform(src + 4, ref + 4, out + 16);
|
||||
}
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void FTransformWHT_C(const int16_t* WEBP_RESTRICT in,
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
static void FTransformWHT_C(const int16_t* in, int16_t* out) {
|
||||
// input is 12b signed
|
||||
int32_t tmp[16];
|
||||
int i;
|
||||
@ -242,9 +234,8 @@ static WEBP_INLINE void Fill(uint8_t* dst, int value, int size) {
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void VerticalPred(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top,
|
||||
int size) {
|
||||
static WEBP_INLINE void VerticalPred(uint8_t* dst,
|
||||
const uint8_t* top, int size) {
|
||||
int j;
|
||||
if (top != NULL) {
|
||||
for (j = 0; j < size; ++j) memcpy(dst + j * BPS, top, size);
|
||||
@ -253,9 +244,8 @@ static WEBP_INLINE void VerticalPred(uint8_t* WEBP_RESTRICT dst,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void HorizontalPred(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
int size) {
|
||||
static WEBP_INLINE void HorizontalPred(uint8_t* dst,
|
||||
const uint8_t* left, int size) {
|
||||
if (left != NULL) {
|
||||
int j;
|
||||
for (j = 0; j < size; ++j) {
|
||||
@ -266,9 +256,8 @@ static WEBP_INLINE void HorizontalPred(uint8_t* WEBP_RESTRICT dst,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void TrueMotion(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top, int size) {
|
||||
static WEBP_INLINE void TrueMotion(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top, int size) {
|
||||
int y;
|
||||
if (left != NULL) {
|
||||
if (top != NULL) {
|
||||
@ -297,9 +286,8 @@ static WEBP_INLINE void TrueMotion(uint8_t* WEBP_RESTRICT dst,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DCMode(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top,
|
||||
static WEBP_INLINE void DCMode(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top,
|
||||
int size, int round, int shift) {
|
||||
int DC = 0;
|
||||
int j;
|
||||
@ -324,9 +312,8 @@ static WEBP_INLINE void DCMode(uint8_t* WEBP_RESTRICT dst,
|
||||
//------------------------------------------------------------------------------
|
||||
// Chroma 8x8 prediction (paragraph 12.2)
|
||||
|
||||
static void IntraChromaPreds_C(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
static void IntraChromaPreds_C(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
// U block
|
||||
DCMode(C8DC8 + dst, left, top, 8, 8, 4);
|
||||
VerticalPred(C8VE8 + dst, top, 8);
|
||||
@ -346,9 +333,8 @@ static void IntraChromaPreds_C(uint8_t* WEBP_RESTRICT dst,
|
||||
// luma 16x16 prediction (paragraph 12.3)
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE || !WEBP_AARCH64
|
||||
static void Intra16Preds_C(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
static void Intra16Preds_C(uint8_t* dst,
|
||||
const uint8_t* left, const uint8_t* top) {
|
||||
DCMode(I16DC16 + dst, left, top, 16, 16, 5);
|
||||
VerticalPred(I16VE16 + dst, top, 16);
|
||||
HorizontalPred(I16HE16 + dst, left, 16);
|
||||
@ -366,8 +352,7 @@ static void Intra16Preds_C(uint8_t* WEBP_RESTRICT dst,
|
||||
#define AVG3(a, b, c) ((uint8_t)(((a) + 2 * (b) + (c) + 2) >> 2))
|
||||
#define AVG2(a, b) (((a) + (b) + 1) >> 1)
|
||||
|
||||
// vertical
|
||||
static void VE4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
static void VE4(uint8_t* dst, const uint8_t* top) { // vertical
|
||||
const uint8_t vals[4] = {
|
||||
AVG3(top[-1], top[0], top[1]),
|
||||
AVG3(top[ 0], top[1], top[2]),
|
||||
@ -380,8 +365,7 @@ static void VE4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
}
|
||||
}
|
||||
|
||||
// horizontal
|
||||
static void HE4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
static void HE4(uint8_t* dst, const uint8_t* top) { // horizontal
|
||||
const int X = top[-1];
|
||||
const int I = top[-2];
|
||||
const int J = top[-3];
|
||||
@ -393,14 +377,14 @@ static void HE4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
WebPUint32ToMem(dst + 3 * BPS, 0x01010101U * AVG3(K, L, L));
|
||||
}
|
||||
|
||||
static void DC4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
static void DC4(uint8_t* dst, const uint8_t* top) {
|
||||
uint32_t dc = 4;
|
||||
int i;
|
||||
for (i = 0; i < 4; ++i) dc += top[i] + top[-5 + i];
|
||||
Fill(dst, dc >> 3, 4);
|
||||
}
|
||||
|
||||
static void RD4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
static void RD4(uint8_t* dst, const uint8_t* top) {
|
||||
const int X = top[-1];
|
||||
const int I = top[-2];
|
||||
const int J = top[-3];
|
||||
@ -419,7 +403,7 @@ static void RD4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
DST(3, 0) = AVG3(D, C, B);
|
||||
}
|
||||
|
||||
static void LD4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
static void LD4(uint8_t* dst, const uint8_t* top) {
|
||||
const int A = top[0];
|
||||
const int B = top[1];
|
||||
const int C = top[2];
|
||||
@ -437,7 +421,7 @@ static void LD4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
DST(3, 3) = AVG3(G, H, H);
|
||||
}
|
||||
|
||||
static void VR4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
static void VR4(uint8_t* dst, const uint8_t* top) {
|
||||
const int X = top[-1];
|
||||
const int I = top[-2];
|
||||
const int J = top[-3];
|
||||
@ -459,7 +443,7 @@ static void VR4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
DST(3, 1) = AVG3(B, C, D);
|
||||
}
|
||||
|
||||
static void VL4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
static void VL4(uint8_t* dst, const uint8_t* top) {
|
||||
const int A = top[0];
|
||||
const int B = top[1];
|
||||
const int C = top[2];
|
||||
@ -481,7 +465,7 @@ static void VL4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
DST(3, 3) = AVG3(F, G, H);
|
||||
}
|
||||
|
||||
static void HU4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
static void HU4(uint8_t* dst, const uint8_t* top) {
|
||||
const int I = top[-2];
|
||||
const int J = top[-3];
|
||||
const int K = top[-4];
|
||||
@ -496,7 +480,7 @@ static void HU4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
|
||||
}
|
||||
|
||||
static void HD4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
static void HD4(uint8_t* dst, const uint8_t* top) {
|
||||
const int X = top[-1];
|
||||
const int I = top[-2];
|
||||
const int J = top[-3];
|
||||
@ -519,7 +503,7 @@ static void HD4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
DST(1, 3) = AVG3(L, K, J);
|
||||
}
|
||||
|
||||
static void TM4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
static void TM4(uint8_t* dst, const uint8_t* top) {
|
||||
int x, y;
|
||||
const uint8_t* const clip = clip1 + 255 - top[-1];
|
||||
for (y = 0; y < 4; ++y) {
|
||||
@ -537,8 +521,7 @@ static void TM4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
|
||||
// Left samples are top[-5 .. -2], top_left is top[-1], top are
|
||||
// located at top[0..3], and top right is top[4..7]
|
||||
static void Intra4Preds_C(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
static void Intra4Preds_C(uint8_t* dst, const uint8_t* top) {
|
||||
DC4(I4DC4 + dst, top);
|
||||
TM4(I4TM4 + dst, top);
|
||||
VE4(I4VE4 + dst, top);
|
||||
@ -557,8 +540,7 @@ static void Intra4Preds_C(uint8_t* WEBP_RESTRICT dst,
|
||||
// Metric
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static WEBP_INLINE int GetSSE(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b,
|
||||
static WEBP_INLINE int GetSSE(const uint8_t* a, const uint8_t* b,
|
||||
int w, int h) {
|
||||
int count = 0;
|
||||
int y, x;
|
||||
@ -573,25 +555,21 @@ static WEBP_INLINE int GetSSE(const uint8_t* WEBP_RESTRICT a,
|
||||
return count;
|
||||
}
|
||||
|
||||
static int SSE16x16_C(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
static int SSE16x16_C(const uint8_t* a, const uint8_t* b) {
|
||||
return GetSSE(a, b, 16, 16);
|
||||
}
|
||||
static int SSE16x8_C(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
static int SSE16x8_C(const uint8_t* a, const uint8_t* b) {
|
||||
return GetSSE(a, b, 16, 8);
|
||||
}
|
||||
static int SSE8x8_C(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
static int SSE8x8_C(const uint8_t* a, const uint8_t* b) {
|
||||
return GetSSE(a, b, 8, 8);
|
||||
}
|
||||
static int SSE4x4_C(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
static int SSE4x4_C(const uint8_t* a, const uint8_t* b) {
|
||||
return GetSSE(a, b, 4, 4);
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
static void Mean16x4_C(const uint8_t* WEBP_RESTRICT ref, uint32_t dc[4]) {
|
||||
static void Mean16x4_C(const uint8_t* ref, uint32_t dc[4]) {
|
||||
int k, x, y;
|
||||
for (k = 0; k < 4; ++k) {
|
||||
uint32_t avg = 0;
|
||||
@ -615,8 +593,7 @@ static void Mean16x4_C(const uint8_t* WEBP_RESTRICT ref, uint32_t dc[4]) {
|
||||
// Hadamard transform
|
||||
// Returns the weighted sum of the absolute value of transformed coefficients.
|
||||
// w[] contains a row-major 4 by 4 symmetric matrix.
|
||||
static int TTransform(const uint8_t* WEBP_RESTRICT in,
|
||||
const uint16_t* WEBP_RESTRICT w) {
|
||||
static int TTransform(const uint8_t* in, const uint16_t* w) {
|
||||
int sum = 0;
|
||||
int tmp[16];
|
||||
int i;
|
||||
@ -650,17 +627,15 @@ static int TTransform(const uint8_t* WEBP_RESTRICT in,
|
||||
return sum;
|
||||
}
|
||||
|
||||
static int Disto4x4_C(const uint8_t* WEBP_RESTRICT const a,
|
||||
const uint8_t* WEBP_RESTRICT const b,
|
||||
const uint16_t* WEBP_RESTRICT const w) {
|
||||
static int Disto4x4_C(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
const int sum1 = TTransform(a, w);
|
||||
const int sum2 = TTransform(b, w);
|
||||
return abs(sum2 - sum1) >> 5;
|
||||
}
|
||||
|
||||
static int Disto16x16_C(const uint8_t* WEBP_RESTRICT const a,
|
||||
const uint8_t* WEBP_RESTRICT const b,
|
||||
const uint16_t* WEBP_RESTRICT const w) {
|
||||
static int Disto16x16_C(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
int D = 0;
|
||||
int x, y;
|
||||
for (y = 0; y < 16 * BPS; y += 4 * BPS) {
|
||||
@ -683,7 +658,7 @@ static const uint8_t kZigzag[16] = {
|
||||
|
||||
// Simple quantization
|
||||
static int QuantizeBlock_C(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx) {
|
||||
const VP8Matrix* const mtx) {
|
||||
int last = -1;
|
||||
int n;
|
||||
for (n = 0; n < 16; ++n) {
|
||||
@ -709,7 +684,7 @@ static int QuantizeBlock_C(int16_t in[16], int16_t out[16],
|
||||
}
|
||||
|
||||
static int Quantize2Blocks_C(int16_t in[32], int16_t out[32],
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx) {
|
||||
const VP8Matrix* const mtx) {
|
||||
int nz;
|
||||
nz = VP8EncQuantizeBlock(in + 0 * 16, out + 0 * 16, mtx) << 0;
|
||||
nz |= VP8EncQuantizeBlock(in + 1 * 16, out + 1 * 16, mtx) << 1;
|
||||
@ -720,8 +695,7 @@ static int Quantize2Blocks_C(int16_t in[32], int16_t out[32],
|
||||
//------------------------------------------------------------------------------
|
||||
// Block copy
|
||||
|
||||
static WEBP_INLINE void Copy(const uint8_t* WEBP_RESTRICT src,
|
||||
uint8_t* WEBP_RESTRICT dst, int w, int h) {
|
||||
static WEBP_INLINE void Copy(const uint8_t* src, uint8_t* dst, int w, int h) {
|
||||
int y;
|
||||
for (y = 0; y < h; ++y) {
|
||||
memcpy(dst, src, w);
|
||||
@ -730,13 +704,11 @@ static WEBP_INLINE void Copy(const uint8_t* WEBP_RESTRICT src,
|
||||
}
|
||||
}
|
||||
|
||||
static void Copy4x4_C(const uint8_t* WEBP_RESTRICT src,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
static void Copy4x4_C(const uint8_t* src, uint8_t* dst) {
|
||||
Copy(src, dst, 4, 4);
|
||||
}
|
||||
|
||||
static void Copy16x8_C(const uint8_t* WEBP_RESTRICT src,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
static void Copy16x8_C(const uint8_t* src, uint8_t* dst) {
|
||||
Copy(src, dst, 16, 8);
|
||||
}
|
||||
|
||||
|
@ -109,9 +109,9 @@ static const int kC2 = WEBP_TRANSFORM_AC3_C2;
|
||||
"sb %[" #TEMP12 "], 3+" XSTR(BPS) "*" #A "(%[temp16]) \n\t"
|
||||
|
||||
// Does one or two inverse transforms.
|
||||
static WEBP_INLINE void ITransformOne_MIPS32(const uint8_t* WEBP_RESTRICT ref,
|
||||
const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
static WEBP_INLINE void ITransformOne_MIPS32(const uint8_t* ref,
|
||||
const int16_t* in,
|
||||
uint8_t* dst) {
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5, temp6;
|
||||
int temp7, temp8, temp9, temp10, temp11, temp12, temp13;
|
||||
int temp14, temp15, temp16, temp17, temp18, temp19, temp20;
|
||||
@ -141,9 +141,8 @@ static WEBP_INLINE void ITransformOne_MIPS32(const uint8_t* WEBP_RESTRICT ref,
|
||||
);
|
||||
}
|
||||
|
||||
static void ITransform_MIPS32(const uint8_t* WEBP_RESTRICT ref,
|
||||
const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst, int do_two) {
|
||||
static void ITransform_MIPS32(const uint8_t* ref, const int16_t* in,
|
||||
uint8_t* dst, int do_two) {
|
||||
ITransformOne_MIPS32(ref, in, dst);
|
||||
if (do_two) {
|
||||
ITransformOne_MIPS32(ref + 4, in + 16, dst + 4);
|
||||
@ -237,7 +236,7 @@ static int QuantizeBlock_MIPS32(int16_t in[16], int16_t out[16],
|
||||
}
|
||||
|
||||
static int Quantize2Blocks_MIPS32(int16_t in[32], int16_t out[32],
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx) {
|
||||
const VP8Matrix* const mtx) {
|
||||
int nz;
|
||||
nz = QuantizeBlock_MIPS32(in + 0 * 16, out + 0 * 16, mtx) << 0;
|
||||
nz |= QuantizeBlock_MIPS32(in + 1 * 16, out + 1 * 16, mtx) << 1;
|
||||
@ -359,9 +358,8 @@ static int Quantize2Blocks_MIPS32(int16_t in[32], int16_t out[32],
|
||||
"msub %[temp6], %[temp0] \n\t" \
|
||||
"msub %[temp7], %[temp1] \n\t"
|
||||
|
||||
static int Disto4x4_MIPS32(const uint8_t* WEBP_RESTRICT const a,
|
||||
const uint8_t* WEBP_RESTRICT const b,
|
||||
const uint16_t* WEBP_RESTRICT const w) {
|
||||
static int Disto4x4_MIPS32(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
int tmp[32];
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
|
||||
|
||||
@ -395,9 +393,8 @@ static int Disto4x4_MIPS32(const uint8_t* WEBP_RESTRICT const a,
|
||||
#undef VERTICAL_PASS
|
||||
#undef HORIZONTAL_PASS
|
||||
|
||||
static int Disto16x16_MIPS32(const uint8_t* WEBP_RESTRICT const a,
|
||||
const uint8_t* WEBP_RESTRICT const b,
|
||||
const uint16_t* WEBP_RESTRICT const w) {
|
||||
static int Disto16x16_MIPS32(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
int D = 0;
|
||||
int x, y;
|
||||
for (y = 0; y < 16 * BPS; y += 4 * BPS) {
|
||||
@ -478,9 +475,8 @@ static int Disto16x16_MIPS32(const uint8_t* WEBP_RESTRICT const a,
|
||||
"sh %[" #TEMP8 "], " #D "(%[temp20]) \n\t" \
|
||||
"sh %[" #TEMP12 "], " #B "(%[temp20]) \n\t"
|
||||
|
||||
static void FTransform_MIPS32(const uint8_t* WEBP_RESTRICT src,
|
||||
const uint8_t* WEBP_RESTRICT ref,
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
static void FTransform_MIPS32(const uint8_t* src, const uint8_t* ref,
|
||||
int16_t* out) {
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
|
||||
int temp9, temp10, temp11, temp12, temp13, temp14, temp15, temp16;
|
||||
int temp17, temp18, temp19, temp20;
|
||||
@ -541,8 +537,7 @@ static void FTransform_MIPS32(const uint8_t* WEBP_RESTRICT src,
|
||||
GET_SSE_INNER(C, C + 1, C + 2, C + 3) \
|
||||
GET_SSE_INNER(D, D + 1, D + 2, D + 3)
|
||||
|
||||
static int SSE16x16_MIPS32(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
static int SSE16x16_MIPS32(const uint8_t* a, const uint8_t* b) {
|
||||
int count;
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
|
||||
|
||||
@ -576,8 +571,7 @@ static int SSE16x16_MIPS32(const uint8_t* WEBP_RESTRICT a,
|
||||
return count;
|
||||
}
|
||||
|
||||
static int SSE16x8_MIPS32(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
static int SSE16x8_MIPS32(const uint8_t* a, const uint8_t* b) {
|
||||
int count;
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
|
||||
|
||||
@ -603,8 +597,7 @@ static int SSE16x8_MIPS32(const uint8_t* WEBP_RESTRICT a,
|
||||
return count;
|
||||
}
|
||||
|
||||
static int SSE8x8_MIPS32(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
static int SSE8x8_MIPS32(const uint8_t* a, const uint8_t* b) {
|
||||
int count;
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
|
||||
|
||||
@ -626,8 +619,7 @@ static int SSE8x8_MIPS32(const uint8_t* WEBP_RESTRICT a,
|
||||
return count;
|
||||
}
|
||||
|
||||
static int SSE4x4_MIPS32(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
static int SSE4x4_MIPS32(const uint8_t* a, const uint8_t* b) {
|
||||
int count;
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
|
||||
|
||||
|
@ -141,9 +141,8 @@ static const int kC2 = WEBP_TRANSFORM_AC3_C2;
|
||||
"sh %[" #TEMP8 "], " #D "(%[temp20]) \n\t" \
|
||||
"sh %[" #TEMP12 "], " #B "(%[temp20]) \n\t"
|
||||
|
||||
static void FTransform_MIPSdspR2(const uint8_t* WEBP_RESTRICT src,
|
||||
const uint8_t* WEBP_RESTRICT ref,
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
static void FTransform_MIPSdspR2(const uint8_t* src, const uint8_t* ref,
|
||||
int16_t* out) {
|
||||
const int c2217 = 2217;
|
||||
const int c5352 = 5352;
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
|
||||
@ -172,9 +171,8 @@ static void FTransform_MIPSdspR2(const uint8_t* WEBP_RESTRICT src,
|
||||
#undef VERTICAL_PASS
|
||||
#undef HORIZONTAL_PASS
|
||||
|
||||
static WEBP_INLINE void ITransformOne(const uint8_t* WEBP_RESTRICT ref,
|
||||
const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
|
||||
uint8_t* dst) {
|
||||
int temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9;
|
||||
int temp10, temp11, temp12, temp13, temp14, temp15, temp16, temp17, temp18;
|
||||
|
||||
@ -241,18 +239,16 @@ static WEBP_INLINE void ITransformOne(const uint8_t* WEBP_RESTRICT ref,
|
||||
);
|
||||
}
|
||||
|
||||
static void ITransform_MIPSdspR2(const uint8_t* WEBP_RESTRICT ref,
|
||||
const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst, int do_two) {
|
||||
static void ITransform_MIPSdspR2(const uint8_t* ref, const int16_t* in,
|
||||
uint8_t* dst, int do_two) {
|
||||
ITransformOne(ref, in, dst);
|
||||
if (do_two) {
|
||||
ITransformOne(ref + 4, in + 16, dst + 4);
|
||||
}
|
||||
}
|
||||
|
||||
static int Disto4x4_MIPSdspR2(const uint8_t* WEBP_RESTRICT const a,
|
||||
const uint8_t* WEBP_RESTRICT const b,
|
||||
const uint16_t* WEBP_RESTRICT const w) {
|
||||
static int Disto4x4_MIPSdspR2(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
int temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9;
|
||||
int temp10, temp11, temp12, temp13, temp14, temp15, temp16, temp17;
|
||||
|
||||
@ -318,9 +314,9 @@ static int Disto4x4_MIPSdspR2(const uint8_t* WEBP_RESTRICT const a,
|
||||
return abs(temp3 - temp17) >> 5;
|
||||
}
|
||||
|
||||
static int Disto16x16_MIPSdspR2(const uint8_t* WEBP_RESTRICT const a,
|
||||
const uint8_t* WEBP_RESTRICT const b,
|
||||
const uint16_t* WEBP_RESTRICT const w) {
|
||||
static int Disto16x16_MIPSdspR2(const uint8_t* const a,
|
||||
const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
int D = 0;
|
||||
int x, y;
|
||||
for (y = 0; y < 16 * BPS; y += 4 * BPS) {
|
||||
@ -371,8 +367,8 @@ static int Disto16x16_MIPSdspR2(const uint8_t* WEBP_RESTRICT const a,
|
||||
} while (0)
|
||||
|
||||
#define VERTICAL_PRED(DST, TOP, SIZE) \
|
||||
static WEBP_INLINE void VerticalPred##SIZE( \
|
||||
uint8_t* WEBP_RESTRICT (DST), const uint8_t* WEBP_RESTRICT (TOP)) { \
|
||||
static WEBP_INLINE void VerticalPred##SIZE(uint8_t* (DST), \
|
||||
const uint8_t* (TOP)) { \
|
||||
int j; \
|
||||
if ((TOP)) { \
|
||||
for (j = 0; j < (SIZE); ++j) memcpy((DST) + j * BPS, (TOP), (SIZE)); \
|
||||
@ -387,8 +383,8 @@ VERTICAL_PRED(dst, top, 16)
|
||||
#undef VERTICAL_PRED
|
||||
|
||||
#define HORIZONTAL_PRED(DST, LEFT, SIZE) \
|
||||
static WEBP_INLINE void HorizontalPred##SIZE( \
|
||||
uint8_t* WEBP_RESTRICT (DST), const uint8_t* WEBP_RESTRICT (LEFT)) { \
|
||||
static WEBP_INLINE void HorizontalPred##SIZE(uint8_t* (DST), \
|
||||
const uint8_t* (LEFT)) { \
|
||||
if (LEFT) { \
|
||||
int j; \
|
||||
for (j = 0; j < (SIZE); ++j) { \
|
||||
@ -455,9 +451,8 @@ HORIZONTAL_PRED(dst, left, 16)
|
||||
} while (0)
|
||||
|
||||
#define TRUE_MOTION(DST, LEFT, TOP, SIZE) \
|
||||
static WEBP_INLINE void TrueMotion##SIZE(uint8_t* WEBP_RESTRICT (DST), \
|
||||
const uint8_t* WEBP_RESTRICT (LEFT), \
|
||||
const uint8_t* WEBP_RESTRICT (TOP)) { \
|
||||
static WEBP_INLINE void TrueMotion##SIZE(uint8_t* (DST), const uint8_t* (LEFT),\
|
||||
const uint8_t* (TOP)) { \
|
||||
if ((LEFT) != NULL) { \
|
||||
if ((TOP) != NULL) { \
|
||||
CLIP_TO_DST((DST), (LEFT), (TOP), (SIZE)); \
|
||||
@ -485,9 +480,8 @@ TRUE_MOTION(dst, left, top, 16)
|
||||
#undef CLIP_8B_TO_DST
|
||||
#undef CLIPPING
|
||||
|
||||
static WEBP_INLINE void DCMode16(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
static WEBP_INLINE void DCMode16(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
int DC, DC1;
|
||||
int temp0, temp1, temp2, temp3;
|
||||
|
||||
@ -549,9 +543,8 @@ static WEBP_INLINE void DCMode16(uint8_t* WEBP_RESTRICT dst,
|
||||
FILL_8_OR_16(dst, DC, 16);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DCMode8(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
static WEBP_INLINE void DCMode8(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
int DC, DC1;
|
||||
int temp0, temp1, temp2, temp3;
|
||||
|
||||
@ -595,7 +588,7 @@ static WEBP_INLINE void DCMode8(uint8_t* WEBP_RESTRICT dst,
|
||||
FILL_8_OR_16(dst, DC, 8);
|
||||
}
|
||||
|
||||
static void DC4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
static void DC4(uint8_t* dst, const uint8_t* top) {
|
||||
int temp0, temp1;
|
||||
__asm__ volatile(
|
||||
"ulw %[temp0], 0(%[top]) \n\t"
|
||||
@ -616,7 +609,7 @@ static void DC4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
);
|
||||
}
|
||||
|
||||
static void TM4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
static void TM4(uint8_t* dst, const uint8_t* top) {
|
||||
int a10, a32, temp0, temp1, temp2, temp3, temp4, temp5;
|
||||
const int c35 = 0xff00ff;
|
||||
__asm__ volatile (
|
||||
@ -671,7 +664,7 @@ static void TM4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
);
|
||||
}
|
||||
|
||||
static void VE4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
static void VE4(uint8_t* dst, const uint8_t* top) {
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5, temp6;
|
||||
__asm__ volatile(
|
||||
"ulw %[temp0], -1(%[top]) \n\t"
|
||||
@ -702,7 +695,7 @@ static void VE4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
);
|
||||
}
|
||||
|
||||
static void HE4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
static void HE4(uint8_t* dst, const uint8_t* top) {
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5, temp6;
|
||||
__asm__ volatile(
|
||||
"ulw %[temp0], -4(%[top]) \n\t"
|
||||
@ -738,7 +731,7 @@ static void HE4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
);
|
||||
}
|
||||
|
||||
static void RD4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
static void RD4(uint8_t* dst, const uint8_t* top) {
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5;
|
||||
int temp6, temp7, temp8, temp9, temp10, temp11;
|
||||
__asm__ volatile(
|
||||
@ -787,7 +780,7 @@ static void RD4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
);
|
||||
}
|
||||
|
||||
static void VR4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
static void VR4(uint8_t* dst, const uint8_t* top) {
|
||||
int temp0, temp1, temp2, temp3, temp4;
|
||||
int temp5, temp6, temp7, temp8, temp9;
|
||||
__asm__ volatile (
|
||||
@ -837,7 +830,7 @@ static void VR4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
);
|
||||
}
|
||||
|
||||
static void LD4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
static void LD4(uint8_t* dst, const uint8_t* top) {
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5;
|
||||
int temp6, temp7, temp8, temp9, temp10, temp11;
|
||||
__asm__ volatile(
|
||||
@ -884,7 +877,7 @@ static void LD4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
);
|
||||
}
|
||||
|
||||
static void VL4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
static void VL4(uint8_t* dst, const uint8_t* top) {
|
||||
int temp0, temp1, temp2, temp3, temp4;
|
||||
int temp5, temp6, temp7, temp8, temp9;
|
||||
__asm__ volatile (
|
||||
@ -933,7 +926,7 @@ static void VL4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
);
|
||||
}
|
||||
|
||||
static void HD4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
static void HD4(uint8_t* dst, const uint8_t* top) {
|
||||
int temp0, temp1, temp2, temp3, temp4;
|
||||
int temp5, temp6, temp7, temp8, temp9;
|
||||
__asm__ volatile (
|
||||
@ -981,7 +974,7 @@ static void HD4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
);
|
||||
}
|
||||
|
||||
static void HU4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
static void HU4(uint8_t* dst, const uint8_t* top) {
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
|
||||
__asm__ volatile (
|
||||
"ulw %[temp0], -5(%[top]) \n\t"
|
||||
@ -1020,9 +1013,8 @@ static void HU4(uint8_t* WEBP_RESTRICT dst, const uint8_t* WEBP_RESTRICT top) {
|
||||
//------------------------------------------------------------------------------
|
||||
// Chroma 8x8 prediction (paragraph 12.2)
|
||||
|
||||
static void IntraChromaPreds_MIPSdspR2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
static void IntraChromaPreds_MIPSdspR2(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
// U block
|
||||
DCMode8(C8DC8 + dst, left, top);
|
||||
VerticalPred8(C8VE8 + dst, top);
|
||||
@ -1041,9 +1033,8 @@ static void IntraChromaPreds_MIPSdspR2(uint8_t* WEBP_RESTRICT dst,
|
||||
//------------------------------------------------------------------------------
|
||||
// luma 16x16 prediction (paragraph 12.3)
|
||||
|
||||
static void Intra16Preds_MIPSdspR2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
static void Intra16Preds_MIPSdspR2(uint8_t* dst,
|
||||
const uint8_t* left, const uint8_t* top) {
|
||||
DCMode16(I16DC16 + dst, left, top);
|
||||
VerticalPred16(I16VE16 + dst, top);
|
||||
HorizontalPred16(I16HE16 + dst, left);
|
||||
@ -1052,8 +1043,7 @@ static void Intra16Preds_MIPSdspR2(uint8_t* WEBP_RESTRICT dst,
|
||||
|
||||
// Left samples are top[-5 .. -2], top_left is top[-1], top are
|
||||
// located at top[0..3], and top right is top[4..7]
|
||||
static void Intra4Preds_MIPSdspR2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
static void Intra4Preds_MIPSdspR2(uint8_t* dst, const uint8_t* top) {
|
||||
DC4(I4DC4 + dst, top);
|
||||
TM4(I4TM4 + dst, top);
|
||||
VE4(I4VE4 + dst, top);
|
||||
@ -1089,8 +1079,7 @@ static void Intra4Preds_MIPSdspR2(uint8_t* WEBP_RESTRICT dst,
|
||||
GET_SSE_INNER(C) \
|
||||
GET_SSE_INNER(D)
|
||||
|
||||
static int SSE16x16_MIPSdspR2(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
static int SSE16x16_MIPSdspR2(const uint8_t* a, const uint8_t* b) {
|
||||
int count;
|
||||
int temp0, temp1, temp2, temp3;
|
||||
__asm__ volatile (
|
||||
@ -1120,8 +1109,7 @@ static int SSE16x16_MIPSdspR2(const uint8_t* WEBP_RESTRICT a,
|
||||
return count;
|
||||
}
|
||||
|
||||
static int SSE16x8_MIPSdspR2(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
static int SSE16x8_MIPSdspR2(const uint8_t* a, const uint8_t* b) {
|
||||
int count;
|
||||
int temp0, temp1, temp2, temp3;
|
||||
__asm__ volatile (
|
||||
@ -1143,8 +1131,7 @@ static int SSE16x8_MIPSdspR2(const uint8_t* WEBP_RESTRICT a,
|
||||
return count;
|
||||
}
|
||||
|
||||
static int SSE8x8_MIPSdspR2(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
static int SSE8x8_MIPSdspR2(const uint8_t* a, const uint8_t* b) {
|
||||
int count;
|
||||
int temp0, temp1, temp2, temp3;
|
||||
__asm__ volatile (
|
||||
@ -1162,8 +1149,7 @@ static int SSE8x8_MIPSdspR2(const uint8_t* WEBP_RESTRICT a,
|
||||
return count;
|
||||
}
|
||||
|
||||
static int SSE4x4_MIPSdspR2(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
static int SSE4x4_MIPSdspR2(const uint8_t* a, const uint8_t* b) {
|
||||
int count;
|
||||
int temp0, temp1, temp2, temp3;
|
||||
__asm__ volatile (
|
||||
@ -1287,7 +1273,7 @@ static int SSE4x4_MIPSdspR2(const uint8_t* WEBP_RESTRICT a,
|
||||
"3: \n\t"
|
||||
|
||||
static int QuantizeBlock_MIPSdspR2(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx) {
|
||||
const VP8Matrix* const mtx) {
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5,temp6;
|
||||
int sign, coeff, level;
|
||||
int max_level = MAX_LEVEL;
|
||||
@ -1328,7 +1314,7 @@ static int QuantizeBlock_MIPSdspR2(int16_t in[16], int16_t out[16],
|
||||
}
|
||||
|
||||
static int Quantize2Blocks_MIPSdspR2(int16_t in[32], int16_t out[32],
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx) {
|
||||
const VP8Matrix* const mtx) {
|
||||
int nz;
|
||||
nz = QuantizeBlock_MIPSdspR2(in + 0 * 16, out + 0 * 16, mtx) << 0;
|
||||
nz |= QuantizeBlock_MIPSdspR2(in + 1 * 16, out + 1 * 16, mtx) << 1;
|
||||
@ -1374,8 +1360,7 @@ static int Quantize2Blocks_MIPSdspR2(int16_t in[32], int16_t out[32],
|
||||
"usw %[" #TEMP4 "], " #C "(%[out]) \n\t" \
|
||||
"usw %[" #TEMP6 "], " #D "(%[out]) \n\t"
|
||||
|
||||
static void FTransformWHT_MIPSdspR2(const int16_t* WEBP_RESTRICT in,
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
static void FTransformWHT_MIPSdspR2(const int16_t* in, int16_t* out) {
|
||||
int temp0, temp1, temp2, temp3, temp4;
|
||||
int temp5, temp6, temp7, temp8, temp9;
|
||||
|
||||
|
@ -41,9 +41,8 @@
|
||||
BUTTERFLY_4(a1_m, b1_m, c1_m, d1_m, out0, out1, out2, out3); \
|
||||
} while (0)
|
||||
|
||||
static WEBP_INLINE void ITransformOne(const uint8_t* WEBP_RESTRICT ref,
|
||||
const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
|
||||
uint8_t* dst) {
|
||||
v8i16 input0, input1;
|
||||
v4i32 in0, in1, in2, in3, hz0, hz1, hz2, hz3, vt0, vt1, vt2, vt3;
|
||||
v4i32 res0, res1, res2, res3;
|
||||
@ -70,18 +69,16 @@ static WEBP_INLINE void ITransformOne(const uint8_t* WEBP_RESTRICT ref,
|
||||
ST4x4_UB(res0, res0, 3, 2, 1, 0, dst, BPS);
|
||||
}
|
||||
|
||||
static void ITransform_MSA(const uint8_t* WEBP_RESTRICT ref,
|
||||
const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst, int do_two) {
|
||||
static void ITransform_MSA(const uint8_t* ref, const int16_t* in, uint8_t* dst,
|
||||
int do_two) {
|
||||
ITransformOne(ref, in, dst);
|
||||
if (do_two) {
|
||||
ITransformOne(ref + 4, in + 16, dst + 4);
|
||||
}
|
||||
}
|
||||
|
||||
static void FTransform_MSA(const uint8_t* WEBP_RESTRICT src,
|
||||
const uint8_t* WEBP_RESTRICT ref,
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
static void FTransform_MSA(const uint8_t* src, const uint8_t* ref,
|
||||
int16_t* out) {
|
||||
uint64_t out0, out1, out2, out3;
|
||||
uint32_t in0, in1, in2, in3;
|
||||
v4i32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
|
||||
@ -134,8 +131,7 @@ static void FTransform_MSA(const uint8_t* WEBP_RESTRICT src,
|
||||
SD4(out0, out1, out2, out3, out, 8);
|
||||
}
|
||||
|
||||
static void FTransformWHT_MSA(const int16_t* WEBP_RESTRICT in,
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
static void FTransformWHT_MSA(const int16_t* in, int16_t* out) {
|
||||
v8i16 in0 = { 0 };
|
||||
v8i16 in1 = { 0 };
|
||||
v8i16 tmp0, tmp1, tmp2, tmp3;
|
||||
@ -172,8 +168,7 @@ static void FTransformWHT_MSA(const int16_t* WEBP_RESTRICT in,
|
||||
ST_SH2(out0, out1, out, 8);
|
||||
}
|
||||
|
||||
static int TTransform_MSA(const uint8_t* WEBP_RESTRICT in,
|
||||
const uint16_t* WEBP_RESTRICT w) {
|
||||
static int TTransform_MSA(const uint8_t* in, const uint16_t* w) {
|
||||
int sum;
|
||||
uint32_t in0_m, in1_m, in2_m, in3_m;
|
||||
v16i8 src0 = { 0 };
|
||||
@ -205,17 +200,15 @@ static int TTransform_MSA(const uint8_t* WEBP_RESTRICT in,
|
||||
return sum;
|
||||
}
|
||||
|
||||
static int Disto4x4_MSA(const uint8_t* WEBP_RESTRICT const a,
|
||||
const uint8_t* WEBP_RESTRICT const b,
|
||||
const uint16_t* WEBP_RESTRICT const w) {
|
||||
static int Disto4x4_MSA(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
const int sum1 = TTransform_MSA(a, w);
|
||||
const int sum2 = TTransform_MSA(b, w);
|
||||
return abs(sum2 - sum1) >> 5;
|
||||
}
|
||||
|
||||
static int Disto16x16_MSA(const uint8_t* WEBP_RESTRICT const a,
|
||||
const uint8_t* WEBP_RESTRICT const b,
|
||||
const uint16_t* WEBP_RESTRICT const w) {
|
||||
static int Disto16x16_MSA(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
int D = 0;
|
||||
int x, y;
|
||||
for (y = 0; y < 16 * BPS; y += 4 * BPS) {
|
||||
@ -266,9 +259,7 @@ static void CollectHistogram_MSA(const uint8_t* ref, const uint8_t* pred,
|
||||
#define AVG3(a, b, c) (((a) + 2 * (b) + (c) + 2) >> 2)
|
||||
#define AVG2(a, b) (((a) + (b) + 1) >> 1)
|
||||
|
||||
// vertical
|
||||
static WEBP_INLINE void VE4(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
static WEBP_INLINE void VE4(uint8_t* dst, const uint8_t* top) { // vertical
|
||||
const v16u8 A1 = { 0 };
|
||||
const uint64_t val_m = LD(top - 1);
|
||||
const v16u8 A = (v16u8)__msa_insert_d((v2i64)A1, 0, val_m);
|
||||
@ -281,9 +272,7 @@ static WEBP_INLINE void VE4(uint8_t* WEBP_RESTRICT dst,
|
||||
SW4(out, out, out, out, dst, BPS);
|
||||
}
|
||||
|
||||
// horizontal
|
||||
static WEBP_INLINE void HE4(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
static WEBP_INLINE void HE4(uint8_t* dst, const uint8_t* top) { // horizontal
|
||||
const int X = top[-1];
|
||||
const int I = top[-2];
|
||||
const int J = top[-3];
|
||||
@ -295,8 +284,7 @@ static WEBP_INLINE void HE4(uint8_t* WEBP_RESTRICT dst,
|
||||
WebPUint32ToMem(dst + 3 * BPS, 0x01010101U * AVG3(K, L, L));
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DC4(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
static WEBP_INLINE void DC4(uint8_t* dst, const uint8_t* top) {
|
||||
uint32_t dc = 4;
|
||||
int i;
|
||||
for (i = 0; i < 4; ++i) dc += top[i] + top[-5 + i];
|
||||
@ -305,8 +293,7 @@ static WEBP_INLINE void DC4(uint8_t* WEBP_RESTRICT dst,
|
||||
SW4(dc, dc, dc, dc, dst, BPS);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void RD4(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
static WEBP_INLINE void RD4(uint8_t* dst, const uint8_t* top) {
|
||||
const v16u8 A2 = { 0 };
|
||||
const uint64_t val_m = LD(top - 5);
|
||||
const v16u8 A1 = (v16u8)__msa_insert_d((v2i64)A2, 0, val_m);
|
||||
@ -326,8 +313,7 @@ static WEBP_INLINE void RD4(uint8_t* WEBP_RESTRICT dst,
|
||||
SW4(val3, val2, val1, val0, dst, BPS);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void LD4(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
static WEBP_INLINE void LD4(uint8_t* dst, const uint8_t* top) {
|
||||
const v16u8 A1 = { 0 };
|
||||
const uint64_t val_m = LD(top);
|
||||
const v16u8 A = (v16u8)__msa_insert_d((v2i64)A1, 0, val_m);
|
||||
@ -347,8 +333,7 @@ static WEBP_INLINE void LD4(uint8_t* WEBP_RESTRICT dst,
|
||||
SW4(val0, val1, val2, val3, dst, BPS);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void VR4(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
static WEBP_INLINE void VR4(uint8_t* dst, const uint8_t* top) {
|
||||
const int X = top[-1];
|
||||
const int I = top[-2];
|
||||
const int J = top[-3];
|
||||
@ -369,8 +354,7 @@ static WEBP_INLINE void VR4(uint8_t* WEBP_RESTRICT dst,
|
||||
DST(3, 1) = AVG3(B, C, D);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void VL4(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
static WEBP_INLINE void VL4(uint8_t* dst, const uint8_t* top) {
|
||||
const int A = top[0];
|
||||
const int B = top[1];
|
||||
const int C = top[2];
|
||||
@ -391,8 +375,7 @@ static WEBP_INLINE void VL4(uint8_t* WEBP_RESTRICT dst,
|
||||
DST(3, 3) = AVG3(F, G, H);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void HU4(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
static WEBP_INLINE void HU4(uint8_t* dst, const uint8_t* top) {
|
||||
const int I = top[-2];
|
||||
const int J = top[-3];
|
||||
const int K = top[-4];
|
||||
@ -407,8 +390,7 @@ static WEBP_INLINE void HU4(uint8_t* WEBP_RESTRICT dst,
|
||||
DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
|
||||
}
|
||||
|
||||
static WEBP_INLINE void HD4(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
static WEBP_INLINE void HD4(uint8_t* dst, const uint8_t* top) {
|
||||
const int X = top[-1];
|
||||
const int I = top[-2];
|
||||
const int J = top[-3];
|
||||
@ -429,8 +411,7 @@ static WEBP_INLINE void HD4(uint8_t* WEBP_RESTRICT dst,
|
||||
DST(1, 3) = AVG3(L, K, J);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void TM4(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
static WEBP_INLINE void TM4(uint8_t* dst, const uint8_t* top) {
|
||||
const v16i8 zero = { 0 };
|
||||
const v8i16 TL = (v8i16)__msa_fill_h(top[-1]);
|
||||
const v8i16 L0 = (v8i16)__msa_fill_h(top[-2]);
|
||||
@ -450,8 +431,7 @@ static WEBP_INLINE void TM4(uint8_t* WEBP_RESTRICT dst,
|
||||
#undef AVG3
|
||||
#undef AVG2
|
||||
|
||||
static void Intra4Preds_MSA(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
static void Intra4Preds_MSA(uint8_t* dst, const uint8_t* top) {
|
||||
DC4(I4DC4 + dst, top);
|
||||
TM4(I4TM4 + dst, top);
|
||||
VE4(I4VE4 + dst, top);
|
||||
@ -471,8 +451,7 @@ static void Intra4Preds_MSA(uint8_t* WEBP_RESTRICT dst,
|
||||
ST_UB8(out, out, out, out, out, out, out, out, dst + 8 * BPS, BPS); \
|
||||
} while (0)
|
||||
|
||||
static WEBP_INLINE void VerticalPred16x16(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
static WEBP_INLINE void VerticalPred16x16(uint8_t* dst, const uint8_t* top) {
|
||||
if (top != NULL) {
|
||||
const v16u8 out = LD_UB(top);
|
||||
STORE16x16(out, dst);
|
||||
@ -482,8 +461,8 @@ static WEBP_INLINE void VerticalPred16x16(uint8_t* WEBP_RESTRICT dst,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void HorizontalPred16x16(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left) {
|
||||
static WEBP_INLINE void HorizontalPred16x16(uint8_t* dst,
|
||||
const uint8_t* left) {
|
||||
if (left != NULL) {
|
||||
int j;
|
||||
for (j = 0; j < 16; j += 4) {
|
||||
@ -501,9 +480,8 @@ static WEBP_INLINE void HorizontalPred16x16(uint8_t* WEBP_RESTRICT dst,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void TrueMotion16x16(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
static WEBP_INLINE void TrueMotion16x16(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
if (left != NULL) {
|
||||
if (top != NULL) {
|
||||
int j;
|
||||
@ -541,9 +519,8 @@ static WEBP_INLINE void TrueMotion16x16(uint8_t* WEBP_RESTRICT dst,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DCMode16x16(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
static WEBP_INLINE void DCMode16x16(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
int DC;
|
||||
v16u8 out;
|
||||
if (top != NULL && left != NULL) {
|
||||
@ -571,9 +548,8 @@ static WEBP_INLINE void DCMode16x16(uint8_t* WEBP_RESTRICT dst,
|
||||
STORE16x16(out, dst);
|
||||
}
|
||||
|
||||
static void Intra16Preds_MSA(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
static void Intra16Preds_MSA(uint8_t* dst,
|
||||
const uint8_t* left, const uint8_t* top) {
|
||||
DCMode16x16(I16DC16 + dst, left, top);
|
||||
VerticalPred16x16(I16VE16 + dst, top);
|
||||
HorizontalPred16x16(I16HE16 + dst, left);
|
||||
@ -598,8 +574,7 @@ static void Intra16Preds_MSA(uint8_t* WEBP_RESTRICT dst,
|
||||
SD4(out, out, out, out, dst + 4 * BPS, BPS); \
|
||||
} while (0)
|
||||
|
||||
static WEBP_INLINE void VerticalPred8x8(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
static WEBP_INLINE void VerticalPred8x8(uint8_t* dst, const uint8_t* top) {
|
||||
if (top != NULL) {
|
||||
const uint64_t out = LD(top);
|
||||
STORE8x8(out, dst);
|
||||
@ -609,8 +584,7 @@ static WEBP_INLINE void VerticalPred8x8(uint8_t* WEBP_RESTRICT dst,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void HorizontalPred8x8(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left) {
|
||||
static WEBP_INLINE void HorizontalPred8x8(uint8_t* dst, const uint8_t* left) {
|
||||
if (left != NULL) {
|
||||
int j;
|
||||
for (j = 0; j < 8; j += 4) {
|
||||
@ -632,9 +606,8 @@ static WEBP_INLINE void HorizontalPred8x8(uint8_t* WEBP_RESTRICT dst,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void TrueMotion8x8(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
static WEBP_INLINE void TrueMotion8x8(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
if (left != NULL) {
|
||||
if (top != NULL) {
|
||||
int j;
|
||||
@ -673,9 +646,8 @@ static WEBP_INLINE void TrueMotion8x8(uint8_t* WEBP_RESTRICT dst,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DCMode8x8(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
static WEBP_INLINE void DCMode8x8(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
uint64_t out;
|
||||
v16u8 src = { 0 };
|
||||
if (top != NULL && left != NULL) {
|
||||
@ -698,9 +670,8 @@ static WEBP_INLINE void DCMode8x8(uint8_t* WEBP_RESTRICT dst,
|
||||
STORE8x8(out, dst);
|
||||
}
|
||||
|
||||
static void IntraChromaPreds_MSA(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
static void IntraChromaPreds_MSA(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
// U block
|
||||
DCMode8x8(C8DC8 + dst, left, top);
|
||||
VerticalPred8x8(C8VE8 + dst, top);
|
||||
@ -741,8 +712,7 @@ static void IntraChromaPreds_MSA(uint8_t* WEBP_RESTRICT dst,
|
||||
DPADD_SH2_SW(tmp2, tmp3, tmp2, tmp3, out2, out3); \
|
||||
} while (0)
|
||||
|
||||
static int SSE16x16_MSA(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
static int SSE16x16_MSA(const uint8_t* a, const uint8_t* b) {
|
||||
uint32_t sum;
|
||||
v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
|
||||
v16u8 ref0, ref1, ref2, ref3, ref4, ref5, ref6, ref7;
|
||||
@ -769,8 +739,7 @@ static int SSE16x16_MSA(const uint8_t* WEBP_RESTRICT a,
|
||||
return sum;
|
||||
}
|
||||
|
||||
static int SSE16x8_MSA(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
static int SSE16x8_MSA(const uint8_t* a, const uint8_t* b) {
|
||||
uint32_t sum;
|
||||
v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
|
||||
v16u8 ref0, ref1, ref2, ref3, ref4, ref5, ref6, ref7;
|
||||
@ -789,8 +758,7 @@ static int SSE16x8_MSA(const uint8_t* WEBP_RESTRICT a,
|
||||
return sum;
|
||||
}
|
||||
|
||||
static int SSE8x8_MSA(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
static int SSE8x8_MSA(const uint8_t* a, const uint8_t* b) {
|
||||
uint32_t sum;
|
||||
v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
|
||||
v16u8 ref0, ref1, ref2, ref3, ref4, ref5, ref6, ref7;
|
||||
@ -810,8 +778,7 @@ static int SSE8x8_MSA(const uint8_t* WEBP_RESTRICT a,
|
||||
return sum;
|
||||
}
|
||||
|
||||
static int SSE4x4_MSA(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
static int SSE4x4_MSA(const uint8_t* a, const uint8_t* b) {
|
||||
uint32_t sum = 0;
|
||||
uint32_t src0, src1, src2, src3, ref0, ref1, ref2, ref3;
|
||||
v16u8 src = { 0 }, ref = { 0 }, tmp0, tmp1;
|
||||
@ -834,7 +801,7 @@ static int SSE4x4_MSA(const uint8_t* WEBP_RESTRICT a,
|
||||
// Quantization
|
||||
|
||||
static int QuantizeBlock_MSA(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx) {
|
||||
const VP8Matrix* const mtx) {
|
||||
int sum;
|
||||
v8i16 in0, in1, sh0, sh1, out0, out1;
|
||||
v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, sign0, sign1;
|
||||
@ -887,7 +854,7 @@ static int QuantizeBlock_MSA(int16_t in[16], int16_t out[16],
|
||||
}
|
||||
|
||||
static int Quantize2Blocks_MSA(int16_t in[32], int16_t out[32],
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx) {
|
||||
const VP8Matrix* const mtx) {
|
||||
int nz;
|
||||
nz = VP8EncQuantizeBlock(in + 0 * 16, out + 0 * 16, mtx) << 0;
|
||||
nz |= VP8EncQuantizeBlock(in + 1 * 16, out + 1 * 16, mtx) << 1;
|
||||
|
@ -60,8 +60,8 @@ static WEBP_INLINE void SaturateAndStore4x4_NEON(uint8_t* const dst,
|
||||
|
||||
static WEBP_INLINE void Add4x4_NEON(const int16x8_t row01,
|
||||
const int16x8_t row23,
|
||||
const uint8_t* WEBP_RESTRICT const ref,
|
||||
uint8_t* WEBP_RESTRICT const dst) {
|
||||
const uint8_t* const ref,
|
||||
uint8_t* const dst) {
|
||||
uint32x2_t dst01 = vdup_n_u32(0);
|
||||
uint32x2_t dst23 = vdup_n_u32(0);
|
||||
|
||||
@ -120,9 +120,8 @@ static WEBP_INLINE void TransformPass_NEON(int16x8x2_t* const rows) {
|
||||
Transpose8x2_NEON(E0, E1, rows);
|
||||
}
|
||||
|
||||
static void ITransformOne_NEON(const uint8_t* WEBP_RESTRICT ref,
|
||||
const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
static void ITransformOne_NEON(const uint8_t* ref,
|
||||
const int16_t* in, uint8_t* dst) {
|
||||
int16x8x2_t rows;
|
||||
INIT_VECTOR2(rows, vld1q_s16(in + 0), vld1q_s16(in + 8));
|
||||
TransformPass_NEON(&rows);
|
||||
@ -132,9 +131,8 @@ static void ITransformOne_NEON(const uint8_t* WEBP_RESTRICT ref,
|
||||
|
||||
#else
|
||||
|
||||
static void ITransformOne_NEON(const uint8_t* WEBP_RESTRICT ref,
|
||||
const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
static void ITransformOne_NEON(const uint8_t* ref,
|
||||
const int16_t* in, uint8_t* dst) {
|
||||
const int kBPS = BPS;
|
||||
const int16_t kC1C2[] = { kC1, kC2, 0, 0 };
|
||||
|
||||
@ -249,9 +247,8 @@ static void ITransformOne_NEON(const uint8_t* WEBP_RESTRICT ref,
|
||||
|
||||
#endif // WEBP_USE_INTRINSICS
|
||||
|
||||
static void ITransform_NEON(const uint8_t* WEBP_RESTRICT ref,
|
||||
const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst, int do_two) {
|
||||
static void ITransform_NEON(const uint8_t* ref,
|
||||
const int16_t* in, uint8_t* dst, int do_two) {
|
||||
ITransformOne_NEON(ref, in, dst);
|
||||
if (do_two) {
|
||||
ITransformOne_NEON(ref + 4, in + 16, dst + 4);
|
||||
@ -297,9 +294,8 @@ static WEBP_INLINE int16x8_t DiffU8ToS16_NEON(const uint8x8_t a,
|
||||
return vreinterpretq_s16_u16(vsubl_u8(a, b));
|
||||
}
|
||||
|
||||
static void FTransform_NEON(const uint8_t* WEBP_RESTRICT src,
|
||||
const uint8_t* WEBP_RESTRICT ref,
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
static void FTransform_NEON(const uint8_t* src, const uint8_t* ref,
|
||||
int16_t* out) {
|
||||
int16x8_t d0d1, d3d2; // working 4x4 int16 variables
|
||||
{
|
||||
const uint8x16_t S0 = Load4x4_NEON(src);
|
||||
@ -368,9 +364,8 @@ static const int32_t kCoeff32[] = {
|
||||
51000, 51000, 51000, 51000
|
||||
};
|
||||
|
||||
static void FTransform_NEON(const uint8_t* WEBP_RESTRICT src,
|
||||
const uint8_t* WEBP_RESTRICT ref,
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
static void FTransform_NEON(const uint8_t* src, const uint8_t* ref,
|
||||
int16_t* out) {
|
||||
const int kBPS = BPS;
|
||||
const uint8_t* src_ptr = src;
|
||||
const uint8_t* ref_ptr = ref;
|
||||
@ -489,8 +484,7 @@ static void FTransform_NEON(const uint8_t* WEBP_RESTRICT src,
|
||||
src += stride; \
|
||||
} while (0)
|
||||
|
||||
static void FTransformWHT_NEON(const int16_t* WEBP_RESTRICT src,
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
static void FTransformWHT_NEON(const int16_t* src, int16_t* out) {
|
||||
const int stride = 16;
|
||||
const int16x4_t zero = vdup_n_s16(0);
|
||||
int32x4x4_t tmp0;
|
||||
@ -665,9 +659,8 @@ static WEBP_INLINE int32x2_t DistoSum_NEON(const int16x8x4_t q4_in,
|
||||
// Hadamard transform
|
||||
// Returns the weighted sum of the absolute value of transformed coefficients.
|
||||
// w[] contains a row-major 4 by 4 symmetric matrix.
|
||||
static int Disto4x4_NEON(const uint8_t* WEBP_RESTRICT const a,
|
||||
const uint8_t* WEBP_RESTRICT const b,
|
||||
const uint16_t* WEBP_RESTRICT const w) {
|
||||
static int Disto4x4_NEON(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
uint32x2_t d_in_ab_0123 = vdup_n_u32(0);
|
||||
uint32x2_t d_in_ab_4567 = vdup_n_u32(0);
|
||||
uint32x2_t d_in_ab_89ab = vdup_n_u32(0);
|
||||
@ -708,9 +701,8 @@ static int Disto4x4_NEON(const uint8_t* WEBP_RESTRICT const a,
|
||||
}
|
||||
#undef LOAD_LANE_32b
|
||||
|
||||
static int Disto16x16_NEON(const uint8_t* WEBP_RESTRICT const a,
|
||||
const uint8_t* WEBP_RESTRICT const b,
|
||||
const uint16_t* WEBP_RESTRICT const w) {
|
||||
static int Disto16x16_NEON(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
int D = 0;
|
||||
int x, y;
|
||||
for (y = 0; y < 16 * BPS; y += 4 * BPS) {
|
||||
@ -723,10 +715,9 @@ static int Disto16x16_NEON(const uint8_t* WEBP_RESTRICT const a,
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static void CollectHistogram_NEON(const uint8_t* WEBP_RESTRICT ref,
|
||||
const uint8_t* WEBP_RESTRICT pred,
|
||||
static void CollectHistogram_NEON(const uint8_t* ref, const uint8_t* pred,
|
||||
int start_block, int end_block,
|
||||
VP8Histogram* WEBP_RESTRICT const histo) {
|
||||
VP8Histogram* const histo) {
|
||||
const uint16x8_t max_coeff_thresh = vdupq_n_u16(MAX_COEFF_THRESH);
|
||||
int j;
|
||||
int distribution[MAX_COEFF_THRESH + 1] = { 0 };
|
||||
@ -756,8 +747,8 @@ static void CollectHistogram_NEON(const uint8_t* WEBP_RESTRICT ref,
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static WEBP_INLINE void AccumulateSSE16_NEON(
|
||||
const uint8_t* WEBP_RESTRICT const a, const uint8_t* WEBP_RESTRICT const b,
|
||||
static WEBP_INLINE void AccumulateSSE16_NEON(const uint8_t* const a,
|
||||
const uint8_t* const b,
|
||||
uint32x4_t* const sum) {
|
||||
const uint8x16_t a0 = vld1q_u8(a);
|
||||
const uint8x16_t b0 = vld1q_u8(b);
|
||||
@ -784,8 +775,7 @@ static int SumToInt_NEON(uint32x4_t sum) {
|
||||
#endif
|
||||
}
|
||||
|
||||
static int SSE16x16_NEON(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
static int SSE16x16_NEON(const uint8_t* a, const uint8_t* b) {
|
||||
uint32x4_t sum = vdupq_n_u32(0);
|
||||
int y;
|
||||
for (y = 0; y < 16; ++y) {
|
||||
@ -794,8 +784,7 @@ static int SSE16x16_NEON(const uint8_t* WEBP_RESTRICT a,
|
||||
return SumToInt_NEON(sum);
|
||||
}
|
||||
|
||||
static int SSE16x8_NEON(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
static int SSE16x8_NEON(const uint8_t* a, const uint8_t* b) {
|
||||
uint32x4_t sum = vdupq_n_u32(0);
|
||||
int y;
|
||||
for (y = 0; y < 8; ++y) {
|
||||
@ -804,8 +793,7 @@ static int SSE16x8_NEON(const uint8_t* WEBP_RESTRICT a,
|
||||
return SumToInt_NEON(sum);
|
||||
}
|
||||
|
||||
static int SSE8x8_NEON(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
static int SSE8x8_NEON(const uint8_t* a, const uint8_t* b) {
|
||||
uint32x4_t sum = vdupq_n_u32(0);
|
||||
int y;
|
||||
for (y = 0; y < 8; ++y) {
|
||||
@ -818,8 +806,7 @@ static int SSE8x8_NEON(const uint8_t* WEBP_RESTRICT a,
|
||||
return SumToInt_NEON(sum);
|
||||
}
|
||||
|
||||
static int SSE4x4_NEON(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
static int SSE4x4_NEON(const uint8_t* a, const uint8_t* b) {
|
||||
const uint8x16_t a0 = Load4x4_NEON(a);
|
||||
const uint8x16_t b0 = Load4x4_NEON(b);
|
||||
const uint8x16_t abs_diff = vabdq_u8(a0, b0);
|
||||
@ -838,9 +825,8 @@ static int SSE4x4_NEON(const uint8_t* WEBP_RESTRICT a,
|
||||
// Compilation with gcc-4.6.x is problematic for now.
|
||||
#if !defined(WORK_AROUND_GCC)
|
||||
|
||||
static int16x8_t Quantize_NEON(int16_t* WEBP_RESTRICT const in,
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx,
|
||||
int offset) {
|
||||
static int16x8_t Quantize_NEON(int16_t* const in,
|
||||
const VP8Matrix* const mtx, int offset) {
|
||||
const uint16x8_t sharp = vld1q_u16(&mtx->sharpen_[offset]);
|
||||
const uint16x8_t q = vld1q_u16(&mtx->q_[offset]);
|
||||
const uint16x8_t iq = vld1q_u16(&mtx->iq_[offset]);
|
||||
@ -874,7 +860,7 @@ static const uint8_t kShuffles[4][8] = {
|
||||
};
|
||||
|
||||
static int QuantizeBlock_NEON(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx) {
|
||||
const VP8Matrix* const mtx) {
|
||||
const int16x8_t out0 = Quantize_NEON(in, mtx, 0);
|
||||
const int16x8_t out1 = Quantize_NEON(in, mtx, 8);
|
||||
uint8x8x4_t shuffles;
|
||||
@ -916,7 +902,7 @@ static int QuantizeBlock_NEON(int16_t in[16], int16_t out[16],
|
||||
}
|
||||
|
||||
static int Quantize2Blocks_NEON(int16_t in[32], int16_t out[32],
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx) {
|
||||
const VP8Matrix* const mtx) {
|
||||
int nz;
|
||||
nz = QuantizeBlock_NEON(in + 0 * 16, out + 0 * 16, mtx) << 0;
|
||||
nz |= QuantizeBlock_NEON(in + 1 * 16, out + 1 * 16, mtx) << 1;
|
||||
@ -946,8 +932,7 @@ static int Quantize2Blocks_NEON(int16_t in[32], int16_t out[32],
|
||||
vst1q_u8(dst, r); \
|
||||
} while (0)
|
||||
|
||||
static void Intra4Preds_NEON(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
static void Intra4Preds_NEON(uint8_t* dst, const uint8_t* top) {
|
||||
// 0 1 2 3 4 5 6 7 8 9 10 11 12 13
|
||||
// L K J I X A B C D E F G H
|
||||
// -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7
|
||||
@ -1180,9 +1165,8 @@ static WEBP_INLINE void TrueMotion_NEON(uint8_t* dst, const uint8_t* left,
|
||||
}
|
||||
}
|
||||
|
||||
static void Intra16Preds_NEON(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
static void Intra16Preds_NEON(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
DCMode_NEON(I16DC16 + dst, left, top);
|
||||
VerticalPred16_NEON(I16VE16 + dst, top);
|
||||
HorizontalPred16_NEON(I16HE16 + dst, left);
|
||||
|
@ -26,9 +26,8 @@
|
||||
// Transforms (Paragraph 14.4)
|
||||
|
||||
// Does one inverse transform.
|
||||
static void ITransform_One_SSE2(const uint8_t* WEBP_RESTRICT ref,
|
||||
const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
static void ITransform_One_SSE2(const uint8_t* ref, const int16_t* in,
|
||||
uint8_t* dst) {
|
||||
// This implementation makes use of 16-bit fixed point versions of two
|
||||
// multiply constants:
|
||||
// K1 = sqrt(2) * cos (pi/8) ~= 85627 / 2^16
|
||||
@ -178,9 +177,8 @@ static void ITransform_One_SSE2(const uint8_t* WEBP_RESTRICT ref,
|
||||
}
|
||||
|
||||
// Does two inverse transforms.
|
||||
static void ITransform_Two_SSE2(const uint8_t* WEBP_RESTRICT ref,
|
||||
const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
static void ITransform_Two_SSE2(const uint8_t* ref, const int16_t* in,
|
||||
uint8_t* dst) {
|
||||
// This implementation makes use of 16-bit fixed point versions of two
|
||||
// multiply constants:
|
||||
// K1 = sqrt(2) * cos (pi/8) ~= 85627 / 2^16
|
||||
@ -318,9 +316,7 @@ static void ITransform_Two_SSE2(const uint8_t* WEBP_RESTRICT ref,
|
||||
}
|
||||
|
||||
// Does one or two inverse transforms.
|
||||
static void ITransform_SSE2(const uint8_t* WEBP_RESTRICT ref,
|
||||
const int16_t* WEBP_RESTRICT in,
|
||||
uint8_t* WEBP_RESTRICT dst,
|
||||
static void ITransform_SSE2(const uint8_t* ref, const int16_t* in, uint8_t* dst,
|
||||
int do_two) {
|
||||
if (do_two) {
|
||||
ITransform_Two_SSE2(ref, in, dst);
|
||||
@ -377,7 +373,7 @@ static void FTransformPass1_SSE2(const __m128i* const in01,
|
||||
|
||||
static void FTransformPass2_SSE2(const __m128i* const v01,
|
||||
const __m128i* const v32,
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
int16_t* out) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i seven = _mm_set1_epi16(7);
|
||||
const __m128i k5352_2217 = _mm_set_epi16(5352, 2217, 5352, 2217,
|
||||
@ -428,9 +424,8 @@ static void FTransformPass2_SSE2(const __m128i* const v01,
|
||||
_mm_storeu_si128((__m128i*)&out[8], d2_f3);
|
||||
}
|
||||
|
||||
static void FTransform_SSE2(const uint8_t* WEBP_RESTRICT src,
|
||||
const uint8_t* WEBP_RESTRICT ref,
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
static void FTransform_SSE2(const uint8_t* src, const uint8_t* ref,
|
||||
int16_t* out) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
// Load src.
|
||||
const __m128i src0 = _mm_loadl_epi64((const __m128i*)&src[0 * BPS]);
|
||||
@ -473,9 +468,8 @@ static void FTransform_SSE2(const uint8_t* WEBP_RESTRICT src,
|
||||
FTransformPass2_SSE2(&v01, &v32, out);
|
||||
}
|
||||
|
||||
static void FTransform2_SSE2(const uint8_t* WEBP_RESTRICT src,
|
||||
const uint8_t* WEBP_RESTRICT ref,
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
static void FTransform2_SSE2(const uint8_t* src, const uint8_t* ref,
|
||||
int16_t* out) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
|
||||
// Load src and convert to 16b.
|
||||
@ -523,8 +517,7 @@ static void FTransform2_SSE2(const uint8_t* WEBP_RESTRICT src,
|
||||
FTransformPass2_SSE2(&v01h, &v32h, out + 16);
|
||||
}
|
||||
|
||||
static void FTransformWHTRow_SSE2(const int16_t* WEBP_RESTRICT const in,
|
||||
__m128i* const out) {
|
||||
static void FTransformWHTRow_SSE2(const int16_t* const in, __m128i* const out) {
|
||||
const __m128i kMult = _mm_set_epi16(-1, 1, -1, 1, 1, 1, 1, 1);
|
||||
const __m128i src0 = _mm_loadl_epi64((__m128i*)&in[0 * 16]);
|
||||
const __m128i src1 = _mm_loadl_epi64((__m128i*)&in[1 * 16]);
|
||||
@ -540,8 +533,7 @@ static void FTransformWHTRow_SSE2(const int16_t* WEBP_RESTRICT const in,
|
||||
*out = _mm_madd_epi16(D, kMult);
|
||||
}
|
||||
|
||||
static void FTransformWHT_SSE2(const int16_t* WEBP_RESTRICT in,
|
||||
int16_t* WEBP_RESTRICT out) {
|
||||
static void FTransformWHT_SSE2(const int16_t* in, int16_t* out) {
|
||||
// Input is 12b signed.
|
||||
__m128i row0, row1, row2, row3;
|
||||
// Rows are 14b signed.
|
||||
@ -574,10 +566,9 @@ static void FTransformWHT_SSE2(const int16_t* WEBP_RESTRICT in,
|
||||
// Compute susceptibility based on DCT-coeff histograms:
|
||||
// the higher, the "easier" the macroblock is to compress.
|
||||
|
||||
static void CollectHistogram_SSE2(const uint8_t* WEBP_RESTRICT ref,
|
||||
const uint8_t* WEBP_RESTRICT pred,
|
||||
static void CollectHistogram_SSE2(const uint8_t* ref, const uint8_t* pred,
|
||||
int start_block, int end_block,
|
||||
VP8Histogram* WEBP_RESTRICT const histo) {
|
||||
VP8Histogram* const histo) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i max_coeff_thresh = _mm_set1_epi16(MAX_COEFF_THRESH);
|
||||
int j;
|
||||
@ -649,8 +640,7 @@ static WEBP_INLINE void Fill_SSE2(uint8_t* dst, int value, int size) {
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void VE8uv_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
static WEBP_INLINE void VE8uv_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
int j;
|
||||
const __m128i top_values = _mm_loadl_epi64((const __m128i*)top);
|
||||
for (j = 0; j < 8; ++j) {
|
||||
@ -658,8 +648,7 @@ static WEBP_INLINE void VE8uv_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void VE16_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
static WEBP_INLINE void VE16_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
const __m128i top_values = _mm_load_si128((const __m128i*)top);
|
||||
int j;
|
||||
for (j = 0; j < 16; ++j) {
|
||||
@ -667,9 +656,8 @@ static WEBP_INLINE void VE16_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void VerticalPred_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top,
|
||||
int size) {
|
||||
static WEBP_INLINE void VerticalPred_SSE2(uint8_t* dst,
|
||||
const uint8_t* top, int size) {
|
||||
if (top != NULL) {
|
||||
if (size == 8) {
|
||||
VE8uv_SSE2(dst, top);
|
||||
@ -681,8 +669,7 @@ static WEBP_INLINE void VerticalPred_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void HE8uv_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left) {
|
||||
static WEBP_INLINE void HE8uv_SSE2(uint8_t* dst, const uint8_t* left) {
|
||||
int j;
|
||||
for (j = 0; j < 8; ++j) {
|
||||
const __m128i values = _mm_set1_epi8((char)left[j]);
|
||||
@ -691,8 +678,7 @@ static WEBP_INLINE void HE8uv_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void HE16_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left) {
|
||||
static WEBP_INLINE void HE16_SSE2(uint8_t* dst, const uint8_t* left) {
|
||||
int j;
|
||||
for (j = 0; j < 16; ++j) {
|
||||
const __m128i values = _mm_set1_epi8((char)left[j]);
|
||||
@ -701,9 +687,8 @@ static WEBP_INLINE void HE16_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void HorizontalPred_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
int size) {
|
||||
static WEBP_INLINE void HorizontalPred_SSE2(uint8_t* dst,
|
||||
const uint8_t* left, int size) {
|
||||
if (left != NULL) {
|
||||
if (size == 8) {
|
||||
HE8uv_SSE2(dst, left);
|
||||
@ -715,9 +700,8 @@ static WEBP_INLINE void HorizontalPred_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void TM_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top, int size) {
|
||||
static WEBP_INLINE void TM_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top, int size) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
int y;
|
||||
if (size == 8) {
|
||||
@ -744,10 +728,8 @@ static WEBP_INLINE void TM_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void TrueMotion_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top,
|
||||
int size) {
|
||||
static WEBP_INLINE void TrueMotion_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top, int size) {
|
||||
if (left != NULL) {
|
||||
if (top != NULL) {
|
||||
TM_SSE2(dst, left, top, size);
|
||||
@ -767,9 +749,8 @@ static WEBP_INLINE void TrueMotion_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DC8uv_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
static WEBP_INLINE void DC8uv_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
const __m128i top_values = _mm_loadl_epi64((const __m128i*)top);
|
||||
const __m128i left_values = _mm_loadl_epi64((const __m128i*)left);
|
||||
const __m128i combined = _mm_unpacklo_epi64(top_values, left_values);
|
||||
@ -777,8 +758,7 @@ static WEBP_INLINE void DC8uv_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
Put8x8uv_SSE2(DC >> 4, dst);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DC8uvNoLeft_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
static WEBP_INLINE void DC8uvNoLeft_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i top_values = _mm_loadl_epi64((const __m128i*)top);
|
||||
const __m128i sum = _mm_sad_epu8(top_values, zero);
|
||||
@ -786,8 +766,7 @@ static WEBP_INLINE void DC8uvNoLeft_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
Put8x8uv_SSE2(DC >> 3, dst);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DC8uvNoTop_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left) {
|
||||
static WEBP_INLINE void DC8uvNoTop_SSE2(uint8_t* dst, const uint8_t* left) {
|
||||
// 'left' is contiguous so we can reuse the top summation.
|
||||
DC8uvNoLeft_SSE2(dst, left);
|
||||
}
|
||||
@ -796,9 +775,8 @@ static WEBP_INLINE void DC8uvNoTopLeft_SSE2(uint8_t* dst) {
|
||||
Put8x8uv_SSE2(0x80, dst);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DC8uvMode_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
static WEBP_INLINE void DC8uvMode_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
if (top != NULL) {
|
||||
if (left != NULL) { // top and left present
|
||||
DC8uv_SSE2(dst, left, top);
|
||||
@ -812,9 +790,8 @@ static WEBP_INLINE void DC8uvMode_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DC16_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
static WEBP_INLINE void DC16_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
const __m128i top_row = _mm_load_si128((const __m128i*)top);
|
||||
const __m128i left_row = _mm_load_si128((const __m128i*)left);
|
||||
const int DC =
|
||||
@ -822,15 +799,13 @@ static WEBP_INLINE void DC16_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
Put16_SSE2(DC >> 5, dst);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DC16NoLeft_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
static WEBP_INLINE void DC16NoLeft_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
const __m128i top_row = _mm_load_si128((const __m128i*)top);
|
||||
const int DC = VP8HorizontalAdd8b(&top_row) + 8;
|
||||
Put16_SSE2(DC >> 4, dst);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DC16NoTop_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left) {
|
||||
static WEBP_INLINE void DC16NoTop_SSE2(uint8_t* dst, const uint8_t* left) {
|
||||
// 'left' is contiguous so we can reuse the top summation.
|
||||
DC16NoLeft_SSE2(dst, left);
|
||||
}
|
||||
@ -839,9 +814,8 @@ static WEBP_INLINE void DC16NoTopLeft_SSE2(uint8_t* dst) {
|
||||
Put16_SSE2(0x80, dst);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DC16Mode_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
static WEBP_INLINE void DC16Mode_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
if (top != NULL) {
|
||||
if (left != NULL) { // top and left present
|
||||
DC16_SSE2(dst, left, top);
|
||||
@ -870,9 +844,8 @@ static WEBP_INLINE void DC16Mode_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
// where: AC = (a + b + 1) >> 1, BC = (b + c + 1) >> 1
|
||||
// and ab = a ^ b, bc = b ^ c, lsb = (AC^BC)&1
|
||||
|
||||
// vertical
|
||||
static WEBP_INLINE void VE4_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
static WEBP_INLINE void VE4_SSE2(uint8_t* dst,
|
||||
const uint8_t* top) { // vertical
|
||||
const __m128i one = _mm_set1_epi8(1);
|
||||
const __m128i ABCDEFGH = _mm_loadl_epi64((__m128i*)(top - 1));
|
||||
const __m128i BCDEFGH0 = _mm_srli_si128(ABCDEFGH, 1);
|
||||
@ -888,9 +861,8 @@ static WEBP_INLINE void VE4_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
}
|
||||
}
|
||||
|
||||
// horizontal
|
||||
static WEBP_INLINE void HE4_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
static WEBP_INLINE void HE4_SSE2(uint8_t* dst,
|
||||
const uint8_t* top) { // horizontal
|
||||
const int X = top[-1];
|
||||
const int I = top[-2];
|
||||
const int J = top[-3];
|
||||
@ -902,17 +874,15 @@ static WEBP_INLINE void HE4_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
WebPUint32ToMem(dst + 3 * BPS, 0x01010101U * AVG3(K, L, L));
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DC4_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
static WEBP_INLINE void DC4_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
uint32_t dc = 4;
|
||||
int i;
|
||||
for (i = 0; i < 4; ++i) dc += top[i] + top[-5 + i];
|
||||
Fill_SSE2(dst, dc >> 3, 4);
|
||||
}
|
||||
|
||||
// Down-Left
|
||||
static WEBP_INLINE void LD4_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
static WEBP_INLINE void LD4_SSE2(uint8_t* dst,
|
||||
const uint8_t* top) { // Down-Left
|
||||
const __m128i one = _mm_set1_epi8(1);
|
||||
const __m128i ABCDEFGH = _mm_loadl_epi64((const __m128i*)top);
|
||||
const __m128i BCDEFGH0 = _mm_srli_si128(ABCDEFGH, 1);
|
||||
@ -928,9 +898,8 @@ static WEBP_INLINE void LD4_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
|
||||
}
|
||||
|
||||
// Vertical-Right
|
||||
static WEBP_INLINE void VR4_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
static WEBP_INLINE void VR4_SSE2(uint8_t* dst,
|
||||
const uint8_t* top) { // Vertical-Right
|
||||
const __m128i one = _mm_set1_epi8(1);
|
||||
const int I = top[-2];
|
||||
const int J = top[-3];
|
||||
@ -955,9 +924,8 @@ static WEBP_INLINE void VR4_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
DST(0, 3) = AVG3(K, J, I);
|
||||
}
|
||||
|
||||
// Vertical-Left
|
||||
static WEBP_INLINE void VL4_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
static WEBP_INLINE void VL4_SSE2(uint8_t* dst,
|
||||
const uint8_t* top) { // Vertical-Left
|
||||
const __m128i one = _mm_set1_epi8(1);
|
||||
const __m128i ABCDEFGH = _mm_loadl_epi64((const __m128i*)top);
|
||||
const __m128i BCDEFGH_ = _mm_srli_si128(ABCDEFGH, 1);
|
||||
@ -983,9 +951,8 @@ static WEBP_INLINE void VL4_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
DST(3, 3) = (extra_out >> 8) & 0xff;
|
||||
}
|
||||
|
||||
// Down-right
|
||||
static WEBP_INLINE void RD4_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
static WEBP_INLINE void RD4_SSE2(uint8_t* dst,
|
||||
const uint8_t* top) { // Down-right
|
||||
const __m128i one = _mm_set1_epi8(1);
|
||||
const __m128i LKJIXABC = _mm_loadl_epi64((const __m128i*)(top - 5));
|
||||
const __m128i LKJIXABCD = _mm_insert_epi16(LKJIXABC, top[3], 4);
|
||||
@ -1001,8 +968,7 @@ static WEBP_INLINE void RD4_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
|
||||
}
|
||||
|
||||
static WEBP_INLINE void HU4_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
static WEBP_INLINE void HU4_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
const int I = top[-2];
|
||||
const int J = top[-3];
|
||||
const int K = top[-4];
|
||||
@ -1017,8 +983,7 @@ static WEBP_INLINE void HU4_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
|
||||
}
|
||||
|
||||
static WEBP_INLINE void HD4_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
static WEBP_INLINE void HD4_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
const int X = top[-1];
|
||||
const int I = top[-2];
|
||||
const int J = top[-3];
|
||||
@ -1041,8 +1006,7 @@ static WEBP_INLINE void HD4_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
DST(1, 3) = AVG3(L, K, J);
|
||||
}
|
||||
|
||||
static WEBP_INLINE void TM4_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
static WEBP_INLINE void TM4_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i top_values = _mm_cvtsi32_si128(WebPMemToInt32(top));
|
||||
const __m128i top_base = _mm_unpacklo_epi8(top_values, zero);
|
||||
@ -1064,8 +1028,7 @@ static WEBP_INLINE void TM4_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
|
||||
// Left samples are top[-5 .. -2], top_left is top[-1], top are
|
||||
// located at top[0..3], and top right is top[4..7]
|
||||
static void Intra4Preds_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
static void Intra4Preds_SSE2(uint8_t* dst, const uint8_t* top) {
|
||||
DC4_SSE2(I4DC4 + dst, top);
|
||||
TM4_SSE2(I4TM4 + dst, top);
|
||||
VE4_SSE2(I4VE4 + dst, top);
|
||||
@ -1081,9 +1044,8 @@ static void Intra4Preds_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
//------------------------------------------------------------------------------
|
||||
// Chroma 8x8 prediction (paragraph 12.2)
|
||||
|
||||
static void IntraChromaPreds_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
static void IntraChromaPreds_SSE2(uint8_t* dst, const uint8_t* left,
|
||||
const uint8_t* top) {
|
||||
// U block
|
||||
DC8uvMode_SSE2(C8DC8 + dst, left, top);
|
||||
VerticalPred_SSE2(C8VE8 + dst, top, 8);
|
||||
@ -1102,9 +1064,8 @@ static void IntraChromaPreds_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
//------------------------------------------------------------------------------
|
||||
// luma 16x16 prediction (paragraph 12.3)
|
||||
|
||||
static void Intra16Preds_SSE2(uint8_t* WEBP_RESTRICT dst,
|
||||
const uint8_t* WEBP_RESTRICT left,
|
||||
const uint8_t* WEBP_RESTRICT top) {
|
||||
static void Intra16Preds_SSE2(uint8_t* dst,
|
||||
const uint8_t* left, const uint8_t* top) {
|
||||
DC16Mode_SSE2(I16DC16 + dst, left, top);
|
||||
VerticalPred_SSE2(I16VE16 + dst, top, 16);
|
||||
HorizontalPred_SSE2(I16HE16 + dst, left, 16);
|
||||
@ -1131,8 +1092,7 @@ static WEBP_INLINE void SubtractAndAccumulate_SSE2(const __m128i a,
|
||||
*sum = _mm_add_epi32(sum1, sum2);
|
||||
}
|
||||
|
||||
static WEBP_INLINE int SSE_16xN_SSE2(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b,
|
||||
static WEBP_INLINE int SSE_16xN_SSE2(const uint8_t* a, const uint8_t* b,
|
||||
int num_pairs) {
|
||||
__m128i sum = _mm_setzero_si128();
|
||||
int32_t tmp[4];
|
||||
@ -1154,21 +1114,18 @@ static WEBP_INLINE int SSE_16xN_SSE2(const uint8_t* WEBP_RESTRICT a,
|
||||
return (tmp[3] + tmp[2] + tmp[1] + tmp[0]);
|
||||
}
|
||||
|
||||
static int SSE16x16_SSE2(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
static int SSE16x16_SSE2(const uint8_t* a, const uint8_t* b) {
|
||||
return SSE_16xN_SSE2(a, b, 8);
|
||||
}
|
||||
|
||||
static int SSE16x8_SSE2(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
static int SSE16x8_SSE2(const uint8_t* a, const uint8_t* b) {
|
||||
return SSE_16xN_SSE2(a, b, 4);
|
||||
}
|
||||
|
||||
#define LOAD_8x16b(ptr) \
|
||||
_mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(ptr)), zero)
|
||||
|
||||
static int SSE8x8_SSE2(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
static int SSE8x8_SSE2(const uint8_t* a, const uint8_t* b) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
int num_pairs = 4;
|
||||
__m128i sum = zero;
|
||||
@ -1195,8 +1152,7 @@ static int SSE8x8_SSE2(const uint8_t* WEBP_RESTRICT a,
|
||||
}
|
||||
#undef LOAD_8x16b
|
||||
|
||||
static int SSE4x4_SSE2(const uint8_t* WEBP_RESTRICT a,
|
||||
const uint8_t* WEBP_RESTRICT b) {
|
||||
static int SSE4x4_SSE2(const uint8_t* a, const uint8_t* b) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
|
||||
// Load values. Note that we read 8 pixels instead of 4,
|
||||
@ -1233,7 +1189,7 @@ static int SSE4x4_SSE2(const uint8_t* WEBP_RESTRICT a,
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static void Mean16x4_SSE2(const uint8_t* WEBP_RESTRICT ref, uint32_t dc[4]) {
|
||||
static void Mean16x4_SSE2(const uint8_t* ref, uint32_t dc[4]) {
|
||||
const __m128i mask = _mm_set1_epi16(0x00ff);
|
||||
const __m128i a0 = _mm_loadu_si128((const __m128i*)&ref[BPS * 0]);
|
||||
const __m128i a1 = _mm_loadu_si128((const __m128i*)&ref[BPS * 1]);
|
||||
@ -1271,9 +1227,8 @@ static void Mean16x4_SSE2(const uint8_t* WEBP_RESTRICT ref, uint32_t dc[4]) {
|
||||
// Hadamard transform
|
||||
// Returns the weighted sum of the absolute value of transformed coefficients.
|
||||
// w[] contains a row-major 4 by 4 symmetric matrix.
|
||||
static int TTransform_SSE2(const uint8_t* WEBP_RESTRICT inA,
|
||||
const uint8_t* WEBP_RESTRICT inB,
|
||||
const uint16_t* WEBP_RESTRICT const w) {
|
||||
static int TTransform_SSE2(const uint8_t* inA, const uint8_t* inB,
|
||||
const uint16_t* const w) {
|
||||
int32_t sum[4];
|
||||
__m128i tmp_0, tmp_1, tmp_2, tmp_3;
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
@ -1373,16 +1328,14 @@ static int TTransform_SSE2(const uint8_t* WEBP_RESTRICT inA,
|
||||
return sum[0] + sum[1] + sum[2] + sum[3];
|
||||
}
|
||||
|
||||
static int Disto4x4_SSE2(const uint8_t* WEBP_RESTRICT const a,
|
||||
const uint8_t* WEBP_RESTRICT const b,
|
||||
const uint16_t* WEBP_RESTRICT const w) {
|
||||
static int Disto4x4_SSE2(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
const int diff_sum = TTransform_SSE2(a, b, w);
|
||||
return abs(diff_sum) >> 5;
|
||||
}
|
||||
|
||||
static int Disto16x16_SSE2(const uint8_t* WEBP_RESTRICT const a,
|
||||
const uint8_t* WEBP_RESTRICT const b,
|
||||
const uint16_t* WEBP_RESTRICT const w) {
|
||||
static int Disto16x16_SSE2(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
int D = 0;
|
||||
int x, y;
|
||||
for (y = 0; y < 16 * BPS; y += 4 * BPS) {
|
||||
@ -1397,10 +1350,9 @@ static int Disto16x16_SSE2(const uint8_t* WEBP_RESTRICT const a,
|
||||
// Quantization
|
||||
//
|
||||
|
||||
static WEBP_INLINE int DoQuantizeBlock_SSE2(
|
||||
int16_t in[16], int16_t out[16],
|
||||
const uint16_t* WEBP_RESTRICT const sharpen,
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx) {
|
||||
static WEBP_INLINE int DoQuantizeBlock_SSE2(int16_t in[16], int16_t out[16],
|
||||
const uint16_t* const sharpen,
|
||||
const VP8Matrix* const mtx) {
|
||||
const __m128i max_coeff_2047 = _mm_set1_epi16(MAX_LEVEL);
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
__m128i coeff0, coeff8;
|
||||
@ -1511,17 +1463,17 @@ static WEBP_INLINE int DoQuantizeBlock_SSE2(
|
||||
}
|
||||
|
||||
static int QuantizeBlock_SSE2(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx) {
|
||||
const VP8Matrix* const mtx) {
|
||||
return DoQuantizeBlock_SSE2(in, out, &mtx->sharpen_[0], mtx);
|
||||
}
|
||||
|
||||
static int QuantizeBlockWHT_SSE2(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx) {
|
||||
const VP8Matrix* const mtx) {
|
||||
return DoQuantizeBlock_SSE2(in, out, NULL, mtx);
|
||||
}
|
||||
|
||||
static int Quantize2Blocks_SSE2(int16_t in[32], int16_t out[32],
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx) {
|
||||
const VP8Matrix* const mtx) {
|
||||
int nz;
|
||||
const uint16_t* const sharpen = &mtx->sharpen_[0];
|
||||
nz = DoQuantizeBlock_SSE2(in + 0 * 16, out + 0 * 16, sharpen, mtx) << 0;
|
||||
|
@ -23,10 +23,9 @@
|
||||
//------------------------------------------------------------------------------
|
||||
// Compute susceptibility based on DCT-coeff histograms.
|
||||
|
||||
static void CollectHistogram_SSE41(const uint8_t* WEBP_RESTRICT ref,
|
||||
const uint8_t* WEBP_RESTRICT pred,
|
||||
static void CollectHistogram_SSE41(const uint8_t* ref, const uint8_t* pred,
|
||||
int start_block, int end_block,
|
||||
VP8Histogram* WEBP_RESTRICT const histo) {
|
||||
VP8Histogram* const histo) {
|
||||
const __m128i max_coeff_thresh = _mm_set1_epi16(MAX_COEFF_THRESH);
|
||||
int j;
|
||||
int distribution[MAX_COEFF_THRESH + 1] = { 0 };
|
||||
@ -169,16 +168,14 @@ static int TTransform_SSE41(const uint8_t* inA, const uint8_t* inB,
|
||||
return sum[0] + sum[1] + sum[2] + sum[3];
|
||||
}
|
||||
|
||||
static int Disto4x4_SSE41(const uint8_t* WEBP_RESTRICT const a,
|
||||
const uint8_t* WEBP_RESTRICT const b,
|
||||
const uint16_t* WEBP_RESTRICT const w) {
|
||||
static int Disto4x4_SSE41(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
const int diff_sum = TTransform_SSE41(a, b, w);
|
||||
return abs(diff_sum) >> 5;
|
||||
}
|
||||
|
||||
static int Disto16x16_SSE41(const uint8_t* WEBP_RESTRICT const a,
|
||||
const uint8_t* WEBP_RESTRICT const b,
|
||||
const uint16_t* WEBP_RESTRICT const w) {
|
||||
static int Disto16x16_SSE41(const uint8_t* const a, const uint8_t* const b,
|
||||
const uint16_t* const w) {
|
||||
int D = 0;
|
||||
int x, y;
|
||||
for (y = 0; y < 16 * BPS; y += 4 * BPS) {
|
||||
@ -304,17 +301,17 @@ static WEBP_INLINE int DoQuantizeBlock_SSE41(int16_t in[16], int16_t out[16],
|
||||
#undef PSHUFB_CST
|
||||
|
||||
static int QuantizeBlock_SSE41(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx) {
|
||||
const VP8Matrix* const mtx) {
|
||||
return DoQuantizeBlock_SSE41(in, out, &mtx->sharpen_[0], mtx);
|
||||
}
|
||||
|
||||
static int QuantizeBlockWHT_SSE41(int16_t in[16], int16_t out[16],
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx) {
|
||||
const VP8Matrix* const mtx) {
|
||||
return DoQuantizeBlock_SSE41(in, out, NULL, mtx);
|
||||
}
|
||||
|
||||
static int Quantize2Blocks_SSE41(int16_t in[32], int16_t out[32],
|
||||
const VP8Matrix* WEBP_RESTRICT const mtx) {
|
||||
const VP8Matrix* const mtx) {
|
||||
int nz;
|
||||
const uint16_t* const sharpen = &mtx->sharpen_[0];
|
||||
nz = DoQuantizeBlock_SSE41(in + 0 * 16, out + 0 * 16, sharpen, mtx) << 0;
|
||||
|
@ -23,16 +23,14 @@
|
||||
do { \
|
||||
assert((in) != NULL); \
|
||||
assert((out) != NULL); \
|
||||
assert((in) != (out)); \
|
||||
assert(width > 0); \
|
||||
assert(height > 0); \
|
||||
assert(stride >= width); \
|
||||
} while (0)
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static WEBP_INLINE void PredictLine_C(const uint8_t* WEBP_RESTRICT src,
|
||||
const uint8_t* WEBP_RESTRICT pred,
|
||||
uint8_t* WEBP_RESTRICT dst, int length) {
|
||||
static WEBP_INLINE void PredictLine_C(const uint8_t* src, const uint8_t* pred,
|
||||
uint8_t* dst, int length) {
|
||||
int i;
|
||||
for (i = 0; i < length; ++i) dst[i] = (uint8_t)(src[i] - pred[i]);
|
||||
}
|
||||
@ -40,9 +38,9 @@ static WEBP_INLINE void PredictLine_C(const uint8_t* WEBP_RESTRICT src,
|
||||
//------------------------------------------------------------------------------
|
||||
// Horizontal filter.
|
||||
|
||||
static WEBP_INLINE void DoHorizontalFilter_C(const uint8_t* WEBP_RESTRICT in,
|
||||
static WEBP_INLINE void DoHorizontalFilter_C(const uint8_t* in,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT out) {
|
||||
uint8_t* out) {
|
||||
const uint8_t* preds = in;
|
||||
int row;
|
||||
DCHECK(in, out);
|
||||
@ -68,9 +66,9 @@ static WEBP_INLINE void DoHorizontalFilter_C(const uint8_t* WEBP_RESTRICT in,
|
||||
//------------------------------------------------------------------------------
|
||||
// Vertical filter.
|
||||
|
||||
static WEBP_INLINE void DoVerticalFilter_C(const uint8_t* WEBP_RESTRICT in,
|
||||
static WEBP_INLINE void DoVerticalFilter_C(const uint8_t* in,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT out) {
|
||||
uint8_t* out) {
|
||||
const uint8_t* preds = in;
|
||||
int row;
|
||||
DCHECK(in, out);
|
||||
@ -101,9 +99,9 @@ static WEBP_INLINE int GradientPredictor_C(uint8_t a, uint8_t b, uint8_t c) {
|
||||
}
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static WEBP_INLINE void DoGradientFilter_C(const uint8_t* WEBP_RESTRICT in,
|
||||
static WEBP_INLINE void DoGradientFilter_C(const uint8_t* in,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT out) {
|
||||
uint8_t* out) {
|
||||
const uint8_t* preds = in;
|
||||
int row;
|
||||
DCHECK(in, out);
|
||||
@ -138,21 +136,18 @@ static WEBP_INLINE void DoGradientFilter_C(const uint8_t* WEBP_RESTRICT in,
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
static void HorizontalFilter_C(const uint8_t* WEBP_RESTRICT data,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT filtered_data) {
|
||||
static void HorizontalFilter_C(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoHorizontalFilter_C(data, width, height, stride, filtered_data);
|
||||
}
|
||||
|
||||
static void VerticalFilter_C(const uint8_t* WEBP_RESTRICT data,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT filtered_data) {
|
||||
static void VerticalFilter_C(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoVerticalFilter_C(data, width, height, stride, filtered_data);
|
||||
}
|
||||
|
||||
static void GradientFilter_C(const uint8_t* WEBP_RESTRICT data,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT filtered_data) {
|
||||
static void GradientFilter_C(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoGradientFilter_C(data, width, height, stride, filtered_data);
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
@ -26,9 +26,8 @@
|
||||
|
||||
#define DCHECK(in, out) \
|
||||
do { \
|
||||
assert((in) != NULL); \
|
||||
assert((out) != NULL); \
|
||||
assert((in) != (out)); \
|
||||
assert(in != NULL); \
|
||||
assert(out != NULL); \
|
||||
assert(width > 0); \
|
||||
assert(height > 0); \
|
||||
assert(stride >= width); \
|
||||
@ -102,8 +101,7 @@
|
||||
); \
|
||||
} while (0)
|
||||
|
||||
static WEBP_INLINE void PredictLine_MIPSdspR2(const uint8_t* WEBP_RESTRICT src,
|
||||
uint8_t* WEBP_RESTRICT dst,
|
||||
static WEBP_INLINE void PredictLine_MIPSdspR2(const uint8_t* src, uint8_t* dst,
|
||||
int length) {
|
||||
DO_PREDICT_LINE(src, dst, length, 0);
|
||||
}
|
||||
@ -193,9 +191,9 @@ static WEBP_INLINE void PredictLine_MIPSdspR2(const uint8_t* WEBP_RESTRICT src,
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
static WEBP_INLINE void DoHorizontalFilter_MIPSdspR2(
|
||||
const uint8_t* WEBP_RESTRICT in, int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT out) {
|
||||
static WEBP_INLINE void DoHorizontalFilter_MIPSdspR2(const uint8_t* in,
|
||||
int width, int height,
|
||||
int stride, uint8_t* out) {
|
||||
const uint8_t* preds = in;
|
||||
int row;
|
||||
DCHECK(in, out);
|
||||
@ -212,9 +210,9 @@ static WEBP_INLINE void DoHorizontalFilter_MIPSdspR2(
|
||||
}
|
||||
#undef FILTER_LINE_BY_LINE
|
||||
|
||||
static void HorizontalFilter_MIPSdspR2(const uint8_t* WEBP_RESTRICT data,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT filtered_data) {
|
||||
static void HorizontalFilter_MIPSdspR2(const uint8_t* data,
|
||||
int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoHorizontalFilter_MIPSdspR2(data, width, height, stride, filtered_data);
|
||||
}
|
||||
|
||||
@ -230,9 +228,9 @@ static void HorizontalFilter_MIPSdspR2(const uint8_t* WEBP_RESTRICT data,
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
static WEBP_INLINE void DoVerticalFilter_MIPSdspR2(
|
||||
const uint8_t* WEBP_RESTRICT in, int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT out) {
|
||||
static WEBP_INLINE void DoVerticalFilter_MIPSdspR2(const uint8_t* in,
|
||||
int width, int height,
|
||||
int stride, uint8_t* out) {
|
||||
const uint8_t* preds = in;
|
||||
int row;
|
||||
DCHECK(in, out);
|
||||
@ -249,9 +247,8 @@ static WEBP_INLINE void DoVerticalFilter_MIPSdspR2(
|
||||
}
|
||||
#undef FILTER_LINE_BY_LINE
|
||||
|
||||
static void VerticalFilter_MIPSdspR2(const uint8_t* WEBP_RESTRICT data,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT filtered_data) {
|
||||
static void VerticalFilter_MIPSdspR2(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoVerticalFilter_MIPSdspR2(data, width, height, stride, filtered_data);
|
||||
}
|
||||
|
||||
@ -287,9 +284,9 @@ static int GradientPredictor_MIPSdspR2(uint8_t a, uint8_t b, uint8_t c) {
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
static void DoGradientFilter_MIPSdspR2(const uint8_t* WEBP_RESTRICT in,
|
||||
static void DoGradientFilter_MIPSdspR2(const uint8_t* in,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT out) {
|
||||
uint8_t* out) {
|
||||
const uint8_t* preds = in;
|
||||
int row;
|
||||
DCHECK(in, out);
|
||||
@ -306,9 +303,8 @@ static void DoGradientFilter_MIPSdspR2(const uint8_t* WEBP_RESTRICT in,
|
||||
}
|
||||
#undef FILTER_LINE_BY_LINE
|
||||
|
||||
static void GradientFilter_MIPSdspR2(const uint8_t* WEBP_RESTRICT data,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT filtered_data) {
|
||||
static void GradientFilter_MIPSdspR2(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoGradientFilter_MIPSdspR2(data, width, height, stride, filtered_data);
|
||||
}
|
||||
|
||||
|
@ -21,8 +21,7 @@
|
||||
|
||||
static WEBP_INLINE void PredictLineInverse0(const uint8_t* src,
|
||||
const uint8_t* pred,
|
||||
uint8_t* WEBP_RESTRICT dst,
|
||||
int length) {
|
||||
uint8_t* dst, int length) {
|
||||
v16u8 src0, pred0, dst0;
|
||||
assert(length >= 0);
|
||||
while (length >= 32) {
|
||||
@ -59,9 +58,8 @@ static WEBP_INLINE void PredictLineInverse0(const uint8_t* src,
|
||||
|
||||
#define DCHECK(in, out) \
|
||||
do { \
|
||||
assert((in) != NULL); \
|
||||
assert((out) != NULL); \
|
||||
assert((in) != (out)); \
|
||||
assert(in != NULL); \
|
||||
assert(out != NULL); \
|
||||
assert(width > 0); \
|
||||
assert(height > 0); \
|
||||
assert(stride >= width); \
|
||||
@ -70,9 +68,8 @@ static WEBP_INLINE void PredictLineInverse0(const uint8_t* src,
|
||||
//------------------------------------------------------------------------------
|
||||
// Horrizontal filter
|
||||
|
||||
static void HorizontalFilter_MSA(const uint8_t* WEBP_RESTRICT data,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT filtered_data) {
|
||||
static void HorizontalFilter_MSA(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
const uint8_t* preds = data;
|
||||
const uint8_t* in = data;
|
||||
uint8_t* out = filtered_data;
|
||||
@ -102,8 +99,8 @@ static void HorizontalFilter_MSA(const uint8_t* WEBP_RESTRICT data,
|
||||
|
||||
static WEBP_INLINE void PredictLineGradient(const uint8_t* pinput,
|
||||
const uint8_t* ppred,
|
||||
uint8_t* WEBP_RESTRICT poutput,
|
||||
int stride, int size) {
|
||||
uint8_t* poutput, int stride,
|
||||
int size) {
|
||||
int w;
|
||||
const v16i8 zero = { 0 };
|
||||
while (size >= 16) {
|
||||
@ -134,9 +131,8 @@ static WEBP_INLINE void PredictLineGradient(const uint8_t* pinput,
|
||||
}
|
||||
|
||||
|
||||
static void GradientFilter_MSA(const uint8_t* WEBP_RESTRICT data,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT filtered_data) {
|
||||
static void GradientFilter_MSA(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
const uint8_t* in = data;
|
||||
const uint8_t* preds = data;
|
||||
uint8_t* out = filtered_data;
|
||||
@ -163,9 +159,8 @@ static void GradientFilter_MSA(const uint8_t* WEBP_RESTRICT data,
|
||||
//------------------------------------------------------------------------------
|
||||
// Vertical filter
|
||||
|
||||
static void VerticalFilter_MSA(const uint8_t* WEBP_RESTRICT data,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT filtered_data) {
|
||||
static void VerticalFilter_MSA(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
const uint8_t* in = data;
|
||||
const uint8_t* preds = data;
|
||||
uint8_t* out = filtered_data;
|
||||
|
@ -23,9 +23,8 @@
|
||||
|
||||
#define DCHECK(in, out) \
|
||||
do { \
|
||||
assert((in) != NULL); \
|
||||
assert((out) != NULL); \
|
||||
assert((in) != (out)); \
|
||||
assert(in != NULL); \
|
||||
assert(out != NULL); \
|
||||
assert(width > 0); \
|
||||
assert(height > 0); \
|
||||
assert(stride >= width); \
|
||||
@ -45,7 +44,7 @@
|
||||
#define ROTATE_RIGHT_N(A, N) vext_u8((A), (A), (8 - (N)) % 8)
|
||||
|
||||
static void PredictLine_NEON(const uint8_t* src, const uint8_t* pred,
|
||||
uint8_t* WEBP_RESTRICT dst, int length) {
|
||||
uint8_t* dst, int length) {
|
||||
int i;
|
||||
assert(length >= 0);
|
||||
for (i = 0; i + 16 <= length; i += 16) {
|
||||
@ -58,17 +57,16 @@ static void PredictLine_NEON(const uint8_t* src, const uint8_t* pred,
|
||||
}
|
||||
|
||||
// Special case for left-based prediction (when preds==dst-1 or preds==src-1).
|
||||
static void PredictLineLeft_NEON(const uint8_t* WEBP_RESTRICT src,
|
||||
uint8_t* WEBP_RESTRICT dst, int length) {
|
||||
static void PredictLineLeft_NEON(const uint8_t* src, uint8_t* dst, int length) {
|
||||
PredictLine_NEON(src, src - 1, dst, length);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Horizontal filter.
|
||||
|
||||
static WEBP_INLINE void DoHorizontalFilter_NEON(
|
||||
const uint8_t* WEBP_RESTRICT in, int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT out) {
|
||||
static WEBP_INLINE void DoHorizontalFilter_NEON(const uint8_t* in,
|
||||
int width, int height,
|
||||
int stride, uint8_t* out) {
|
||||
int row;
|
||||
DCHECK(in, out);
|
||||
|
||||
@ -88,18 +86,17 @@ static WEBP_INLINE void DoHorizontalFilter_NEON(
|
||||
}
|
||||
}
|
||||
|
||||
static void HorizontalFilter_NEON(const uint8_t* WEBP_RESTRICT data,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT filtered_data) {
|
||||
static void HorizontalFilter_NEON(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoHorizontalFilter_NEON(data, width, height, stride, filtered_data);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Vertical filter.
|
||||
|
||||
static WEBP_INLINE void DoVerticalFilter_NEON(const uint8_t* WEBP_RESTRICT in,
|
||||
static WEBP_INLINE void DoVerticalFilter_NEON(const uint8_t* in,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT out) {
|
||||
uint8_t* out) {
|
||||
int row;
|
||||
DCHECK(in, out);
|
||||
|
||||
@ -118,9 +115,8 @@ static WEBP_INLINE void DoVerticalFilter_NEON(const uint8_t* WEBP_RESTRICT in,
|
||||
}
|
||||
}
|
||||
|
||||
static void VerticalFilter_NEON(const uint8_t* WEBP_RESTRICT data,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT filtered_data) {
|
||||
static void VerticalFilter_NEON(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoVerticalFilter_NEON(data, width, height, stride, filtered_data);
|
||||
}
|
||||
|
||||
@ -134,8 +130,7 @@ static WEBP_INLINE int GradientPredictor_C(uint8_t a, uint8_t b, uint8_t c) {
|
||||
|
||||
static void GradientPredictDirect_NEON(const uint8_t* const row,
|
||||
const uint8_t* const top,
|
||||
uint8_t* WEBP_RESTRICT const out,
|
||||
int length) {
|
||||
uint8_t* const out, int length) {
|
||||
int i;
|
||||
for (i = 0; i + 8 <= length; i += 8) {
|
||||
const uint8x8_t A = vld1_u8(&row[i - 1]);
|
||||
@ -151,9 +146,9 @@ static void GradientPredictDirect_NEON(const uint8_t* const row,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DoGradientFilter_NEON(const uint8_t* WEBP_RESTRICT in,
|
||||
static WEBP_INLINE void DoGradientFilter_NEON(const uint8_t* in,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT out) {
|
||||
uint8_t* out) {
|
||||
int row;
|
||||
DCHECK(in, out);
|
||||
|
||||
@ -172,9 +167,8 @@ static WEBP_INLINE void DoGradientFilter_NEON(const uint8_t* WEBP_RESTRICT in,
|
||||
}
|
||||
}
|
||||
|
||||
static void GradientFilter_NEON(const uint8_t* WEBP_RESTRICT data,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT filtered_data) {
|
||||
static void GradientFilter_NEON(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoGradientFilter_NEON(data, width, height, stride, filtered_data);
|
||||
}
|
||||
|
||||
|
@ -27,15 +27,13 @@
|
||||
do { \
|
||||
assert((in) != NULL); \
|
||||
assert((out) != NULL); \
|
||||
assert((in) != (out)); \
|
||||
assert(width > 0); \
|
||||
assert(height > 0); \
|
||||
assert(stride >= width); \
|
||||
} while (0)
|
||||
|
||||
static void PredictLineTop_SSE2(const uint8_t* WEBP_RESTRICT src,
|
||||
const uint8_t* WEBP_RESTRICT pred,
|
||||
uint8_t* WEBP_RESTRICT dst, int length) {
|
||||
static void PredictLineTop_SSE2(const uint8_t* src, const uint8_t* pred,
|
||||
uint8_t* dst, int length) {
|
||||
int i;
|
||||
const int max_pos = length & ~31;
|
||||
assert(length >= 0);
|
||||
@ -53,8 +51,7 @@ static void PredictLineTop_SSE2(const uint8_t* WEBP_RESTRICT src,
|
||||
}
|
||||
|
||||
// Special case for left-based prediction (when preds==dst-1 or preds==src-1).
|
||||
static void PredictLineLeft_SSE2(const uint8_t* WEBP_RESTRICT src,
|
||||
uint8_t* WEBP_RESTRICT dst, int length) {
|
||||
static void PredictLineLeft_SSE2(const uint8_t* src, uint8_t* dst, int length) {
|
||||
int i;
|
||||
const int max_pos = length & ~31;
|
||||
assert(length >= 0);
|
||||
@ -74,9 +71,9 @@ static void PredictLineLeft_SSE2(const uint8_t* WEBP_RESTRICT src,
|
||||
//------------------------------------------------------------------------------
|
||||
// Horizontal filter.
|
||||
|
||||
static WEBP_INLINE void DoHorizontalFilter_SSE2(
|
||||
const uint8_t* WEBP_RESTRICT in, int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT out) {
|
||||
static WEBP_INLINE void DoHorizontalFilter_SSE2(const uint8_t* in,
|
||||
int width, int height,
|
||||
int stride, uint8_t* out) {
|
||||
int row;
|
||||
DCHECK(in, out);
|
||||
|
||||
@ -99,9 +96,9 @@ static WEBP_INLINE void DoHorizontalFilter_SSE2(
|
||||
//------------------------------------------------------------------------------
|
||||
// Vertical filter.
|
||||
|
||||
static WEBP_INLINE void DoVerticalFilter_SSE2(const uint8_t* WEBP_RESTRICT in,
|
||||
static WEBP_INLINE void DoVerticalFilter_SSE2(const uint8_t* in,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT out) {
|
||||
uint8_t* out) {
|
||||
int row;
|
||||
DCHECK(in, out);
|
||||
|
||||
@ -130,8 +127,7 @@ static WEBP_INLINE int GradientPredictor_SSE2(uint8_t a, uint8_t b, uint8_t c) {
|
||||
|
||||
static void GradientPredictDirect_SSE2(const uint8_t* const row,
|
||||
const uint8_t* const top,
|
||||
uint8_t* WEBP_RESTRICT const out,
|
||||
int length) {
|
||||
uint8_t* const out, int length) {
|
||||
const int max_pos = length & ~7;
|
||||
int i;
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
@ -155,9 +151,9 @@ static void GradientPredictDirect_SSE2(const uint8_t* const row,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void DoGradientFilter_SSE2(const uint8_t* WEBP_RESTRICT in,
|
||||
static WEBP_INLINE void DoGradientFilter_SSE2(const uint8_t* in,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT out) {
|
||||
uint8_t* out) {
|
||||
int row;
|
||||
DCHECK(in, out);
|
||||
|
||||
@ -180,21 +176,18 @@ static WEBP_INLINE void DoGradientFilter_SSE2(const uint8_t* WEBP_RESTRICT in,
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static void HorizontalFilter_SSE2(const uint8_t* WEBP_RESTRICT data,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT filtered_data) {
|
||||
static void HorizontalFilter_SSE2(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoHorizontalFilter_SSE2(data, width, height, stride, filtered_data);
|
||||
}
|
||||
|
||||
static void VerticalFilter_SSE2(const uint8_t* WEBP_RESTRICT data,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT filtered_data) {
|
||||
static void VerticalFilter_SSE2(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoVerticalFilter_SSE2(data, width, height, stride, filtered_data);
|
||||
}
|
||||
|
||||
static void GradientFilter_SSE2(const uint8_t* WEBP_RESTRICT data,
|
||||
int width, int height, int stride,
|
||||
uint8_t* WEBP_RESTRICT filtered_data) {
|
||||
static void GradientFilter_SSE2(const uint8_t* data, int width, int height,
|
||||
int stride, uint8_t* filtered_data) {
|
||||
DoGradientFilter_SSE2(data, width, height, stride, filtered_data);
|
||||
}
|
||||
|
||||
|
@ -182,13 +182,13 @@ uint32_t VP8LPredictor13_C(const uint32_t* const left,
|
||||
}
|
||||
|
||||
static void PredictorAdd0_C(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int num_pixels, uint32_t* out) {
|
||||
int x;
|
||||
(void)upper;
|
||||
for (x = 0; x < num_pixels; ++x) out[x] = VP8LAddPixels(in[x], ARGB_BLACK);
|
||||
}
|
||||
static void PredictorAdd1_C(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int num_pixels, uint32_t* out) {
|
||||
int i;
|
||||
uint32_t left = out[-1];
|
||||
(void)upper;
|
||||
@ -441,8 +441,8 @@ static int is_big_endian(void) {
|
||||
return (tmp.b[0] != 1);
|
||||
}
|
||||
|
||||
void VP8LConvertBGRAToRGB_C(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
|
||||
void VP8LConvertBGRAToRGB_C(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
const uint32_t* const src_end = src + num_pixels;
|
||||
while (src < src_end) {
|
||||
const uint32_t argb = *src++;
|
||||
@ -452,8 +452,8 @@ void VP8LConvertBGRAToRGB_C(const uint32_t* WEBP_RESTRICT src,
|
||||
}
|
||||
}
|
||||
|
||||
void VP8LConvertBGRAToRGBA_C(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
|
||||
void VP8LConvertBGRAToRGBA_C(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
const uint32_t* const src_end = src + num_pixels;
|
||||
while (src < src_end) {
|
||||
const uint32_t argb = *src++;
|
||||
@ -464,8 +464,8 @@ void VP8LConvertBGRAToRGBA_C(const uint32_t* WEBP_RESTRICT src,
|
||||
}
|
||||
}
|
||||
|
||||
void VP8LConvertBGRAToRGBA4444_C(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
|
||||
void VP8LConvertBGRAToRGBA4444_C(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
const uint32_t* const src_end = src + num_pixels;
|
||||
while (src < src_end) {
|
||||
const uint32_t argb = *src++;
|
||||
@ -481,8 +481,8 @@ void VP8LConvertBGRAToRGBA4444_C(const uint32_t* WEBP_RESTRICT src,
|
||||
}
|
||||
}
|
||||
|
||||
void VP8LConvertBGRAToRGB565_C(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
|
||||
void VP8LConvertBGRAToRGB565_C(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
const uint32_t* const src_end = src + num_pixels;
|
||||
while (src < src_end) {
|
||||
const uint32_t argb = *src++;
|
||||
@ -498,8 +498,8 @@ void VP8LConvertBGRAToRGB565_C(const uint32_t* WEBP_RESTRICT src,
|
||||
}
|
||||
}
|
||||
|
||||
void VP8LConvertBGRAToBGR_C(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
|
||||
void VP8LConvertBGRAToBGR_C(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
const uint32_t* const src_end = src + num_pixels;
|
||||
while (src < src_end) {
|
||||
const uint32_t argb = *src++;
|
||||
@ -509,8 +509,8 @@ void VP8LConvertBGRAToBGR_C(const uint32_t* WEBP_RESTRICT src,
|
||||
}
|
||||
}
|
||||
|
||||
static void CopyOrSwap(const uint32_t* WEBP_RESTRICT src, int num_pixels,
|
||||
uint8_t* WEBP_RESTRICT dst, int swap_on_big_endian) {
|
||||
static void CopyOrSwap(const uint32_t* src, int num_pixels, uint8_t* dst,
|
||||
int swap_on_big_endian) {
|
||||
if (is_big_endian() == swap_on_big_endian) {
|
||||
const uint32_t* const src_end = src + num_pixels;
|
||||
while (src < src_end) {
|
||||
|
@ -18,7 +18,6 @@
|
||||
#include "src/webp/types.h"
|
||||
#include "src/webp/decode.h"
|
||||
|
||||
#include "src/dsp/dsp.h"
|
||||
#include "src/enc/histogram_enc.h"
|
||||
#include "src/utils/utils.h"
|
||||
|
||||
@ -61,7 +60,7 @@ uint32_t VP8LPredictor13_C(const uint32_t* const left,
|
||||
// These Add/Sub function expects upper[-1] and out[-1] to be readable.
|
||||
typedef void (*VP8LPredictorAddSubFunc)(const uint32_t* in,
|
||||
const uint32_t* upper, int num_pixels,
|
||||
uint32_t* WEBP_RESTRICT out);
|
||||
uint32_t* out);
|
||||
extern VP8LPredictorAddSubFunc VP8LPredictorsAdd[16];
|
||||
extern VP8LPredictorAddSubFunc VP8LPredictorsAdd_C[16];
|
||||
|
||||
@ -92,8 +91,8 @@ void VP8LInverseTransform(const struct VP8LTransform* const transform,
|
||||
const uint32_t* const in, uint32_t* const out);
|
||||
|
||||
// Color space conversion.
|
||||
typedef void (*VP8LConvertFunc)(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst);
|
||||
typedef void (*VP8LConvertFunc)(const uint32_t* src, int num_pixels,
|
||||
uint8_t* dst);
|
||||
extern VP8LConvertFunc VP8LConvertBGRAToRGB;
|
||||
extern VP8LConvertFunc VP8LConvertBGRAToRGBA;
|
||||
extern VP8LConvertFunc VP8LConvertBGRAToRGBA4444;
|
||||
@ -146,33 +145,29 @@ void VP8LDspInit(void);
|
||||
|
||||
typedef void (*VP8LProcessEncBlueAndRedFunc)(uint32_t* dst, int num_pixels);
|
||||
extern VP8LProcessEncBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed;
|
||||
typedef void (*VP8LTransformColorFunc)(
|
||||
const VP8LMultipliers* WEBP_RESTRICT const m, uint32_t* WEBP_RESTRICT dst,
|
||||
int num_pixels);
|
||||
typedef void (*VP8LTransformColorFunc)(const VP8LMultipliers* const m,
|
||||
uint32_t* dst, int num_pixels);
|
||||
extern VP8LTransformColorFunc VP8LTransformColor;
|
||||
typedef void (*VP8LCollectColorBlueTransformsFunc)(
|
||||
const uint32_t* WEBP_RESTRICT argb, int stride,
|
||||
const uint32_t* argb, int stride,
|
||||
int tile_width, int tile_height,
|
||||
int green_to_blue, int red_to_blue, uint32_t histo[]);
|
||||
extern VP8LCollectColorBlueTransformsFunc VP8LCollectColorBlueTransforms;
|
||||
|
||||
typedef void (*VP8LCollectColorRedTransformsFunc)(
|
||||
const uint32_t* WEBP_RESTRICT argb, int stride,
|
||||
const uint32_t* argb, int stride,
|
||||
int tile_width, int tile_height,
|
||||
int green_to_red, uint32_t histo[]);
|
||||
extern VP8LCollectColorRedTransformsFunc VP8LCollectColorRedTransforms;
|
||||
|
||||
// Expose some C-only fallback functions
|
||||
void VP8LTransformColor_C(const VP8LMultipliers* WEBP_RESTRICT const m,
|
||||
uint32_t* WEBP_RESTRICT data, int num_pixels);
|
||||
void VP8LSubtractGreenFromBlueAndRed_C(uint32_t* WEBP_RESTRICT argb_data,
|
||||
int num_pixels);
|
||||
void VP8LCollectColorRedTransforms_C(const uint32_t* WEBP_RESTRICT argb,
|
||||
int stride,
|
||||
void VP8LTransformColor_C(const VP8LMultipliers* const m,
|
||||
uint32_t* data, int num_pixels);
|
||||
void VP8LSubtractGreenFromBlueAndRed_C(uint32_t* argb_data, int num_pixels);
|
||||
void VP8LCollectColorRedTransforms_C(const uint32_t* argb, int stride,
|
||||
int tile_width, int tile_height,
|
||||
int green_to_red, uint32_t histo[]);
|
||||
void VP8LCollectColorBlueTransforms_C(const uint32_t* WEBP_RESTRICT argb,
|
||||
int stride,
|
||||
void VP8LCollectColorBlueTransforms_C(const uint32_t* argb, int stride,
|
||||
int tile_width, int tile_height,
|
||||
int green_to_blue, int red_to_blue,
|
||||
uint32_t histo[]);
|
||||
@ -184,8 +179,7 @@ extern VP8LPredictorAddSubFunc VP8LPredictorsSub_C[16];
|
||||
// Huffman-cost related functions.
|
||||
|
||||
typedef uint32_t (*VP8LCostFunc)(const uint32_t* population, int length);
|
||||
typedef uint32_t (*VP8LCostCombinedFunc)(const uint32_t* WEBP_RESTRICT X,
|
||||
const uint32_t* WEBP_RESTRICT Y,
|
||||
typedef uint32_t (*VP8LCostCombinedFunc)(const uint32_t* X, const uint32_t* Y,
|
||||
int length);
|
||||
typedef uint64_t (*VP8LCombinedShannonEntropyFunc)(const uint32_t X[256],
|
||||
const uint32_t Y[256]);
|
||||
@ -216,30 +210,26 @@ void VP8LBitEntropyInit(VP8LBitEntropy* const entropy);
|
||||
// codec specific heuristics.
|
||||
typedef void (*VP8LGetCombinedEntropyUnrefinedFunc)(
|
||||
const uint32_t X[], const uint32_t Y[], int length,
|
||||
VP8LBitEntropy* WEBP_RESTRICT const bit_entropy,
|
||||
VP8LStreaks* WEBP_RESTRICT const stats);
|
||||
VP8LBitEntropy* const bit_entropy, VP8LStreaks* const stats);
|
||||
extern VP8LGetCombinedEntropyUnrefinedFunc VP8LGetCombinedEntropyUnrefined;
|
||||
|
||||
// Get the entropy for the distribution 'X'.
|
||||
typedef void (*VP8LGetEntropyUnrefinedFunc)(
|
||||
const uint32_t X[], int length,
|
||||
VP8LBitEntropy* WEBP_RESTRICT const bit_entropy,
|
||||
VP8LStreaks* WEBP_RESTRICT const stats);
|
||||
typedef void (*VP8LGetEntropyUnrefinedFunc)(const uint32_t X[], int length,
|
||||
VP8LBitEntropy* const bit_entropy,
|
||||
VP8LStreaks* const stats);
|
||||
extern VP8LGetEntropyUnrefinedFunc VP8LGetEntropyUnrefined;
|
||||
|
||||
void VP8LBitsEntropyUnrefined(const uint32_t* const array, int n,
|
||||
VP8LBitEntropy* const entropy);
|
||||
|
||||
typedef void (*VP8LAddVectorFunc)(const uint32_t* WEBP_RESTRICT a,
|
||||
const uint32_t* WEBP_RESTRICT b,
|
||||
uint32_t* WEBP_RESTRICT out, int size);
|
||||
typedef void (*VP8LAddVectorFunc)(const uint32_t* a, const uint32_t* b,
|
||||
uint32_t* out, int size);
|
||||
extern VP8LAddVectorFunc VP8LAddVector;
|
||||
typedef void (*VP8LAddVectorEqFunc)(const uint32_t* WEBP_RESTRICT a,
|
||||
uint32_t* WEBP_RESTRICT out, int size);
|
||||
typedef void (*VP8LAddVectorEqFunc)(const uint32_t* a, uint32_t* out, int size);
|
||||
extern VP8LAddVectorEqFunc VP8LAddVectorEq;
|
||||
void VP8LHistogramAdd(const VP8LHistogram* WEBP_RESTRICT const a,
|
||||
const VP8LHistogram* WEBP_RESTRICT const b,
|
||||
VP8LHistogram* WEBP_RESTRICT const out);
|
||||
void VP8LHistogramAdd(const VP8LHistogram* const a,
|
||||
const VP8LHistogram* const b,
|
||||
VP8LHistogram* const out);
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// PrefixEncode()
|
||||
@ -249,12 +239,11 @@ typedef int (*VP8LVectorMismatchFunc)(const uint32_t* const array1,
|
||||
// Returns the first index where array1 and array2 are different.
|
||||
extern VP8LVectorMismatchFunc VP8LVectorMismatch;
|
||||
|
||||
typedef void (*VP8LBundleColorMapFunc)(const uint8_t* WEBP_RESTRICT const row,
|
||||
int width, int xbits,
|
||||
uint32_t* WEBP_RESTRICT dst);
|
||||
typedef void (*VP8LBundleColorMapFunc)(const uint8_t* const row, int width,
|
||||
int xbits, uint32_t* dst);
|
||||
extern VP8LBundleColorMapFunc VP8LBundleColorMap;
|
||||
void VP8LBundleColorMap_C(const uint8_t* WEBP_RESTRICT const row,
|
||||
int width, int xbits, uint32_t* WEBP_RESTRICT dst);
|
||||
void VP8LBundleColorMap_C(const uint8_t* const row, int width, int xbits,
|
||||
uint32_t* dst);
|
||||
|
||||
// Must be called before calling any of the above methods.
|
||||
void VP8LEncDspInit(void);
|
||||
|
@ -196,7 +196,7 @@ uint32_t VP8LSubPixels(uint32_t a, uint32_t b) {
|
||||
// is therefore considered as a residual) to get the final prediction.
|
||||
#define GENERATE_PREDICTOR_ADD(PREDICTOR, PREDICTOR_ADD) \
|
||||
static void PREDICTOR_ADD(const uint32_t* in, const uint32_t* upper, \
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) { \
|
||||
int num_pixels, uint32_t* out) { \
|
||||
int x; \
|
||||
assert(upper != NULL); \
|
||||
for (x = 0; x < num_pixels; ++x) { \
|
||||
|
@ -359,8 +359,8 @@ void VP8LBitEntropyInit(VP8LBitEntropy* const entropy) {
|
||||
entropy->nonzero_code = VP8L_NON_TRIVIAL_SYM;
|
||||
}
|
||||
|
||||
void VP8LBitsEntropyUnrefined(const uint32_t* WEBP_RESTRICT const array, int n,
|
||||
VP8LBitEntropy* WEBP_RESTRICT const entropy) {
|
||||
void VP8LBitsEntropyUnrefined(const uint32_t* const array, int n,
|
||||
VP8LBitEntropy* const entropy) {
|
||||
int i;
|
||||
|
||||
VP8LBitEntropyInit(entropy);
|
||||
@ -380,10 +380,8 @@ void VP8LBitsEntropyUnrefined(const uint32_t* WEBP_RESTRICT const array, int n,
|
||||
}
|
||||
|
||||
static WEBP_INLINE void GetEntropyUnrefinedHelper(
|
||||
uint32_t val, int i, uint32_t* WEBP_RESTRICT const val_prev,
|
||||
int* WEBP_RESTRICT const i_prev,
|
||||
VP8LBitEntropy* WEBP_RESTRICT const bit_entropy,
|
||||
VP8LStreaks* WEBP_RESTRICT const stats) {
|
||||
uint32_t val, int i, uint32_t* const val_prev, int* const i_prev,
|
||||
VP8LBitEntropy* const bit_entropy, VP8LStreaks* const stats) {
|
||||
const int streak = i - *i_prev;
|
||||
|
||||
// Gather info for the bit entropy.
|
||||
@ -405,10 +403,9 @@ static WEBP_INLINE void GetEntropyUnrefinedHelper(
|
||||
*i_prev = i;
|
||||
}
|
||||
|
||||
static void GetEntropyUnrefined_C(
|
||||
const uint32_t X[], int length,
|
||||
VP8LBitEntropy* WEBP_RESTRICT const bit_entropy,
|
||||
VP8LStreaks* WEBP_RESTRICT const stats) {
|
||||
static void GetEntropyUnrefined_C(const uint32_t X[], int length,
|
||||
VP8LBitEntropy* const bit_entropy,
|
||||
VP8LStreaks* const stats) {
|
||||
int i;
|
||||
int i_prev = 0;
|
||||
uint32_t x_prev = X[0];
|
||||
@ -427,10 +424,11 @@ static void GetEntropyUnrefined_C(
|
||||
bit_entropy->entropy = VP8LFastSLog2(bit_entropy->sum) - bit_entropy->entropy;
|
||||
}
|
||||
|
||||
static void GetCombinedEntropyUnrefined_C(
|
||||
const uint32_t X[], const uint32_t Y[], int length,
|
||||
VP8LBitEntropy* WEBP_RESTRICT const bit_entropy,
|
||||
VP8LStreaks* WEBP_RESTRICT const stats) {
|
||||
static void GetCombinedEntropyUnrefined_C(const uint32_t X[],
|
||||
const uint32_t Y[],
|
||||
int length,
|
||||
VP8LBitEntropy* const bit_entropy,
|
||||
VP8LStreaks* const stats) {
|
||||
int i = 1;
|
||||
int i_prev = 0;
|
||||
uint32_t xy_prev = X[0] + Y[0];
|
||||
@ -470,8 +468,8 @@ static WEBP_INLINE int8_t U32ToS8(uint32_t v) {
|
||||
return (int8_t)(v & 0xff);
|
||||
}
|
||||
|
||||
void VP8LTransformColor_C(const VP8LMultipliers* WEBP_RESTRICT const m,
|
||||
uint32_t* WEBP_RESTRICT data, int num_pixels) {
|
||||
void VP8LTransformColor_C(const VP8LMultipliers* const m, uint32_t* data,
|
||||
int num_pixels) {
|
||||
int i;
|
||||
for (i = 0; i < num_pixels; ++i) {
|
||||
const uint32_t argb = data[i];
|
||||
@ -507,8 +505,7 @@ static WEBP_INLINE uint8_t TransformColorBlue(uint8_t green_to_blue,
|
||||
return (new_blue & 0xff);
|
||||
}
|
||||
|
||||
void VP8LCollectColorRedTransforms_C(const uint32_t* WEBP_RESTRICT argb,
|
||||
int stride,
|
||||
void VP8LCollectColorRedTransforms_C(const uint32_t* argb, int stride,
|
||||
int tile_width, int tile_height,
|
||||
int green_to_red, uint32_t histo[]) {
|
||||
while (tile_height-- > 0) {
|
||||
@ -520,8 +517,7 @@ void VP8LCollectColorRedTransforms_C(const uint32_t* WEBP_RESTRICT argb,
|
||||
}
|
||||
}
|
||||
|
||||
void VP8LCollectColorBlueTransforms_C(const uint32_t* WEBP_RESTRICT argb,
|
||||
int stride,
|
||||
void VP8LCollectColorBlueTransforms_C(const uint32_t* argb, int stride,
|
||||
int tile_width, int tile_height,
|
||||
int green_to_blue, int red_to_blue,
|
||||
uint32_t histo[]) {
|
||||
@ -548,8 +544,8 @@ static int VectorMismatch_C(const uint32_t* const array1,
|
||||
}
|
||||
|
||||
// Bundles multiple (1, 2, 4 or 8) pixels into a single pixel.
|
||||
void VP8LBundleColorMap_C(const uint8_t* WEBP_RESTRICT const row,
|
||||
int width, int xbits, uint32_t* WEBP_RESTRICT dst) {
|
||||
void VP8LBundleColorMap_C(const uint8_t* const row, int width, int xbits,
|
||||
uint32_t* dst) {
|
||||
int x;
|
||||
if (xbits > 0) {
|
||||
const int bit_depth = 1 << (3 - xbits);
|
||||
@ -580,8 +576,7 @@ static uint32_t ExtraCost_C(const uint32_t* population, int length) {
|
||||
return cost;
|
||||
}
|
||||
|
||||
static uint32_t ExtraCostCombined_C(const uint32_t* WEBP_RESTRICT X,
|
||||
const uint32_t* WEBP_RESTRICT Y,
|
||||
static uint32_t ExtraCostCombined_C(const uint32_t* X, const uint32_t* Y,
|
||||
int length) {
|
||||
int i;
|
||||
uint32_t cost = X[4] + Y[4] + X[5] + Y[5];
|
||||
@ -596,15 +591,13 @@ static uint32_t ExtraCostCombined_C(const uint32_t* WEBP_RESTRICT X,
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static void AddVector_C(const uint32_t* WEBP_RESTRICT a,
|
||||
const uint32_t* WEBP_RESTRICT b,
|
||||
uint32_t* WEBP_RESTRICT out, int size) {
|
||||
static void AddVector_C(const uint32_t* a, const uint32_t* b, uint32_t* out,
|
||||
int size) {
|
||||
int i;
|
||||
for (i = 0; i < size; ++i) out[i] = a[i] + b[i];
|
||||
}
|
||||
|
||||
static void AddVectorEq_C(const uint32_t* WEBP_RESTRICT a,
|
||||
uint32_t* WEBP_RESTRICT out, int size) {
|
||||
static void AddVectorEq_C(const uint32_t* a, uint32_t* out, int size) {
|
||||
int i;
|
||||
for (i = 0; i < size; ++i) out[i] += a[i];
|
||||
}
|
||||
@ -633,9 +626,8 @@ static void AddVectorEq_C(const uint32_t* WEBP_RESTRICT a,
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
void VP8LHistogramAdd(const VP8LHistogram* WEBP_RESTRICT const a,
|
||||
const VP8LHistogram* WEBP_RESTRICT const b,
|
||||
VP8LHistogram* WEBP_RESTRICT const out) {
|
||||
void VP8LHistogramAdd(const VP8LHistogram* const a,
|
||||
const VP8LHistogram* const b, VP8LHistogram* const out) {
|
||||
int i;
|
||||
const int literal_size = VP8LHistogramNumCodes(a->palette_code_bits_);
|
||||
assert(a->palette_code_bits_ == b->palette_code_bits_);
|
||||
@ -665,14 +657,14 @@ void VP8LHistogramAdd(const VP8LHistogram* WEBP_RESTRICT const a,
|
||||
// Image transforms.
|
||||
|
||||
static void PredictorSub0_C(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int num_pixels, uint32_t* out) {
|
||||
int i;
|
||||
for (i = 0; i < num_pixels; ++i) out[i] = VP8LSubPixels(in[i], ARGB_BLACK);
|
||||
(void)upper;
|
||||
}
|
||||
|
||||
static void PredictorSub1_C(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int num_pixels, uint32_t* out) {
|
||||
int i;
|
||||
for (i = 0; i < num_pixels; ++i) out[i] = VP8LSubPixels(in[i], in[i - 1]);
|
||||
(void)upper;
|
||||
@ -683,8 +675,7 @@ static void PredictorSub1_C(const uint32_t* in, const uint32_t* upper,
|
||||
#define GENERATE_PREDICTOR_SUB(PREDICTOR_I) \
|
||||
static void PredictorSub##PREDICTOR_I##_C(const uint32_t* in, \
|
||||
const uint32_t* upper, \
|
||||
int num_pixels, \
|
||||
uint32_t* WEBP_RESTRICT out) { \
|
||||
int num_pixels, uint32_t* out) { \
|
||||
int x; \
|
||||
assert(upper != NULL); \
|
||||
for (x = 0; x < num_pixels; ++x) { \
|
||||
|
@ -149,9 +149,8 @@ static uint32_t ExtraCost_MIPS32(const uint32_t* const population, int length) {
|
||||
// pY += 2;
|
||||
// }
|
||||
// return cost;
|
||||
static uint32_t ExtraCostCombined_MIPS32(const uint32_t* WEBP_RESTRICT const X,
|
||||
const uint32_t* WEBP_RESTRICT const Y,
|
||||
int length) {
|
||||
static uint32_t ExtraCostCombined_MIPS32(const uint32_t* const X,
|
||||
const uint32_t* const Y, int length) {
|
||||
int i, temp0, temp1, temp2, temp3;
|
||||
const uint32_t* pX = &X[4];
|
||||
const uint32_t* pY = &Y[4];
|
||||
@ -216,10 +215,8 @@ static uint32_t ExtraCostCombined_MIPS32(const uint32_t* WEBP_RESTRICT const X,
|
||||
|
||||
// Returns the various RLE counts
|
||||
static WEBP_INLINE void GetEntropyUnrefinedHelper(
|
||||
uint32_t val, int i, uint32_t* WEBP_RESTRICT const val_prev,
|
||||
int* WEBP_RESTRICT const i_prev,
|
||||
VP8LBitEntropy* WEBP_RESTRICT const bit_entropy,
|
||||
VP8LStreaks* WEBP_RESTRICT const stats) {
|
||||
uint32_t val, int i, uint32_t* const val_prev, int* const i_prev,
|
||||
VP8LBitEntropy* const bit_entropy, VP8LStreaks* const stats) {
|
||||
int* const pstreaks = &stats->streaks[0][0];
|
||||
int* const pcnts = &stats->counts[0];
|
||||
int temp0, temp1, temp2, temp3;
|
||||
@ -244,10 +241,9 @@ static WEBP_INLINE void GetEntropyUnrefinedHelper(
|
||||
*i_prev = i;
|
||||
}
|
||||
|
||||
static void GetEntropyUnrefined_MIPS32(
|
||||
const uint32_t X[], int length,
|
||||
VP8LBitEntropy* WEBP_RESTRICT const bit_entropy,
|
||||
VP8LStreaks* WEBP_RESTRICT const stats) {
|
||||
static void GetEntropyUnrefined_MIPS32(const uint32_t X[], int length,
|
||||
VP8LBitEntropy* const bit_entropy,
|
||||
VP8LStreaks* const stats) {
|
||||
int i;
|
||||
int i_prev = 0;
|
||||
uint32_t x_prev = X[0];
|
||||
@ -266,10 +262,11 @@ static void GetEntropyUnrefined_MIPS32(
|
||||
bit_entropy->entropy = VP8LFastSLog2(bit_entropy->sum) - bit_entropy->entropy;
|
||||
}
|
||||
|
||||
static void GetCombinedEntropyUnrefined_MIPS32(
|
||||
const uint32_t X[], const uint32_t Y[], int length,
|
||||
VP8LBitEntropy* WEBP_RESTRICT const entropy,
|
||||
VP8LStreaks* WEBP_RESTRICT const stats) {
|
||||
static void GetCombinedEntropyUnrefined_MIPS32(const uint32_t X[],
|
||||
const uint32_t Y[],
|
||||
int length,
|
||||
VP8LBitEntropy* const entropy,
|
||||
VP8LStreaks* const stats) {
|
||||
int i = 1;
|
||||
int i_prev = 0;
|
||||
uint32_t xy_prev = X[0] + Y[0];
|
||||
@ -347,9 +344,8 @@ static void GetCombinedEntropyUnrefined_MIPS32(
|
||||
ASM_END_COMMON_0 \
|
||||
ASM_END_COMMON_1
|
||||
|
||||
static void AddVector_MIPS32(const uint32_t* WEBP_RESTRICT pa,
|
||||
const uint32_t* WEBP_RESTRICT pb,
|
||||
uint32_t* WEBP_RESTRICT pout, int size) {
|
||||
static void AddVector_MIPS32(const uint32_t* pa, const uint32_t* pb,
|
||||
uint32_t* pout, int size) {
|
||||
uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
|
||||
const int end = ((size) / 4) * 4;
|
||||
const uint32_t* const LoopEnd = pa + end;
|
||||
@ -360,8 +356,7 @@ static void AddVector_MIPS32(const uint32_t* WEBP_RESTRICT pa,
|
||||
for (i = 0; i < size - end; ++i) pout[i] = pa[i] + pb[i];
|
||||
}
|
||||
|
||||
static void AddVectorEq_MIPS32(const uint32_t* WEBP_RESTRICT pa,
|
||||
uint32_t* WEBP_RESTRICT pout, int size) {
|
||||
static void AddVectorEq_MIPS32(const uint32_t* pa, uint32_t* pout, int size) {
|
||||
uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
|
||||
const int end = ((size) / 4) * 4;
|
||||
const uint32_t* const LoopEnd = pa + end;
|
||||
|
@ -78,9 +78,8 @@ static WEBP_INLINE uint32_t ColorTransformDelta(int8_t color_pred,
|
||||
return (uint32_t)((int)(color_pred) * color) >> 5;
|
||||
}
|
||||
|
||||
static void TransformColor_MIPSdspR2(
|
||||
const VP8LMultipliers* WEBP_RESTRICT const m, uint32_t* WEBP_RESTRICT data,
|
||||
int num_pixels) {
|
||||
static void TransformColor_MIPSdspR2(const VP8LMultipliers* const m,
|
||||
uint32_t* data, int num_pixels) {
|
||||
int temp0, temp1, temp2, temp3, temp4, temp5;
|
||||
uint32_t argb, argb1, new_red, new_red1;
|
||||
const uint32_t G_to_R = m->green_to_red_;
|
||||
@ -173,8 +172,7 @@ static WEBP_INLINE uint8_t TransformColorBlue(uint8_t green_to_blue,
|
||||
}
|
||||
|
||||
static void CollectColorBlueTransforms_MIPSdspR2(
|
||||
const uint32_t* WEBP_RESTRICT argb, int stride,
|
||||
int tile_width, int tile_height,
|
||||
const uint32_t* argb, int stride, int tile_width, int tile_height,
|
||||
int green_to_blue, int red_to_blue, uint32_t histo[]) {
|
||||
const int rtb = (red_to_blue << 16) | (red_to_blue & 0xffff);
|
||||
const int gtb = (green_to_blue << 16) | (green_to_blue & 0xffff);
|
||||
@ -223,9 +221,11 @@ static WEBP_INLINE uint8_t TransformColorRed(uint8_t green_to_red,
|
||||
return (new_red & 0xff);
|
||||
}
|
||||
|
||||
static void CollectColorRedTransforms_MIPSdspR2(
|
||||
const uint32_t* WEBP_RESTRICT argb, int stride,
|
||||
int tile_width, int tile_height, int green_to_red, uint32_t histo[]) {
|
||||
static void CollectColorRedTransforms_MIPSdspR2(const uint32_t* argb,
|
||||
int stride, int tile_width,
|
||||
int tile_height,
|
||||
int green_to_red,
|
||||
uint32_t histo[]) {
|
||||
const int gtr = (green_to_red << 16) | (green_to_red & 0xffff);
|
||||
while (tile_height-- > 0) {
|
||||
int x;
|
||||
|
@ -48,8 +48,8 @@
|
||||
dst = VSHF_UB(src, t0, mask1); \
|
||||
} while (0)
|
||||
|
||||
static void TransformColor_MSA(const VP8LMultipliers* WEBP_RESTRICT const m,
|
||||
uint32_t* WEBP_RESTRICT data, int num_pixels) {
|
||||
static void TransformColor_MSA(const VP8LMultipliers* const m, uint32_t* data,
|
||||
int num_pixels) {
|
||||
v16u8 src0, dst0;
|
||||
const v16i8 g2br = (v16i8)__msa_fill_w(m->green_to_blue_ |
|
||||
(m->green_to_red_ << 16));
|
||||
|
@ -72,9 +72,8 @@ static void SubtractGreenFromBlueAndRed_NEON(uint32_t* argb_data,
|
||||
//------------------------------------------------------------------------------
|
||||
// Color Transform
|
||||
|
||||
static void TransformColor_NEON(const VP8LMultipliers* WEBP_RESTRICT const m,
|
||||
uint32_t* WEBP_RESTRICT argb_data,
|
||||
int num_pixels) {
|
||||
static void TransformColor_NEON(const VP8LMultipliers* const m,
|
||||
uint32_t* argb_data, int num_pixels) {
|
||||
// sign-extended multiplying constants, pre-shifted by 6.
|
||||
#define CST(X) (((int16_t)(m->X << 8)) >> 6)
|
||||
const int16_t rb[8] = {
|
||||
|
@ -49,9 +49,8 @@ static void SubtractGreenFromBlueAndRed_SSE2(uint32_t* argb_data,
|
||||
#define MK_CST_16(HI, LO) \
|
||||
_mm_set1_epi32((int)(((uint32_t)(HI) << 16) | ((LO) & 0xffff)))
|
||||
|
||||
static void TransformColor_SSE2(const VP8LMultipliers* WEBP_RESTRICT const m,
|
||||
uint32_t* WEBP_RESTRICT argb_data,
|
||||
int num_pixels) {
|
||||
static void TransformColor_SSE2(const VP8LMultipliers* const m,
|
||||
uint32_t* argb_data, int num_pixels) {
|
||||
const __m128i mults_rb = MK_CST_16(CST_5b(m->green_to_red_),
|
||||
CST_5b(m->green_to_blue_));
|
||||
const __m128i mults_b2 = MK_CST_16(CST_5b(m->red_to_blue_), 0);
|
||||
@ -80,8 +79,7 @@ static void TransformColor_SSE2(const VP8LMultipliers* WEBP_RESTRICT const m,
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
#define SPAN 8
|
||||
static void CollectColorBlueTransforms_SSE2(const uint32_t* WEBP_RESTRICT argb,
|
||||
int stride,
|
||||
static void CollectColorBlueTransforms_SSE2(const uint32_t* argb, int stride,
|
||||
int tile_width, int tile_height,
|
||||
int green_to_blue, int red_to_blue,
|
||||
uint32_t histo[]) {
|
||||
@ -128,8 +126,7 @@ static void CollectColorBlueTransforms_SSE2(const uint32_t* WEBP_RESTRICT argb,
|
||||
}
|
||||
}
|
||||
|
||||
static void CollectColorRedTransforms_SSE2(const uint32_t* WEBP_RESTRICT argb,
|
||||
int stride,
|
||||
static void CollectColorRedTransforms_SSE2(const uint32_t* argb, int stride,
|
||||
int tile_width, int tile_height,
|
||||
int green_to_red, uint32_t histo[]) {
|
||||
const __m128i mults_g = MK_CST_16(0, CST_5b(green_to_red));
|
||||
@ -176,9 +173,8 @@ static void CollectColorRedTransforms_SSE2(const uint32_t* WEBP_RESTRICT argb,
|
||||
// Note we are adding uint32_t's as *signed* int32's (using _mm_add_epi32). But
|
||||
// that's ok since the histogram values are less than 1<<28 (max picture size).
|
||||
#define LINE_SIZE 16 // 8 or 16
|
||||
static void AddVector_SSE2(const uint32_t* WEBP_RESTRICT a,
|
||||
const uint32_t* WEBP_RESTRICT b,
|
||||
uint32_t* WEBP_RESTRICT out, int size) {
|
||||
static void AddVector_SSE2(const uint32_t* a, const uint32_t* b, uint32_t* out,
|
||||
int size) {
|
||||
int i;
|
||||
for (i = 0; i + LINE_SIZE <= size; i += LINE_SIZE) {
|
||||
const __m128i a0 = _mm_loadu_si128((const __m128i*)&a[i + 0]);
|
||||
@ -205,8 +201,7 @@ static void AddVector_SSE2(const uint32_t* WEBP_RESTRICT a,
|
||||
}
|
||||
}
|
||||
|
||||
static void AddVectorEq_SSE2(const uint32_t* WEBP_RESTRICT a,
|
||||
uint32_t* WEBP_RESTRICT out, int size) {
|
||||
static void AddVectorEq_SSE2(const uint32_t* a, uint32_t* out, int size) {
|
||||
int i;
|
||||
for (i = 0; i + LINE_SIZE <= size; i += LINE_SIZE) {
|
||||
const __m128i a0 = _mm_loadu_si128((const __m128i*)&a[i + 0]);
|
||||
@ -338,9 +333,8 @@ static int VectorMismatch_SSE2(const uint32_t* const array1,
|
||||
}
|
||||
|
||||
// Bundles multiple (1, 2, 4 or 8) pixels into a single pixel.
|
||||
static void BundleColorMap_SSE2(const uint8_t* WEBP_RESTRICT const row,
|
||||
int width, int xbits,
|
||||
uint32_t* WEBP_RESTRICT dst) {
|
||||
static void BundleColorMap_SSE2(const uint8_t* const row, int width, int xbits,
|
||||
uint32_t* dst) {
|
||||
int x;
|
||||
assert(xbits >= 0);
|
||||
assert(xbits <= 3);
|
||||
@ -429,7 +423,7 @@ static WEBP_INLINE void Average2_m128i(const __m128i* const a0,
|
||||
|
||||
// Predictor0: ARGB_BLACK.
|
||||
static void PredictorSub0_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int num_pixels, uint32_t* out) {
|
||||
int i;
|
||||
const __m128i black = _mm_set1_epi32((int)ARGB_BLACK);
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
@ -446,8 +440,7 @@ static void PredictorSub0_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
#define GENERATE_PREDICTOR_1(X, IN) \
|
||||
static void PredictorSub##X##_SSE2(const uint32_t* const in, \
|
||||
const uint32_t* const upper, \
|
||||
int num_pixels, \
|
||||
uint32_t* WEBP_RESTRICT const out) { \
|
||||
int num_pixels, uint32_t* const out) { \
|
||||
int i; \
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) { \
|
||||
const __m128i src = _mm_loadu_si128((const __m128i*)&in[i]); \
|
||||
@ -469,7 +462,7 @@ GENERATE_PREDICTOR_1(4, upper[i - 1]) // Predictor4: TL
|
||||
|
||||
// Predictor5: avg2(avg2(L, TR), T)
|
||||
static void PredictorSub5_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int num_pixels, uint32_t* out) {
|
||||
int i;
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
const __m128i L = _mm_loadu_si128((const __m128i*)&in[i - 1]);
|
||||
@ -489,8 +482,7 @@ static void PredictorSub5_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
|
||||
#define GENERATE_PREDICTOR_2(X, A, B) \
|
||||
static void PredictorSub##X##_SSE2(const uint32_t* in, const uint32_t* upper, \
|
||||
int num_pixels, \
|
||||
uint32_t* WEBP_RESTRICT out) { \
|
||||
int num_pixels, uint32_t* out) { \
|
||||
int i; \
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) { \
|
||||
const __m128i tA = _mm_loadu_si128((const __m128i*)&(A)); \
|
||||
@ -514,7 +506,7 @@ GENERATE_PREDICTOR_2(9, upper[i], upper[i + 1]) // Predictor9: average(T, TR)
|
||||
|
||||
// Predictor10: avg(avg(L,TL), avg(T, TR)).
|
||||
static void PredictorSub10_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int num_pixels, uint32_t* out) {
|
||||
int i;
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
const __m128i L = _mm_loadu_si128((const __m128i*)&in[i - 1]);
|
||||
@ -549,7 +541,7 @@ static void GetSumAbsDiff32_SSE2(const __m128i* const A, const __m128i* const B,
|
||||
}
|
||||
|
||||
static void PredictorSub11_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int num_pixels, uint32_t* out) {
|
||||
int i;
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
const __m128i L = _mm_loadu_si128((const __m128i*)&in[i - 1]);
|
||||
@ -575,7 +567,7 @@ static void PredictorSub11_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
|
||||
// Predictor12: ClampedSubSubtractFull.
|
||||
static void PredictorSub12_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int num_pixels, uint32_t* out) {
|
||||
int i;
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
@ -604,7 +596,7 @@ static void PredictorSub12_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
|
||||
// Predictors13: ClampedAddSubtractHalf
|
||||
static void PredictorSub13_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int num_pixels, uint32_t* out) {
|
||||
int i;
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
for (i = 0; i + 2 <= num_pixels; i += 2) {
|
||||
|
@ -44,9 +44,8 @@ static uint32_t ExtraCost_SSE41(const uint32_t* const a, int length) {
|
||||
return HorizontalSum_SSE41(cost);
|
||||
}
|
||||
|
||||
static uint32_t ExtraCostCombined_SSE41(const uint32_t* WEBP_RESTRICT const a,
|
||||
const uint32_t* WEBP_RESTRICT const b,
|
||||
int length) {
|
||||
static uint32_t ExtraCostCombined_SSE41(const uint32_t* const a,
|
||||
const uint32_t* const b, int length) {
|
||||
int i;
|
||||
__m128i cost = _mm_add_epi32(_mm_set_epi32(2 * a[7], 2 * a[6], a[5], a[4]),
|
||||
_mm_set_epi32(2 * b[7], 2 * b[6], b[5], b[4]));
|
||||
@ -96,8 +95,7 @@ static void SubtractGreenFromBlueAndRed_SSE41(uint32_t* argb_data,
|
||||
#define MK_CST_16(HI, LO) \
|
||||
_mm_set1_epi32((int)(((uint32_t)(HI) << 16) | ((LO) & 0xffff)))
|
||||
|
||||
static void CollectColorBlueTransforms_SSE41(const uint32_t* WEBP_RESTRICT argb,
|
||||
int stride,
|
||||
static void CollectColorBlueTransforms_SSE41(const uint32_t* argb, int stride,
|
||||
int tile_width, int tile_height,
|
||||
int green_to_blue, int red_to_blue,
|
||||
uint32_t histo[]) {
|
||||
@ -143,8 +141,7 @@ static void CollectColorBlueTransforms_SSE41(const uint32_t* WEBP_RESTRICT argb,
|
||||
}
|
||||
}
|
||||
|
||||
static void CollectColorRedTransforms_SSE41(const uint32_t* WEBP_RESTRICT argb,
|
||||
int stride,
|
||||
static void CollectColorRedTransforms_SSE41(const uint32_t* argb, int stride,
|
||||
int tile_width, int tile_height,
|
||||
int green_to_red,
|
||||
uint32_t histo[]) {
|
||||
|
@ -26,8 +26,8 @@
|
||||
#if !defined(WORK_AROUND_GCC)
|
||||
// gcc 4.6.0 had some trouble (NDK-r9) with this code. We only use it for
|
||||
// gcc-4.8.x at least.
|
||||
static void ConvertBGRAToRGBA_NEON(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
|
||||
static void ConvertBGRAToRGBA_NEON(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
const uint32_t* const end = src + (num_pixels & ~15);
|
||||
for (; src < end; src += 16) {
|
||||
uint8x16x4_t pixel = vld4q_u8((uint8_t*)src);
|
||||
@ -41,8 +41,8 @@ static void ConvertBGRAToRGBA_NEON(const uint32_t* WEBP_RESTRICT src,
|
||||
VP8LConvertBGRAToRGBA_C(src, num_pixels & 15, dst); // left-overs
|
||||
}
|
||||
|
||||
static void ConvertBGRAToBGR_NEON(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
|
||||
static void ConvertBGRAToBGR_NEON(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
const uint32_t* const end = src + (num_pixels & ~15);
|
||||
for (; src < end; src += 16) {
|
||||
const uint8x16x4_t pixel = vld4q_u8((uint8_t*)src);
|
||||
@ -53,8 +53,8 @@ static void ConvertBGRAToBGR_NEON(const uint32_t* WEBP_RESTRICT src,
|
||||
VP8LConvertBGRAToBGR_C(src, num_pixels & 15, dst); // left-overs
|
||||
}
|
||||
|
||||
static void ConvertBGRAToRGB_NEON(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
|
||||
static void ConvertBGRAToRGB_NEON(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
const uint32_t* const end = src + (num_pixels & ~15);
|
||||
for (; src < end; src += 16) {
|
||||
const uint8x16x4_t pixel = vld4q_u8((uint8_t*)src);
|
||||
@ -71,8 +71,8 @@ static void ConvertBGRAToRGB_NEON(const uint32_t* WEBP_RESTRICT src,
|
||||
|
||||
static const uint8_t kRGBAShuffle[8] = { 2, 1, 0, 3, 6, 5, 4, 7 };
|
||||
|
||||
static void ConvertBGRAToRGBA_NEON(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
|
||||
static void ConvertBGRAToRGBA_NEON(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
const uint32_t* const end = src + (num_pixels & ~1);
|
||||
const uint8x8_t shuffle = vld1_u8(kRGBAShuffle);
|
||||
for (; src < end; src += 2) {
|
||||
@ -89,8 +89,8 @@ static const uint8_t kBGRShuffle[3][8] = {
|
||||
{ 21, 22, 24, 25, 26, 28, 29, 30 }
|
||||
};
|
||||
|
||||
static void ConvertBGRAToBGR_NEON(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
|
||||
static void ConvertBGRAToBGR_NEON(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
const uint32_t* const end = src + (num_pixels & ~7);
|
||||
const uint8x8_t shuffle0 = vld1_u8(kBGRShuffle[0]);
|
||||
const uint8x8_t shuffle1 = vld1_u8(kBGRShuffle[1]);
|
||||
@ -116,8 +116,8 @@ static const uint8_t kRGBShuffle[3][8] = {
|
||||
{ 21, 20, 26, 25, 24, 30, 29, 28 }
|
||||
};
|
||||
|
||||
static void ConvertBGRAToRGB_NEON(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
|
||||
static void ConvertBGRAToRGB_NEON(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
const uint32_t* const end = src + (num_pixels & ~7);
|
||||
const uint8x8_t shuffle0 = vld1_u8(kRGBShuffle[0]);
|
||||
const uint8x8_t shuffle1 = vld1_u8(kRGBShuffle[1]);
|
||||
@ -209,7 +209,7 @@ static uint32_t Predictor13_NEON(const uint32_t* const left,
|
||||
|
||||
// Predictor0: ARGB_BLACK.
|
||||
static void PredictorAdd0_NEON(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int num_pixels, uint32_t* out) {
|
||||
int i;
|
||||
const uint8x16_t black = vreinterpretq_u8_u32(vdupq_n_u32(ARGB_BLACK));
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
@ -222,7 +222,7 @@ static void PredictorAdd0_NEON(const uint32_t* in, const uint32_t* upper,
|
||||
|
||||
// Predictor1: left.
|
||||
static void PredictorAdd1_NEON(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int num_pixels, uint32_t* out) {
|
||||
int i;
|
||||
const uint8x16_t zero = LOADQ_U32_AS_U8(0);
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
@ -248,7 +248,7 @@ static void PredictorAdd1_NEON(const uint32_t* in, const uint32_t* upper,
|
||||
#define GENERATE_PREDICTOR_1(X, IN) \
|
||||
static void PredictorAdd##X##_NEON(const uint32_t* in, \
|
||||
const uint32_t* upper, int num_pixels, \
|
||||
uint32_t* WEBP_RESTRICT out) { \
|
||||
uint32_t* out) { \
|
||||
int i; \
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) { \
|
||||
const uint8x16_t src = LOADQ_U32P_AS_U8(&in[i]); \
|
||||
@ -276,7 +276,7 @@ GENERATE_PREDICTOR_1(4, upper[i - 1])
|
||||
} while (0)
|
||||
|
||||
static void PredictorAdd5_NEON(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int num_pixels, uint32_t* out) {
|
||||
int i;
|
||||
uint8x16_t L = LOADQ_U32_AS_U8(out[-1]);
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
@ -301,7 +301,7 @@ static void PredictorAdd5_NEON(const uint32_t* in, const uint32_t* upper,
|
||||
|
||||
// Predictor6: average(left, TL)
|
||||
static void PredictorAdd6_NEON(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int num_pixels, uint32_t* out) {
|
||||
int i;
|
||||
uint8x16_t L = LOADQ_U32_AS_U8(out[-1]);
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
@ -317,7 +317,7 @@ static void PredictorAdd6_NEON(const uint32_t* in, const uint32_t* upper,
|
||||
|
||||
// Predictor7: average(left, T)
|
||||
static void PredictorAdd7_NEON(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int num_pixels, uint32_t* out) {
|
||||
int i;
|
||||
uint8x16_t L = LOADQ_U32_AS_U8(out[-1]);
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
@ -335,7 +335,7 @@ static void PredictorAdd7_NEON(const uint32_t* in, const uint32_t* upper,
|
||||
#define GENERATE_PREDICTOR_2(X, IN) \
|
||||
static void PredictorAdd##X##_NEON(const uint32_t* in, \
|
||||
const uint32_t* upper, int num_pixels, \
|
||||
uint32_t* WEBP_RESTRICT out) { \
|
||||
uint32_t* out) { \
|
||||
int i; \
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) { \
|
||||
const uint8x16_t src = LOADQ_U32P_AS_U8(&in[i]); \
|
||||
@ -363,7 +363,7 @@ GENERATE_PREDICTOR_2(9, upper[i + 1])
|
||||
} while (0)
|
||||
|
||||
static void PredictorAdd10_NEON(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int num_pixels, uint32_t* out) {
|
||||
int i;
|
||||
uint8x16_t L = LOADQ_U32_AS_U8(out[-1]);
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
@ -394,7 +394,7 @@ static void PredictorAdd10_NEON(const uint32_t* in, const uint32_t* upper,
|
||||
} while (0)
|
||||
|
||||
static void PredictorAdd11_NEON(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int num_pixels, uint32_t* out) {
|
||||
int i;
|
||||
uint8x16_t L = LOADQ_U32_AS_U8(out[-1]);
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
@ -427,7 +427,7 @@ static void PredictorAdd11_NEON(const uint32_t* in, const uint32_t* upper,
|
||||
} while (0)
|
||||
|
||||
static void PredictorAdd12_NEON(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int num_pixels, uint32_t* out) {
|
||||
int i;
|
||||
uint16x8_t L = vmovl_u8(LOAD_U32_AS_U8(out[-1]));
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
@ -468,7 +468,7 @@ static void PredictorAdd12_NEON(const uint32_t* in, const uint32_t* upper,
|
||||
} while (0)
|
||||
|
||||
static void PredictorAdd13_NEON(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int num_pixels, uint32_t* out) {
|
||||
int i;
|
||||
uint8x16_t L = LOADQ_U32_AS_U8(out[-1]);
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
|
@ -186,7 +186,7 @@ static uint32_t Predictor13_SSE2(const uint32_t* const left,
|
||||
|
||||
// Predictor0: ARGB_BLACK.
|
||||
static void PredictorAdd0_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int num_pixels, uint32_t* out) {
|
||||
int i;
|
||||
const __m128i black = _mm_set1_epi32((int)ARGB_BLACK);
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
@ -202,7 +202,7 @@ static void PredictorAdd0_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
|
||||
// Predictor1: left.
|
||||
static void PredictorAdd1_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int num_pixels, uint32_t* out) {
|
||||
int i;
|
||||
__m128i prev = _mm_set1_epi32((int)out[-1]);
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
@ -230,8 +230,7 @@ static void PredictorAdd1_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
// per 8 bit channel.
|
||||
#define GENERATE_PREDICTOR_1(X, IN) \
|
||||
static void PredictorAdd##X##_SSE2(const uint32_t* in, const uint32_t* upper, \
|
||||
int num_pixels, \
|
||||
uint32_t* WEBP_RESTRICT out) { \
|
||||
int num_pixels, uint32_t* out) { \
|
||||
int i; \
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) { \
|
||||
const __m128i src = _mm_loadu_si128((const __m128i*)&in[i]); \
|
||||
@ -260,8 +259,7 @@ GENERATE_PREDICTOR_ADD(Predictor7_SSE2, PredictorAdd7_SSE2)
|
||||
|
||||
#define GENERATE_PREDICTOR_2(X, IN) \
|
||||
static void PredictorAdd##X##_SSE2(const uint32_t* in, const uint32_t* upper, \
|
||||
int num_pixels, \
|
||||
uint32_t* WEBP_RESTRICT out) { \
|
||||
int num_pixels, uint32_t* out) { \
|
||||
int i; \
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) { \
|
||||
const __m128i Tother = _mm_loadu_si128((const __m128i*)&(IN)); \
|
||||
@ -299,7 +297,7 @@ GENERATE_PREDICTOR_2(9, upper[i + 1])
|
||||
} while (0)
|
||||
|
||||
static void PredictorAdd10_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int num_pixels, uint32_t* out) {
|
||||
int i;
|
||||
__m128i L = _mm_cvtsi32_si128((int)out[-1]);
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
@ -346,7 +344,7 @@ static void PredictorAdd10_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
} while (0)
|
||||
|
||||
static void PredictorAdd11_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int num_pixels, uint32_t* out) {
|
||||
int i;
|
||||
__m128i pa;
|
||||
__m128i L = _mm_cvtsi32_si128((int)out[-1]);
|
||||
@ -397,7 +395,7 @@ static void PredictorAdd11_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
} while (0)
|
||||
|
||||
static void PredictorAdd12_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
int num_pixels, uint32_t* WEBP_RESTRICT out) {
|
||||
int num_pixels, uint32_t* out) {
|
||||
int i;
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i L8 = _mm_cvtsi32_si128((int)out[-1]);
|
||||
@ -492,8 +490,8 @@ static void TransformColorInverse_SSE2(const VP8LMultipliers* const m,
|
||||
//------------------------------------------------------------------------------
|
||||
// Color-space conversion functions
|
||||
|
||||
static void ConvertBGRAToRGB_SSE2(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
|
||||
static void ConvertBGRAToRGB_SSE2(const uint32_t* src, int num_pixels,
|
||||
uint8_t* dst) {
|
||||
const __m128i* in = (const __m128i*)src;
|
||||
__m128i* out = (__m128i*)dst;
|
||||
|
||||
@ -528,8 +526,8 @@ static void ConvertBGRAToRGB_SSE2(const uint32_t* WEBP_RESTRICT src,
|
||||
}
|
||||
}
|
||||
|
||||
static void ConvertBGRAToRGBA_SSE2(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
|
||||
static void ConvertBGRAToRGBA_SSE2(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
const __m128i red_blue_mask = _mm_set1_epi32(0x00ff00ff);
|
||||
const __m128i* in = (const __m128i*)src;
|
||||
__m128i* out = (__m128i*)dst;
|
||||
@ -556,9 +554,8 @@ static void ConvertBGRAToRGBA_SSE2(const uint32_t* WEBP_RESTRICT src,
|
||||
}
|
||||
}
|
||||
|
||||
static void ConvertBGRAToRGBA4444_SSE2(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
static void ConvertBGRAToRGBA4444_SSE2(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
const __m128i mask_0x0f = _mm_set1_epi8(0x0f);
|
||||
const __m128i mask_0xf0 = _mm_set1_epi8((char)0xf0);
|
||||
const __m128i* in = (const __m128i*)src;
|
||||
@ -593,9 +590,8 @@ static void ConvertBGRAToRGBA4444_SSE2(const uint32_t* WEBP_RESTRICT src,
|
||||
}
|
||||
}
|
||||
|
||||
static void ConvertBGRAToRGB565_SSE2(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
static void ConvertBGRAToRGB565_SSE2(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
const __m128i mask_0xe0 = _mm_set1_epi8((char)0xe0);
|
||||
const __m128i mask_0xf8 = _mm_set1_epi8((char)0xf8);
|
||||
const __m128i mask_0x07 = _mm_set1_epi8(0x07);
|
||||
@ -635,8 +631,8 @@ static void ConvertBGRAToRGB565_SSE2(const uint32_t* WEBP_RESTRICT src,
|
||||
}
|
||||
}
|
||||
|
||||
static void ConvertBGRAToBGR_SSE2(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
|
||||
static void ConvertBGRAToBGR_SSE2(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
const __m128i mask_l = _mm_set_epi32(0, 0x00ffffff, 0, 0x00ffffff);
|
||||
const __m128i mask_h = _mm_set_epi32(0x00ffffff, 0, 0x00ffffff, 0);
|
||||
const __m128i* in = (const __m128i*)src;
|
||||
|
@ -77,8 +77,8 @@ static void TransformColorInverse_SSE41(const VP8LMultipliers* const m,
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
static void ConvertBGRAToRGB_SSE41(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
|
||||
static void ConvertBGRAToRGB_SSE41(const uint32_t* src, int num_pixels,
|
||||
uint8_t* dst) {
|
||||
const __m128i* in = (const __m128i*)src;
|
||||
__m128i* out = (__m128i*)dst;
|
||||
const __m128i perm0 = _mm_setr_epi8(2, 1, 0, 6, 5, 4, 10, 9,
|
||||
@ -95,8 +95,8 @@ static void ConvertBGRAToRGB_SSE41(const uint32_t* WEBP_RESTRICT src,
|
||||
}
|
||||
}
|
||||
|
||||
static void ConvertBGRAToBGR_SSE41(const uint32_t* WEBP_RESTRICT src,
|
||||
int num_pixels, uint8_t* WEBP_RESTRICT dst) {
|
||||
static void ConvertBGRAToBGR_SSE41(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
const __m128i* in = (const __m128i*)src;
|
||||
__m128i* out = (__m128i*)dst;
|
||||
const __m128i perm0 = _mm_setr_epi8(0, 1, 2, 4, 5, 6, 8, 9, 10,
|
||||
|
@ -26,8 +26,8 @@
|
||||
//------------------------------------------------------------------------------
|
||||
// Row import
|
||||
|
||||
void WebPRescalerImportRowExpand_C(WebPRescaler* WEBP_RESTRICT const wrk,
|
||||
const uint8_t* WEBP_RESTRICT src) {
|
||||
void WebPRescalerImportRowExpand_C(WebPRescaler* const wrk,
|
||||
const uint8_t* src) {
|
||||
const int x_stride = wrk->num_channels;
|
||||
const int x_out_max = wrk->dst_width * wrk->num_channels;
|
||||
int channel;
|
||||
@ -59,8 +59,8 @@ void WebPRescalerImportRowExpand_C(WebPRescaler* WEBP_RESTRICT const wrk,
|
||||
}
|
||||
}
|
||||
|
||||
void WebPRescalerImportRowShrink_C(WebPRescaler* WEBP_RESTRICT const wrk,
|
||||
const uint8_t* WEBP_RESTRICT src) {
|
||||
void WebPRescalerImportRowShrink_C(WebPRescaler* const wrk,
|
||||
const uint8_t* src) {
|
||||
const int x_stride = wrk->num_channels;
|
||||
const int x_out_max = wrk->dst_width * wrk->num_channels;
|
||||
int channel;
|
||||
@ -158,8 +158,7 @@ void WebPRescalerExportRowShrink_C(WebPRescaler* const wrk) {
|
||||
//------------------------------------------------------------------------------
|
||||
// Main entry calls
|
||||
|
||||
void WebPRescalerImportRow(WebPRescaler* WEBP_RESTRICT const wrk,
|
||||
const uint8_t* WEBP_RESTRICT src) {
|
||||
void WebPRescalerImportRow(WebPRescaler* const wrk, const uint8_t* src) {
|
||||
assert(!WebPRescalerInputDone(wrk));
|
||||
if (!wrk->x_expand) {
|
||||
WebPRescalerImportRowShrink(wrk, src);
|
||||
|
@ -21,8 +21,8 @@
|
||||
//------------------------------------------------------------------------------
|
||||
// Row import
|
||||
|
||||
static void ImportRowShrink_MIPS32(WebPRescaler* WEBP_RESTRICT const wrk,
|
||||
const uint8_t* WEBP_RESTRICT src) {
|
||||
static void ImportRowShrink_MIPS32(WebPRescaler* const wrk,
|
||||
const uint8_t* src) {
|
||||
const int x_stride = wrk->num_channels;
|
||||
const int x_out_max = wrk->dst_width * wrk->num_channels;
|
||||
const int fx_scale = wrk->fx_scale;
|
||||
@ -81,8 +81,8 @@ static void ImportRowShrink_MIPS32(WebPRescaler* WEBP_RESTRICT const wrk,
|
||||
}
|
||||
}
|
||||
|
||||
static void ImportRowExpand_MIPS32(WebPRescaler* WEBP_RESTRICT const wrk,
|
||||
const uint8_t* WEBP_RESTRICT src) {
|
||||
static void ImportRowExpand_MIPS32(WebPRescaler* const wrk,
|
||||
const uint8_t* src) {
|
||||
const int x_stride = wrk->num_channels;
|
||||
const int x_out_max = wrk->dst_width * wrk->num_channels;
|
||||
const int x_add = wrk->x_add;
|
||||
|
@ -114,9 +114,9 @@
|
||||
dst = __msa_copy_s_w((v4i32)t0, 0); \
|
||||
} while (0)
|
||||
|
||||
static WEBP_INLINE void ExportRowExpand_0(
|
||||
const uint32_t* WEBP_RESTRICT frow, uint8_t* WEBP_RESTRICT dst, int length,
|
||||
WebPRescaler* WEBP_RESTRICT const wrk) {
|
||||
static WEBP_INLINE void ExportRowExpand_0(const uint32_t* frow, uint8_t* dst,
|
||||
int length,
|
||||
WebPRescaler* const wrk) {
|
||||
const v4u32 scale = (v4u32)__msa_fill_w(wrk->fy_scale);
|
||||
const v4u32 shift = (v4u32)__msa_fill_w(WEBP_RESCALER_RFIX);
|
||||
const v4i32 zero = { 0 };
|
||||
@ -171,10 +171,9 @@ static WEBP_INLINE void ExportRowExpand_0(
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void ExportRowExpand_1(
|
||||
const uint32_t* WEBP_RESTRICT frow, uint32_t* WEBP_RESTRICT irow,
|
||||
uint8_t* WEBP_RESTRICT dst, int length,
|
||||
WebPRescaler* WEBP_RESTRICT const wrk) {
|
||||
static WEBP_INLINE void ExportRowExpand_1(const uint32_t* frow, uint32_t* irow,
|
||||
uint8_t* dst, int length,
|
||||
WebPRescaler* const wrk) {
|
||||
const uint32_t B = WEBP_RESCALER_FRAC(-wrk->y_accum, wrk->y_sub);
|
||||
const uint32_t A = (uint32_t)(WEBP_RESCALER_ONE - B);
|
||||
const v4i32 B1 = __msa_fill_w(B);
|
||||
@ -263,10 +262,10 @@ static void RescalerExportRowExpand_MIPSdspR2(WebPRescaler* const wrk) {
|
||||
}
|
||||
|
||||
#if 0 // disabled for now. TODO(skal): make match the C-code
|
||||
static WEBP_INLINE void ExportRowShrink_0(
|
||||
const uint32_t* WEBP_RESTRICT frow, uint32_t* WEBP_RESTRICT irow,
|
||||
uint8_t* WEBP_RESTRICT dst, int length, const uint32_t yscale,
|
||||
WebPRescaler* WEBP_RESTRICT const wrk) {
|
||||
static WEBP_INLINE void ExportRowShrink_0(const uint32_t* frow, uint32_t* irow,
|
||||
uint8_t* dst, int length,
|
||||
const uint32_t yscale,
|
||||
WebPRescaler* const wrk) {
|
||||
const v4u32 y_scale = (v4u32)__msa_fill_w(yscale);
|
||||
const v4u32 fxyscale = (v4u32)__msa_fill_w(wrk->fxy_scale);
|
||||
const v4u32 shiftval = (v4u32)__msa_fill_w(WEBP_RESCALER_RFIX);
|
||||
@ -349,9 +348,9 @@ static WEBP_INLINE void ExportRowShrink_0(
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void ExportRowShrink_1(
|
||||
uint32_t* WEBP_RESTRICT irow, uint8_t* WEBP_RESTRICT dst, int length,
|
||||
WebPRescaler* WEBP_RESTRICT const wrk) {
|
||||
static WEBP_INLINE void ExportRowShrink_1(uint32_t* irow, uint8_t* dst,
|
||||
int length,
|
||||
WebPRescaler* const wrk) {
|
||||
const v4u32 scale = (v4u32)__msa_fill_w(wrk->fxy_scale);
|
||||
const v4u32 shift = (v4u32)__msa_fill_w(WEBP_RESCALER_RFIX);
|
||||
const v4i32 zero = { 0 };
|
||||
|
@ -45,8 +45,8 @@
|
||||
#error "MULT_FIX/WEBP_RESCALER_RFIX need some more work"
|
||||
#endif
|
||||
|
||||
static uint32x4_t Interpolate_NEON(const rescaler_t* WEBP_RESTRICT const frow,
|
||||
const rescaler_t* WEBP_RESTRICT const irow,
|
||||
static uint32x4_t Interpolate_NEON(const rescaler_t* const frow,
|
||||
const rescaler_t* const irow,
|
||||
uint32_t A, uint32_t B) {
|
||||
LOAD_32x4(frow, A0);
|
||||
LOAD_32x4(irow, B0);
|
||||
|
@ -43,8 +43,8 @@ static void LoadEightPixels_SSE2(const uint8_t* const src, __m128i* out) {
|
||||
*out = _mm_unpacklo_epi8(A, zero);
|
||||
}
|
||||
|
||||
static void RescalerImportRowExpand_SSE2(WebPRescaler* WEBP_RESTRICT const wrk,
|
||||
const uint8_t* WEBP_RESTRICT src) {
|
||||
static void RescalerImportRowExpand_SSE2(WebPRescaler* const wrk,
|
||||
const uint8_t* src) {
|
||||
rescaler_t* frow = wrk->frow;
|
||||
const rescaler_t* const frow_end = frow + wrk->dst_width * wrk->num_channels;
|
||||
const int x_add = wrk->x_add;
|
||||
@ -109,8 +109,8 @@ static void RescalerImportRowExpand_SSE2(WebPRescaler* WEBP_RESTRICT const wrk,
|
||||
assert(accum == 0);
|
||||
}
|
||||
|
||||
static void RescalerImportRowShrink_SSE2(WebPRescaler* WEBP_RESTRICT const wrk,
|
||||
const uint8_t* WEBP_RESTRICT src) {
|
||||
static void RescalerImportRowShrink_SSE2(WebPRescaler* const wrk,
|
||||
const uint8_t* src) {
|
||||
const int x_sub = wrk->x_sub;
|
||||
int accum = 0;
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
@ -168,9 +168,11 @@ static void RescalerImportRowShrink_SSE2(WebPRescaler* WEBP_RESTRICT const wrk,
|
||||
// Row export
|
||||
|
||||
// load *src as epi64, multiply by mult and store result in [out0 ... out3]
|
||||
static WEBP_INLINE void LoadDispatchAndMult_SSE2(
|
||||
const rescaler_t* WEBP_RESTRICT const src, const __m128i* const mult,
|
||||
__m128i* const out0, __m128i* const out1, __m128i* const out2,
|
||||
static WEBP_INLINE void LoadDispatchAndMult_SSE2(const rescaler_t* const src,
|
||||
const __m128i* const mult,
|
||||
__m128i* const out0,
|
||||
__m128i* const out1,
|
||||
__m128i* const out2,
|
||||
__m128i* const out3) {
|
||||
const __m128i A0 = _mm_loadu_si128((const __m128i*)(src + 0));
|
||||
const __m128i A1 = _mm_loadu_si128((const __m128i*)(src + 4));
|
||||
|
@ -35,14 +35,10 @@ WebPUpsampleLinePairFunc WebPUpsamplers[MODE_LAST];
|
||||
#define LOAD_UV(u, v) ((u) | ((v) << 16))
|
||||
|
||||
#define UPSAMPLE_FUNC(FUNC_NAME, FUNC, XSTEP) \
|
||||
static void FUNC_NAME(const uint8_t* WEBP_RESTRICT top_y, \
|
||||
const uint8_t* WEBP_RESTRICT bottom_y, \
|
||||
const uint8_t* WEBP_RESTRICT top_u, \
|
||||
const uint8_t* WEBP_RESTRICT top_v, \
|
||||
const uint8_t* WEBP_RESTRICT cur_u, \
|
||||
const uint8_t* WEBP_RESTRICT cur_v, \
|
||||
uint8_t* WEBP_RESTRICT top_dst, \
|
||||
uint8_t* WEBP_RESTRICT bottom_dst, int len) { \
|
||||
static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \
|
||||
const uint8_t* top_u, const uint8_t* top_v, \
|
||||
const uint8_t* cur_u, const uint8_t* cur_v, \
|
||||
uint8_t* top_dst, uint8_t* bottom_dst, int len) { \
|
||||
int x; \
|
||||
const int last_pixel_pair = (len - 1) >> 1; \
|
||||
uint32_t tl_uv = LOAD_UV(top_u[0], top_v[0]); /* top-left sample */ \
|
||||
@ -140,14 +136,10 @@ static void EmptyUpsampleFunc(const uint8_t* top_y, const uint8_t* bottom_y,
|
||||
|
||||
#if !defined(FANCY_UPSAMPLING)
|
||||
#define DUAL_SAMPLE_FUNC(FUNC_NAME, FUNC) \
|
||||
static void FUNC_NAME(const uint8_t* WEBP_RESTRICT top_y, \
|
||||
const uint8_t* WEBP_RESTRICT bot_y, \
|
||||
const uint8_t* WEBP_RESTRICT top_u, \
|
||||
const uint8_t* WEBP_RESTRICT top_v, \
|
||||
const uint8_t* WEBP_RESTRICT bot_u, \
|
||||
const uint8_t* WEBP_RESTRICT bot_v, \
|
||||
uint8_t* WEBP_RESTRICT top_dst, \
|
||||
uint8_t* WEBP_RESTRICT bot_dst, int len) { \
|
||||
static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bot_y, \
|
||||
const uint8_t* top_u, const uint8_t* top_v, \
|
||||
const uint8_t* bot_u, const uint8_t* bot_v, \
|
||||
uint8_t* top_dst, uint8_t* bot_dst, int len) { \
|
||||
const int half_len = len >> 1; \
|
||||
int x; \
|
||||
assert(top_dst != NULL); \
|
||||
@ -186,14 +178,10 @@ WebPUpsampleLinePairFunc WebPGetLinePairConverter(int alpha_is_last) {
|
||||
// YUV444 converter
|
||||
|
||||
#define YUV444_FUNC(FUNC_NAME, FUNC, XSTEP) \
|
||||
extern void FUNC_NAME(const uint8_t* WEBP_RESTRICT y, \
|
||||
const uint8_t* WEBP_RESTRICT u, \
|
||||
const uint8_t* WEBP_RESTRICT v, \
|
||||
uint8_t* WEBP_RESTRICT dst, int len); \
|
||||
void FUNC_NAME(const uint8_t* WEBP_RESTRICT y, \
|
||||
const uint8_t* WEBP_RESTRICT u, \
|
||||
const uint8_t* WEBP_RESTRICT v, \
|
||||
uint8_t* WEBP_RESTRICT dst, int len) { \
|
||||
extern void FUNC_NAME(const uint8_t* y, const uint8_t* u, const uint8_t* v, \
|
||||
uint8_t* dst, int len); \
|
||||
void FUNC_NAME(const uint8_t* y, const uint8_t* u, const uint8_t* v, \
|
||||
uint8_t* dst, int len) { \
|
||||
int i; \
|
||||
for (i = 0; i < len; ++i) FUNC(y[i], u[i], v[i], &dst[i * (XSTEP)]); \
|
||||
}
|
||||
|
@ -143,14 +143,10 @@ static WEBP_INLINE void YuvToRgba(uint8_t y, uint8_t u, uint8_t v,
|
||||
#define LOAD_UV(u, v) ((u) | ((v) << 16))
|
||||
|
||||
#define UPSAMPLE_FUNC(FUNC_NAME, FUNC, XSTEP) \
|
||||
static void FUNC_NAME(const uint8_t* WEBP_RESTRICT top_y, \
|
||||
const uint8_t* WEBP_RESTRICT bottom_y, \
|
||||
const uint8_t* WEBP_RESTRICT top_u, \
|
||||
const uint8_t* WEBP_RESTRICT top_v, \
|
||||
const uint8_t* WEBP_RESTRICT cur_u, \
|
||||
const uint8_t* WEBP_RESTRICT cur_v, \
|
||||
uint8_t* WEBP_RESTRICT top_dst, \
|
||||
uint8_t* WEBP_RESTRICT bottom_dst, int len) { \
|
||||
static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \
|
||||
const uint8_t* top_u, const uint8_t* top_v, \
|
||||
const uint8_t* cur_u, const uint8_t* cur_v, \
|
||||
uint8_t* top_dst, uint8_t* bottom_dst, int len) { \
|
||||
int x; \
|
||||
const int last_pixel_pair = (len - 1) >> 1; \
|
||||
uint32_t tl_uv = LOAD_UV(top_u[0], top_v[0]); /* top-left sample */ \
|
||||
@ -245,10 +241,8 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitUpsamplersMIPSdspR2(void) {
|
||||
// YUV444 converter
|
||||
|
||||
#define YUV444_FUNC(FUNC_NAME, FUNC, XSTEP) \
|
||||
static void FUNC_NAME(const uint8_t* WEBP_RESTRICT y, \
|
||||
const uint8_t* WEBP_RESTRICT u, \
|
||||
const uint8_t* WEBP_RESTRICT v, \
|
||||
uint8_t* WEBP_RESTRICT dst, int len) { \
|
||||
static void FUNC_NAME(const uint8_t* y, const uint8_t* u, const uint8_t* v, \
|
||||
uint8_t* dst, int len) { \
|
||||
int i; \
|
||||
for (i = 0; i < len; ++i) FUNC(y[i], u[i], v[i], &dst[i * XSTEP]); \
|
||||
}
|
||||
|
@ -320,10 +320,8 @@ static void YuvToRgba(uint8_t y, uint8_t u, uint8_t v, uint8_t* const rgba) {
|
||||
}
|
||||
|
||||
#if !defined(WEBP_REDUCE_CSP)
|
||||
static void YuvToRgbLine(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst, int length) {
|
||||
static void YuvToRgbLine(const uint8_t* y, const uint8_t* u,
|
||||
const uint8_t* v, uint8_t* dst, int length) {
|
||||
v16u8 R, G, B;
|
||||
while (length >= 16) {
|
||||
CALC_RGB16(y, u, v, R, G, B);
|
||||
@ -349,10 +347,8 @@ static void YuvToRgbLine(const uint8_t* WEBP_RESTRICT y,
|
||||
}
|
||||
}
|
||||
|
||||
static void YuvToBgrLine(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst, int length) {
|
||||
static void YuvToBgrLine(const uint8_t* y, const uint8_t* u,
|
||||
const uint8_t* v, uint8_t* dst, int length) {
|
||||
v16u8 R, G, B;
|
||||
while (length >= 16) {
|
||||
CALC_RGB16(y, u, v, R, G, B);
|
||||
@ -379,10 +375,8 @@ static void YuvToBgrLine(const uint8_t* WEBP_RESTRICT y,
|
||||
}
|
||||
#endif // WEBP_REDUCE_CSP
|
||||
|
||||
static void YuvToRgbaLine(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst, int length) {
|
||||
static void YuvToRgbaLine(const uint8_t* y, const uint8_t* u,
|
||||
const uint8_t* v, uint8_t* dst, int length) {
|
||||
v16u8 R, G, B;
|
||||
const v16u8 A = (v16u8)__msa_ldi_b(ALPHAVAL);
|
||||
while (length >= 16) {
|
||||
@ -409,10 +403,8 @@ static void YuvToRgbaLine(const uint8_t* WEBP_RESTRICT y,
|
||||
}
|
||||
}
|
||||
|
||||
static void YuvToBgraLine(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst, int length) {
|
||||
static void YuvToBgraLine(const uint8_t* y, const uint8_t* u,
|
||||
const uint8_t* v, uint8_t* dst, int length) {
|
||||
v16u8 R, G, B;
|
||||
const v16u8 A = (v16u8)__msa_ldi_b(ALPHAVAL);
|
||||
while (length >= 16) {
|
||||
@ -440,10 +432,8 @@ static void YuvToBgraLine(const uint8_t* WEBP_RESTRICT y,
|
||||
}
|
||||
|
||||
#if !defined(WEBP_REDUCE_CSP)
|
||||
static void YuvToArgbLine(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst, int length) {
|
||||
static void YuvToArgbLine(const uint8_t* y, const uint8_t* u,
|
||||
const uint8_t* v, uint8_t* dst, int length) {
|
||||
v16u8 R, G, B;
|
||||
const v16u8 A = (v16u8)__msa_ldi_b(ALPHAVAL);
|
||||
while (length >= 16) {
|
||||
@ -470,10 +460,8 @@ static void YuvToArgbLine(const uint8_t* WEBP_RESTRICT y,
|
||||
}
|
||||
}
|
||||
|
||||
static void YuvToRgba4444Line(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst, int length) {
|
||||
static void YuvToRgba4444Line(const uint8_t* y, const uint8_t* u,
|
||||
const uint8_t* v, uint8_t* dst, int length) {
|
||||
v16u8 R, G, B, RG, BA, tmp0, tmp1;
|
||||
while (length >= 16) {
|
||||
#if (WEBP_SWAP_16BIT_CSP == 1)
|
||||
@ -508,10 +496,8 @@ static void YuvToRgba4444Line(const uint8_t* WEBP_RESTRICT y,
|
||||
}
|
||||
}
|
||||
|
||||
static void YuvToRgb565Line(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst, int length) {
|
||||
static void YuvToRgb565Line(const uint8_t* y, const uint8_t* u,
|
||||
const uint8_t* v, uint8_t* dst, int length) {
|
||||
v16u8 R, G, B, RG, GB, tmp0, tmp1;
|
||||
while (length >= 16) {
|
||||
#if (WEBP_SWAP_16BIT_CSP == 1)
|
||||
@ -578,14 +564,11 @@ static void YuvToRgb565Line(const uint8_t* WEBP_RESTRICT y,
|
||||
} while (0)
|
||||
|
||||
#define UPSAMPLE_FUNC(FUNC_NAME, FUNC, XSTEP) \
|
||||
static void FUNC_NAME(const uint8_t* WEBP_RESTRICT top_y, \
|
||||
const uint8_t* WEBP_RESTRICT bot_y, \
|
||||
const uint8_t* WEBP_RESTRICT top_u, \
|
||||
const uint8_t* WEBP_RESTRICT top_v, \
|
||||
const uint8_t* WEBP_RESTRICT cur_u, \
|
||||
const uint8_t* WEBP_RESTRICT cur_v, \
|
||||
uint8_t* WEBP_RESTRICT top_dst, \
|
||||
uint8_t* WEBP_RESTRICT bot_dst, int len) { \
|
||||
static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bot_y, \
|
||||
const uint8_t* top_u, const uint8_t* top_v, \
|
||||
const uint8_t* cur_u, const uint8_t* cur_v, \
|
||||
uint8_t* top_dst, uint8_t* bot_dst, int len) \
|
||||
{ \
|
||||
int size = (len - 1) >> 1; \
|
||||
uint8_t temp_u[64]; \
|
||||
uint8_t temp_v[64]; \
|
||||
|
@ -58,9 +58,8 @@
|
||||
} while (0)
|
||||
|
||||
// Turn the macro into a function for reducing code-size when non-critical
|
||||
static void Upsample16Pixels_NEON(const uint8_t* WEBP_RESTRICT const r1,
|
||||
const uint8_t* WEBP_RESTRICT const r2,
|
||||
uint8_t* WEBP_RESTRICT const out) {
|
||||
static void Upsample16Pixels_NEON(const uint8_t* r1, const uint8_t* r2,
|
||||
uint8_t* out) {
|
||||
UPSAMPLE_16PIXELS(r1, r2, out);
|
||||
}
|
||||
|
||||
@ -191,14 +190,10 @@ static const int16_t kCoeffs1[4] = { 19077, 26149, 6419, 13320 };
|
||||
}
|
||||
|
||||
#define NEON_UPSAMPLE_FUNC(FUNC_NAME, FMT, XSTEP) \
|
||||
static void FUNC_NAME(const uint8_t* WEBP_RESTRICT top_y, \
|
||||
const uint8_t* WEBP_RESTRICT bottom_y, \
|
||||
const uint8_t* WEBP_RESTRICT top_u, \
|
||||
const uint8_t* WEBP_RESTRICT top_v, \
|
||||
const uint8_t* WEBP_RESTRICT cur_u, \
|
||||
const uint8_t* WEBP_RESTRICT cur_v, \
|
||||
uint8_t* WEBP_RESTRICT top_dst, \
|
||||
uint8_t* WEBP_RESTRICT bottom_dst, int len) { \
|
||||
static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \
|
||||
const uint8_t* top_u, const uint8_t* top_v, \
|
||||
const uint8_t* cur_u, const uint8_t* cur_v, \
|
||||
uint8_t* top_dst, uint8_t* bottom_dst, int len) { \
|
||||
int block; \
|
||||
/* 16 byte aligned array to cache reconstructed u and v */ \
|
||||
uint8_t uv_buf[2 * 32 + 15]; \
|
||||
|
@ -88,9 +88,8 @@
|
||||
} while (0)
|
||||
|
||||
// Turn the macro into a function for reducing code-size when non-critical
|
||||
static void Upsample32Pixels_SSE2(const uint8_t* WEBP_RESTRICT const r1,
|
||||
const uint8_t* WEBP_RESTRICT const r2,
|
||||
uint8_t* WEBP_RESTRICT const out) {
|
||||
static void Upsample32Pixels_SSE2(const uint8_t r1[], const uint8_t r2[],
|
||||
uint8_t* const out) {
|
||||
UPSAMPLE_32PIXELS(r1, r2, out);
|
||||
}
|
||||
|
||||
@ -115,14 +114,10 @@ static void Upsample32Pixels_SSE2(const uint8_t* WEBP_RESTRICT const r1,
|
||||
} while (0)
|
||||
|
||||
#define SSE2_UPSAMPLE_FUNC(FUNC_NAME, FUNC, XSTEP) \
|
||||
static void FUNC_NAME(const uint8_t* WEBP_RESTRICT top_y, \
|
||||
const uint8_t* WEBP_RESTRICT bottom_y, \
|
||||
const uint8_t* WEBP_RESTRICT top_u, \
|
||||
const uint8_t* WEBP_RESTRICT top_v, \
|
||||
const uint8_t* WEBP_RESTRICT cur_u, \
|
||||
const uint8_t* WEBP_RESTRICT cur_v, \
|
||||
uint8_t* WEBP_RESTRICT top_dst, \
|
||||
uint8_t* WEBP_RESTRICT bottom_dst, int len) { \
|
||||
static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \
|
||||
const uint8_t* top_u, const uint8_t* top_v, \
|
||||
const uint8_t* cur_u, const uint8_t* cur_v, \
|
||||
uint8_t* top_dst, uint8_t* bottom_dst, int len) { \
|
||||
int uv_pos, pos; \
|
||||
/* 16byte-aligned array to cache reconstructed u and v */ \
|
||||
uint8_t uv_buf[14 * 32 + 15] = { 0 }; \
|
||||
@ -220,14 +215,10 @@ extern WebPYUV444Converter WebPYUV444Converters[/* MODE_LAST */];
|
||||
extern void WebPInitYUV444ConvertersSSE2(void);
|
||||
|
||||
#define YUV444_FUNC(FUNC_NAME, CALL, CALL_C, XSTEP) \
|
||||
extern void CALL_C(const uint8_t* WEBP_RESTRICT y, \
|
||||
const uint8_t* WEBP_RESTRICT u, \
|
||||
const uint8_t* WEBP_RESTRICT v, \
|
||||
uint8_t* WEBP_RESTRICT dst, int len); \
|
||||
static void FUNC_NAME(const uint8_t* WEBP_RESTRICT y, \
|
||||
const uint8_t* WEBP_RESTRICT u, \
|
||||
const uint8_t* WEBP_RESTRICT v, \
|
||||
uint8_t* WEBP_RESTRICT dst, int len) { \
|
||||
extern void CALL_C(const uint8_t* y, const uint8_t* u, const uint8_t* v, \
|
||||
uint8_t* dst, int len); \
|
||||
static void FUNC_NAME(const uint8_t* y, const uint8_t* u, const uint8_t* v, \
|
||||
uint8_t* dst, int len) { \
|
||||
int i; \
|
||||
const int max_len = len & ~31; \
|
||||
for (i = 0; i < max_len; i += 32) { \
|
||||
|
@ -90,9 +90,8 @@
|
||||
} while (0)
|
||||
|
||||
// Turn the macro into a function for reducing code-size when non-critical
|
||||
static void Upsample32Pixels_SSE41(const uint8_t* WEBP_RESTRICT const r1,
|
||||
const uint8_t* WEBP_RESTRICT const r2,
|
||||
uint8_t* WEBP_RESTRICT const out) {
|
||||
static void Upsample32Pixels_SSE41(const uint8_t r1[], const uint8_t r2[],
|
||||
uint8_t* const out) {
|
||||
UPSAMPLE_32PIXELS(r1, r2, out);
|
||||
}
|
||||
|
||||
@ -117,14 +116,10 @@ static void Upsample32Pixels_SSE41(const uint8_t* WEBP_RESTRICT const r1,
|
||||
} while (0)
|
||||
|
||||
#define SSE4_UPSAMPLE_FUNC(FUNC_NAME, FUNC, XSTEP) \
|
||||
static void FUNC_NAME(const uint8_t* WEBP_RESTRICT top_y, \
|
||||
const uint8_t* WEBP_RESTRICT bottom_y, \
|
||||
const uint8_t* WEBP_RESTRICT top_u, \
|
||||
const uint8_t* WEBP_RESTRICT top_v, \
|
||||
const uint8_t* WEBP_RESTRICT cur_u, \
|
||||
const uint8_t* WEBP_RESTRICT cur_v, \
|
||||
uint8_t* WEBP_RESTRICT top_dst, \
|
||||
uint8_t* WEBP_RESTRICT bottom_dst, int len) { \
|
||||
static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \
|
||||
const uint8_t* top_u, const uint8_t* top_v, \
|
||||
const uint8_t* cur_u, const uint8_t* cur_v, \
|
||||
uint8_t* top_dst, uint8_t* bottom_dst, int len) { \
|
||||
int uv_pos, pos; \
|
||||
/* 16byte-aligned array to cache reconstructed u and v */ \
|
||||
uint8_t uv_buf[14 * 32 + 15] = { 0 }; \
|
||||
@ -207,14 +202,10 @@ extern WebPYUV444Converter WebPYUV444Converters[/* MODE_LAST */];
|
||||
extern void WebPInitYUV444ConvertersSSE41(void);
|
||||
|
||||
#define YUV444_FUNC(FUNC_NAME, CALL, CALL_C, XSTEP) \
|
||||
extern void CALL_C(const uint8_t* WEBP_RESTRICT y, \
|
||||
const uint8_t* WEBP_RESTRICT u, \
|
||||
const uint8_t* WEBP_RESTRICT v, \
|
||||
uint8_t* WEBP_RESTRICT dst, int len); \
|
||||
static void FUNC_NAME(const uint8_t* WEBP_RESTRICT y, \
|
||||
const uint8_t* WEBP_RESTRICT u, \
|
||||
const uint8_t* WEBP_RESTRICT v, \
|
||||
uint8_t* WEBP_RESTRICT dst, int len) { \
|
||||
extern void CALL_C(const uint8_t* y, const uint8_t* u, const uint8_t* v, \
|
||||
uint8_t* dst, int len); \
|
||||
static void FUNC_NAME(const uint8_t* y, const uint8_t* u, const uint8_t* v, \
|
||||
uint8_t* dst, int len) { \
|
||||
int i; \
|
||||
const int max_len = len & ~31; \
|
||||
for (i = 0; i < max_len; i += 32) { \
|
||||
|
@ -20,10 +20,9 @@
|
||||
// Plain-C version
|
||||
|
||||
#define ROW_FUNC(FUNC_NAME, FUNC, XSTEP) \
|
||||
static void FUNC_NAME(const uint8_t* WEBP_RESTRICT y, \
|
||||
const uint8_t* WEBP_RESTRICT u, \
|
||||
const uint8_t* WEBP_RESTRICT v, \
|
||||
uint8_t* WEBP_RESTRICT dst, int len) { \
|
||||
static void FUNC_NAME(const uint8_t* y, \
|
||||
const uint8_t* u, const uint8_t* v, \
|
||||
uint8_t* dst, int len) { \
|
||||
const uint8_t* const end = dst + (len & ~1) * (XSTEP); \
|
||||
while (dst != end) { \
|
||||
FUNC(y[0], u[0], v[0], dst); \
|
||||
@ -50,10 +49,9 @@ ROW_FUNC(YuvToRgb565Row, VP8YuvToRgb565, 2)
|
||||
#undef ROW_FUNC
|
||||
|
||||
// Main call for processing a plane with a WebPSamplerRowFunc function:
|
||||
void WebPSamplerProcessPlane(const uint8_t* WEBP_RESTRICT y, int y_stride,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v, int uv_stride,
|
||||
uint8_t* WEBP_RESTRICT dst, int dst_stride,
|
||||
void WebPSamplerProcessPlane(const uint8_t* y, int y_stride,
|
||||
const uint8_t* u, const uint8_t* v, int uv_stride,
|
||||
uint8_t* dst, int dst_stride,
|
||||
int width, int height, WebPSamplerRowFunc func) {
|
||||
int j;
|
||||
for (j = 0; j < height; ++j) {
|
||||
@ -119,8 +117,7 @@ WEBP_DSP_INIT_FUNC(WebPInitSamplers) {
|
||||
//-----------------------------------------------------------------------------
|
||||
// ARGB -> YUV converters
|
||||
|
||||
static void ConvertARGBToY_C(const uint32_t* WEBP_RESTRICT argb,
|
||||
uint8_t* WEBP_RESTRICT y, int width) {
|
||||
static void ConvertARGBToY_C(const uint32_t* argb, uint8_t* y, int width) {
|
||||
int i;
|
||||
for (i = 0; i < width; ++i) {
|
||||
const uint32_t p = argb[i];
|
||||
@ -129,8 +126,7 @@ static void ConvertARGBToY_C(const uint32_t* WEBP_RESTRICT argb,
|
||||
}
|
||||
}
|
||||
|
||||
void WebPConvertARGBToUV_C(const uint32_t* WEBP_RESTRICT argb,
|
||||
uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
void WebPConvertARGBToUV_C(const uint32_t* argb, uint8_t* u, uint8_t* v,
|
||||
int src_width, int do_store) {
|
||||
// No rounding. Last pixel is dealt with separately.
|
||||
const int uv_width = src_width >> 1;
|
||||
@ -173,25 +169,22 @@ void WebPConvertARGBToUV_C(const uint32_t* WEBP_RESTRICT argb,
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
static void ConvertRGB24ToY_C(const uint8_t* WEBP_RESTRICT rgb,
|
||||
uint8_t* WEBP_RESTRICT y, int width) {
|
||||
static void ConvertRGB24ToY_C(const uint8_t* rgb, uint8_t* y, int width) {
|
||||
int i;
|
||||
for (i = 0; i < width; ++i, rgb += 3) {
|
||||
y[i] = VP8RGBToY(rgb[0], rgb[1], rgb[2], YUV_HALF);
|
||||
}
|
||||
}
|
||||
|
||||
static void ConvertBGR24ToY_C(const uint8_t* WEBP_RESTRICT bgr,
|
||||
uint8_t* WEBP_RESTRICT y, int width) {
|
||||
static void ConvertBGR24ToY_C(const uint8_t* bgr, uint8_t* y, int width) {
|
||||
int i;
|
||||
for (i = 0; i < width; ++i, bgr += 3) {
|
||||
y[i] = VP8RGBToY(bgr[2], bgr[1], bgr[0], YUV_HALF);
|
||||
}
|
||||
}
|
||||
|
||||
void WebPConvertRGBA32ToUV_C(const uint16_t* WEBP_RESTRICT rgb,
|
||||
uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
int width) {
|
||||
void WebPConvertRGBA32ToUV_C(const uint16_t* rgb,
|
||||
uint8_t* u, uint8_t* v, int width) {
|
||||
int i;
|
||||
for (i = 0; i < width; i += 1, rgb += 4) {
|
||||
const int r = rgb[0], g = rgb[1], b = rgb[2];
|
||||
@ -202,18 +195,13 @@ void WebPConvertRGBA32ToUV_C(const uint16_t* WEBP_RESTRICT rgb,
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
void (*WebPConvertRGB24ToY)(const uint8_t* WEBP_RESTRICT rgb,
|
||||
uint8_t* WEBP_RESTRICT y, int width);
|
||||
void (*WebPConvertBGR24ToY)(const uint8_t* WEBP_RESTRICT bgr,
|
||||
uint8_t* WEBP_RESTRICT y, int width);
|
||||
void (*WebPConvertRGBA32ToUV)(const uint16_t* WEBP_RESTRICT rgb,
|
||||
uint8_t* WEBP_RESTRICT u,
|
||||
uint8_t* WEBP_RESTRICT v, int width);
|
||||
void (*WebPConvertRGB24ToY)(const uint8_t* rgb, uint8_t* y, int width);
|
||||
void (*WebPConvertBGR24ToY)(const uint8_t* bgr, uint8_t* y, int width);
|
||||
void (*WebPConvertRGBA32ToUV)(const uint16_t* rgb,
|
||||
uint8_t* u, uint8_t* v, int width);
|
||||
|
||||
void (*WebPConvertARGBToY)(const uint32_t* WEBP_RESTRICT argb,
|
||||
uint8_t* WEBP_RESTRICT y, int width);
|
||||
void (*WebPConvertARGBToUV)(const uint32_t* WEBP_RESTRICT argb,
|
||||
uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
|
||||
void (*WebPConvertARGBToY)(const uint32_t* argb, uint8_t* y, int width);
|
||||
void (*WebPConvertARGBToUV)(const uint32_t* argb, uint8_t* u, uint8_t* v,
|
||||
int src_width, int do_store);
|
||||
|
||||
extern void WebPInitConvertARGBToYUVSSE2(void);
|
||||
|
@ -149,34 +149,20 @@ static WEBP_INLINE void VP8YuvToRgba(uint8_t y, uint8_t u, uint8_t v,
|
||||
#if defined(WEBP_USE_SSE2)
|
||||
|
||||
// Process 32 pixels and store the result (16b, 24b or 32b per pixel) in *dst.
|
||||
void VP8YuvToRgba32_SSE2(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst);
|
||||
void VP8YuvToRgb32_SSE2(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst);
|
||||
void VP8YuvToBgra32_SSE2(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst);
|
||||
void VP8YuvToBgr32_SSE2(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst);
|
||||
void VP8YuvToArgb32_SSE2(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst);
|
||||
void VP8YuvToRgba444432_SSE2(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst);
|
||||
void VP8YuvToRgb56532_SSE2(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst);
|
||||
void VP8YuvToRgba32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst);
|
||||
void VP8YuvToRgb32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst);
|
||||
void VP8YuvToBgra32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst);
|
||||
void VP8YuvToBgr32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst);
|
||||
void VP8YuvToArgb32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst);
|
||||
void VP8YuvToRgba444432_SSE2(const uint8_t* y, const uint8_t* u,
|
||||
const uint8_t* v, uint8_t* dst);
|
||||
void VP8YuvToRgb56532_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst);
|
||||
|
||||
#endif // WEBP_USE_SSE2
|
||||
|
||||
@ -186,14 +172,10 @@ void VP8YuvToRgb56532_SSE2(const uint8_t* WEBP_RESTRICT y,
|
||||
#if defined(WEBP_USE_SSE41)
|
||||
|
||||
// Process 32 pixels and store the result (16b, 24b or 32b per pixel) in *dst.
|
||||
void VP8YuvToRgb32_SSE41(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst);
|
||||
void VP8YuvToBgr32_SSE41(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst);
|
||||
void VP8YuvToRgb32_SSE41(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst);
|
||||
void VP8YuvToBgr32_SSE41(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst);
|
||||
|
||||
#endif // WEBP_USE_SSE41
|
||||
|
||||
|
@ -22,10 +22,9 @@
|
||||
// simple point-sampling
|
||||
|
||||
#define ROW_FUNC(FUNC_NAME, XSTEP, R, G, B, A) \
|
||||
static void FUNC_NAME(const uint8_t* WEBP_RESTRICT y, \
|
||||
const uint8_t* WEBP_RESTRICT u, \
|
||||
const uint8_t* WEBP_RESTRICT v, \
|
||||
uint8_t* WEBP_RESTRICT dst, int len) { \
|
||||
static void FUNC_NAME(const uint8_t* y, \
|
||||
const uint8_t* u, const uint8_t* v, \
|
||||
uint8_t* dst, int len) { \
|
||||
int i, r, g, b; \
|
||||
int temp0, temp1, temp2, temp3, temp4; \
|
||||
for (i = 0; i < (len >> 1); i++) { \
|
||||
|
@ -69,10 +69,9 @@
|
||||
: "memory", "hi", "lo" \
|
||||
|
||||
#define ROW_FUNC(FUNC_NAME, XSTEP, R, G, B, A) \
|
||||
static void FUNC_NAME(const uint8_t* WEBP_RESTRICT y, \
|
||||
const uint8_t* WEBP_RESTRICT u, \
|
||||
const uint8_t* WEBP_RESTRICT v, \
|
||||
uint8_t* WEBP_RESTRICT dst, int len) { \
|
||||
static void FUNC_NAME(const uint8_t* y, \
|
||||
const uint8_t* u, const uint8_t* v, \
|
||||
uint8_t* dst, int len) { \
|
||||
int i; \
|
||||
uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7; \
|
||||
const int t_con_1 = 26149; \
|
||||
|
@ -46,8 +46,7 @@ static uint8x8_t ConvertRGBToY_NEON(const uint8x8_t R,
|
||||
return vqmovn_u16(Y2);
|
||||
}
|
||||
|
||||
static void ConvertRGB24ToY_NEON(const uint8_t* WEBP_RESTRICT rgb,
|
||||
uint8_t* WEBP_RESTRICT y, int width) {
|
||||
static void ConvertRGB24ToY_NEON(const uint8_t* rgb, uint8_t* y, int width) {
|
||||
int i;
|
||||
for (i = 0; i + 8 <= width; i += 8, rgb += 3 * 8) {
|
||||
const uint8x8x3_t RGB = vld3_u8(rgb);
|
||||
@ -59,8 +58,7 @@ static void ConvertRGB24ToY_NEON(const uint8_t* WEBP_RESTRICT rgb,
|
||||
}
|
||||
}
|
||||
|
||||
static void ConvertBGR24ToY_NEON(const uint8_t* WEBP_RESTRICT bgr,
|
||||
uint8_t* WEBP_RESTRICT y, int width) {
|
||||
static void ConvertBGR24ToY_NEON(const uint8_t* bgr, uint8_t* y, int width) {
|
||||
int i;
|
||||
for (i = 0; i + 8 <= width; i += 8, bgr += 3 * 8) {
|
||||
const uint8x8x3_t BGR = vld3_u8(bgr);
|
||||
@ -72,8 +70,7 @@ static void ConvertBGR24ToY_NEON(const uint8_t* WEBP_RESTRICT bgr,
|
||||
}
|
||||
}
|
||||
|
||||
static void ConvertARGBToY_NEON(const uint32_t* WEBP_RESTRICT argb,
|
||||
uint8_t* WEBP_RESTRICT y, int width) {
|
||||
static void ConvertARGBToY_NEON(const uint32_t* argb, uint8_t* y, int width) {
|
||||
int i;
|
||||
for (i = 0; i + 8 <= width; i += 8) {
|
||||
const uint8x8x4_t RGB = vld4_u8((const uint8_t*)&argb[i]);
|
||||
@ -117,9 +114,8 @@ static void ConvertARGBToY_NEON(const uint32_t* WEBP_RESTRICT argb,
|
||||
MULTIPLY_16b(28800, -24116, -4684, 128 << SHIFT, V_DST); \
|
||||
} while (0)
|
||||
|
||||
static void ConvertRGBA32ToUV_NEON(const uint16_t* WEBP_RESTRICT rgb,
|
||||
uint8_t* WEBP_RESTRICT u,
|
||||
uint8_t* WEBP_RESTRICT v, int width) {
|
||||
static void ConvertRGBA32ToUV_NEON(const uint16_t* rgb,
|
||||
uint8_t* u, uint8_t* v, int width) {
|
||||
int i;
|
||||
for (i = 0; i + 8 <= width; i += 8, rgb += 4 * 8) {
|
||||
const uint16x8x4_t RGB = vld4q_u16((const uint16_t*)rgb);
|
||||
@ -135,9 +131,7 @@ static void ConvertRGBA32ToUV_NEON(const uint16_t* WEBP_RESTRICT rgb,
|
||||
}
|
||||
}
|
||||
|
||||
static void ConvertARGBToUV_NEON(const uint32_t* WEBP_RESTRICT argb,
|
||||
uint8_t* WEBP_RESTRICT u,
|
||||
uint8_t* WEBP_RESTRICT v,
|
||||
static void ConvertARGBToUV_NEON(const uint32_t* argb, uint8_t* u, uint8_t* v,
|
||||
int src_width, int do_store) {
|
||||
int i;
|
||||
for (i = 0; i + 16 <= src_width; i += 16, u += 8, v += 8) {
|
||||
|
@ -82,9 +82,9 @@ static WEBP_INLINE __m128i Load_UV_HI_8_SSE2(const uint8_t* src) {
|
||||
}
|
||||
|
||||
// Convert 32 samples of YUV444 to R/G/B
|
||||
static void YUV444ToRGB_SSE2(const uint8_t* WEBP_RESTRICT const y,
|
||||
const uint8_t* WEBP_RESTRICT const u,
|
||||
const uint8_t* WEBP_RESTRICT const v,
|
||||
static void YUV444ToRGB_SSE2(const uint8_t* const y,
|
||||
const uint8_t* const u,
|
||||
const uint8_t* const v,
|
||||
__m128i* const R, __m128i* const G,
|
||||
__m128i* const B) {
|
||||
const __m128i Y0 = Load_HI_16_SSE2(y), U0 = Load_HI_16_SSE2(u),
|
||||
@ -93,9 +93,9 @@ static void YUV444ToRGB_SSE2(const uint8_t* WEBP_RESTRICT const y,
|
||||
}
|
||||
|
||||
// Convert 32 samples of YUV420 to R/G/B
|
||||
static void YUV420ToRGB_SSE2(const uint8_t* WEBP_RESTRICT const y,
|
||||
const uint8_t* WEBP_RESTRICT const u,
|
||||
const uint8_t* WEBP_RESTRICT const v,
|
||||
static void YUV420ToRGB_SSE2(const uint8_t* const y,
|
||||
const uint8_t* const u,
|
||||
const uint8_t* const v,
|
||||
__m128i* const R, __m128i* const G,
|
||||
__m128i* const B) {
|
||||
const __m128i Y0 = Load_HI_16_SSE2(y), U0 = Load_UV_HI_8_SSE2(u),
|
||||
@ -108,7 +108,7 @@ static WEBP_INLINE void PackAndStore4_SSE2(const __m128i* const R,
|
||||
const __m128i* const G,
|
||||
const __m128i* const B,
|
||||
const __m128i* const A,
|
||||
uint8_t* WEBP_RESTRICT const dst) {
|
||||
uint8_t* const dst) {
|
||||
const __m128i rb = _mm_packus_epi16(*R, *B);
|
||||
const __m128i ga = _mm_packus_epi16(*G, *A);
|
||||
const __m128i rg = _mm_unpacklo_epi8(rb, ga);
|
||||
@ -120,9 +120,11 @@ static WEBP_INLINE void PackAndStore4_SSE2(const __m128i* const R,
|
||||
}
|
||||
|
||||
// Pack R/G/B/A results into 16b output.
|
||||
static WEBP_INLINE void PackAndStore4444_SSE2(
|
||||
const __m128i* const R, const __m128i* const G, const __m128i* const B,
|
||||
const __m128i* const A, uint8_t* WEBP_RESTRICT const dst) {
|
||||
static WEBP_INLINE void PackAndStore4444_SSE2(const __m128i* const R,
|
||||
const __m128i* const G,
|
||||
const __m128i* const B,
|
||||
const __m128i* const A,
|
||||
uint8_t* const dst) {
|
||||
#if (WEBP_SWAP_16BIT_CSP == 0)
|
||||
const __m128i rg0 = _mm_packus_epi16(*R, *G);
|
||||
const __m128i ba0 = _mm_packus_epi16(*B, *A);
|
||||
@ -143,7 +145,7 @@ static WEBP_INLINE void PackAndStore4444_SSE2(
|
||||
static WEBP_INLINE void PackAndStore565_SSE2(const __m128i* const R,
|
||||
const __m128i* const G,
|
||||
const __m128i* const B,
|
||||
uint8_t* WEBP_RESTRICT const dst) {
|
||||
uint8_t* const dst) {
|
||||
const __m128i r0 = _mm_packus_epi16(*R, *R);
|
||||
const __m128i g0 = _mm_packus_epi16(*G, *G);
|
||||
const __m128i b0 = _mm_packus_epi16(*B, *B);
|
||||
@ -168,7 +170,7 @@ static WEBP_INLINE void PackAndStore565_SSE2(const __m128i* const R,
|
||||
static WEBP_INLINE void PlanarTo24b_SSE2(__m128i* const in0, __m128i* const in1,
|
||||
__m128i* const in2, __m128i* const in3,
|
||||
__m128i* const in4, __m128i* const in5,
|
||||
uint8_t* WEBP_RESTRICT const rgb) {
|
||||
uint8_t* const rgb) {
|
||||
// The input is 6 registers of sixteen 8b but for the sake of explanation,
|
||||
// let's take 6 registers of four 8b values.
|
||||
// To pack, we will keep taking one every two 8b integer and move it
|
||||
@ -191,10 +193,8 @@ static WEBP_INLINE void PlanarTo24b_SSE2(__m128i* const in0, __m128i* const in1,
|
||||
_mm_storeu_si128((__m128i*)(rgb + 80), *in5);
|
||||
}
|
||||
|
||||
void VP8YuvToRgba32_SSE2(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
void VP8YuvToRgba32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst) {
|
||||
const __m128i kAlpha = _mm_set1_epi16(255);
|
||||
int n;
|
||||
for (n = 0; n < 32; n += 8, dst += 32) {
|
||||
@ -204,10 +204,8 @@ void VP8YuvToRgba32_SSE2(const uint8_t* WEBP_RESTRICT y,
|
||||
}
|
||||
}
|
||||
|
||||
void VP8YuvToBgra32_SSE2(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
void VP8YuvToBgra32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst) {
|
||||
const __m128i kAlpha = _mm_set1_epi16(255);
|
||||
int n;
|
||||
for (n = 0; n < 32; n += 8, dst += 32) {
|
||||
@ -217,10 +215,8 @@ void VP8YuvToBgra32_SSE2(const uint8_t* WEBP_RESTRICT y,
|
||||
}
|
||||
}
|
||||
|
||||
void VP8YuvToArgb32_SSE2(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
void VP8YuvToArgb32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst) {
|
||||
const __m128i kAlpha = _mm_set1_epi16(255);
|
||||
int n;
|
||||
for (n = 0; n < 32; n += 8, dst += 32) {
|
||||
@ -230,10 +226,8 @@ void VP8YuvToArgb32_SSE2(const uint8_t* WEBP_RESTRICT y,
|
||||
}
|
||||
}
|
||||
|
||||
void VP8YuvToRgba444432_SSE2(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
void VP8YuvToRgba444432_SSE2(const uint8_t* y, const uint8_t* u,
|
||||
const uint8_t* v, uint8_t* dst) {
|
||||
const __m128i kAlpha = _mm_set1_epi16(255);
|
||||
int n;
|
||||
for (n = 0; n < 32; n += 8, dst += 16) {
|
||||
@ -243,10 +237,8 @@ void VP8YuvToRgba444432_SSE2(const uint8_t* WEBP_RESTRICT y,
|
||||
}
|
||||
}
|
||||
|
||||
void VP8YuvToRgb56532_SSE2(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
void VP8YuvToRgb56532_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst) {
|
||||
int n;
|
||||
for (n = 0; n < 32; n += 8, dst += 16) {
|
||||
__m128i R, G, B;
|
||||
@ -255,10 +247,8 @@ void VP8YuvToRgb56532_SSE2(const uint8_t* WEBP_RESTRICT y,
|
||||
}
|
||||
}
|
||||
|
||||
void VP8YuvToRgb32_SSE2(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
void VP8YuvToRgb32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst) {
|
||||
__m128i R0, R1, R2, R3, G0, G1, G2, G3, B0, B1, B2, B3;
|
||||
__m128i rgb0, rgb1, rgb2, rgb3, rgb4, rgb5;
|
||||
|
||||
@ -279,10 +269,8 @@ void VP8YuvToRgb32_SSE2(const uint8_t* WEBP_RESTRICT y,
|
||||
PlanarTo24b_SSE2(&rgb0, &rgb1, &rgb2, &rgb3, &rgb4, &rgb5, dst);
|
||||
}
|
||||
|
||||
void VP8YuvToBgr32_SSE2(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
void VP8YuvToBgr32_SSE2(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst) {
|
||||
__m128i R0, R1, R2, R3, G0, G1, G2, G3, B0, B1, B2, B3;
|
||||
__m128i bgr0, bgr1, bgr2, bgr3, bgr4, bgr5;
|
||||
|
||||
@ -306,10 +294,9 @@ void VP8YuvToBgr32_SSE2(const uint8_t* WEBP_RESTRICT y,
|
||||
//-----------------------------------------------------------------------------
|
||||
// Arbitrary-length row conversion functions
|
||||
|
||||
static void YuvToRgbaRow_SSE2(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst, int len) {
|
||||
static void YuvToRgbaRow_SSE2(const uint8_t* y,
|
||||
const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst, int len) {
|
||||
const __m128i kAlpha = _mm_set1_epi16(255);
|
||||
int n;
|
||||
for (n = 0; n + 8 <= len; n += 8, dst += 32) {
|
||||
@ -329,10 +316,9 @@ static void YuvToRgbaRow_SSE2(const uint8_t* WEBP_RESTRICT y,
|
||||
}
|
||||
}
|
||||
|
||||
static void YuvToBgraRow_SSE2(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst, int len) {
|
||||
static void YuvToBgraRow_SSE2(const uint8_t* y,
|
||||
const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst, int len) {
|
||||
const __m128i kAlpha = _mm_set1_epi16(255);
|
||||
int n;
|
||||
for (n = 0; n + 8 <= len; n += 8, dst += 32) {
|
||||
@ -352,10 +338,9 @@ static void YuvToBgraRow_SSE2(const uint8_t* WEBP_RESTRICT y,
|
||||
}
|
||||
}
|
||||
|
||||
static void YuvToArgbRow_SSE2(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst, int len) {
|
||||
static void YuvToArgbRow_SSE2(const uint8_t* y,
|
||||
const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst, int len) {
|
||||
const __m128i kAlpha = _mm_set1_epi16(255);
|
||||
int n;
|
||||
for (n = 0; n + 8 <= len; n += 8, dst += 32) {
|
||||
@ -375,10 +360,9 @@ static void YuvToArgbRow_SSE2(const uint8_t* WEBP_RESTRICT y,
|
||||
}
|
||||
}
|
||||
|
||||
static void YuvToRgbRow_SSE2(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst, int len) {
|
||||
static void YuvToRgbRow_SSE2(const uint8_t* y,
|
||||
const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst, int len) {
|
||||
int n;
|
||||
for (n = 0; n + 32 <= len; n += 32, dst += 32 * 3) {
|
||||
__m128i R0, R1, R2, R3, G0, G1, G2, G3, B0, B1, B2, B3;
|
||||
@ -413,10 +397,9 @@ static void YuvToRgbRow_SSE2(const uint8_t* WEBP_RESTRICT y,
|
||||
}
|
||||
}
|
||||
|
||||
static void YuvToBgrRow_SSE2(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst, int len) {
|
||||
static void YuvToBgrRow_SSE2(const uint8_t* y,
|
||||
const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst, int len) {
|
||||
int n;
|
||||
for (n = 0; n + 32 <= len; n += 32, dst += 32 * 3) {
|
||||
__m128i R0, R1, R2, R3, G0, G1, G2, G3, B0, B1, B2, B3;
|
||||
@ -488,7 +471,7 @@ static WEBP_INLINE void RGB24PackedToPlanarHelper_SSE2(
|
||||
// rrrr... rrrr... gggg... gggg... bbbb... bbbb....
|
||||
// Similar to PlanarTo24bHelper(), but in reverse order.
|
||||
static WEBP_INLINE void RGB24PackedToPlanar_SSE2(
|
||||
const uint8_t* WEBP_RESTRICT const rgb, __m128i* const out /*out[6]*/) {
|
||||
const uint8_t* const rgb, __m128i* const out /*out[6]*/) {
|
||||
__m128i tmp[6];
|
||||
tmp[0] = _mm_loadu_si128((const __m128i*)(rgb + 0));
|
||||
tmp[1] = _mm_loadu_si128((const __m128i*)(rgb + 16));
|
||||
@ -505,8 +488,8 @@ static WEBP_INLINE void RGB24PackedToPlanar_SSE2(
|
||||
}
|
||||
|
||||
// Convert 8 packed ARGB to r[], g[], b[]
|
||||
static WEBP_INLINE void RGB32PackedToPlanar_SSE2(
|
||||
const uint32_t* WEBP_RESTRICT const argb, __m128i* const rgb /*in[6]*/) {
|
||||
static WEBP_INLINE void RGB32PackedToPlanar_SSE2(const uint32_t* const argb,
|
||||
__m128i* const rgb /*in[6]*/) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
__m128i a0 = LOAD_16(argb + 0);
|
||||
__m128i a1 = LOAD_16(argb + 4);
|
||||
@ -579,8 +562,7 @@ static WEBP_INLINE void ConvertRGBToUV_SSE2(const __m128i* const R,
|
||||
#undef MK_CST_16
|
||||
#undef TRANSFORM
|
||||
|
||||
static void ConvertRGB24ToY_SSE2(const uint8_t* WEBP_RESTRICT rgb,
|
||||
uint8_t* WEBP_RESTRICT y, int width) {
|
||||
static void ConvertRGB24ToY_SSE2(const uint8_t* rgb, uint8_t* y, int width) {
|
||||
const int max_width = width & ~31;
|
||||
int i;
|
||||
for (i = 0; i < max_width; rgb += 3 * 16 * 2) {
|
||||
@ -614,8 +596,7 @@ static void ConvertRGB24ToY_SSE2(const uint8_t* WEBP_RESTRICT rgb,
|
||||
}
|
||||
}
|
||||
|
||||
static void ConvertBGR24ToY_SSE2(const uint8_t* WEBP_RESTRICT bgr,
|
||||
uint8_t* WEBP_RESTRICT y, int width) {
|
||||
static void ConvertBGR24ToY_SSE2(const uint8_t* bgr, uint8_t* y, int width) {
|
||||
const int max_width = width & ~31;
|
||||
int i;
|
||||
for (i = 0; i < max_width; bgr += 3 * 16 * 2) {
|
||||
@ -649,8 +630,7 @@ static void ConvertBGR24ToY_SSE2(const uint8_t* WEBP_RESTRICT bgr,
|
||||
}
|
||||
}
|
||||
|
||||
static void ConvertARGBToY_SSE2(const uint32_t* WEBP_RESTRICT argb,
|
||||
uint8_t* WEBP_RESTRICT y, int width) {
|
||||
static void ConvertARGBToY_SSE2(const uint32_t* argb, uint8_t* y, int width) {
|
||||
const int max_width = width & ~15;
|
||||
int i;
|
||||
for (i = 0; i < max_width; i += 16) {
|
||||
@ -678,9 +658,8 @@ static void HorizontalAddPack_SSE2(const __m128i* const A,
|
||||
*out = _mm_packs_epi32(C, D);
|
||||
}
|
||||
|
||||
static void ConvertARGBToUV_SSE2(const uint32_t* WEBP_RESTRICT argb,
|
||||
uint8_t* WEBP_RESTRICT u,
|
||||
uint8_t* WEBP_RESTRICT v,
|
||||
static void ConvertARGBToUV_SSE2(const uint32_t* argb,
|
||||
uint8_t* u, uint8_t* v,
|
||||
int src_width, int do_store) {
|
||||
const int max_width = src_width & ~31;
|
||||
int i;
|
||||
@ -716,7 +695,7 @@ static void ConvertARGBToUV_SSE2(const uint32_t* WEBP_RESTRICT argb,
|
||||
|
||||
// Convert 16 packed ARGB 16b-values to r[], g[], b[]
|
||||
static WEBP_INLINE void RGBA32PackedToPlanar_16b_SSE2(
|
||||
const uint16_t* WEBP_RESTRICT const rgbx,
|
||||
const uint16_t* const rgbx,
|
||||
__m128i* const r, __m128i* const g, __m128i* const b) {
|
||||
const __m128i in0 = LOAD_16(rgbx + 0); // r0 | g0 | b0 |x| r1 | g1 | b1 |x
|
||||
const __m128i in1 = LOAD_16(rgbx + 8); // r2 | g2 | b2 |x| r3 | g3 | b3 |x
|
||||
@ -736,9 +715,8 @@ static WEBP_INLINE void RGBA32PackedToPlanar_16b_SSE2(
|
||||
*b = _mm_unpacklo_epi64(B1, B3);
|
||||
}
|
||||
|
||||
static void ConvertRGBA32ToUV_SSE2(const uint16_t* WEBP_RESTRICT rgb,
|
||||
uint8_t* WEBP_RESTRICT u,
|
||||
uint8_t* WEBP_RESTRICT v, int width) {
|
||||
static void ConvertRGBA32ToUV_SSE2(const uint16_t* rgb,
|
||||
uint8_t* u, uint8_t* v, int width) {
|
||||
const int max_width = width & ~15;
|
||||
const uint16_t* const last_rgb = rgb + 4 * max_width;
|
||||
while (rgb < last_rgb) {
|
||||
|
@ -82,9 +82,9 @@ static WEBP_INLINE __m128i Load_UV_HI_8_SSE41(const uint8_t* src) {
|
||||
}
|
||||
|
||||
// Convert 32 samples of YUV444 to R/G/B
|
||||
static void YUV444ToRGB_SSE41(const uint8_t* WEBP_RESTRICT const y,
|
||||
const uint8_t* WEBP_RESTRICT const u,
|
||||
const uint8_t* WEBP_RESTRICT const v,
|
||||
static void YUV444ToRGB_SSE41(const uint8_t* const y,
|
||||
const uint8_t* const u,
|
||||
const uint8_t* const v,
|
||||
__m128i* const R, __m128i* const G,
|
||||
__m128i* const B) {
|
||||
const __m128i Y0 = Load_HI_16_SSE41(y), U0 = Load_HI_16_SSE41(u),
|
||||
@ -93,9 +93,9 @@ static void YUV444ToRGB_SSE41(const uint8_t* WEBP_RESTRICT const y,
|
||||
}
|
||||
|
||||
// Convert 32 samples of YUV420 to R/G/B
|
||||
static void YUV420ToRGB_SSE41(const uint8_t* WEBP_RESTRICT const y,
|
||||
const uint8_t* WEBP_RESTRICT const u,
|
||||
const uint8_t* WEBP_RESTRICT const v,
|
||||
static void YUV420ToRGB_SSE41(const uint8_t* const y,
|
||||
const uint8_t* const u,
|
||||
const uint8_t* const v,
|
||||
__m128i* const R, __m128i* const G,
|
||||
__m128i* const B) {
|
||||
const __m128i Y0 = Load_HI_16_SSE41(y), U0 = Load_UV_HI_8_SSE41(u),
|
||||
@ -109,7 +109,7 @@ static void YUV420ToRGB_SSE41(const uint8_t* WEBP_RESTRICT const y,
|
||||
static WEBP_INLINE void PlanarTo24b_SSE41(
|
||||
__m128i* const in0, __m128i* const in1, __m128i* const in2,
|
||||
__m128i* const in3, __m128i* const in4, __m128i* const in5,
|
||||
uint8_t* WEBP_RESTRICT const rgb) {
|
||||
uint8_t* const rgb) {
|
||||
// The input is 6 registers of sixteen 8b but for the sake of explanation,
|
||||
// let's take 6 registers of four 8b values.
|
||||
// To pack, we will keep taking one every two 8b integer and move it
|
||||
@ -132,10 +132,8 @@ static WEBP_INLINE void PlanarTo24b_SSE41(
|
||||
_mm_storeu_si128((__m128i*)(rgb + 80), *in5);
|
||||
}
|
||||
|
||||
void VP8YuvToRgb32_SSE41(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
void VP8YuvToRgb32_SSE41(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst) {
|
||||
__m128i R0, R1, R2, R3, G0, G1, G2, G3, B0, B1, B2, B3;
|
||||
__m128i rgb0, rgb1, rgb2, rgb3, rgb4, rgb5;
|
||||
|
||||
@ -156,10 +154,8 @@ void VP8YuvToRgb32_SSE41(const uint8_t* WEBP_RESTRICT y,
|
||||
PlanarTo24b_SSE41(&rgb0, &rgb1, &rgb2, &rgb3, &rgb4, &rgb5, dst);
|
||||
}
|
||||
|
||||
void VP8YuvToBgr32_SSE41(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst) {
|
||||
void VP8YuvToBgr32_SSE41(const uint8_t* y, const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst) {
|
||||
__m128i R0, R1, R2, R3, G0, G1, G2, G3, B0, B1, B2, B3;
|
||||
__m128i bgr0, bgr1, bgr2, bgr3, bgr4, bgr5;
|
||||
|
||||
@ -183,10 +179,9 @@ void VP8YuvToBgr32_SSE41(const uint8_t* WEBP_RESTRICT y,
|
||||
//-----------------------------------------------------------------------------
|
||||
// Arbitrary-length row conversion functions
|
||||
|
||||
static void YuvToRgbRow_SSE41(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst, int len) {
|
||||
static void YuvToRgbRow_SSE41(const uint8_t* y,
|
||||
const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst, int len) {
|
||||
int n;
|
||||
for (n = 0; n + 32 <= len; n += 32, dst += 32 * 3) {
|
||||
__m128i R0, R1, R2, R3, G0, G1, G2, G3, B0, B1, B2, B3;
|
||||
@ -221,10 +216,9 @@ static void YuvToRgbRow_SSE41(const uint8_t* WEBP_RESTRICT y,
|
||||
}
|
||||
}
|
||||
|
||||
static void YuvToBgrRow_SSE41(const uint8_t* WEBP_RESTRICT y,
|
||||
const uint8_t* WEBP_RESTRICT u,
|
||||
const uint8_t* WEBP_RESTRICT v,
|
||||
uint8_t* WEBP_RESTRICT dst, int len) {
|
||||
static void YuvToBgrRow_SSE41(const uint8_t* y,
|
||||
const uint8_t* u, const uint8_t* v,
|
||||
uint8_t* dst, int len) {
|
||||
int n;
|
||||
for (n = 0; n + 32 <= len; n += 32, dst += 32 * 3) {
|
||||
__m128i R0, R1, R2, R3, G0, G1, G2, G3, B0, B1, B2, B3;
|
||||
@ -296,7 +290,7 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitSamplersSSE41(void) {
|
||||
// rrrr... rrrr... gggg... gggg... bbbb... bbbb....
|
||||
// Similar to PlanarTo24bHelper(), but in reverse order.
|
||||
static WEBP_INLINE void RGB24PackedToPlanar_SSE41(
|
||||
const uint8_t* WEBP_RESTRICT const rgb, __m128i* const out /*out[6]*/) {
|
||||
const uint8_t* const rgb, __m128i* const out /*out[6]*/) {
|
||||
const __m128i A0 = _mm_loadu_si128((const __m128i*)(rgb + 0));
|
||||
const __m128i A1 = _mm_loadu_si128((const __m128i*)(rgb + 16));
|
||||
const __m128i A2 = _mm_loadu_si128((const __m128i*)(rgb + 32));
|
||||
@ -340,7 +334,7 @@ static WEBP_INLINE void RGB24PackedToPlanar_SSE41(
|
||||
|
||||
// Convert 8 packed ARGB to r[], g[], b[]
|
||||
static WEBP_INLINE void RGB32PackedToPlanar_SSE41(
|
||||
const uint32_t* WEBP_RESTRICT const argb, __m128i* const rgb /*in[6]*/) {
|
||||
const uint32_t* const argb, __m128i* const rgb /*in[6]*/) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
__m128i a0 = LOAD_16(argb + 0);
|
||||
__m128i a1 = LOAD_16(argb + 4);
|
||||
@ -413,8 +407,7 @@ static WEBP_INLINE void ConvertRGBToUV_SSE41(const __m128i* const R,
|
||||
#undef MK_CST_16
|
||||
#undef TRANSFORM
|
||||
|
||||
static void ConvertRGB24ToY_SSE41(const uint8_t* WEBP_RESTRICT rgb,
|
||||
uint8_t* WEBP_RESTRICT y, int width) {
|
||||
static void ConvertRGB24ToY_SSE41(const uint8_t* rgb, uint8_t* y, int width) {
|
||||
const int max_width = width & ~31;
|
||||
int i;
|
||||
for (i = 0; i < max_width; rgb += 3 * 16 * 2) {
|
||||
@ -448,8 +441,7 @@ static void ConvertRGB24ToY_SSE41(const uint8_t* WEBP_RESTRICT rgb,
|
||||
}
|
||||
}
|
||||
|
||||
static void ConvertBGR24ToY_SSE41(const uint8_t* WEBP_RESTRICT bgr,
|
||||
uint8_t* WEBP_RESTRICT y, int width) {
|
||||
static void ConvertBGR24ToY_SSE41(const uint8_t* bgr, uint8_t* y, int width) {
|
||||
const int max_width = width & ~31;
|
||||
int i;
|
||||
for (i = 0; i < max_width; bgr += 3 * 16 * 2) {
|
||||
@ -483,8 +475,7 @@ static void ConvertBGR24ToY_SSE41(const uint8_t* WEBP_RESTRICT bgr,
|
||||
}
|
||||
}
|
||||
|
||||
static void ConvertARGBToY_SSE41(const uint32_t* WEBP_RESTRICT argb,
|
||||
uint8_t* WEBP_RESTRICT y, int width) {
|
||||
static void ConvertARGBToY_SSE41(const uint32_t* argb, uint8_t* y, int width) {
|
||||
const int max_width = width & ~15;
|
||||
int i;
|
||||
for (i = 0; i < max_width; i += 16) {
|
||||
@ -512,9 +503,8 @@ static void HorizontalAddPack_SSE41(const __m128i* const A,
|
||||
*out = _mm_packs_epi32(C, D);
|
||||
}
|
||||
|
||||
static void ConvertARGBToUV_SSE41(const uint32_t* WEBP_RESTRICT argb,
|
||||
uint8_t* WEBP_RESTRICT u,
|
||||
uint8_t* WEBP_RESTRICT v,
|
||||
static void ConvertARGBToUV_SSE41(const uint32_t* argb,
|
||||
uint8_t* u, uint8_t* v,
|
||||
int src_width, int do_store) {
|
||||
const int max_width = src_width & ~31;
|
||||
int i;
|
||||
@ -550,7 +540,7 @@ static void ConvertARGBToUV_SSE41(const uint32_t* WEBP_RESTRICT argb,
|
||||
|
||||
// Convert 16 packed ARGB 16b-values to r[], g[], b[]
|
||||
static WEBP_INLINE void RGBA32PackedToPlanar_16b_SSE41(
|
||||
const uint16_t* WEBP_RESTRICT const rgbx,
|
||||
const uint16_t* const rgbx,
|
||||
__m128i* const r, __m128i* const g, __m128i* const b) {
|
||||
const __m128i in0 = LOAD_16(rgbx + 0); // r0 | g0 | b0 |x| r1 | g1 | b1 |x
|
||||
const __m128i in1 = LOAD_16(rgbx + 8); // r2 | g2 | b2 |x| r3 | g3 | b3 |x
|
||||
@ -580,9 +570,8 @@ static WEBP_INLINE void RGBA32PackedToPlanar_16b_SSE41(
|
||||
*b = _mm_unpackhi_epi64(B1, B3);
|
||||
}
|
||||
|
||||
static void ConvertRGBA32ToUV_SSE41(const uint16_t* WEBP_RESTRICT rgb,
|
||||
uint8_t* WEBP_RESTRICT u,
|
||||
uint8_t* WEBP_RESTRICT v, int width) {
|
||||
static void ConvertRGBA32ToUV_SSE41(const uint16_t* rgb,
|
||||
uint8_t* u, uint8_t* v, int width) {
|
||||
const int max_width = width & ~15;
|
||||
const uint16_t* const last_rgb = rgb + 4 * max_width;
|
||||
while (rgb < last_rgb) {
|
||||
|
Loading…
Reference in New Issue
Block a user