mirror of
https://github.com/webmproject/libwebp.git
synced 2024-12-25 21:28:22 +01:00
Simplify the texture evaluation in Disto4x4()
We don't need to use the exact forward transform here, since this is only a rough evaluation; some shifts and rounding constants have therefore been removed. Change-Id: I3fdf8b4fe9720473894155e1ad0345f4d1fd9a33
This commit is contained in:
parent
a7305c2ef0
commit
e5c3b3f554
@ -569,30 +569,30 @@ static int TTransform(const uint8_t* in, const uint16_t* w) {
|
|||||||
int i;
|
int i;
|
||||||
// horizontal pass
|
// horizontal pass
|
||||||
for (i = 0; i < 4; ++i, in += BPS) {
|
for (i = 0; i < 4; ++i, in += BPS) {
|
||||||
const int a0 = (in[0] + in[2]) << 2;
|
const int a0 = in[0] + in[2];
|
||||||
const int a1 = (in[1] + in[3]) << 2;
|
const int a1 = in[1] + in[3];
|
||||||
const int a2 = (in[1] - in[3]) << 2;
|
const int a2 = in[1] - in[3];
|
||||||
const int a3 = (in[0] - in[2]) << 2;
|
const int a3 = in[0] - in[2];
|
||||||
tmp[0 + i * 4] = a0 + a1 + (a0 != 0);
|
tmp[0 + i * 4] = a0 + a1;
|
||||||
tmp[1 + i * 4] = a3 + a2;
|
tmp[1 + i * 4] = a3 + a2;
|
||||||
tmp[2 + i * 4] = a3 - a2;
|
tmp[2 + i * 4] = a3 - a2;
|
||||||
tmp[3 + i * 4] = a0 - a1;
|
tmp[3 + i * 4] = a0 - a1;
|
||||||
}
|
}
|
||||||
// vertical pass
|
// vertical pass
|
||||||
for (i = 0; i < 4; ++i, ++w) {
|
for (i = 0; i < 4; ++i, ++w) {
|
||||||
const int a0 = (tmp[0 + i] + tmp[8 + i]);
|
const int a0 = tmp[0 + i] + tmp[8 + i];
|
||||||
const int a1 = (tmp[4 + i] + tmp[12+ i]);
|
const int a1 = tmp[4 + i] + tmp[12+ i];
|
||||||
const int a2 = (tmp[4 + i] - tmp[12+ i]);
|
const int a2 = tmp[4 + i] - tmp[12+ i];
|
||||||
const int a3 = (tmp[0 + i] - tmp[8 + i]);
|
const int a3 = tmp[0 + i] - tmp[8 + i];
|
||||||
const int b0 = a0 + a1;
|
const int b0 = a0 + a1;
|
||||||
const int b1 = a3 + a2;
|
const int b1 = a3 + a2;
|
||||||
const int b2 = a3 - a2;
|
const int b2 = a3 - a2;
|
||||||
const int b3 = a0 - a1;
|
const int b3 = a0 - a1;
|
||||||
// abs((b + (b<0) + 3) >> 3) = (abs(b) + 3) >> 3
|
|
||||||
sum += w[ 0] * ((abs(b0) + 3) >> 3);
|
sum += w[ 0] * abs(b0);
|
||||||
sum += w[ 4] * ((abs(b1) + 3) >> 3);
|
sum += w[ 4] * abs(b1);
|
||||||
sum += w[ 8] * ((abs(b2) + 3) >> 3);
|
sum += w[ 8] * abs(b2);
|
||||||
sum += w[12] * ((abs(b3) + 3) >> 3);
|
sum += w[12] * abs(b3);
|
||||||
}
|
}
|
||||||
return sum;
|
return sum;
|
||||||
}
|
}
|
||||||
@ -601,7 +601,7 @@ static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
|
|||||||
const uint16_t* const w) {
|
const uint16_t* const w) {
|
||||||
const int sum1 = TTransform(a, w);
|
const int sum1 = TTransform(a, w);
|
||||||
const int sum2 = TTransform(b, w);
|
const int sum2 = TTransform(b, w);
|
||||||
return (abs(sum2 - sum1) + 8) >> 4;
|
return abs(sum2 - sum1) >> 5;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
|
static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
|
||||||
|
@ -502,8 +502,6 @@ static int TTransformSSE2(const uint8_t* inA, const uint8_t* inB,
|
|||||||
int32_t sum[4];
|
int32_t sum[4];
|
||||||
__m128i tmp_0, tmp_1, tmp_2, tmp_3;
|
__m128i tmp_0, tmp_1, tmp_2, tmp_3;
|
||||||
const __m128i zero = _mm_setzero_si128();
|
const __m128i zero = _mm_setzero_si128();
|
||||||
const __m128i one = _mm_set1_epi16(1);
|
|
||||||
const __m128i three = _mm_set1_epi16(3);
|
|
||||||
|
|
||||||
// Load, combine and transpose inputs.
|
// Load, combine and transpose inputs.
|
||||||
{
|
{
|
||||||
@ -550,17 +548,14 @@ static int TTransformSSE2(const uint8_t* inA, const uint8_t* inB,
|
|||||||
// Horizontal pass and subsequent transpose.
|
// Horizontal pass and subsequent transpose.
|
||||||
{
|
{
|
||||||
// Calculate a and b (two 4x4 at once).
|
// Calculate a and b (two 4x4 at once).
|
||||||
const __m128i a0 = _mm_slli_epi16(_mm_add_epi16(tmp_0, tmp_2), 2);
|
const __m128i a0 = _mm_add_epi16(tmp_0, tmp_2);
|
||||||
const __m128i a1 = _mm_slli_epi16(_mm_add_epi16(tmp_1, tmp_3), 2);
|
const __m128i a1 = _mm_add_epi16(tmp_1, tmp_3);
|
||||||
const __m128i a2 = _mm_slli_epi16(_mm_sub_epi16(tmp_1, tmp_3), 2);
|
const __m128i a2 = _mm_sub_epi16(tmp_1, tmp_3);
|
||||||
const __m128i a3 = _mm_slli_epi16(_mm_sub_epi16(tmp_0, tmp_2), 2);
|
const __m128i a3 = _mm_sub_epi16(tmp_0, tmp_2);
|
||||||
// b0_extra = (a0 != 0);
|
const __m128i b0 = _mm_add_epi16(a0, a1);
|
||||||
const __m128i b0_extra = _mm_andnot_si128(_mm_cmpeq_epi16 (a0, zero), one);
|
|
||||||
const __m128i b0_base = _mm_add_epi16(a0, a1);
|
|
||||||
const __m128i b1 = _mm_add_epi16(a3, a2);
|
const __m128i b1 = _mm_add_epi16(a3, a2);
|
||||||
const __m128i b2 = _mm_sub_epi16(a3, a2);
|
const __m128i b2 = _mm_sub_epi16(a3, a2);
|
||||||
const __m128i b3 = _mm_sub_epi16(a0, a1);
|
const __m128i b3 = _mm_sub_epi16(a0, a1);
|
||||||
const __m128i b0 = _mm_add_epi16(b0_base, b0_extra);
|
|
||||||
// a00 a01 a02 a03 b00 b01 b02 b03
|
// a00 a01 a02 a03 b00 b01 b02 b03
|
||||||
// a10 a11 a12 a13 b10 b11 b12 b13
|
// a10 a11 a12 a13 b10 b11 b12 b13
|
||||||
// a20 a21 a22 a23 b20 b21 b22 b23
|
// a20 a21 a22 a23 b20 b21 b22 b23
|
||||||
@ -635,19 +630,6 @@ static int TTransformSSE2(const uint8_t* inA, const uint8_t* inB,
|
|||||||
B_b2 = _mm_sub_epi16(B_b2, sign_B_b2);
|
B_b2 = _mm_sub_epi16(B_b2, sign_B_b2);
|
||||||
}
|
}
|
||||||
|
|
||||||
// b = abs(b) + 3
|
|
||||||
A_b0 = _mm_add_epi16(A_b0, three);
|
|
||||||
A_b2 = _mm_add_epi16(A_b2, three);
|
|
||||||
B_b0 = _mm_add_epi16(B_b0, three);
|
|
||||||
B_b2 = _mm_add_epi16(B_b2, three);
|
|
||||||
|
|
||||||
// abs((b + (b<0) + 3) >> 3) = (abs(b) + 3) >> 3
|
|
||||||
// b = (abs(b) + 3) >> 3
|
|
||||||
A_b0 = _mm_srai_epi16(A_b0, 3);
|
|
||||||
A_b2 = _mm_srai_epi16(A_b2, 3);
|
|
||||||
B_b0 = _mm_srai_epi16(B_b0, 3);
|
|
||||||
B_b2 = _mm_srai_epi16(B_b2, 3);
|
|
||||||
|
|
||||||
// weighted sums
|
// weighted sums
|
||||||
A_b0 = _mm_madd_epi16(A_b0, w_0);
|
A_b0 = _mm_madd_epi16(A_b0, w_0);
|
||||||
A_b2 = _mm_madd_epi16(A_b2, w_8);
|
A_b2 = _mm_madd_epi16(A_b2, w_8);
|
||||||
@ -666,7 +648,7 @@ static int TTransformSSE2(const uint8_t* inA, const uint8_t* inB,
|
|||||||
static int Disto4x4SSE2(const uint8_t* const a, const uint8_t* const b,
|
static int Disto4x4SSE2(const uint8_t* const a, const uint8_t* const b,
|
||||||
const uint16_t* const w) {
|
const uint16_t* const w) {
|
||||||
const int diff_sum = TTransformSSE2(a, b, w);
|
const int diff_sum = TTransformSSE2(a, b, w);
|
||||||
return (abs(diff_sum) + 8) >> 4;
|
return abs(diff_sum) >> 5;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int Disto16x16SSE2(const uint8_t* const a, const uint8_t* const b,
|
static int Disto16x16SSE2(const uint8_t* const a, const uint8_t* const b,
|
||||||
|
Loading…
Reference in New Issue
Block a user