mirror of
https://github.com/webmproject/libwebp.git
synced 2025-07-13 06:24:27 +02:00
Simplify forward-WHT + SSE2 version
No precision loss was observed. Speed is not really faster (0.5% at most), as the forward-WHT isn't called often. Also: replaced an "int << 3" (undefined by the C spec for negative values) with an "int * 8" (supersedes https://gerrit.chromium.org/gerrit/#/c/48739/). Change-Id: I2d980ec2f20f4ff6be5636105ff4f1c70ffde401
This commit is contained in:
@ -453,6 +453,39 @@ static void FTransformSSE2(const uint8_t* src, const uint8_t* ref,
|
||||
}
|
||||
}
|
||||
|
||||
// Forward 4x4 Walsh-Hadamard transform (SSE2 version).
//
// Reads 16 input samples located at in[16 * k], k = 0..15 (i.e. one value
// every 16 entries; presumably the DC coefficient of each of 16 sub-blocks --
// confirm against the caller) and writes the 16 transformed values to
// out[0..15].
//
// All intermediate values are kept in 32 bits. The sum of 16 int16_t samples
// needs up to 21 bits, so 16-bit intermediates would silently wrap (or clip
// early) for large inputs. The final '>> 1' result is narrowed to int16_t
// with signed saturation.
static void FTransformWHTSSE2(const int16_t* in, int16_t* out) {
  int32_t tmp[16];
  int i;
  // First pass (scalar): 4-point butterfly over each group of four samples.
  // Group 'i' starts at in[64 * i] and its samples are 16 entries apart.
  for (i = 0; i < 4; ++i, in += 64) {
    const int a0 = (in[0 * 16] + in[2 * 16]);
    const int a1 = (in[1 * 16] + in[3 * 16]);
    const int a2 = (in[1 * 16] - in[3 * 16]);
    const int a3 = (in[0 * 16] - in[2 * 16]);
    tmp[0 + i * 4] = a0 + a1;
    tmp[1 + i * 4] = a3 + a2;
    tmp[2 + i * 4] = a3 - a2;
    tmp[3 + i * 4] = a0 - a1;
  }
  // Second pass (SIMD): same butterfly applied across the four rows of tmp[],
  // processing the four columns in parallel as 32-bit lanes.
  {
    const __m128i src0 = _mm_loadu_si128((const __m128i*)&tmp[0]);
    const __m128i src1 = _mm_loadu_si128((const __m128i*)&tmp[4]);
    const __m128i src2 = _mm_loadu_si128((const __m128i*)&tmp[8]);
    const __m128i src3 = _mm_loadu_si128((const __m128i*)&tmp[12]);
    const __m128i a0 = _mm_add_epi32(src0, src2);
    const __m128i a1 = _mm_add_epi32(src1, src3);
    const __m128i a2 = _mm_sub_epi32(src1, src3);
    const __m128i a3 = _mm_sub_epi32(src0, src2);
    // Arithmetic shift keeps the sign, matching a scalar '>> 1' on int.
    const __m128i b0 = _mm_srai_epi32(_mm_add_epi32(a0, a1), 1);
    const __m128i b1 = _mm_srai_epi32(_mm_add_epi32(a3, a2), 1);
    const __m128i b2 = _mm_srai_epi32(_mm_sub_epi32(a3, a2), 1);
    const __m128i b3 = _mm_srai_epi32(_mm_sub_epi32(a0, a1), 1);
    // Narrow 32b -> 16b with signed saturation: (b0 | b1) fills out[0..7],
    // (b2 | b3) fills out[8..15].
    const __m128i out01 = _mm_packs_epi32(b0, b1);
    const __m128i out23 = _mm_packs_epi32(b2, b3);
    _mm_storeu_si128((__m128i*)&out[0], out01);
    _mm_storeu_si128((__m128i*)&out[8], out23);
  }
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Metric
|
||||
|
||||
@ -919,6 +952,7 @@ void VP8EncDspInitSSE2(void) {
|
||||
VP8EncQuantizeBlock = QuantizeBlockSSE2;
|
||||
VP8ITransform = ITransformSSE2;
|
||||
VP8FTransform = FTransformSSE2;
|
||||
VP8FTransformWHT = FTransformWHTSSE2;
|
||||
VP8SSE16x16 = SSE16x16SSE2;
|
||||
VP8SSE16x8 = SSE16x8SSE2;
|
||||
VP8SSE8x8 = SSE8x8SSE2;
|
||||
|
Reference in New Issue
Block a user