refactor the PSNR / SSIM calculation code

-print_psnr is now much faster because it doesn't use the SSIM code.
The SSIM speed-up and re-write will come later.

Change-Id: Iabf565e0a8b41651d8164df1266cfeded4ab4823
This commit is contained in:
Pascal Massimino
2016-09-13 17:02:25 +02:00
parent 78363e9e51
commit 50c3d7da9a
4 changed files with 153 additions and 42 deletions

View File

@ -1365,8 +1365,56 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInitSSE2(void) {
VP8Mean16x4 = Mean16x4;
}
//------------------------------------------------------------------------------
// SSIM / PSNR entry point (TODO(skal): move to its own file later)
static uint32_t AccumulateSSE_SSE2(const uint8_t* src1,
const uint8_t* src2, int len) {
int i = 0;
uint32_t sse2 = 0;
if (len >= 16) {
const int limit = len - 32;
int32_t tmp[4];
__m128i sum1;
__m128i sum = _mm_setzero_si128();
__m128i a0 = _mm_loadu_si128((const __m128i*)&src1[i]);
__m128i b0 = _mm_loadu_si128((const __m128i*)&src2[i]);
i += 16;
while (i <= limit) {
const __m128i a1 = _mm_loadu_si128((const __m128i*)&src1[i]);
const __m128i b1 = _mm_loadu_si128((const __m128i*)&src2[i]);
__m128i sum2;
i += 16;
SubtractAndAccumulate(a0, b0, &sum1);
sum = _mm_add_epi32(sum, sum1);
a0 = _mm_loadu_si128((const __m128i*)&src1[i]);
b0 = _mm_loadu_si128((const __m128i*)&src2[i]);
i += 16;
SubtractAndAccumulate(a1, b1, &sum2);
sum = _mm_add_epi32(sum, sum2);
}
SubtractAndAccumulate(a0, b0, &sum1);
sum = _mm_add_epi32(sum, sum1);
_mm_storeu_si128((__m128i*)tmp, sum);
sse2 += (tmp[3] + tmp[2] + tmp[1] + tmp[0]);
}
for (; i < len; ++i) {
const int32_t diff = src1[i] - src2[i];
sse2 += diff * diff;
}
return sse2;
}
extern void VP8SSIMDspInitSSE2(void);
WEBP_TSAN_IGNORE_FUNCTION void VP8SSIMDspInitSSE2(void) {
VP8AccumulateSSE = AccumulateSSE_SSE2;
}
#else // !WEBP_USE_SSE2
WEBP_DSP_INIT_STUB(VP8EncDspInitSSE2)
WEBP_DSP_INIT_STUB(VP8SSIMDspInitSSE2)
#endif // WEBP_USE_SSE2