mirror of
https://github.com/webmproject/libwebp.git
synced 2024-12-27 22:28:22 +01:00
refactor the PSNR / SSIM calculation code
-print_psnr is now much faster because it doesn't use the SSIM code. The SSIM speed-up and re-write will come later. Change-Id: Iabf565e0a8b41651d8164df1266cfeded4ab4823
This commit is contained in:
parent
78363e9e51
commit
50c3d7da9a
@ -250,7 +250,7 @@ extern VP8GetResidualCostFunc VP8GetResidualCost;
|
|||||||
void VP8EncDspCostInit(void);
|
void VP8EncDspCostInit(void);
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
// SSIM utils
|
// SSIM / PSNR utils
|
||||||
|
|
||||||
// struct for accumulating statistical moments
|
// struct for accumulating statistical moments
|
||||||
typedef struct {
|
typedef struct {
|
||||||
@ -275,6 +275,10 @@ typedef void (*VP8SSIMAccumulateFunc)(const uint8_t* src1, int stride1,
|
|||||||
extern VP8SSIMAccumulateFunc VP8SSIMAccumulate; // unclipped / unchecked
|
extern VP8SSIMAccumulateFunc VP8SSIMAccumulate; // unclipped / unchecked
|
||||||
extern VP8SSIMAccumulateClippedFunc VP8SSIMAccumulateClipped; // with clipping
|
extern VP8SSIMAccumulateClippedFunc VP8SSIMAccumulateClipped; // with clipping
|
||||||
|
|
||||||
|
typedef uint32_t (*VP8AccumulateSSEFunc)(const uint8_t* src1,
|
||||||
|
const uint8_t* src2, int len);
|
||||||
|
extern VP8AccumulateSSEFunc VP8AccumulateSSE;
|
||||||
|
|
||||||
// must be called before using any of the above directly
|
// must be called before using any of the above directly
|
||||||
void VP8SSIMDspInit(void);
|
void VP8SSIMDspInit(void);
|
||||||
|
|
||||||
|
@ -691,6 +691,7 @@ static void Copy16x8(const uint8_t* src, uint8_t* dst) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
|
// SSIM / PSNR
|
||||||
|
|
||||||
static void SSIMAccumulateClipped(const uint8_t* src1, int stride1,
|
static void SSIMAccumulateClipped(const uint8_t* src1, int stride1,
|
||||||
const uint8_t* src2, int stride2,
|
const uint8_t* src2, int stride2,
|
||||||
@ -737,8 +738,23 @@ static void SSIMAccumulate(const uint8_t* src1, int stride1,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Plain-C reference implementation installed as VP8AccumulateSSE.
// Returns the sum of squared differences between the first 'len' bytes of
// 'src1' and 'src2'. Returns 0 when 'len' is zero or negative (the loop body
// never runs).
static uint32_t AccumulateSSE(const uint8_t* src1,
                              const uint8_t* src2, int len) {
  int i;
  uint32_t sse2 = 0;
  // Worst case is len * 255 * 255, which only fits in 32 bits up to 65535.
  assert(len <= 65535);
  for (i = 0; i < len; ++i) {
    // uint8_t operands are promoted to int, so the difference can be negative.
    const int32_t diff = src1[i] - src2[i];
    // diff * diff is in [0, 65025]; the cast makes the conversion explicit.
    sse2 += (uint32_t)(diff * diff);
  }
  return sse2;
}
|
||||||
|
|
||||||
VP8SSIMAccumulateFunc VP8SSIMAccumulate;
|
VP8SSIMAccumulateFunc VP8SSIMAccumulate;
|
||||||
VP8SSIMAccumulateClippedFunc VP8SSIMAccumulateClipped;
|
VP8SSIMAccumulateClippedFunc VP8SSIMAccumulateClipped;
|
||||||
|
VP8AccumulateSSEFunc VP8AccumulateSSE;
|
||||||
|
|
||||||
|
extern void VP8SSIMDspInitSSE2(void);
|
||||||
|
|
||||||
static volatile VP8CPUInfo ssim_last_cpuinfo_used =
|
static volatile VP8CPUInfo ssim_last_cpuinfo_used =
|
||||||
(VP8CPUInfo)&ssim_last_cpuinfo_used;
|
(VP8CPUInfo)&ssim_last_cpuinfo_used;
|
||||||
@ -749,6 +765,15 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8SSIMDspInit(void) {
|
|||||||
VP8SSIMAccumulate = SSIMAccumulate;
|
VP8SSIMAccumulate = SSIMAccumulate;
|
||||||
VP8SSIMAccumulateClipped = SSIMAccumulateClipped;
|
VP8SSIMAccumulateClipped = SSIMAccumulateClipped;
|
||||||
|
|
||||||
|
VP8AccumulateSSE = AccumulateSSE;
|
||||||
|
if (VP8GetCPUInfo != NULL) {
|
||||||
|
#if defined(WEBP_USE_SSE2)
|
||||||
|
if (VP8GetCPUInfo(kSSE2)) {
|
||||||
|
VP8SSIMDspInitSSE2();
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
ssim_last_cpuinfo_used = VP8GetCPUInfo;
|
ssim_last_cpuinfo_used = VP8GetCPUInfo;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1365,8 +1365,56 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInitSSE2(void) {
|
|||||||
VP8Mean16x4 = Mean16x4;
|
VP8Mean16x4 = Mean16x4;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//------------------------------------------------------------------------------
|
||||||
|
// SSIM / PSNR entry point (TODO(skal): move to its own file later)
|
||||||
|
|
||||||
|
// SSE2 implementation of VP8AccumulateSSE: returns the sum of squared
// differences between the first 'len' bytes of 'src1' and 'src2'.
// Input is consumed 16 bytes at a time with unaligned loads; whatever is left
// once fewer than 32 bytes remain past the last loaded block (or everything,
// when len < 16) goes through the scalar loop at the end.
// NOTE(review): SubtractAndAccumulate() is defined outside this excerpt; it is
// presumably writing four 32-bit partial sums of squared byte differences into
// its third argument -- confirm against its definition.
static uint32_t AccumulateSSE_SSE2(const uint8_t* src1,
                                   const uint8_t* src2, int len) {
  int i = 0;
  uint32_t sse2 = 0;
  if (len >= 16) {
    // While i <= limit, two more 16-byte loads stay within 'len' bytes.
    const int limit = len - 32;
    // Lanes are read back as unsigned: the four-lane total may exceed
    // INT32_MAX, and adding it up as int32_t would be signed overflow.
    // Unsigned addition wraps modulo 2^32, like the scalar accumulator.
    uint32_t tmp[4];
    __m128i sum1;
    __m128i sum = _mm_setzero_si128();
    // The first block is loaded ahead of the loop; each iteration processes
    // the block loaded previously while fetching the next ones.
    __m128i a0 = _mm_loadu_si128((const __m128i*)&src1[i]);
    __m128i b0 = _mm_loadu_si128((const __m128i*)&src2[i]);
    i += 16;
    while (i <= limit) {
      const __m128i a1 = _mm_loadu_si128((const __m128i*)&src1[i]);
      const __m128i b1 = _mm_loadu_si128((const __m128i*)&src2[i]);
      __m128i sum2;
      i += 16;
      SubtractAndAccumulate(a0, b0, &sum1);
      sum = _mm_add_epi32(sum, sum1);
      a0 = _mm_loadu_si128((const __m128i*)&src1[i]);
      b0 = _mm_loadu_si128((const __m128i*)&src2[i]);
      i += 16;
      SubtractAndAccumulate(a1, b1, &sum2);
      sum = _mm_add_epi32(sum, sum2);
    }
    // Flush the block that is still pending in a0/b0.
    SubtractAndAccumulate(a0, b0, &sum1);
    sum = _mm_add_epi32(sum, sum1);
    _mm_storeu_si128((__m128i*)tmp, sum);
    sse2 += (tmp[3] + tmp[2] + tmp[1] + tmp[0]);
  }

  // Scalar tail.
  for (; i < len; ++i) {
    const int32_t diff = src1[i] - src2[i];
    sse2 += (uint32_t)(diff * diff);
  }
  return sse2;
}
|
||||||
|
|
||||||
|
extern void VP8SSIMDspInitSSE2(void);
|
||||||
|
|
||||||
|
WEBP_TSAN_IGNORE_FUNCTION void VP8SSIMDspInitSSE2(void) {
|
||||||
|
VP8AccumulateSSE = AccumulateSSE_SSE2;
|
||||||
|
}
|
||||||
|
|
||||||
#else // !WEBP_USE_SSE2
|
#else // !WEBP_USE_SSE2
|
||||||
|
|
||||||
WEBP_DSP_INIT_STUB(VP8EncDspInitSSE2)
|
WEBP_DSP_INIT_STUB(VP8EncDspInitSSE2)
|
||||||
|
WEBP_DSP_INIT_STUB(VP8SSIMDspInitSSE2)
|
||||||
|
|
||||||
#endif // WEBP_USE_SSE2
|
#endif // WEBP_USE_SSE2
|
||||||
|
@ -25,9 +25,9 @@
|
|||||||
|
|
||||||
#define RADIUS 2 // search radius. Shouldn't be too large.
|
#define RADIUS 2 // search radius. Shouldn't be too large.
|
||||||
|
|
||||||
static void AccumulateLSIM(const uint8_t* src, int src_stride,
|
static double AccumulateLSIM(const uint8_t* src, int src_stride,
|
||||||
const uint8_t* ref, int ref_stride,
|
const uint8_t* ref, int ref_stride,
|
||||||
int w, int h, VP8DistoStats* stats) {
|
int w, int h) {
|
||||||
int x, y;
|
int x, y;
|
||||||
double total_sse = 0.;
|
double total_sse = 0.;
|
||||||
for (y = 0; y < h; ++y) {
|
for (y = 0; y < h; ++y) {
|
||||||
@ -50,37 +50,52 @@ static void AccumulateLSIM(const uint8_t* src, int src_stride,
|
|||||||
total_sse += best_sse;
|
total_sse += best_sse;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
stats->w = w * h;
|
return total_sse;
|
||||||
stats->xm = 0;
|
|
||||||
stats->ym = 0;
|
|
||||||
stats->xxm = total_sse;
|
|
||||||
stats->yym = 0;
|
|
||||||
stats->xxm = 0;
|
|
||||||
}
|
}
|
||||||
#undef RADIUS
|
#undef RADIUS
|
||||||
|
|
||||||
|
// Returns the sum of squared differences between two planes of 'w' x 'h'
// samples. 'src' and 'ref' point to the first row of each plane, and
// 'src_stride' / 'ref_stride' are the byte offsets between consecutive rows.
// Each row is handed to VP8AccumulateSSE; the per-row results are summed in a
// double.
static double AccumulateSSE(const uint8_t* src, int src_stride,
                            const uint8_t* ref, int ref_stride,
                            int w, int h) {
  int y;
  double total_sse = 0.;
  for (y = 0; y < h; ++y) {
    total_sse += VP8AccumulateSSE(src, ref, w);
    src += src_stride;
    ref += ref_stride;
  }
  return total_sse;
}
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
// Distortion
|
// Distortion
|
||||||
|
|
||||||
// Max value returned in case of exact similarity.
|
// Max value returned in case of exact similarity.
|
||||||
static const double kMinDistortion_dB = 99.;
|
static const double kMinDistortion_dB = 99.;
|
||||||
static float GetPSNR(const double v) {
|
|
||||||
return (float)((v > 0.) ? -4.3429448 * log(v / (255 * 255.))
|
static double GetPSNR(double v, double size) {
|
||||||
: kMinDistortion_dB);
|
return (v > 0. && size > 0.) ? -4.3429448 * log(v / (size * 255 * 255.))
|
||||||
|
: kMinDistortion_dB;
|
||||||
|
}
|
||||||
|
static double GetLogSSIM(double v, double size) {
|
||||||
|
v = (size > 0.) ? v / size : 1.;
|
||||||
|
return (v < 1.) ? -10.0 * log10(1. - v) : kMinDistortion_dB;
|
||||||
}
|
}
|
||||||
|
|
||||||
int WebPPictureDistortion(const WebPPicture* src, const WebPPicture* ref,
|
int WebPPictureDistortion(const WebPPicture* src, const WebPPicture* ref,
|
||||||
int type, float result[5]) {
|
int type, float results[5]) {
|
||||||
|
int w, h, c;
|
||||||
|
double disto[4] = { 0. };
|
||||||
|
double sizes[4] = { 0. };
|
||||||
|
double total_size = 0., total_disto = 0.;
|
||||||
VP8DistoStats stats[5];
|
VP8DistoStats stats[5];
|
||||||
int w, h;
|
|
||||||
|
|
||||||
memset(stats, 0, sizeof(stats));
|
|
||||||
|
|
||||||
VP8SSIMDspInit();
|
VP8SSIMDspInit();
|
||||||
|
memset(stats, 0, sizeof(stats));
|
||||||
|
|
||||||
if (src == NULL || ref == NULL ||
|
if (src == NULL || ref == NULL ||
|
||||||
src->width != ref->width || src->height != ref->height ||
|
src->width != ref->width || src->height != ref->height ||
|
||||||
src->use_argb != ref->use_argb || result == NULL) {
|
src->use_argb != ref->use_argb || results == NULL) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
w = src->width;
|
w = src->width;
|
||||||
@ -90,7 +105,7 @@ int WebPPictureDistortion(const WebPPicture* src, const WebPPicture* ref,
|
|||||||
if (src->argb == NULL || ref->argb == NULL) {
|
if (src->argb == NULL || ref->argb == NULL) {
|
||||||
return 0;
|
return 0;
|
||||||
} else {
|
} else {
|
||||||
int i, j, c;
|
int i, j;
|
||||||
uint8_t* tmp1, *tmp2;
|
uint8_t* tmp1, *tmp2;
|
||||||
uint8_t* const tmp_plane =
|
uint8_t* const tmp_plane =
|
||||||
(uint8_t*)WebPSafeMalloc(2ULL * w * h, sizeof(*tmp_plane));
|
(uint8_t*)WebPSafeMalloc(2ULL * w * h, sizeof(*tmp_plane));
|
||||||
@ -104,8 +119,11 @@ int WebPPictureDistortion(const WebPPicture* src, const WebPPicture* ref,
|
|||||||
tmp2[j * w + i] = ref->argb[i + j * ref->argb_stride] >> (c * 8);
|
tmp2[j * w + i] = ref->argb[i + j * ref->argb_stride] >> (c * 8);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
sizes[c] = w * h;
|
||||||
if (type >= 2) {
|
if (type >= 2) {
|
||||||
AccumulateLSIM(tmp1, w, tmp2, w, w, h, &stats[c]);
|
disto[c] = AccumulateLSIM(tmp1, w, tmp2, w, w, h);
|
||||||
|
} else if (type == 0) {
|
||||||
|
disto[c] = AccumulateSSE(tmp1, w, tmp2, w, w, h);
|
||||||
} else {
|
} else {
|
||||||
VP8SSIMAccumulatePlane(tmp1, w, tmp2, w, w, h, &stats[c]);
|
VP8SSIMAccumulatePlane(tmp1, w, tmp2, w, w, h, &stats[c]);
|
||||||
}
|
}
|
||||||
@ -127,16 +145,31 @@ int WebPPictureDistortion(const WebPPicture* src, const WebPPicture* ref,
|
|||||||
|
|
||||||
uv_w = (src->width + 1) >> 1;
|
uv_w = (src->width + 1) >> 1;
|
||||||
uv_h = (src->height + 1) >> 1;
|
uv_h = (src->height + 1) >> 1;
|
||||||
|
sizes[0] = w * h;
|
||||||
|
sizes[1] = sizes[2] = uv_w * uv_h;
|
||||||
|
sizes[3] = has_alpha ? w * h : 0.;
|
||||||
|
|
||||||
if (type >= 2) {
|
if (type >= 2) {
|
||||||
AccumulateLSIM(src->y, src->y_stride, ref->y, ref->y_stride,
|
disto[0] = AccumulateLSIM(src->y, src->y_stride, ref->y, ref->y_stride,
|
||||||
w, h, &stats[0]);
|
w, h);
|
||||||
AccumulateLSIM(src->u, src->uv_stride, ref->u, ref->uv_stride,
|
disto[1] = AccumulateLSIM(src->u, src->uv_stride, ref->u, ref->uv_stride,
|
||||||
uv_w, uv_h, &stats[1]);
|
uv_w, uv_h);
|
||||||
AccumulateLSIM(src->v, src->uv_stride, ref->v, ref->uv_stride,
|
disto[2] = AccumulateLSIM(src->v, src->uv_stride, ref->v, ref->uv_stride,
|
||||||
uv_w, uv_h, &stats[2]);
|
uv_w, uv_h);
|
||||||
if (has_alpha) {
|
if (has_alpha) {
|
||||||
AccumulateLSIM(src->a, src->a_stride, ref->a, ref->a_stride,
|
disto[3] = AccumulateLSIM(src->a, src->a_stride, ref->a, ref->a_stride,
|
||||||
w, h, &stats[3]);
|
w, h);
|
||||||
|
}
|
||||||
|
} else if (type == 0) {
|
||||||
|
disto[0] = AccumulateSSE(src->y, src->y_stride, ref->y, ref->y_stride,
|
||||||
|
w, h);
|
||||||
|
disto[1] = AccumulateSSE(src->u, src->uv_stride, ref->u, ref->uv_stride,
|
||||||
|
uv_w, uv_h);
|
||||||
|
disto[2] = AccumulateSSE(src->v, src->uv_stride, ref->v, ref->uv_stride,
|
||||||
|
uv_w, uv_h);
|
||||||
|
if (has_alpha) {
|
||||||
|
disto[3] = AccumulateSSE(src->a, src->a_stride, ref->a, ref->a_stride,
|
||||||
|
w, h);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
VP8SSIMAccumulatePlane(src->y, src->y_stride,
|
VP8SSIMAccumulatePlane(src->y, src->y_stride,
|
||||||
@ -155,22 +188,23 @@ int WebPPictureDistortion(const WebPPicture* src, const WebPPicture* ref,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Final stat calculations.
|
|
||||||
{
|
for (c = 0; c < 4; ++c) {
|
||||||
int c;
|
if (type == 1) {
|
||||||
for (c = 0; c <= 4; ++c) {
|
results[c] = (float)GetLogSSIM(VP8SSIMGet(&stats[c]), 1.);
|
||||||
if (type == 1) {
|
VP8SSIMAddStats(&stats[c], &stats[4]);
|
||||||
const double v = VP8SSIMGet(&stats[c]);
|
} else {
|
||||||
result[c] = (float)((v < 1.) ? -10.0 * log10(1. - v)
|
total_disto += disto[c];
|
||||||
: kMinDistortion_dB);
|
total_size += sizes[c];
|
||||||
} else {
|
results[c] = (float)GetPSNR(disto[c], sizes[c]);
|
||||||
const double v = VP8SSIMGetSquaredError(&stats[c]);
|
|
||||||
result[c] = GetPSNR(v);
|
|
||||||
}
|
|
||||||
// Accumulate forward
|
|
||||||
if (c < 4) VP8SSIMAddStats(&stats[c], &stats[4]);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (type == 1) {
|
||||||
|
results[4] = (float)GetLogSSIM(VP8SSIMGet(&stats[4]), 1.);
|
||||||
|
} else {
|
||||||
|
results[4] = (float)GetPSNR(total_disto, total_size);
|
||||||
|
}
|
||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user