Mirror of https://github.com/webmproject/libwebp.git (synced 2025-10-31 18:35:41 +01:00)
			
		
		
		
	Refactor the PSNR / SSIM calculation code
-print_psnr is now much faster because it no longer uses the SSIM code. The SSIM speed-up and rewrite will come later.

Change-Id: Iabf565e0a8b41651d8164df1266cfeded4ab4823
This commit is contained in:
		| @@ -250,7 +250,7 @@ extern VP8GetResidualCostFunc VP8GetResidualCost; | |||||||
| void VP8EncDspCostInit(void); | void VP8EncDspCostInit(void); | ||||||
|  |  | ||||||
| //------------------------------------------------------------------------------ | //------------------------------------------------------------------------------ | ||||||
| // SSIM utils | // SSIM / PSNR utils | ||||||
|  |  | ||||||
| // struct for accumulating statistical moments | // struct for accumulating statistical moments | ||||||
| typedef struct { | typedef struct { | ||||||
| @@ -275,6 +275,10 @@ typedef void (*VP8SSIMAccumulateFunc)(const uint8_t* src1, int stride1, | |||||||
| extern VP8SSIMAccumulateFunc VP8SSIMAccumulate;         // unclipped / unchecked | extern VP8SSIMAccumulateFunc VP8SSIMAccumulate;         // unclipped / unchecked | ||||||
| extern VP8SSIMAccumulateClippedFunc VP8SSIMAccumulateClipped;   // with clipping | extern VP8SSIMAccumulateClippedFunc VP8SSIMAccumulateClipped;   // with clipping | ||||||
|  |  | ||||||
|  | typedef uint32_t (*VP8AccumulateSSEFunc)(const uint8_t* src1, | ||||||
|  |                                          const uint8_t* src2, int len); | ||||||
|  | extern VP8AccumulateSSEFunc VP8AccumulateSSE; | ||||||
|  |  | ||||||
| // must be called before using any of the above directly | // must be called before using any of the above directly | ||||||
| void VP8SSIMDspInit(void); | void VP8SSIMDspInit(void); | ||||||
|  |  | ||||||
|   | |||||||
| @@ -691,6 +691,7 @@ static void Copy16x8(const uint8_t* src, uint8_t* dst) { | |||||||
| } | } | ||||||
|  |  | ||||||
| //------------------------------------------------------------------------------ | //------------------------------------------------------------------------------ | ||||||
|  | // SSIM / PSNR | ||||||
|  |  | ||||||
| static void SSIMAccumulateClipped(const uint8_t* src1, int stride1, | static void SSIMAccumulateClipped(const uint8_t* src1, int stride1, | ||||||
|                                   const uint8_t* src2, int stride2, |                                   const uint8_t* src2, int stride2, | ||||||
| @@ -737,8 +738,23 @@ static void SSIMAccumulate(const uint8_t* src1, int stride1, | |||||||
|   } |   } | ||||||
| } | } | ||||||
|  |  | ||||||
|  | static uint32_t AccumulateSSE(const uint8_t* src1, | ||||||
|  |                               const uint8_t* src2, int len) { | ||||||
|  |   int i; | ||||||
|  |   uint32_t sse2 = 0; | ||||||
|  |   assert(len <= 65535);  // to ensure that accumulation fits within uint32_t | ||||||
|  |   for (i = 0; i < len; ++i) { | ||||||
|  |     const int32_t diff = src1[i] - src2[i]; | ||||||
|  |     sse2 += diff * diff; | ||||||
|  |   } | ||||||
|  |   return sse2; | ||||||
|  | } | ||||||
|  |  | ||||||
| VP8SSIMAccumulateFunc VP8SSIMAccumulate; | VP8SSIMAccumulateFunc VP8SSIMAccumulate; | ||||||
| VP8SSIMAccumulateClippedFunc VP8SSIMAccumulateClipped; | VP8SSIMAccumulateClippedFunc VP8SSIMAccumulateClipped; | ||||||
|  | VP8AccumulateSSEFunc VP8AccumulateSSE; | ||||||
|  |  | ||||||
|  | extern void VP8SSIMDspInitSSE2(void); | ||||||
|  |  | ||||||
| static volatile VP8CPUInfo ssim_last_cpuinfo_used = | static volatile VP8CPUInfo ssim_last_cpuinfo_used = | ||||||
|     (VP8CPUInfo)&ssim_last_cpuinfo_used; |     (VP8CPUInfo)&ssim_last_cpuinfo_used; | ||||||
| @@ -749,6 +765,15 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8SSIMDspInit(void) { | |||||||
|   VP8SSIMAccumulate = SSIMAccumulate; |   VP8SSIMAccumulate = SSIMAccumulate; | ||||||
|   VP8SSIMAccumulateClipped = SSIMAccumulateClipped; |   VP8SSIMAccumulateClipped = SSIMAccumulateClipped; | ||||||
|  |  | ||||||
|  |   VP8AccumulateSSE = AccumulateSSE; | ||||||
|  |   if (VP8GetCPUInfo != NULL) { | ||||||
|  | #if defined(WEBP_USE_SSE2) | ||||||
|  |     if (VP8GetCPUInfo(kSSE2)) { | ||||||
|  |       VP8SSIMDspInitSSE2(); | ||||||
|  |     } | ||||||
|  | #endif | ||||||
|  |   } | ||||||
|  |  | ||||||
|   ssim_last_cpuinfo_used = VP8GetCPUInfo; |   ssim_last_cpuinfo_used = VP8GetCPUInfo; | ||||||
| } | } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -1365,8 +1365,56 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInitSSE2(void) { | |||||||
|   VP8Mean16x4 = Mean16x4; |   VP8Mean16x4 = Mean16x4; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | //------------------------------------------------------------------------------ | ||||||
|  | // SSIM / PSNR entry point (TODO(skal): move to its own file later) | ||||||
|  |  | ||||||
|  | static uint32_t AccumulateSSE_SSE2(const uint8_t* src1, | ||||||
|  |                                    const uint8_t* src2, int len) { | ||||||
|  |   int i = 0; | ||||||
|  |   uint32_t sse2 = 0; | ||||||
|  |   if (len >= 16) { | ||||||
|  |     const int limit = len - 32; | ||||||
|  |     int32_t tmp[4]; | ||||||
|  |     __m128i sum1; | ||||||
|  |     __m128i sum = _mm_setzero_si128(); | ||||||
|  |     __m128i a0 = _mm_loadu_si128((const __m128i*)&src1[i]); | ||||||
|  |     __m128i b0 = _mm_loadu_si128((const __m128i*)&src2[i]); | ||||||
|  |     i += 16; | ||||||
|  |     while (i <= limit) { | ||||||
|  |       const __m128i a1 = _mm_loadu_si128((const __m128i*)&src1[i]); | ||||||
|  |       const __m128i b1 = _mm_loadu_si128((const __m128i*)&src2[i]); | ||||||
|  |       __m128i sum2; | ||||||
|  |       i += 16; | ||||||
|  |       SubtractAndAccumulate(a0, b0, &sum1); | ||||||
|  |       sum = _mm_add_epi32(sum, sum1); | ||||||
|  |       a0 = _mm_loadu_si128((const __m128i*)&src1[i]); | ||||||
|  |       b0 = _mm_loadu_si128((const __m128i*)&src2[i]); | ||||||
|  |       i += 16; | ||||||
|  |       SubtractAndAccumulate(a1, b1, &sum2); | ||||||
|  |       sum = _mm_add_epi32(sum, sum2); | ||||||
|  |     } | ||||||
|  |     SubtractAndAccumulate(a0, b0, &sum1); | ||||||
|  |     sum = _mm_add_epi32(sum, sum1); | ||||||
|  |     _mm_storeu_si128((__m128i*)tmp, sum); | ||||||
|  |     sse2 += (tmp[3] + tmp[2] + tmp[1] + tmp[0]); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   for (; i < len; ++i) { | ||||||
|  |     const int32_t diff = src1[i] - src2[i]; | ||||||
|  |     sse2 += diff * diff; | ||||||
|  |   } | ||||||
|  |   return sse2; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | extern void VP8SSIMDspInitSSE2(void); | ||||||
|  |  | ||||||
|  | WEBP_TSAN_IGNORE_FUNCTION void VP8SSIMDspInitSSE2(void) { | ||||||
|  |   VP8AccumulateSSE = AccumulateSSE_SSE2; | ||||||
|  | } | ||||||
|  |  | ||||||
| #else  // !WEBP_USE_SSE2 | #else  // !WEBP_USE_SSE2 | ||||||
|  |  | ||||||
| WEBP_DSP_INIT_STUB(VP8EncDspInitSSE2) | WEBP_DSP_INIT_STUB(VP8EncDspInitSSE2) | ||||||
|  | WEBP_DSP_INIT_STUB(VP8SSIMDspInitSSE2) | ||||||
|  |  | ||||||
| #endif  // WEBP_USE_SSE2 | #endif  // WEBP_USE_SSE2 | ||||||
|   | |||||||
| @@ -25,9 +25,9 @@ | |||||||
|  |  | ||||||
| #define RADIUS 2  // search radius. Shouldn't be too large. | #define RADIUS 2  // search radius. Shouldn't be too large. | ||||||
|  |  | ||||||
| static void AccumulateLSIM(const uint8_t* src, int src_stride, | static double AccumulateLSIM(const uint8_t* src, int src_stride, | ||||||
|                            const uint8_t* ref, int ref_stride, |                              const uint8_t* ref, int ref_stride, | ||||||
|                            int w, int h, VP8DistoStats* stats) { |                              int w, int h) { | ||||||
|   int x, y; |   int x, y; | ||||||
|   double total_sse = 0.; |   double total_sse = 0.; | ||||||
|   for (y = 0; y < h; ++y) { |   for (y = 0; y < h; ++y) { | ||||||
| @@ -50,37 +50,52 @@ static void AccumulateLSIM(const uint8_t* src, int src_stride, | |||||||
|       total_sse += best_sse; |       total_sse += best_sse; | ||||||
|     } |     } | ||||||
|   } |   } | ||||||
|   stats->w = w * h; |   return total_sse; | ||||||
|   stats->xm = 0; |  | ||||||
|   stats->ym = 0; |  | ||||||
|   stats->xxm = total_sse; |  | ||||||
|   stats->yym = 0; |  | ||||||
|   stats->xxm = 0; |  | ||||||
| } | } | ||||||
| #undef RADIUS | #undef RADIUS | ||||||
|  |  | ||||||
|  | static double AccumulateSSE(const uint8_t* src, int src_stride, | ||||||
|  |                             const uint8_t* ref, int ref_stride, | ||||||
|  |                             int w, int h) { | ||||||
|  |   int y; | ||||||
|  |   double total_sse = 0.; | ||||||
|  |   for (y = 0; y < h; ++y) { | ||||||
|  |     total_sse += VP8AccumulateSSE(src, ref, w); | ||||||
|  |     src += src_stride; | ||||||
|  |     ref += ref_stride; | ||||||
|  |   } | ||||||
|  |   return total_sse; | ||||||
|  | } | ||||||
|  |  | ||||||
| //------------------------------------------------------------------------------ | //------------------------------------------------------------------------------ | ||||||
| // Distortion | // Distortion | ||||||
|  |  | ||||||
| // Max value returned in case of exact similarity. | // Max value returned in case of exact similarity. | ||||||
| static const double kMinDistortion_dB = 99.; | static const double kMinDistortion_dB = 99.; | ||||||
| static float GetPSNR(const double v) { |  | ||||||
|   return (float)((v > 0.) ? -4.3429448 * log(v / (255 * 255.)) | static double GetPSNR(double v, double size) { | ||||||
|                           : kMinDistortion_dB); |   return (v > 0. && size > 0.) ? -4.3429448 * log(v / (size * 255 * 255.)) | ||||||
|  |                                : kMinDistortion_dB; | ||||||
|  | } | ||||||
|  | static double GetLogSSIM(double v, double size) { | ||||||
|  |   v = (size > 0.) ? v / size : 1.; | ||||||
|  |   return (v < 1.) ? -10.0 * log10(1. - v) : kMinDistortion_dB; | ||||||
| } | } | ||||||
|  |  | ||||||
| int WebPPictureDistortion(const WebPPicture* src, const WebPPicture* ref, | int WebPPictureDistortion(const WebPPicture* src, const WebPPicture* ref, | ||||||
|                           int type, float result[5]) { |                           int type, float results[5]) { | ||||||
|  |   int w, h, c; | ||||||
|  |   double disto[4] = { 0. }; | ||||||
|  |   double sizes[4] = { 0. }; | ||||||
|  |   double total_size = 0., total_disto = 0.; | ||||||
|   VP8DistoStats stats[5]; |   VP8DistoStats stats[5]; | ||||||
|   int w, h; |  | ||||||
|  |  | ||||||
|   memset(stats, 0, sizeof(stats)); |  | ||||||
|  |  | ||||||
|   VP8SSIMDspInit(); |   VP8SSIMDspInit(); | ||||||
|  |   memset(stats, 0, sizeof(stats)); | ||||||
|  |  | ||||||
|   if (src == NULL || ref == NULL || |   if (src == NULL || ref == NULL || | ||||||
|       src->width != ref->width || src->height != ref->height || |       src->width != ref->width || src->height != ref->height || | ||||||
|       src->use_argb != ref->use_argb || result == NULL) { |       src->use_argb != ref->use_argb || results == NULL) { | ||||||
|     return 0; |     return 0; | ||||||
|   } |   } | ||||||
|   w = src->width; |   w = src->width; | ||||||
| @@ -90,7 +105,7 @@ int WebPPictureDistortion(const WebPPicture* src, const WebPPicture* ref, | |||||||
|     if (src->argb == NULL || ref->argb == NULL) { |     if (src->argb == NULL || ref->argb == NULL) { | ||||||
|       return 0; |       return 0; | ||||||
|     } else { |     } else { | ||||||
|       int i, j, c; |       int i, j; | ||||||
|       uint8_t* tmp1, *tmp2; |       uint8_t* tmp1, *tmp2; | ||||||
|       uint8_t* const tmp_plane = |       uint8_t* const tmp_plane = | ||||||
|           (uint8_t*)WebPSafeMalloc(2ULL * w * h, sizeof(*tmp_plane)); |           (uint8_t*)WebPSafeMalloc(2ULL * w * h, sizeof(*tmp_plane)); | ||||||
| @@ -104,8 +119,11 @@ int WebPPictureDistortion(const WebPPicture* src, const WebPPicture* ref, | |||||||
|             tmp2[j * w + i] = ref->argb[i + j * ref->argb_stride] >> (c * 8); |             tmp2[j * w + i] = ref->argb[i + j * ref->argb_stride] >> (c * 8); | ||||||
|           } |           } | ||||||
|         } |         } | ||||||
|  |         sizes[c] = w * h; | ||||||
|         if (type >= 2) { |         if (type >= 2) { | ||||||
|           AccumulateLSIM(tmp1, w, tmp2, w, w, h, &stats[c]); |           disto[c] = AccumulateLSIM(tmp1, w, tmp2, w, w, h); | ||||||
|  |         } else if (type == 0) { | ||||||
|  |           disto[c] = AccumulateSSE(tmp1, w, tmp2, w, w, h); | ||||||
|         } else { |         } else { | ||||||
|           VP8SSIMAccumulatePlane(tmp1, w, tmp2, w, w, h, &stats[c]); |           VP8SSIMAccumulatePlane(tmp1, w, tmp2, w, w, h, &stats[c]); | ||||||
|         } |         } | ||||||
| @@ -127,16 +145,31 @@ int WebPPictureDistortion(const WebPPicture* src, const WebPPicture* ref, | |||||||
|  |  | ||||||
|     uv_w = (src->width + 1) >> 1; |     uv_w = (src->width + 1) >> 1; | ||||||
|     uv_h = (src->height + 1) >> 1; |     uv_h = (src->height + 1) >> 1; | ||||||
|  |     sizes[0] = w * h; | ||||||
|  |     sizes[1] = sizes[2] = uv_w * uv_h; | ||||||
|  |     sizes[3] = has_alpha ? w * h : 0.; | ||||||
|  |  | ||||||
|     if (type >= 2) { |     if (type >= 2) { | ||||||
|       AccumulateLSIM(src->y, src->y_stride, ref->y, ref->y_stride, |       disto[0] = AccumulateLSIM(src->y, src->y_stride, ref->y, ref->y_stride, | ||||||
|                      w, h, &stats[0]); |                                 w, h); | ||||||
|       AccumulateLSIM(src->u, src->uv_stride, ref->u, ref->uv_stride, |       disto[1] = AccumulateLSIM(src->u, src->uv_stride, ref->u, ref->uv_stride, | ||||||
|                      uv_w, uv_h, &stats[1]); |                                 uv_w, uv_h); | ||||||
|       AccumulateLSIM(src->v, src->uv_stride, ref->v, ref->uv_stride, |       disto[2] = AccumulateLSIM(src->v, src->uv_stride, ref->v, ref->uv_stride, | ||||||
|                      uv_w, uv_h, &stats[2]); |                                 uv_w, uv_h); | ||||||
|       if (has_alpha) { |       if (has_alpha) { | ||||||
|         AccumulateLSIM(src->a, src->a_stride, ref->a, ref->a_stride, |         disto[3] = AccumulateLSIM(src->a, src->a_stride, ref->a, ref->a_stride, | ||||||
|                        w, h, &stats[3]); |                                   w, h); | ||||||
|  |       } | ||||||
|  |     } else if (type == 0) { | ||||||
|  |       disto[0] = AccumulateSSE(src->y, src->y_stride, ref->y, ref->y_stride, | ||||||
|  |                                w, h); | ||||||
|  |       disto[1] = AccumulateSSE(src->u, src->uv_stride, ref->u, ref->uv_stride, | ||||||
|  |                                uv_w, uv_h); | ||||||
|  |       disto[2] = AccumulateSSE(src->v, src->uv_stride, ref->v, ref->uv_stride, | ||||||
|  |                                uv_w, uv_h); | ||||||
|  |       if (has_alpha) { | ||||||
|  |         disto[3] = AccumulateSSE(src->a, src->a_stride, ref->a, ref->a_stride, | ||||||
|  |                                  w, h); | ||||||
|       } |       } | ||||||
|     } else { |     } else { | ||||||
|       VP8SSIMAccumulatePlane(src->y, src->y_stride, |       VP8SSIMAccumulatePlane(src->y, src->y_stride, | ||||||
| @@ -155,22 +188,23 @@ int WebPPictureDistortion(const WebPPicture* src, const WebPPicture* ref, | |||||||
|       } |       } | ||||||
|     } |     } | ||||||
|   } |   } | ||||||
|   // Final stat calculations. |  | ||||||
|   { |   for (c = 0; c < 4; ++c) { | ||||||
|     int c; |     if (type == 1) { | ||||||
|     for (c = 0; c <= 4; ++c) { |       results[c] = (float)GetLogSSIM(VP8SSIMGet(&stats[c]), 1.); | ||||||
|       if (type == 1) { |       VP8SSIMAddStats(&stats[c], &stats[4]); | ||||||
|         const double v = VP8SSIMGet(&stats[c]); |     } else { | ||||||
|         result[c] = (float)((v < 1.) ? -10.0 * log10(1. - v) |       total_disto += disto[c]; | ||||||
|                                      : kMinDistortion_dB); |       total_size += sizes[c]; | ||||||
|       } else { |       results[c] = (float)GetPSNR(disto[c], sizes[c]); | ||||||
|         const double v = VP8SSIMGetSquaredError(&stats[c]); |  | ||||||
|         result[c] = GetPSNR(v); |  | ||||||
|       } |  | ||||||
|       // Accumulate forward |  | ||||||
|       if (c < 4) VP8SSIMAddStats(&stats[c], &stats[4]); |  | ||||||
|     } |     } | ||||||
|   } |   } | ||||||
|  |   if (type == 1) { | ||||||
|  |     results[4] = (float)GetLogSSIM(VP8SSIMGet(&stats[4]), 1.); | ||||||
|  |   } else { | ||||||
|  |     results[4] = (float)GetPSNR(total_disto, total_size); | ||||||
|  |   } | ||||||
|  |  | ||||||
|   return 1; |   return 1; | ||||||
| } | } | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user