mirror of
				https://github.com/webmproject/libwebp.git
				synced 2025-10-31 02:15:42 +01:00 
			
		
		
		
	Make libwebp depend on libsharpyuv.
Change-Id: I6d8ebfe1f855024fc0694b1aa584f71fa27b83ae
This commit is contained in:
		| @@ -33,6 +33,12 @@ else | ||||
|   NEON := c | ||||
| endif | ||||
|  | ||||
| sharpyuv_srcs := \ | ||||
|     sharpyuv/sharpyuv.c \ | ||||
|     sharpyuv/sharpyuv_dsp.c \ | ||||
|     sharpyuv/sharpyuv_neon.$(NEON) \ | ||||
|     sharpyuv/sharpyuv_sse2.c \ | ||||
|  | ||||
| dec_srcs := \ | ||||
|     src/dec/alpha_dec.c \ | ||||
|     src/dec/buffer_dec.c \ | ||||
| @@ -204,6 +210,7 @@ endif  # ENABLE_SHARED=1 | ||||
| include $(CLEAR_VARS) | ||||
|  | ||||
| LOCAL_SRC_FILES := \ | ||||
|     $(sharpyuv_srcs) \ | ||||
|     $(dsp_enc_srcs) \ | ||||
|     $(enc_srcs) \ | ||||
|     $(utils_enc_srcs) \ | ||||
|   | ||||
| @@ -31,7 +31,6 @@ option(WEBP_BUILD_VWEBP "Build the vwebp viewer tool." ON) | ||||
| option(WEBP_BUILD_WEBPINFO "Build the webpinfo command line tool." ON) | ||||
| option(WEBP_BUILD_LIBWEBPMUX "Build the libwebpmux library." ON) | ||||
| option(WEBP_BUILD_WEBPMUX "Build the webpmux command line tool." ON) | ||||
| option(WEBP_BUILD_SHARPYUV "Build the sharpyuv library." OFF) | ||||
| option(WEBP_BUILD_EXTRAS "Build extras." ON) | ||||
| option(WEBP_BUILD_WEBP_JS "Emscripten build of webp.js." OFF) | ||||
| option(WEBP_USE_THREAD "Enable threading support" ON) | ||||
| @@ -212,18 +211,16 @@ function(libwebp_add_stub_file TARGET) | ||||
|   target_sources(${TARGET} PRIVATE ${stub_source_file}) | ||||
| endfunction() | ||||
|  | ||||
| if(WEBP_BUILD_SHARPYUV) | ||||
|   parse_makefile_am(${CMAKE_CURRENT_SOURCE_DIR}/sharpyuv "WEBP_SHARPYUV_SRCS" | ||||
|                     "") | ||||
|   add_library(sharpyuv OBJECT ${WEBP_SHARPYUV_SRCS}) | ||||
|   target_include_directories(sharpyuv | ||||
|                              PRIVATE ${CMAKE_CURRENT_BINARY_DIR} | ||||
|                              ${CMAKE_CURRENT_SOURCE_DIR}) | ||||
|   set_target_properties( | ||||
|     sharpyuv | ||||
|     PROPERTIES PUBLIC_HEADER "${CMAKE_CURRENT_SOURCE_DIR}/sharpyuv/sharpyuv.h;\ | ||||
| parse_makefile_am(${CMAKE_CURRENT_SOURCE_DIR}/sharpyuv "WEBP_SHARPYUV_SRCS" | ||||
|                   "") | ||||
| add_library(sharpyuv OBJECT ${WEBP_SHARPYUV_SRCS}) | ||||
| target_include_directories(sharpyuv | ||||
|                             PRIVATE ${CMAKE_CURRENT_BINARY_DIR} | ||||
|                             ${CMAKE_CURRENT_SOURCE_DIR}) | ||||
| set_target_properties( | ||||
|   sharpyuv | ||||
|   PROPERTIES PUBLIC_HEADER "${CMAKE_CURRENT_SOURCE_DIR}/sharpyuv/sharpyuv.h;\ | ||||
| ${CMAKE_CURRENT_SOURCE_DIR}/src/webp/types.h") | ||||
| endif() | ||||
|  | ||||
| if(MSVC) | ||||
|   # avoid security warnings for e.g., fopen() used in the examples. | ||||
| @@ -289,6 +286,7 @@ target_include_directories(webputils | ||||
|                            PRIVATE ${CMAKE_CURRENT_BINARY_DIR} | ||||
|                                    ${CMAKE_CURRENT_SOURCE_DIR}) | ||||
| add_library(webp | ||||
|             $<TARGET_OBJECTS:sharpyuv> | ||||
|             $<TARGET_OBJECTS:webpdecode> | ||||
|             $<TARGET_OBJECTS:webpdsp> | ||||
|             $<TARGET_OBJECTS:webpencode> | ||||
| @@ -311,7 +309,8 @@ ${CMAKE_CURRENT_SOURCE_DIR}/src/webp/types.h") | ||||
|  | ||||
| # Make sure the OBJECT libraries are built with position independent code (it is | ||||
| # not ON by default). | ||||
| set_target_properties(webpdecode | ||||
| set_target_properties(sharpyuv | ||||
|                       webpdecode | ||||
|                       webpdspdecode | ||||
|                       webputilsdecode | ||||
|                       webpencode | ||||
|   | ||||
| @@ -1,11 +1,7 @@ | ||||
| ACLOCAL_AMFLAGS = -I m4 | ||||
| SUBDIRS = src imageio man | ||||
| SUBDIRS = sharpyuv src imageio man | ||||
| EXTRA_DIST = COPYING autogen.sh | ||||
|  | ||||
| if BUILD_SHARPYUV | ||||
|   SUBDIRS += sharpyuv | ||||
| endif | ||||
|  | ||||
| if BUILD_EXTRAS | ||||
|   SUBDIRS += extras | ||||
| endif | ||||
|   | ||||
							
								
								
									
										13
									
								
								Makefile.vc
									
									
									
									
									
								
							
							
						
						
									
										13
									
								
								Makefile.vc
									
									
									
									
									
								
							| @@ -81,6 +81,7 @@ OUTPUT_DIRS = $(DIRBIN) $(DIRINC) $(DIRLIB) \ | ||||
|               $(DIROBJ)\extras \ | ||||
|               $(DIROBJ)\imageio \ | ||||
|               $(DIROBJ)\mux \ | ||||
|               $(DIROBJ)\sharpyuv \ | ||||
|               $(DIROBJ)\utils \ | ||||
|  | ||||
| # Target configuration | ||||
| @@ -173,6 +174,12 @@ CFLAGS = $(CFLAGS) /D_UNICODE /DUNICODE | ||||
| # A config was provided, so the library can be built. | ||||
| # | ||||
|  | ||||
| SHARPYUV_OBJS = \ | ||||
|     $(DIROBJ)\sharpyuv\sharpyuv.obj \ | ||||
|     $(DIROBJ)\sharpyuv\sharpyuv_dsp.obj \ | ||||
|     $(DIROBJ)\sharpyuv\sharpyuv_neon.obj \ | ||||
|     $(DIROBJ)\sharpyuv\sharpyuv_sse2.obj \ | ||||
|  | ||||
| DEC_OBJS = \ | ||||
|     $(DIROBJ)\dec\alpha_dec.obj \ | ||||
|     $(DIROBJ)\dec\buffer_dec.obj \ | ||||
| @@ -334,8 +341,8 @@ UTILS_ENC_OBJS = \ | ||||
|     $(DIROBJ)\utils\quant_levels_utils.obj \ | ||||
|  | ||||
| LIBWEBPDECODER_OBJS = $(DEC_OBJS) $(DSP_DEC_OBJS) $(UTILS_DEC_OBJS) | ||||
| LIBWEBP_OBJS = $(LIBWEBPDECODER_OBJS) $(ENC_OBJS) $(DSP_ENC_OBJS) \ | ||||
|                $(UTILS_ENC_OBJS) $(DLL_OBJS) | ||||
| LIBWEBP_OBJS = $(LIBWEBPDECODER_OBJS) $(SHARPYUV_OBJS) $(ENC_OBJS) \ | ||||
|                $(DSP_ENC_OBJS) $(UTILS_ENC_OBJS) $(DLL_OBJS) | ||||
| LIBWEBPMUX_OBJS = $(MUX_OBJS) $(LIBWEBPMUX_OBJS) | ||||
| LIBWEBPDEMUX_OBJS = $(DEMUX_OBJS) $(LIBWEBPDEMUX_OBJS) | ||||
|  | ||||
| @@ -481,6 +488,8 @@ $(DIROBJ)\examples\gifdec.obj: examples\gifdec.c | ||||
| 	$(CC) $(CFLAGS) /Fd$(DIROBJ)\extras\ /Fo$(DIROBJ)\extras\ $< | ||||
| {imageio}.c{$(DIROBJ)\imageio}.obj:: | ||||
| 	$(CC) $(CFLAGS) /Fd$(DIROBJ)\imageio\ /Fo$(DIROBJ)\imageio\ $< | ||||
| {sharpyuv}.c{$(DIROBJ)\sharpyuv}.obj:: | ||||
| 	$(CC) $(CFLAGS) /Fd$(DIROBJ)\sharpyuv\ /Fo$(DIROBJ)\sharpyuv\ $< | ||||
| {src\dec}.c{$(DIROBJ)\dec}.obj:: | ||||
| 	$(CC) $(CFLAGS) /Fd$(LIBWEBP_PDBNAME) /Fo$(DIROBJ)\dec\ $< | ||||
| {src\demux}.c{$(DIROBJ)\demux}.obj:: | ||||
|   | ||||
| @@ -105,6 +105,11 @@ model { | ||||
|       sources { | ||||
|         c { | ||||
|           source { | ||||
|             srcDir "sharpyuv" | ||||
|             include "sharpyuv.c" | ||||
|             include "sharpyuv_dsp.c" | ||||
|             include "sharpyuv_neon.c" | ||||
|             include "sharpyuv_sse2.c" | ||||
|             srcDir "src/dec" | ||||
|             include "alpha_dec.c" | ||||
|             include "buffer_dec.c" | ||||
|   | ||||
| @@ -67,14 +67,6 @@ AC_ARG_ENABLE([libwebpextras], | ||||
| AC_MSG_RESULT(${enable_libwebpextras-no}) | ||||
| AM_CONDITIONAL([BUILD_EXTRAS], [test "$enable_libwebpextras" = "yes"]) | ||||
|  | ||||
| dnl === Check whether libsharpyuv should be built | ||||
| AC_MSG_CHECKING(whether libsharpyuv is to be built) | ||||
| AC_ARG_ENABLE([libsharpyuv], | ||||
|               AS_HELP_STRING([--enable-libsharpyuv], | ||||
|                              [Build libsharpyuv @<:@default=no@:>@])) | ||||
| AC_MSG_RESULT(${enable_libsharpyuv-no}) | ||||
| AM_CONDITIONAL([BUILD_SHARPYUV], [test "$enable_libsharpyuv" = "yes"]) | ||||
|  | ||||
| dnl === If --enable-asserts is not defined, define NDEBUG | ||||
|  | ||||
| AC_MSG_CHECKING(whether asserts are enabled) | ||||
| @@ -782,7 +774,6 @@ libwebpdecoder: ${enable_libwebpdecoder-no} | ||||
| libwebpdemux: ${enable_libwebpdemux-no} | ||||
| libwebpmux: ${enable_libwebpmux-no} | ||||
| libwebpextras: ${enable_libwebpextras-no} | ||||
| libsharpyuv: ${enable_libsharpyuv-no} | ||||
|  | ||||
| Tools: | ||||
| cwebp : ${enable_libwebpdemux-no} | ||||
|   | ||||
| @@ -125,6 +125,12 @@ endif | ||||
| ANIM_UTIL_OBJS = \ | ||||
|     examples/anim_util.o \ | ||||
|  | ||||
| SHARPYUV_OBJS = \ | ||||
|     sharpyuv/sharpyuv.o \ | ||||
|     sharpyuv/sharpyuv_dsp.o \ | ||||
|     sharpyuv/sharpyuv_neon.o \ | ||||
|     sharpyuv/sharpyuv_sse2.o \ | ||||
|  | ||||
| DEC_OBJS = \ | ||||
|     src/dec/alpha_dec.o \ | ||||
|     src/dec/buffer_dec.o \ | ||||
| @@ -282,8 +288,8 @@ EXTRA_OBJS = \ | ||||
|     extras/quality_estimate.o \ | ||||
|  | ||||
| LIBWEBPDECODER_OBJS = $(DEC_OBJS) $(DSP_DEC_OBJS) $(UTILS_DEC_OBJS) | ||||
| LIBWEBP_OBJS = $(LIBWEBPDECODER_OBJS) $(ENC_OBJS) $(DSP_ENC_OBJS) \ | ||||
|                $(UTILS_ENC_OBJS) | ||||
| LIBWEBP_OBJS = $(SHARPYUV_OBJS) $(LIBWEBPDECODER_OBJS) $(ENC_OBJS) \ | ||||
|                $(DSP_ENC_OBJS) $(UTILS_ENC_OBJS) | ||||
| LIBWEBPMUX_OBJS = $(MUX_OBJS) | ||||
| LIBWEBPDEMUX_OBJS = $(DEMUX_OBJS) | ||||
| LIBWEBPEXTRA_OBJS = $(EXTRA_OBJS) | ||||
|   | ||||
| @@ -332,15 +332,6 @@ extern void WebPConvertARGBToUV_C(const uint32_t* argb, uint8_t* u, uint8_t* v, | ||||
| extern void WebPConvertRGBA32ToUV_C(const uint16_t* rgb, | ||||
|                                     uint8_t* u, uint8_t* v, int width); | ||||
|  | ||||
| // utilities for accurate RGB->YUV conversion | ||||
| extern uint64_t (*WebPSharpYUVUpdateY)(const uint16_t* src, const uint16_t* ref, | ||||
|                                        uint16_t* dst, int len); | ||||
| extern void (*WebPSharpYUVUpdateRGB)(const int16_t* src, const int16_t* ref, | ||||
|                                      int16_t* dst, int len); | ||||
| extern void (*WebPSharpYUVFilterRow)(const int16_t* A, const int16_t* B, | ||||
|                                      int len, | ||||
|                                      const uint16_t* best_y, uint16_t* out); | ||||
|  | ||||
| // Must be called before using the above. | ||||
| void WebPInitConvertARGBToYUV(void); | ||||
|  | ||||
|   | ||||
| @@ -194,50 +194,6 @@ void WebPConvertRGBA32ToUV_C(const uint16_t* rgb, | ||||
|  | ||||
| //----------------------------------------------------------------------------- | ||||
|  | ||||
| #if !WEBP_NEON_OMIT_C_CODE | ||||
| #define MAX_Y ((1 << 10) - 1)    // 10b precision over 16b-arithmetic | ||||
| static uint16_t clip_y(int v) { | ||||
|   return (v < 0) ? 0 : (v > MAX_Y) ? MAX_Y : (uint16_t)v; | ||||
| } | ||||
|  | ||||
| static uint64_t SharpYUVUpdateY_C(const uint16_t* ref, const uint16_t* src, | ||||
|                                   uint16_t* dst, int len) { | ||||
|   uint64_t diff = 0; | ||||
|   int i; | ||||
|   for (i = 0; i < len; ++i) { | ||||
|     const int diff_y = ref[i] - src[i]; | ||||
|     const int new_y = (int)dst[i] + diff_y; | ||||
|     dst[i] = clip_y(new_y); | ||||
|     diff += (uint64_t)abs(diff_y); | ||||
|   } | ||||
|   return diff; | ||||
| } | ||||
|  | ||||
| static void SharpYUVUpdateRGB_C(const int16_t* ref, const int16_t* src, | ||||
|                                 int16_t* dst, int len) { | ||||
|   int i; | ||||
|   for (i = 0; i < len; ++i) { | ||||
|     const int diff_uv = ref[i] - src[i]; | ||||
|     dst[i] += diff_uv; | ||||
|   } | ||||
| } | ||||
|  | ||||
| static void SharpYUVFilterRow_C(const int16_t* A, const int16_t* B, int len, | ||||
|                                 const uint16_t* best_y, uint16_t* out) { | ||||
|   int i; | ||||
|   for (i = 0; i < len; ++i, ++A, ++B) { | ||||
|     const int v0 = (A[0] * 9 + A[1] * 3 + B[0] * 3 + B[1] + 8) >> 4; | ||||
|     const int v1 = (A[1] * 9 + A[0] * 3 + B[1] * 3 + B[0] + 8) >> 4; | ||||
|     out[2 * i + 0] = clip_y(best_y[2 * i + 0] + v0); | ||||
|     out[2 * i + 1] = clip_y(best_y[2 * i + 1] + v1); | ||||
|   } | ||||
| } | ||||
| #endif  // !WEBP_NEON_OMIT_C_CODE | ||||
|  | ||||
| #undef MAX_Y | ||||
|  | ||||
| //----------------------------------------------------------------------------- | ||||
|  | ||||
| void (*WebPConvertRGB24ToY)(const uint8_t* rgb, uint8_t* y, int width); | ||||
| void (*WebPConvertBGR24ToY)(const uint8_t* bgr, uint8_t* y, int width); | ||||
| void (*WebPConvertRGBA32ToUV)(const uint16_t* rgb, | ||||
| @@ -247,18 +203,9 @@ void (*WebPConvertARGBToY)(const uint32_t* argb, uint8_t* y, int width); | ||||
| void (*WebPConvertARGBToUV)(const uint32_t* argb, uint8_t* u, uint8_t* v, | ||||
|                             int src_width, int do_store); | ||||
|  | ||||
| uint64_t (*WebPSharpYUVUpdateY)(const uint16_t* ref, const uint16_t* src, | ||||
|                                 uint16_t* dst, int len); | ||||
| void (*WebPSharpYUVUpdateRGB)(const int16_t* ref, const int16_t* src, | ||||
|                               int16_t* dst, int len); | ||||
| void (*WebPSharpYUVFilterRow)(const int16_t* A, const int16_t* B, int len, | ||||
|                               const uint16_t* best_y, uint16_t* out); | ||||
|  | ||||
| extern void WebPInitConvertARGBToYUVSSE2(void); | ||||
| extern void WebPInitConvertARGBToYUVSSE41(void); | ||||
| extern void WebPInitConvertARGBToYUVNEON(void); | ||||
| extern void WebPInitSharpYUVSSE2(void); | ||||
| extern void WebPInitSharpYUVNEON(void); | ||||
|  | ||||
| WEBP_DSP_INIT_FUNC(WebPInitConvertARGBToYUV) { | ||||
|   WebPConvertARGBToY = ConvertARGBToY_C; | ||||
| @@ -269,17 +216,10 @@ WEBP_DSP_INIT_FUNC(WebPInitConvertARGBToYUV) { | ||||
|  | ||||
|   WebPConvertRGBA32ToUV = WebPConvertRGBA32ToUV_C; | ||||
|  | ||||
| #if !WEBP_NEON_OMIT_C_CODE | ||||
|   WebPSharpYUVUpdateY = SharpYUVUpdateY_C; | ||||
|   WebPSharpYUVUpdateRGB = SharpYUVUpdateRGB_C; | ||||
|   WebPSharpYUVFilterRow = SharpYUVFilterRow_C; | ||||
| #endif | ||||
|  | ||||
|   if (VP8GetCPUInfo != NULL) { | ||||
| #if defined(WEBP_HAVE_SSE2) | ||||
|     if (VP8GetCPUInfo(kSSE2)) { | ||||
|       WebPInitConvertARGBToYUVSSE2(); | ||||
|       WebPInitSharpYUVSSE2(); | ||||
|     } | ||||
| #endif  // WEBP_HAVE_SSE2 | ||||
| #if defined(WEBP_HAVE_SSE41) | ||||
| @@ -293,7 +233,6 @@ WEBP_DSP_INIT_FUNC(WebPInitConvertARGBToYUV) { | ||||
|   if (WEBP_NEON_OMIT_C_CODE || | ||||
|       (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) { | ||||
|     WebPInitConvertARGBToYUVNEON(); | ||||
|     WebPInitSharpYUVNEON(); | ||||
|   } | ||||
| #endif  // WEBP_HAVE_NEON | ||||
|  | ||||
| @@ -302,7 +241,4 @@ WEBP_DSP_INIT_FUNC(WebPInitConvertARGBToYUV) { | ||||
|   assert(WebPConvertRGB24ToY != NULL); | ||||
|   assert(WebPConvertBGR24ToY != NULL); | ||||
|   assert(WebPConvertRGBA32ToUV != NULL); | ||||
|   assert(WebPSharpYUVUpdateY != NULL); | ||||
|   assert(WebPSharpYUVUpdateRGB != NULL); | ||||
|   assert(WebPSharpYUVFilterRow != NULL); | ||||
| } | ||||
|   | ||||
| @@ -173,116 +173,8 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitConvertARGBToYUVNEON(void) { | ||||
|   WebPConvertRGBA32ToUV = ConvertRGBA32ToUV_NEON; | ||||
| } | ||||
|  | ||||
| //------------------------------------------------------------------------------ | ||||
|  | ||||
| #define MAX_Y ((1 << 10) - 1)    // 10b precision over 16b-arithmetic | ||||
| static uint16_t clip_y_NEON(int v) { | ||||
|   return (v < 0) ? 0 : (v > MAX_Y) ? MAX_Y : (uint16_t)v; | ||||
| } | ||||
|  | ||||
| static uint64_t SharpYUVUpdateY_NEON(const uint16_t* ref, const uint16_t* src, | ||||
|                                      uint16_t* dst, int len) { | ||||
|   int i; | ||||
|   const int16x8_t zero = vdupq_n_s16(0); | ||||
|   const int16x8_t max = vdupq_n_s16(MAX_Y); | ||||
|   uint64x2_t sum = vdupq_n_u64(0); | ||||
|   uint64_t diff; | ||||
|  | ||||
|   for (i = 0; i + 8 <= len; i += 8) { | ||||
|     const int16x8_t A = vreinterpretq_s16_u16(vld1q_u16(ref + i)); | ||||
|     const int16x8_t B = vreinterpretq_s16_u16(vld1q_u16(src + i)); | ||||
|     const int16x8_t C = vreinterpretq_s16_u16(vld1q_u16(dst + i)); | ||||
|     const int16x8_t D = vsubq_s16(A, B);       // diff_y | ||||
|     const int16x8_t F = vaddq_s16(C, D);       // new_y | ||||
|     const uint16x8_t H = | ||||
|         vreinterpretq_u16_s16(vmaxq_s16(vminq_s16(F, max), zero)); | ||||
|     const int16x8_t I = vabsq_s16(D);          // abs(diff_y) | ||||
|     vst1q_u16(dst + i, H); | ||||
|     sum = vpadalq_u32(sum, vpaddlq_u16(vreinterpretq_u16_s16(I))); | ||||
|   } | ||||
|   diff = vgetq_lane_u64(sum, 0) + vgetq_lane_u64(sum, 1); | ||||
|   for (; i < len; ++i) { | ||||
|     const int diff_y = ref[i] - src[i]; | ||||
|     const int new_y = (int)(dst[i]) + diff_y; | ||||
|     dst[i] = clip_y_NEON(new_y); | ||||
|     diff += (uint64_t)(abs(diff_y)); | ||||
|   } | ||||
|   return diff; | ||||
| } | ||||
|  | ||||
| static void SharpYUVUpdateRGB_NEON(const int16_t* ref, const int16_t* src, | ||||
|                                    int16_t* dst, int len) { | ||||
|   int i; | ||||
|   for (i = 0; i + 8 <= len; i += 8) { | ||||
|     const int16x8_t A = vld1q_s16(ref + i); | ||||
|     const int16x8_t B = vld1q_s16(src + i); | ||||
|     const int16x8_t C = vld1q_s16(dst + i); | ||||
|     const int16x8_t D = vsubq_s16(A, B);   // diff_uv | ||||
|     const int16x8_t E = vaddq_s16(C, D);   // new_uv | ||||
|     vst1q_s16(dst + i, E); | ||||
|   } | ||||
|   for (; i < len; ++i) { | ||||
|     const int diff_uv = ref[i] - src[i]; | ||||
|     dst[i] += diff_uv; | ||||
|   } | ||||
| } | ||||
|  | ||||
| static void SharpYUVFilterRow_NEON(const int16_t* A, const int16_t* B, int len, | ||||
|                                    const uint16_t* best_y, uint16_t* out) { | ||||
|   int i; | ||||
|   const int16x8_t max = vdupq_n_s16(MAX_Y); | ||||
|   const int16x8_t zero = vdupq_n_s16(0); | ||||
|   for (i = 0; i + 8 <= len; i += 8) { | ||||
|     const int16x8_t a0 = vld1q_s16(A + i + 0); | ||||
|     const int16x8_t a1 = vld1q_s16(A + i + 1); | ||||
|     const int16x8_t b0 = vld1q_s16(B + i + 0); | ||||
|     const int16x8_t b1 = vld1q_s16(B + i + 1); | ||||
|     const int16x8_t a0b1 = vaddq_s16(a0, b1); | ||||
|     const int16x8_t a1b0 = vaddq_s16(a1, b0); | ||||
|     const int16x8_t a0a1b0b1 = vaddq_s16(a0b1, a1b0);  // A0+A1+B0+B1 | ||||
|     const int16x8_t a0b1_2 = vaddq_s16(a0b1, a0b1);    // 2*(A0+B1) | ||||
|     const int16x8_t a1b0_2 = vaddq_s16(a1b0, a1b0);    // 2*(A1+B0) | ||||
|     const int16x8_t c0 = vshrq_n_s16(vaddq_s16(a0b1_2, a0a1b0b1), 3); | ||||
|     const int16x8_t c1 = vshrq_n_s16(vaddq_s16(a1b0_2, a0a1b0b1), 3); | ||||
|     const int16x8_t d0 = vaddq_s16(c1, a0); | ||||
|     const int16x8_t d1 = vaddq_s16(c0, a1); | ||||
|     const int16x8_t e0 = vrshrq_n_s16(d0, 1); | ||||
|     const int16x8_t e1 = vrshrq_n_s16(d1, 1); | ||||
|     const int16x8x2_t f = vzipq_s16(e0, e1); | ||||
|     const int16x8_t g0 = vreinterpretq_s16_u16(vld1q_u16(best_y + 2 * i + 0)); | ||||
|     const int16x8_t g1 = vreinterpretq_s16_u16(vld1q_u16(best_y + 2 * i + 8)); | ||||
|     const int16x8_t h0 = vaddq_s16(g0, f.val[0]); | ||||
|     const int16x8_t h1 = vaddq_s16(g1, f.val[1]); | ||||
|     const int16x8_t i0 = vmaxq_s16(vminq_s16(h0, max), zero); | ||||
|     const int16x8_t i1 = vmaxq_s16(vminq_s16(h1, max), zero); | ||||
|     vst1q_u16(out + 2 * i + 0, vreinterpretq_u16_s16(i0)); | ||||
|     vst1q_u16(out + 2 * i + 8, vreinterpretq_u16_s16(i1)); | ||||
|   } | ||||
|   for (; i < len; ++i) { | ||||
|     const int a0b1 = A[i + 0] + B[i + 1]; | ||||
|     const int a1b0 = A[i + 1] + B[i + 0]; | ||||
|     const int a0a1b0b1 = a0b1 + a1b0 + 8; | ||||
|     const int v0 = (8 * A[i + 0] + 2 * a1b0 + a0a1b0b1) >> 4; | ||||
|     const int v1 = (8 * A[i + 1] + 2 * a0b1 + a0a1b0b1) >> 4; | ||||
|     out[2 * i + 0] = clip_y_NEON(best_y[2 * i + 0] + v0); | ||||
|     out[2 * i + 1] = clip_y_NEON(best_y[2 * i + 1] + v1); | ||||
|   } | ||||
| } | ||||
| #undef MAX_Y | ||||
|  | ||||
| //------------------------------------------------------------------------------ | ||||
|  | ||||
| extern void WebPInitSharpYUVNEON(void); | ||||
|  | ||||
| WEBP_TSAN_IGNORE_FUNCTION void WebPInitSharpYUVNEON(void) { | ||||
|   WebPSharpYUVUpdateY = SharpYUVUpdateY_NEON; | ||||
|   WebPSharpYUVUpdateRGB = SharpYUVUpdateRGB_NEON; | ||||
|   WebPSharpYUVFilterRow = SharpYUVFilterRow_NEON; | ||||
| } | ||||
|  | ||||
| #else  // !WEBP_USE_NEON | ||||
|  | ||||
| WEBP_DSP_INIT_STUB(WebPInitConvertARGBToYUVNEON) | ||||
| WEBP_DSP_INIT_STUB(WebPInitSharpYUVNEON) | ||||
|  | ||||
| #endif  // WEBP_USE_NEON | ||||
|   | ||||
| @@ -747,128 +747,9 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitConvertARGBToYUVSSE2(void) { | ||||
|   WebPConvertRGBA32ToUV = ConvertRGBA32ToUV_SSE2; | ||||
| } | ||||
|  | ||||
| //------------------------------------------------------------------------------ | ||||
|  | ||||
| #define MAX_Y ((1 << 10) - 1)    // 10b precision over 16b-arithmetic | ||||
| static uint16_t clip_y(int v) { | ||||
|   return (v < 0) ? 0 : (v > MAX_Y) ? MAX_Y : (uint16_t)v; | ||||
| } | ||||
|  | ||||
| static uint64_t SharpYUVUpdateY_SSE2(const uint16_t* ref, const uint16_t* src, | ||||
|                                      uint16_t* dst, int len) { | ||||
|   uint64_t diff = 0; | ||||
|   uint32_t tmp[4]; | ||||
|   int i; | ||||
|   const __m128i zero = _mm_setzero_si128(); | ||||
|   const __m128i max = _mm_set1_epi16(MAX_Y); | ||||
|   const __m128i one = _mm_set1_epi16(1); | ||||
|   __m128i sum = zero; | ||||
|  | ||||
|   for (i = 0; i + 8 <= len; i += 8) { | ||||
|     const __m128i A = _mm_loadu_si128((const __m128i*)(ref + i)); | ||||
|     const __m128i B = _mm_loadu_si128((const __m128i*)(src + i)); | ||||
|     const __m128i C = _mm_loadu_si128((const __m128i*)(dst + i)); | ||||
|     const __m128i D = _mm_sub_epi16(A, B);       // diff_y | ||||
|     const __m128i E = _mm_cmpgt_epi16(zero, D);  // sign (-1 or 0) | ||||
|     const __m128i F = _mm_add_epi16(C, D);       // new_y | ||||
|     const __m128i G = _mm_or_si128(E, one);      // -1 or 1 | ||||
|     const __m128i H = _mm_max_epi16(_mm_min_epi16(F, max), zero); | ||||
|     const __m128i I = _mm_madd_epi16(D, G);      // sum(abs(...)) | ||||
|     _mm_storeu_si128((__m128i*)(dst + i), H); | ||||
|     sum = _mm_add_epi32(sum, I); | ||||
|   } | ||||
|   _mm_storeu_si128((__m128i*)tmp, sum); | ||||
|   diff = tmp[3] + tmp[2] + tmp[1] + tmp[0]; | ||||
|   for (; i < len; ++i) { | ||||
|     const int diff_y = ref[i] - src[i]; | ||||
|     const int new_y = (int)dst[i] + diff_y; | ||||
|     dst[i] = clip_y(new_y); | ||||
|     diff += (uint64_t)abs(diff_y); | ||||
|   } | ||||
|   return diff; | ||||
| } | ||||
|  | ||||
| static void SharpYUVUpdateRGB_SSE2(const int16_t* ref, const int16_t* src, | ||||
|                                    int16_t* dst, int len) { | ||||
|   int i = 0; | ||||
|   for (i = 0; i + 8 <= len; i += 8) { | ||||
|     const __m128i A = _mm_loadu_si128((const __m128i*)(ref + i)); | ||||
|     const __m128i B = _mm_loadu_si128((const __m128i*)(src + i)); | ||||
|     const __m128i C = _mm_loadu_si128((const __m128i*)(dst + i)); | ||||
|     const __m128i D = _mm_sub_epi16(A, B);   // diff_uv | ||||
|     const __m128i E = _mm_add_epi16(C, D);   // new_uv | ||||
|     _mm_storeu_si128((__m128i*)(dst + i), E); | ||||
|   } | ||||
|   for (; i < len; ++i) { | ||||
|     const int diff_uv = ref[i] - src[i]; | ||||
|     dst[i] += diff_uv; | ||||
|   } | ||||
| } | ||||
|  | ||||
| static void SharpYUVFilterRow_SSE2(const int16_t* A, const int16_t* B, int len, | ||||
|                                    const uint16_t* best_y, uint16_t* out) { | ||||
|   int i; | ||||
|   const __m128i kCst8 = _mm_set1_epi16(8); | ||||
|   const __m128i max = _mm_set1_epi16(MAX_Y); | ||||
|   const __m128i zero = _mm_setzero_si128(); | ||||
|   for (i = 0; i + 8 <= len; i += 8) { | ||||
|     const __m128i a0 = _mm_loadu_si128((const __m128i*)(A + i + 0)); | ||||
|     const __m128i a1 = _mm_loadu_si128((const __m128i*)(A + i + 1)); | ||||
|     const __m128i b0 = _mm_loadu_si128((const __m128i*)(B + i + 0)); | ||||
|     const __m128i b1 = _mm_loadu_si128((const __m128i*)(B + i + 1)); | ||||
|     const __m128i a0b1 = _mm_add_epi16(a0, b1); | ||||
|     const __m128i a1b0 = _mm_add_epi16(a1, b0); | ||||
|     const __m128i a0a1b0b1 = _mm_add_epi16(a0b1, a1b0);  // A0+A1+B0+B1 | ||||
|     const __m128i a0a1b0b1_8 = _mm_add_epi16(a0a1b0b1, kCst8); | ||||
|     const __m128i a0b1_2 = _mm_add_epi16(a0b1, a0b1);    // 2*(A0+B1) | ||||
|     const __m128i a1b0_2 = _mm_add_epi16(a1b0, a1b0);    // 2*(A1+B0) | ||||
|     const __m128i c0 = _mm_srai_epi16(_mm_add_epi16(a0b1_2, a0a1b0b1_8), 3); | ||||
|     const __m128i c1 = _mm_srai_epi16(_mm_add_epi16(a1b0_2, a0a1b0b1_8), 3); | ||||
|     const __m128i d0 = _mm_add_epi16(c1, a0); | ||||
|     const __m128i d1 = _mm_add_epi16(c0, a1); | ||||
|     const __m128i e0 = _mm_srai_epi16(d0, 1); | ||||
|     const __m128i e1 = _mm_srai_epi16(d1, 1); | ||||
|     const __m128i f0 = _mm_unpacklo_epi16(e0, e1); | ||||
|     const __m128i f1 = _mm_unpackhi_epi16(e0, e1); | ||||
|     const __m128i g0 = _mm_loadu_si128((const __m128i*)(best_y + 2 * i + 0)); | ||||
|     const __m128i g1 = _mm_loadu_si128((const __m128i*)(best_y + 2 * i + 8)); | ||||
|     const __m128i h0 = _mm_add_epi16(g0, f0); | ||||
|     const __m128i h1 = _mm_add_epi16(g1, f1); | ||||
|     const __m128i i0 = _mm_max_epi16(_mm_min_epi16(h0, max), zero); | ||||
|     const __m128i i1 = _mm_max_epi16(_mm_min_epi16(h1, max), zero); | ||||
|     _mm_storeu_si128((__m128i*)(out + 2 * i + 0), i0); | ||||
|     _mm_storeu_si128((__m128i*)(out + 2 * i + 8), i1); | ||||
|   } | ||||
|   for (; i < len; ++i) { | ||||
|     //   (9 * A0 + 3 * A1 + 3 * B0 + B1 + 8) >> 4 = | ||||
|     // = (8 * A0 + 2 * (A1 + B0) + (A0 + A1 + B0 + B1 + 8)) >> 4 | ||||
|     // We reuse the common sub-expressions. | ||||
|     const int a0b1 = A[i + 0] + B[i + 1]; | ||||
|     const int a1b0 = A[i + 1] + B[i + 0]; | ||||
|     const int a0a1b0b1 = a0b1 + a1b0 + 8; | ||||
|     const int v0 = (8 * A[i + 0] + 2 * a1b0 + a0a1b0b1) >> 4; | ||||
|     const int v1 = (8 * A[i + 1] + 2 * a0b1 + a0a1b0b1) >> 4; | ||||
|     out[2 * i + 0] = clip_y(best_y[2 * i + 0] + v0); | ||||
|     out[2 * i + 1] = clip_y(best_y[2 * i + 1] + v1); | ||||
|   } | ||||
| } | ||||
|  | ||||
| #undef MAX_Y | ||||
|  | ||||
| //------------------------------------------------------------------------------ | ||||
|  | ||||
| extern void WebPInitSharpYUVSSE2(void); | ||||
|  | ||||
| WEBP_TSAN_IGNORE_FUNCTION void WebPInitSharpYUVSSE2(void) { | ||||
|   WebPSharpYUVUpdateY = SharpYUVUpdateY_SSE2; | ||||
|   WebPSharpYUVUpdateRGB = SharpYUVUpdateRGB_SSE2; | ||||
|   WebPSharpYUVFilterRow = SharpYUVFilterRow_SSE2; | ||||
| } | ||||
|  | ||||
| #else  // !WEBP_USE_SSE2 | ||||
|  | ||||
| WEBP_DSP_INIT_STUB(WebPInitSamplersSSE2) | ||||
| WEBP_DSP_INIT_STUB(WebPInitConvertARGBToYUVSSE2) | ||||
| WEBP_DSP_INIT_STUB(WebPInitSharpYUVSSE2) | ||||
|  | ||||
| #endif  // WEBP_USE_SSE2 | ||||
|   | ||||
| @@ -37,6 +37,7 @@ libwebpencodeinclude_HEADERS += ../webp/types.h | ||||
| noinst_HEADERS = | ||||
| noinst_HEADERS += ../webp/format_constants.h | ||||
|  | ||||
| libwebpencode_la_LIBADD = ../../sharpyuv/libsharpyuv.la | ||||
| libwebpencode_la_LDFLAGS = -lm | ||||
| libwebpencode_la_CPPFLAGS = $(AM_CPPFLAGS) | ||||
| libwebpencodeincludedir = $(includedir)/webp | ||||
|   | ||||
| @@ -15,6 +15,7 @@ | ||||
| #include <stdlib.h> | ||||
| #include <math.h> | ||||
|  | ||||
| #include "sharpyuv/sharpyuv.h" | ||||
| #include "src/enc/vp8i_enc.h" | ||||
| #include "src/utils/random_utils.h" | ||||
| #include "src/utils/utils.h" | ||||
| @@ -158,7 +159,6 @@ static int RGBToV(int r, int g, int b, VP8Random* const rg) { | ||||
| //------------------------------------------------------------------------------ | ||||
| // Sharp RGB->YUV conversion | ||||
|  | ||||
| static const int kNumIterations = 4; | ||||
| static const int kMinDimensionIterativeConversion = 4; | ||||
|  | ||||
| // We could use SFIX=0 and only uint8_t for fixed_y_t, but it produces some | ||||
| @@ -171,402 +171,23 @@ typedef uint16_t fixed_y_t;   // unsigned type with extra SFIX precision for W | ||||
| #define MAX_Y_T ((256 << SFIX) - 1) | ||||
| #define SROUNDER (1 << (YUV_FIX + SFIX - 1)) | ||||
|  | ||||
| #if defined(USE_GAMMA_COMPRESSION) | ||||
|  | ||||
| // We use tables of different size and precision for the Rec709 / BT2020 | ||||
| // transfer function. | ||||
| #define kGammaF (1./0.45) | ||||
| static uint32_t kLinearToGammaTabS[kGammaTabSize + 2]; | ||||
| #define GAMMA_TO_LINEAR_BITS 14 | ||||
| static uint32_t kGammaToLinearTabS[MAX_Y_T + 1];   // size scales with Y_FIX | ||||
| static volatile int kGammaTablesSOk = 0; | ||||
| static void InitGammaTablesS(void); | ||||
|  | ||||
| WEBP_DSP_INIT_FUNC(InitGammaTablesS) { | ||||
|   assert(2 * GAMMA_TO_LINEAR_BITS < 32);  // we use uint32_t intermediate values | ||||
|   if (!kGammaTablesSOk) { | ||||
|     int v; | ||||
|     const double norm = 1. / MAX_Y_T; | ||||
|     const double scale = 1. / kGammaTabSize; | ||||
|     const double a = 0.09929682680944; | ||||
|     const double thresh = 0.018053968510807; | ||||
|     const double final_scale = 1 << GAMMA_TO_LINEAR_BITS; | ||||
|     for (v = 0; v <= MAX_Y_T; ++v) { | ||||
|       const double g = norm * v; | ||||
|       double value; | ||||
|       if (g <= thresh * 4.5) { | ||||
|         value = g / 4.5; | ||||
|       } else { | ||||
|         const double a_rec = 1. / (1. + a); | ||||
|         value = pow(a_rec * (g + a), kGammaF); | ||||
|       } | ||||
|       kGammaToLinearTabS[v] = (uint32_t)(value * final_scale + .5); | ||||
|     } | ||||
|     for (v = 0; v <= kGammaTabSize; ++v) { | ||||
|       const double g = scale * v; | ||||
|       double value; | ||||
|       if (g <= thresh) { | ||||
|         value = 4.5 * g; | ||||
|       } else { | ||||
|         value = (1. + a) * pow(g, 1. / kGammaF) - a; | ||||
|       } | ||||
|       // we already incorporate the 1/2 rounding constant here | ||||
|       kLinearToGammaTabS[v] = | ||||
|           (uint32_t)(MAX_Y_T * value) + (1 << GAMMA_TO_LINEAR_BITS >> 1); | ||||
|     } | ||||
|     // to prevent small rounding errors to cause read-overflow: | ||||
|     kLinearToGammaTabS[kGammaTabSize + 1] = kLinearToGammaTabS[kGammaTabSize]; | ||||
|     kGammaTablesSOk = 1; | ||||
|   } | ||||
| } | ||||
|  | ||||
| // return value has a fixed-point precision of GAMMA_TO_LINEAR_BITS | ||||
| static WEBP_INLINE uint32_t GammaToLinearS(int v) { | ||||
|   return kGammaToLinearTabS[v]; | ||||
| } | ||||
|  | ||||
| static WEBP_INLINE uint32_t LinearToGammaS(uint32_t value) { | ||||
|   // 'value' is in GAMMA_TO_LINEAR_BITS fractional precision | ||||
|   const uint32_t v = value * kGammaTabSize; | ||||
|   const uint32_t tab_pos = v >> GAMMA_TO_LINEAR_BITS; | ||||
|   // fractional part, in GAMMA_TO_LINEAR_BITS fixed-point precision | ||||
|   const uint32_t x = v - (tab_pos << GAMMA_TO_LINEAR_BITS);  // fractional part | ||||
|   // v0 / v1 are in GAMMA_TO_LINEAR_BITS fixed-point precision (range [0..1]) | ||||
|   const uint32_t v0 = kLinearToGammaTabS[tab_pos + 0]; | ||||
|   const uint32_t v1 = kLinearToGammaTabS[tab_pos + 1]; | ||||
|   // Final interpolation. Note that rounding is already included. | ||||
|   const uint32_t v2 = (v1 - v0) * x;    // note: v1 >= v0. | ||||
|   const uint32_t result = v0 + (v2 >> GAMMA_TO_LINEAR_BITS); | ||||
|   return result; | ||||
| } | ||||
|  | ||||
| #else | ||||
|  | ||||
| static void InitGammaTablesS(void) {} | ||||
| static WEBP_INLINE uint32_t GammaToLinearS(int v) { | ||||
|   return (v << GAMMA_TO_LINEAR_BITS) / MAX_Y_T; | ||||
| } | ||||
| static WEBP_INLINE uint32_t LinearToGammaS(uint32_t value) { | ||||
|   return (MAX_Y_T * value) >> GAMMA_TO_LINEAR_BITS; | ||||
| } | ||||
|  | ||||
| #endif    // USE_GAMMA_COMPRESSION | ||||
|  | ||||
| //------------------------------------------------------------------------------ | ||||
|  | ||||
| static uint8_t clip_8b(fixed_t v) { | ||||
|   return (!(v & ~0xff)) ? (uint8_t)v : (v < 0) ? 0u : 255u; | ||||
| } | ||||
|  | ||||
| // Returns 'y' unchanged when it has no bit set outside of MAX_Y_T, otherwise | ||||
| // 0 for negative values and MAX_Y_T for the rest. | ||||
| static fixed_y_t clip_y(int y) { | ||||
|   if ((y & ~MAX_Y_T) == 0) return (fixed_y_t)y; | ||||
|   return (y < 0) ? 0 : MAX_Y_T; | ||||
| } | ||||
|  | ||||
| //------------------------------------------------------------------------------ | ||||
|  | ||||
| // Weighted sum of r/g/b (weights 13933, 46871 and 4732), rounded with | ||||
| // YUV_HALF and scaled down by YUV_FIX bits. | ||||
| static int RGBToGray(int r, int g, int b) { | ||||
|   const int weighted = 13933 * r + 46871 * g + 4732 * b; | ||||
|   return (weighted + YUV_HALF) >> YUV_FIX; | ||||
| } | ||||
|  | ||||
| // Averages four samples: each one goes through GammaToLinearS(), the rounded | ||||
| // mean of the four results is then passed to LinearToGammaS(). | ||||
| static uint32_t ScaleDown(int a, int b, int c, int d) { | ||||
|   uint32_t sum = GammaToLinearS(a); | ||||
|   sum += GammaToLinearS(b); | ||||
|   sum += GammaToLinearS(c); | ||||
|   sum += GammaToLinearS(d); | ||||
|   sum += 2;   // rounding term for the division by 4 | ||||
|   return LinearToGammaS(sum >> 2); | ||||
| } | ||||
|  | ||||
| // For each of the 'w' positions, reads the three samples stored 'w' apart in | ||||
| // 'src', converts them with GammaToLinearS(), combines them with RGBToGray() | ||||
| // and stores LinearToGammaS() of that value in 'dst'. | ||||
| static WEBP_INLINE void UpdateW(const fixed_y_t* src, fixed_y_t* dst, int w) { | ||||
|   int x = 0; | ||||
|   while (x < w) { | ||||
|     const uint32_t lin_r = GammaToLinearS(src[x + 0 * w]); | ||||
|     const uint32_t lin_g = GammaToLinearS(src[x + 1 * w]); | ||||
|     const uint32_t lin_b = GammaToLinearS(src[x + 2 * w]); | ||||
|     const uint32_t gray = RGBToGray(lin_r, lin_g, lin_b); | ||||
|     dst[x] = (fixed_y_t)LinearToGammaS(gray); | ||||
|     ++x; | ||||
|   } | ||||
| } | ||||
|  | ||||
| // Produces one row of 'uv_w' entries per channel in 'dst'. Each entry is built | ||||
| // from a 2x2 group of samples (two columns from 'src1', two from 'src2') | ||||
| // reduced with ScaleDown(); the stored value is the channel minus the | ||||
| // RGBToGray() value of the three reduced channels. | ||||
| static void UpdateChroma(const fixed_y_t* src1, const fixed_y_t* src2, | ||||
|                          fixed_t* dst, int uv_w) { | ||||
|   int x; | ||||
|   for (x = 0; x < uv_w; ++x) { | ||||
|     const int left = 2 * x;        // first column of the 2x2 group | ||||
|     const int right = left + 1;    // second column of the 2x2 group | ||||
|     const int r = ScaleDown(src1[0 * uv_w + left], src1[0 * uv_w + right], | ||||
|                             src2[0 * uv_w + left], src2[0 * uv_w + right]); | ||||
|     const int g = ScaleDown(src1[2 * uv_w + left], src1[2 * uv_w + right], | ||||
|                             src2[2 * uv_w + left], src2[2 * uv_w + right]); | ||||
|     const int b = ScaleDown(src1[4 * uv_w + left], src1[4 * uv_w + right], | ||||
|                             src2[4 * uv_w + left], src2[4 * uv_w + right]); | ||||
|     const int W = RGBToGray(r, g, b); | ||||
|     dst[x + 0 * uv_w] = (fixed_t)(r - W); | ||||
|     dst[x + 1 * uv_w] = (fixed_t)(g - W); | ||||
|     dst[x + 2 * uv_w] = (fixed_t)(b - W); | ||||
|   } | ||||
| } | ||||
|  | ||||
| // Fills y[0..w-1] with RGBToGray() of the three samples stored 'w' apart in | ||||
| // 'rgb' for each position. | ||||
| static void StoreGray(const fixed_y_t* rgb, fixed_y_t* y, int w) { | ||||
|   int x = 0; | ||||
|   while (x < w) { | ||||
|     const int r = rgb[x + 0 * w]; | ||||
|     const int g = rgb[x + 1 * w]; | ||||
|     const int b = rgb[x + 2 * w]; | ||||
|     y[x] = RGBToGray(r, g, b); | ||||
|     ++x; | ||||
|   } | ||||
| } | ||||
|  | ||||
| //------------------------------------------------------------------------------ | ||||
|  | ||||
| // Blends A and B with weights 3:1 (rounded), adds W0 and clips with clip_y(). | ||||
| static WEBP_INLINE fixed_y_t Filter2(int A, int B, int W0) { | ||||
|   const int blend = (3 * A + B + 2) >> 2; | ||||
|   const int shifted = blend + W0; | ||||
|   return clip_y(shifted); | ||||
| } | ||||
|  | ||||
| //------------------------------------------------------------------------------ | ||||
|  | ||||
| // 8bit -> SFIX: shifts 'a' left by SFIX bits and ORs SHALF into the result. | ||||
| static WEBP_INLINE fixed_y_t UpLift(uint8_t a) { | ||||
|   const fixed_y_t widened = (fixed_y_t)a; | ||||
|   return (widened << SFIX) | SHALF; | ||||
| } | ||||
|  | ||||
| // Copies one row of 'pic_width' pixels into 'dst' as three consecutive | ||||
| // segments of w samples each, w being pic_width rounded up to an even value. | ||||
| // Source samples are read 'step' bytes apart and converted with UpLift(). | ||||
| // When pic_width is odd, the last sample of each segment is duplicated. | ||||
| static void ImportOneRow(const uint8_t* const r_ptr, | ||||
|                          const uint8_t* const g_ptr, | ||||
|                          const uint8_t* const b_ptr, | ||||
|                          int step, | ||||
|                          int pic_width, | ||||
|                          fixed_y_t* const dst) { | ||||
|   const int w = (pic_width + 1) & ~1; | ||||
|   int x; | ||||
|   for (x = 0; x < pic_width; ++x) { | ||||
|     const int src_pos = x * step; | ||||
|     dst[0 * w + x] = UpLift(r_ptr[src_pos]); | ||||
|     dst[1 * w + x] = UpLift(g_ptr[src_pos]); | ||||
|     dst[2 * w + x] = UpLift(b_ptr[src_pos]); | ||||
|   } | ||||
|   if (pic_width & 1) {  // pad each segment by repeating its rightmost sample | ||||
|     int plane; | ||||
|     for (plane = 0; plane < 3; ++plane) { | ||||
|       dst[pic_width + plane * w] = dst[pic_width + plane * w - 1]; | ||||
|     } | ||||
|   } | ||||
| } | ||||
|  | ||||
| // Builds two output rows (out1, out2) of three w-wide segments each, from the | ||||
| // two rows of 'best_y' and the half-width rows prev_uv / cur_uv / next_uv. | ||||
| // out1 combines cur_uv with prev_uv, out2 combines cur_uv with next_uv. | ||||
| static void InterpolateTwoRows(const fixed_y_t* const best_y, | ||||
|                                const fixed_t* prev_uv, | ||||
|                                const fixed_t* cur_uv, | ||||
|                                const fixed_t* next_uv, | ||||
|                                int w, | ||||
|                                fixed_y_t* out1, | ||||
|                                fixed_y_t* out2) { | ||||
|   const int uv_w = w >> 1; | ||||
|   const int len = (w - 1) >> 1;   // length handed to the row filter | ||||
|   const int last = w - 1; | ||||
|   const int w_is_even = !(w & 1); | ||||
|   int plane; | ||||
|   for (plane = 0; plane < 3; ++plane) {   // R, G and B segments in turn | ||||
|     // leftmost sample is handled separately | ||||
|     out1[0] = Filter2(cur_uv[0], prev_uv[0], best_y[0]); | ||||
|     out2[0] = Filter2(cur_uv[0], next_uv[0], best_y[w]); | ||||
|  | ||||
|     WebPSharpYUVFilterRow(cur_uv, prev_uv, len, best_y + 0 + 1, out1 + 1); | ||||
|     WebPSharpYUVFilterRow(cur_uv, next_uv, len, best_y + w + 1, out2 + 1); | ||||
|  | ||||
|     // rightmost sample is handled separately when w is even | ||||
|     if (w_is_even) { | ||||
|       out1[last] = Filter2(cur_uv[uv_w - 1], prev_uv[uv_w - 1], | ||||
|                            best_y[last + 0]); | ||||
|       out2[last] = Filter2(cur_uv[uv_w - 1], next_uv[uv_w - 1], | ||||
|                            best_y[last + w]); | ||||
|     } | ||||
|     // move on to the next segment | ||||
|     out1 += w; | ||||
|     out2 += w; | ||||
|     prev_uv += uv_w; | ||||
|     cur_uv  += uv_w; | ||||
|     next_uv += uv_w; | ||||
|   } | ||||
| } | ||||
|  | ||||
| // Y = 16 + weighted sum of r/g/b scaled down by (YUV_FIX + SFIX) bits, | ||||
| // clipped to a byte. | ||||
| static WEBP_INLINE uint8_t ConvertRGBToY(int r, int g, int b) { | ||||
|   const int shift = YUV_FIX + SFIX; | ||||
|   const int luma = 16839 * r + 33059 * g + 6420 * b + SROUNDER; | ||||
|   return clip_8b(16 + (luma >> shift)); | ||||
| } | ||||
|  | ||||
| // U = 128 + weighted sum of r/g/b scaled down by (YUV_FIX + SFIX) bits, | ||||
| // clipped to a byte. | ||||
| static WEBP_INLINE uint8_t ConvertRGBToU(int r, int g, int b) { | ||||
|   const int shift = YUV_FIX + SFIX; | ||||
|   const int u =  -9719 * r - 19081 * g + 28800 * b + SROUNDER; | ||||
|   return clip_8b(128 + (u >> shift)); | ||||
| } | ||||
|  | ||||
| // V = 128 + weighted sum of r/g/b scaled down by (YUV_FIX + SFIX) bits, | ||||
| // clipped to a byte. | ||||
| static WEBP_INLINE uint8_t ConvertRGBToV(int r, int g, int b) { | ||||
|   const int shift = YUV_FIX + SFIX; | ||||
|   const int v = +28800 * r - 24116 * g -  4684 * b + SROUNDER; | ||||
|   return clip_8b(128 + (v >> shift)); | ||||
| } | ||||
|  | ||||
| // Writes the final Y, U and V planes of 'picture' from the 'best_y' rows and | ||||
| // the half-resolution 'best_uv' rows (three uv_w-wide segments per row). | ||||
| // Always returns 1. | ||||
| static int ConvertWRGBToYUV(const fixed_y_t* best_y, const fixed_t* best_uv, | ||||
|                             WebPPicture* const picture) { | ||||
|   const int w = (picture->width + 1) & ~1; | ||||
|   const int h = (picture->height + 1) & ~1; | ||||
|   const int uv_w = w >> 1; | ||||
|   const int uv_h = h >> 1; | ||||
|   const fixed_y_t* w_row = best_y; | ||||
|   const fixed_t* rgb_row = best_uv; | ||||
|   uint8_t* y_out = picture->y; | ||||
|   uint8_t* u_out = picture->u; | ||||
|   uint8_t* v_out = picture->v; | ||||
|   int x, y; | ||||
|  | ||||
|   // Luma: one output row per picture row. | ||||
|   for (y = 0; y < picture->height; ++y) { | ||||
|     for (x = 0; x < picture->width; ++x) { | ||||
|       const int col = (x >> 1); | ||||
|       const int W = w_row[x]; | ||||
|       const int r = rgb_row[col + 0 * uv_w] + W; | ||||
|       const int g = rgb_row[col + 1 * uv_w] + W; | ||||
|       const int b = rgb_row[col + 2 * uv_w] + W; | ||||
|       y_out[x] = ConvertRGBToY(r, g, b); | ||||
|     } | ||||
|     w_row += w; | ||||
|     if (y & 1) rgb_row += 3 * uv_w;   // same rgb row for each pair of luma rows | ||||
|     y_out += picture->y_stride; | ||||
|   } | ||||
|  | ||||
|   // Chroma: one output row per rgb row, restarting from the first one. | ||||
|   rgb_row = best_uv; | ||||
|   for (y = 0; y < uv_h; ++y) { | ||||
|     for (x = 0; x < uv_w; ++x) { | ||||
|       const int r = rgb_row[x + 0 * uv_w]; | ||||
|       const int g = rgb_row[x + 1 * uv_w]; | ||||
|       const int b = rgb_row[x + 2 * uv_w]; | ||||
|       u_out[x] = ConvertRGBToU(r, g, b); | ||||
|       v_out[x] = ConvertRGBToV(r, g, b); | ||||
|     } | ||||
|     rgb_row += 3 * uv_w; | ||||
|     u_out += picture->uv_stride; | ||||
|     v_out += picture->uv_stride; | ||||
|   } | ||||
|   return 1; | ||||
| } | ||||
|  | ||||
| //------------------------------------------------------------------------------ | ||||
| // Main function | ||||
|  | ||||
| // Allocates W * H elements of type T with WebPSafeMalloc(). The element count | ||||
| // is computed in 64 bits so that the W * H product cannot overflow 'int' | ||||
| // before it reaches the allocator. | ||||
| #define SAFE_ALLOC(W, H, T) ((T*)WebPSafeMalloc((uint64_t)(W) * (H), sizeof(T))) | ||||
|  | ||||
| // Converts the R/G/B samples pointed to by r_ptr / g_ptr / b_ptr into the | ||||
| // Y/U/V planes of 'picture'. Returns the value of 'ok'. | ||||
| // NOTE(review): as rendered here, this body appears to interleave two versions | ||||
| // of the function: an iterative conversion working on scratch buffers, and a | ||||
| // single call to SharpArgbToYuv() followed by an 'if (!ok)' check. 'ok' is | ||||
| // declared twice and the brace opened by the allocation check is never | ||||
| // closed, so confirm against the actual source file before relying on it. | ||||
| static int PreprocessARGB(const uint8_t* r_ptr, | ||||
|                           const uint8_t* g_ptr, | ||||
|                           const uint8_t* b_ptr, | ||||
|                           int step, int rgb_stride, | ||||
|                           WebPPicture* const picture) { | ||||
|   // we expand the right/bottom border if needed | ||||
|   const int w = (picture->width + 1) & ~1; | ||||
|   const int h = (picture->height + 1) & ~1; | ||||
|   const int uv_w = w >> 1; | ||||
|   const int uv_h = h >> 1; | ||||
|   uint64_t prev_diff_y_sum = ~0; | ||||
|   int j, iter; | ||||
|  | ||||
|   // TODO(skal): allocate one big memory chunk. But for now, it's easier | ||||
|   // for valgrind debugging to have several chunks. | ||||
|   fixed_y_t* const tmp_buffer = SAFE_ALLOC(w * 3, 2, fixed_y_t);   // scratch | ||||
|   fixed_y_t* const best_y_base = SAFE_ALLOC(w, h, fixed_y_t); | ||||
|   fixed_y_t* const target_y_base = SAFE_ALLOC(w, h, fixed_y_t); | ||||
|   fixed_y_t* const best_rgb_y = SAFE_ALLOC(w, 2, fixed_y_t); | ||||
|   fixed_t* const best_uv_base = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t); | ||||
|   fixed_t* const target_uv_base = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t); | ||||
|   fixed_t* const best_rgb_uv = SAFE_ALLOC(uv_w * 3, 1, fixed_t); | ||||
|   fixed_y_t* best_y = best_y_base; | ||||
|   fixed_y_t* target_y = target_y_base; | ||||
|   fixed_t* best_uv = best_uv_base; | ||||
|   fixed_t* target_uv = target_uv_base; | ||||
|   // The iteration below stops once the summed Y difference is under 3 * w * h. | ||||
|   const uint64_t diff_y_threshold = (uint64_t)(3.0 * w * h); | ||||
|   int ok; | ||||
|  | ||||
|   if (best_y_base == NULL || best_uv_base == NULL || | ||||
|       target_y_base == NULL || target_uv_base == NULL || | ||||
|       best_rgb_y == NULL || best_rgb_uv == NULL || | ||||
|       tmp_buffer == NULL) { | ||||
|   int ok = SharpArgbToYuv(r_ptr, g_ptr, b_ptr, step, rgb_stride, picture->y, | ||||
|                           picture->y_stride, picture->u, picture->uv_stride, | ||||
|                           picture->v, picture->uv_stride, picture->width, | ||||
|                           picture->height); | ||||
|   if (!ok) { | ||||
|     ok = WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); | ||||
|     goto End; | ||||
|   } | ||||
|   assert(picture->width >= kMinDimensionIterativeConversion); | ||||
|   assert(picture->height >= kMinDimensionIterativeConversion); | ||||
|  | ||||
|   WebPInitConvertARGBToYUV(); | ||||
|  | ||||
|   // Import RGB samples to W/RGB representation. | ||||
|   for (j = 0; j < picture->height; j += 2) { | ||||
|     const int is_last_row = (j == picture->height - 1); | ||||
|     // tmp_buffer holds two consecutive rows of 3 * w samples each. | ||||
|     fixed_y_t* const src1 = tmp_buffer + 0 * w; | ||||
|     fixed_y_t* const src2 = tmp_buffer + 3 * w; | ||||
|  | ||||
|     // prepare two rows of input | ||||
|     ImportOneRow(r_ptr, g_ptr, b_ptr, step, picture->width, src1); | ||||
|     if (!is_last_row) { | ||||
|       ImportOneRow(r_ptr + rgb_stride, g_ptr + rgb_stride, b_ptr + rgb_stride, | ||||
|                    step, picture->width, src2); | ||||
|     } else { | ||||
|       // odd height: there is no second row, so the first one is duplicated | ||||
|       memcpy(src2, src1, 3 * w * sizeof(*src2)); | ||||
|     } | ||||
|     StoreGray(src1, best_y + 0, w); | ||||
|     StoreGray(src2, best_y + w, w); | ||||
|  | ||||
|     UpdateW(src1, target_y, w); | ||||
|     UpdateW(src2, target_y + w, w); | ||||
|     UpdateChroma(src1, src2, target_uv, uv_w); | ||||
|     // the initial best_uv estimate is a copy of the target | ||||
|     memcpy(best_uv, target_uv, 3 * uv_w * sizeof(*best_uv)); | ||||
|     best_y += 2 * w; | ||||
|     best_uv += 3 * uv_w; | ||||
|     target_y += 2 * w; | ||||
|     target_uv += 3 * uv_w; | ||||
|     r_ptr += 2 * rgb_stride; | ||||
|     g_ptr += 2 * rgb_stride; | ||||
|     b_ptr += 2 * rgb_stride; | ||||
|   } | ||||
|  | ||||
|   // Iterate and resolve clipping conflicts. | ||||
|   for (iter = 0; iter < kNumIterations; ++iter) { | ||||
|     const fixed_t* cur_uv = best_uv_base; | ||||
|     const fixed_t* prev_uv = best_uv_base; | ||||
|     uint64_t diff_y_sum = 0; | ||||
|  | ||||
|     // rewind all the cursors to the start of their buffers | ||||
|     best_y = best_y_base; | ||||
|     best_uv = best_uv_base; | ||||
|     target_y = target_y_base; | ||||
|     target_uv = target_uv_base; | ||||
|     for (j = 0; j < h; j += 2) { | ||||
|       fixed_y_t* const src1 = tmp_buffer + 0 * w; | ||||
|       fixed_y_t* const src2 = tmp_buffer + 3 * w; | ||||
|       { | ||||
|         // for the last pair of rows, next_uv stays on the current uv row | ||||
|         const fixed_t* const next_uv = cur_uv + ((j < h - 2) ? 3 * uv_w : 0); | ||||
|         InterpolateTwoRows(best_y, prev_uv, cur_uv, next_uv, w, src1, src2); | ||||
|         prev_uv = cur_uv; | ||||
|         cur_uv = next_uv; | ||||
|       } | ||||
|  | ||||
|       UpdateW(src1, best_rgb_y + 0 * w, w); | ||||
|       UpdateW(src2, best_rgb_y + 1 * w, w); | ||||
|       UpdateChroma(src1, src2, best_rgb_uv, uv_w); | ||||
|  | ||||
|       // update two rows of Y and one row of RGB | ||||
|       diff_y_sum += WebPSharpYUVUpdateY(target_y, best_rgb_y, best_y, 2 * w); | ||||
|       WebPSharpYUVUpdateRGB(target_uv, best_rgb_uv, best_uv, 3 * uv_w); | ||||
|  | ||||
|       best_y += 2 * w; | ||||
|       best_uv += 3 * uv_w; | ||||
|       target_y += 2 * w; | ||||
|       target_uv += 3 * uv_w; | ||||
|     } | ||||
|     // test exit condition | ||||
|     // (never on the first pass: stop when the difference is below the | ||||
|     // threshold, or when it is larger than on the previous pass) | ||||
|     if (iter > 0) { | ||||
|       if (diff_y_sum < diff_y_threshold) break; | ||||
|       if (diff_y_sum > prev_diff_y_sum) break; | ||||
|     } | ||||
|     prev_diff_y_sum = diff_y_sum; | ||||
|   } | ||||
|   // final reconstruction | ||||
|   ok = ConvertWRGBToYUV(best_y_base, best_uv_base, picture); | ||||
|  | ||||
|  End: | ||||
|   // release every scratch buffer, on both the success and the error path | ||||
|   WebPSafeFree(best_y_base); | ||||
|   WebPSafeFree(best_uv_base); | ||||
|   WebPSafeFree(target_y_base); | ||||
|   WebPSafeFree(target_uv_base); | ||||
|   WebPSafeFree(best_rgb_y); | ||||
|   WebPSafeFree(best_rgb_uv); | ||||
|   WebPSafeFree(tmp_buffer); | ||||
|   return ok; | ||||
| } | ||||
| #undef SAFE_ALLOC | ||||
|  | ||||
| //------------------------------------------------------------------------------ | ||||
| // "Fast" regular RGB->YUV | ||||
| @@ -874,7 +495,6 @@ static int ImportYUVAFromRGBA(const uint8_t* r_ptr, | ||||
|   } | ||||
|  | ||||
|   if (use_iterative_conversion) { | ||||
|     InitGammaTablesS(); | ||||
|     if (!PreprocessARGB(r_ptr, g_ptr, b_ptr, step, rgb_stride, picture)) { | ||||
|       return 0; | ||||
|     } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user