mirror of
				https://github.com/webmproject/libwebp.git
				synced 2025-10-30 18:05:36 +01:00 
			
		
		
		
	Merge "MIPS: MIPS32r1: Added optimizations for FastLog2"
This commit is contained in:
		| @@ -33,6 +33,7 @@ LOCAL_SRC_FILES := \ | |||||||
|     src/dsp/enc_mips32.c \ |     src/dsp/enc_mips32.c \ | ||||||
|     src/dsp/enc_sse2.c \ |     src/dsp/enc_sse2.c \ | ||||||
|     src/dsp/lossless.c \ |     src/dsp/lossless.c \ | ||||||
|  |     src/dsp/lossless_mips32.c \ | ||||||
|     src/dsp/lossless_sse2.c \ |     src/dsp/lossless_sse2.c \ | ||||||
|     src/dsp/upsampling.c \ |     src/dsp/upsampling.c \ | ||||||
|     src/dsp/upsampling_mips32.c \ |     src/dsp/upsampling_mips32.c \ | ||||||
|   | |||||||
| @@ -174,6 +174,7 @@ DSP_DEC_OBJS = \ | |||||||
|     $(DIROBJ)\dsp\dec_neon.obj \ |     $(DIROBJ)\dsp\dec_neon.obj \ | ||||||
|     $(DIROBJ)\dsp\dec_sse2.obj \ |     $(DIROBJ)\dsp\dec_sse2.obj \ | ||||||
|     $(DIROBJ)\dsp\lossless.obj \ |     $(DIROBJ)\dsp\lossless.obj \ | ||||||
|  |     $(DIROBJ)\dsp\lossless_mips32.obj \ | ||||||
|     $(DIROBJ)\dsp\lossless_neon.obj \ |     $(DIROBJ)\dsp\lossless_neon.obj \ | ||||||
|     $(DIROBJ)\dsp\lossless_sse2.obj \ |     $(DIROBJ)\dsp\lossless_sse2.obj \ | ||||||
|     $(DIROBJ)\dsp\upsampling.obj \ |     $(DIROBJ)\dsp\upsampling.obj \ | ||||||
|   | |||||||
| @@ -109,6 +109,7 @@ DSP_DEC_OBJS = \ | |||||||
|     src/dsp/dec_neon.o \ |     src/dsp/dec_neon.o \ | ||||||
|     src/dsp/dec_sse2.o \ |     src/dsp/dec_sse2.o \ | ||||||
|     src/dsp/lossless.o \ |     src/dsp/lossless.o \ | ||||||
|  |     src/dsp/lossless_mips32.o \ | ||||||
|     src/dsp/lossless_neon.o \ |     src/dsp/lossless_neon.o \ | ||||||
|     src/dsp/lossless_sse2.o \ |     src/dsp/lossless_sse2.o \ | ||||||
|     src/dsp/upsampling.o \ |     src/dsp/upsampling.o \ | ||||||
|   | |||||||
| @@ -17,6 +17,7 @@ COMMON_SOURCES += dec_neon.c | |||||||
| COMMON_SOURCES += dec_sse2.c | COMMON_SOURCES += dec_sse2.c | ||||||
| COMMON_SOURCES += dsp.h | COMMON_SOURCES += dsp.h | ||||||
| COMMON_SOURCES += lossless.c | COMMON_SOURCES += lossless.c | ||||||
|  | COMMON_SOURCES += lossless_mips32.c | ||||||
| COMMON_SOURCES += lossless_neon.c | COMMON_SOURCES += lossless_neon.c | ||||||
| COMMON_SOURCES += lossless_sse2.c | COMMON_SOURCES += lossless_sse2.c | ||||||
| COMMON_SOURCES += lossless.h | COMMON_SOURCES += lossless.h | ||||||
|   | |||||||
| @@ -332,7 +332,7 @@ const uint8_t kPrefixEncodeExtraBitsValue[PREFIX_LOOKUP_IDX_MAX] = { | |||||||
| #define APPROX_LOG_WITH_CORRECTION_MAX  65536 | #define APPROX_LOG_WITH_CORRECTION_MAX  65536 | ||||||
| #define APPROX_LOG_MAX                   4096 | #define APPROX_LOG_MAX                   4096 | ||||||
| #define LOG_2_RECIPROCAL 1.44269504088896338700465094007086 | #define LOG_2_RECIPROCAL 1.44269504088896338700465094007086 | ||||||
| float VP8LFastSLog2Slow(int v) { | static float FastSLog2Slow(int v) { | ||||||
|   assert(v >= LOG_LOOKUP_IDX_MAX); |   assert(v >= LOG_LOOKUP_IDX_MAX); | ||||||
|   if (v < APPROX_LOG_WITH_CORRECTION_MAX) { |   if (v < APPROX_LOG_WITH_CORRECTION_MAX) { | ||||||
|     int log_cnt = 0; |     int log_cnt = 0; | ||||||
| @@ -351,14 +351,14 @@ float VP8LFastSLog2Slow(int v) { | |||||||
|     // The correction factor: log(1 + d) ~ d; for very small d values, so |     // The correction factor: log(1 + d) ~ d; for very small d values, so | ||||||
|     // log2(1 + (v % y) / v) ~ LOG_2_RECIPROCAL * (v % y)/v |     // log2(1 + (v % y) / v) ~ LOG_2_RECIPROCAL * (v % y)/v | ||||||
|     // LOG_2_RECIPROCAL ~ 23/16 |     // LOG_2_RECIPROCAL ~ 23/16 | ||||||
|     correction = (23 * (orig_v % y)) >> 4; |     correction = (23 * (orig_v & (y - 1))) >> 4; | ||||||
|     return v_f * (kLog2Table[v] + log_cnt) + correction; |     return v_f * (kLog2Table[v] + log_cnt) + correction; | ||||||
|   } else { |   } else { | ||||||
|     return (float)(LOG_2_RECIPROCAL * v * log((double)v)); |     return (float)(LOG_2_RECIPROCAL * v * log((double)v)); | ||||||
|   } |   } | ||||||
| } | } | ||||||
|  |  | ||||||
| float VP8LFastLog2Slow(int v) { | static float FastLog2Slow(int v) { | ||||||
|   assert(v >= LOG_LOOKUP_IDX_MAX); |   assert(v >= LOG_LOOKUP_IDX_MAX); | ||||||
|   if (v < APPROX_LOG_WITH_CORRECTION_MAX) { |   if (v < APPROX_LOG_WITH_CORRECTION_MAX) { | ||||||
|     int log_cnt = 0; |     int log_cnt = 0; | ||||||
| @@ -374,7 +374,7 @@ float VP8LFastLog2Slow(int v) { | |||||||
|     if (orig_v >= APPROX_LOG_MAX) { |     if (orig_v >= APPROX_LOG_MAX) { | ||||||
|       // Since the division is still expensive, add this correction factor only |       // Since the division is still expensive, add this correction factor only | ||||||
|       // for large values of 'v'. |       // for large values of 'v'. | ||||||
|       const int correction = (23 * (orig_v % y)) >> 4; |       const int correction = (23 * (orig_v & (y - 1))) >> 4; | ||||||
|       log_2 += (double)correction / orig_v; |       log_2 += (double)correction / orig_v; | ||||||
|     } |     } | ||||||
|     return (float)log_2; |     return (float)log_2; | ||||||
| @@ -1473,8 +1473,12 @@ VP8LConvertFunc VP8LConvertBGRAToRGBA4444; | |||||||
| VP8LConvertFunc VP8LConvertBGRAToRGB565; | VP8LConvertFunc VP8LConvertBGRAToRGB565; | ||||||
| VP8LConvertFunc VP8LConvertBGRAToBGR; | VP8LConvertFunc VP8LConvertBGRAToBGR; | ||||||
|  |  | ||||||
|  | VP8LFastLog2SlowFunc VP8LFastLog2Slow; | ||||||
|  | VP8LFastLog2SlowFunc VP8LFastSLog2Slow; | ||||||
|  |  | ||||||
| extern void VP8LDspInitSSE2(void); | extern void VP8LDspInitSSE2(void); | ||||||
| extern void VP8LDspInitNEON(void); | extern void VP8LDspInitNEON(void); | ||||||
|  | extern void VP8LDspInitMIPS32(void); | ||||||
|  |  | ||||||
| void VP8LDspInit(void) { | void VP8LDspInit(void) { | ||||||
|   memcpy(VP8LPredictors, kPredictorsC, sizeof(VP8LPredictors)); |   memcpy(VP8LPredictors, kPredictorsC, sizeof(VP8LPredictors)); | ||||||
| @@ -1491,6 +1495,9 @@ void VP8LDspInit(void) { | |||||||
|   VP8LConvertBGRAToRGB565 = VP8LConvertBGRAToRGB565_C; |   VP8LConvertBGRAToRGB565 = VP8LConvertBGRAToRGB565_C; | ||||||
|   VP8LConvertBGRAToBGR = VP8LConvertBGRAToBGR_C; |   VP8LConvertBGRAToBGR = VP8LConvertBGRAToBGR_C; | ||||||
|  |  | ||||||
|  |   VP8LFastLog2Slow = FastLog2Slow; | ||||||
|  |   VP8LFastSLog2Slow = FastSLog2Slow; | ||||||
|  |  | ||||||
|   // If defined, use CPUInfo() to overwrite some pointers with faster versions. |   // If defined, use CPUInfo() to overwrite some pointers with faster versions. | ||||||
|   if (VP8GetCPUInfo != NULL) { |   if (VP8GetCPUInfo != NULL) { | ||||||
| #if defined(WEBP_USE_SSE2) | #if defined(WEBP_USE_SSE2) | ||||||
| @@ -1502,6 +1509,11 @@ void VP8LDspInit(void) { | |||||||
|     if (VP8GetCPUInfo(kNEON)) { |     if (VP8GetCPUInfo(kNEON)) { | ||||||
|       VP8LDspInitNEON(); |       VP8LDspInitNEON(); | ||||||
|     } |     } | ||||||
|  | #endif | ||||||
|  | #if defined(WEBP_USE_MIPS32) | ||||||
|  |     if (VP8GetCPUInfo(kMIPS32)) { | ||||||
|  |       VP8LDspInitMIPS32(); | ||||||
|  |     } | ||||||
| #endif | #endif | ||||||
|   } |   } | ||||||
| } | } | ||||||
|   | |||||||
| @@ -122,8 +122,11 @@ static WEBP_INLINE uint32_t VP8LSubSampleSize(uint32_t size, | |||||||
| #define LOG_LOOKUP_IDX_MAX 256 | #define LOG_LOOKUP_IDX_MAX 256 | ||||||
| extern const float kLog2Table[LOG_LOOKUP_IDX_MAX]; | extern const float kLog2Table[LOG_LOOKUP_IDX_MAX]; | ||||||
| extern const float kSLog2Table[LOG_LOOKUP_IDX_MAX]; | extern const float kSLog2Table[LOG_LOOKUP_IDX_MAX]; | ||||||
| float VP8LFastLog2Slow(int v); | typedef float (*VP8LFastLog2SlowFunc)(int v); | ||||||
| float VP8LFastSLog2Slow(int v); |  | ||||||
|  | extern VP8LFastLog2SlowFunc VP8LFastLog2Slow; | ||||||
|  | extern VP8LFastLog2SlowFunc VP8LFastSLog2Slow; | ||||||
|  |  | ||||||
| static WEBP_INLINE float VP8LFastLog2(int v) { | static WEBP_INLINE float VP8LFastLog2(int v) { | ||||||
|   return (v < LOG_LOOKUP_IDX_MAX) ? kLog2Table[v] : VP8LFastLog2Slow(v); |   return (v < LOG_LOOKUP_IDX_MAX) ? kLog2Table[v] : VP8LFastLog2Slow(v); | ||||||
| } | } | ||||||
|   | |||||||
							
								
								
									
										108
									
								
								src/dsp/lossless_mips32.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										108
									
								
								src/dsp/lossless_mips32.c
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,108 @@ | |||||||
|  | // Copyright 2014 Google Inc. All Rights Reserved. | ||||||
|  | // | ||||||
|  | // Use of this source code is governed by a BSD-style license | ||||||
|  | // that can be found in the COPYING file in the root of the source | ||||||
|  | // tree. An additional intellectual property rights grant can be found | ||||||
|  | // in the file PATENTS. All contributing project authors may | ||||||
|  | // be found in the AUTHORS file in the root of the source tree. | ||||||
|  | // ----------------------------------------------------------------------------- | ||||||
|  | // | ||||||
|  | // MIPS version of lossless functions | ||||||
|  | // | ||||||
|  | // Author(s):  Jovan Zelincevic (jovan.zelincevic@imgtec.com) | ||||||
|  |  | ||||||
|  | #include "./dsp.h" | ||||||
|  | #include "./lossless.h" | ||||||
|  |  | ||||||
|  | #if defined(WEBP_USE_MIPS32) | ||||||
|  |  | ||||||
|  | #include <math.h> | ||||||
|  | #include <stdlib.h> | ||||||
|  | #include <assert.h> | ||||||
|  |  | ||||||
|  | #define APPROX_LOG_WITH_CORRECTION_MAX  65536 | ||||||
|  | #define APPROX_LOG_MAX                   4096 | ||||||
|  | #define LOG_2_RECIPROCAL 1.44269504088896338700465094007086 | ||||||
|  | /* Approximates v * log2(v) for v >= LOG_LOOKUP_IDX_MAX (MIPS32 clz/shift version). */ | ||||||
|  | static float FastSLog2SlowMIPS32(int v) { | ||||||
|  |   assert(v >= LOG_LOOKUP_IDX_MAX); | ||||||
|  |   if (v < APPROX_LOG_WITH_CORRECTION_MAX) { | ||||||
|  |     int log_cnt, y, correction; | ||||||
|  |     const int c24 = 24; | ||||||
|  |     const float v_f = (float)v; | ||||||
|  |     int temp; | ||||||
|  |     /* asm results: log_cnt = 24 - clz(v), y = 1 << log_cnt, temp = v >> log_cnt. */ | ||||||
|  |     // Here 256 <= v < 65536, so clz(v) is 16..23 and log_cnt is 1..8. | ||||||
|  |     // Shifting v right by log_cnt leaves a value below 256 (= 2^8) in temp. | ||||||
|  |     __asm__ volatile( | ||||||
|  |       "clz      %[log_cnt], %[v]                      \n\t" | ||||||
|  |       "addiu    %[y],       $zero,        1           \n\t" | ||||||
|  |       "subu     %[log_cnt], %[c24],       %[log_cnt]  \n\t" | ||||||
|  |       "sllv     %[y],       %[y],         %[log_cnt]  \n\t" | ||||||
|  |       "srlv     %[temp],    %[v],         %[log_cnt]  \n\t" | ||||||
|  |       : [log_cnt]"=&r"(log_cnt), [y]"=&r"(y), | ||||||
|  |         [temp]"=r"(temp) | ||||||
|  |       : [c24]"r"(c24), [v]"r"(v) | ||||||
|  |     ); | ||||||
|  |     /* temp is the kLog2Table index (table has LOG_LOOKUP_IDX_MAX entries). */ | ||||||
|  |     // vf = (2^log_cnt) * Xf; where y = 2^log_cnt and Xf < 256 | ||||||
|  |     // Xf = floor(Xf) * (1 + (v % y) / v) | ||||||
|  |     // log2(Xf) = log2(floor(Xf)) + log2(1 + (v % y) / v) | ||||||
|  |     // The correction factor: log(1 + d) ~ d; for very small d values, so | ||||||
|  |     // log2(1 + (v % y) / v) ~ LOG_2_RECIPROCAL * (v % y)/v | ||||||
|  |     // LOG_2_RECIPROCAL ~ 23/16 | ||||||
|  |  | ||||||
|  |     // (v % y) = (v % 2^log_cnt) = v & (2^log_cnt - 1) | ||||||
|  |     correction = (23 * (v & (y - 1))) >> 4; | ||||||
|  |     return v_f * (kLog2Table[temp] + log_cnt) + correction; | ||||||
|  |   } else { | ||||||
|  |     return (float)(LOG_2_RECIPROCAL * v * log((double)v)); | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | /* Approximates log2(v) for v >= LOG_LOOKUP_IDX_MAX (MIPS32 clz/shift version). */ | ||||||
|  | static float FastLog2SlowMIPS32(int v) { | ||||||
|  |   assert(v >= LOG_LOOKUP_IDX_MAX); | ||||||
|  |   if (v < APPROX_LOG_WITH_CORRECTION_MAX) { | ||||||
|  |     int log_cnt, y; | ||||||
|  |     const int c24 = 24; | ||||||
|  |     double log_2; | ||||||
|  |     int temp; | ||||||
|  |     /* asm results: log_cnt = 24 - clz(v), y = 1 << log_cnt, temp = v >> log_cnt. */ | ||||||
|  |     __asm__ volatile( | ||||||
|  |       "clz      %[log_cnt], %[v]                      \n\t" | ||||||
|  |       "addiu    %[y],       $zero,        1           \n\t" | ||||||
|  |       "subu     %[log_cnt], %[c24],       %[log_cnt]  \n\t" | ||||||
|  |       "sllv     %[y],       %[y],         %[log_cnt]  \n\t" | ||||||
|  |       "srlv     %[temp],    %[v],         %[log_cnt]  \n\t" | ||||||
|  |       : [log_cnt]"=&r"(log_cnt), [y]"=&r"(y), | ||||||
|  |         [temp]"=r"(temp) | ||||||
|  |       : [c24]"r"(c24), [v]"r"(v) | ||||||
|  |     ); | ||||||
|  |     /* Table value for the shifted-down v plus the number of shifts applied. */ | ||||||
|  |     log_2 = kLog2Table[temp] + log_cnt; | ||||||
|  |     if (v >= APPROX_LOG_MAX) { | ||||||
|  |       // Since the division is still expensive, add this correction factor only | ||||||
|  |       // for large values of 'v'. | ||||||
|  |       /* v & (y - 1) equals v % y since y is a power of two; 23/16 ~ LOG_2_RECIPROCAL. */ | ||||||
|  |       const int correction = (23 * (v & (y - 1))) >> 4; | ||||||
|  |       log_2 += (double)correction / v; | ||||||
|  |     } | ||||||
|  |     return (float)log_2; | ||||||
|  |   } else { | ||||||
|  |     return (float)(LOG_2_RECIPROCAL * log((double)v)); | ||||||
|  |   } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #endif  // WEBP_USE_MIPS32 | ||||||
|  |  | ||||||
|  | //------------------------------------------------------------------------------ | ||||||
|  | // Entry point | ||||||
|  |  | ||||||
|  | extern void VP8LDspInitMIPS32(void); | ||||||
|  | /* Points the VP8LFast(S)Log2Slow hooks at the MIPS32 versions; empty without WEBP_USE_MIPS32. */ | ||||||
|  | void VP8LDspInitMIPS32(void) { | ||||||
|  | #if defined(WEBP_USE_MIPS32) | ||||||
|  |   VP8LFastSLog2Slow = FastSLog2SlowMIPS32; | ||||||
|  |   VP8LFastLog2Slow = FastLog2SlowMIPS32; | ||||||
|  | #endif  // WEBP_USE_MIPS32 | ||||||
|  | } | ||||||
		Reference in New Issue
	
	Block a user