mirror of
				https://github.com/webmproject/libwebp.git
				synced 2025-10-30 10:03:23 +01:00 
			
		
		
		
	MIPS: MIPS32r1: Added optimizations for ExtraCost functions.
Optimized functions: ExtraCost and ExtraCostCombined.
Change-Id: I7eceb9ce2807296c6b43b974e4216879ddcd79f2
This commit is contained in:
		| @@ -1458,6 +1458,24 @@ void VP8LBundleColorMap(const uint8_t* const row, int width, | ||||
|   } | ||||
| } | ||||
|  | ||||
// Returns the weighted sum of population[4 .. length - 1], where entry 'k' is
// weighted by (k - 2) >> 1: entries 4 and 5 weigh 1, entries 6 and 7 weigh 2,
// and so on. Entries 0..3 never contribute. Returns 0 when 'length' <= 4.
static double ExtraCost(const int* const population, int length) {
  int i;
  double cost = 0.;
  for (i = 2; i < length - 2; ++i) {
    // Widen before multiplying: evaluated as 'int', the product overflows
    // (undefined behavior) once weight * count exceeds INT_MAX.
    cost += (double)(i >> 1) * (double)population[i + 2];
  }
  return cost;
}
|  | ||||
// Same weighting as ExtraCost(), applied to the element-wise sum of the two
// histograms: entry 'k' (for k in 4 .. length - 1) contributes
// ((k - 2) >> 1) * (X[k] + Y[k]). Returns 0 when 'length' <= 4.
static double ExtraCostCombined(const int* const X, const int* const Y,
                                int length) {
  int i;
  double cost = 0.;
  for (i = 2; i < length - 2; ++i) {
    // Sum and scale in double: both the 'int' addition and the 'int'
    // multiplication can overflow for large counts.
    const double xy = (double)X[i + 2] + (double)Y[i + 2];
    cost += (double)(i >> 1) * xy;
  }
  return cost;
}
|  | ||||
| //------------------------------------------------------------------------------ | ||||
|  | ||||
| VP8LProcessBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed; | ||||
| @@ -1476,6 +1494,9 @@ VP8LConvertFunc VP8LConvertBGRAToBGR; | ||||
| VP8LFastLog2SlowFunc VP8LFastLog2Slow; | ||||
| VP8LFastLog2SlowFunc VP8LFastSLog2Slow; | ||||
|  | ||||
| VP8LCostFunc VP8LExtraCost; | ||||
| VP8LCostCombinedFunc VP8LExtraCostCombined; | ||||
|  | ||||
| extern void VP8LDspInitSSE2(void); | ||||
| extern void VP8LDspInitNEON(void); | ||||
| extern void VP8LDspInitMIPS32(void); | ||||
| @@ -1498,6 +1519,9 @@ void VP8LDspInit(void) { | ||||
|   VP8LFastLog2Slow = FastLog2Slow; | ||||
|   VP8LFastSLog2Slow = FastSLog2Slow; | ||||
|  | ||||
|   VP8LExtraCost = ExtraCost; | ||||
|   VP8LExtraCostCombined = ExtraCostCombined; | ||||
|  | ||||
|   // If defined, use CPUInfo() to overwrite some pointers with faster versions. | ||||
|   if (VP8GetCPUInfo != NULL) { | ||||
| #if defined(WEBP_USE_SSE2) | ||||
|   | ||||
| @@ -118,6 +118,7 @@ static WEBP_INLINE uint32_t VP8LSubSampleSize(uint32_t size, | ||||
|   return (size + (1 << sampling_bits) - 1) >> sampling_bits; | ||||
| } | ||||
|  | ||||
| // ----------------------------------------------------------------------------- | ||||
| // Faster logarithm for integers. Small values use a look-up table. | ||||
| #define LOG_LOOKUP_IDX_MAX 256 | ||||
| extern const float kLog2Table[LOG_LOOKUP_IDX_MAX]; | ||||
| @@ -135,6 +136,17 @@ static WEBP_INLINE float VP8LFastSLog2(int v) { | ||||
|   return (v < LOG_LOOKUP_IDX_MAX) ? kSLog2Table[v] : VP8LFastSLog2Slow(v); | ||||
| } | ||||
|  | ||||
| // ----------------------------------------------------------------------------- | ||||
| // Huffman-cost related functions. | ||||
|  | ||||
| typedef double (*VP8LCostFunc)(const int* const population, int length); | ||||
| typedef double (*VP8LCostCombinedFunc)(const int* const X, | ||||
|                                        const int* const Y, | ||||
|                                        int length); | ||||
|  | ||||
| extern VP8LCostFunc VP8LExtraCost; | ||||
| extern VP8LCostCombinedFunc VP8LExtraCostCombined; | ||||
|  | ||||
| // ----------------------------------------------------------------------------- | ||||
| // PrefixEncode() | ||||
|  | ||||
|   | ||||
| @@ -9,7 +9,8 @@ | ||||
| // | ||||
| // MIPS version of lossless functions | ||||
| // | ||||
| // Author(s):  Jovan Zelincevic (jovan.zelincevic@imgtec.com) | ||||
| // Author(s):  Djordje Pesut    (djordje.pesut@imgtec.com) | ||||
| //             Jovan Zelincevic (jovan.zelincevic@imgtec.com) | ||||
|  | ||||
| #include "./dsp.h" | ||||
| #include "./lossless.h" | ||||
| @@ -93,6 +94,100 @@ static float FastLog2SlowMIPS32(int v) { | ||||
|   } | ||||
| } | ||||
|  | ||||
// C version of this function:
//   int i = 0;
//   int64_t cost = 0;
//   const int* pop = &population[4];
//   const int* const LoopEnd = &population[length];
//   while (pop != LoopEnd) {
//     ++i;
//     cost += (int64_t)i * *pop;
//     cost += (int64_t)i * *(pop + 1);
//     pop += 2;
//   }
//   return (double)cost;
//
// The loop consumes two entries per iteration and terminates on pointer
// equality only, so 'length' must be even and at least 4.
static double ExtraCostMIPS32(const int* const population, int length) {
  int i, temp0, temp1;
  const int* pop = &population[4];
  const int* const LoopEnd = &population[length];

  __asm__ volatile(
    // Clear the HI/LO accumulator pair and the pair counter 'i'.
    "mult   $zero,    $zero                  \n\t"
    "xor    %[i],     %[i],       %[i]       \n\t"
    "beq    %[pop],   %[LoopEnd], 2f         \n\t"
  "1:                                        \n\t"
    "lw     %[temp0], 0(%[pop])              \n\t"
    "lw     %[temp1], 4(%[pop])              \n\t"
    "addiu  %[i],     %[i],       1          \n\t"
    "addiu  %[pop],   %[pop],     8          \n\t"
    // HI:LO += i * entry, for both entries of the pair.
    "madd   %[i],     %[temp0]               \n\t"
    "madd   %[i],     %[temp1]               \n\t"
    "bne    %[pop],   %[LoopEnd], 1b         \n\t"
  "2:                                        \n\t"
    "mfhi   %[temp0]                         \n\t"
    "mflo   %[temp1]                         \n\t"
    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
      [i]"=&r"(i), [pop]"+r"(pop)
    : [LoopEnd]"r"(LoopEnd)
    : "memory", "hi", "lo"
  );

  // Reassemble the 64-bit accumulator from HI (temp0) and LO (temp1). The
  // words are combined as unsigned values: OR-ing the signed 'temp1' directly
  // would sign-extend it and set the whole upper word whenever bit 31 of LO
  // is set.
  return (double)(int64_t)(((uint64_t)(uint32_t)temp0 << 32) |
                           (uint32_t)temp1);
}
|  | ||||
// C version of this function:
//   int i = 0;
//   int64_t cost = 0;
//   const int* pX = &X[4];
//   const int* pY = &Y[4];
//   const int* const LoopEnd = &X[length];
//   while (pX != LoopEnd) {
//     const int xy0 = *pX + *pY;
//     const int xy1 = *(pX + 1) + *(pY + 1);
//     ++i;
//     cost += (int64_t)i * xy0;
//     cost += (int64_t)i * xy1;
//     pX += 2;
//     pY += 2;
//   }
//   return (double)cost;
//
// The loop consumes two entries of each array per iteration and terminates on
// pointer equality only, so 'length' must be even and at least 4.
static double ExtraCostCombinedMIPS32(const int* const X, const int* const Y,
                                      int length) {
  int i, temp0, temp1, temp2, temp3;
  const int* pX = &X[4];
  const int* pY = &Y[4];
  const int* const LoopEnd = &X[length];

  __asm__ volatile(
    // Clear the HI/LO accumulator pair and the pair counter 'i'.
    "mult   $zero,    $zero                  \n\t"
    "xor    %[i],     %[i],       %[i]       \n\t"
    "beq    %[pX],    %[LoopEnd], 2f         \n\t"
  "1:                                        \n\t"
    "lw     %[temp0], 0(%[pX])               \n\t"
    "lw     %[temp1], 0(%[pY])               \n\t"
    "lw     %[temp2], 4(%[pX])               \n\t"
    "lw     %[temp3], 4(%[pY])               \n\t"
    "addiu  %[i],     %[i],       1          \n\t"
    // temp0 = X[k] + Y[k], temp2 = X[k + 1] + Y[k + 1] (32-bit adds).
    "addu   %[temp0], %[temp0],   %[temp1]   \n\t"
    "addu   %[temp2], %[temp2],   %[temp3]   \n\t"
    "addiu  %[pX],    %[pX],      8          \n\t"
    "addiu  %[pY],    %[pY],      8          \n\t"
    // HI:LO += i * sum, for both sums of the pair.
    "madd   %[i],     %[temp0]               \n\t"
    "madd   %[i],     %[temp2]               \n\t"
    "bne    %[pX],    %[LoopEnd], 1b         \n\t"
  "2:                                        \n\t"
    "mfhi   %[temp0]                         \n\t"
    "mflo   %[temp1]                         \n\t"
    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
      [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
      [i]"=&r"(i), [pX]"+r"(pX), [pY]"+r"(pY)
    : [LoopEnd]"r"(LoopEnd)
    : "memory", "hi", "lo"
  );

  // Reassemble the 64-bit accumulator from HI (temp0) and LO (temp1). The
  // words are combined as unsigned values: OR-ing the signed 'temp1' directly
  // would sign-extend it and set the whole upper word whenever bit 31 of LO
  // is set.
  return (double)(int64_t)(((uint64_t)(uint32_t)temp0 << 32) |
                           (uint32_t)temp1);
}
|  | ||||
| #endif  // WEBP_USE_MIPS32 | ||||
|  | ||||
| //------------------------------------------------------------------------------ | ||||
| @@ -104,5 +199,7 @@ void VP8LDspInitMIPS32(void) { | ||||
| #if defined(WEBP_USE_MIPS32) | ||||
|   VP8LFastSLog2Slow = FastSLog2SlowMIPS32; | ||||
|   VP8LFastLog2Slow = FastLog2SlowMIPS32; | ||||
|   VP8LExtraCost = ExtraCostMIPS32; | ||||
|   VP8LExtraCostCombined = ExtraCostCombinedMIPS32; | ||||
| #endif  // WEBP_USE_MIPS32 | ||||
| } | ||||
|   | ||||
| @@ -252,24 +252,6 @@ static double GetCombinedEntropy(const int* const X, const int* const Y, | ||||
|   return BitsEntropyCombined(X, Y, length) + HuffmanCostCombined(X, Y, length); | ||||
| } | ||||
|  | ||||
// Returns the weighted sum of population[4 .. length - 1], where entry 'k' is
// weighted by (k - 2) >> 1: entries 4 and 5 weigh 1, entries 6 and 7 weigh 2,
// and so on. Entries 0..3 never contribute. Returns 0 when 'length' <= 4.
static double ExtraCost(const int* const population, int length) {
  int i;
  double cost = 0.;
  for (i = 2; i < length - 2; ++i) {
    // Widen before multiplying: evaluated as 'int', the product overflows
    // (undefined behavior) once weight * count exceeds INT_MAX.
    cost += (double)(i >> 1) * (double)population[i + 2];
  }
  return cost;
}
|  | ||||
// Same weighting as ExtraCost(), applied to the element-wise sum of the two
// histograms: entry 'k' (for k in 4 .. length - 1) contributes
// ((k - 2) >> 1) * (X[k] + Y[k]). Returns 0 when 'length' <= 4.
static double ExtraCostCombined(const int* const X, const int* const Y,
                                int length) {
  int i;
  double cost = 0.;
  for (i = 2; i < length - 2; ++i) {
    // Sum and scale in double: both the 'int' addition and the 'int'
    // multiplication can overflow for large counts.
    const double xy = (double)X[i + 2] + (double)Y[i + 2];
    cost += (double)(i >> 1) * xy;
  }
  return cost;
}
|  | ||||
| // Estimates the Entropy + Huffman + other block overhead size cost. | ||||
| double VP8LHistogramEstimateBits(const VP8LHistogram* const p) { | ||||
|   return | ||||
| @@ -278,8 +260,8 @@ double VP8LHistogramEstimateBits(const VP8LHistogram* const p) { | ||||
|       + PopulationCost(p->blue_, 256) | ||||
|       + PopulationCost(p->alpha_, 256) | ||||
|       + PopulationCost(p->distance_, NUM_DISTANCE_CODES) | ||||
|       + ExtraCost(p->literal_ + 256, NUM_LENGTH_CODES) | ||||
|       + ExtraCost(p->distance_, NUM_DISTANCE_CODES); | ||||
|       + VP8LExtraCost(p->literal_ + 256, NUM_LENGTH_CODES) | ||||
|       + VP8LExtraCost(p->distance_, NUM_DISTANCE_CODES); | ||||
| } | ||||
|  | ||||
| double VP8LHistogramEstimateBitsBulk(const VP8LHistogram* const p) { | ||||
| @@ -289,8 +271,8 @@ double VP8LHistogramEstimateBitsBulk(const VP8LHistogram* const p) { | ||||
|       + BitsEntropy(p->blue_, 256) | ||||
|       + BitsEntropy(p->alpha_, 256) | ||||
|       + BitsEntropy(p->distance_, NUM_DISTANCE_CODES) | ||||
|       + ExtraCost(p->literal_ + 256, NUM_LENGTH_CODES) | ||||
|       + ExtraCost(p->distance_, NUM_DISTANCE_CODES); | ||||
|       + VP8LExtraCost(p->literal_ + 256, NUM_LENGTH_CODES) | ||||
|       + VP8LExtraCost(p->distance_, NUM_DISTANCE_CODES); | ||||
| } | ||||
|  | ||||
| // ----------------------------------------------------------------------------- | ||||
| @@ -322,7 +304,7 @@ static int GetCombinedHistogramEntropy(const VP8LHistogram* const a, | ||||
|                                                         b->palette_code_bits_; | ||||
|   *cost += GetCombinedEntropy(a->literal_, b->literal_, | ||||
|                               VP8LHistogramNumCodes(palette_code_bits)); | ||||
|   *cost += ExtraCostCombined(a->literal_ + 256, b->literal_ + 256, | ||||
|   *cost += VP8LExtraCostCombined(a->literal_ + 256, b->literal_ + 256, | ||||
|                                  NUM_LENGTH_CODES); | ||||
|   if (*cost > cost_threshold) return 0; | ||||
|  | ||||
| @@ -336,7 +318,8 @@ static int GetCombinedHistogramEntropy(const VP8LHistogram* const a, | ||||
|   if (*cost > cost_threshold) return 0; | ||||
|  | ||||
|   *cost += GetCombinedEntropy(a->distance_, b->distance_, NUM_DISTANCE_CODES); | ||||
|   *cost += ExtraCostCombined(a->distance_, b->distance_, NUM_DISTANCE_CODES); | ||||
|   *cost += VP8LExtraCostCombined(a->distance_, b->distance_, | ||||
|                                  NUM_DISTANCE_CODES); | ||||
|   if (*cost > cost_threshold) return 0; | ||||
|  | ||||
|   return 1; | ||||
| @@ -426,10 +409,10 @@ static void UpdateHistogramCost(VP8LHistogram* const h) { | ||||
|   const double alpha_cost = PopulationCost(h->alpha_, 256); | ||||
|   const double distance_cost = | ||||
|       PopulationCost(h->distance_, NUM_DISTANCE_CODES) + | ||||
|       ExtraCost(h->distance_, NUM_DISTANCE_CODES); | ||||
|       VP8LExtraCost(h->distance_, NUM_DISTANCE_CODES); | ||||
|   const int num_codes = VP8LHistogramNumCodes(h->palette_code_bits_); | ||||
|   h->literal_cost_ = PopulationCost(h->literal_, num_codes) + | ||||
|                      ExtraCost(h->literal_ + 256, NUM_LENGTH_CODES); | ||||
|                      VP8LExtraCost(h->literal_ + 256, NUM_LENGTH_CODES); | ||||
|   h->red_cost_ = PopulationCost(h->red_, 256); | ||||
|   h->blue_cost_ = PopulationCost(h->blue_, 256); | ||||
|   h->bit_cost_ = h->literal_cost_ + h->red_cost_ + h->blue_cost_ + | ||||
|   | ||||
		Reference in New Issue
	
	Block a user