From a7e7caa486ba8b9bb4b6f5a05f1f27fae8d2a42e Mon Sep 17 00:00:00 2001 From: Djordje Pesut Date: Wed, 14 Jan 2015 10:25:40 +0100 Subject: [PATCH] MIPS: dspr2: added optimization for function TransformColorRed. Added new function CollectColorRedTransforms to C, which calls TransformColorRed and is dispatched through the VP8LCollectColorRedTransforms function pointer. Change-Id: Ia68d73bfcf1ca2cb443dc2825910946221f87835 --- src/dsp/lossless.c | 22 ++++++++++++---- src/dsp/lossless.h | 5 ++++ src/dsp/lossless_mips_dsp_r2.c | 46 ++++++++++++++++++++++++++++++++++ 3 files changed, 68 insertions(+), 5 deletions(-) diff --git a/src/dsp/lossless.c b/src/dsp/lossless.c index 13328b2c..ddf443cf 100644 --- a/src/dsp/lossless.c +++ b/src/dsp/lossless.c @@ -1067,13 +1067,10 @@ static float PredictionCostCrossColor(const int accumulated[256], PredictionCostSpatial(counts, 3, kExpValue); } -static float GetPredictionCostCrossColorRed( +static void CollectColorRedTransforms( int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max, - int xsize, VP8LMultipliers prev_x, VP8LMultipliers prev_y, int green_to_red, - const int accumulated_red_histo[256], const uint32_t* const argb) { + int xsize, int green_to_red, int* histo, const uint32_t* const argb) { int all_y; - int histo[256] = { 0 }; - float cur_diff; for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) { int ix = all_y * xsize + tile_x_offset; int all_x; @@ -1081,6 +1078,19 @@ static float GetPredictionCostCrossColorRed( ++histo[TransformColorRed(green_to_red, argb[ix])]; // red. 
} } +} + +static float GetPredictionCostCrossColorRed( + int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max, + int xsize, VP8LMultipliers prev_x, VP8LMultipliers prev_y, int green_to_red, + const int accumulated_red_histo[256], const uint32_t* const argb) { + int histo[256] = { 0 }; + float cur_diff; + + VP8LCollectColorRedTransforms(tile_x_offset, tile_y_offset, all_x_max, + all_y_max, xsize, green_to_red, + histo, argb); + cur_diff = PredictionCostCrossColor(accumulated_red_histo, histo); if ((uint8_t)green_to_red == prev_x.green_to_red_) { cur_diff -= 3; // favor keeping the areas locally similar @@ -1738,6 +1748,7 @@ VP8LConvertFunc VP8LConvertBGRAToRGB565; VP8LConvertFunc VP8LConvertBGRAToBGR; VP8LCollectColorBlueTransformsFunc VP8LCollectColorBlueTransforms; +VP8LCollectColorRedTransformsFunc VP8LCollectColorRedTransforms; VP8LFastLog2SlowFunc VP8LFastLog2Slow; VP8LFastLog2SlowFunc VP8LFastSLog2Slow; @@ -1779,6 +1790,7 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInit(void) { VP8LConvertBGRAToBGR = VP8LConvertBGRAToBGR_C; VP8LCollectColorBlueTransforms = CollectColorBlueTransforms; + VP8LCollectColorRedTransforms = CollectColorRedTransforms; VP8LFastLog2Slow = FastLog2Slow; VP8LFastSLog2Slow = FastSLog2Slow; diff --git a/src/dsp/lossless.h b/src/dsp/lossless.h index 17e024d0..069bfa43 100644 --- a/src/dsp/lossless.h +++ b/src/dsp/lossless.h @@ -65,6 +65,11 @@ typedef void (*VP8LCollectColorBlueTransformsFunc)( const uint32_t* const argb); extern VP8LCollectColorBlueTransformsFunc VP8LCollectColorBlueTransforms; +typedef void (*VP8LCollectColorRedTransformsFunc)( + int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max, + int xsize, int green_to_red, int* histo, const uint32_t* const argb); +extern VP8LCollectColorRedTransformsFunc VP8LCollectColorRedTransforms; + // Expose some C-only fallback functions void VP8LTransformColor_C(const VP8LMultipliers* const m, uint32_t* data, int num_pixels); diff --git a/src/dsp/lossless_mips_dsp_r2.c 
b/src/dsp/lossless_mips_dsp_r2.c index 914a46a3..bbe4ad87 100644 --- a/src/dsp/lossless_mips_dsp_r2.c +++ b/src/dsp/lossless_mips_dsp_r2.c @@ -424,6 +424,51 @@ static void CollectColorBlueTransforms( } } +static WEBP_INLINE uint8_t TransformColorRed(uint8_t green_to_red, + uint32_t argb) { + const uint32_t green = argb >> 8; + uint32_t new_red = argb >> 16; + new_red -= ColorTransformDelta(green_to_red, green); + return (new_red & 0xff); +} + +static void CollectColorRedTransforms( + int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max, + int xsize, int green_to_red, int* histo, const uint32_t* const argb) { + const int gtr = (green_to_red << 16) | (green_to_red & 0xffff); + int ix = tile_y_offset * xsize + tile_x_offset; + int all_y; + for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) { + uint32_t* p_argb = (uint32_t*)&argb[ix]; + const int loop_cnt = all_x_max - tile_x_offset; + int all_x; + ix += xsize; + for (all_x = 0; all_x < (loop_cnt >> 1); ++all_x) { + int temp0, temp1, temp2, temp3, temp4; + __asm__ volatile ( + "lw %[temp0], 0(%[p_argb]) \n\t" + "lw %[temp1], 4(%[p_argb]) \n\t" + "precrq.ph.w %[temp4], %[temp0], %[temp1] \n\t" + "ins %[temp1], %[temp0], 16, 16 \n\t" + "shra.ph %[temp3], %[temp1], 8 \n\t" + "mul.ph %[temp2], %[temp3], %[gtr] \n\t" + "addiu %[p_argb], %[p_argb], 8 \n\t" + "shra.ph %[temp2], %[temp2], 5 \n\t" + "subu.qb %[temp2], %[temp4], %[temp2] \n\t" + : [p_argb]"+&r"(p_argb), [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), + [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), [temp4]"=&r"(temp4) + : [gtr]"r"(gtr) + : "memory", "hi", "lo" + ); + ++histo[(uint8_t)(temp2 >> 16)]; + ++histo[(uint8_t)temp2]; + } + if (loop_cnt & 1) { + ++histo[TransformColorRed(green_to_red, *p_argb)]; + } + } +} + #endif // WEBP_USE_MIPS_DSP_R2 //------------------------------------------------------------------------------ @@ -446,6 +491,7 @@ void VP8LDspInitMIPSdspR2(void) { VP8LSubtractGreenFromBlueAndRed = SubtractGreenFromBlueAndRed; 
VP8LTransformColor = TransformColor; VP8LCollectColorBlueTransforms = CollectColorBlueTransforms; + VP8LCollectColorRedTransforms = CollectColorRedTransforms; #endif // WEBP_USE_MIPS_DSP_R2 }