diff --git a/src/dsp/lossless.c b/src/dsp/lossless.c
index 0af443ca..13328b2c 100644
--- a/src/dsp/lossless.c
+++ b/src/dsp/lossless.c
@@ -1133,14 +1133,14 @@ static void GetBestGreenToRed(
   best_tx->green_to_red_ = green_to_red;
 }
 
-static float GetPredictionCostCrossColorBlue(
+// Adds to 'histo' (which is not cleared here) one count per visited pixel of
+// the tile, indexed by TransformColorBlue(green_to_blue, red_to_blue, pixel).
+// Reached through the VP8LCollectColorBlueTransforms function pointer.
+static void CollectColorBlueTransforms(
     int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max,
-    int xsize, VP8LMultipliers prev_x, VP8LMultipliers prev_y,
-    int green_to_blue, int red_to_blue, const int accumulated_blue_histo[256],
+    int xsize, int green_to_blue, int red_to_blue, int* histo,
     const uint32_t* const argb) {
   int all_y;
-  int histo[256] = { 0 };
-  float cur_diff;
   for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) {
     int all_x;
     int ix = all_y * xsize + tile_x_offset;
@@ -1148,6 +1148,22 @@ static float GetPredictionCostCrossColorBlue(
       ++histo[TransformColorBlue(green_to_blue, red_to_blue, argb[ix])];
     }
   }
+}
+
+static float GetPredictionCostCrossColorBlue(
+    int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max,
+    int xsize, VP8LMultipliers prev_x, VP8LMultipliers prev_y,
+    int green_to_blue, int red_to_blue, const int accumulated_blue_histo[256],
+    const uint32_t* const argb) {
+  int histo[256] = { 0 };
+  float cur_diff;
+
+  // Go through the dispatch pointer rather than calling the C version
+  // directly, so that a platform-specific implementation can be picked up.
+  VP8LCollectColorBlueTransforms(tile_x_offset, tile_y_offset, all_x_max,
+                                 all_y_max, xsize, green_to_blue, red_to_blue,
+                                 histo, argb);
+
   cur_diff = PredictionCostCrossColor(accumulated_blue_histo, histo);
   if ((uint8_t)green_to_blue == prev_x.green_to_blue_) {
     cur_diff -= 3;  // favor keeping the areas locally similar
@@ -1726,6 +1742,8 @@ VP8LConvertFunc VP8LConvertBGRAToRGBA4444;
 VP8LConvertFunc VP8LConvertBGRAToRGB565;
 VP8LConvertFunc VP8LConvertBGRAToBGR;
 
+VP8LCollectColorBlueTransformsFunc VP8LCollectColorBlueTransforms;
+
 VP8LFastLog2SlowFunc VP8LFastLog2Slow;
 VP8LFastLog2SlowFunc VP8LFastSLog2Slow;
 
@@ -1765,6 +1783,8 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInit(void) {
   VP8LConvertBGRAToRGB565 = VP8LConvertBGRAToRGB565_C;
   VP8LConvertBGRAToBGR = VP8LConvertBGRAToBGR_C;
 
+  VP8LCollectColorBlueTransforms = CollectColorBlueTransforms;
+
   VP8LFastLog2Slow = FastLog2Slow;
   VP8LFastSLog2Slow = FastSLog2Slow;
 
diff --git a/src/dsp/lossless.h b/src/dsp/lossless.h
index 44469277..17e024d0 100644
--- a/src/dsp/lossless.h
+++ b/src/dsp/lossless.h
@@ -59,6 +59,15 @@ extern VP8LConvertFunc VP8LConvertBGRAToRGBA4444;
 extern VP8LConvertFunc VP8LConvertBGRAToRGB565;
 extern VP8LConvertFunc VP8LConvertBGRAToBGR;
 
+// Histogram collection for the blue cross-color transform. VP8LDspInit()
+// installs the C version; VP8LDspInitMIPSdspR2() may replace it. 'histo' is
+// indexed by an 8-bit value and is incremented, not cleared, by the callee.
+typedef void (*VP8LCollectColorBlueTransformsFunc)(
+    int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max,
+    int xsize, int green_to_blue, int red_to_blue, int* histo,
+    const uint32_t* const argb);
+extern VP8LCollectColorBlueTransformsFunc VP8LCollectColorBlueTransforms;
+
 // Expose some C-only fallback functions
 void VP8LTransformColor_C(const VP8LMultipliers* const m,
                           uint32_t* data, int num_pixels);
diff --git a/src/dsp/lossless_mips_dsp_r2.c b/src/dsp/lossless_mips_dsp_r2.c
index d85811b5..914a46a3 100644
--- a/src/dsp/lossless_mips_dsp_r2.c
+++ b/src/dsp/lossless_mips_dsp_r2.c
@@ -367,6 +367,75 @@ static void TransformColor(const VP8LMultipliers* const m, uint32_t* data,
   }
 }
 
+// Blue-channel cross-color transform of a single pixel (lossless.c uses a
+// helper of the same name). Used below for the odd trailing pixel of a row.
+static WEBP_INLINE uint8_t TransformColorBlue(uint8_t green_to_blue,
+                                              uint8_t red_to_blue,
+                                              uint32_t argb) {
+  const uint32_t green = argb >> 8;
+  const uint32_t red = argb >> 16;
+  uint8_t new_blue = argb;
+  new_blue -= ColorTransformDelta(green_to_blue, green);
+  new_blue -= ColorTransformDelta(red_to_blue, red);
+  return (new_blue & 0xff);
+}
+
+// MIPS DSP R2 version of the blue-transform histogram collection. Pixels are
+// processed two at a time by the asm block; an odd trailing pixel in a row is
+// handled by the scalar TransformColorBlue(). 'histo' is incremented, never
+// cleared, here.
+static void CollectColorBlueTransforms(
+    int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max,
+    int xsize, int green_to_blue, int red_to_blue, int* histo,
+    const uint32_t* const argb) {
+  // Replicate the low 16 bits of each multiplier into both halfwords for
+  // mul.ph. The packing is done on unsigned values: left-shifting a negative
+  // int is undefined behavior in C.
+  const uint32_t rtb16 = (uint32_t)red_to_blue & 0xffffu;
+  const uint32_t gtb16 = (uint32_t)green_to_blue & 0xffffu;
+  const uint32_t rtb = (rtb16 << 16) | rtb16;
+  const uint32_t gtb = (gtb16 << 16) | gtb16;
+  const uint32_t mask = 0xff00ffu;
+  int ix = tile_y_offset * xsize + tile_x_offset;
+  int all_y;
+  for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) {
+    const uint32_t* p_argb = &argb[ix];  // only read from; advanced by the asm
+    const int loop_cnt = all_x_max - tile_x_offset;
+    int all_x;
+    ix += xsize;
+    for (all_x = 0; all_x < (loop_cnt >> 1); ++all_x) {
+      int temp0, temp1, temp2, temp3, temp4, temp5, temp6;
+      __asm__ volatile (
+        "lw           %[temp0],  0(%[p_argb])             \n\t"
+        "lw           %[temp1],  4(%[p_argb])             \n\t"
+        "precr.qb.ph  %[temp2],  %[temp0],  %[temp1]      \n\t"
+        "ins          %[temp1],  %[temp0],  16,    16     \n\t"
+        "shra.ph      %[temp2],  %[temp2],  8             \n\t"
+        "shra.ph      %[temp3],  %[temp1],  8             \n\t"
+        "mul.ph       %[temp5],  %[temp2],  %[rtb]        \n\t"
+        "mul.ph       %[temp6],  %[temp3],  %[gtb]        \n\t"
+        "and          %[temp4],  %[temp1],  %[mask]       \n\t"
+        "addiu        %[p_argb], %[p_argb], 8             \n\t"
+        "shra.ph      %[temp5],  %[temp5],  5             \n\t"
+        "shra.ph      %[temp6],  %[temp6],  5             \n\t"
+        "subu.qb      %[temp2],  %[temp4],  %[temp5]      \n\t"
+        "subu.qb      %[temp2],  %[temp2],  %[temp6]      \n\t"
+        : [p_argb]"+&r"(p_argb), [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
+          [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), [temp4]"=&r"(temp4),
+          [temp5]"=&r"(temp5), [temp6]"=&r"(temp6)
+        : [rtb]"r"(rtb), [gtb]"r"(gtb), [mask]"r"(mask)
+        : "memory", "hi", "lo"
+      );
+      // The two transformed blue values are read from bytes 2 and 0 of temp2.
+      ++histo[(uint8_t)((uint32_t)temp2 >> 16)];
+      ++histo[(uint8_t)temp2];
+    }
+    if (loop_cnt & 1) {
+      ++histo[TransformColorBlue(green_to_blue, red_to_blue, *p_argb)];
+    }
+  }
+}
+
 #endif  // WEBP_USE_MIPS_DSP_R2
 
 //------------------------------------------------------------------------------
@@ -388,6 +457,7 @@ void VP8LDspInitMIPSdspR2(void) {
   VP8LPredictors[13] = Predictor13;
   VP8LSubtractGreenFromBlueAndRed = SubtractGreenFromBlueAndRed;
   VP8LTransformColor = TransformColor;
+  VP8LCollectColorBlueTransforms = CollectColorBlueTransforms;
 #endif  // WEBP_USE_MIPS_DSP_R2
 }
 