MIPS: dspr2: added optimization for function TransformColorBlue

Added a new C function, CollectColorBlueTransforms, which calls
TransformColorBlue and is invoked through a function pointer.

Change-Id: Ia488b7a7a689223b5d33aae9724afab89b97fced
This commit is contained in:
Djordje Pesut 2015-01-06 09:31:58 +01:00
parent d7c4b02a57
commit 7b16197361
3 changed files with 84 additions and 5 deletions

View File

@ -1133,14 +1133,11 @@ static void GetBestGreenToRed(
best_tx->green_to_red_ = green_to_red;
}
static float GetPredictionCostCrossColorBlue(
static void CollectColorBlueTransforms(
int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max,
int xsize, VP8LMultipliers prev_x, VP8LMultipliers prev_y,
int green_to_blue, int red_to_blue, const int accumulated_blue_histo[256],
int xsize, int green_to_blue, int red_to_blue, int* histo,
const uint32_t* const argb) {
int all_y;
int histo[256] = { 0 };
float cur_diff;
for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) {
int all_x;
int ix = all_y * xsize + tile_x_offset;
@ -1148,6 +1145,20 @@ static float GetPredictionCostCrossColorBlue(
++histo[TransformColorBlue(green_to_blue, red_to_blue, argb[ix])];
}
}
}
static float GetPredictionCostCrossColorBlue(
int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max,
int xsize, VP8LMultipliers prev_x, VP8LMultipliers prev_y,
int green_to_blue, int red_to_blue, const int accumulated_blue_histo[256],
const uint32_t* const argb) {
int histo[256] = { 0 };
float cur_diff;
VP8LCollectColorBlueTransforms(tile_x_offset, tile_y_offset, all_x_max,
all_y_max, xsize, green_to_blue, red_to_blue,
histo, argb);
cur_diff = PredictionCostCrossColor(accumulated_blue_histo, histo);
if ((uint8_t)green_to_blue == prev_x.green_to_blue_) {
cur_diff -= 3; // favor keeping the areas locally similar
@ -1726,6 +1737,8 @@ VP8LConvertFunc VP8LConvertBGRAToRGBA4444;
VP8LConvertFunc VP8LConvertBGRAToRGB565;
VP8LConvertFunc VP8LConvertBGRAToBGR;
VP8LCollectColorBlueTransformsFunc VP8LCollectColorBlueTransforms;
VP8LFastLog2SlowFunc VP8LFastLog2Slow;
VP8LFastLog2SlowFunc VP8LFastSLog2Slow;
@ -1765,6 +1778,8 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInit(void) {
VP8LConvertBGRAToRGB565 = VP8LConvertBGRAToRGB565_C;
VP8LConvertBGRAToBGR = VP8LConvertBGRAToBGR_C;
VP8LCollectColorBlueTransforms = CollectColorBlueTransforms;
VP8LFastLog2Slow = FastLog2Slow;
VP8LFastSLog2Slow = FastSLog2Slow;

View File

@ -59,6 +59,12 @@ extern VP8LConvertFunc VP8LConvertBGRAToRGBA4444;
extern VP8LConvertFunc VP8LConvertBGRAToRGB565;
extern VP8LConvertFunc VP8LConvertBGRAToBGR;
// Signature of the routine that accumulates, into 'histo', a count of the
// transformed blue value of every pixel of 'argb' inside the tile covering
// columns [tile_x_offset, all_x_max) and rows [tile_y_offset, all_y_max).
// 'xsize' is the distance, in pixels, between the starts of consecutive rows.
// 'histo' is indexed by an 8-bit value, so it must provide 256 entries; the
// counts are added to whatever 'histo' already contains.
typedef void (*VP8LCollectColorBlueTransformsFunc)(
int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max,
int xsize, int green_to_blue, int red_to_blue, int* histo,
const uint32_t* const argb);
// Currently selected implementation. It is assigned in VP8LDspInit() and, when
// WEBP_USE_MIPS_DSP_R2 is defined, in VP8LDspInitMIPSdspR2().
extern VP8LCollectColorBlueTransformsFunc VP8LCollectColorBlueTransforms;
// Expose some C-only fallback functions
void VP8LTransformColor_C(const VP8LMultipliers* const m,
uint32_t* data, int num_pixels);

View File

@ -367,6 +367,63 @@ static void TransformColor(const VP8LMultipliers* const m, uint32_t* data,
}
}
// Returns the transformed blue channel of 'argb': starting from the low byte
// of the pixel, subtracts ColorTransformDelta(green_to_blue, argb >> 8) and
// then ColorTransformDelta(red_to_blue, argb >> 16). Every intermediate result
// is truncated to 8 bits, so the arithmetic wraps modulo 256.
static WEBP_INLINE uint8_t TransformColorBlue(uint8_t green_to_blue,
                                              uint8_t red_to_blue,
                                              uint32_t argb) {
  const uint32_t red_bits = argb >> 16;
  const uint32_t green_bits = argb >> 8;
  uint8_t blue = (uint8_t)argb;
  blue = (uint8_t)(blue - ColorTransformDelta(green_to_blue, green_bits));
  blue = (uint8_t)(blue - ColorTransformDelta(red_to_blue, red_bits));
  return blue;
}
// MIPS DSP R2 implementation of VP8LCollectColorBlueTransforms.
//
// For every pixel of 'argb' in the tile covering columns
// [tile_x_offset, all_x_max) and rows [tile_y_offset, all_y_max), increments
// the 'histo' bin selected by the pixel's transformed blue value. 'xsize' is
// the row stride in pixels. 'histo' is indexed by an 8-bit value (256 bins)
// and is not cleared here.
//
// Pixels are processed two at a time by the inline assembly; a trailing odd
// pixel in a row is handled by the scalar TransformColorBlue().
static void CollectColorBlueTransforms(
    int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max,
    int xsize, int green_to_blue, int red_to_blue, int* histo,
    const uint32_t* const argb) {
  // Replicate each multiplier into both 16-bit halves of a word so the paired
  // halfword multiply applies it to two pixels at once. The packing is done in
  // unsigned arithmetic: left-shifting a negative 'int' is undefined in C,
  // while the unsigned form yields the intended bit pattern for any input.
  const uint32_t rtb =
      ((uint32_t)red_to_blue << 16) | ((uint32_t)red_to_blue & 0xffffu);
  const uint32_t gtb =
      ((uint32_t)green_to_blue << 16) | ((uint32_t)green_to_blue & 0xffffu);
  // Keeps the low byte of each halfword (the two blue channels).
  const uint32_t mask = 0xff00ffu;
  int ix = tile_y_offset * xsize + tile_x_offset;
  int all_y;
  for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) {
    // Read-only cursor over the current row; the asm block advances it by two
    // pixels (8 bytes) per iteration.
    const uint32_t* p_argb = &argb[ix];
    const int loop_cnt = all_x_max - tile_x_offset;
    int all_x;
    ix += xsize;
    for (all_x = 0; all_x < (loop_cnt >> 1); ++all_x) {
      int temp0, temp1, temp2, temp3, temp4, temp5, temp6;
      // Outline of the sequence below (instruction semantics per the MIPS DSP
      // ASE manual -- NOTE(review): confirm against the manual when editing):
      //   lw/lw        temp0 = pixel 0, temp1 = pixel 1
      //   precr.qb.ph  gather the red/blue bytes of both pixels into temp2
      //   ins          temp1 = low halfword of pixel 0 : low halfword of
      //                pixel 1, i.e. the green/blue bytes of both pixels
      //   shra.ph 8    sign-extend red (temp2) and green (temp3) per halfword
      //   mul.ph       multiply by the replicated red/green multipliers
      //   and          temp4 = the two blue bytes
      //   shra.ph 5    scale the products down by 32
      //   subu.qb      subtract both deltas from blue, byte-wise
      // On exit, bits 16..23 of temp2 hold the result for the first pixel and
      // bits 0..7 the result for the second.
      __asm__ volatile (
        "lw           %[temp0],  0(%[p_argb])             \n\t"
        "lw           %[temp1],  4(%[p_argb])             \n\t"
        "precr.qb.ph  %[temp2],  %[temp0],  %[temp1]      \n\t"
        "ins          %[temp1],  %[temp0],  16,    16     \n\t"
        "shra.ph      %[temp2],  %[temp2],  8             \n\t"
        "shra.ph      %[temp3],  %[temp1],  8             \n\t"
        "mul.ph       %[temp5],  %[temp2],  %[rtb]        \n\t"
        "mul.ph       %[temp6],  %[temp3],  %[gtb]        \n\t"
        "and          %[temp4],  %[temp1],  %[mask]       \n\t"
        "addiu        %[p_argb], %[p_argb], 8             \n\t"
        "shra.ph      %[temp5],  %[temp5],  5             \n\t"
        "shra.ph      %[temp6],  %[temp6],  5             \n\t"
        "subu.qb      %[temp2],  %[temp4],  %[temp5]      \n\t"
        "subu.qb      %[temp2],  %[temp2],  %[temp6]      \n\t"
        : [p_argb]"+&r"(p_argb), [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
          [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), [temp4]"=&r"(temp4),
          [temp5]"=&r"(temp5), [temp6]"=&r"(temp6)
        : [rtb]"r"(rtb), [gtb]"r"(gtb), [mask]"r"(mask)
        : "memory", "hi", "lo"
      );
      ++histo[(uint8_t)(temp2 >> 16)];
      ++histo[(uint8_t)temp2];
    }
    // Odd tile width: one pixel remains at the end of the row.
    if (loop_cnt & 1) {
      ++histo[TransformColorBlue(green_to_blue, red_to_blue, *p_argb)];
    }
  }
}
#endif // WEBP_USE_MIPS_DSP_R2
//------------------------------------------------------------------------------
@ -388,6 +445,7 @@ void VP8LDspInitMIPSdspR2(void) {
VP8LPredictors[13] = Predictor13;
VP8LSubtractGreenFromBlueAndRed = SubtractGreenFromBlueAndRed;
VP8LTransformColor = TransformColor;
VP8LCollectColorBlueTransforms = CollectColorBlueTransforms;
#endif // WEBP_USE_MIPS_DSP_R2
}