Make a separate case for low_effort in CopyImageWithPrediction for more speed.

This gives roughly a 1% speedup for low_effort. But actually this is a
preparation for the upcoming CL that changes RGB values of transparent pixels
based on prediction, which should not be done for low_effort because that would
slightly hurt its performance.

On 1000 PNGs, with quality 0, method 0:
Before:
Compression (output/input): 2.9120/3.2667 bpp, Encode rate (raw data): 36.034 MP/s
After:
Compression (output/input): 2.9120/3.2667 bpp, Encode rate (raw data): 36.428 MP/s

Change-Id: I5ed9f599bbf908a917723f3c780551ceb7fd724d
This commit is contained in:
Lode Vandevenne 2015-11-27 14:01:54 +00:00 committed by James Zern
parent 5ae220bef6
commit 5bda52d4e8

View File

@ -24,6 +24,8 @@
#define MAX_DIFF_COST (1e30f) #define MAX_DIFF_COST (1e30f)
static const int kPredLowEffort = 11;
// lookup table for small values of log2(int) // lookup table for small values of log2(int)
const float kLog2Table[LOG_LOOKUP_IDX_MAX] = { const float kLog2Table[LOG_LOOKUP_IDX_MAX] = {
0.0000000000000000f, 0.0000000000000000f, 0.0000000000000000f, 0.0000000000000000f,
@ -640,6 +642,19 @@ static WEBP_INLINE void UpdateHisto(int histo_argb[4][256], uint32_t argb) {
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
// Returns the predicted ARGB value for the pixel at column 'x' of row 'y'.
//   - row 0: the pixel to the left is used, or ARGB_BLACK for the very first
//     pixel (x == 0).
//   - column 0 of any other row: the pixel directly above is used.
//   - everywhere else: 'pred_func' is invoked with the left pixel and a
//     pointer into 'upper_row' at column 'x'.
// 'current_row' and 'upper_row' are indexed by 'x'; 'pred_func' is never
// called for pixels on the first row or in the first column.
static WEBP_INLINE uint32_t Predict(VP8LPredictorFunc pred_func,
                                    int x, int y,
                                    const uint32_t* current_row,
                                    const uint32_t* upper_row) {
  const int on_first_row = (y == 0);
  const int on_first_col = (x == 0);

  // Interior pixel: both neighbors exist, so defer to the predictor.
  if (!on_first_row && !on_first_col) {
    return pred_func(current_row[x - 1], upper_row + x);
  }
  // First row: there is nothing above, fall back to Left (or black).
  if (on_first_row) {
    return on_first_col ? ARGB_BLACK : current_row[x - 1];
  }
  // First column of a later row: there is nothing to the left, use Top.
  return upper_row[x];
}
// Returns best predictor and updates the accumulated histogram. // Returns best predictor and updates the accumulated histogram.
static int GetBestPredictorForTile(int width, int height, static int GetBestPredictorForTile(int width, int height,
int tile_x, int tile_y, int bits, int tile_x, int tile_y, int bits,
@ -674,14 +689,8 @@ static int GetBestPredictorForTile(int width, int height,
current_row = upper_row + width; current_row = upper_row + width;
for (x = 0; x < max_x; ++x) { for (x = 0; x < max_x; ++x) {
const int col = col_start + x; const int col = col_start + x;
uint32_t predict; const uint32_t predict =
if (row == 0) { Predict(pred_func, col, row, current_row, upper_row);
predict = (col == 0) ? ARGB_BLACK : current_row[col - 1]; // Left.
} else if (col == 0) {
predict = upper_row[col]; // Top.
} else {
predict = pred_func(current_row[col - 1], upper_row + col);
}
UpdateHisto(histo_argb, VP8LSubPixels(current_row[col], predict)); UpdateHisto(histo_argb, VP8LSubPixels(current_row[col], predict));
} }
} }
@ -708,7 +717,7 @@ static int GetBestPredictorForTile(int width, int height,
static void CopyImageWithPrediction(int width, int height, static void CopyImageWithPrediction(int width, int height,
int bits, uint32_t* const modes, int bits, uint32_t* const modes,
uint32_t* const argb_scratch, uint32_t* const argb_scratch,
uint32_t* const argb) { uint32_t* const argb, int low_effort) {
const int tiles_per_row = VP8LSubSampleSize(width, bits); const int tiles_per_row = VP8LSubSampleSize(width, bits);
const int mask = (1 << bits) - 1; const int mask = (1 << bits) - 1;
// The row size is one pixel longer to allow the top right pixel to point to // The row size is one pixel longer to allow the top right pixel to point to
@ -716,7 +725,8 @@ static void CopyImageWithPrediction(int width, int height,
uint32_t* current_row = argb_scratch; uint32_t* current_row = argb_scratch;
uint32_t* upper_row = argb_scratch + width + 1; uint32_t* upper_row = argb_scratch + width + 1;
int y; int y;
VP8LPredictorFunc pred_func = 0; VP8LPredictorFunc pred_func =
low_effort ? VP8LPredictors[kPredLowEffort] : NULL;
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
int x; int x;
@ -725,6 +735,14 @@ static void CopyImageWithPrediction(int width, int height,
current_row = tmp; current_row = tmp;
memcpy(current_row, argb + y * width, sizeof(*current_row) * width); memcpy(current_row, argb + y * width, sizeof(*current_row) * width);
current_row[width] = (y + 1 < height) ? argb[(y + 1) * width] : ARGB_BLACK; current_row[width] = (y + 1 < height) ? argb[(y + 1) * width] : ARGB_BLACK;
if (low_effort) {
for (x = 0; x < width; ++x) {
const uint32_t predict =
Predict(pred_func, x, y, current_row, upper_row);
argb[y * width + x] = VP8LSubPixels(current_row[x], predict);
}
} else {
for (x = 0; x < width; ++x) { for (x = 0; x < width; ++x) {
uint32_t predict; uint32_t predict;
if ((x & mask) == 0) { if ((x & mask) == 0) {
@ -732,17 +750,12 @@ static void CopyImageWithPrediction(int width, int height,
(modes[(y >> bits) * tiles_per_row + (x >> bits)] >> 8) & 0xff; (modes[(y >> bits) * tiles_per_row + (x >> bits)] >> 8) & 0xff;
pred_func = VP8LPredictors[mode]; pred_func = VP8LPredictors[mode];
} }
if (y == 0) { predict = Predict(pred_func, x, y, current_row, upper_row);
predict = (x == 0) ? ARGB_BLACK : current_row[x - 1]; // Left.
} else if (x == 0) {
predict = upper_row[x]; // Top.
} else {
predict = pred_func(current_row[x - 1], upper_row + x);
}
argb[y * width + x] = VP8LSubPixels(current_row[x], predict); argb[y * width + x] = VP8LSubPixels(current_row[x], predict);
} }
} }
} }
}
void VP8LResidualImage(int width, int height, int bits, int low_effort, void VP8LResidualImage(int width, int height, int bits, int low_effort,
uint32_t* const argb, uint32_t* const argb_scratch, uint32_t* const argb, uint32_t* const argb_scratch,
@ -750,12 +763,17 @@ void VP8LResidualImage(int width, int height, int bits, int low_effort,
const int max_tile_size = 1 << bits; const int max_tile_size = 1 << bits;
const int tiles_per_row = VP8LSubSampleSize(width, bits); const int tiles_per_row = VP8LSubSampleSize(width, bits);
const int tiles_per_col = VP8LSubSampleSize(height, bits); const int tiles_per_col = VP8LSubSampleSize(height, bits);
const int kPredLowEffort = 11;
uint32_t* const upper_row = argb_scratch; uint32_t* const upper_row = argb_scratch;
uint32_t* const current_tile_rows = argb_scratch + width; uint32_t* const current_tile_rows = argb_scratch + width;
int tile_y; int tile_y;
int histo[4][256]; int histo[4][256];
if (!low_effort) memset(histo, 0, sizeof(histo)); if (low_effort) {
int i;
for (i = 0; i < tiles_per_row * tiles_per_col; ++i) {
image[i] = ARGB_BLACK | (kPredLowEffort << 8);
}
} else {
memset(histo, 0, sizeof(histo));
for (tile_y = 0; tile_y < tiles_per_col; ++tile_y) { for (tile_y = 0; tile_y < tiles_per_col; ++tile_y) {
const int tile_y_offset = tile_y * max_tile_size; const int tile_y_offset = tile_y * max_tile_size;
const int this_tile_height = const int this_tile_height =
@ -768,17 +786,16 @@ void VP8LResidualImage(int width, int height, int bits, int low_effort,
memcpy(current_tile_rows, &argb[tile_y_offset * width], memcpy(current_tile_rows, &argb[tile_y_offset * width],
this_tile_height * width * sizeof(*current_tile_rows)); this_tile_height * width * sizeof(*current_tile_rows));
for (tile_x = 0; tile_x < tiles_per_row; ++tile_x) { for (tile_x = 0; tile_x < tiles_per_row; ++tile_x) {
const int pred = const int pred = GetBestPredictorForTile(width, height, tile_x, tile_y,
low_effort ? kPredLowEffort : bits, (int (*)[256])histo,
GetBestPredictorForTile(width, height,
tile_x, tile_y, bits,
(int (*)[256])histo,
argb_scratch); argb_scratch);
image[tile_y * tiles_per_row + tile_x] = 0xff000000u | (pred << 8); image[tile_y * tiles_per_row + tile_x] = ARGB_BLACK | (pred << 8);
}
} }
} }
CopyImageWithPrediction(width, height, bits, image, argb_scratch, argb); CopyImageWithPrediction(width, height, bits,
image, argb_scratch, argb, low_effort);
} }
void VP8LSubtractGreenFromBlueAndRed_C(uint32_t* argb_data, int num_pixels) { void VP8LSubtractGreenFromBlueAndRed_C(uint32_t* argb_data, int num_pixels) {