Make a separate case for low_effort in CopyImageWithPrediction

for more speed.

This gives roughly a 1% speedup for low_effort. But actually this is a
preparation for the upcoming CL that changes RGB values of transparent pixels
based on prediction, which should not be done for low_effort because that would
slightly hurt its performance.

On 1000 PNGs, with quality 0, method 0:
Before:
Compression (output/input): 2.9120/3.2667 bpp, Encode rate (raw data): 36.034 MP/s
After:
Compression (output/input): 2.9120/3.2667 bpp, Encode rate (raw data): 36.428 MP/s

Change-Id: I5ed9f599bbf908a917723f3c780551ceb7fd724d
This commit is contained in:
Lode Vandevenne 2015-11-27 14:01:54 +00:00 committed by James Zern
parent 5ae220bef6
commit 5bda52d4e8

View File

@ -24,6 +24,8 @@
#define MAX_DIFF_COST (1e30f) #define MAX_DIFF_COST (1e30f)
static const int kPredLowEffort = 11;
// lookup table for small values of log2(int) // lookup table for small values of log2(int)
const float kLog2Table[LOG_LOOKUP_IDX_MAX] = { const float kLog2Table[LOG_LOOKUP_IDX_MAX] = {
0.0000000000000000f, 0.0000000000000000f, 0.0000000000000000f, 0.0000000000000000f,
@ -640,6 +642,19 @@ static WEBP_INLINE void UpdateHisto(int histo_argb[4][256], uint32_t argb) {
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
// Computes the predicted ARGB value for the pixel at column 'x' of row 'y'.
// - Interior pixels (neither first row nor first column) are delegated to
//   'pred_func', which is given the left neighbor and a pointer into
//   'upper_row' at column 'x'.
// - First-column pixels of any row but the first use the pixel right above.
// - First-row pixels use their left neighbor, except the very first pixel of
//   the image which is predicted as ARGB_BLACK.
static WEBP_INLINE uint32_t Predict(VP8LPredictorFunc pred_func,
                                    int x, int y,
                                    const uint32_t* current_row,
                                    const uint32_t* upper_row) {
  const int on_first_row = (y == 0);
  const int on_first_col = (x == 0);

  if (!on_first_row && !on_first_col) {
    return pred_func(current_row[x - 1], upper_row + x);
  }
  if (!on_first_row) {
    return upper_row[x];  // Top.
  }
  if (on_first_col) {
    return ARGB_BLACK;
  }
  return current_row[x - 1];  // Left.
}
// Returns best predictor and updates the accumulated histogram. // Returns best predictor and updates the accumulated histogram.
static int GetBestPredictorForTile(int width, int height, static int GetBestPredictorForTile(int width, int height,
int tile_x, int tile_y, int bits, int tile_x, int tile_y, int bits,
@ -674,14 +689,8 @@ static int GetBestPredictorForTile(int width, int height,
current_row = upper_row + width; current_row = upper_row + width;
for (x = 0; x < max_x; ++x) { for (x = 0; x < max_x; ++x) {
const int col = col_start + x; const int col = col_start + x;
uint32_t predict; const uint32_t predict =
if (row == 0) { Predict(pred_func, col, row, current_row, upper_row);
predict = (col == 0) ? ARGB_BLACK : current_row[col - 1]; // Left.
} else if (col == 0) {
predict = upper_row[col]; // Top.
} else {
predict = pred_func(current_row[col - 1], upper_row + col);
}
UpdateHisto(histo_argb, VP8LSubPixels(current_row[col], predict)); UpdateHisto(histo_argb, VP8LSubPixels(current_row[col], predict));
} }
} }
@ -708,7 +717,7 @@ static int GetBestPredictorForTile(int width, int height,
static void CopyImageWithPrediction(int width, int height, static void CopyImageWithPrediction(int width, int height,
int bits, uint32_t* const modes, int bits, uint32_t* const modes,
uint32_t* const argb_scratch, uint32_t* const argb_scratch,
uint32_t* const argb) { uint32_t* const argb, int low_effort) {
const int tiles_per_row = VP8LSubSampleSize(width, bits); const int tiles_per_row = VP8LSubSampleSize(width, bits);
const int mask = (1 << bits) - 1; const int mask = (1 << bits) - 1;
// The row size is one pixel longer to allow the top right pixel to point to // The row size is one pixel longer to allow the top right pixel to point to
@ -716,7 +725,8 @@ static void CopyImageWithPrediction(int width, int height,
uint32_t* current_row = argb_scratch; uint32_t* current_row = argb_scratch;
uint32_t* upper_row = argb_scratch + width + 1; uint32_t* upper_row = argb_scratch + width + 1;
int y; int y;
VP8LPredictorFunc pred_func = 0; VP8LPredictorFunc pred_func =
low_effort ? VP8LPredictors[kPredLowEffort] : NULL;
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
int x; int x;
@ -725,21 +735,24 @@ static void CopyImageWithPrediction(int width, int height,
current_row = tmp; current_row = tmp;
memcpy(current_row, argb + y * width, sizeof(*current_row) * width); memcpy(current_row, argb + y * width, sizeof(*current_row) * width);
current_row[width] = (y + 1 < height) ? argb[(y + 1) * width] : ARGB_BLACK; current_row[width] = (y + 1 < height) ? argb[(y + 1) * width] : ARGB_BLACK;
for (x = 0; x < width; ++x) {
uint32_t predict; if (low_effort) {
if ((x & mask) == 0) { for (x = 0; x < width; ++x) {
const int mode = const uint32_t predict =
(modes[(y >> bits) * tiles_per_row + (x >> bits)] >> 8) & 0xff; Predict(pred_func, x, y, current_row, upper_row);
pred_func = VP8LPredictors[mode]; argb[y * width + x] = VP8LSubPixels(current_row[x], predict);
} }
if (y == 0) { } else {
predict = (x == 0) ? ARGB_BLACK : current_row[x - 1]; // Left. for (x = 0; x < width; ++x) {
} else if (x == 0) { uint32_t predict;
predict = upper_row[x]; // Top. if ((x & mask) == 0) {
} else { const int mode =
predict = pred_func(current_row[x - 1], upper_row + x); (modes[(y >> bits) * tiles_per_row + (x >> bits)] >> 8) & 0xff;
pred_func = VP8LPredictors[mode];
}
predict = Predict(pred_func, x, y, current_row, upper_row);
argb[y * width + x] = VP8LSubPixels(current_row[x], predict);
} }
argb[y * width + x] = VP8LSubPixels(current_row[x], predict);
} }
} }
} }
@ -750,35 +763,39 @@ void VP8LResidualImage(int width, int height, int bits, int low_effort,
const int max_tile_size = 1 << bits; const int max_tile_size = 1 << bits;
const int tiles_per_row = VP8LSubSampleSize(width, bits); const int tiles_per_row = VP8LSubSampleSize(width, bits);
const int tiles_per_col = VP8LSubSampleSize(height, bits); const int tiles_per_col = VP8LSubSampleSize(height, bits);
const int kPredLowEffort = 11;
uint32_t* const upper_row = argb_scratch; uint32_t* const upper_row = argb_scratch;
uint32_t* const current_tile_rows = argb_scratch + width; uint32_t* const current_tile_rows = argb_scratch + width;
int tile_y; int tile_y;
int histo[4][256]; int histo[4][256];
if (!low_effort) memset(histo, 0, sizeof(histo)); if (low_effort) {
for (tile_y = 0; tile_y < tiles_per_col; ++tile_y) { int i;
const int tile_y_offset = tile_y * max_tile_size; for (i = 0; i < tiles_per_row * tiles_per_col; ++i) {
const int this_tile_height = image[i] = ARGB_BLACK | (kPredLowEffort << 8);
(tile_y < tiles_per_col - 1) ? max_tile_size : height - tile_y_offset;
int tile_x;
if (tile_y > 0) {
memcpy(upper_row, current_tile_rows + (max_tile_size - 1) * width,
width * sizeof(*upper_row));
} }
memcpy(current_tile_rows, &argb[tile_y_offset * width], } else {
this_tile_height * width * sizeof(*current_tile_rows)); memset(histo, 0, sizeof(histo));
for (tile_x = 0; tile_x < tiles_per_row; ++tile_x) { for (tile_y = 0; tile_y < tiles_per_col; ++tile_y) {
const int pred = const int tile_y_offset = tile_y * max_tile_size;
low_effort ? kPredLowEffort : const int this_tile_height =
GetBestPredictorForTile(width, height, (tile_y < tiles_per_col - 1) ? max_tile_size : height - tile_y_offset;
tile_x, tile_y, bits, int tile_x;
(int (*)[256])histo, if (tile_y > 0) {
argb_scratch); memcpy(upper_row, current_tile_rows + (max_tile_size - 1) * width,
image[tile_y * tiles_per_row + tile_x] = 0xff000000u | (pred << 8); width * sizeof(*upper_row));
}
memcpy(current_tile_rows, &argb[tile_y_offset * width],
this_tile_height * width * sizeof(*current_tile_rows));
for (tile_x = 0; tile_x < tiles_per_row; ++tile_x) {
const int pred = GetBestPredictorForTile(width, height, tile_x, tile_y,
bits, (int (*)[256])histo,
argb_scratch);
image[tile_y * tiles_per_row + tile_x] = ARGB_BLACK | (pred << 8);
}
} }
} }
CopyImageWithPrediction(width, height, bits, image, argb_scratch, argb); CopyImageWithPrediction(width, height, bits,
image, argb_scratch, argb, low_effort);
} }
void VP8LSubtractGreenFromBlueAndRed_C(uint32_t* argb_data, int num_pixels) { void VP8LSubtractGreenFromBlueAndRed_C(uint32_t* argb_data, int num_pixels) {