Speed up lossless encoder.

Speed up the lossless encoder by 20-25% by optimizing:
- GetBestColorTransformForTile: Use techniques like binary search and
  local minima search to reduce the search space.
- VP8LFastSLog2Slow & VP8LFastLog2Slow: Add a correction factor for
  log(1 + x) and raise the threshold below which the approximate
  version of log_2 is used (instead of the costly call to log()).

Change-Id: Ia2444c914521ac298492aafa458e617028fc2f9d
This commit is contained in:
Vikas Arora 2014-02-21 11:41:38 -08:00
parent 77a8f91981
commit c16cd99aba
3 changed files with 205 additions and 99 deletions

View File

@ -28,8 +28,6 @@
#define MAX_DIFF_COST (1e30f) #define MAX_DIFF_COST (1e30f)
// lookup table for small values of log2(int) // lookup table for small values of log2(int)
#define APPROX_LOG_MAX 4096
#define LOG_2_RECIPROCAL 1.44269504088896338700465094007086
const float kLog2Table[LOG_LOOKUP_IDX_MAX] = { const float kLog2Table[LOG_LOOKUP_IDX_MAX] = {
0.0000000000000000f, 0.0000000000000000f, 0.0000000000000000f, 0.0000000000000000f,
1.0000000000000000f, 1.5849625007211560f, 1.0000000000000000f, 1.5849625007211560f,
@ -331,16 +329,34 @@ const uint8_t kPrefixEncodeExtraBitsValue[PREFIX_LOOKUP_IDX_MAX] = {
112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126
}; };
// The threshold up to which the approximate version of log_2 can be used.
// In practice, the call to log() could be dropped entirely, as the two values
// match to a very high degree (their ratio is 0.99999x).
// Keeping a high threshold for now.
#define APPROX_LOG_WITH_CORRECTION_MAX 65536
#define APPROX_LOG_MAX 4096
#define LOG_2_RECIPROCAL 1.44269504088896338700465094007086
float VP8LFastSLog2Slow(int v) { float VP8LFastSLog2Slow(int v) {
assert(v >= LOG_LOOKUP_IDX_MAX); assert(v >= LOG_LOOKUP_IDX_MAX);
if (v < APPROX_LOG_MAX) { if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
int log_cnt = 0; int log_cnt = 0;
int y = 1;
int correction = 0;
const float v_f = (float)v; const float v_f = (float)v;
while (v >= LOG_LOOKUP_IDX_MAX) { const int orig_v = v;
do {
++log_cnt; ++log_cnt;
v = v >> 1; v = v >> 1;
} y = y << 1;
return v_f * (kLog2Table[v] + log_cnt); } while (v >= LOG_LOOKUP_IDX_MAX);
// vf = (2^log_cnt) * Xf; where y = 2^log_cnt and Xf < 256
// Xf = floor(Xf) * (1 + (v % y) / v)
// log2(Xf) = log2(floor(Xf)) + log2(1 + (v % y) / v)
// The correction factor: log(1 + d) ~ d; for very small d values, so
// log2(1 + (v % y) / v) ~ LOG_2_RECIPROCAL * (v % y)/v
// LOG_2_RECIPROCAL ~ 23/16
correction = (23 * (orig_v % y)) >> 4;
return v_f * (kLog2Table[v] + log_cnt) + correction;
} else { } else {
return (float)(LOG_2_RECIPROCAL * v * log((double)v)); return (float)(LOG_2_RECIPROCAL * v * log((double)v));
} }
@ -348,13 +364,24 @@ float VP8LFastSLog2Slow(int v) {
float VP8LFastLog2Slow(int v) { float VP8LFastLog2Slow(int v) {
assert(v >= LOG_LOOKUP_IDX_MAX); assert(v >= LOG_LOOKUP_IDX_MAX);
if (v < APPROX_LOG_MAX) { if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
int log_cnt = 0; int log_cnt = 0;
while (v >= LOG_LOOKUP_IDX_MAX) { int y = 1;
const int orig_v = v;
double log_2;
do {
++log_cnt; ++log_cnt;
v = v >> 1; v = v >> 1;
y = y << 1;
} while (v >= LOG_LOOKUP_IDX_MAX);
log_2 = kLog2Table[v] + log_cnt;
if (orig_v >= APPROX_LOG_MAX) {
// Since the division is still expensive, add this correction factor only
// for large values of 'v'.
const int correction = (23 * (orig_v % y)) >> 4;
log_2 += (double)correction / orig_v;
} }
return kLog2Table[v] + log_cnt; return (float)log_2;
} else { } else {
return (float)(LOG_2_RECIPROCAL * log((double)v)); return (float)(LOG_2_RECIPROCAL * log((double)v));
} }
@ -881,99 +908,180 @@ static float PredictionCostCrossColor(const int accumulated[256],
PredictionCostSpatial(counts, 3, kExpValue); PredictionCostSpatial(counts, 3, kExpValue);
} }
static Multipliers GetBestColorTransformForTile( static float GetPredictionCostCrossColorRed(
int tile_x, int tile_y, int bits, int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max,
Multipliers prevX, int xsize, Multipliers prev_x, Multipliers prev_y, int green_to_red,
Multipliers prevY, const int* const accumulated_red_histo, const uint32_t* const argb) {
int step, int xsize, int ysize, int all_y;
int* accumulated_red_histo, int histo[256] = { 0 };
int* accumulated_blue_histo, float cur_diff;
for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) {
int ix = all_y * xsize + tile_x_offset;
int all_x;
for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
if (SkipRepeatedPixels(argb, ix, xsize)) {
continue;
}
++histo[TransformColorRed(green_to_red, argb[ix])]; // red.
}
}
cur_diff = PredictionCostCrossColor(&accumulated_red_histo[0], &histo[0]);
if ((uint8_t)green_to_red == prev_x.green_to_red_) {
cur_diff -= 3; // favor keeping the areas locally similar
}
if ((uint8_t)green_to_red == prev_y.green_to_red_) {
cur_diff -= 3; // favor keeping the areas locally similar
}
if (green_to_red == 0) {
cur_diff -= 3;
}
return cur_diff;
}
// Picks the green_to_red multiplier for the current tile and stores it in
// best_tx->green_to_red_.
// The search starts from the interval [-64, 64] and only ever costs its two
// endpoints (via GetPredictionCostCrossColorRed). On each pass the endpoint
// with the higher cost is replaced by the interval midpoint, and only that new
// endpoint is costed on the following pass. The loop stops once the endpoints
// are no more than 2 apart; the result is the cheaper endpoint of the last
// comparison (the upper endpoint wins ties). If the loop body never ran, the
// result would be 0.
static void GetBestGreenToRed(
    int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max,
    int xsize, Multipliers prev_x, Multipliers prev_y,
    const int* const accumulated_red_histo, const uint32_t* const argb,
    Multipliers* best_tx) {
  int lo = -64;
  int hi = 64;
  int best = 0;
  float cost_lo = MAX_DIFF_COST;
  float cost_hi = MAX_DIFF_COST;
  // An endpoint is "stale" when it has moved since its cost was last computed.
  int lo_is_stale = 1;
  int hi_is_stale = 1;

  while (hi - lo > 2) {
    int mid;
    if (lo_is_stale) {
      cost_lo = GetPredictionCostCrossColorRed(
          tile_x_offset, tile_y_offset, all_x_max, all_y_max, xsize,
          prev_x, prev_y, lo, accumulated_red_histo, argb);
      lo_is_stale = 0;
    }
    if (hi_is_stale) {
      cost_hi = GetPredictionCostCrossColorRed(
          tile_x_offset, tile_y_offset, all_x_max, all_y_max, xsize,
          prev_x, prev_y, hi, accumulated_red_histo, argb);
      hi_is_stale = 0;
    }
    mid = (hi + lo) / 2;
    if (cost_lo < cost_hi) {
      // Lower endpoint is cheaper: keep it and pull the upper endpoint in.
      best = lo;
      hi = mid;
      hi_is_stale = 1;
    } else {
      // Upper endpoint is at least as cheap: keep it and pull the lower one in.
      best = hi;
      lo = mid;
      lo_is_stale = 1;
    }
  }
  best_tx->green_to_red_ = best;
}
static float GetPredictionCostCrossColorBlue(
int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max,
int xsize, Multipliers prev_x, Multipliers prev_y, int green_to_blue,
int red_to_blue, const int* const accumulated_blue_histo,
const uint32_t* const argb) { const uint32_t* const argb) {
int all_y;
int histo[256] = { 0 };
float cur_diff;
for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) {
int all_x;
int ix = all_y * xsize + tile_x_offset;
for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
if (SkipRepeatedPixels(argb, ix, xsize)) {
continue;
}
++histo[TransformColorBlue(green_to_blue, red_to_blue, argb[ix])];
}
}
cur_diff = PredictionCostCrossColor(&accumulated_blue_histo[0], &histo[0]);
if ((uint8_t)green_to_blue == prev_x.green_to_blue_) {
cur_diff -= 3; // favor keeping the areas locally similar
}
if ((uint8_t)green_to_blue == prev_y.green_to_blue_) {
cur_diff -= 3; // favor keeping the areas locally similar
}
if ((uint8_t)red_to_blue == prev_x.red_to_blue_) {
cur_diff -= 3; // favor keeping the areas locally similar
}
if ((uint8_t)red_to_blue == prev_y.red_to_blue_) {
cur_diff -= 3; // favor keeping the areas locally similar
}
if (green_to_blue == 0) {
cur_diff -= 3;
}
if (red_to_blue == 0) {
cur_diff -= 3;
}
return cur_diff;
}
static void GetBestGreenRedToBlue(
int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max,
int xsize, Multipliers prev_x, Multipliers prev_y, int quality,
const int* const accumulated_blue_histo, const uint32_t* const argb,
Multipliers* best_tx) {
float best_diff = MAX_DIFF_COST; float best_diff = MAX_DIFF_COST;
float cur_diff; float cur_diff;
const int halfstep = step / 2; const int step = (quality < 25) ? 32 : (quality > 50) ? 8 : 16;
const int min_green_to_blue = -32;
const int max_green_to_blue = 32;
const int min_red_to_blue = -16;
const int max_red_to_blue = 16;
const int num_iters =
(1 + (max_green_to_blue - min_green_to_blue) / step) *
(1 + (max_red_to_blue - min_red_to_blue) / step);
// Number of further tries allowed to find better green_to_blue & red_to_blue
// color transforms after a local minimum has been found.
const int max_tries_after_min = 4 + (num_iters >> 2);
int num_tries_after_min = 0;
int green_to_blue;
for (green_to_blue = min_green_to_blue;
green_to_blue <= max_green_to_blue &&
num_tries_after_min < max_tries_after_min;
green_to_blue += step) {
int red_to_blue;
for (red_to_blue = min_red_to_blue;
red_to_blue <= max_red_to_blue &&
num_tries_after_min < max_tries_after_min;
red_to_blue += step) {
cur_diff = GetPredictionCostCrossColorBlue(
tile_x_offset, tile_y_offset, all_x_max, all_y_max, xsize, prev_x,
prev_y, green_to_blue, red_to_blue, &accumulated_blue_histo[0], argb);
if (cur_diff < best_diff) {
best_diff = cur_diff;
best_tx->green_to_blue_ = green_to_blue;
best_tx->red_to_blue_ = red_to_blue;
num_tries_after_min = 0;
} else {
++num_tries_after_min;
}
}
}
}
static Multipliers GetBestColorTransformForTile(
int tile_x, int tile_y, int bits,
Multipliers prev_x,
Multipliers prev_y,
int quality, int xsize, int ysize,
const int* const accumulated_red_histo,
const int* const accumulated_blue_histo,
const uint32_t* const argb) {
const int max_tile_size = 1 << bits; const int max_tile_size = 1 << bits;
const int tile_y_offset = tile_y * max_tile_size; const int tile_y_offset = tile_y * max_tile_size;
const int tile_x_offset = tile_x * max_tile_size; const int tile_x_offset = tile_x * max_tile_size;
int green_to_red;
int green_to_blue;
int red_to_blue;
const int all_x_max = GetMin(tile_x_offset + max_tile_size, xsize); const int all_x_max = GetMin(tile_x_offset + max_tile_size, xsize);
const int all_y_max = GetMin(tile_y_offset + max_tile_size, ysize); const int all_y_max = GetMin(tile_y_offset + max_tile_size, ysize);
Multipliers best_tx; Multipliers best_tx;
MultipliersClear(&best_tx); MultipliersClear(&best_tx);
for (green_to_red = -64; green_to_red <= 64; green_to_red += halfstep) { GetBestGreenToRed(tile_x_offset, tile_y_offset, all_x_max, all_y_max, xsize,
int histo[256] = { 0 }; prev_x, prev_y, &accumulated_red_histo[0], argb, &best_tx);
int all_y; GetBestGreenRedToBlue(tile_x_offset, tile_y_offset, all_x_max, all_y_max,
xsize, prev_x, prev_y, quality,
for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) { &accumulated_blue_histo[0], argb, &best_tx);
int ix = all_y * xsize + tile_x_offset;
int all_x;
for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
if (SkipRepeatedPixels(argb, ix, xsize)) {
continue;
}
++histo[TransformColorRed(green_to_red, argb[ix])]; // red.
}
}
cur_diff = PredictionCostCrossColor(&accumulated_red_histo[0], &histo[0]);
if ((uint8_t)green_to_red == prevX.green_to_red_) {
cur_diff -= 3; // favor keeping the areas locally similar
}
if ((uint8_t)green_to_red == prevY.green_to_red_) {
cur_diff -= 3; // favor keeping the areas locally similar
}
if (green_to_red == 0) {
cur_diff -= 3;
}
if (cur_diff < best_diff) {
best_diff = cur_diff;
best_tx.green_to_red_ = green_to_red;
}
}
best_diff = MAX_DIFF_COST;
for (green_to_blue = -32; green_to_blue <= 32; green_to_blue += step) {
for (red_to_blue = -32; red_to_blue <= 32; red_to_blue += step) {
int all_y;
int histo[256] = { 0 };
for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) {
int all_x;
int ix = all_y * xsize + tile_x_offset;
for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
if (SkipRepeatedPixels(argb, ix, xsize)) {
continue;
}
++histo[TransformColorBlue(green_to_blue, red_to_blue, argb[ix])];
}
}
cur_diff =
PredictionCostCrossColor(&accumulated_blue_histo[0], &histo[0]);
if ((uint8_t)green_to_blue == prevX.green_to_blue_) {
cur_diff -= 3; // favor keeping the areas locally similar
}
if ((uint8_t)green_to_blue == prevY.green_to_blue_) {
cur_diff -= 3; // favor keeping the areas locally similar
}
if ((uint8_t)red_to_blue == prevX.red_to_blue_) {
cur_diff -= 3; // favor keeping the areas locally similar
}
if ((uint8_t)red_to_blue == prevY.red_to_blue_) {
cur_diff -= 3; // favor keeping the areas locally similar
}
if (green_to_blue == 0) {
cur_diff -= 3;
}
if (red_to_blue == 0) {
cur_diff -= 3;
}
if (cur_diff < best_diff) {
best_diff = cur_diff;
best_tx.green_to_blue_ = green_to_blue;
best_tx.red_to_blue_ = red_to_blue;
}
}
}
return best_tx; return best_tx;
} }
@ -994,7 +1102,7 @@ static void CopyTileWithColorTransform(int xsize, int ysize,
} }
} }
void VP8LColorSpaceTransform(int width, int height, int bits, int step, void VP8LColorSpaceTransform(int width, int height, int bits, int quality,
uint32_t* const argb, uint32_t* image) { uint32_t* const argb, uint32_t* image) {
const int max_tile_size = 1 << bits; const int max_tile_size = 1 << bits;
const int tile_xsize = VP8LSubSampleSize(width, bits); const int tile_xsize = VP8LSubSampleSize(width, bits);
@ -1018,14 +1126,13 @@ void VP8LColorSpaceTransform(int width, int height, int bits, int step,
} }
prev_x = GetBestColorTransformForTile(tile_x, tile_y, bits, prev_x = GetBestColorTransformForTile(tile_x, tile_y, bits,
prev_x, prev_y, prev_x, prev_y,
step, width, height, quality, width, height,
&accumulated_red_histo[0], &accumulated_red_histo[0],
&accumulated_blue_histo[0], &accumulated_blue_histo[0],
argb); argb);
image[offset] = MultipliersToColorCode(&prev_x); image[offset] = MultipliersToColorCode(&prev_x);
CopyTileWithColorTransform(width, height, CopyTileWithColorTransform(width, height, tile_x_offset, tile_y_offset,
tile_x_offset, tile_y_offset, max_tile_size, max_tile_size, prev_x, argb);
prev_x, argb);
// Gather accumulated histogram data. // Gather accumulated histogram data.
for (y = tile_y_offset; y < all_y_max; ++y) { for (y = tile_y_offset; y < all_y_max; ++y) {

View File

@ -66,7 +66,7 @@ void VP8LResidualImage(int width, int height, int bits,
uint32_t* const argb, uint32_t* const argb_scratch, uint32_t* const argb, uint32_t* const argb_scratch,
uint32_t* const image); uint32_t* const image);
void VP8LColorSpaceTransform(int width, int height, int bits, int step, void VP8LColorSpaceTransform(int width, int height, int bits, int quality,
uint32_t* const argb, uint32_t* image); uint32_t* const argb, uint32_t* image);
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------

View File

@ -695,9 +695,8 @@ static int ApplyCrossColorFilter(const VP8LEncoder* const enc,
const int ccolor_transform_bits = enc->transform_bits_; const int ccolor_transform_bits = enc->transform_bits_;
const int transform_width = VP8LSubSampleSize(width, ccolor_transform_bits); const int transform_width = VP8LSubSampleSize(width, ccolor_transform_bits);
const int transform_height = VP8LSubSampleSize(height, ccolor_transform_bits); const int transform_height = VP8LSubSampleSize(height, ccolor_transform_bits);
const int step = (quality < 25) ? 32 : (quality > 50) ? 8 : 16;
VP8LColorSpaceTransform(width, height, ccolor_transform_bits, step, VP8LColorSpaceTransform(width, height, ccolor_transform_bits, quality,
enc->argb_, enc->transform_data_); enc->argb_, enc->transform_data_);
VP8LWriteBits(bw, 1, TRANSFORM_PRESENT); VP8LWriteBits(bw, 1, TRANSFORM_PRESENT);
VP8LWriteBits(bw, 2, CROSS_COLOR_TRANSFORM); VP8LWriteBits(bw, 2, CROSS_COLOR_TRANSFORM);