mirror of
https://github.com/webmproject/libwebp.git
synced 2025-01-27 15:12:54 +01:00
Add SSE2 function for Inverse Cross-color Transform
Lossless decoding is now ~3% faster. Change-Id: Idafb5c73e5cfb272cc3661d841f79971f9da0743
This commit is contained in:
parent
26029568b7
commit
d4813f0cb2
@ -807,15 +807,7 @@ void VP8LAddGreenToBlueAndRed_C(uint32_t* data, int num_pixels) {
|
||||
}
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
// Note: the members are uint8_t, so that any negative values are
|
||||
// automatically converted to "mod 256" values.
|
||||
uint8_t green_to_red_;
|
||||
uint8_t green_to_blue_;
|
||||
uint8_t red_to_blue_;
|
||||
} Multipliers;
|
||||
|
||||
static WEBP_INLINE void MultipliersClear(Multipliers* m) {
|
||||
static WEBP_INLINE void MultipliersClear(VP8LMultipliers* const m) {
|
||||
m->green_to_red_ = 0;
|
||||
m->green_to_blue_ = 0;
|
||||
m->red_to_blue_ = 0;
|
||||
@ -827,45 +819,55 @@ static WEBP_INLINE uint32_t ColorTransformDelta(int8_t color_pred,
|
||||
}
|
||||
|
||||
static WEBP_INLINE void ColorCodeToMultipliers(uint32_t color_code,
|
||||
Multipliers* const m) {
|
||||
VP8LMultipliers* const m) {
|
||||
m->green_to_red_ = (color_code >> 0) & 0xff;
|
||||
m->green_to_blue_ = (color_code >> 8) & 0xff;
|
||||
m->red_to_blue_ = (color_code >> 16) & 0xff;
|
||||
}
|
||||
|
||||
static WEBP_INLINE uint32_t MultipliersToColorCode(const Multipliers* const m) {
|
||||
static WEBP_INLINE uint32_t MultipliersToColorCode(
|
||||
const VP8LMultipliers* const m) {
|
||||
return 0xff000000u |
|
||||
((uint32_t)(m->red_to_blue_) << 16) |
|
||||
((uint32_t)(m->green_to_blue_) << 8) |
|
||||
m->green_to_red_;
|
||||
}
|
||||
|
||||
static WEBP_INLINE uint32_t TransformColor(const Multipliers* const m,
|
||||
uint32_t argb) {
|
||||
const uint32_t green = argb >> 8;
|
||||
const uint32_t red = argb >> 16;
|
||||
uint32_t new_red = red;
|
||||
uint32_t new_blue = argb;
|
||||
new_red -= ColorTransformDelta(m->green_to_red_, green);
|
||||
new_red &= 0xff;
|
||||
new_blue -= ColorTransformDelta(m->green_to_blue_, green);
|
||||
new_blue -= ColorTransformDelta(m->red_to_blue_, red);
|
||||
new_blue &= 0xff;
|
||||
return (argb & 0xff00ff00u) | (new_red << 16) | (new_blue);
|
||||
static WEBP_INLINE void TransformColor(const VP8LMultipliers* const m,
|
||||
uint32_t* data,
|
||||
int num_pixels) {
|
||||
int i;
|
||||
for (i = 0; i < num_pixels; ++i) {
|
||||
const uint32_t argb = data[i];
|
||||
const uint32_t green = argb >> 8;
|
||||
const uint32_t red = argb >> 16;
|
||||
uint32_t new_red = red;
|
||||
uint32_t new_blue = argb;
|
||||
new_red -= ColorTransformDelta(m->green_to_red_, green);
|
||||
new_red &= 0xff;
|
||||
new_blue -= ColorTransformDelta(m->green_to_blue_, green);
|
||||
new_blue -= ColorTransformDelta(m->red_to_blue_, red);
|
||||
new_blue &= 0xff;
|
||||
data[i] = (argb & 0xff00ff00u) | (new_red << 16) | (new_blue);
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE uint32_t TransformColorInverse(const Multipliers* const m,
|
||||
uint32_t argb) {
|
||||
const uint32_t green = argb >> 8;
|
||||
const uint32_t red = argb >> 16;
|
||||
uint32_t new_red = red;
|
||||
uint32_t new_blue = argb;
|
||||
new_red += ColorTransformDelta(m->green_to_red_, green);
|
||||
new_red &= 0xff;
|
||||
new_blue += ColorTransformDelta(m->green_to_blue_, green);
|
||||
new_blue += ColorTransformDelta(m->red_to_blue_, new_red);
|
||||
new_blue &= 0xff;
|
||||
return (argb & 0xff00ff00u) | (new_red << 16) | (new_blue);
|
||||
void VP8LTransformColorInverse_C(const VP8LMultipliers* const m, uint32_t* data,
|
||||
int num_pixels) {
|
||||
int i;
|
||||
for (i = 0; i < num_pixels; ++i) {
|
||||
const uint32_t argb = data[i];
|
||||
const uint32_t green = argb >> 8;
|
||||
const uint32_t red = argb >> 16;
|
||||
uint32_t new_red = red;
|
||||
uint32_t new_blue = argb;
|
||||
new_red += ColorTransformDelta(m->green_to_red_, green);
|
||||
new_red &= 0xff;
|
||||
new_blue += ColorTransformDelta(m->green_to_blue_, green);
|
||||
new_blue += ColorTransformDelta(m->red_to_blue_, new_red);
|
||||
new_blue &= 0xff;
|
||||
data[i] = (argb & 0xff00ff00u) | (new_red << 16) | (new_blue);
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE uint8_t TransformColorRed(uint8_t green_to_red,
|
||||
@ -898,7 +900,7 @@ static float PredictionCostCrossColor(const int accumulated[256],
|
||||
|
||||
static float GetPredictionCostCrossColorRed(
|
||||
int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max,
|
||||
int xsize, Multipliers prev_x, Multipliers prev_y, int green_to_red,
|
||||
int xsize, VP8LMultipliers prev_x, VP8LMultipliers prev_y, int green_to_red,
|
||||
const int accumulated_red_histo[256], const uint32_t* const argb) {
|
||||
int all_y;
|
||||
int histo[256] = { 0 };
|
||||
@ -925,9 +927,9 @@ static float GetPredictionCostCrossColorRed(
|
||||
|
||||
static void GetBestGreenToRed(
|
||||
int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max,
|
||||
int xsize, Multipliers prev_x, Multipliers prev_y,
|
||||
int xsize, VP8LMultipliers prev_x, VP8LMultipliers prev_y,
|
||||
const int accumulated_red_histo[256], const uint32_t* const argb,
|
||||
Multipliers* best_tx) {
|
||||
VP8LMultipliers* const best_tx) {
|
||||
int min_green_to_red = -64;
|
||||
int max_green_to_red = 64;
|
||||
int green_to_red = 0;
|
||||
@ -964,8 +966,8 @@ static void GetBestGreenToRed(
|
||||
|
||||
static float GetPredictionCostCrossColorBlue(
|
||||
int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max,
|
||||
int xsize, Multipliers prev_x, Multipliers prev_y, int green_to_blue,
|
||||
int red_to_blue, const int accumulated_blue_histo[256],
|
||||
int xsize, VP8LMultipliers prev_x, VP8LMultipliers prev_y,
|
||||
int green_to_blue, int red_to_blue, const int accumulated_blue_histo[256],
|
||||
const uint32_t* const argb) {
|
||||
int all_y;
|
||||
int histo[256] = { 0 };
|
||||
@ -1001,9 +1003,9 @@ static float GetPredictionCostCrossColorBlue(
|
||||
|
||||
static void GetBestGreenRedToBlue(
|
||||
int tile_x_offset, int tile_y_offset, int all_x_max, int all_y_max,
|
||||
int xsize, Multipliers prev_x, Multipliers prev_y, int quality,
|
||||
int xsize, VP8LMultipliers prev_x, VP8LMultipliers prev_y, int quality,
|
||||
const int accumulated_blue_histo[256], const uint32_t* const argb,
|
||||
Multipliers* best_tx) {
|
||||
VP8LMultipliers* const best_tx) {
|
||||
float best_diff = MAX_DIFF_COST;
|
||||
float cur_diff;
|
||||
const int step = (quality < 25) ? 32 : (quality > 50) ? 8 : 16;
|
||||
@ -1043,10 +1045,10 @@ static void GetBestGreenRedToBlue(
|
||||
}
|
||||
}
|
||||
|
||||
static Multipliers GetBestColorTransformForTile(
|
||||
static VP8LMultipliers GetBestColorTransformForTile(
|
||||
int tile_x, int tile_y, int bits,
|
||||
Multipliers prev_x,
|
||||
Multipliers prev_y,
|
||||
VP8LMultipliers prev_x,
|
||||
VP8LMultipliers prev_y,
|
||||
int quality, int xsize, int ysize,
|
||||
const int accumulated_red_histo[256],
|
||||
const int accumulated_blue_histo[256],
|
||||
@ -1056,7 +1058,7 @@ static Multipliers GetBestColorTransformForTile(
|
||||
const int tile_x_offset = tile_x * max_tile_size;
|
||||
const int all_x_max = GetMin(tile_x_offset + max_tile_size, xsize);
|
||||
const int all_y_max = GetMin(tile_y_offset + max_tile_size, ysize);
|
||||
Multipliers best_tx;
|
||||
VP8LMultipliers best_tx;
|
||||
MultipliersClear(&best_tx);
|
||||
|
||||
GetBestGreenToRed(tile_x_offset, tile_y_offset, all_x_max, all_y_max, xsize,
|
||||
@ -1070,16 +1072,13 @@ static Multipliers GetBestColorTransformForTile(
|
||||
static void CopyTileWithColorTransform(int xsize, int ysize,
|
||||
int tile_x, int tile_y,
|
||||
int max_tile_size,
|
||||
Multipliers color_transform,
|
||||
VP8LMultipliers color_transform,
|
||||
uint32_t* argb) {
|
||||
const int xscan = GetMin(max_tile_size, xsize - tile_x);
|
||||
int yscan = GetMin(max_tile_size, ysize - tile_y);
|
||||
argb += tile_y * xsize + tile_x;
|
||||
while (yscan-- > 0) {
|
||||
int x;
|
||||
for (x = 0; x < xscan; ++x) {
|
||||
argb[x] = TransformColor(&color_transform, argb[x]);
|
||||
}
|
||||
TransformColor(&color_transform, argb, xscan);
|
||||
argb += xsize;
|
||||
}
|
||||
}
|
||||
@ -1092,7 +1091,7 @@ void VP8LColorSpaceTransform(int width, int height, int bits, int quality,
|
||||
int accumulated_red_histo[256] = { 0 };
|
||||
int accumulated_blue_histo[256] = { 0 };
|
||||
int tile_x, tile_y;
|
||||
Multipliers prev_x, prev_y;
|
||||
VP8LMultipliers prev_x, prev_y;
|
||||
MultipliersClear(&prev_y);
|
||||
MultipliersClear(&prev_x);
|
||||
for (tile_y = 0; tile_y < tile_ysize; ++tile_y) {
|
||||
@ -1148,6 +1147,7 @@ static void ColorSpaceInverseTransform(const VP8LTransform* const transform,
|
||||
const int tile_width = 1 << transform->bits_;
|
||||
const int mask = tile_width - 1;
|
||||
const int safe_width = width & ~mask;
|
||||
const int remaining_width = width - safe_width;
|
||||
const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_);
|
||||
int y = y_start;
|
||||
const uint32_t* pred_row =
|
||||
@ -1155,22 +1155,19 @@ static void ColorSpaceInverseTransform(const VP8LTransform* const transform,
|
||||
|
||||
while (y < y_end) {
|
||||
const uint32_t* pred = pred_row;
|
||||
Multipliers m = { 0, 0, 0 };
|
||||
int x = 0;
|
||||
while (x < safe_width) {
|
||||
int t;
|
||||
VP8LMultipliers m = { 0, 0, 0 };
|
||||
const uint32_t* const data_safe_end = data + safe_width;
|
||||
const uint32_t* const data_end = data + width;
|
||||
while (data < data_safe_end) {
|
||||
ColorCodeToMultipliers(*pred++, &m);
|
||||
for (t = 0; t < tile_width; ++t, ++x) {
|
||||
data[x] = TransformColorInverse(&m, data[x]);
|
||||
}
|
||||
VP8LTransformColorInverse(&m, data, tile_width);
|
||||
data += tile_width;
|
||||
}
|
||||
if (x < width) {
|
||||
if (data < data_end) { // Left-overs using C-version.
|
||||
ColorCodeToMultipliers(*pred++, &m);
|
||||
for (; x < width; ++x) {
|
||||
data[x] = TransformColorInverse(&m, data[x]);
|
||||
}
|
||||
VP8LTransformColorInverse(&m, data, remaining_width);
|
||||
data += remaining_width;
|
||||
}
|
||||
data += width;
|
||||
++y;
|
||||
if ((y & mask) == 0) pred_row += tiles_per_row;;
|
||||
}
|
||||
@ -1468,6 +1465,8 @@ VP8LProcessBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed;
|
||||
VP8LProcessBlueAndRedFunc VP8LAddGreenToBlueAndRed;
|
||||
VP8LPredictorFunc VP8LPredictors[16];
|
||||
|
||||
VP8LTransformColorFunc VP8LTransformColorInverse;
|
||||
|
||||
VP8LConvertFunc VP8LConvertBGRAToRGB;
|
||||
VP8LConvertFunc VP8LConvertBGRAToRGBA;
|
||||
VP8LConvertFunc VP8LConvertBGRAToRGBA4444;
|
||||
@ -1483,6 +1482,8 @@ void VP8LDspInit(void) {
|
||||
VP8LSubtractGreenFromBlueAndRed = VP8LSubtractGreenFromBlueAndRed_C;
|
||||
VP8LAddGreenToBlueAndRed = VP8LAddGreenToBlueAndRed_C;
|
||||
|
||||
VP8LTransformColorInverse = VP8LTransformColorInverse_C;
|
||||
|
||||
VP8LConvertBGRAToRGB = VP8LConvertBGRAToRGB_C;
|
||||
VP8LConvertBGRAToRGBA = VP8LConvertBGRAToRGBA_C;
|
||||
VP8LConvertBGRAToRGBA4444 = VP8LConvertBGRAToRGBA4444_C;
|
||||
|
@ -32,6 +32,17 @@ typedef void (*VP8LProcessBlueAndRedFunc)(uint32_t* argb_data, int num_pixels);
|
||||
extern VP8LProcessBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed;
|
||||
extern VP8LProcessBlueAndRedFunc VP8LAddGreenToBlueAndRed;
|
||||
|
||||
// Multipliers used by the cross-color transform: how much of the green
// channel feeds into red and blue, and how much of red feeds into blue.
typedef struct {
  // Note: the members are uint8_t, so that any negative values are
  // automatically converted to "mod 256" values.
  uint8_t green_to_red_;
  uint8_t green_to_blue_;
  uint8_t red_to_blue_;
} VP8LMultipliers;
|
||||
typedef void (*VP8LTransformColorFunc)(const VP8LMultipliers* const m,
|
||||
uint32_t* argb_data, int num_pixels);
|
||||
extern VP8LTransformColorFunc VP8LTransformColorInverse;
|
||||
|
||||
typedef void (*VP8LConvertFunc)(const uint32_t* src, int num_pixels,
|
||||
uint8_t* dst);
|
||||
extern VP8LConvertFunc VP8LConvertBGRAToRGB;
|
||||
@ -41,6 +52,9 @@ extern VP8LConvertFunc VP8LConvertBGRAToRGB565;
|
||||
extern VP8LConvertFunc VP8LConvertBGRAToBGR;
|
||||
|
||||
// Expose some C-only fallback functions
|
||||
extern void VP8LTransformColorInverse_C(
|
||||
const VP8LMultipliers* const m, uint32_t* data, int num_pixels);
|
||||
|
||||
extern void VP8LConvertBGRAToRGB_C(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst);
|
||||
extern void VP8LConvertBGRAToRGBA_C(const uint32_t* src,
|
||||
|
@ -13,12 +13,14 @@
|
||||
|
||||
#include "./dsp.h"
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#if defined(WEBP_USE_SSE2)
|
||||
#include <emmintrin.h>
|
||||
#include "./lossless.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Predictors
|
||||
// Predictor Transform
|
||||
|
||||
static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,
|
||||
uint32_t c2) {
|
||||
@ -118,7 +120,7 @@ static uint32_t Predictor13(uint32_t left, const uint32_t* const top) {
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Colorspace conversion functions
|
||||
// Subtract-Green Transform
|
||||
|
||||
static void SubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixels) {
|
||||
const __m128i mask = _mm_set1_epi32(0x0000ff00);
|
||||
@ -152,6 +154,65 @@ static void AddGreenToBlueAndRed(uint32_t* argb_data, int num_pixels) {
|
||||
VP8LAddGreenToBlueAndRed_C(argb_data + i, num_pixels - i);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Color Transform
|
||||
|
||||
// Returns, for each of the four 32-bit lanes, the product of the multiplier in
// 'color_pred' and the channel value in 'color' (both treated as signed 8-bit
// quantities held in the low byte of the lane), shifted right by 5.
// The final shift is a logical one on the 32-bit lane, so the result is not
// sign-extended: callers are expected to keep only the low 8 bits of whatever
// they accumulate it into.
static WEBP_INLINE __m128i ColorTransformDelta(__m128i color_pred,
                                               __m128i color) {
  // We simulate signed 8-bit multiplication as:
  // * Left shift the two (8-bit) numbers by 8 bits,
  // * Perform a 16-bit signed multiplication and retain the higher 16-bits.
  const __m128i color_pred_shifted = _mm_slli_epi32(color_pred, 8);
  const __m128i color_shifted = _mm_slli_epi32(color, 8);
  // Note: This performs multiplication on 8 packed 16-bit numbers, 4 of which
  // happen to be zeroes.
  const __m128i signed_mult =
      _mm_mulhi_epi16(color_pred_shifted, color_shifted);
  return _mm_srli_epi32(signed_mult, 5);
}
|
||||
|
||||
static WEBP_INLINE void TransformColorInverse(const VP8LMultipliers* const m,
|
||||
uint32_t* argb_data,
|
||||
int num_pixels) {
|
||||
const __m128i g_to_r = _mm_set1_epi32(m->green_to_red_); // multipliers
|
||||
const __m128i g_to_b = _mm_set1_epi32(m->green_to_blue_);
|
||||
const __m128i r_to_b = _mm_set1_epi32(m->red_to_blue_);
|
||||
|
||||
int i;
|
||||
|
||||
for (i = 0; i + 4 <= num_pixels; i += 4) {
|
||||
const __m128i in = _mm_loadu_si128((__m128i*)&argb_data[i]);
|
||||
const __m128i alpha_green_mask = _mm_set1_epi32(0xff00ff00); // masks
|
||||
const __m128i red_mask = _mm_set1_epi32(0x00ff0000);
|
||||
const __m128i green_mask = _mm_set1_epi32(0x0000ff00);
|
||||
const __m128i lower_8bit_mask = _mm_set1_epi32(0x000000ff);
|
||||
const __m128i ag = _mm_and_si128(in, alpha_green_mask); // alpha, green
|
||||
const __m128i r = _mm_srli_epi32(_mm_and_si128(in, red_mask), 16);
|
||||
const __m128i g = _mm_srli_epi32(_mm_and_si128(in, green_mask), 8);
|
||||
const __m128i b = in;
|
||||
|
||||
const __m128i r_delta = ColorTransformDelta(g_to_r, g); // red
|
||||
const __m128i r_new =
|
||||
_mm_and_si128(_mm_add_epi32(r, r_delta), lower_8bit_mask);
|
||||
const __m128i r_new_shifted = _mm_slli_epi32(r_new, 16);
|
||||
|
||||
const __m128i b_delta_1 = ColorTransformDelta(g_to_b, g); // blue
|
||||
const __m128i b_delta_2 = ColorTransformDelta(r_to_b, r_new);
|
||||
const __m128i b_delta = _mm_add_epi32(b_delta_1, b_delta_2);
|
||||
const __m128i b_new =
|
||||
_mm_and_si128(_mm_add_epi32(b, b_delta), lower_8bit_mask);
|
||||
|
||||
const __m128i out = _mm_or_si128(_mm_or_si128(ag, r_new_shifted), b_new);
|
||||
_mm_storeu_si128((__m128i*)&argb_data[i], out);
|
||||
}
|
||||
|
||||
// Fall-back to C-version for left-overs.
|
||||
VP8LTransformColorInverse_C(m, argb_data + i, num_pixels - i);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Color-space conversion functions
|
||||
|
||||
static void ConvertBGRAToRGBA(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
const __m128i* in = (const __m128i*)src;
|
||||
@ -298,6 +359,8 @@ void VP8LDspInitSSE2(void) {
|
||||
VP8LSubtractGreenFromBlueAndRed = SubtractGreenFromBlueAndRed;
|
||||
VP8LAddGreenToBlueAndRed = AddGreenToBlueAndRed;
|
||||
|
||||
VP8LTransformColorInverse = TransformColorInverse;
|
||||
|
||||
VP8LConvertBGRAToRGBA = ConvertBGRAToRGBA;
|
||||
VP8LConvertBGRAToRGBA4444 = ConvertBGRAToRGBA4444;
|
||||
VP8LConvertBGRAToRGB565 = ConvertBGRAToRGB565;
|
||||
|
Loading…
x
Reference in New Issue
Block a user