Refactor histograms in predictors.

Replace the 2d histograms with uint32_t 1d versions (to avoid
pointer casting and to use the optimized VP8LAddVectorEq).

Change-Id: I90b0fe98390b49e3fd03e3484289571cf7ae6eca
This commit is contained in:
Vincent Rabaud
2024-05-03 22:09:38 +02:00
parent a7aa7525b8
commit a90160e11a
6 changed files with 90 additions and 97 deletions

View File

@ -155,13 +155,13 @@ extern VP8LTransformColorFunc VP8LTransformColor;
typedef void (*VP8LCollectColorBlueTransformsFunc)(
const uint32_t* argb, int stride,
int tile_width, int tile_height,
int green_to_blue, int red_to_blue, int histo[]);
int green_to_blue, int red_to_blue, uint32_t histo[]);
extern VP8LCollectColorBlueTransformsFunc VP8LCollectColorBlueTransforms;
typedef void (*VP8LCollectColorRedTransformsFunc)(
const uint32_t* argb, int stride,
int tile_width, int tile_height,
int green_to_red, int histo[]);
int green_to_red, uint32_t histo[]);
extern VP8LCollectColorRedTransformsFunc VP8LCollectColorRedTransforms;
// Expose some C-only fallback functions
@ -170,11 +170,11 @@ void VP8LTransformColor_C(const VP8LMultipliers* const m,
void VP8LSubtractGreenFromBlueAndRed_C(uint32_t* argb_data, int num_pixels);
void VP8LCollectColorRedTransforms_C(const uint32_t* argb, int stride,
int tile_width, int tile_height,
int green_to_red, int histo[]);
int green_to_red, uint32_t histo[]);
void VP8LCollectColorBlueTransforms_C(const uint32_t* argb, int stride,
int tile_width, int tile_height,
int green_to_blue, int red_to_blue,
int histo[]);
uint32_t histo[]);
extern VP8LPredictorAddSubFunc VP8LPredictorsSub[16];
extern VP8LPredictorAddSubFunc VP8LPredictorsSub_C[16];
@ -185,8 +185,8 @@ extern VP8LPredictorAddSubFunc VP8LPredictorsSub_C[16];
typedef uint32_t (*VP8LCostFunc)(const uint32_t* population, int length);
typedef uint32_t (*VP8LCostCombinedFunc)(const uint32_t* X, const uint32_t* Y,
int length);
typedef float (*VP8LCombinedShannonEntropyFunc)(const int X[256],
const int Y[256]);
typedef float (*VP8LCombinedShannonEntropyFunc)(const uint32_t X[256],
const uint32_t Y[256]);
extern VP8LCostFunc VP8LExtraCost;
extern VP8LCostCombinedFunc VP8LExtraCostCombined;

View File

@ -400,14 +400,15 @@ static float FastLog2Slow_C(uint32_t v) {
// Methods to calculate Entropy (Shannon).
// Compute the combined Shannon's entropy for distribution {X} and {X+Y}
static float CombinedShannonEntropy_C(const int X[256], const int Y[256]) {
static float CombinedShannonEntropy_C(const uint32_t X[256],
const uint32_t Y[256]) {
int i;
float retval = 0.f;
int sumX = 0, sumXY = 0;
uint32_t sumX = 0, sumXY = 0;
for (i = 0; i < 256; ++i) {
const int x = X[i];
const uint32_t x = X[i];
if (x != 0) {
const int xy = x + Y[i];
const uint32_t xy = x + Y[i];
sumX += x;
retval -= VP8LFastSLog2(x);
sumXY += xy;
@ -577,7 +578,7 @@ static WEBP_INLINE uint8_t TransformColorBlue(uint8_t green_to_blue,
void VP8LCollectColorRedTransforms_C(const uint32_t* argb, int stride,
int tile_width, int tile_height,
int green_to_red, int histo[]) {
int green_to_red, uint32_t histo[]) {
while (tile_height-- > 0) {
int x;
for (x = 0; x < tile_width; ++x) {
@ -590,7 +591,7 @@ void VP8LCollectColorRedTransforms_C(const uint32_t* argb, int stride,
void VP8LCollectColorBlueTransforms_C(const uint32_t* argb, int stride,
int tile_width, int tile_height,
int green_to_blue, int red_to_blue,
int histo[]) {
uint32_t histo[]) {
while (tile_height-- > 0) {
int x;
for (x = 0; x < tile_width; ++x) {

View File

@ -171,13 +171,9 @@ static WEBP_INLINE uint8_t TransformColorBlue(uint8_t green_to_blue,
return (new_blue & 0xff);
}
static void CollectColorBlueTransforms_MIPSdspR2(const uint32_t* argb,
int stride,
int tile_width,
int tile_height,
int green_to_blue,
int red_to_blue,
int histo[]) {
static void CollectColorBlueTransforms_MIPSdspR2(
const uint32_t* argb, int stride, int tile_width, int tile_height,
int green_to_blue, int red_to_blue, uint32_t histo[]) {
const int rtb = (red_to_blue << 16) | (red_to_blue & 0xffff);
const int gtb = (green_to_blue << 16) | (green_to_blue & 0xffff);
const uint32_t mask = 0xff00ffu;
@ -226,11 +222,10 @@ static WEBP_INLINE uint8_t TransformColorRed(uint8_t green_to_red,
}
static void CollectColorRedTransforms_MIPSdspR2(const uint32_t* argb,
int stride,
int tile_width,
int stride, int tile_width,
int tile_height,
int green_to_red,
int histo[]) {
uint32_t histo[]) {
const int gtr = (green_to_red << 16) | (green_to_red & 0xffff);
while (tile_height-- > 0) {
int x;

View File

@ -82,7 +82,7 @@ static void TransformColor_SSE2(const VP8LMultipliers* const m,
static void CollectColorBlueTransforms_SSE2(const uint32_t* argb, int stride,
int tile_width, int tile_height,
int green_to_blue, int red_to_blue,
int histo[]) {
uint32_t histo[]) {
const __m128i mults_r = MK_CST_16(CST_5b(red_to_blue), 0);
const __m128i mults_g = MK_CST_16(0, CST_5b(green_to_blue));
const __m128i mask_g = _mm_set1_epi32(0x00ff00); // green mask
@ -128,7 +128,7 @@ static void CollectColorBlueTransforms_SSE2(const uint32_t* argb, int stride,
static void CollectColorRedTransforms_SSE2(const uint32_t* argb, int stride,
int tile_width, int tile_height,
int green_to_red, int histo[]) {
int green_to_red, uint32_t histo[]) {
const __m128i mults_g = MK_CST_16(0, CST_5b(green_to_red));
const __m128i mask_g = _mm_set1_epi32(0x00ff00); // green mask
const __m128i mask = _mm_set1_epi32(0xff);
@ -237,10 +237,11 @@ static void AddVectorEq_SSE2(const uint32_t* a, uint32_t* out, int size) {
// when compared to -noasm.
#if !(defined(WEBP_HAVE_SLOW_CLZ_CTZ) || defined(__i386__) || defined(_M_IX86))
static float CombinedShannonEntropy_SSE2(const int X[256], const int Y[256]) {
static float CombinedShannonEntropy_SSE2(const uint32_t X[256],
const uint32_t Y[256]) {
int i;
float retval = 0.f;
int sumX = 0, sumXY = 0;
uint32_t sumX = 0, sumXY = 0;
const __m128i zero = _mm_setzero_si128();
for (i = 0; i < 256; i += 16) {
@ -260,7 +261,7 @@ static float CombinedShannonEntropy_SSE2(const int X[256], const int Y[256]) {
int32_t my = _mm_movemask_epi8(_mm_cmpgt_epi8(y4, zero)) | mx;
while (my) {
const int32_t j = BitsCtz(my);
int xy;
uint32_t xy;
if ((mx >> j) & 1) {
const int x = X[i + j];
sumXY += x;

View File

@ -98,7 +98,7 @@ static void SubtractGreenFromBlueAndRed_SSE41(uint32_t* argb_data,
static void CollectColorBlueTransforms_SSE41(const uint32_t* argb, int stride,
int tile_width, int tile_height,
int green_to_blue, int red_to_blue,
int histo[]) {
uint32_t histo[]) {
const __m128i mult =
MK_CST_16(CST_5b(red_to_blue) + 256,CST_5b(green_to_blue));
const __m128i perm =
@ -143,8 +143,8 @@ static void CollectColorBlueTransforms_SSE41(const uint32_t* argb, int stride,
static void CollectColorRedTransforms_SSE41(const uint32_t* argb, int stride,
int tile_width, int tile_height,
int green_to_red, int histo[]) {
int green_to_red,
uint32_t histo[]) {
const __m128i mult = MK_CST_16(0, CST_5b(green_to_red));
const __m128i mask_g = _mm_set1_epi32(0x0000ff00);
if (tile_width >= 4) {