Switch ExtraCost to ints and implement it in SSE.

The histograms count the occurrences of len/dist in entropy images.
Those (at most (1<<14) by (1<<14)) are sub-sampled by at least
MIN_HUFFMAN_BITS == 2, hence at most 24 bits in a histogram value.
The largest multiplier applied is 19 (the longest histogram has
40 entries and the weight is derived from 40 >> 1; see the code),
so the bit-cost sum still fits in 32 bits.

Change-Id: Ife24b035f54794851ff31f2fac07901f724c6d7f
This commit is contained in:
Vincent Rabaud
2023-05-31 15:41:43 +02:00
parent 15b365083d
commit 828b4ce062
5 changed files with 94 additions and 37 deletions

View File

@ -636,20 +636,25 @@ void VP8LBundleColorMap_C(const uint8_t* const row, int width, int xbits,
//------------------------------------------------------------------------------
// Returns the total "extra bits" cost for a length/distance histogram.
// Symbol k (k >= 4) carries (k >> 1) - 1 extra bits, so population[4] and
// population[5] cost 1 bit each, population[6]/[7] cost 2 bits, etc.
// Uses 32-bit integer math: histogram counts fit in 24 bits and the maximum
// weight is 19 (longest histogram has 40 entries), so the sum fits in 32 bits.
// NOTE(review): assumes length is even (asserted) and length >= 6 so that
// population[4] and population[5] are in bounds — callers pass the fixed
// length/distance alphabet sizes.
static uint32_t ExtraCost_C(const uint32_t* population, int length) {
  int i;
  // Symbols 4 and 5 each contribute one extra bit.
  uint32_t cost = population[4] + population[5];
  assert(length % 2 == 0);
  // Process the remaining symbols in pairs: symbols 2*i+2 and 2*i+3 both
  // carry i extra bits.
  for (i = 2; i < length / 2 - 1; ++i) {
    cost += i * (population[2 * i + 2] + population[2 * i + 3]);
  }
  return cost;
}
// Returns the total "extra bits" cost for the element-wise sum of two
// histograms X and Y, without materializing the combined histogram.
// Equivalent to ExtraCost on (X[i] + Y[i]): symbol k (k >= 4) carries
// (k >> 1) - 1 extra bits. Integer math fits in 32 bits for the histogram
// sizes used here (counts <= 24 bits, maximum weight 19).
// NOTE(review): assumes length is even (asserted) and length >= 6 so that
// indices 4 and 5 are in bounds for both arrays.
static uint32_t ExtraCostCombined_C(const uint32_t* X, const uint32_t* Y,
                                    int length) {
  int i;
  // Symbols 4 and 5 each contribute one extra bit.
  uint32_t cost = X[4] + Y[4] + X[5] + Y[5];
  assert(length % 2 == 0);
  // Pairs of symbols 2*i+2 and 2*i+3 both carry i extra bits.
  for (i = 2; i < length / 2 - 1; ++i) {
    const int xy0 = X[2 * i + 2] + Y[2 * i + 2];
    const int xy1 = X[2 * i + 3] + Y[2 * i + 3];
    cost += i * (xy0 + xy1);
  }
  return cost;
}