Switch ExtraCost to ints and implement it in SSE.

The histograms count the occurrences of len/dist in entropy images.
Those images (at most (1<<14) by (1<<14) pixels) are sub-sampled by at least
MIN_HUFFMAN_BITS == 2 bits in each dimension, hence at most 28 - 4 = 24 bits
in a histogram value.
At most, we multiply by 19 (because the longest histogram is of
size 40 and we do 40>>1, cf. the code) for the bit cost. So it all
fits in 32 bits.

Change-Id: Ife24b035f54794851ff31f2fac07901f724c6d7f
This commit is contained in:
Vincent Rabaud
2023-05-31 15:41:43 +02:00
parent 15b365083d
commit 828b4ce062
5 changed files with 94 additions and 37 deletions

View File

@ -103,8 +103,8 @@ static float FastLog2Slow_MIPS32(uint32_t v) {
// cost += i * *(pop + 1);
// pop += 2;
// }
// return (float)cost;
static float ExtraCost_MIPS32(const uint32_t* const population, int length) {
// return cost;
static uint32_t ExtraCost_MIPS32(const uint32_t* const population, int length) {
int i, temp0, temp1;
const uint32_t* pop = &population[4];
const uint32_t* const LoopEnd = &population[length];
@ -130,7 +130,7 @@ static float ExtraCost_MIPS32(const uint32_t* const population, int length) {
: "memory", "hi", "lo"
);
return (float)((int64_t)temp0 << 32 | temp1);
return ((int64_t)temp0 << 32 | temp1);
}
// C version of this function:
@ -148,9 +148,9 @@ static float ExtraCost_MIPS32(const uint32_t* const population, int length) {
// pX += 2;
// pY += 2;
// }
// return (float)cost;
static float ExtraCostCombined_MIPS32(const uint32_t* const X,
const uint32_t* const Y, int length) {
// return cost;
static uint32_t ExtraCostCombined_MIPS32(const uint32_t* const X,
const uint32_t* const Y, int length) {
int i, temp0, temp1, temp2, temp3;
const uint32_t* pX = &X[4];
const uint32_t* pY = &Y[4];
@ -183,7 +183,7 @@ static float ExtraCostCombined_MIPS32(const uint32_t* const X,
: "memory", "hi", "lo"
);
return (float)((int64_t)temp0 << 32 | temp1);
return ((int64_t)temp0 << 32 | temp1);
}
#define HUFFMAN_COST_PASS \