Mirror of https://github.com/webmproject/libwebp.git (synced 2025-04-03 15:36:50 +02:00)
make HistogramAdd() a pointer in dsp
* merged the two HistogramAdd/AddEval() into a single call
  (with detection of special case when b==out)
* added a SSE2 variant
* harmonize the histogram type to 'uint32_t' instead of just 'int'.
  This has a lot of ripples on signatures.
* 1-2% faster

Change-Id: I10299ff300f36cdbca5a560df1ae4d4df149d306
This commit is contained in:
parent c8bbb636ea
commit b3a616b356
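The hunks below carry the actual change in three pieces: a plain-C HistogramAdd() exposed through a new VP8LHistogramAdd function pointer in the dsp layer, an SSE2 variant that overrides that pointer in VP8LDspInitSSE2(), and uint32_t histogram counters threaded through the encoder. As a rough orientation only, the dispatch pattern (one entry point, in-place accumulation detected via b == out, pointer overridable by a faster variant) can be sketched in standalone C; Histo, NUM_CODES, HistoAdd and HistoAdd_C are simplified stand-ins for illustration, not libwebp types or functions.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define NUM_CODES 8  /* stand-in for NUM_LITERAL_CODES etc. */

typedef struct { uint32_t counts[NUM_CODES]; } Histo;  /* simplified histogram */

/* Single entry point covering both 'out = a + b' and the in-place 'out += a'
 * special case (detected when b == out), mirroring the merged HistogramAdd. */
typedef void (*HistoAddFunc)(const Histo* a, const Histo* b, Histo* out);

static void HistoAdd_C(const Histo* a, const Histo* b, Histo* out) {
  int i;
  if (b != out) {
    for (i = 0; i < NUM_CODES; ++i) out->counts[i] = a->counts[i] + b->counts[i];
  } else {
    for (i = 0; i < NUM_CODES; ++i) out->counts[i] += a->counts[i];
  }
}

/* A VP8LDspInit()-style setup would overwrite this with an SSE2/NEON variant
 * when CPU support is detected. */
static HistoAddFunc HistoAdd = HistoAdd_C;

int main(void) {
  Histo a = {{1, 2, 3, 4, 5, 6, 7, 8}};
  Histo acc = {{10, 10, 10, 10, 10, 10, 10, 10}};
  HistoAdd(&a, &acc, &acc);  /* in-place accumulate: b == out */
  assert(acc.counts[0] == 11 && acc.counts[7] == 18);
  printf("acc[0]=%u acc[7]=%u\n", (unsigned)acc.counts[0], (unsigned)acc.counts[7]);
  return 0;
}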
@@ -332,14 +332,14 @@ const uint8_t kPrefixEncodeExtraBitsValue[PREFIX_LOOKUP_IDX_MAX] = {
 #define APPROX_LOG_WITH_CORRECTION_MAX 65536
 #define APPROX_LOG_MAX 4096
 #define LOG_2_RECIPROCAL 1.44269504088896338700465094007086
-static float FastSLog2Slow(int v) {
+static float FastSLog2Slow(uint32_t v) {
   assert(v >= LOG_LOOKUP_IDX_MAX);
   if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
     int log_cnt = 0;
-    int y = 1;
+    uint32_t y = 1;
     int correction = 0;
     const float v_f = (float)v;
-    const int orig_v = v;
+    const uint32_t orig_v = v;
     do {
       ++log_cnt;
       v = v >> 1;
@@ -358,12 +358,12 @@ static float FastSLog2Slow(int v) {
   }
 }
 
-static float FastLog2Slow(int v) {
+static float FastLog2Slow(uint32_t v) {
   assert(v >= LOG_LOOKUP_IDX_MAX);
   if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
     int log_cnt = 0;
-    int y = 1;
-    const int orig_v = v;
+    uint32_t y = 1;
+    const uint32_t orig_v = v;
     double log_2;
     do {
       ++log_cnt;
@@ -1437,6 +1437,7 @@ void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels,
   }
 }
 
+//------------------------------------------------------------------------------
 // Bundles multiple (1, 2, 4 or 8) pixels into a single pixel.
 void VP8LBundleColorMap(const uint8_t* const row, int width,
                         int xbits, uint32_t* const dst) {
@@ -1458,14 +1459,17 @@ void VP8LBundleColorMap(const uint8_t* const row, int width,
   }
 }
 
-static double ExtraCost(const int* population, int length) {
+//------------------------------------------------------------------------------
+
+static double ExtraCost(const uint32_t* population, int length) {
   int i;
   double cost = 0.;
   for (i = 2; i < length - 2; ++i) cost += (i >> 1) * population[i + 2];
   return cost;
 }
 
-static double ExtraCostCombined(const int* X, const int* Y, int length) {
+static double ExtraCostCombined(const uint32_t* X, const uint32_t* Y,
+                                int length) {
   int i;
   double cost = 0.;
   for (i = 2; i < length - 2; ++i) {
@@ -1476,7 +1480,7 @@ static double ExtraCostCombined(const int* X, const int* Y, int length) {
 }
 
 // Returns the various RLE counts
-static VP8LStreaks HuffmanCostCount(const int* population, int length) {
+static VP8LStreaks HuffmanCostCount(const uint32_t* population, int length) {
   int i;
   int streak = 0;
   VP8LStreaks stats;
@@ -1496,8 +1500,8 @@ static VP8LStreaks HuffmanCostCount(const int* population, int length) {
   return stats;
 }
 
-static VP8LStreaks HuffmanCostCombinedCount(const int* X, const int* Y,
-                                            int length) {
+static VP8LStreaks HuffmanCostCombinedCount(const uint32_t* X,
+                                            const uint32_t* Y, int length) {
   int i;
   int streak = 0;
   VP8LStreaks stats;
@@ -1524,6 +1528,41 @@ static VP8LStreaks HuffmanCostCombinedCount(const int* X, const int* Y,
 
 //------------------------------------------------------------------------------
 
+static void HistogramAdd(const VP8LHistogram* const a,
+                         const VP8LHistogram* const b,
+                         VP8LHistogram* const out) {
+  int i;
+  const int literal_size = VP8LHistogramNumCodes(a->palette_code_bits_);
+  assert(a->palette_code_bits_ == b->palette_code_bits_);
+  if (b != out) {
+    for (i = 0; i < literal_size; ++i) {
+      out->literal_[i] = a->literal_[i] + b->literal_[i];
+    }
+    for (i = 0; i < NUM_DISTANCE_CODES; ++i) {
+      out->distance_[i] = a->distance_[i] + b->distance_[i];
+    }
+    for (i = 0; i < NUM_LITERAL_CODES; ++i) {
+      out->red_[i] = a->red_[i] + b->red_[i];
+      out->blue_[i] = a->blue_[i] + b->blue_[i];
+      out->alpha_[i] = a->alpha_[i] + b->alpha_[i];
+    }
+  } else {
+    for (i = 0; i < literal_size; ++i) {
+      out->literal_[i] += a->literal_[i];
+    }
+    for (i = 0; i < NUM_DISTANCE_CODES; ++i) {
+      out->distance_[i] += a->distance_[i];
+    }
+    for (i = 0; i < NUM_LITERAL_CODES; ++i) {
+      out->red_[i] += a->red_[i];
+      out->blue_[i] += a->blue_[i];
+      out->alpha_[i] += a->alpha_[i];
+    }
+  }
+}
+
+//------------------------------------------------------------------------------
+
 VP8LProcessBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed;
 VP8LProcessBlueAndRedFunc VP8LAddGreenToBlueAndRed;
 VP8LPredictorFunc VP8LPredictors[16];
@@ -1546,6 +1585,8 @@ VP8LCostCombinedFunc VP8LExtraCostCombined;
 VP8LCostCountFunc VP8LHuffmanCostCount;
 VP8LCostCombinedCountFunc VP8LHuffmanCostCombinedCount;
 
+VP8LHistogramAddFunc VP8LHistogramAdd;
+
 extern void VP8LDspInitSSE2(void);
 extern void VP8LDspInitNEON(void);
 extern void VP8LDspInitMIPS32(void);
@@ -1574,6 +1615,8 @@ void VP8LDspInit(void) {
   VP8LHuffmanCostCount = HuffmanCostCount;
   VP8LHuffmanCostCombinedCount = HuffmanCostCombinedCount;
 
+  VP8LHistogramAdd = HistogramAdd;
+
   // If defined, use CPUInfo() to overwrite some pointers with faster versions.
   if (VP8GetCPUInfo != NULL) {
 #if defined(WEBP_USE_SSE2)
@@ -18,6 +18,8 @@
 #include "../webp/types.h"
 #include "../webp/decode.h"
 
+#include "../enc/histogram.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -123,24 +125,25 @@ static WEBP_INLINE uint32_t VP8LSubSampleSize(uint32_t size,
 #define LOG_LOOKUP_IDX_MAX 256
 extern const float kLog2Table[LOG_LOOKUP_IDX_MAX];
 extern const float kSLog2Table[LOG_LOOKUP_IDX_MAX];
-typedef float (*VP8LFastLog2SlowFunc)(int v);
+typedef float (*VP8LFastLog2SlowFunc)(uint32_t v);
 
 extern VP8LFastLog2SlowFunc VP8LFastLog2Slow;
 extern VP8LFastLog2SlowFunc VP8LFastSLog2Slow;
 
-static WEBP_INLINE float VP8LFastLog2(int v) {
+static WEBP_INLINE float VP8LFastLog2(uint32_t v) {
   return (v < LOG_LOOKUP_IDX_MAX) ? kLog2Table[v] : VP8LFastLog2Slow(v);
 }
 // Fast calculation of v * log2(v) for integer input.
-static WEBP_INLINE float VP8LFastSLog2(int v) {
+static WEBP_INLINE float VP8LFastSLog2(uint32_t v) {
   return (v < LOG_LOOKUP_IDX_MAX) ? kSLog2Table[v] : VP8LFastSLog2Slow(v);
 }
 
 // -----------------------------------------------------------------------------
 // Huffman-cost related functions.
 
-typedef double (*VP8LCostFunc)(const int* population, int length);
-typedef double (*VP8LCostCombinedFunc)(const int* X, const int* Y, int length);
+typedef double (*VP8LCostFunc)(const uint32_t* population, int length);
+typedef double (*VP8LCostCombinedFunc)(const uint32_t* X, const uint32_t* Y,
+                                       int length);
 
 extern VP8LCostFunc VP8LExtraCost;
 extern VP8LCostCombinedFunc VP8LExtraCostCombined;
@@ -150,13 +153,19 @@ typedef struct {  // small struct to hold counters
   int streaks[2][2];  // [zero/non-zero][streak<3 / streak>=3]
 } VP8LStreaks;
 
-typedef VP8LStreaks (*VP8LCostCountFunc)(const int* population, int length);
-typedef VP8LStreaks (*VP8LCostCombinedCountFunc)(const int* X, const int* Y,
-                                                 int length);
+typedef VP8LStreaks (*VP8LCostCountFunc)(const uint32_t* population,
+                                         int length);
+typedef VP8LStreaks (*VP8LCostCombinedCountFunc)(const uint32_t* X,
+                                                 const uint32_t* Y, int length);
 
 extern VP8LCostCountFunc VP8LHuffmanCostCount;
 extern VP8LCostCombinedCountFunc VP8LHuffmanCostCombinedCount;
 
+typedef void (*VP8LHistogramAddFunc)(const VP8LHistogram* const a,
+                                     const VP8LHistogram* const b,
+                                     VP8LHistogram* const out);
+extern VP8LHistogramAddFunc VP8LHistogramAdd;
+
 // -----------------------------------------------------------------------------
 // PrefixEncode()
 
@@ -26,13 +26,13 @@
 #define APPROX_LOG_MAX 4096
 #define LOG_2_RECIPROCAL 1.44269504088896338700465094007086
 
-static float FastSLog2Slow(int v) {
+static float FastSLog2Slow(uint32_t v) {
   assert(v >= LOG_LOOKUP_IDX_MAX);
   if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
-    int log_cnt, y, correction;
+    uint32_t log_cnt, y, correction;
     const int c24 = 24;
     const float v_f = (float)v;
-    int temp;
+    uint32_t temp;
 
     // Xf = 256 = 2^8
     // log_cnt is index of leading one in upper 24 bits
@@ -62,13 +62,13 @@ static float FastSLog2Slow(int v) {
   }
 }
 
-static float FastLog2Slow(int v) {
+static float FastLog2Slow(uint32_t v) {
   assert(v >= LOG_LOOKUP_IDX_MAX);
   if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
-    int log_cnt, y;
+    uint32_t log_cnt, y;
     const int c24 = 24;
     double log_2;
-    int temp;
+    uint32_t temp;
 
     __asm__ volatile(
       "clz %[log_cnt], %[v] \n\t"
@@ -86,7 +86,7 @@ static float FastLog2Slow(int v) {
       // Since the division is still expensive, add this correction factor only
      // for large values of 'v'.
 
-      const int correction = (23 * (v & (y - 1))) >> 4;
+      const uint32_t correction = (23 * (v & (y - 1))) >> 4;
       log_2 += (double)correction / v;
     }
     return (float)log_2;
@@ -98,8 +98,8 @@ static float FastLog2Slow(int v) {
 // C version of this function:
 //   int i = 0;
 //   int64_t cost = 0;
-//   int* pop = (int*)&population[4];
-//   const int* LoopEnd = (int*)&population[length];
+//   const uint32_t* pop = &population[4];
+//   const uint32_t* LoopEnd = &population[length];
 //   while (pop != LoopEnd) {
 //     ++i;
 //     cost += i * *pop;
@@ -107,10 +107,10 @@ static float FastLog2Slow(int v) {
 //     pop += 2;
 //   }
 //   return (double)cost;
-static double ExtraCost(const int* const population, int length) {
+static double ExtraCost(const uint32_t* const population, int length) {
   int i, temp0, temp1;
-  const int* pop = &population[4];
-  const int* const LoopEnd = &population[length];
+  const uint32_t* pop = &population[4];
+  const uint32_t* const LoopEnd = &population[length];
 
   __asm__ volatile(
     "mult $zero, $zero \n\t"
@@ -139,12 +139,12 @@ static double ExtraCost(const int* const population, int length) {
 // C version of this function:
 //   int i = 0;
 //   int64_t cost = 0;
-//   int* pX = (int*)&X[4];
-//   int* pY = (int*)&Y[4];
-//   const int* LoopEnd = (int*)&X[length];
+//   const uint32_t* pX = &X[4];
+//   const uint32_t* pY = &Y[4];
+//   const uint32_t* LoopEnd = &X[length];
 //   while (pX != LoopEnd) {
-//     const int xy0 = *pX + *pY;
-//     const int xy1 = *(pX + 1) + *(pY + 1);
+//     const uint32_t xy0 = *pX + *pY;
+//     const uint32_t xy1 = *(pX + 1) + *(pY + 1);
 //     ++i;
 //     cost += i * xy0;
 //     cost += i * xy1;
@@ -152,12 +152,12 @@ static double ExtraCost(const int* const population, int length) {
 //     pY += 2;
 //   }
 //   return (double)cost;
-static double ExtraCostCombined(const int* const X, const int* const Y,
-                                int length) {
+static double ExtraCostCombined(const uint32_t* const X,
+                                const uint32_t* const Y, int length) {
   int i, temp0, temp1, temp2, temp3;
-  const int* pX = &X[4];
-  const int* pY = &Y[4];
-  const int* const LoopEnd = &X[length];
+  const uint32_t* pX = &X[4];
+  const uint32_t* pY = &Y[4];
+  const uint32_t* const LoopEnd = &X[length];
 
   __asm__ volatile(
     "mult $zero, $zero \n\t"
@@ -217,7 +217,7 @@ static double ExtraCostCombined(const int* const X, const int* const Y,
   );
 
 // Returns the various RLE counts
-static VP8LStreaks HuffmanCostCount(const int* population, int length) {
+static VP8LStreaks HuffmanCostCount(const uint32_t* population, int length) {
   int i;
   int streak = 0;
   VP8LStreaks stats;
@@ -230,19 +230,19 @@ static VP8LStreaks HuffmanCostCount(const int* population, int length) {
     if (population[i] == population[i + 1]) {
       continue;
     }
-    temp0 = population[i] != 0;
+    temp0 = (population[i] != 0);
    HUFFMAN_COST_PASS
     streak = 0;
   }
   ++streak;
-  temp0 = population[i] != 0;
+  temp0 = (population[i] != 0);
   HUFFMAN_COST_PASS
 
   return stats;
 }
 
-static VP8LStreaks HuffmanCostCombinedCount(const int* X, const int* Y,
-                                            int length) {
+static VP8LStreaks HuffmanCostCombinedCount(const uint32_t* X,
+                                            const uint32_t* Y, int length) {
   int i;
   int streak = 0;
   VP8LStreaks stats;
@@ -251,20 +251,20 @@ static VP8LStreaks HuffmanCostCombinedCount(const int* X, const int* Y,
   int temp0, temp1, temp2, temp3;
   memset(&stats, 0, sizeof(stats));
   for (i = 0; i < length - 1; ++i) {
-    const int xy = X[i] + Y[i];
-    const int xy_next = X[i + 1] + Y[i + 1];
+    const uint32_t xy = X[i] + Y[i];
+    const uint32_t xy_next = X[i + 1] + Y[i + 1];
     ++streak;
     if (xy == xy_next) {
      continue;
     }
-    temp0 = xy != 0;
+    temp0 = (xy != 0);
     HUFFMAN_COST_PASS
     streak = 0;
   }
   {
-    const int xy = X[i] + Y[i];
+    const uint32_t xy = X[i] + Y[i];
     ++streak;
-    temp0 = xy != 0;
+    temp0 = (xy != 0);
     HUFFMAN_COST_PASS
   }
 
@@ -383,6 +383,88 @@ static void ConvertBGRAToBGR(const uint32_t* src,
   VP8LConvertBGRAToBGR_C((const uint32_t*)in, num_pixels, dst);
 }
 
+//------------------------------------------------------------------------------
+
+#define LINE_SIZE 16  // 8 or 16
+static void AddVector(const uint32_t* a, const uint32_t* b, uint32_t* out,
+                      int size) {
+  int i;
+  assert(size % LINE_SIZE == 0);
+  for (i = 0; i < size; i += LINE_SIZE) {
+    const __m128i a0 = _mm_loadu_si128((__m128i*)&a[i + 0]);
+    const __m128i a1 = _mm_loadu_si128((__m128i*)&a[i + 4]);
+#if (LINE_SIZE == 16)
+    const __m128i a2 = _mm_loadu_si128((__m128i*)&a[i + 8]);
+    const __m128i a3 = _mm_loadu_si128((__m128i*)&a[i + 12]);
+#endif
+    const __m128i b0 = _mm_loadu_si128((__m128i*)&b[i + 0]);
+    const __m128i b1 = _mm_loadu_si128((__m128i*)&b[i + 4]);
+#if (LINE_SIZE == 16)
+    const __m128i b2 = _mm_loadu_si128((__m128i*)&b[i + 8]);
+    const __m128i b3 = _mm_loadu_si128((__m128i*)&b[i + 12]);
+#endif
+    _mm_storeu_si128((__m128i*)&out[i + 0], _mm_add_epi32(a0, b0));
+    _mm_storeu_si128((__m128i*)&out[i + 4], _mm_add_epi32(a1, b1));
+#if (LINE_SIZE == 16)
+    _mm_storeu_si128((__m128i*)&out[i + 8], _mm_add_epi32(a2, b2));
+    _mm_storeu_si128((__m128i*)&out[i + 12], _mm_add_epi32(a3, b3));
+#endif
+  }
+}
+
+static void AddVectorEq(const uint32_t* a, uint32_t* out, int size) {
+  int i;
+  assert(size % LINE_SIZE == 0);
+  for (i = 0; i < size; i += LINE_SIZE) {
+    const __m128i a0 = _mm_loadu_si128((__m128i*)&a[i + 0]);
+    const __m128i a1 = _mm_loadu_si128((__m128i*)&a[i + 4]);
+#if (LINE_SIZE == 16)
+    const __m128i a2 = _mm_loadu_si128((__m128i*)&a[i + 8]);
+    const __m128i a3 = _mm_loadu_si128((__m128i*)&a[i + 12]);
+#endif
+    const __m128i b0 = _mm_loadu_si128((__m128i*)&out[i + 0]);
+    const __m128i b1 = _mm_loadu_si128((__m128i*)&out[i + 4]);
+#if (LINE_SIZE == 16)
+    const __m128i b2 = _mm_loadu_si128((__m128i*)&out[i + 8]);
+    const __m128i b3 = _mm_loadu_si128((__m128i*)&out[i + 12]);
+#endif
+    _mm_storeu_si128((__m128i*)&out[i + 0], _mm_add_epi32(a0, b0));
+    _mm_storeu_si128((__m128i*)&out[i + 4], _mm_add_epi32(a1, b1));
+#if (LINE_SIZE == 16)
+    _mm_storeu_si128((__m128i*)&out[i + 8], _mm_add_epi32(a2, b2));
+    _mm_storeu_si128((__m128i*)&out[i + 12], _mm_add_epi32(a3, b3));
+#endif
+  }
+}
+#undef LINE_SIZE
+
+// Note we are adding uint32_t's as *signed* int32's (using _mm_add_epi32). But
+// that's ok since the histogram values are less than 1<<28 (max picture size).
+static void HistogramAdd(const VP8LHistogram* const a,
+                         const VP8LHistogram* const b,
+                         VP8LHistogram* const out) {
+  int i;
+  const int literal_size = VP8LHistogramNumCodes(a->palette_code_bits_);
+  assert(a->palette_code_bits_ == b->palette_code_bits_);
+  if (b != out) {
+    AddVector(a->literal_, b->literal_, out->literal_, NUM_LITERAL_CODES);
+    AddVector(a->red_, b->red_, out->red_, NUM_LITERAL_CODES);
+    AddVector(a->blue_, b->blue_, out->blue_, NUM_LITERAL_CODES);
+    AddVector(a->alpha_, b->alpha_, out->alpha_, NUM_LITERAL_CODES);
+  } else {
+    AddVectorEq(a->literal_, out->literal_, NUM_LITERAL_CODES);
+    AddVectorEq(a->red_, out->red_, NUM_LITERAL_CODES);
+    AddVectorEq(a->blue_, out->blue_, NUM_LITERAL_CODES);
+    AddVectorEq(a->alpha_, out->alpha_, NUM_LITERAL_CODES);
+  }
+  for (i = NUM_LITERAL_CODES; i < literal_size; ++i) {
+    out->literal_[i] = a->literal_[i] + b->literal_[i];
+  }
+  for (i = 0; i < NUM_DISTANCE_CODES; ++i) {
+    out->distance_[i] = a->distance_[i] + b->distance_[i];
+  }
+}
+
 #endif  // WEBP_USE_SSE2
 
 //------------------------------------------------------------------------------
@@ -405,6 +487,8 @@ void VP8LDspInitSSE2(void) {
   VP8LConvertBGRAToRGBA4444 = ConvertBGRAToRGBA4444;
   VP8LConvertBGRAToRGB565 = ConvertBGRAToRGB565;
   VP8LConvertBGRAToBGR = ConvertBGRAToBGR;
+
+  VP8LHistogramAdd = HistogramAdd;
 #endif  // WEBP_USE_SSE2
 }
 
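The signedness note in the SSE2 hunk above rests on a small bound: histogram counters stay below 1<<28 (limited by the maximum picture size), so the largest value HistogramAdd can form is 2*(2^28 - 1) = 2^29 - 2, well below INT32_MAX, and _mm_add_epi32's signed arithmetic never wraps. A throwaway check of that arithmetic (the constant names here are illustrative only, not from the source):

#include <assert.h>
#include <stdint.h>

int main(void) {
  const uint32_t kMaxCount = (1u << 28) - 1;         /* bound quoted in the comment */
  const uint32_t worst_sum = kMaxCount + kMaxCount;  /* largest sum HistogramAdd can form */
  assert(worst_sum == 0x1FFFFFFEu);                  /* 2^29 - 2 */
  assert(worst_sum < 0x7FFFFFFFu);                   /* below INT32_MAX: signed 32-bit add is safe */
  return 0;
}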
@@ -405,8 +405,8 @@ static int BackwardReferencesTraceBackwards(
     VP8LBackwardRefs* const refs);
 
 static void ConvertPopulationCountTableToBitEstimates(
-    int num_symbols, const int population_counts[], double output[]) {
-  int sum = 0;
+    int num_symbols, const uint32_t population_counts[], double output[]) {
+  uint32_t sum = 0;
   int nonzeros = 0;
   int i;
   for (i = 0; i < num_symbols; ++i) {
@@ -29,7 +29,7 @@
 #define BIN_SIZE (NUM_PARTITIONS * NUM_PARTITIONS * NUM_PARTITIONS)
 
 static void HistogramClear(VP8LHistogram* const p) {
-  int* const literal = p->literal_;
+  uint32_t* const literal = p->literal_;
   const int cache_bits = p->palette_code_bits_;
   const uint64_t histo_size = VP8LGetHistogramSize(cache_bits);
   memset(p, 0, histo_size);
@@ -39,7 +39,7 @@ static void HistogramClear(VP8LHistogram* const p) {
 
 static void HistogramCopy(const VP8LHistogram* const src,
                           VP8LHistogram* const dst) {
-  int* const dst_literal = dst->literal_;
+  uint32_t* const dst_literal = dst->literal_;
   const int dst_cache_bits = dst->palette_code_bits_;
   const uint64_t histo_size = VP8LGetHistogramSize(dst_cache_bits);
   assert(src->palette_code_bits_ == dst_cache_bits);
@@ -92,7 +92,7 @@ VP8LHistogram* VP8LAllocateHistogram(int cache_bits) {
   if (memory == NULL) return NULL;
   histo = (VP8LHistogram*)memory;
   // literal_ won't necessary be aligned.
-  histo->literal_ = (int*)(memory + sizeof(VP8LHistogram));
+  histo->literal_ = (uint32_t*)(memory + sizeof(VP8LHistogram));
   VP8LHistogramInit(histo, cache_bits);
   return histo;
 }
@@ -115,7 +115,7 @@ VP8LHistogramSet* VP8LAllocateHistogramSet(int size, int cache_bits) {
   for (i = 0; i < size; ++i) {
     set->histograms[i] = (VP8LHistogram*)memory;
     // literal_ won't necessary be aligned.
-    set->histograms[i]->literal_ = (int*)(memory + sizeof(VP8LHistogram));
+    set->histograms[i]->literal_ = (uint32_t*)(memory + sizeof(VP8LHistogram));
     VP8LHistogramInit(set->histograms[i], cache_bits);
     // There's no padding/alignment between successive histograms.
     memory += VP8LGetHistogramSize(cache_bits);
@@ -133,7 +133,7 @@ void VP8LHistogramAddSinglePixOrCopy(VP8LHistogram* const histo,
     ++histo->literal_[PixOrCopyLiteral(v, 1)];
     ++histo->blue_[PixOrCopyLiteral(v, 0)];
   } else if (PixOrCopyIsCacheIdx(v)) {
-    int literal_ix =
+    const int literal_ix =
         NUM_LITERAL_CODES + NUM_LENGTH_CODES + PixOrCopyCacheIdx(v);
     ++histo->literal_[literal_ix];
   } else {
@@ -178,11 +178,11 @@ static WEBP_INLINE double BitsEntropyRefine(int nonzeros, int sum, int max_val,
   }
 }
 
-static double BitsEntropy(const int* const array, int n) {
+static double BitsEntropy(const uint32_t* const array, int n) {
   double retval = 0.;
-  int sum = 0;
+  uint32_t sum = 0;
   int nonzeros = 0;
-  int max_val = 0;
+  uint32_t max_val = 0;
   int i;
   for (i = 0; i < n; ++i) {
     if (array[i] != 0) {
@@ -198,8 +198,8 @@ static double BitsEntropy(const int* const array, int n) {
   return BitsEntropyRefine(nonzeros, sum, max_val, retval);
 }
 
-static double BitsEntropyCombined(const int* const X, const int* const Y,
-                                  int n) {
+static double BitsEntropyCombined(const uint32_t* const X,
+                                  const uint32_t* const Y, int n) {
   double retval = 0.;
   int sum = 0;
   int nonzeros = 0;
@@ -239,24 +239,24 @@ static double FinalHuffmanCost(const VP8LStreaks* const stats) {
 }
 
 // Trampolines
-static double HuffmanCost(const int* const population, int length) {
+static double HuffmanCost(const uint32_t* const population, int length) {
   const VP8LStreaks stats = VP8LHuffmanCostCount(population, length);
   return FinalHuffmanCost(&stats);
 }
 
-static double HuffmanCostCombined(const int* const X, const int* const Y,
-                                  int length) {
+static double HuffmanCostCombined(const uint32_t* const X,
+                                  const uint32_t* const Y, int length) {
   const VP8LStreaks stats = VP8LHuffmanCostCombinedCount(X, Y, length);
   return FinalHuffmanCost(&stats);
 }
 
 // Aggregated costs
-static double PopulationCost(const int* const population, int length) {
+static double PopulationCost(const uint32_t* const population, int length) {
   return BitsEntropy(population, length) + HuffmanCost(population, length);
 }
 
-static double GetCombinedEntropy(const int* const X, const int* const Y,
-                                 int length) {
+static double GetCombinedEntropy(const uint32_t* const X,
+                                 const uint32_t* const Y, int length) {
   return BitsEntropyCombined(X, Y, length) + HuffmanCostCombined(X, Y, length);
 }
 
@@ -286,25 +286,6 @@ double VP8LHistogramEstimateBitsBulk(const VP8LHistogram* const p) {
 // -----------------------------------------------------------------------------
 // Various histogram combine/cost-eval functions
 
-// Adds 'in' histogram to 'out'
-static void HistogramAdd(const VP8LHistogram* const in,
-                         VP8LHistogram* const out) {
-  int i;
-  const int literal_size = VP8LHistogramNumCodes(in->palette_code_bits_);
-  assert(in->palette_code_bits_ == out->palette_code_bits_);
-  for (i = 0; i < literal_size; ++i) {
-    out->literal_[i] += in->literal_[i];
-  }
-  for (i = 0; i < NUM_DISTANCE_CODES; ++i) {
-    out->distance_[i] += in->distance_[i];
-  }
-  for (i = 0; i < NUM_LITERAL_CODES; ++i) {
-    out->red_[i] += in->red_[i];
-    out->blue_[i] += in->blue_[i];
-    out->alpha_[i] += in->alpha_[i];
-  }
-}
-
 static int GetCombinedHistogramEntropy(const VP8LHistogram* const a,
                                        const VP8LHistogram* const b,
                                        double cost_threshold,
@@ -347,23 +328,10 @@ static double HistogramAddEval(const VP8LHistogram* const a,
                                double cost_threshold) {
   double cost = 0;
   const double sum_cost = a->bit_cost_ + b->bit_cost_;
-  int i;
-  assert(a->palette_code_bits_ == b->palette_code_bits_);
   cost_threshold += sum_cost;
 
   if (GetCombinedHistogramEntropy(a, b, cost_threshold, &cost)) {
-    const int literal_size = VP8LHistogramNumCodes(a->palette_code_bits_);
-    for (i = 0; i < literal_size; ++i) {
-      out->literal_[i] = a->literal_[i] + b->literal_[i];
-    }
-    for (i = 0; i < NUM_DISTANCE_CODES; ++i) {
-      out->distance_[i] = a->distance_[i] + b->distance_[i];
-    }
-    for (i = 0; i < NUM_LITERAL_CODES; ++i) {
-      out->red_[i] = a->red_[i] + b->red_[i];
-      out->blue_[i] = a->blue_[i] + b->blue_[i];
-      out->alpha_[i] = a->alpha_[i] + b->alpha_[i];
-    }
+    VP8LHistogramAdd(a, b, out);
     out->bit_cost_ = cost;
     out->palette_code_bits_ = a->palette_code_bits_;
   }
@@ -697,8 +665,9 @@ static void HistogramRemap(const VP8LHistogramSet* const init_histo,
   }
 
   for (i = 0; i < init_histo->size; ++i) {
-    HistogramAdd(init_histo->histograms[i],
-                 histo_image->histograms[symbols[i]]);
+    VP8LHistogramAdd(init_histo->histograms[i],
+                     histo_image->histograms[symbols[i]],
+                     histo_image->histograms[symbols[i]]);
   }
 }
 
@@ -32,12 +32,12 @@ extern "C" {
 typedef struct {
   // literal_ contains green literal, palette-code and
   // copy-length-prefix histogram
-  int* literal_;        // Pointer to the allocated buffer for literal.
-  int red_[256];
-  int blue_[256];
-  int alpha_[256];
+  uint32_t* literal_;   // Pointer to the allocated buffer for literal.
+  uint32_t red_[NUM_LITERAL_CODES];
+  uint32_t blue_[NUM_LITERAL_CODES];
+  uint32_t alpha_[NUM_LITERAL_CODES];
   // Backward reference prefix-code histogram.
-  int distance_[NUM_DISTANCE_CODES];
+  uint32_t distance_[NUM_DISTANCE_CODES];
   int palette_code_bits_;
   double bit_cost_;      // cached value of VP8LHistogramEstimateBits(this)
   double literal_cost_;  // Cached values of dominant entropy costs:
@@ -330,7 +330,7 @@ static void StoreFullHuffmanCode(VP8LBitWriter* const bw,
   VP8LWriteBits(bw, 1, 0);
   num_tokens = VP8LCreateCompressedHuffmanTree(tree, tokens, max_tokens);
   {
-    int histogram[CODE_LENGTH_CODES] = { 0 };
+    uint32_t histogram[CODE_LENGTH_CODES] = { 0 };
     uint8_t buf_rle[CODE_LENGTH_CODES] = { 0 };
     int i;
     for (i = 0; i < num_tokens; ++i) {
@@ -29,7 +29,7 @@ static int ValuesShouldBeCollapsedToStrideAverage(int a, int b) {
 // Change the population counts in a way that the consequent
 // Huffman tree compression, especially its RLE-part, give smaller output.
 static void OptimizeHuffmanForRle(int length, uint8_t* const good_for_rle,
-                                  int* const counts) {
+                                  uint32_t* const counts) {
   // 1) Let's make the Huffman code more compatible with rle encoding.
   int i;
   for (; length >= 0; --length) {
@@ -47,7 +47,7 @@ static void OptimizeHuffmanForRle(int length, uint8_t* const good_for_rle,
     // Let's not spoil any of the existing good rle codes.
     // Mark any seq of 0's that is longer as 5 as a good_for_rle.
     // Mark any seq of non-0's that is longer as 7 as a good_for_rle.
-    int symbol = counts[0];
+    uint32_t symbol = counts[0];
     int stride = 0;
     for (i = 0; i < length + 1; ++i) {
       if (i == length || counts[i] != symbol) {
@@ -69,17 +69,17 @@ static void OptimizeHuffmanForRle(int length, uint8_t* const good_for_rle,
   }
   // 3) Let's replace those population counts that lead to more rle codes.
   {
-    int stride = 0;
-    int limit = counts[0];
-    int sum = 0;
+    uint32_t stride = 0;
+    uint32_t limit = counts[0];
+    uint32_t sum = 0;
     for (i = 0; i < length + 1; ++i) {
       if (i == length || good_for_rle[i] ||
           (i != 0 && good_for_rle[i - 1]) ||
          !ValuesShouldBeCollapsedToStrideAverage(counts[i], limit)) {
         if (stride >= 4 || (stride >= 3 && sum == 0)) {
-          int k;
+          uint32_t k;
           // The stride must end, collapse what we have, if we have enough (4).
-          int count = (sum + stride / 2) / stride;
+          uint32_t count = (sum + stride / 2) / stride;
           if (count < 1) {
             count = 1;
           }
@@ -162,10 +162,11 @@ static void SetBitDepths(const HuffmanTree* const tree,
 // we are not planning to use this with extremely long blocks.
 //
 // See http://en.wikipedia.org/wiki/Huffman_coding
-static void GenerateOptimalTree(const int* const histogram, int histogram_size,
+static void GenerateOptimalTree(const uint32_t* const histogram,
+                                int histogram_size,
                                 HuffmanTree* tree, int tree_depth_limit,
                                 uint8_t* const bit_depths) {
-  int count_min;
+  uint32_t count_min;
   HuffmanTree* tree_pool;
   int tree_size_orig = 0;
   int i;
@@ -195,7 +196,7 @@ static void GenerateOptimalTree(const int* const histogram, int histogram_size,
     int j;
     for (j = 0; j < histogram_size; ++j) {
       if (histogram[j] != 0) {
-        const int count =
+        const uint32_t count =
             (histogram[j] < count_min) ? count_min : histogram[j];
         tree[idx].total_count_ = count;
         tree[idx].value_ = j;
@@ -211,7 +212,7 @@ static void GenerateOptimalTree(const int* const histogram, int histogram_size,
   if (tree_size > 1) {  // Normal case.
     int tree_pool_size = 0;
     while (tree_size > 1) {  // Finish when we have only one root.
-      int count;
+      uint32_t count;
      tree_pool[tree_pool_size++] = tree[tree_size - 1];
       tree_pool[tree_pool_size++] = tree[tree_size - 2];
       count = tree_pool[tree_pool_size - 1].total_count_ +
@@ -402,7 +403,7 @@ static void ConvertBitDepthsToSymbols(HuffmanTreeCode* const tree) {
 // -----------------------------------------------------------------------------
 // Main entry point
 
-void VP8LCreateHuffmanTree(int* const histogram, int tree_depth_limit,
+void VP8LCreateHuffmanTree(uint32_t* const histogram, int tree_depth_limit,
                            uint8_t* const buf_rle,
                            HuffmanTree* const huff_tree,
                            HuffmanTreeCode* const huff_code) {
@@ -36,7 +36,7 @@ typedef struct {
 // Struct to represent the Huffman tree.
 // TODO(vikasa): Add comment for the fields of the Struct.
 typedef struct {
-  int total_count_;
+  uint32_t total_count_;
   int value_;
   int pool_index_left_;   // Index for the left sub-tree.
   int pool_index_right_;  // Index for the right sub-tree.
@@ -50,7 +50,7 @@ int VP8LCreateCompressedHuffmanTree(const HuffmanTreeCode* const tree,
 // Create an optimized tree, and tokenize it.
 // 'buf_rle' and 'huff_tree' are pre-allocated and the 'tree' is the constructed
 // huffman code tree.
-void VP8LCreateHuffmanTree(int* const histogram, int tree_depth_limit,
+void VP8LCreateHuffmanTree(uint32_t* const histogram, int tree_depth_limit,
                            uint8_t* const buf_rle, HuffmanTree* const huff_tree,
                            HuffmanTreeCode* const tree);
 