mirror of
https://github.com/webmproject/libwebp.git
synced 2025-04-06 17:06:49 +02:00
Compare commits
No commits in common. "a1ad3f1e379539045dd1604fd91e7a270b8af9d1" and "e0ae21d2317688e25add3b47f0fd692b45656135" have entirely different histories.
a1ad3f1e37
...
e0ae21d231
@ -196,11 +196,15 @@ extern VP8LPredictorAddSubFunc VP8LPredictorsSub_SSE[16];
|
|||||||
// Huffman-cost related functions.
|
// Huffman-cost related functions.
|
||||||
|
|
||||||
typedef uint32_t (*VP8LCostFunc)(const uint32_t* population, int length);
|
typedef uint32_t (*VP8LCostFunc)(const uint32_t* population, int length);
|
||||||
|
typedef uint32_t (*VP8LCostCombinedFunc)(const uint32_t* WEBP_RESTRICT X,
|
||||||
|
const uint32_t* WEBP_RESTRICT Y,
|
||||||
|
int length);
|
||||||
typedef uint64_t (*VP8LCombinedShannonEntropyFunc)(const uint32_t X[256],
|
typedef uint64_t (*VP8LCombinedShannonEntropyFunc)(const uint32_t X[256],
|
||||||
const uint32_t Y[256]);
|
const uint32_t Y[256]);
|
||||||
typedef uint64_t (*VP8LShannonEntropyFunc)(const uint32_t* X, int length);
|
typedef uint64_t (*VP8LShannonEntropyFunc)(const uint32_t* X, int length);
|
||||||
|
|
||||||
extern VP8LCostFunc VP8LExtraCost;
|
extern VP8LCostFunc VP8LExtraCost;
|
||||||
|
extern VP8LCostCombinedFunc VP8LExtraCostCombined;
|
||||||
extern VP8LCombinedShannonEntropyFunc VP8LCombinedShannonEntropy;
|
extern VP8LCombinedShannonEntropyFunc VP8LCombinedShannonEntropy;
|
||||||
extern VP8LShannonEntropyFunc VP8LShannonEntropy;
|
extern VP8LShannonEntropyFunc VP8LShannonEntropy;
|
||||||
|
|
||||||
|
@ -583,6 +583,20 @@ static uint32_t ExtraCost_C(const uint32_t* population, int length) {
|
|||||||
return cost;
|
return cost;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns the weighted sum of the element-wise combination of 'X' and 'Y':
// every entry k in [4, length) contributes ((k >> 1) - 1) * (X[k] + Y[k]),
// i.e. entries 4 and 5 have weight 1, entries 6 and 7 have weight 2, etc.
// Entries 0..3 are never read. 'length' must be even, and X[4], X[5], Y[4]
// and Y[5] are read unconditionally, so both arrays need at least 6 entries.
// All arithmetic is done in uint32_t (wrapping modulo 2^32) so that large
// counts cannot trigger signed-integer overflow.
static uint32_t ExtraCostCombined_C(const uint32_t* WEBP_RESTRICT X,
                                    const uint32_t* WEBP_RESTRICT Y,
                                    int length) {
  int i;
  uint32_t cost = X[4] + Y[4] + X[5] + Y[5];
  assert(length % 2 == 0);
  for (i = 2; i < length / 2 - 1; ++i) {
    // Keep the pair sums unsigned: storing them in 'int' would make the
    // multiplication below a signed one, which is undefined on overflow.
    const uint32_t xy0 = X[2 * i + 2] + Y[2 * i + 2];
    const uint32_t xy1 = X[2 * i + 3] + Y[2 * i + 3];
    cost += (uint32_t)i * (xy0 + xy1);
  }
  return cost;
}
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
|
|
||||||
static void AddVector_C(const uint32_t* WEBP_RESTRICT a,
|
static void AddVector_C(const uint32_t* WEBP_RESTRICT a,
|
||||||
@ -713,6 +727,7 @@ VP8LFastLog2SlowFunc VP8LFastLog2Slow;
|
|||||||
VP8LFastSLog2SlowFunc VP8LFastSLog2Slow;
|
VP8LFastSLog2SlowFunc VP8LFastSLog2Slow;
|
||||||
|
|
||||||
VP8LCostFunc VP8LExtraCost;
|
VP8LCostFunc VP8LExtraCost;
|
||||||
|
VP8LCostCombinedFunc VP8LExtraCostCombined;
|
||||||
VP8LCombinedShannonEntropyFunc VP8LCombinedShannonEntropy;
|
VP8LCombinedShannonEntropyFunc VP8LCombinedShannonEntropy;
|
||||||
VP8LShannonEntropyFunc VP8LShannonEntropy;
|
VP8LShannonEntropyFunc VP8LShannonEntropy;
|
||||||
|
|
||||||
@ -755,6 +770,7 @@ WEBP_DSP_INIT_FUNC(VP8LEncDspInit) {
|
|||||||
VP8LFastSLog2Slow = FastSLog2Slow_C;
|
VP8LFastSLog2Slow = FastSLog2Slow_C;
|
||||||
|
|
||||||
VP8LExtraCost = ExtraCost_C;
|
VP8LExtraCost = ExtraCost_C;
|
||||||
|
VP8LExtraCostCombined = ExtraCostCombined_C;
|
||||||
VP8LCombinedShannonEntropy = CombinedShannonEntropy_C;
|
VP8LCombinedShannonEntropy = CombinedShannonEntropy_C;
|
||||||
VP8LShannonEntropy = ShannonEntropy_C;
|
VP8LShannonEntropy = ShannonEntropy_C;
|
||||||
|
|
||||||
@ -849,6 +865,7 @@ WEBP_DSP_INIT_FUNC(VP8LEncDspInit) {
|
|||||||
assert(VP8LFastLog2Slow != NULL);
|
assert(VP8LFastLog2Slow != NULL);
|
||||||
assert(VP8LFastSLog2Slow != NULL);
|
assert(VP8LFastSLog2Slow != NULL);
|
||||||
assert(VP8LExtraCost != NULL);
|
assert(VP8LExtraCost != NULL);
|
||||||
|
assert(VP8LExtraCostCombined != NULL);
|
||||||
assert(VP8LCombinedShannonEntropy != NULL);
|
assert(VP8LCombinedShannonEntropy != NULL);
|
||||||
assert(VP8LShannonEntropy != NULL);
|
assert(VP8LShannonEntropy != NULL);
|
||||||
assert(VP8LGetEntropyUnrefined != NULL);
|
assert(VP8LGetEntropyUnrefined != NULL);
|
||||||
|
@ -133,6 +133,60 @@ static uint32_t ExtraCost_MIPS32(const uint32_t* const population, int length) {
|
|||||||
return ((int64_t)temp0 << 32 | temp1);
|
return ((int64_t)temp0 << 32 | temp1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// C version of this function:
|
||||||
|
// int i = 0;
|
||||||
|
// int64_t cost = 0;
|
||||||
|
// const uint32_t* pX = &X[4];
|
||||||
|
// const uint32_t* pY = &Y[4];
|
||||||
|
// const uint32_t* LoopEnd = &X[length];
|
||||||
|
// while (pX != LoopEnd) {
|
||||||
|
// const uint32_t xy0 = *pX + *pY;
|
||||||
|
// const uint32_t xy1 = *(pX + 1) + *(pY + 1);
|
||||||
|
// ++i;
|
||||||
|
// cost += i * xy0;
|
||||||
|
// cost += i * xy1;
|
||||||
|
// pX += 2;
|
||||||
|
// pY += 2;
|
||||||
|
// }
|
||||||
|
// return cost;
|
||||||
|
// Accumulates i * (X[k] + Y[k]) + i * (X[k + 1] + Y[k + 1]) over consecutive
// pairs starting at k = 4, where i is 1 for the first pair and grows by one
// per pair, until pX reaches &X[length]. The running sum lives in the HI/LO
// accumulator and is updated with 'madd'.
// NOTE(review): the loop only exits when pX == LoopEnd, so this presumably
// relies on 'length' being even and >= 4 -- confirm against callers.
static uint32_t ExtraCostCombined_MIPS32(const uint32_t* WEBP_RESTRICT const X,
|
||||||
|
const uint32_t* WEBP_RESTRICT const Y,
|
||||||
|
int length) {
|
||||||
|
int i, temp0, temp1, temp2, temp3;
|
||||||
|
const uint32_t* pX = &X[4];
|
||||||
|
const uint32_t* pY = &Y[4];
|
||||||
|
const uint32_t* const LoopEnd = &X[length];
|
||||||
|
|
||||||
|
// 'mult $zero, $zero' zeroes HI/LO and 'xor' zeroes the pair counter i.
// The initial 'beq' skips the loop entirely when pX already equals LoopEnd.
// Each iteration loads two entries from X and two from Y, bumps i, advances
// both pointers by 8 bytes and multiply-accumulates i with each pair sum.
__asm__ volatile(
|
||||||
|
"mult $zero, $zero \n\t"
|
||||||
|
"xor %[i], %[i], %[i] \n\t"
|
||||||
|
"beq %[pX], %[LoopEnd], 2f \n\t"
|
||||||
|
"1: \n\t"
|
||||||
|
"lw %[temp0], 0(%[pX]) \n\t"
|
||||||
|
"lw %[temp1], 0(%[pY]) \n\t"
|
||||||
|
"lw %[temp2], 4(%[pX]) \n\t"
|
||||||
|
"lw %[temp3], 4(%[pY]) \n\t"
|
||||||
|
"addiu %[i], %[i], 1 \n\t"
|
||||||
|
"addu %[temp0], %[temp0], %[temp1] \n\t"
|
||||||
|
"addu %[temp2], %[temp2], %[temp3] \n\t"
|
||||||
|
"addiu %[pX], %[pX], 8 \n\t"
|
||||||
|
"addiu %[pY], %[pY], 8 \n\t"
|
||||||
|
"madd %[i], %[temp0] \n\t"
|
||||||
|
"madd %[i], %[temp2] \n\t"
|
||||||
|
"bne %[pX], %[LoopEnd], 1b \n\t"
|
||||||
|
"2: \n\t"
|
||||||
|
"mfhi %[temp0] \n\t"
|
||||||
|
"mflo %[temp1] \n\t"
|
||||||
|
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
|
||||||
|
[temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
|
||||||
|
[i]"=&r"(i), [pX]"+r"(pX), [pY]"+r"(pY)
|
||||||
|
: [LoopEnd]"r"(LoopEnd)
|
||||||
|
: "memory", "hi", "lo"
|
||||||
|
);
|
||||||
|
|
||||||
|
// temp0 holds HI and temp1 holds LO. They are combined into a 64-bit value,
// which is then converted to the function's uint32_t return type.
return ((int64_t)temp0 << 32 | temp1);
|
||||||
|
}
|
||||||
|
|
||||||
#define HUFFMAN_COST_PASS \
|
#define HUFFMAN_COST_PASS \
|
||||||
__asm__ volatile( \
|
__asm__ volatile( \
|
||||||
"sll %[temp1], %[temp0], 3 \n\t" \
|
"sll %[temp1], %[temp0], 3 \n\t" \
|
||||||
@ -334,6 +388,7 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitMIPS32(void) {
|
|||||||
VP8LFastSLog2Slow = FastSLog2Slow_MIPS32;
|
VP8LFastSLog2Slow = FastSLog2Slow_MIPS32;
|
||||||
VP8LFastLog2Slow = FastLog2Slow_MIPS32;
|
VP8LFastLog2Slow = FastLog2Slow_MIPS32;
|
||||||
VP8LExtraCost = ExtraCost_MIPS32;
|
VP8LExtraCost = ExtraCost_MIPS32;
|
||||||
|
VP8LExtraCostCombined = ExtraCostCombined_MIPS32;
|
||||||
VP8LGetEntropyUnrefined = GetEntropyUnrefined_MIPS32;
|
VP8LGetEntropyUnrefined = GetEntropyUnrefined_MIPS32;
|
||||||
VP8LGetCombinedEntropyUnrefined = GetCombinedEntropyUnrefined_MIPS32;
|
VP8LGetCombinedEntropyUnrefined = GetCombinedEntropyUnrefined_MIPS32;
|
||||||
VP8LAddVector = AddVector_MIPS32;
|
VP8LAddVector = AddVector_MIPS32;
|
||||||
|
@ -48,6 +48,29 @@ static uint32_t ExtraCost_SSE41(const uint32_t* const a, int length) {
|
|||||||
return HorizontalSum_SSE41(cost);
|
return HorizontalSum_SSE41(cost);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SSE4.1 weighted sum over two arrays 'a' and 'b' taken together: entry k
// (k >= 4) contributes ((k >> 1) - 1) * (a[k] + b[k]). Entries 4..7 seed the
// accumulator; the remaining entries are consumed eight at a time.
// 'length' must be a multiple of 8. The final reduction is delegated to
// HorizontalSum_SSE41() (defined elsewhere in this file; presumably it adds
// the four 32-bit lanes -- confirm there).
static uint32_t ExtraCostCombined_SSE41(const uint32_t* WEBP_RESTRICT const a,
                                        const uint32_t* WEBP_RESTRICT const b,
                                        int length) {
  int pos;
  // Lanes (low to high): [4], [5], 2 * [6], 2 * [7] for each input array.
  const __m128i head_a = _mm_set_epi32(2 * a[7], 2 * a[6], a[5], a[4]);
  const __m128i head_b = _mm_set_epi32(2 * b[7], 2 * b[6], b[5], b[4]);
  __m128i total = _mm_add_epi32(head_a, head_b);
  assert(length % 8 == 0);

  for (pos = 8; pos + 8 <= length; pos += 8) {
    // Weight of the first pair in this group of eight entries.
    const int base = (pos - 2) >> 1;
    const __m128i a_lo = _mm_loadu_si128((const __m128i*)&a[pos]);
    const __m128i a_hi = _mm_loadu_si128((const __m128i*)&a[pos + 4]);
    const __m128i b_lo = _mm_loadu_si128((const __m128i*)&b[pos]);
    const __m128i b_hi = _mm_loadu_si128((const __m128i*)&b[pos + 4]);
    const __m128i weights = _mm_set_epi32(base + 3, base + 2, base + 1, base);
    // Horizontal adds fold adjacent entries into four pair sums per array.
    const __m128i pairs_a = _mm_hadd_epi32(a_lo, a_hi);
    const __m128i pairs_b = _mm_hadd_epi32(b_lo, b_hi);
    const __m128i weighted =
        _mm_mullo_epi32(_mm_add_epi32(pairs_a, pairs_b), weights);
    total = _mm_add_epi32(weighted, total);
  }
  return HorizontalSum_SSE41(total);
}
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
// Subtract-Green Transform
|
// Subtract-Green Transform
|
||||||
|
|
||||||
@ -176,6 +199,7 @@ extern void VP8LEncDspInitSSE41(void);
|
|||||||
|
|
||||||
WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitSSE41(void) {
|
WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitSSE41(void) {
|
||||||
VP8LExtraCost = ExtraCost_SSE41;
|
VP8LExtraCost = ExtraCost_SSE41;
|
||||||
|
VP8LExtraCostCombined = ExtraCostCombined_SSE41;
|
||||||
VP8LSubtractGreenFromBlueAndRed = SubtractGreenFromBlueAndRed_SSE41;
|
VP8LSubtractGreenFromBlueAndRed = SubtractGreenFromBlueAndRed_SSE41;
|
||||||
VP8LCollectColorBlueTransforms = CollectColorBlueTransforms_SSE41;
|
VP8LCollectColorBlueTransforms = CollectColorBlueTransforms_SSE41;
|
||||||
VP8LCollectColorRedTransforms = CollectColorRedTransforms_SSE41;
|
VP8LCollectColorRedTransforms = CollectColorRedTransforms_SSE41;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user