mirror of
https://github.com/webmproject/libwebp.git
synced 2025-07-16 05:49:51 +02:00
Split HistogramAdd to only have the high level logic in C.
Change-Id: Ic9eaebf7128ca0215b49d2a13bde1f5b94a28061
This commit is contained in:
@ -170,12 +170,13 @@ static void CollectColorRedTransforms_SSE2(const uint32_t* argb, int stride,
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
// Note we are adding uint32_t's as *signed* int32's (using _mm_add_epi32). But
|
||||
// that's ok since the histogram values are less than 1<<28 (max picture size).
|
||||
#define LINE_SIZE 16 // 8 or 16
|
||||
static void AddVector_SSE2(const uint32_t* a, const uint32_t* b, uint32_t* out,
|
||||
int size) {
|
||||
int i;
|
||||
assert(size % LINE_SIZE == 0);
|
||||
for (i = 0; i < size; i += LINE_SIZE) {
|
||||
for (i = 0; i + LINE_SIZE <= size; i += LINE_SIZE) {
|
||||
const __m128i a0 = _mm_loadu_si128((const __m128i*)&a[i + 0]);
|
||||
const __m128i a1 = _mm_loadu_si128((const __m128i*)&a[i + 4]);
|
||||
#if (LINE_SIZE == 16)
|
||||
@ -195,12 +196,14 @@ static void AddVector_SSE2(const uint32_t* a, const uint32_t* b, uint32_t* out,
|
||||
_mm_storeu_si128((__m128i*)&out[i + 12], _mm_add_epi32(a3, b3));
|
||||
#endif
|
||||
}
|
||||
for (; i < size; ++i) {
|
||||
out[i] = a[i] + b[i];
|
||||
}
|
||||
}
|
||||
|
||||
static void AddVectorEq_SSE2(const uint32_t* a, uint32_t* out, int size) {
|
||||
int i;
|
||||
assert(size % LINE_SIZE == 0);
|
||||
for (i = 0; i < size; i += LINE_SIZE) {
|
||||
for (i = 0; i + LINE_SIZE <= size; i += LINE_SIZE) {
|
||||
const __m128i a0 = _mm_loadu_si128((const __m128i*)&a[i + 0]);
|
||||
const __m128i a1 = _mm_loadu_si128((const __m128i*)&a[i + 4]);
|
||||
#if (LINE_SIZE == 16)
|
||||
@ -220,36 +223,12 @@ static void AddVectorEq_SSE2(const uint32_t* a, uint32_t* out, int size) {
|
||||
_mm_storeu_si128((__m128i*)&out[i + 12], _mm_add_epi32(a3, b3));
|
||||
#endif
|
||||
}
|
||||
for (; i < size; ++i) {
|
||||
out[i] += a[i];
|
||||
}
|
||||
}
|
||||
#undef LINE_SIZE
|
||||
|
||||
// Note we are adding uint32_t's as *signed* int32's (using _mm_add_epi32). But
|
||||
// that's ok since the histogram values are less than 1<<28 (max picture size).
|
||||
// Stores the element-wise sum of histograms 'a' and 'b' into 'out'.
// 'out' may be the same object as 'b', in which case the in-place helper
// AddVectorEq_SSE2() is used for the vectorized part.
// The first NUM_LITERAL_CODES entries of literal_, and the red_, blue_ and
// alpha_ arrays, go through the SSE2 helpers; the remaining literal_ entries
// and all distance_ entries are summed with plain scalar loops.
static void HistogramAdd_SSE2(const VP8LHistogram* const a,
                              const VP8LHistogram* const b,
                              VP8LHistogram* const out) {
  int k;
  // Total number of literal_ entries in use for this palette configuration.
  const int num_literal = VP8LHistogramNumCodes(a->palette_code_bits_);
  // Both inputs must share the same literal_ layout.
  assert(a->palette_code_bits_ == b->palette_code_bits_);

  if (b == out) {
    // Accumulate 'a' directly into 'out' (which already holds 'b').
    AddVectorEq_SSE2(a->literal_, out->literal_, NUM_LITERAL_CODES);
    AddVectorEq_SSE2(a->red_, out->red_, NUM_LITERAL_CODES);
    AddVectorEq_SSE2(a->blue_, out->blue_, NUM_LITERAL_CODES);
    AddVectorEq_SSE2(a->alpha_, out->alpha_, NUM_LITERAL_CODES);
  } else {
    // Distinct destination: out = a + b.
    AddVector_SSE2(a->literal_, b->literal_, out->literal_, NUM_LITERAL_CODES);
    AddVector_SSE2(a->red_, b->red_, out->red_, NUM_LITERAL_CODES);
    AddVector_SSE2(a->blue_, b->blue_, out->blue_, NUM_LITERAL_CODES);
    AddVector_SSE2(a->alpha_, b->alpha_, out->alpha_, NUM_LITERAL_CODES);
  }

  // Tail of literal_ beyond the part handled by the vector helpers above.
  k = NUM_LITERAL_CODES;
  while (k < num_literal) {
    out->literal_[k] = a->literal_[k] + b->literal_[k];
    ++k;
  }

  // Distance codes are summed with a scalar loop.
  k = 0;
  while (k < NUM_DISTANCE_CODES) {
    out->distance_[k] = a->distance_[k] + b->distance_[k];
    ++k;
  }
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Entropy
|
||||
|
||||
@ -675,7 +654,8 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitSSE2(void) {
|
||||
VP8LTransformColor = TransformColor_SSE2;
|
||||
VP8LCollectColorBlueTransforms = CollectColorBlueTransforms_SSE2;
|
||||
VP8LCollectColorRedTransforms = CollectColorRedTransforms_SSE2;
|
||||
VP8LHistogramAdd = HistogramAdd_SSE2;
|
||||
VP8LAddVector = AddVector_SSE2;
|
||||
VP8LAddVectorEq = AddVectorEq_SSE2;
|
||||
VP8LCombinedShannonEntropy = CombinedShannonEntropy_SSE2;
|
||||
VP8LVectorMismatch = VectorMismatch_SSE2;
|
||||
VP8LBundleColorMap = BundleColorMap_SSE2;
|
||||
|
Reference in New Issue
Block a user