mirror of
https://github.com/webmproject/libwebp.git
synced 2024-12-25 13:18:22 +01:00
Compare commits
4 Commits
2ddaaf0aa5
...
0ab789e067
Author | SHA1 | Date | |
---|---|---|---|
|
0ab789e067 | ||
|
0323645066 | ||
|
61e2cfdadd | ||
|
7bda3deb89 |
@ -53,7 +53,7 @@ DEMUXLIBLIST=''
|
||||
if [[ -z "${SDK}" ]]; then
|
||||
echo "iOS SDK not available"
|
||||
exit 1
|
||||
elif [[ ${SDK%%.*} -gt 8 ]]; then
|
||||
elif [[ ${SDK%%.*} -gt 8 && "${XCODE%%.*}" -lt 16 ]]; then
|
||||
EXTRA_CFLAGS="-fembed-bitcode"
|
||||
elif [[ ${SDK%%.*} -le 6 ]]; then
|
||||
echo "You need iOS SDK version 6.0 or above"
|
||||
|
@ -175,64 +175,102 @@ static void CollectColorRedTransforms_SSE2(const uint32_t* WEBP_RESTRICT argb,
|
||||
|
||||
// Note we are adding uint32_t's as *signed* int32's (using _mm_add_epi32). But
|
||||
// that's ok since the histogram values are less than 1<<28 (max picture size).
|
||||
#define LINE_SIZE 16 // 8 or 16
|
||||
static void AddVector_SSE2(const uint32_t* WEBP_RESTRICT a,
|
||||
const uint32_t* WEBP_RESTRICT b,
|
||||
uint32_t* WEBP_RESTRICT out, int size) {
|
||||
int i;
|
||||
for (i = 0; i + LINE_SIZE <= size; i += LINE_SIZE) {
|
||||
int i = 0;
|
||||
int aligned_size = size & ~15;
|
||||
// Size is, at minimum, NUM_DISTANCE_CODES (40) and may be as large as
|
||||
// NUM_LITERAL_CODES (256) + NUM_LENGTH_CODES (24) + (0 or a non-zero power of
|
||||
// 2). See the usage in VP8LHistogramAdd().
|
||||
assert(size >= 16);
|
||||
assert(size % 2 == 0);
|
||||
|
||||
do {
|
||||
const __m128i a0 = _mm_loadu_si128((const __m128i*)&a[i + 0]);
|
||||
const __m128i a1 = _mm_loadu_si128((const __m128i*)&a[i + 4]);
|
||||
#if (LINE_SIZE == 16)
|
||||
const __m128i a2 = _mm_loadu_si128((const __m128i*)&a[i + 8]);
|
||||
const __m128i a3 = _mm_loadu_si128((const __m128i*)&a[i + 12]);
|
||||
#endif
|
||||
const __m128i b0 = _mm_loadu_si128((const __m128i*)&b[i + 0]);
|
||||
const __m128i b1 = _mm_loadu_si128((const __m128i*)&b[i + 4]);
|
||||
#if (LINE_SIZE == 16)
|
||||
const __m128i b2 = _mm_loadu_si128((const __m128i*)&b[i + 8]);
|
||||
const __m128i b3 = _mm_loadu_si128((const __m128i*)&b[i + 12]);
|
||||
#endif
|
||||
_mm_storeu_si128((__m128i*)&out[i + 0], _mm_add_epi32(a0, b0));
|
||||
_mm_storeu_si128((__m128i*)&out[i + 4], _mm_add_epi32(a1, b1));
|
||||
#if (LINE_SIZE == 16)
|
||||
_mm_storeu_si128((__m128i*)&out[i + 8], _mm_add_epi32(a2, b2));
|
||||
_mm_storeu_si128((__m128i*)&out[i + 12], _mm_add_epi32(a3, b3));
|
||||
#endif
|
||||
i += 16;
|
||||
} while (i != aligned_size);
|
||||
|
||||
if ((size & 8) != 0) {
|
||||
const __m128i a0 = _mm_loadu_si128((const __m128i*)&a[i + 0]);
|
||||
const __m128i a1 = _mm_loadu_si128((const __m128i*)&a[i + 4]);
|
||||
const __m128i b0 = _mm_loadu_si128((const __m128i*)&b[i + 0]);
|
||||
const __m128i b1 = _mm_loadu_si128((const __m128i*)&b[i + 4]);
|
||||
_mm_storeu_si128((__m128i*)&out[i + 0], _mm_add_epi32(a0, b0));
|
||||
_mm_storeu_si128((__m128i*)&out[i + 4], _mm_add_epi32(a1, b1));
|
||||
i += 8;
|
||||
}
|
||||
for (; i < size; ++i) {
|
||||
out[i] = a[i] + b[i];
|
||||
|
||||
size &= 7;
|
||||
if (size == 4) {
|
||||
const __m128i a0 = _mm_loadu_si128((const __m128i*)&a[i]);
|
||||
const __m128i b0 = _mm_loadu_si128((const __m128i*)&b[i]);
|
||||
_mm_storeu_si128((__m128i*)&out[i], _mm_add_epi32(a0, b0));
|
||||
} else if (size == 2) {
|
||||
const __m128i a0 = _mm_loadl_epi64((const __m128i*)&a[i]);
|
||||
const __m128i b0 = _mm_loadl_epi64((const __m128i*)&b[i]);
|
||||
_mm_storel_epi64((__m128i*)&out[i], _mm_add_epi32(a0, b0));
|
||||
}
|
||||
}
|
||||
|
||||
static void AddVectorEq_SSE2(const uint32_t* WEBP_RESTRICT a,
|
||||
uint32_t* WEBP_RESTRICT out, int size) {
|
||||
int i;
|
||||
for (i = 0; i + LINE_SIZE <= size; i += LINE_SIZE) {
|
||||
int i = 0;
|
||||
int aligned_size = size & ~15;
|
||||
// Size is, at minimum, NUM_DISTANCE_CODES (40) and may be as large as
|
||||
// NUM_LITERAL_CODES (256) + NUM_LENGTH_CODES (24) + (0 or a non-zero power of
|
||||
// 2). See the usage in VP8LHistogramAdd().
|
||||
assert(size >= 16);
|
||||
assert(size % 2 == 0);
|
||||
|
||||
do {
|
||||
const __m128i a0 = _mm_loadu_si128((const __m128i*)&a[i + 0]);
|
||||
const __m128i a1 = _mm_loadu_si128((const __m128i*)&a[i + 4]);
|
||||
#if (LINE_SIZE == 16)
|
||||
const __m128i a2 = _mm_loadu_si128((const __m128i*)&a[i + 8]);
|
||||
const __m128i a3 = _mm_loadu_si128((const __m128i*)&a[i + 12]);
|
||||
#endif
|
||||
const __m128i b0 = _mm_loadu_si128((const __m128i*)&out[i + 0]);
|
||||
const __m128i b1 = _mm_loadu_si128((const __m128i*)&out[i + 4]);
|
||||
#if (LINE_SIZE == 16)
|
||||
const __m128i b2 = _mm_loadu_si128((const __m128i*)&out[i + 8]);
|
||||
const __m128i b3 = _mm_loadu_si128((const __m128i*)&out[i + 12]);
|
||||
#endif
|
||||
_mm_storeu_si128((__m128i*)&out[i + 0], _mm_add_epi32(a0, b0));
|
||||
_mm_storeu_si128((__m128i*)&out[i + 4], _mm_add_epi32(a1, b1));
|
||||
#if (LINE_SIZE == 16)
|
||||
_mm_storeu_si128((__m128i*)&out[i + 8], _mm_add_epi32(a2, b2));
|
||||
_mm_storeu_si128((__m128i*)&out[i + 12], _mm_add_epi32(a3, b3));
|
||||
#endif
|
||||
i += 16;
|
||||
} while (i != aligned_size);
|
||||
|
||||
if ((size & 8) != 0) {
|
||||
const __m128i a0 = _mm_loadu_si128((const __m128i*)&a[i + 0]);
|
||||
const __m128i a1 = _mm_loadu_si128((const __m128i*)&a[i + 4]);
|
||||
const __m128i b0 = _mm_loadu_si128((const __m128i*)&out[i + 0]);
|
||||
const __m128i b1 = _mm_loadu_si128((const __m128i*)&out[i + 4]);
|
||||
_mm_storeu_si128((__m128i*)&out[i + 0], _mm_add_epi32(a0, b0));
|
||||
_mm_storeu_si128((__m128i*)&out[i + 4], _mm_add_epi32(a1, b1));
|
||||
i += 8;
|
||||
}
|
||||
for (; i < size; ++i) {
|
||||
out[i] += a[i];
|
||||
|
||||
size &= 7;
|
||||
if (size == 4) {
|
||||
const __m128i a0 = _mm_loadu_si128((const __m128i*)&a[i]);
|
||||
const __m128i b0 = _mm_loadu_si128((const __m128i*)&out[i]);
|
||||
_mm_storeu_si128((__m128i*)&out[i], _mm_add_epi32(a0, b0));
|
||||
} else if (size == 2) {
|
||||
const __m128i a0 = _mm_loadl_epi64((const __m128i*)&a[i]);
|
||||
const __m128i b0 = _mm_loadl_epi64((const __m128i*)&out[i]);
|
||||
_mm_storel_epi64((__m128i*)&out[i], _mm_add_epi32(a0, b0));
|
||||
}
|
||||
}
|
||||
#undef LINE_SIZE
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Entropy
|
||||
|
@ -172,7 +172,9 @@ for (( i = 0; i < $NUM_PLATFORMS; ++i )); do
|
||||
CFLAGS="-pipe -isysroot ${SDKROOT} -O3 -DNDEBUG"
|
||||
case "${PLATFORM}" in
|
||||
iPhone*)
|
||||
CFLAGS+=" -fembed-bitcode"
|
||||
if [[ "${XCODE%%.*}" -lt 16 ]]; then
|
||||
CFLAGS+=" -fembed-bitcode"
|
||||
fi
|
||||
CFLAGS+=" -target ${ARCH}-apple-ios${IOS_MIN_VERSION}"
|
||||
[[ "${PLATFORM}" == *Simulator* ]] && CFLAGS+="-simulator"
|
||||
;;
|
||||
|
Loading…
Reference in New Issue
Block a user