mirror of
https://github.com/webmproject/libwebp.git
synced 2024-12-27 06:08:21 +01:00
Compare commits
4 Commits
2ddaaf0aa5
...
0ab789e067
Author | SHA1 | Date | |
---|---|---|---|
|
0ab789e067 | ||
|
0323645066 | ||
|
61e2cfdadd | ||
|
7bda3deb89 |
@ -53,7 +53,7 @@ DEMUXLIBLIST=''
|
|||||||
if [[ -z "${SDK}" ]]; then
|
if [[ -z "${SDK}" ]]; then
|
||||||
echo "iOS SDK not available"
|
echo "iOS SDK not available"
|
||||||
exit 1
|
exit 1
|
||||||
elif [[ ${SDK%%.*} -gt 8 ]]; then
|
elif [[ ${SDK%%.*} -gt 8 && "${XCODE%%.*}" -lt 16 ]]; then
|
||||||
EXTRA_CFLAGS="-fembed-bitcode"
|
EXTRA_CFLAGS="-fembed-bitcode"
|
||||||
elif [[ ${SDK%%.*} -le 6 ]]; then
|
elif [[ ${SDK%%.*} -le 6 ]]; then
|
||||||
echo "You need iOS SDK version 6.0 or above"
|
echo "You need iOS SDK version 6.0 or above"
|
||||||
|
@ -175,64 +175,102 @@ static void CollectColorRedTransforms_SSE2(const uint32_t* WEBP_RESTRICT argb,
|
|||||||
|
|
||||||
// Note we are adding uint32_t's as *signed* int32's (using _mm_add_epi32). But
|
// Note we are adding uint32_t's as *signed* int32's (using _mm_add_epi32). But
|
||||||
// that's ok since the histogram values are less than 1<<28 (max picture size).
|
// that's ok since the histogram values are less than 1<<28 (max picture size).
|
||||||
#define LINE_SIZE 16 // 8 or 16
|
|
||||||
static void AddVector_SSE2(const uint32_t* WEBP_RESTRICT a,
|
static void AddVector_SSE2(const uint32_t* WEBP_RESTRICT a,
|
||||||
const uint32_t* WEBP_RESTRICT b,
|
const uint32_t* WEBP_RESTRICT b,
|
||||||
uint32_t* WEBP_RESTRICT out, int size) {
|
uint32_t* WEBP_RESTRICT out, int size) {
|
||||||
int i;
|
int i = 0;
|
||||||
for (i = 0; i + LINE_SIZE <= size; i += LINE_SIZE) {
|
int aligned_size = size & ~15;
|
||||||
|
// Size is, at minimum, NUM_DISTANCE_CODES (40) and may be as large as
|
||||||
|
// NUM_LITERAL_CODES (256) + NUM_LENGTH_CODES (24) + (0 or a non-zero power of
|
||||||
|
// 2). See the usage in VP8LHistogramAdd().
|
||||||
|
assert(size >= 16);
|
||||||
|
assert(size % 2 == 0);
|
||||||
|
|
||||||
|
do {
|
||||||
const __m128i a0 = _mm_loadu_si128((const __m128i*)&a[i + 0]);
|
const __m128i a0 = _mm_loadu_si128((const __m128i*)&a[i + 0]);
|
||||||
const __m128i a1 = _mm_loadu_si128((const __m128i*)&a[i + 4]);
|
const __m128i a1 = _mm_loadu_si128((const __m128i*)&a[i + 4]);
|
||||||
#if (LINE_SIZE == 16)
|
|
||||||
const __m128i a2 = _mm_loadu_si128((const __m128i*)&a[i + 8]);
|
const __m128i a2 = _mm_loadu_si128((const __m128i*)&a[i + 8]);
|
||||||
const __m128i a3 = _mm_loadu_si128((const __m128i*)&a[i + 12]);
|
const __m128i a3 = _mm_loadu_si128((const __m128i*)&a[i + 12]);
|
||||||
#endif
|
|
||||||
const __m128i b0 = _mm_loadu_si128((const __m128i*)&b[i + 0]);
|
const __m128i b0 = _mm_loadu_si128((const __m128i*)&b[i + 0]);
|
||||||
const __m128i b1 = _mm_loadu_si128((const __m128i*)&b[i + 4]);
|
const __m128i b1 = _mm_loadu_si128((const __m128i*)&b[i + 4]);
|
||||||
#if (LINE_SIZE == 16)
|
|
||||||
const __m128i b2 = _mm_loadu_si128((const __m128i*)&b[i + 8]);
|
const __m128i b2 = _mm_loadu_si128((const __m128i*)&b[i + 8]);
|
||||||
const __m128i b3 = _mm_loadu_si128((const __m128i*)&b[i + 12]);
|
const __m128i b3 = _mm_loadu_si128((const __m128i*)&b[i + 12]);
|
||||||
#endif
|
|
||||||
_mm_storeu_si128((__m128i*)&out[i + 0], _mm_add_epi32(a0, b0));
|
_mm_storeu_si128((__m128i*)&out[i + 0], _mm_add_epi32(a0, b0));
|
||||||
_mm_storeu_si128((__m128i*)&out[i + 4], _mm_add_epi32(a1, b1));
|
_mm_storeu_si128((__m128i*)&out[i + 4], _mm_add_epi32(a1, b1));
|
||||||
#if (LINE_SIZE == 16)
|
|
||||||
_mm_storeu_si128((__m128i*)&out[i + 8], _mm_add_epi32(a2, b2));
|
_mm_storeu_si128((__m128i*)&out[i + 8], _mm_add_epi32(a2, b2));
|
||||||
_mm_storeu_si128((__m128i*)&out[i + 12], _mm_add_epi32(a3, b3));
|
_mm_storeu_si128((__m128i*)&out[i + 12], _mm_add_epi32(a3, b3));
|
||||||
#endif
|
i += 16;
|
||||||
|
} while (i != aligned_size);
|
||||||
|
|
||||||
|
if ((size & 8) != 0) {
|
||||||
|
const __m128i a0 = _mm_loadu_si128((const __m128i*)&a[i + 0]);
|
||||||
|
const __m128i a1 = _mm_loadu_si128((const __m128i*)&a[i + 4]);
|
||||||
|
const __m128i b0 = _mm_loadu_si128((const __m128i*)&b[i + 0]);
|
||||||
|
const __m128i b1 = _mm_loadu_si128((const __m128i*)&b[i + 4]);
|
||||||
|
_mm_storeu_si128((__m128i*)&out[i + 0], _mm_add_epi32(a0, b0));
|
||||||
|
_mm_storeu_si128((__m128i*)&out[i + 4], _mm_add_epi32(a1, b1));
|
||||||
|
i += 8;
|
||||||
}
|
}
|
||||||
for (; i < size; ++i) {
|
|
||||||
out[i] = a[i] + b[i];
|
size &= 7;
|
||||||
|
if (size == 4) {
|
||||||
|
const __m128i a0 = _mm_loadu_si128((const __m128i*)&a[i]);
|
||||||
|
const __m128i b0 = _mm_loadu_si128((const __m128i*)&b[i]);
|
||||||
|
_mm_storeu_si128((__m128i*)&out[i], _mm_add_epi32(a0, b0));
|
||||||
|
} else if (size == 2) {
|
||||||
|
const __m128i a0 = _mm_loadl_epi64((const __m128i*)&a[i]);
|
||||||
|
const __m128i b0 = _mm_loadl_epi64((const __m128i*)&b[i]);
|
||||||
|
_mm_storel_epi64((__m128i*)&out[i], _mm_add_epi32(a0, b0));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void AddVectorEq_SSE2(const uint32_t* WEBP_RESTRICT a,
|
static void AddVectorEq_SSE2(const uint32_t* WEBP_RESTRICT a,
|
||||||
uint32_t* WEBP_RESTRICT out, int size) {
|
uint32_t* WEBP_RESTRICT out, int size) {
|
||||||
int i;
|
int i = 0;
|
||||||
for (i = 0; i + LINE_SIZE <= size; i += LINE_SIZE) {
|
int aligned_size = size & ~15;
|
||||||
|
// Size is, at minimum, NUM_DISTANCE_CODES (40) and may be as large as
|
||||||
|
// NUM_LITERAL_CODES (256) + NUM_LENGTH_CODES (24) + (0 or a non-zero power of
|
||||||
|
// 2). See the usage in VP8LHistogramAdd().
|
||||||
|
assert(size >= 16);
|
||||||
|
assert(size % 2 == 0);
|
||||||
|
|
||||||
|
do {
|
||||||
const __m128i a0 = _mm_loadu_si128((const __m128i*)&a[i + 0]);
|
const __m128i a0 = _mm_loadu_si128((const __m128i*)&a[i + 0]);
|
||||||
const __m128i a1 = _mm_loadu_si128((const __m128i*)&a[i + 4]);
|
const __m128i a1 = _mm_loadu_si128((const __m128i*)&a[i + 4]);
|
||||||
#if (LINE_SIZE == 16)
|
|
||||||
const __m128i a2 = _mm_loadu_si128((const __m128i*)&a[i + 8]);
|
const __m128i a2 = _mm_loadu_si128((const __m128i*)&a[i + 8]);
|
||||||
const __m128i a3 = _mm_loadu_si128((const __m128i*)&a[i + 12]);
|
const __m128i a3 = _mm_loadu_si128((const __m128i*)&a[i + 12]);
|
||||||
#endif
|
|
||||||
const __m128i b0 = _mm_loadu_si128((const __m128i*)&out[i + 0]);
|
const __m128i b0 = _mm_loadu_si128((const __m128i*)&out[i + 0]);
|
||||||
const __m128i b1 = _mm_loadu_si128((const __m128i*)&out[i + 4]);
|
const __m128i b1 = _mm_loadu_si128((const __m128i*)&out[i + 4]);
|
||||||
#if (LINE_SIZE == 16)
|
|
||||||
const __m128i b2 = _mm_loadu_si128((const __m128i*)&out[i + 8]);
|
const __m128i b2 = _mm_loadu_si128((const __m128i*)&out[i + 8]);
|
||||||
const __m128i b3 = _mm_loadu_si128((const __m128i*)&out[i + 12]);
|
const __m128i b3 = _mm_loadu_si128((const __m128i*)&out[i + 12]);
|
||||||
#endif
|
|
||||||
_mm_storeu_si128((__m128i*)&out[i + 0], _mm_add_epi32(a0, b0));
|
_mm_storeu_si128((__m128i*)&out[i + 0], _mm_add_epi32(a0, b0));
|
||||||
_mm_storeu_si128((__m128i*)&out[i + 4], _mm_add_epi32(a1, b1));
|
_mm_storeu_si128((__m128i*)&out[i + 4], _mm_add_epi32(a1, b1));
|
||||||
#if (LINE_SIZE == 16)
|
|
||||||
_mm_storeu_si128((__m128i*)&out[i + 8], _mm_add_epi32(a2, b2));
|
_mm_storeu_si128((__m128i*)&out[i + 8], _mm_add_epi32(a2, b2));
|
||||||
_mm_storeu_si128((__m128i*)&out[i + 12], _mm_add_epi32(a3, b3));
|
_mm_storeu_si128((__m128i*)&out[i + 12], _mm_add_epi32(a3, b3));
|
||||||
#endif
|
i += 16;
|
||||||
|
} while (i != aligned_size);
|
||||||
|
|
||||||
|
if ((size & 8) != 0) {
|
||||||
|
const __m128i a0 = _mm_loadu_si128((const __m128i*)&a[i + 0]);
|
||||||
|
const __m128i a1 = _mm_loadu_si128((const __m128i*)&a[i + 4]);
|
||||||
|
const __m128i b0 = _mm_loadu_si128((const __m128i*)&out[i + 0]);
|
||||||
|
const __m128i b1 = _mm_loadu_si128((const __m128i*)&out[i + 4]);
|
||||||
|
_mm_storeu_si128((__m128i*)&out[i + 0], _mm_add_epi32(a0, b0));
|
||||||
|
_mm_storeu_si128((__m128i*)&out[i + 4], _mm_add_epi32(a1, b1));
|
||||||
|
i += 8;
|
||||||
}
|
}
|
||||||
for (; i < size; ++i) {
|
|
||||||
out[i] += a[i];
|
size &= 7;
|
||||||
|
if (size == 4) {
|
||||||
|
const __m128i a0 = _mm_loadu_si128((const __m128i*)&a[i]);
|
||||||
|
const __m128i b0 = _mm_loadu_si128((const __m128i*)&out[i]);
|
||||||
|
_mm_storeu_si128((__m128i*)&out[i], _mm_add_epi32(a0, b0));
|
||||||
|
} else if (size == 2) {
|
||||||
|
const __m128i a0 = _mm_loadl_epi64((const __m128i*)&a[i]);
|
||||||
|
const __m128i b0 = _mm_loadl_epi64((const __m128i*)&out[i]);
|
||||||
|
_mm_storel_epi64((__m128i*)&out[i], _mm_add_epi32(a0, b0));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#undef LINE_SIZE
|
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
// Entropy
|
// Entropy
|
||||||
|
@ -172,7 +172,9 @@ for (( i = 0; i < $NUM_PLATFORMS; ++i )); do
|
|||||||
CFLAGS="-pipe -isysroot ${SDKROOT} -O3 -DNDEBUG"
|
CFLAGS="-pipe -isysroot ${SDKROOT} -O3 -DNDEBUG"
|
||||||
case "${PLATFORM}" in
|
case "${PLATFORM}" in
|
||||||
iPhone*)
|
iPhone*)
|
||||||
|
if [[ "${XCODE%%.*}" -lt 16 ]]; then
|
||||||
CFLAGS+=" -fembed-bitcode"
|
CFLAGS+=" -fembed-bitcode"
|
||||||
|
fi
|
||||||
CFLAGS+=" -target ${ARCH}-apple-ios${IOS_MIN_VERSION}"
|
CFLAGS+=" -target ${ARCH}-apple-ios${IOS_MIN_VERSION}"
|
||||||
[[ "${PLATFORM}" == *Simulator* ]] && CFLAGS+="-simulator"
|
[[ "${PLATFORM}" == *Simulator* ]] && CFLAGS+="-simulator"
|
||||||
;;
|
;;
|
||||||
|
Loading…
Reference in New Issue
Block a user