mirror of
https://github.com/webmproject/libwebp.git
synced 2025-08-29 15:22:12 +02:00
apply clang-format
(Debian clang-format version 19.1.7 (3+build4)) with `--style=Google`.

Manual changes:
* clang-format disabled around macros with stringification (mostly assembly)
* some inline assembly strings were adjusted to avoid awkward line breaks
* trailing commas, `//` or suffixes (`ull`) added to help array formatting
* thread_utils.c: parameter comments were changed to the more common /*...=*/ style to improve formatting

The automatically generated code under swig/ was skipped.

Bug: 433996651
Change-Id: Iea3f24160d78d2a2653971cdf13fa932e47ff1b3
This commit is contained in:
@@ -16,8 +16,8 @@
|
||||
#if defined(WEBP_USE_SSE2)
|
||||
#include <emmintrin.h>
|
||||
|
||||
#include "src/webp/types.h"
|
||||
#include "src/dsp/cpu.h"
|
||||
#include "src/webp/types.h"
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
@@ -90,7 +90,7 @@ static void DispatchAlphaToGreen_SSE2(const uint8_t* WEBP_RESTRICT alpha,
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const int limit = width & ~15;
|
||||
for (j = 0; j < height; ++j) {
|
||||
for (i = 0; i < limit; i += 16) { // process 16 alpha bytes
|
||||
for (i = 0; i < limit; i += 16) { // process 16 alpha bytes
|
||||
const __m128i a0 = _mm_loadu_si128((const __m128i*)&alpha[i]);
|
||||
const __m128i a1 = _mm_unpacklo_epi8(zero, a0); // note the 'zero' first!
|
||||
const __m128i b1 = _mm_unpackhi_epi8(zero, a0);
|
||||
@@ -98,9 +98,9 @@ static void DispatchAlphaToGreen_SSE2(const uint8_t* WEBP_RESTRICT alpha,
|
||||
const __m128i b2_lo = _mm_unpacklo_epi16(b1, zero);
|
||||
const __m128i a2_hi = _mm_unpackhi_epi16(a1, zero);
|
||||
const __m128i b2_hi = _mm_unpackhi_epi16(b1, zero);
|
||||
_mm_storeu_si128((__m128i*)&dst[i + 0], a2_lo);
|
||||
_mm_storeu_si128((__m128i*)&dst[i + 4], a2_hi);
|
||||
_mm_storeu_si128((__m128i*)&dst[i + 8], b2_lo);
|
||||
_mm_storeu_si128((__m128i*)&dst[i + 0], a2_lo);
|
||||
_mm_storeu_si128((__m128i*)&dst[i + 4], a2_hi);
|
||||
_mm_storeu_si128((__m128i*)&dst[i + 8], b2_lo);
|
||||
_mm_storeu_si128((__m128i*)&dst[i + 12], b2_hi);
|
||||
}
|
||||
for (; i < width; ++i) dst[i] = alpha[i] << 8;
|
||||
@@ -197,36 +197,37 @@ static void ExtractGreen_SSE2(const uint32_t* WEBP_RESTRICT argb,
|
||||
//------------------------------------------------------------------------------
|
||||
// Non-dither premultiplied modes
|
||||
|
||||
#define MULTIPLIER(a) ((a) * 0x8081)
|
||||
#define MULTIPLIER(a) ((a) * 0x8081)
|
||||
#define PREMULTIPLY(x, m) (((x) * (m)) >> 23)
|
||||
|
||||
// We can't use a 'const int' for the SHUFFLE value, because it has to be an
|
||||
// immediate in the _mm_shufflexx_epi16() instruction. We really need a macro.
|
||||
// We use: v / 255 = (v * 0x8081) >> 23, where v = alpha * {r,g,b} is a 16bit
|
||||
// value.
|
||||
#define APPLY_ALPHA(RGBX, SHUFFLE) do { \
|
||||
const __m128i argb0 = _mm_loadu_si128((const __m128i*)&(RGBX)); \
|
||||
const __m128i argb1_lo = _mm_unpacklo_epi8(argb0, zero); \
|
||||
const __m128i argb1_hi = _mm_unpackhi_epi8(argb0, zero); \
|
||||
const __m128i alpha0_lo = _mm_or_si128(argb1_lo, kMask); \
|
||||
const __m128i alpha0_hi = _mm_or_si128(argb1_hi, kMask); \
|
||||
const __m128i alpha1_lo = _mm_shufflelo_epi16(alpha0_lo, SHUFFLE); \
|
||||
const __m128i alpha1_hi = _mm_shufflelo_epi16(alpha0_hi, SHUFFLE); \
|
||||
const __m128i alpha2_lo = _mm_shufflehi_epi16(alpha1_lo, SHUFFLE); \
|
||||
const __m128i alpha2_hi = _mm_shufflehi_epi16(alpha1_hi, SHUFFLE); \
|
||||
/* alpha2 = [ff a0 a0 a0][ff a1 a1 a1] */ \
|
||||
const __m128i A0_lo = _mm_mullo_epi16(alpha2_lo, argb1_lo); \
|
||||
const __m128i A0_hi = _mm_mullo_epi16(alpha2_hi, argb1_hi); \
|
||||
const __m128i A1_lo = _mm_mulhi_epu16(A0_lo, kMult); \
|
||||
const __m128i A1_hi = _mm_mulhi_epu16(A0_hi, kMult); \
|
||||
const __m128i A2_lo = _mm_srli_epi16(A1_lo, 7); \
|
||||
const __m128i A2_hi = _mm_srli_epi16(A1_hi, 7); \
|
||||
const __m128i A3 = _mm_packus_epi16(A2_lo, A2_hi); \
|
||||
_mm_storeu_si128((__m128i*)&(RGBX), A3); \
|
||||
} while (0)
|
||||
#define APPLY_ALPHA(RGBX, SHUFFLE) \
|
||||
do { \
|
||||
const __m128i argb0 = _mm_loadu_si128((const __m128i*)&(RGBX)); \
|
||||
const __m128i argb1_lo = _mm_unpacklo_epi8(argb0, zero); \
|
||||
const __m128i argb1_hi = _mm_unpackhi_epi8(argb0, zero); \
|
||||
const __m128i alpha0_lo = _mm_or_si128(argb1_lo, kMask); \
|
||||
const __m128i alpha0_hi = _mm_or_si128(argb1_hi, kMask); \
|
||||
const __m128i alpha1_lo = _mm_shufflelo_epi16(alpha0_lo, SHUFFLE); \
|
||||
const __m128i alpha1_hi = _mm_shufflelo_epi16(alpha0_hi, SHUFFLE); \
|
||||
const __m128i alpha2_lo = _mm_shufflehi_epi16(alpha1_lo, SHUFFLE); \
|
||||
const __m128i alpha2_hi = _mm_shufflehi_epi16(alpha1_hi, SHUFFLE); \
|
||||
/* alpha2 = [ff a0 a0 a0][ff a1 a1 a1] */ \
|
||||
const __m128i A0_lo = _mm_mullo_epi16(alpha2_lo, argb1_lo); \
|
||||
const __m128i A0_hi = _mm_mullo_epi16(alpha2_hi, argb1_hi); \
|
||||
const __m128i A1_lo = _mm_mulhi_epu16(A0_lo, kMult); \
|
||||
const __m128i A1_hi = _mm_mulhi_epu16(A0_hi, kMult); \
|
||||
const __m128i A2_lo = _mm_srli_epi16(A1_lo, 7); \
|
||||
const __m128i A2_hi = _mm_srli_epi16(A1_hi, 7); \
|
||||
const __m128i A3 = _mm_packus_epi16(A2_lo, A2_hi); \
|
||||
_mm_storeu_si128((__m128i*)&(RGBX), A3); \
|
||||
} while (0)
|
||||
|
||||
static void ApplyAlphaMultiply_SSE2(uint8_t* rgba, int alpha_first,
|
||||
int w, int h, int stride) {
|
||||
static void ApplyAlphaMultiply_SSE2(uint8_t* rgba, int alpha_first, int w,
|
||||
int h, int stride) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i kMult = _mm_set1_epi16((short)0x8081);
|
||||
const __m128i kMask = _mm_set_epi16(0, 0xff, 0xff, 0, 0, 0xff, 0xff, 0);
|
||||
@@ -273,7 +274,8 @@ static int HasAlpha8b_SSE2(const uint8_t* src, int length) {
|
||||
const int mask = _mm_movemask_epi8(bits);
|
||||
if (mask != 0xffff) return 1;
|
||||
}
|
||||
for (; i < length; ++i) if (src[i] != 0xff) return 1;
|
||||
for (; i < length; ++i)
|
||||
if (src[i] != 0xff) return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -284,9 +286,9 @@ static int HasAlpha32b_SSE2(const uint8_t* src, int length) {
|
||||
// We don't know if we can access the last 3 bytes after the last alpha
|
||||
// value 'src[4 * length - 4]' (because we don't know if alpha is the first
|
||||
// or the last byte of the quadruplet). Hence the '-3' protection below.
|
||||
length = length * 4 - 3; // size in bytes
|
||||
length = length * 4 - 3; // size in bytes
|
||||
for (; i + 64 <= length; i += 64) {
|
||||
const __m128i a0 = _mm_loadu_si128((const __m128i*)(src + i + 0));
|
||||
const __m128i a0 = _mm_loadu_si128((const __m128i*)(src + i + 0));
|
||||
const __m128i a1 = _mm_loadu_si128((const __m128i*)(src + i + 16));
|
||||
const __m128i a2 = _mm_loadu_si128((const __m128i*)(src + i + 32));
|
||||
const __m128i a3 = _mm_loadu_si128((const __m128i*)(src + i + 48));
|
||||
@@ -296,23 +298,24 @@ static int HasAlpha32b_SSE2(const uint8_t* src, int length) {
|
||||
const __m128i b3 = _mm_and_si128(a3, alpha_mask);
|
||||
const __m128i c0 = _mm_packs_epi32(b0, b1);
|
||||
const __m128i c1 = _mm_packs_epi32(b2, b3);
|
||||
const __m128i d = _mm_packus_epi16(c0, c1);
|
||||
const __m128i d = _mm_packus_epi16(c0, c1);
|
||||
const __m128i bits = _mm_cmpeq_epi8(d, all_0xff);
|
||||
const int mask = _mm_movemask_epi8(bits);
|
||||
if (mask != 0xffff) return 1;
|
||||
}
|
||||
for (; i + 32 <= length; i += 32) {
|
||||
const __m128i a0 = _mm_loadu_si128((const __m128i*)(src + i + 0));
|
||||
const __m128i a0 = _mm_loadu_si128((const __m128i*)(src + i + 0));
|
||||
const __m128i a1 = _mm_loadu_si128((const __m128i*)(src + i + 16));
|
||||
const __m128i b0 = _mm_and_si128(a0, alpha_mask);
|
||||
const __m128i b1 = _mm_and_si128(a1, alpha_mask);
|
||||
const __m128i c = _mm_packs_epi32(b0, b1);
|
||||
const __m128i d = _mm_packus_epi16(c, c);
|
||||
const __m128i c = _mm_packs_epi32(b0, b1);
|
||||
const __m128i d = _mm_packus_epi16(c, c);
|
||||
const __m128i bits = _mm_cmpeq_epi8(d, all_0xff);
|
||||
const int mask = _mm_movemask_epi8(bits);
|
||||
if (mask != 0xffff) return 1;
|
||||
}
|
||||
for (; i <= length; i += 4) if (src[i] != 0xff) return 1;
|
||||
for (; i <= length; i += 4)
|
||||
if (src[i] != 0xff) return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -334,7 +337,8 @@ static void AlphaReplace_SSE2(uint32_t* src, int length, uint32_t color) {
|
||||
_mm_storeu_si128((__m128i*)(src + i + 0), _mm_or_si128(d0, e0));
|
||||
_mm_storeu_si128((__m128i*)(src + i + 4), _mm_or_si128(d1, e1));
|
||||
}
|
||||
for (; i < length; ++i) if ((src[i] >> 24) == 0) src[i] = color;
|
||||
for (; i < length; ++i)
|
||||
if ((src[i] >> 24) == 0) src[i] = color;
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
@@ -369,8 +373,8 @@ static void MultARGBRow_SSE2(uint32_t* const ptr, int width, int inverse) {
|
||||
}
|
||||
|
||||
static void MultRow_SSE2(uint8_t* WEBP_RESTRICT const ptr,
|
||||
const uint8_t* WEBP_RESTRICT const alpha,
|
||||
int width, int inverse) {
|
||||
const uint8_t* WEBP_RESTRICT const alpha, int width,
|
||||
int inverse) {
|
||||
int x = 0;
|
||||
if (!inverse) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
|
Reference in New Issue
Block a user