mirror of
https://github.com/webmproject/libwebp.git
synced 2024-11-20 04:18:26 +01:00
rescaler_sse2: harmonize function suffixes
BUG=webp:355 Change-Id: I978fd826ff90149c0ffd9d7607dcc6f88082d3e6
This commit is contained in:
parent
2c1b18ba2f
commit
582a1b572a
@ -27,7 +27,7 @@
|
||||
#define MULT_FIX(x, y) (((uint64_t)(x) * (y) + ROUNDER) >> WEBP_RESCALER_RFIX)
|
||||
|
||||
// input: 8 bytes ABCDEFGH -> output: A0E0B0F0C0G0D0H0
|
||||
static void LoadTwoPixels(const uint8_t* const src, __m128i* out) {
|
||||
static void LoadTwoPixels_SSE2(const uint8_t* const src, __m128i* out) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i A = _mm_loadl_epi64((const __m128i*)(src)); // ABCDEFGH
|
||||
const __m128i B = _mm_unpacklo_epi8(A, zero); // A0B0C0D0E0F0G0H0
|
||||
@ -36,7 +36,7 @@ static void LoadTwoPixels(const uint8_t* const src, __m128i* out) {
|
||||
}
|
||||
|
||||
// input: 8 bytes ABCDEFGH -> output: A0B0C0D0E0F0G0H0
|
||||
static void LoadHeightPixels(const uint8_t* const src, __m128i* out) {
|
||||
static void LoadHeightPixels_SSE2(const uint8_t* const src, __m128i* out) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i A = _mm_loadl_epi64((const __m128i*)(src)); // ABCDEFGH
|
||||
*out = _mm_unpacklo_epi8(A, zero);
|
||||
@ -57,7 +57,7 @@ static void RescalerImportRowExpand_SSE2(WebPRescaler* const wrk,
|
||||
WebPRescalerImportRowExpand_C(wrk, src);
|
||||
return;
|
||||
}
|
||||
LoadTwoPixels(src, &cur_pixels);
|
||||
LoadTwoPixels_SSE2(src, &cur_pixels);
|
||||
src += 4;
|
||||
while (1) {
|
||||
const __m128i mult = _mm_set1_epi32(((x_add - accum) << 16) | accum);
|
||||
@ -67,7 +67,7 @@ static void RescalerImportRowExpand_SSE2(WebPRescaler* const wrk,
|
||||
if (frow >= frow_end) break;
|
||||
accum -= wrk->x_sub;
|
||||
if (accum < 0) {
|
||||
LoadTwoPixels(src, &cur_pixels);
|
||||
LoadTwoPixels_SSE2(src, &cur_pixels);
|
||||
src += 4;
|
||||
accum += x_add;
|
||||
}
|
||||
@ -79,7 +79,7 @@ static void RescalerImportRowExpand_SSE2(WebPRescaler* const wrk,
|
||||
WebPRescalerImportRowExpand_C(wrk, src);
|
||||
return;
|
||||
}
|
||||
LoadHeightPixels(src, &cur_pixels);
|
||||
LoadHeightPixels_SSE2(src, &cur_pixels);
|
||||
src += 7;
|
||||
left = 7;
|
||||
while (1) {
|
||||
@ -94,7 +94,7 @@ static void RescalerImportRowExpand_SSE2(WebPRescaler* const wrk,
|
||||
if (--left) {
|
||||
cur_pixels = _mm_srli_si128(cur_pixels, 2);
|
||||
} else if (src <= src_limit) {
|
||||
LoadHeightPixels(src, &cur_pixels);
|
||||
LoadHeightPixels_SSE2(src, &cur_pixels);
|
||||
src += 7;
|
||||
left = 7;
|
||||
} else { // tail
|
||||
@ -169,7 +169,7 @@ static void RescalerImportRowShrink_SSE2(WebPRescaler* const wrk,
|
||||
// Row export
|
||||
|
||||
// load *src as epi64, multiply by mult and store result in [out0 ... out3]
|
||||
static WEBP_INLINE void LoadDispatchAndMult(const rescaler_t* const src,
|
||||
static WEBP_INLINE void LoadDispatchAndMult_SSE2(const rescaler_t* const src,
|
||||
const __m128i* const mult,
|
||||
__m128i* const out0,
|
||||
__m128i* const out1,
|
||||
@ -192,7 +192,7 @@ static WEBP_INLINE void LoadDispatchAndMult(const rescaler_t* const src,
|
||||
}
|
||||
}
|
||||
|
||||
static WEBP_INLINE void ProcessRow(const __m128i* const A0,
|
||||
static WEBP_INLINE void ProcessRow_SSE2(const __m128i* const A0,
|
||||
const __m128i* const A1,
|
||||
const __m128i* const A2,
|
||||
const __m128i* const A3,
|
||||
@ -240,8 +240,8 @@ static void RescalerExportRowExpand_SSE2(WebPRescaler* const wrk) {
|
||||
if (wrk->y_accum == 0) {
|
||||
for (x_out = 0; x_out + 8 <= x_out_max; x_out += 8) {
|
||||
__m128i A0, A1, A2, A3;
|
||||
LoadDispatchAndMult(frow + x_out, NULL, &A0, &A1, &A2, &A3);
|
||||
ProcessRow(&A0, &A1, &A2, &A3, &mult, dst + x_out);
|
||||
LoadDispatchAndMult_SSE2(frow + x_out, NULL, &A0, &A1, &A2, &A3);
|
||||
ProcessRow_SSE2(&A0, &A1, &A2, &A3, &mult, dst + x_out);
|
||||
}
|
||||
for (; x_out < x_out_max; ++x_out) {
|
||||
const uint32_t J = frow[x_out];
|
||||
@ -257,8 +257,8 @@ static void RescalerExportRowExpand_SSE2(WebPRescaler* const wrk) {
|
||||
const __m128i rounder = _mm_set_epi32(0, ROUNDER, 0, ROUNDER);
|
||||
for (x_out = 0; x_out + 8 <= x_out_max; x_out += 8) {
|
||||
__m128i A0, A1, A2, A3, B0, B1, B2, B3;
|
||||
LoadDispatchAndMult(frow + x_out, &mA, &A0, &A1, &A2, &A3);
|
||||
LoadDispatchAndMult(irow + x_out, &mB, &B0, &B1, &B2, &B3);
|
||||
LoadDispatchAndMult_SSE2(frow + x_out, &mA, &A0, &A1, &A2, &A3);
|
||||
LoadDispatchAndMult_SSE2(irow + x_out, &mB, &B0, &B1, &B2, &B3);
|
||||
{
|
||||
const __m128i C0 = _mm_add_epi64(A0, B0);
|
||||
const __m128i C1 = _mm_add_epi64(A1, B1);
|
||||
@ -272,7 +272,7 @@ static void RescalerExportRowExpand_SSE2(WebPRescaler* const wrk) {
|
||||
const __m128i E1 = _mm_srli_epi64(D1, WEBP_RESCALER_RFIX);
|
||||
const __m128i E2 = _mm_srli_epi64(D2, WEBP_RESCALER_RFIX);
|
||||
const __m128i E3 = _mm_srli_epi64(D3, WEBP_RESCALER_RFIX);
|
||||
ProcessRow(&E0, &E1, &E2, &E3, &mult, dst + x_out);
|
||||
ProcessRow_SSE2(&E0, &E1, &E2, &E3, &mult, dst + x_out);
|
||||
}
|
||||
}
|
||||
for (; x_out < x_out_max; ++x_out) {
|
||||
@ -303,8 +303,8 @@ static void RescalerExportRowShrink_SSE2(WebPRescaler* const wrk) {
|
||||
const __m128i rounder = _mm_set_epi32(0, ROUNDER, 0, ROUNDER);
|
||||
for (x_out = 0; x_out + 8 <= x_out_max; x_out += 8) {
|
||||
__m128i A0, A1, A2, A3, B0, B1, B2, B3;
|
||||
LoadDispatchAndMult(irow + x_out, NULL, &A0, &A1, &A2, &A3);
|
||||
LoadDispatchAndMult(frow + x_out, &mult_y, &B0, &B1, &B2, &B3);
|
||||
LoadDispatchAndMult_SSE2(irow + x_out, NULL, &A0, &A1, &A2, &A3);
|
||||
LoadDispatchAndMult_SSE2(frow + x_out, &mult_y, &B0, &B1, &B2, &B3);
|
||||
{
|
||||
const __m128i C0 = _mm_add_epi64(B0, rounder);
|
||||
const __m128i C1 = _mm_add_epi64(B1, rounder);
|
||||
@ -324,7 +324,7 @@ static void RescalerExportRowShrink_SSE2(WebPRescaler* const wrk) {
|
||||
const __m128i G1 = _mm_or_si128(D1, F3);
|
||||
_mm_storeu_si128((__m128i*)(irow + x_out + 0), G0);
|
||||
_mm_storeu_si128((__m128i*)(irow + x_out + 4), G1);
|
||||
ProcessRow(&E0, &E1, &E2, &E3, &mult_xy, dst + x_out);
|
||||
ProcessRow_SSE2(&E0, &E1, &E2, &E3, &mult_xy, dst + x_out);
|
||||
}
|
||||
}
|
||||
for (; x_out < x_out_max; ++x_out) {
|
||||
@ -340,10 +340,10 @@ static void RescalerExportRowShrink_SSE2(WebPRescaler* const wrk) {
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
for (x_out = 0; x_out + 8 <= x_out_max; x_out += 8) {
|
||||
__m128i A0, A1, A2, A3;
|
||||
LoadDispatchAndMult(irow + x_out, NULL, &A0, &A1, &A2, &A3);
|
||||
LoadDispatchAndMult_SSE2(irow + x_out, NULL, &A0, &A1, &A2, &A3);
|
||||
_mm_storeu_si128((__m128i*)(irow + x_out + 0), zero);
|
||||
_mm_storeu_si128((__m128i*)(irow + x_out + 4), zero);
|
||||
ProcessRow(&A0, &A1, &A2, &A3, &mult, dst + x_out);
|
||||
ProcessRow_SSE2(&A0, &A1, &A2, &A3, &mult, dst + x_out);
|
||||
}
|
||||
for (; x_out < x_out_max; ++x_out) {
|
||||
const int v = (int)MULT_FIX(irow[x_out], scale);
|
||||
|
Loading…
Reference in New Issue
Block a user