mirror of
https://github.com/webmproject/libwebp.git
synced 2024-11-20 04:18:26 +01:00
Call the C function to finish off lossless SSE loops only when necessary.
Change-Id: I4e221d80879dc9c90c24d69a40bc5811d73787ad
This commit is contained in:
parent
875fafc191
commit
1cb638010c
@ -37,8 +37,10 @@ static void SubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixels) {
|
||||
_mm_storeu_si128((__m128i*)&argb_data[i], out);
|
||||
}
|
||||
// fallthrough and finish off with plain-C
|
||||
if (i != num_pixels) {
|
||||
VP8LSubtractGreenFromBlueAndRed_C(argb_data + i, num_pixels - i);
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Color Transform
|
||||
@ -71,8 +73,10 @@ static void TransformColor(const VP8LMultipliers* const m,
|
||||
_mm_storeu_si128((__m128i*)&argb_data[i], out);
|
||||
}
|
||||
// fallthrough and finish off with plain-C
|
||||
if (i != num_pixels) {
|
||||
VP8LTransformColor_C(m, argb_data + i, num_pixels - i);
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
#define SPAN 8
|
||||
@ -477,8 +481,10 @@ static void PredictorSub0_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
const __m128i res = _mm_sub_epi8(src, black);
|
||||
_mm_storeu_si128((__m128i*)&out[i], res);
|
||||
}
|
||||
if (i != num_pixels) {
|
||||
VP8LPredictorsSub_C[0](in + i, upper + i, num_pixels - i, out + i);
|
||||
}
|
||||
}
|
||||
|
||||
#define GENERATE_PREDICTOR_1(X, IN) \
|
||||
static void PredictorSub##X##_SSE2(const uint32_t* in, const uint32_t* upper, \
|
||||
@ -490,7 +496,9 @@ static void PredictorSub##X##_SSE2(const uint32_t* in, const uint32_t* upper, \
|
||||
const __m128i res = _mm_sub_epi8(src, pred); \
|
||||
_mm_storeu_si128((__m128i*)&out[i], res); \
|
||||
} \
|
||||
if (i != num_pixels) { \
|
||||
VP8LPredictorsSub_C[(X)](in + i, upper + i, num_pixels - i, out + i); \
|
||||
} \
|
||||
}
|
||||
|
||||
GENERATE_PREDICTOR_1(1, in[i - 1]) // Predictor1: L
|
||||
@ -514,8 +522,10 @@ static void PredictorSub5_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
res = _mm_sub_epi8(src, pred);
|
||||
_mm_storeu_si128((__m128i*)&out[i], res);
|
||||
}
|
||||
if (i != num_pixels) {
|
||||
VP8LPredictorsSub_C[5](in + i, upper + i, num_pixels - i, out + i);
|
||||
}
|
||||
}
|
||||
|
||||
#define GENERATE_PREDICTOR_2(X, A, B) \
|
||||
static void PredictorSub##X##_SSE2(const uint32_t* in, const uint32_t* upper, \
|
||||
@ -530,7 +540,9 @@ static void PredictorSub##X##_SSE2(const uint32_t* in, const uint32_t* upper, \
|
||||
res = _mm_sub_epi8(src, pred); \
|
||||
_mm_storeu_si128((__m128i*)&out[i], res); \
|
||||
} \
|
||||
if (i != num_pixels) { \
|
||||
VP8LPredictorsSub_C[(X)](in + i, upper + i, num_pixels - i, out + i); \
|
||||
} \
|
||||
}
|
||||
|
||||
GENERATE_PREDICTOR_2(6, in[i - 1], upper[i - 1]) // Predictor6: avg(L, TL)
|
||||
@ -556,8 +568,10 @@ static void PredictorSub10_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
res = _mm_sub_epi8(src, avg);
|
||||
_mm_storeu_si128((__m128i*)&out[i], res);
|
||||
}
|
||||
if (i != num_pixels) {
|
||||
VP8LPredictorsSub_C[10](in + i, upper + i, num_pixels - i, out + i);
|
||||
}
|
||||
}
|
||||
|
||||
// Predictor11: select.
|
||||
static void GetSumAbsDiff32(const __m128i* const A, const __m128i* const B,
|
||||
@ -593,8 +607,10 @@ static void PredictorSub11_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
_mm_storeu_si128((__m128i*)&out[i], res);
|
||||
}
|
||||
}
|
||||
if (i != num_pixels) {
|
||||
VP8LPredictorsSub_C[11](in + i, upper + i, num_pixels - i, out + i);
|
||||
}
|
||||
}
|
||||
|
||||
// Predictor12: ClampedSubSubtractFull.
|
||||
static void PredictorSub12_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
@ -620,8 +636,10 @@ static void PredictorSub12_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
const __m128i res = _mm_sub_epi8(src, pred);
|
||||
_mm_storeu_si128((__m128i*)&out[i], res);
|
||||
}
|
||||
if (i != num_pixels) {
|
||||
VP8LPredictorsSub_C[12](in + i, upper + i, num_pixels - i, out + i);
|
||||
}
|
||||
}
|
||||
|
||||
// Predictors13: ClampedAddSubtractHalf
|
||||
static void PredictorSub13_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
@ -648,8 +666,10 @@ static void PredictorSub13_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
const __m128i res = _mm_sub_epi8(src, pred);
|
||||
_mm_storel_epi64((__m128i*)&out[i], res);
|
||||
}
|
||||
if (i != num_pixels) {
|
||||
VP8LPredictorsSub_C[13](in + i, upper + i, num_pixels - i, out + i);
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Entry point
|
||||
|
@ -32,8 +32,10 @@ static void SubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixels) {
|
||||
_mm_storeu_si128((__m128i*)&argb_data[i], out);
|
||||
}
|
||||
// fallthrough and finish off with plain-C
|
||||
if (i != num_pixels) {
|
||||
VP8LSubtractGreenFromBlueAndRed_C(argb_data + i, num_pixels - i);
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Entry point
|
||||
|
@ -186,8 +186,10 @@ static void PredictorAdd0_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
const __m128i res = _mm_add_epi8(src, black);
|
||||
_mm_storeu_si128((__m128i*)&out[i], res);
|
||||
}
|
||||
if (i != num_pixels) {
|
||||
VP8LPredictorsAdd_C[0](in + i, upper + i, num_pixels - i, out + i);
|
||||
}
|
||||
}
|
||||
|
||||
// Predictor1: left.
|
||||
static void PredictorAdd1_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
@ -210,8 +212,10 @@ static void PredictorAdd1_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
// replicate prev output on the four lanes
|
||||
prev = _mm_shuffle_epi32(res, (3 << 0) | (3 << 2) | (3 << 4) | (3 << 6));
|
||||
}
|
||||
if (i != num_pixels) {
|
||||
VP8LPredictorsAdd_C[1](in + i, upper + i, num_pixels - i, out + i);
|
||||
}
|
||||
}
|
||||
|
||||
// Macro that adds 32-bit integers from IN using mod 256 arithmetic
|
||||
// per 8 bit channel.
|
||||
@ -225,7 +229,9 @@ static void PredictorAdd##X##_SSE2(const uint32_t* in, const uint32_t* upper, \
|
||||
const __m128i res = _mm_add_epi8(src, other); \
|
||||
_mm_storeu_si128((__m128i*)&out[i], res); \
|
||||
} \
|
||||
if (i != num_pixels) { \
|
||||
VP8LPredictorsAdd_C[(X)](in + i, upper + i, num_pixels - i, out + i); \
|
||||
} \
|
||||
}
|
||||
|
||||
// Predictor2: Top.
|
||||
@ -255,7 +261,9 @@ static void PredictorAdd##X##_SSE2(const uint32_t* in, const uint32_t* upper, \
|
||||
res = _mm_add_epi8(avg, src); \
|
||||
_mm_storeu_si128((__m128i*)&out[i], res); \
|
||||
} \
|
||||
if (i != num_pixels) { \
|
||||
VP8LPredictorsAdd_C[(X)](in + i, upper + i, num_pixels - i, out + i); \
|
||||
} \
|
||||
}
|
||||
// Predictor8: average TL T.
|
||||
GENERATE_PREDICTOR_2(8, upper[i - 1])
|
||||
@ -287,8 +295,10 @@ static void PredictorAdd10_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
src = _mm_srli_si128(src, 4);
|
||||
}
|
||||
}
|
||||
if (i != num_pixels) {
|
||||
VP8LPredictorsAdd_C[10](in + i, upper + i, num_pixels - i, out + i);
|
||||
}
|
||||
}
|
||||
|
||||
// Predictor11: select.
|
||||
static void GetSumAbsDiff32(const __m128i* const A, const __m128i* const B,
|
||||
@ -331,8 +341,10 @@ static void PredictorAdd11_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
pa = _mm_srli_si128(pa, 4);
|
||||
}
|
||||
}
|
||||
if (i != num_pixels) {
|
||||
VP8LPredictorsAdd_C[11](in + i, upper + i, num_pixels - i, out + i);
|
||||
}
|
||||
}
|
||||
|
||||
// Predictor12: ClampedAddSubtractFull.
|
||||
#define DO_PRED12(DIFF, LANE, OUT) \
|
||||
@ -369,8 +381,10 @@ static void PredictorAdd12_SSE2(const uint32_t* in, const uint32_t* upper,
|
||||
DO_PRED12(diff_hi, 0, 2);
|
||||
DO_PRED12(diff_hi, 1, 3);
|
||||
}
|
||||
if (i != num_pixels) {
|
||||
VP8LPredictorsAdd_C[12](in + i, upper + i, num_pixels - i, out + i);
|
||||
}
|
||||
}
|
||||
#undef DO_PRED12
|
||||
|
||||
// Due to averages with integers, values cannot be accumulated in parallel for
|
||||
@ -392,8 +406,10 @@ static void AddGreenToBlueAndRed(const uint32_t* const src, int num_pixels,
|
||||
_mm_storeu_si128((__m128i*)&dst[i], out);
|
||||
}
|
||||
// fallthrough and finish off with plain-C
|
||||
if (i != num_pixels) {
|
||||
VP8LAddGreenToBlueAndRed_C(src + i, num_pixels - i, dst + i);
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Color Transform
|
||||
@ -430,8 +446,10 @@ static void TransformColorInverse(const VP8LMultipliers* const m,
|
||||
_mm_storeu_si128((__m128i*)&dst[i], out);
|
||||
}
|
||||
// Fall-back to C-version for left-overs.
|
||||
if (i != num_pixels) {
|
||||
VP8LTransformColorInverse_C(m, src + i, num_pixels - i, dst + i);
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Color-space conversion functions
|
||||
@ -467,8 +485,10 @@ static void ConvertBGRAToRGB(const uint32_t* src, int num_pixels,
|
||||
num_pixels -= 32;
|
||||
}
|
||||
// left-overs
|
||||
if (num_pixels > 0) {
|
||||
VP8LConvertBGRAToRGB_C((const uint32_t*)in, num_pixels, (uint8_t*)out);
|
||||
}
|
||||
}
|
||||
|
||||
static void ConvertBGRAToRGBA(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
@ -494,8 +514,10 @@ static void ConvertBGRAToRGBA(const uint32_t* src,
|
||||
num_pixels -= 8;
|
||||
}
|
||||
// left-overs
|
||||
if (num_pixels > 0) {
|
||||
VP8LConvertBGRAToRGBA_C((const uint32_t*)in, num_pixels, (uint8_t*)out);
|
||||
}
|
||||
}
|
||||
|
||||
static void ConvertBGRAToRGBA4444(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
@ -528,8 +550,10 @@ static void ConvertBGRAToRGBA4444(const uint32_t* src,
|
||||
num_pixels -= 8;
|
||||
}
|
||||
// left-overs
|
||||
if (num_pixels > 0) {
|
||||
VP8LConvertBGRAToRGBA4444_C((const uint32_t*)in, num_pixels, (uint8_t*)out);
|
||||
}
|
||||
}
|
||||
|
||||
static void ConvertBGRAToRGB565(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
@ -567,8 +591,10 @@ static void ConvertBGRAToRGB565(const uint32_t* src,
|
||||
num_pixels -= 8;
|
||||
}
|
||||
// left-overs
|
||||
if (num_pixels > 0) {
|
||||
VP8LConvertBGRAToRGB565_C((const uint32_t*)in, num_pixels, (uint8_t*)out);
|
||||
}
|
||||
}
|
||||
|
||||
static void ConvertBGRAToBGR(const uint32_t* src,
|
||||
int num_pixels, uint8_t* dst) {
|
||||
@ -598,8 +624,10 @@ static void ConvertBGRAToBGR(const uint32_t* src,
|
||||
num_pixels -= 8;
|
||||
}
|
||||
// left-overs
|
||||
if (num_pixels > 0) {
|
||||
VP8LConvertBGRAToBGR_C((const uint32_t*)in, num_pixels, dst);
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Entry point
|
||||
|
Loading…
Reference in New Issue
Block a user