mirror of
https://github.com/webmproject/libwebp.git
synced 2025-01-27 15:12:54 +01:00
add WebPDispatchAlphaToGreen() to dsp
SSE2 version is 2.1x faster This is used to transfer the alpha plane to green channel before lossless compression. Change-Id: I01d9df0051c183b1ff5d6eb69961d4f43e33141a
This commit is contained in:
parent
1bd4c2ad23
commit
2d9b0a4472
@ -303,6 +303,19 @@ static int DispatchAlpha(const uint8_t* alpha, int alpha_stride,
|
||||
return (alpha_mask != 0xff);
|
||||
}
|
||||
|
||||
static void DispatchAlphaToGreen(const uint8_t* alpha, int alpha_stride,
|
||||
int width, int height,
|
||||
uint32_t* dst, int dst_stride) {
|
||||
int i, j;
|
||||
for (j = 0; j < height; ++j) {
|
||||
for (i = 0; i < width; ++i) {
|
||||
dst[i] = alpha[i] << 8; // leave A/R/B channels zero'd.
|
||||
}
|
||||
alpha += alpha_stride;
|
||||
dst += dst_stride;
|
||||
}
|
||||
}
|
||||
|
||||
static int ExtractAlpha(const uint8_t* argb, int argb_stride,
|
||||
int width, int height,
|
||||
uint8_t* alpha, int alpha_stride) {
|
||||
@ -324,6 +337,7 @@ static int ExtractAlpha(const uint8_t* argb, int argb_stride,
|
||||
void (*WebPApplyAlphaMultiply)(uint8_t*, int, int, int, int);
|
||||
void (*WebPApplyAlphaMultiply4444)(uint8_t*, int, int, int);
|
||||
int (*WebPDispatchAlpha)(const uint8_t*, int, int, int, uint8_t*, int);
|
||||
void (*WebPDispatchAlphaToGreen)(const uint8_t*, int, int, int, uint32_t*, int);
|
||||
int (*WebPExtractAlpha)(const uint8_t*, int, int, int, uint8_t*, int);
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
@ -339,6 +353,7 @@ void WebPInitAlphaProcessing(void) {
|
||||
WebPApplyAlphaMultiply = ApplyAlphaMultiply;
|
||||
WebPApplyAlphaMultiply4444 = ApplyAlphaMultiply_16b;
|
||||
WebPDispatchAlpha = DispatchAlpha;
|
||||
WebPDispatchAlphaToGreen = DispatchAlphaToGreen;
|
||||
WebPExtractAlpha = ExtractAlpha;
|
||||
|
||||
// If defined, use CPUInfo() to overwrite some pointers with faster versions.
|
||||
|
@ -72,6 +72,32 @@ static int DispatchAlpha(const uint8_t* alpha, int alpha_stride,
|
||||
return (alpha_and != 0xff);
|
||||
}
|
||||
|
||||
static void DispatchAlphaToGreen(const uint8_t* alpha, int alpha_stride,
|
||||
int width, int height,
|
||||
uint32_t* dst, int dst_stride) {
|
||||
int i, j;
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const int limit = width & ~15;
|
||||
for (j = 0; j < height; ++j) {
|
||||
for (i = 0; i < limit; i += 16) { // process 16 alpha bytes
|
||||
const __m128i a0 = _mm_loadu_si128((__m128i*)&alpha[i]);
|
||||
const __m128i a1 = _mm_unpacklo_epi8(zero, a0); // note the 'zero' first!
|
||||
const __m128i b1 = _mm_unpackhi_epi8(zero, a0);
|
||||
const __m128i a2_lo = _mm_unpacklo_epi16(a1, zero);
|
||||
const __m128i b2_lo = _mm_unpacklo_epi16(b1, zero);
|
||||
const __m128i a2_hi = _mm_unpackhi_epi16(a1, zero);
|
||||
const __m128i b2_hi = _mm_unpackhi_epi16(b1, zero);
|
||||
_mm_storeu_si128((__m128i*)&dst[i + 0], a2_lo);
|
||||
_mm_storeu_si128((__m128i*)&dst[i + 4], a2_hi);
|
||||
_mm_storeu_si128((__m128i*)&dst[i + 8], b2_lo);
|
||||
_mm_storeu_si128((__m128i*)&dst[i + 12], b2_hi);
|
||||
}
|
||||
for (; i < width; ++i) dst[i] = alpha[i] << 8;
|
||||
alpha += alpha_stride;
|
||||
dst += dst_stride;
|
||||
}
|
||||
}
|
||||
|
||||
static int ExtractAlpha(const uint8_t* argb, int argb_stride,
|
||||
int width, int height,
|
||||
uint8_t* alpha, int alpha_stride) {
|
||||
@ -264,6 +290,7 @@ void WebPInitAlphaProcessingSSE2(void) {
|
||||
WebPMultRow = MultRow;
|
||||
WebPApplyAlphaMultiply = ApplyAlphaMultiply;
|
||||
WebPDispatchAlpha = DispatchAlpha;
|
||||
WebPDispatchAlphaToGreen = DispatchAlphaToGreen;
|
||||
WebPExtractAlpha = ExtractAlpha;
|
||||
#endif
|
||||
}
|
||||
|
@ -274,6 +274,12 @@ extern int (*WebPDispatchAlpha)(const uint8_t* alpha, int alpha_stride,
|
||||
int width, int height,
|
||||
uint8_t* dst, int dst_stride);
|
||||
|
||||
// Transfer packed 8b alpha[] values to green channel in dst[], zero'ing the
|
||||
// A/R/B values. 'dst_stride' is the stride for dst[] in uint32_t units.
|
||||
extern void (*WebPDispatchAlphaToGreen)(const uint8_t* alpha, int alpha_stride,
|
||||
int width, int height,
|
||||
uint32_t* dst, int dst_stride);
|
||||
|
||||
// Extract the alpha values from 32b values in argb[] and pack them into alpha[]
|
||||
// (this is the opposite of WebPDispatchAlpha).
|
||||
// Returns true if there's only trivial 0xff alpha values.
|
||||
|
@ -61,18 +61,8 @@ static int EncodeLossless(const uint8_t* const data, int width, int height,
|
||||
if (!WebPPictureAlloc(&picture)) return 0;
|
||||
|
||||
// Transfer the alpha values to the green channel.
|
||||
{
|
||||
int i, j;
|
||||
uint32_t* dst = picture.argb;
|
||||
const uint8_t* src = data;
|
||||
for (j = 0; j < picture.height; ++j) {
|
||||
for (i = 0; i < picture.width; ++i) {
|
||||
dst[i] = src[i] << 8; // we leave A/R/B channels zero'd.
|
||||
}
|
||||
src += width;
|
||||
dst += picture.argb_stride;
|
||||
}
|
||||
}
|
||||
WebPDispatchAlphaToGreen(data, width, picture.width, picture.height,
|
||||
picture.argb, picture.argb_stride);
|
||||
|
||||
WebPConfigInit(&config);
|
||||
config.lossless = 1;
|
||||
@ -376,6 +366,7 @@ static int CompressAlphaJob(VP8Encoder* const enc, void* dummy) {
|
||||
}
|
||||
|
||||
void VP8EncInitAlpha(VP8Encoder* const enc) {
|
||||
WebPInitAlphaProcessing();
|
||||
enc->has_alpha_ = WebPPictureHasTransparency(enc->pic_);
|
||||
enc->alpha_data_ = NULL;
|
||||
enc->alpha_data_size_ = 0;
|
||||
@ -430,4 +421,3 @@ int VP8EncDeleteAlpha(VP8Encoder* const enc) {
|
||||
enc->has_alpha_ = 0;
|
||||
return ok;
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user