mirror of
https://github.com/webmproject/libwebp.git
synced 2025-01-27 15:12:54 +01:00
add WebPDispatchAlphaToGreen() to dsp
SSE2 version is 2.1x faster. This is used to transfer the alpha plane to the green channel before lossless compression. Change-Id: I01d9df0051c183b1ff5d6eb69961d4f43e33141a
This commit is contained in:
parent
1bd4c2ad23
commit
2d9b0a4472
@ -303,6 +303,19 @@ static int DispatchAlpha(const uint8_t* alpha, int alpha_stride,
|
|||||||
return (alpha_mask != 0xff);
|
return (alpha_mask != 0xff);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Plain-C version: for every pixel of a 'width' x 'height' area, stores the
// 8-bit alpha[] sample shifted into bits 8..15 (the green channel) of the
// corresponding 32-bit dst[] entry. The other three channels are written as
// zero. Rows advance by 'alpha_stride' uint8_t elements in the source and by
// 'dst_stride' uint32_t elements in the destination.
static void DispatchAlphaToGreen(const uint8_t* alpha, int alpha_stride,
                                 int width, int height,
                                 uint32_t* dst, int dst_stride) {
  const uint8_t* src_row = alpha;
  uint32_t* dst_row = dst;
  int y;
  for (y = 0; y < height; ++y) {
    int x = 0;
    while (x < width) {
      // Only the green byte is populated; A/R/B stay at zero.
      dst_row[x] = src_row[x] << 8;
      ++x;
    }
    src_row += alpha_stride;
    dst_row += dst_stride;
  }
}
|
||||||
|
|
||||||
static int ExtractAlpha(const uint8_t* argb, int argb_stride,
|
static int ExtractAlpha(const uint8_t* argb, int argb_stride,
|
||||||
int width, int height,
|
int width, int height,
|
||||||
uint8_t* alpha, int alpha_stride) {
|
uint8_t* alpha, int alpha_stride) {
|
||||||
@ -324,6 +337,7 @@ static int ExtractAlpha(const uint8_t* argb, int argb_stride,
|
|||||||
void (*WebPApplyAlphaMultiply)(uint8_t*, int, int, int, int);
|
void (*WebPApplyAlphaMultiply)(uint8_t*, int, int, int, int);
|
||||||
void (*WebPApplyAlphaMultiply4444)(uint8_t*, int, int, int);
|
void (*WebPApplyAlphaMultiply4444)(uint8_t*, int, int, int);
|
||||||
int (*WebPDispatchAlpha)(const uint8_t*, int, int, int, uint8_t*, int);
|
int (*WebPDispatchAlpha)(const uint8_t*, int, int, int, uint8_t*, int);
|
||||||
|
void (*WebPDispatchAlphaToGreen)(const uint8_t*, int, int, int, uint32_t*, int);
|
||||||
int (*WebPExtractAlpha)(const uint8_t*, int, int, int, uint8_t*, int);
|
int (*WebPExtractAlpha)(const uint8_t*, int, int, int, uint8_t*, int);
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
@ -339,6 +353,7 @@ void WebPInitAlphaProcessing(void) {
|
|||||||
WebPApplyAlphaMultiply = ApplyAlphaMultiply;
|
WebPApplyAlphaMultiply = ApplyAlphaMultiply;
|
||||||
WebPApplyAlphaMultiply4444 = ApplyAlphaMultiply_16b;
|
WebPApplyAlphaMultiply4444 = ApplyAlphaMultiply_16b;
|
||||||
WebPDispatchAlpha = DispatchAlpha;
|
WebPDispatchAlpha = DispatchAlpha;
|
||||||
|
WebPDispatchAlphaToGreen = DispatchAlphaToGreen;
|
||||||
WebPExtractAlpha = ExtractAlpha;
|
WebPExtractAlpha = ExtractAlpha;
|
||||||
|
|
||||||
// If defined, use CPUInfo() to overwrite some pointers with faster versions.
|
// If defined, use CPUInfo() to overwrite some pointers with faster versions.
|
||||||
|
@ -72,6 +72,32 @@ static int DispatchAlpha(const uint8_t* alpha, int alpha_stride,
|
|||||||
return (alpha_and != 0xff);
|
return (alpha_and != 0xff);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SSE2 version: copies each 8-bit alpha[] sample into the green channel
// (bits 8..15) of the matching 32-bit dst[] pixel, writing zero to A/R/B.
// 'alpha_stride' is counted in uint8_t elements and 'dst_stride' in uint32_t
// elements, as both are added directly to the respective row pointers.
// Blocks of 16 pixels are handled with SSE2; the remaining (width % 16)
// pixels of each row are handled by a scalar loop.
static void DispatchAlphaToGreen(const uint8_t* alpha, int alpha_stride,
                                 int width, int height,
                                 uint32_t* dst, int dst_stride) {
  int i, j;
  const __m128i zero = _mm_setzero_si128();
  const int limit = width & ~15;  // width rounded down to a multiple of 16
  for (j = 0; j < height; ++j) {
    for (i = 0; i < limit; i += 16) {   // process 16 alpha bytes
      // The source is read-only: keep the 'const' qualifier in the cast.
      const __m128i a0 = _mm_loadu_si128((const __m128i*)&alpha[i]);
      // Interleaving with 'zero' as the FIRST operand puts each alpha byte in
      // the high byte of a 16-bit lane, i.e. lane = alpha << 8.
      const __m128i a1 = _mm_unpacklo_epi8(zero, a0);  // pixels 0..7
      const __m128i b1 = _mm_unpackhi_epi8(zero, a0);  // pixels 8..15
      // Widen 16b -> 32b with zeros on top: each lane is now 0x0000aa00.
      const __m128i a2_lo = _mm_unpacklo_epi16(a1, zero);  // pixels 0..3
      const __m128i b2_lo = _mm_unpacklo_epi16(b1, zero);  // pixels 8..11
      const __m128i a2_hi = _mm_unpackhi_epi16(a1, zero);  // pixels 4..7
      const __m128i b2_hi = _mm_unpackhi_epi16(b1, zero);  // pixels 12..15
      _mm_storeu_si128((__m128i*)&dst[i +  0], a2_lo);
      _mm_storeu_si128((__m128i*)&dst[i +  4], a2_hi);
      _mm_storeu_si128((__m128i*)&dst[i +  8], b2_lo);
      _mm_storeu_si128((__m128i*)&dst[i + 12], b2_hi);
    }
    // Scalar tail for the left-over pixels of this row.
    for (; i < width; ++i) dst[i] = alpha[i] << 8;
    alpha += alpha_stride;
    dst += dst_stride;
  }
}
|
||||||
|
|
||||||
static int ExtractAlpha(const uint8_t* argb, int argb_stride,
|
static int ExtractAlpha(const uint8_t* argb, int argb_stride,
|
||||||
int width, int height,
|
int width, int height,
|
||||||
uint8_t* alpha, int alpha_stride) {
|
uint8_t* alpha, int alpha_stride) {
|
||||||
@ -264,6 +290,7 @@ void WebPInitAlphaProcessingSSE2(void) {
|
|||||||
WebPMultRow = MultRow;
|
WebPMultRow = MultRow;
|
||||||
WebPApplyAlphaMultiply = ApplyAlphaMultiply;
|
WebPApplyAlphaMultiply = ApplyAlphaMultiply;
|
||||||
WebPDispatchAlpha = DispatchAlpha;
|
WebPDispatchAlpha = DispatchAlpha;
|
||||||
|
WebPDispatchAlphaToGreen = DispatchAlphaToGreen;
|
||||||
WebPExtractAlpha = ExtractAlpha;
|
WebPExtractAlpha = ExtractAlpha;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
@ -274,6 +274,12 @@ extern int (*WebPDispatchAlpha)(const uint8_t* alpha, int alpha_stride,
|
|||||||
int width, int height,
|
int width, int height,
|
||||||
uint8_t* dst, int dst_stride);
|
uint8_t* dst, int dst_stride);
|
||||||
|
|
||||||
|
// Transfer packed 8b alpha[] values to green channel in dst[], zero'ing the
|
||||||
|
// A/R/B values. 'dst_stride' is the stride for dst[] in uint32_t units.
|
||||||
|
extern void (*WebPDispatchAlphaToGreen)(const uint8_t* alpha, int alpha_stride,
|
||||||
|
int width, int height,
|
||||||
|
uint32_t* dst, int dst_stride);
|
||||||
|
|
||||||
// Extract the alpha values from 32b values in argb[] and pack them into alpha[]
|
// Extract the alpha values from 32b values in argb[] and pack them into alpha[]
|
||||||
// (this is the opposite of WebPDispatchAlpha).
|
// (this is the opposite of WebPDispatchAlpha).
|
||||||
// Returns true if there's only trivial 0xff alpha values.
|
// Returns true if there's only trivial 0xff alpha values.
|
||||||
|
@ -61,18 +61,8 @@ static int EncodeLossless(const uint8_t* const data, int width, int height,
|
|||||||
if (!WebPPictureAlloc(&picture)) return 0;
|
if (!WebPPictureAlloc(&picture)) return 0;
|
||||||
|
|
||||||
// Transfer the alpha values to the green channel.
|
// Transfer the alpha values to the green channel.
|
||||||
{
|
WebPDispatchAlphaToGreen(data, width, picture.width, picture.height,
|
||||||
int i, j;
|
picture.argb, picture.argb_stride);
|
||||||
uint32_t* dst = picture.argb;
|
|
||||||
const uint8_t* src = data;
|
|
||||||
for (j = 0; j < picture.height; ++j) {
|
|
||||||
for (i = 0; i < picture.width; ++i) {
|
|
||||||
dst[i] = src[i] << 8; // we leave A/R/B channels zero'd.
|
|
||||||
}
|
|
||||||
src += width;
|
|
||||||
dst += picture.argb_stride;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
WebPConfigInit(&config);
|
WebPConfigInit(&config);
|
||||||
config.lossless = 1;
|
config.lossless = 1;
|
||||||
@ -376,6 +366,7 @@ static int CompressAlphaJob(VP8Encoder* const enc, void* dummy) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void VP8EncInitAlpha(VP8Encoder* const enc) {
|
void VP8EncInitAlpha(VP8Encoder* const enc) {
|
||||||
|
WebPInitAlphaProcessing();
|
||||||
enc->has_alpha_ = WebPPictureHasTransparency(enc->pic_);
|
enc->has_alpha_ = WebPPictureHasTransparency(enc->pic_);
|
||||||
enc->alpha_data_ = NULL;
|
enc->alpha_data_ = NULL;
|
||||||
enc->alpha_data_size_ = 0;
|
enc->alpha_data_size_ = 0;
|
||||||
@ -430,4 +421,3 @@ int VP8EncDeleteAlpha(VP8Encoder* const enc) {
|
|||||||
enc->has_alpha_ = 0;
|
enc->has_alpha_ = 0;
|
||||||
return ok;
|
return ok;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user