mirror of
				https://github.com/webmproject/libwebp.git
				synced 2025-10-31 02:15:42 +01:00 
			
		
		
		
	add WebPDispatchAlphaToGreen() to dsp
The SSE2 version is 2.1x faster. This function is used to transfer the alpha plane to the green channel before lossless compression. Change-Id: I01d9df0051c183b1ff5d6eb69961d4f43e33141a
This commit is contained in:
		| @@ -303,6 +303,19 @@ static int DispatchAlpha(const uint8_t* alpha, int alpha_stride, | |||||||
|   return (alpha_mask != 0xff); |   return (alpha_mask != 0xff); | ||||||
| } | } | ||||||
|  |  | ||||||
// Plain-C fallback: writes each 8-bit alpha[] sample into the green channel
// (bits 8..15) of the matching 32-bit dst[] entry. The remaining three
// channels (A/R/B) of every written entry are set to zero.
//   alpha, alpha_stride: source plane and its row pitch, in bytes.
//   width, height:       dimensions of the area to transfer, in samples.
//   dst, dst_stride:     destination plane and its row pitch, in uint32_t units.
static void DispatchAlphaToGreen(const uint8_t* alpha, int alpha_stride,
                                 int width, int height,
                                 uint32_t* dst, int dst_stride) {
  const uint8_t* src_row = alpha;
  uint32_t* dst_row = dst;
  int rows_left;
  for (rows_left = height; rows_left > 0; --rows_left) {
    int x = 0;
    while (x < width) {
      // The shifted value is at most 0xff00, so only the green byte is set.
      dst_row[x] = src_row[x] << 8;
      ++x;
    }
    // Step both planes to the start of their next row.
    src_row += alpha_stride;
    dst_row += dst_stride;
  }
}
|  |  | ||||||
| static int ExtractAlpha(const uint8_t* argb, int argb_stride, | static int ExtractAlpha(const uint8_t* argb, int argb_stride, | ||||||
|                         int width, int height, |                         int width, int height, | ||||||
|                         uint8_t* alpha, int alpha_stride) { |                         uint8_t* alpha, int alpha_stride) { | ||||||
| @@ -324,6 +337,7 @@ static int ExtractAlpha(const uint8_t* argb, int argb_stride, | |||||||
| void (*WebPApplyAlphaMultiply)(uint8_t*, int, int, int, int); | void (*WebPApplyAlphaMultiply)(uint8_t*, int, int, int, int); | ||||||
| void (*WebPApplyAlphaMultiply4444)(uint8_t*, int, int, int); | void (*WebPApplyAlphaMultiply4444)(uint8_t*, int, int, int); | ||||||
| int (*WebPDispatchAlpha)(const uint8_t*, int, int, int, uint8_t*, int); | int (*WebPDispatchAlpha)(const uint8_t*, int, int, int, uint8_t*, int); | ||||||
|  | void (*WebPDispatchAlphaToGreen)(const uint8_t*, int, int, int, uint32_t*, int); | ||||||
| int (*WebPExtractAlpha)(const uint8_t*, int, int, int, uint8_t*, int); | int (*WebPExtractAlpha)(const uint8_t*, int, int, int, uint8_t*, int); | ||||||
|  |  | ||||||
| //------------------------------------------------------------------------------ | //------------------------------------------------------------------------------ | ||||||
| @@ -339,6 +353,7 @@ void WebPInitAlphaProcessing(void) { | |||||||
|   WebPApplyAlphaMultiply = ApplyAlphaMultiply; |   WebPApplyAlphaMultiply = ApplyAlphaMultiply; | ||||||
|   WebPApplyAlphaMultiply4444 = ApplyAlphaMultiply_16b; |   WebPApplyAlphaMultiply4444 = ApplyAlphaMultiply_16b; | ||||||
|   WebPDispatchAlpha = DispatchAlpha; |   WebPDispatchAlpha = DispatchAlpha; | ||||||
|  |   WebPDispatchAlphaToGreen = DispatchAlphaToGreen; | ||||||
|   WebPExtractAlpha = ExtractAlpha; |   WebPExtractAlpha = ExtractAlpha; | ||||||
|  |  | ||||||
|   // If defined, use CPUInfo() to overwrite some pointers with faster versions. |   // If defined, use CPUInfo() to overwrite some pointers with faster versions. | ||||||
|   | |||||||
| @@ -72,6 +72,32 @@ static int DispatchAlpha(const uint8_t* alpha, int alpha_stride, | |||||||
|   return (alpha_and != 0xff); |   return (alpha_and != 0xff); | ||||||
| } | } | ||||||
|  |  | ||||||
// SSE2 version: writes each 8-bit alpha[] sample into the green channel
// (bits 8..15) of the matching 32-bit dst[] entry, leaving the A/R/B channels
// of every written entry at zero.
//   alpha, alpha_stride: source plane and its row pitch, in bytes.
//   width, height:       dimensions of the area to transfer, in samples.
//   dst, dst_stride:     destination plane and its row pitch, in uint32_t units.
// Rows are processed 16 samples at a time with unaligned loads/stores; the
// remaining (width % 16) samples of each row go through a scalar loop.
static void DispatchAlphaToGreen(const uint8_t* alpha, int alpha_stride,
                                 int width, int height,
                                 uint32_t* dst, int dst_stride) {
  int i, j;
  const __m128i zero = _mm_setzero_si128();
  const int limit = width & ~15;   // width rounded down to a multiple of 16
  for (j = 0; j < height; ++j) {
    for (i = 0; i < limit; i += 16) {   // process 16 alpha bytes
      // Keep the 'const' qualifier of alpha[] when forming the load pointer.
      const __m128i a0 = _mm_loadu_si128((const __m128i*)&alpha[i]);
      // Interleaving with 'zero' FIRST puts each alpha byte in the high byte
      // of a 16-bit lane, i.e. it performs the '<< 8'.
      const __m128i a1 = _mm_unpacklo_epi8(zero, a0);   // samples 0..7
      const __m128i b1 = _mm_unpackhi_epi8(zero, a0);   // samples 8..15
      // Interleaving with 'zero' SECOND widens each 16-bit lane to 32 bits
      // with cleared upper halves.
      const __m128i a2_lo = _mm_unpacklo_epi16(a1, zero);   // samples 0..3
      const __m128i b2_lo = _mm_unpacklo_epi16(b1, zero);   // samples 8..11
      const __m128i a2_hi = _mm_unpackhi_epi16(a1, zero);   // samples 4..7
      const __m128i b2_hi = _mm_unpackhi_epi16(b1, zero);   // samples 12..15
      _mm_storeu_si128((__m128i*)&dst[i +  0], a2_lo);
      _mm_storeu_si128((__m128i*)&dst[i +  4], a2_hi);
      _mm_storeu_si128((__m128i*)&dst[i +  8], b2_lo);
      _mm_storeu_si128((__m128i*)&dst[i + 12], b2_hi);
    }
    // Scalar tail for the samples that do not fill a whole 16-byte vector.
    for (; i < width; ++i) {
      dst[i] = (uint32_t)alpha[i] << 8;
    }
    alpha += alpha_stride;
    dst += dst_stride;
  }
}
|  |  | ||||||
| static int ExtractAlpha(const uint8_t* argb, int argb_stride, | static int ExtractAlpha(const uint8_t* argb, int argb_stride, | ||||||
|                         int width, int height, |                         int width, int height, | ||||||
|                         uint8_t* alpha, int alpha_stride) { |                         uint8_t* alpha, int alpha_stride) { | ||||||
| @@ -264,6 +290,7 @@ void WebPInitAlphaProcessingSSE2(void) { | |||||||
|   WebPMultRow = MultRow; |   WebPMultRow = MultRow; | ||||||
|   WebPApplyAlphaMultiply = ApplyAlphaMultiply; |   WebPApplyAlphaMultiply = ApplyAlphaMultiply; | ||||||
|   WebPDispatchAlpha = DispatchAlpha; |   WebPDispatchAlpha = DispatchAlpha; | ||||||
|  |   WebPDispatchAlphaToGreen = DispatchAlphaToGreen; | ||||||
|   WebPExtractAlpha = ExtractAlpha; |   WebPExtractAlpha = ExtractAlpha; | ||||||
| #endif | #endif | ||||||
| } | } | ||||||
|   | |||||||
| @@ -274,6 +274,12 @@ extern int (*WebPDispatchAlpha)(const uint8_t* alpha, int alpha_stride, | |||||||
|                                 int width, int height, |                                 int width, int height, | ||||||
|                                 uint8_t* dst, int dst_stride); |                                 uint8_t* dst, int dst_stride); | ||||||
|  |  | ||||||
|  | // Transfer packed 8b alpha[] values to green channel in dst[], zero'ing the | ||||||
|  | // A/R/B values. 'dst_stride' is the stride for dst[] in uint32_t units. | ||||||
|  | extern void (*WebPDispatchAlphaToGreen)(const uint8_t* alpha, int alpha_stride, | ||||||
|  |                                         int width, int height, | ||||||
|  |                                         uint32_t* dst, int dst_stride); | ||||||
|  |  | ||||||
| // Extract the alpha values from 32b values in argb[] and pack them into alpha[] | // Extract the alpha values from 32b values in argb[] and pack them into alpha[] | ||||||
| // (this is the opposite of WebPDispatchAlpha). | // (this is the opposite of WebPDispatchAlpha). | ||||||
| // Returns true if there's only trivial 0xff alpha values. | // Returns true if there's only trivial 0xff alpha values. | ||||||
|   | |||||||
| @@ -61,18 +61,8 @@ static int EncodeLossless(const uint8_t* const data, int width, int height, | |||||||
|   if (!WebPPictureAlloc(&picture)) return 0; |   if (!WebPPictureAlloc(&picture)) return 0; | ||||||
|  |  | ||||||
|   // Transfer the alpha values to the green channel. |   // Transfer the alpha values to the green channel. | ||||||
|   { |   WebPDispatchAlphaToGreen(data, width, picture.width, picture.height, | ||||||
|     int i, j; |                            picture.argb, picture.argb_stride); | ||||||
|     uint32_t* dst = picture.argb; |  | ||||||
|     const uint8_t* src = data; |  | ||||||
|     for (j = 0; j < picture.height; ++j) { |  | ||||||
|       for (i = 0; i < picture.width; ++i) { |  | ||||||
|         dst[i] = src[i] << 8;  // we leave A/R/B channels zero'd. |  | ||||||
|       } |  | ||||||
|       src += width; |  | ||||||
|       dst += picture.argb_stride; |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   WebPConfigInit(&config); |   WebPConfigInit(&config); | ||||||
|   config.lossless = 1; |   config.lossless = 1; | ||||||
| @@ -376,6 +366,7 @@ static int CompressAlphaJob(VP8Encoder* const enc, void* dummy) { | |||||||
| } | } | ||||||
|  |  | ||||||
| void VP8EncInitAlpha(VP8Encoder* const enc) { | void VP8EncInitAlpha(VP8Encoder* const enc) { | ||||||
|  |   WebPInitAlphaProcessing(); | ||||||
|   enc->has_alpha_ = WebPPictureHasTransparency(enc->pic_); |   enc->has_alpha_ = WebPPictureHasTransparency(enc->pic_); | ||||||
|   enc->alpha_data_ = NULL; |   enc->alpha_data_ = NULL; | ||||||
|   enc->alpha_data_size_ = 0; |   enc->alpha_data_size_ = 0; | ||||||
| @@ -430,4 +421,3 @@ int VP8EncDeleteAlpha(VP8Encoder* const enc) { | |||||||
|   enc->has_alpha_ = 0; |   enc->has_alpha_ = 0; | ||||||
|   return ok; |   return ok; | ||||||
| } | } | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user