mirror of
				https://github.com/webmproject/libwebp.git
				synced 2025-10-31 10:25:46 +01:00 
			
		
		
		
	Add a WebPExtractAlpha function to dsp
This is the opposite of WebPDispatchAlpha. Also implement the SSE2 version.
Change-Id: I0c297309255f508c5261da8aad01f7e57f924d6c
This commit is contained in:
		| @@ -303,9 +303,28 @@ static int DispatchAlpha(const uint8_t* alpha, int alpha_stride, | ||||
|   return (alpha_mask != 0xff); | ||||
| } | ||||
|  | ||||
| // Copies the first byte of every 4-byte pixel of argb[] into the packed | ||||
| // alpha[] plane, one row at a time. Both strides are expressed in bytes. | ||||
| // Returns 1 if every copied value equals 0xff, 0 otherwise. | ||||
| static int ExtractAlpha(const uint8_t* argb, int argb_stride, | ||||
|                         int width, int height, | ||||
|                         uint8_t* alpha, int alpha_stride) { | ||||
|   const uint8_t* src_row = argb; | ||||
|   uint8_t* dst_row = alpha; | ||||
|   uint8_t running_and = 0xff;  // 'and' of all the values copied so far | ||||
|   int x, y; | ||||
|  | ||||
|   for (y = 0; y < height; ++y) { | ||||
|     for (x = 0; x < width; ++x) { | ||||
|       dst_row[x] = src_row[4 * x]; | ||||
|       running_and &= dst_row[x]; | ||||
|     } | ||||
|     src_row += argb_stride; | ||||
|     dst_row += alpha_stride; | ||||
|   } | ||||
|   // Any value different from 0xff clears at least one bit of the accumulator. | ||||
|   return (running_and == 0xff) ? 1 : 0; | ||||
| } | ||||
|  | ||||
| // Function pointers to the alpha-processing routines. WebPInitAlphaProcessing() | ||||
| // points them at the plain-C implementations of this file before optionally | ||||
| // overwriting some of them with faster versions. | ||||
| void (*WebPApplyAlphaMultiply)(uint8_t*, int, int, int, int); | ||||
| void (*WebPApplyAlphaMultiply4444)(uint8_t*, int, int, int); | ||||
| int (*WebPDispatchAlpha)(const uint8_t*, int, int, int, uint8_t*, int); | ||||
| int (*WebPExtractAlpha)(const uint8_t*, int, int, int, uint8_t*, int); | ||||
|  | ||||
| //------------------------------------------------------------------------------ | ||||
| // Init function | ||||
| @@ -320,6 +339,7 @@ void WebPInitAlphaProcessing(void) { | ||||
|   WebPApplyAlphaMultiply = ApplyAlphaMultiply; | ||||
|   WebPApplyAlphaMultiply4444 = ApplyAlphaMultiply_16b; | ||||
|   WebPDispatchAlpha = DispatchAlpha; | ||||
|   WebPExtractAlpha = ExtractAlpha; | ||||
|  | ||||
|   // If defined, use CPUInfo() to overwrite some pointers with faster versions. | ||||
|   if (VP8GetCPUInfo != NULL) { | ||||
|   | ||||
| @@ -72,6 +72,51 @@ static int DispatchAlpha(const uint8_t* alpha, int alpha_stride, | ||||
|   return (alpha_and != 0xff); | ||||
| } | ||||
|  | ||||
| // SSE2 version: copies the first byte of every 4-byte pixel of argb[] into the | ||||
| // packed alpha[] plane. Eight pixels are handled per vector iteration; the | ||||
| // remaining pixels of each row go through a scalar loop. | ||||
| // Returns true if every extracted alpha value is 0xff. | ||||
| static int ExtractAlpha(const uint8_t* argb, int argb_stride, | ||||
|                         int width, int height, | ||||
|                         uint8_t* alpha, int alpha_stride) { | ||||
|   // alpha_and stores an 'and' operation of the alpha[] values handled by the | ||||
|   // scalar loop; the vector loop accumulates into 'all_alphas' instead. Both | ||||
|   // are merged before returning. The final value is not 0xff if any of the | ||||
|   // alpha[] is not equal to 0xff. | ||||
|   uint32_t alpha_and = 0xff; | ||||
|   int i, j; | ||||
|   const __m128i a_mask = _mm_set1_epi32(0xffu);  // to preserve alpha | ||||
|   // 0xff in the eight low bytes, zero in the eight high bytes. | ||||
|   const __m128i all_0xff = _mm_set_epi32(0, 0, ~0u, ~0u); | ||||
|   __m128i all_alphas = all_0xff; | ||||
|  | ||||
|   // Each 32-bit lane load covers the 3 bytes that follow an alpha byte, and we | ||||
|   // don't know if alpha is the first or the last byte of the quadruplet. | ||||
|   // Stopping the vector loop at '(width - 1) & ~7' keeps its loads at or before | ||||
|   // pixel 'width - 2', so these extra bytes always lie before the last read | ||||
|   // byte 'src[4 * width - 4]'. | ||||
|   const int limit = (width - 1) & ~7; | ||||
|  | ||||
|   for (j = 0; j < height; ++j) { | ||||
|     const __m128i* src = (const __m128i*)argb; | ||||
|     for (i = 0; i < limit; i += 8) { | ||||
|       // load 32 argb bytes | ||||
|       const __m128i a0 = _mm_loadu_si128(src + 0); | ||||
|       const __m128i a1 = _mm_loadu_si128(src + 1); | ||||
|       // keep only the low byte of each 32-bit lane: every lane is in [0, 255] | ||||
|       const __m128i b0 = _mm_and_si128(a0, a_mask); | ||||
|       const __m128i b1 = _mm_and_si128(a1, a_mask); | ||||
|       // narrow 32b -> 16b -> 8b; the saturating packs cannot clip such values. | ||||
|       // d0 holds the same eight alpha bytes in its low and high halves. | ||||
|       const __m128i c0 = _mm_packs_epi32(b0, b1); | ||||
|       const __m128i d0 = _mm_packus_epi16(c0, c0); | ||||
|       // store the eight low bytes | ||||
|       _mm_storel_epi64((__m128i*)&alpha[i], d0); | ||||
|       // accumulate eight alpha 'and' in parallel (the high half of all_alphas | ||||
|       // starts at zero and therefore stays zero) | ||||
|       all_alphas = _mm_and_si128(all_alphas, d0); | ||||
|       src += 2; | ||||
|     } | ||||
|     // scalar loop for the pixels not covered by the vector loop | ||||
|     for (; i < width; ++i) { | ||||
|       const uint32_t alpha_value = argb[4 * i]; | ||||
|       alpha[i] = alpha_value; | ||||
|       alpha_and &= alpha_value; | ||||
|     } | ||||
|     argb += argb_stride; | ||||
|     alpha += alpha_stride; | ||||
|   } | ||||
|   // Combine the eight alpha 'and' into a 8-bit mask: bit k (k < 8) of the | ||||
|   // movemask is set only if accumulated byte k is still 0xff. Bits 8..15 are | ||||
|   // always set (zero == zero) but are discarded since alpha_and is <= 0xff. | ||||
|   alpha_and &= _mm_movemask_epi8(_mm_cmpeq_epi8(all_alphas, all_0xff)); | ||||
|   return (alpha_and == 0xff); | ||||
| } | ||||
|  | ||||
| //------------------------------------------------------------------------------ | ||||
| // Non-dither premultiplied modes | ||||
|  | ||||
| @@ -219,5 +264,6 @@ void WebPInitAlphaProcessingSSE2(void) { | ||||
|   WebPMultRow = MultRow; | ||||
|   WebPApplyAlphaMultiply = ApplyAlphaMultiply; | ||||
|   WebPDispatchAlpha = DispatchAlpha; | ||||
|   WebPExtractAlpha = ExtractAlpha; | ||||
| #endif | ||||
| } | ||||
|   | ||||
| @@ -268,13 +268,19 @@ extern void (*WebPApplyAlphaMultiply)( | ||||
| extern void (*WebPApplyAlphaMultiply4444)( | ||||
|     uint8_t* rgba4444, int w, int h, int stride); | ||||
|  | ||||
|  | ||||
| // Dispatch the values from alpha[] plane to the ARGB destination 'dst'. | ||||
| // Returns true if alpha[] plane has non-trivial values different from 0xff. | ||||
| extern int (*WebPDispatchAlpha)(const uint8_t* alpha, int alpha_stride, | ||||
|                                 int width, int height, | ||||
|                                 uint8_t* dst, int dst_stride); | ||||
|  | ||||
| // Extract the alpha values from 32b values in argb[] and pack them into alpha[] | ||||
| // (this is the opposite of WebPDispatchAlpha). | ||||
| // 'argb' must point to the alpha byte of the first pixel: one byte is read | ||||
| // every 4 bytes, 'width' times per row, for 'height' rows. 'argb_stride' and | ||||
| // 'alpha_stride' are the distances in bytes between two consecutive rows. | ||||
| // Returns true if all the extracted alpha values are the trivial 0xff value. | ||||
| extern int (*WebPExtractAlpha)(const uint8_t* argb, int argb_stride, | ||||
|                                int width, int height, | ||||
|                                uint8_t* alpha, int alpha_stride); | ||||
|  | ||||
| // Pre-Multiply operation transforms x into x * A / 255  (where x=Y,R,G or B). | ||||
| // Un-Multiply operation transforms x into x * 255 / A. | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user