diff --git a/src/dsp/alpha_processing.c b/src/dsp/alpha_processing.c index 590e3bc3..995b4345 100644 --- a/src/dsp/alpha_processing.c +++ b/src/dsp/alpha_processing.c @@ -366,6 +366,16 @@ static WEBP_INLINE uint32_t MakeARGB32(int a, int r, int g, int b) { return (((uint32_t)a << 24) | (r << 16) | (g << 8) | b); } +#ifdef WORDS_BIGENDIAN +static void PackARGB_C(const uint8_t* a, const uint8_t* r, const uint8_t* g, + const uint8_t* b, int len, uint32_t* out) { + int i; + for (i = 0; i < len; ++i) { + out[i] = MakeARGB32(a[4 * i], r[4 * i], g[4 * i], b[4 * i]); + } +} +#endif + static void PackRGB_C(const uint8_t* r, const uint8_t* g, const uint8_t* b, int len, int step, uint32_t* out) { int i, offset = 0; @@ -381,6 +391,10 @@ int (*WebPDispatchAlpha)(const uint8_t*, int, int, int, uint8_t*, int); void (*WebPDispatchAlphaToGreen)(const uint8_t*, int, int, int, uint32_t*, int); int (*WebPExtractAlpha)(const uint8_t*, int, int, int, uint8_t*, int); void (*WebPExtractGreen)(const uint32_t* argb, uint8_t* alpha, int size); +#ifdef WORDS_BIGENDIAN +void (*WebPPackARGB)(const uint8_t* a, const uint8_t* r, const uint8_t* g, + const uint8_t* b, int, uint32_t*); +#endif void (*WebPPackRGB)(const uint8_t* r, const uint8_t* g, const uint8_t* b, int len, int step, uint32_t* out); @@ -405,6 +419,9 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessing(void) { WebPMultRow = WebPMultRow_C; WebPApplyAlphaMultiply4444 = ApplyAlphaMultiply_16b_C; +#ifdef WORDS_BIGENDIAN + WebPPackARGB = PackARGB_C; +#endif WebPPackRGB = PackRGB_C; #if !WEBP_NEON_OMIT_C_CODE WebPApplyAlphaMultiply = ApplyAlphaMultiply_C; @@ -451,6 +468,9 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessing(void) { assert(WebPDispatchAlphaToGreen != NULL); assert(WebPExtractAlpha != NULL); assert(WebPExtractGreen != NULL); +#ifdef WORDS_BIGENDIAN + assert(WebPPackARGB != NULL); +#endif assert(WebPPackRGB != NULL); assert(WebPHasAlpha8b != NULL); assert(WebPHasAlpha32b != NULL); diff --git a/src/dsp/alpha_processing_mips_dsp_r2.c b/src/dsp/alpha_processing_mips_dsp_r2.c index e0dc91ba..0090e87c 100644 --- a/src/dsp/alpha_processing_mips_dsp_r2.c +++ b/src/dsp/alpha_processing_mips_dsp_r2.c @@ -125,6 +125,49 @@ static void MultARGBRow_MIPSdspR2(uint32_t* const ptr, int width, } } +#ifdef WORDS_BIGENDIAN +static void PackARGB_MIPSdspR2(const uint8_t* a, const uint8_t* r, + const uint8_t* g, const uint8_t* b, int len, + uint32_t* out) { + int temp0, temp1, temp2, temp3, offset; + const int rest = len & 1; + const uint32_t* const loop_end = out + len - rest; + const int step = 4; + __asm__ volatile ( + "xor %[offset], %[offset], %[offset] \n\t" + "beq %[loop_end], %[out], 0f \n\t" + "2: \n\t" + "lbux %[temp0], %[offset](%[a]) \n\t" + "lbux %[temp1], %[offset](%[r]) \n\t" + "lbux %[temp2], %[offset](%[g]) \n\t" + "lbux %[temp3], %[offset](%[b]) \n\t" + "ins %[temp1], %[temp0], 16, 16 \n\t" + "ins %[temp3], %[temp2], 16, 16 \n\t" + "addiu %[out], %[out], 4 \n\t" + "precr.qb.ph %[temp0], %[temp1], %[temp3] \n\t" + "sw %[temp0], -4(%[out]) \n\t" + "addu %[offset], %[offset], %[step] \n\t" + "bne %[loop_end], %[out], 2b \n\t" + "0: \n\t" + "beq %[rest], $zero, 1f \n\t" + "lbux %[temp0], %[offset](%[a]) \n\t" + "lbux %[temp1], %[offset](%[r]) \n\t" + "lbux %[temp2], %[offset](%[g]) \n\t" + "lbux %[temp3], %[offset](%[b]) \n\t" + "ins %[temp1], %[temp0], 16, 16 \n\t" + "ins %[temp3], %[temp2], 16, 16 \n\t" + "precr.qb.ph %[temp0], %[temp1], %[temp3] \n\t" + "sw %[temp0], 0(%[out]) \n\t" + "1: \n\t" + : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), + [temp3]"=&r"(temp3), [offset]"=&r"(offset), [out]"+&r"(out) + : [a]"r"(a), [r]"r"(r), [g]"r"(g), [b]"r"(b), [step]"r"(step), + [loop_end]"r"(loop_end), [rest]"r"(rest) + : "memory" + ); +} +#endif // WORDS_BIGENDIAN + static void PackRGB_MIPSdspR2(const uint8_t* r, const uint8_t* g, const uint8_t* b, int len, int step, uint32_t* out) { @@ -172,6 +215,9 @@ extern void WebPInitAlphaProcessingMIPSdspR2(void); WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessingMIPSdspR2(void) { WebPDispatchAlpha = DispatchAlpha_MIPSdspR2; WebPMultARGBRow = MultARGBRow_MIPSdspR2; +#ifdef WORDS_BIGENDIAN + WebPPackARGB = PackARGB_MIPSdspR2; +#endif WebPPackRGB = PackRGB_MIPSdspR2; } diff --git a/src/dsp/dsp.h b/src/dsp/dsp.h index 99eefe09..5146949e 100644 --- a/src/dsp/dsp.h +++ b/src/dsp/dsp.h @@ -166,6 +166,13 @@ extern "C" { #define WEBP_SWAP_16BIT_CSP 0 #endif +// some endian fix (e.g.: mips-gcc doesn't define __BIG_ENDIAN__) +#if !defined(WORDS_BIGENDIAN) && \ + (defined(__BIG_ENDIAN__) || defined(_M_PPC) || \ + (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))) +#define WORDS_BIGENDIAN +#endif + typedef enum { kSSE2, kSSE3, @@ -578,6 +585,13 @@ void WebPMultRow_C(uint8_t* const ptr, const uint8_t* const alpha, int width, int inverse); void WebPMultARGBRow_C(uint32_t* const ptr, int width, int inverse); +#ifdef WORDS_BIGENDIAN +// ARGB packing function: a/r/g/b input is rgba or bgra order. +extern void (*WebPPackARGB)(const uint8_t* a, const uint8_t* r, + const uint8_t* g, const uint8_t* b, int len, + uint32_t* out); +#endif + // RGB packing function. 'step' can be 3 or 4. r/g/b input is rgb or bgr order. extern void (*WebPPackRGB)(const uint8_t* r, const uint8_t* g, const uint8_t* b, int len, int step, uint32_t* out); diff --git a/src/enc/picture_csp_enc.c b/src/enc/picture_csp_enc.c index d531dd02..7316f3cc 100644 --- a/src/enc/picture_csp_enc.c +++ b/src/enc/picture_csp_enc.c @@ -28,11 +28,11 @@ // If defined, use table to compute x / alpha. #define USE_INVERSE_ALPHA_TABLE -static const union { - uint32_t argb; - uint8_t bytes[4]; -} test_endian = { 0xff000000u }; -#define ALPHA_IS_LAST (test_endian.bytes[3] == 0xff) +#ifdef WORDS_BIGENDIAN +#define ALPHA_OFFSET 0 // uint32_t 0xff000000 is 0xff,00,00,00 in memory +#else +#define ALPHA_OFFSET 3 // uint32_t 0xff000000 is 0x00,00,00,ff in memory +#endif //------------------------------------------------------------------------------ // Detection of non-trivial transparency @@ -61,7 +61,7 @@ int WebPPictureHasTransparency(const WebPPicture* picture) { return CheckNonOpaque(picture->a, picture->width, picture->height, 1, picture->a_stride); } else { - const int alpha_offset = ALPHA_IS_LAST ? 3 : 0; + const int alpha_offset = ALPHA_OFFSET; return CheckNonOpaque((const uint8_t*)picture->argb + alpha_offset, picture->width, picture->height, 4, picture->argb_stride * sizeof(*picture->argb)); @@ -990,10 +990,10 @@ static int PictureARGBToYUVA(WebPPicture* picture, WebPEncCSP colorspace, return WebPEncodingSetError(picture, VP8_ENC_ERROR_INVALID_CONFIGURATION); } else { const uint8_t* const argb = (const uint8_t*)picture->argb; - const uint8_t* const r = ALPHA_IS_LAST ? argb + 2 : argb + 1; - const uint8_t* const g = ALPHA_IS_LAST ? argb + 1 : argb + 2; - const uint8_t* const b = ALPHA_IS_LAST ? argb + 0 : argb + 3; - const uint8_t* const a = ALPHA_IS_LAST ? argb + 3 : argb + 0; + const uint8_t* const a = argb + (0 ^ ALPHA_OFFSET); + const uint8_t* const r = argb + (1 ^ ALPHA_OFFSET); + const uint8_t* const g = argb + (2 ^ ALPHA_OFFSET); + const uint8_t* const b = argb + (3 ^ ALPHA_OFFSET); picture->colorspace = WEBP_YUV420; return ImportYUVAFromRGBA(r, g, b, a, 4, 4 * picture->argb_stride, @@ -1044,7 +1044,8 @@ int WebPPictureYUVAToARGB(WebPPicture* picture) { const int argb_stride = 4 * picture->argb_stride; uint8_t* dst = (uint8_t*)picture->argb; const uint8_t *cur_u = picture->u, *cur_v = picture->v, *cur_y = picture->y; - WebPUpsampleLinePairFunc upsample = WebPGetLinePairConverter(ALPHA_IS_LAST); + WebPUpsampleLinePairFunc upsample = + WebPGetLinePairConverter(ALPHA_OFFSET > 0); // First row, with replicated top samples. upsample(cur_y, NULL, cur_u, cur_v, cur_u, cur_v, dst, NULL, width); @@ -1087,6 +1088,7 @@ static int Import(WebPPicture* const picture, const uint8_t* rgb, int rgb_stride, int step, int swap_rb, int import_alpha) { int y; + // swap_rb -> b,g,r,a , !swap_rb -> r,g,b,a const uint8_t* r_ptr = rgb + (swap_rb ? 2 : 0); const uint8_t* g_ptr = rgb + 1; const uint8_t* b_ptr = rgb + (swap_rb ? 0 : 2); @@ -1104,19 +1106,32 @@ static int Import(WebPPicture* const picture, WebPInitAlphaProcessing(); if (import_alpha) { + // dst[] byte order is {a,r,g,b} for big-endian, {b,g,r,a} for little endian uint32_t* dst = picture->argb; - const int do_copy = - (!swap_rb && !ALPHA_IS_LAST) || (swap_rb && ALPHA_IS_LAST); + const int do_copy = (ALPHA_OFFSET == 3) && swap_rb; assert(step == 4); - for (y = 0; y < height; ++y) { - if (do_copy) { + if (do_copy) { + for (y = 0; y < height; ++y) { memcpy(dst, rgb, width * 4); - } else { + rgb += rgb_stride; + dst += picture->argb_stride; + } + } else { + for (y = 0; y < height; ++y) { +#ifdef WORDS_BIGENDIAN + // BGRA or RGBA input order. + const uint8_t* a_ptr = rgb + 3; + WebPPackARGB(a_ptr, r_ptr, g_ptr, b_ptr, width, dst); + r_ptr += rgb_stride; + g_ptr += rgb_stride; + b_ptr += rgb_stride; +#else // RGBA input order. Need to swap R and B. VP8LConvertBGRAToRGBA((const uint32_t*)rgb, width, (uint8_t*)dst); +#endif + rgb += rgb_stride; + dst += picture->argb_stride; } - rgb += rgb_stride; - dst += picture->argb_stride; } } else { uint32_t* dst = picture->argb; diff --git a/src/utils/endian_inl_utils.h b/src/utils/endian_inl_utils.h index 4b2f91df..3630a293 100644 --- a/src/utils/endian_inl_utils.h +++ b/src/utils/endian_inl_utils.h @@ -19,13 +19,6 @@ #include "src/dsp/dsp.h" #include "src/webp/types.h" -// some endian fix (e.g.: mips-gcc doesn't define __BIG_ENDIAN__) -#if !defined(WORDS_BIGENDIAN) && \ - (defined(__BIG_ENDIAN__) || defined(_M_PPC) || \ - (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))) -#define WORDS_BIGENDIAN -#endif - #if defined(WORDS_BIGENDIAN) #define HToLE32 BSwap32 #define HToLE16 BSwap16