Merge "further refine the COPY_PATTERN optim for DecodeAlpha"

This commit is contained in:
skal 2014-09-04 03:43:55 -07:00 committed by Gerrit Code Review
commit e564062522

View File

@ -746,37 +746,81 @@ static void ExtractPalettedAlphaRows(VP8LDecoder* const dec, int row) {
} }
// cyclic rotation of pattern word // cyclic rotation of pattern word
static WEBP_INLINE uint32_t Rotate8b(uint32_t V) {
#if defined(WORDS_BIGENDIAN) #if defined(WORDS_BIGENDIAN)
#define ROTATE8b(V) do { \ return ((V & 0xff000000u) >> 24) | (V << 8);
(V) = (((V) & 0xff000000u) >> 24) | ((V) << 8); \
} while (0)
#else #else
#define ROTATE8b(V) do { \ return ((V & 0xffu) << 24) | (V >> 8);
(V) = (((V) & 0xffu) << 24) | ((V) >> 8); \
} while (0)
#endif #endif
}
// copy 1, 2 or 4-bytes pattern // copy 1, 2 or 4-bytes pattern
#define COPY_SMALL_PATTERN() do { \ static WEBP_INLINE void CopySmallPattern(const uint8_t* data_src,
int ilength = length; \ uint8_t* data_dst,
uint32_t* pdata; \ int length, uint32_t pattern) {
int j = 0; \ int ilength = length;
while ((uintptr_t)pdata1 & 3) { \ uint32_t* pdata;
*pdata1++ = pdata2[j]; \ int j = 0;
ROTATE8b(temp1); \ int i;
++j; \ // align 'data_dst' to 4-bytes boundary. Adjust the pattern along the way.
} \ while ((uintptr_t)data_dst & 3) {
ilength -= j; \ *data_dst++ = data_src[j];
pdata = (uint32_t*)pdata1; \ pattern = Rotate8b(pattern);
for (i = 0; i < (ilength >> 2); ++i) { \ ++j;
pdata[i] = temp1; \ }
} \ ilength -= j;
pdata1 = (uint8_t*)pdata; \ data_src += j;
pdata2 += j; \ // Copy the pattern 4 bytes at a time.
for (i <<= 2; i < ilength; ++i) { \ pdata = (uint32_t*)data_dst;
pdata1[i] = pdata2[i]; \ for (i = 0; i < (ilength >> 2); ++i) {
} \ pdata[i] = pattern;
} while (0) }
// Finish with left-overs. 'pattern' is still correctly positioned,
// so no Rotate8b() call is needed.
data_dst = (uint8_t*)pdata;
for (i <<= 2; i < ilength; ++i) {
data_dst[i] = data_src[i];
}
}
// Copies 'length' bytes from 'data_dst - dist' to 'data_dst' (backward
// reference copy). Source and destination may overlap: when dist < length the
// bytes written early in the copy are re-read later on, so the copy must
// behave like a forward, byte-by-byte one (neither memcpy() nor memmove()
// semantics apply).
//   data_dst: first byte to write. Bytes [data_dst - dist, data_dst) are read,
//             so they must be readable, and 'length' bytes starting at
//             data_dst must be writable (to be guaranteed by the caller).
//   dist:     distance, in bytes, back to the source.
//   length:   number of bytes to produce.
// For length >= 8 and a period of 1, 2 or 4 bytes, the repeating pattern is
// packed in a 32-bit word and handed over to CopySmallPattern().
static WEBP_INLINE void CopyBlock(uint8_t* data_dst, int dist, int length) {
  const uint8_t* data_src = data_dst - dist;
  int i;
  if (length >= 8) {
    uint32_t pattern;
    // 'data_src' has no particular alignment (e.g. it is odd whenever
    // dist == 2 and 'data_dst' is odd), so the pattern is assembled from
    // single bytes instead of being loaded through a uint16_t* / uint32_t*
    // cast. The bytes are laid out in memory order, i.e. the word equals what
    // a native load of the same size would have returned.
    switch (dist) {
      case 1:
        pattern = data_src[0];
#if defined(__arm__) || defined(_M_ARM)   // arm doesn't like multiply that much
        pattern |= pattern << 8;
        pattern |= pattern << 16;
#else
        pattern = 0x01010101u * pattern;
#endif
        break;
      case 2:
#if defined(WORDS_BIGENDIAN)
        pattern = ((uint32_t)data_src[0] << 8) | (uint32_t)data_src[1];
#else
        pattern = (uint32_t)data_src[0] | ((uint32_t)data_src[1] << 8);
#endif
        // Replicate the 16-bit pattern into the upper half of the word.
#if defined(__arm__) || defined(_M_ARM)
        pattern |= pattern << 16;
#else
        pattern = 0x00010001u * pattern;
#endif
        break;
      case 4:
#if defined(WORDS_BIGENDIAN)
        pattern = ((uint32_t)data_src[0] << 24) | ((uint32_t)data_src[1] << 16) |
                  ((uint32_t)data_src[2] << 8) | (uint32_t)data_src[3];
#else
        pattern = (uint32_t)data_src[0] | ((uint32_t)data_src[1] << 8) |
                  ((uint32_t)data_src[2] << 16) | ((uint32_t)data_src[3] << 24);
#endif
        break;
      default:
        // No 32-bit pattern for this distance: use the plain copy.
        goto Copy;
    }
    CopySmallPattern(data_src, data_dst, length, pattern);
    return;
  }
 Copy:
  // Generic path: strictly forward byte copy, required for overlapping ranges.
  for (i = 0; i < length; ++i) {
    data_dst[i] = data_src[i];
  }
}
static int DecodeAlphaData(VP8LDecoder* const dec, uint8_t* const data, static int DecodeAlphaData(VP8LDecoder* const dec, uint8_t* const data,
int width, int height, int last_row) { int width, int height, int last_row) {
@ -824,41 +868,7 @@ static int DecodeAlphaData(VP8LDecoder* const dec, uint8_t* const data,
dist_code = GetCopyDistance(dist_symbol, br); dist_code = GetCopyDistance(dist_symbol, br);
dist = PlaneCodeToDistance(width, dist_code); dist = PlaneCodeToDistance(width, dist_code);
if (pos >= dist && end - pos >= length) { if (pos >= dist && end - pos >= length) {
uint8_t* pdata1 = data + pos; CopyBlock(data + pos, dist, length);
const uint8_t* pdata2 = pdata1 - dist;
int i;
if (length >= 8) {
uint32_t temp1;
switch (dist) {
case 1:
temp1 = pdata1[-1];
#if defined(__arm__) || defined(_M_ARM) // arm doesn't like multiply that much
temp1 |= temp1 << 8;
temp1 |= temp1 << 16;
#else
temp1 = 0x01010101u * temp1;
#endif
break;
case 2:
temp1 = ((uint16_t*)pdata1)[-1];
#if defined(__arm__) || defined(_M_ARM)
temp1 |= temp1 << 16;
#else
temp1 = 0x00010001u * temp1;
#endif
break;
case 4:
temp1 = ((uint32_t*)pdata1)[-1];
break;
default:
goto Copy;
break;
}
COPY_SMALL_PATTERN();
} else {
Copy:
for (i = 0; i < length; ++i) pdata1[i] = pdata2[i];
}
} else { } else {
ok = 0; ok = 0;
goto End; goto End;
@ -897,9 +907,6 @@ static int DecodeAlphaData(VP8LDecoder* const dec, uint8_t* const data,
return ok; return ok;
} }
#undef COPY_PATTERN
#undef ROTATE8b
static int DecodeImageData(VP8LDecoder* const dec, uint32_t* const data, static int DecodeImageData(VP8LDecoder* const dec, uint32_t* const data,
int width, int height, int last_row, int width, int height, int last_row,
ProcessRowsFunc process_func) { ProcessRowsFunc process_func) {