mirror of
https://github.com/webmproject/libwebp.git
synced 2024-11-20 12:28:26 +01:00
Merge "further refine the COPY_PATTERN optim for DecodeAlpha"
This commit is contained in:
commit
e564062522
135
src/dec/vp8l.c
135
src/dec/vp8l.c
@ -746,37 +746,81 @@ static void ExtractPalettedAlphaRows(VP8LDecoder* const dec, int row) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// cyclic rotation of pattern word
|
// cyclic rotation of pattern word
|
||||||
|
static WEBP_INLINE uint32_t Rotate8b(uint32_t V) {
|
||||||
#if defined(WORDS_BIGENDIAN)
|
#if defined(WORDS_BIGENDIAN)
|
||||||
#define ROTATE8b(V) do { \
|
return ((V & 0xff000000u) >> 24) | (V << 8);
|
||||||
(V) = (((V) & 0xff000000u) >> 24) | ((V) << 8); \
|
|
||||||
} while (0)
|
|
||||||
#else
|
#else
|
||||||
#define ROTATE8b(V) do { \
|
return ((V & 0xffu) << 24) | (V >> 8);
|
||||||
(V) = (((V) & 0xffu) << 24) | ((V) >> 8); \
|
|
||||||
} while (0)
|
|
||||||
#endif
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
// copy 1, 2 or 4-bytes pattern
|
// copy 1, 2 or 4-bytes pattern
|
||||||
#define COPY_SMALL_PATTERN() do { \
|
static WEBP_INLINE void CopySmallPattern(const uint8_t* data_src,
|
||||||
int ilength = length; \
|
uint8_t* data_dst,
|
||||||
uint32_t* pdata; \
|
int length, uint32_t pattern) {
|
||||||
int j = 0; \
|
int ilength = length;
|
||||||
while ((uintptr_t)pdata1 & 3) { \
|
uint32_t* pdata;
|
||||||
*pdata1++ = pdata2[j]; \
|
int j = 0;
|
||||||
ROTATE8b(temp1); \
|
int i;
|
||||||
++j; \
|
// align 'data_dst' to 4-bytes boundary. Adjust the pattern along the way.
|
||||||
} \
|
while ((uintptr_t)data_dst & 3) {
|
||||||
ilength -= j; \
|
*data_dst++ = data_src[j];
|
||||||
pdata = (uint32_t*)pdata1; \
|
pattern = Rotate8b(pattern);
|
||||||
for (i = 0; i < (ilength >> 2); ++i) { \
|
++j;
|
||||||
pdata[i] = temp1; \
|
}
|
||||||
} \
|
ilength -= j;
|
||||||
pdata1 = (uint8_t*)pdata; \
|
data_src += j;
|
||||||
pdata2 += j; \
|
// Copy the pattern 4 bytes at a time.
|
||||||
for (i <<= 2; i < ilength; ++i) { \
|
pdata = (uint32_t*)data_dst;
|
||||||
pdata1[i] = pdata2[i]; \
|
for (i = 0; i < (ilength >> 2); ++i) {
|
||||||
} \
|
pdata[i] = pattern;
|
||||||
} while (0)
|
}
|
||||||
|
// Finish with left-overs. 'pattern' is still correctly positioned,
|
||||||
|
// so no Rotate8b() call is needed.
|
||||||
|
data_dst = (uint8_t*)pdata;
|
||||||
|
for (i <<= 2; i < ilength; ++i) {
|
||||||
|
data_dst[i] = data_src[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static WEBP_INLINE void CopyBlock(uint8_t* data_dst, int dist, int length) {
|
||||||
|
const uint8_t* data_src = data_dst - dist;
|
||||||
|
if (length >= 8) {
|
||||||
|
uint32_t pattern;
|
||||||
|
switch (dist) {
|
||||||
|
case 1:
|
||||||
|
pattern = *data_src;
|
||||||
|
#if defined(__arm__) || defined(_M_ARM) // arm doesn't like multiply that much
|
||||||
|
pattern |= pattern << 8;
|
||||||
|
pattern |= pattern << 16;
|
||||||
|
#else
|
||||||
|
pattern = 0x01010101u * pattern;
|
||||||
|
#endif
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
pattern = *(const uint16_t*)data_src;
|
||||||
|
#if defined(__arm__) || defined(_M_ARM)
|
||||||
|
pattern |= pattern << 16;
|
||||||
|
#else
|
||||||
|
pattern = 0x00010001u * pattern;
|
||||||
|
#endif
|
||||||
|
break;
|
||||||
|
case 4:
|
||||||
|
pattern = *(const uint32_t*)data_src;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
goto Copy;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
CopySmallPattern(data_src, data_dst, length, pattern);
|
||||||
|
} else {
|
||||||
|
Copy:
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
for (i = 0; i < length; ++i) data_dst[i] = data_src[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static int DecodeAlphaData(VP8LDecoder* const dec, uint8_t* const data,
|
static int DecodeAlphaData(VP8LDecoder* const dec, uint8_t* const data,
|
||||||
int width, int height, int last_row) {
|
int width, int height, int last_row) {
|
||||||
@ -824,41 +868,7 @@ static int DecodeAlphaData(VP8LDecoder* const dec, uint8_t* const data,
|
|||||||
dist_code = GetCopyDistance(dist_symbol, br);
|
dist_code = GetCopyDistance(dist_symbol, br);
|
||||||
dist = PlaneCodeToDistance(width, dist_code);
|
dist = PlaneCodeToDistance(width, dist_code);
|
||||||
if (pos >= dist && end - pos >= length) {
|
if (pos >= dist && end - pos >= length) {
|
||||||
uint8_t* pdata1 = data + pos;
|
CopyBlock(data + pos, dist, length);
|
||||||
const uint8_t* pdata2 = pdata1 - dist;
|
|
||||||
int i;
|
|
||||||
if (length >= 8) {
|
|
||||||
uint32_t temp1;
|
|
||||||
switch (dist) {
|
|
||||||
case 1:
|
|
||||||
temp1 = pdata1[-1];
|
|
||||||
#if defined(__arm__) || defined(_M_ARM) // arm doesn't like multiply that much
|
|
||||||
temp1 |= temp1 << 8;
|
|
||||||
temp1 |= temp1 << 16;
|
|
||||||
#else
|
|
||||||
temp1 = 0x01010101u * temp1;
|
|
||||||
#endif
|
|
||||||
break;
|
|
||||||
case 2:
|
|
||||||
temp1 = ((uint16_t*)pdata1)[-1];
|
|
||||||
#if defined(__arm__) || defined(_M_ARM)
|
|
||||||
temp1 |= temp1 << 16;
|
|
||||||
#else
|
|
||||||
temp1 = 0x00010001u * temp1;
|
|
||||||
#endif
|
|
||||||
break;
|
|
||||||
case 4:
|
|
||||||
temp1 = ((uint32_t*)pdata1)[-1];
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
goto Copy;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
COPY_SMALL_PATTERN();
|
|
||||||
} else {
|
|
||||||
Copy:
|
|
||||||
for (i = 0; i < length; ++i) pdata1[i] = pdata2[i];
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
ok = 0;
|
ok = 0;
|
||||||
goto End;
|
goto End;
|
||||||
@ -897,9 +907,6 @@ static int DecodeAlphaData(VP8LDecoder* const dec, uint8_t* const data,
|
|||||||
return ok;
|
return ok;
|
||||||
}
|
}
|
||||||
|
|
||||||
#undef COPY_PATTERN
|
|
||||||
#undef ROTATE8b
|
|
||||||
|
|
||||||
static int DecodeImageData(VP8LDecoder* const dec, uint32_t* const data,
|
static int DecodeImageData(VP8LDecoder* const dec, uint32_t* const data,
|
||||||
int width, int height, int last_row,
|
int width, int height, int last_row,
|
||||||
ProcessRowsFunc process_func) {
|
ProcessRowsFunc process_func) {
|
||||||
|
Loading…
Reference in New Issue
Block a user