diff --git a/src/utils/bit_reader.h b/src/utils/bit_reader.h index 3b6ca663..ccf450c5 100644 --- a/src/utils/bit_reader.h +++ b/src/utils/bit_reader.h @@ -59,7 +59,7 @@ extern "C" { // The right-justify strategy tends to use less shifts and is often faster. //------------------------------------------------------------------------------ -// BITS can be either 32, 24, 16 or 8. +// BITS can be any multiple of 8 from 8 to 56 (inclusive). // Pick values that fit natural register size. #if !defined(WEBP_REFERENCE_IMPLEMENTATION) @@ -68,7 +68,9 @@ extern "C" { #if defined(__i386__) || defined(_M_IX86) // x86 32bit #define BITS 16 -#elif defined(__arm__) || defined(_M_ARM) // ARM +#elif defined(__x86_64__) || defined(_M_X64) // x86 64bit +#define BITS 56 +#elif defined(__arm__) || defined(_M_ARM) // ARM #define BITS 24 #else // reasonable default #define BITS 24 @@ -84,9 +86,15 @@ extern "C" { //------------------------------------------------------------------------------ // Derived types and constants -#if (BITS == 32) -typedef uint64_t bit_t; // natural register type -typedef uint32_t lbit_t; // natural type for memory I/O +// bit_t = natural register type +// lbit_t = natural type for memory I/O + +#if (BITS > 32) +typedef uint64_t bit_t; +typedef uint64_t lbit_t; +#elif (BITS == 32) +typedef uint64_t bit_t; +typedef uint32_t lbit_t; #elif (BITS == 24) typedef uint32_t bit_t; typedef uint32_t lbit_t; @@ -148,19 +156,36 @@ static WEBP_INLINE void VP8LoadNewBytes(VP8BitReader* const br) { lbit_t in_bits = *(lbit_t*)br->buf_; br->buf_ += (BITS) >> 3; #if !defined(__BIG_ENDIAN__) -#if (BITS == 32) || (BITS == 24) +#if (BITS > 32) +// gcc 4.3 has builtin functions for swap32/swap64 +#if defined(__GNUC__) && \ + (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) + bits = (bit_t)__builtin_bswap64(in_bits); +#elif defined(_MSC_VER) + bits = (bit_t)_byteswap_uint64(in_bits); +#elif defined(__x86_64__) + __asm__ volatile("bswapq %0" : "=r"(bits) : "0"(in_bits)); +#else // generic code for swapping 64-bit values (suggested by bdb@) + bits = (bit_t)in_bits; + bits = ((bits & 0xffffffff00000000ull) >> 32) | + ((bits & 0x00000000ffffffffull) << 32); + bits = ((bits & 0xffff0000ffff0000ull) >> 16) | + ((bits & 0x0000ffff0000ffffull) << 16); + bits = ((bits & 0xff00ff00ff00ff00ull) >> 8) | + ((bits & 0x00ff00ff00ff00ffull) << 8); +#endif + bits >>= 64 - BITS; +#elif (BITS >= 24) #if defined(__i386__) || defined(__x86_64__) __asm__ volatile("bswap %k0" : "=r"(in_bits) : "0"(in_bits)); bits = (bit_t)in_bits; // 24b/32b -> 32b/64b zero-extension #elif defined(_MSC_VER) - bits = _byteswap_ulong(in_bits); + bits = (bit_t)_byteswap_ulong(in_bits); #else bits = (bit_t)(in_bits >> 24) | ((in_bits >> 8) & 0xff00) | ((in_bits << 8) & 0xff0000) | (in_bits << 24); #endif // x86 -#if (BITS == 24) - bits >>= 8; -#endif + bits >>= (32 - BITS); #elif (BITS == 16) // gcc will recognize a 'rorw $8, ...' here: bits = (bit_t)(in_bits >> 8) | ((in_bits & 0xff) << 8); @@ -248,7 +273,7 @@ static WEBP_INLINE int VP8GetBit(VP8BitReader* const br, int prob) { } static WEBP_INLINE int VP8GetSigned(VP8BitReader* const br, int v) { - const bit_t split = (br->range_ >> 1); + const range_t split = (br->range_ >> 1); const int bit = VP8BitUpdate(br, split); VP8Shift(br); return bit ? -v : v;