add a kSlowSSSE3 feature for CPUInfo

This is meant to be used for run-time detection of platforms on which
instructions like pshufb and bsr are slow.

Adapted from libvpx patch: https://chromium-review.googlesource.com/#/c/367731

Change-Id: I2c22fbb9aae699d87a041393ba1ad5f1f21ff640
This commit is contained in:
Pascal Massimino 2017-01-12 01:21:36 -08:00
parent 28fe054e73
commit 86bbd24552
2 changed files with 41 additions and 5 deletions

View File

@ -95,26 +95,62 @@ static WEBP_INLINE uint64_t xgetbv(void) {
#endif
#if defined(__i386__) || defined(__x86_64__) || defined(WEBP_MSC_SSE2)
// Run-time helper for spotting slow SSSE3 platforms.
// 'info' is decoded as a packed CPU signature: bits 4..7 hold the model,
// bits 8..11 the family and bits 16..19 the extended model (presumably the
// EAX output of CPUID leaf 1 -- confirm against the caller).
// Returns 1 if the family is 0x06 and the 8-bit display model (extended model
// in the high nibble, model in the low nibble) appears in the table below,
// and 0 otherwise.
static int CheckSlowModel(int info) {
  // Table listing display models with longer latencies for the bsr instruction
  // (ie 2 cycles vs 10/16 cycles) and some SSSE3 instructions like pshufb.
  // Refer to Intel 64 and IA-32 Architectures Optimization Reference Manual.
  static const uint8_t kSlowModels[] = {
    0x37, 0x4a, 0x4d,  // Silvermont Microarchitecture
    0x1c, 0x26, 0x27   // Atom Microarchitecture
  };
  const size_t num_slow_models = sizeof(kSlowModels) / sizeof(kSlowModels[0]);
  const uint32_t cpu_family = (info >> 8) & 0xf;
  const uint32_t model_low = (info >> 4) & 0xf;
  const uint32_t model_high = (info & 0xf0000) >> 12;
  const uint32_t display_model = model_high | model_low;
  size_t idx = 0;

  // Only family 6 entries are listed in the table.
  if (cpu_family != 0x06) return 0;

  while (idx < num_slow_models) {
    if (kSlowModels[idx] == display_model) return 1;
    ++idx;
  }
  return 0;
}
static int x86CPUInfo(CPUFeature feature) {
int max_cpuid_value;
int cpu_info[4];
int is_intel = 0;
// get the highest feature value cpuid supports
GetCPUInfo(cpu_info, 0);
max_cpuid_value = cpu_info[0];
if (max_cpuid_value < 1) {
return 0;
} else {
const int VENDOR_ID_INTEL_EBX = 0x756e6547; // uneG
const int VENDOR_ID_INTEL_EDX = 0x49656e69; // Ieni
const int VENDOR_ID_INTEL_ECX = 0x6c65746e; // letn
is_intel = (cpu_info[1] == VENDOR_ID_INTEL_EBX &&
cpu_info[2] == VENDOR_ID_INTEL_ECX &&
cpu_info[3] == VENDOR_ID_INTEL_EDX); // genuine Intel?
}
GetCPUInfo(cpu_info, 1);
if (feature == kSSE2) {
return 0 != (cpu_info[3] & 0x04000000);
return !!(cpu_info[3] & (1 << 26));
}
if (feature == kSSE3) {
return 0 != (cpu_info[2] & 0x00000001);
return !!(cpu_info[2] & (1 << 0));
}
if (feature == kSlowSSSE3) {
if (is_intel && (cpu_info[2] & (1 << 0))) { // SSSE3?
return CheckSlowModel(cpu_info[0]);
}
return 0;
}
if (feature == kSSE4_1) {
return 0 != (cpu_info[2] & 0x00080000);
return !!(cpu_info[2] & (1 << 19));
}
if (feature == kAVX) {
// bits 27 (OSXSAVE) & 28 (256-bit AVX)
@ -126,7 +162,7 @@ static int x86CPUInfo(CPUFeature feature) {
if (feature == kAVX2) {
if (x86CPUInfo(kAVX) && max_cpuid_value >= 7) {
GetCPUInfo(cpu_info, 7);
return ((cpu_info[1] & 0x00000020) == 0x00000020);
return !!(cpu_info[1] & (1 << 5));
}
}
return 0;
@ -184,4 +220,3 @@ VP8CPUInfo VP8GetCPUInfo = mipsCPUInfo;
#else
VP8CPUInfo VP8GetCPUInfo = NULL;
#endif

View File

@ -132,6 +132,7 @@ extern "C" {
typedef enum {
kSSE2,
kSSE3,
kSlowSSSE3, // special feature for slow SSSE3 architectures
kSSE4_1,
kAVX,
kAVX2,