From: Tanner Gooding Date: Sat, 28 Jul 2018 17:37:59 +0000 (-0700) Subject: Updating SetCpuInfo to be more accurate X-Git-Tag: accepted/tizen/unified/20190422.045933~1558 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=369e9a97ee0cce00532a03924f98723b17b4aa01;p=platform%2Fupstream%2Fcoreclr.git Updating SetCpuInfo to be more accurate --- diff --git a/src/vm/codeman.cpp b/src/vm/codeman.cpp index 8529c3f..33adba5 100644 --- a/src/vm/codeman.cpp +++ b/src/vm/codeman.cpp @@ -1282,11 +1282,12 @@ void EEJitManager::SetCpuInfo() switch (CPU_X86_FAMILY(cpuInfo.dwCPUType)) { - case CPU_X86_PENTIUM_4: - CPUCompileFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_TARGET_P4); - break; - default: - break; + case CPU_X86_PENTIUM_4: + CPUCompileFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_TARGET_P4); + break; + + default: + break; } if (CPU_X86_USE_CMOV(cpuInfo.dwFeatures)) @@ -1302,121 +1303,134 @@ void EEJitManager::SetCpuInfo() #endif // _TARGET_X86_ #if defined(_TARGET_X86_) || defined(_TARGET_AMD64_) + // NOTE: The below checks are based on the information reported by + // Intel® 64 and IA-32 Architectures Software Developer’s Manual. Volume 2 + // and + // AMD64 Architecture Programmer’s Manual. Volume 3 + // For more information, please refer to the CPUID instruction in the respective manuals + + // We will set the following flags: + // CORJIT_FLAG_USE_SSE2 is required + // SSE - EDX bit 25 (buffer[15] & 0x02) + // SSE2 - EDX bit 26 (buffer[15] & 0x04) + // CORJIT_FLAG_USE_SSE3 if the following feature bits are set (input EAX of 1) + // CORJIT_FLAG_USE_SSE2 + // SSE3 - ECX bit 0 (buffer[8] & 0x01) + // CORJIT_FLAG_USE_SSSE3 if the following feature bits are set (input EAX of 1) + // CORJIT_FLAG_USE_SSE3 + // SSSE3 - ECX bit 9 (buffer[9] & 0x02) + // CORJIT_FLAG_USE_SSE41 if the following feature bits are set (input EAX of 1) + // CORJIT_FLAG_USE_SSSE3 + // SSE4.1 - ECX bit 19 (buffer[10] & 0x08) + // CORJIT_FLAG_USE_SSE42 if the following feature bits are set (input EAX of 1) + // CORJIT_FLAG_USE_SSE41 + // SSE4.2 - ECX bit 20 (buffer[10] & 0x10) + // CORJIT_FLAG_USE_POPCNT if the following feature bits are set (input EAX of 1) + // CORJIT_FLAG_USE_SSE42 + // POPCNT - ECX bit 23 (buffer[10] & 0x80) + // CORJIT_FLAG_USE_AVX if the following feature bits are set (input EAX of 1), and xmmYmmStateSupport returns 1: + // CORJIT_FLAG_USE_SSE42 + // OSXSAVE - ECX bit 27 (buffer[11] & 0x08) + // XGETBV - XCR0[2:1] 11b + // AVX - ECX bit 28 (buffer[11] & 0x10) + // CORJIT_FLAG_USE_FMA if the following feature bits are set (input EAX of 1), and xmmYmmStateSupport returns 1: + // CORJIT_FLAG_USE_AVX + // FMA - ECX bit 12 (buffer[9] & 0x10) + // CORJIT_FLAG_USE_AVX2 if the following feature bit is set (input EAX of 0x07 and input ECX of 0): + // CORJIT_FLAG_USE_AVX + // AVX2 - EBX bit 5 (buffer[4] & 0x20) + // CORJIT_FLAG_USE_AVX_512 is not currently set, but defined so that it can be used in future without + // CORJIT_FLAG_USE_AES + // CORJIT_FLAG_USE_SSE2 + // AES - ECX bit 25 (buffer[11] & 0x01) + // CORJIT_FLAG_USE_PCLMULQDQ + // CORJIT_FLAG_USE_SSE2 + // PCLMULQDQ - ECX bit 1 (buffer[8] & 0x01) + // CORJIT_FLAG_USE_BMI1 if the following feature bit is set (input EAX of 0x07 and input ECX of 0): + // BMI1 - EBX bit 3 (buffer[4] & 0x08) + // CORJIT_FLAG_USE_BMI2 if the following feature bit is set (input EAX of 0x07 and input ECX of 0): + // BMI2 - EBX bit 8 (buffer[5] & 0x01) + // CORJIT_FLAG_USE_LZCNT if the following feature bits are set (input EAX of 80000001H) + // LZCNT - ECX bit 5 (buffer[8] & 0x20) + // synchronously updating VM and JIT. + unsigned char buffer[16]; DWORD maxCpuId = getcpuid(0, buffer); - if (maxCpuId >= 0) + + if (maxCpuId >= 1) { // getcpuid executes cpuid with eax set to its first argument, and ecx cleared. // It returns the resulting eax in buffer[0-3], ebx in buffer[4-7], ecx in buffer[8-11], // and edx in buffer[12-15]. - // We will set the following flags: - // CORJIT_FLAG_USE_SSE3 if the following feature bits are set (input EAX of 1) - // SSE3 - ECX bit 0 (buffer[8] & 0x01) - // CORJIT_FLAG_USE_SSSE3 if the following feature bits are set (input EAX of 1) - // SSE3 - ECX bit 0 (buffer[8] & 0x01) - // SSSE3 - ECX bit 9 (buffer[9] & 0x02) - // CORJIT_FLAG_USE_SSE41 if the following feature bits are set (input EAX of 1) - // SSE3 - ECX bit 0 (buffer[8] & 0x01) - // SSSE3 - ECX bit 9 (buffer[9] & 0x02) - // SSE4.1 - ECX bit 19 (buffer[10] & 0x08) - // CORJIT_FLAG_USE_SSE42 if the following feature bits are set (input EAX of 1) - // SSE3 - ECX bit 0 (buffer[8] & 0x01) - // SSSE3 - ECX bit 9 (buffer[9] & 0x02) - // SSE4.2 - ECX bit 20 (buffer[10] & 0x10) - // CORJIT_FLAG_USE_POPCNT if the following feature bits are set (input EAX of 1) - // SSE3 - ECX bit 0 (buffer[8] & 0x01) - // SSSE3 - ECX bit 9 (buffer[9] & 0x02) - // SSE4.2 - ECX bit 20 (buffer[10] & 0x10) - // POPCNT - ECX bit 23 (buffer[10] & 0x80) - // CORJIT_FLAG_USE_AVX if the following feature bits are set (input EAX of 1), and xmmYmmStateSupport returns 1: - // OSXSAVE - ECX bit 27 (buffer[11] & 0x08) - // AVX - ECX bit 28 (buffer[11] & 0x10) - // CORJIT_FLAG_USE_FMA if the following feature bits are set (input EAX of 1), and xmmYmmStateSupport returns 1: - // FMA - ECX bit 12 (buffer[9] & 0x10) - // CORJIT_FLAG_USE_AVX2 if the following feature bit is set (input EAX of 0x07 and input ECX of 0): - // AVX2 - EBX bit 5 (buffer[4] & 0x20) - // CORJIT_FLAG_USE_AVX_512 is not currently set, but defined so that it can be used in future without - // CORJIT_FLAG_USE_BMI1 if the following feature bit is set (input EAX of 0x07 and input ECX of 0): - // BMI1 - EBX bit 3 (buffer[4] & 0x08) - // CORJIT_FLAG_USE_BMI2 if the following feature bit is set (input EAX of 0x07 and input ECX of 0): - // BMI2 - EBX bit 8 (buffer[5] & 0x01) - // CORJIT_FLAG_USE_LZCNT if the following feature bits are set (input EAX of 80000001H) - // LZCNT - ECX bit 5 (buffer[8] & 0x20) - // synchronously updating VM and JIT. + (void) getcpuid(1, buffer); - // If SSE2 is not enabled, there is no point in checking the rest. - // SSE2 is bit 26 of EDX (buffer[15] & 0x04) - // TODO: Determine whether we should break out the various SSE options further. - if ((buffer[15] & 0x04) != 0) // SSE2 + + // If SSE/SSE2 is not enabled, there is no point in checking the rest. + // SSE is bit 25 of EDX (buffer[15] & 0x02) + // SSE2 is bit 26 of EDX (buffer[15] & 0x04) + + if ((buffer[15] & 0x06) == 0x06) // SSE & SSE2 { - if ((buffer[8] & 0x01) != 0) // SSE3 - { - CPUCompileFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_SSE3); - if ((buffer[9] & 0x02) != 0) // SSSE3 - { - CPUCompileFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_SSSE3); - if ((buffer[10] & 0x08) != 0) // SSE4.1 - { - CPUCompileFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_SSE41); - } - if ((buffer[10] & 0x10) != 0) // SSE4.2 - { - CPUCompileFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_SSE42); - if ((buffer[10] & 0x80) != 0) // POPCNT - { - CPUCompileFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_POPCNT); - } - } - } - } - - if ((buffer[11] & 0x01) != 0) // AESNI + if ((buffer[11] & 0x02) != 0) // AESNI { CPUCompileFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_AES); } - if ((buffer[8] & 0x02) != 0) // PCLMULQDQ + + if ((buffer[8] & 0x02) != 0) // PCLMULQDQ { CPUCompileFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_PCLMULQDQ); } - if ((buffer[11] & 0x18) == 0x18) // AVX + + if ((buffer[8] & 0x01) != 0) // SSE3 { - if(DoesOSSupportAVX()) + CPUCompileFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_SSE3); + + if ((buffer[9] & 0x02) != 0) // SSSE3 { - if (xmmYmmStateSupport() == 1) + CPUCompileFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_SSSE3); + + if ((buffer[10] & 0x08) != 0) // SSE4.1 { - CPUCompileFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_AVX); - if ((buffer[9] & 0x10) != 0) // FMA - { - CPUCompileFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_FMA); - } - if (maxCpuId >= 0x07) + CPUCompileFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_SSE41); + + if ((buffer[10] & 0x10) != 0) // SSE4.2 { - (void) getextcpuid(0, 0x07, buffer); - if ((buffer[4] & 0x20) != 0) // AVX2 + CPUCompileFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_SSE42); + + if ((buffer[10] & 0x80) != 0) // POPCNT { - CPUCompileFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_AVX2); + CPUCompileFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_POPCNT); + } + + if ((buffer[11] & 0x18) == 0x18) // AVX & OSXSAVE + { + if(DoesOSSupportAVX() && (xmmYmmStateSupport() == 1)) + { + CPUCompileFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_AVX); + + if ((buffer[9] & 0x10) != 0) // FMA + { + CPUCompileFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_FMA); + } + + if (maxCpuId >= 0x07) + { + (void) getextcpuid(0, 0x07, buffer); + + if ((buffer[4] & 0x20) != 0) // AVX2 + { + CPUCompileFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_AVX2); + } + } + } } } } } } - (void) getextcpuid(0, 0x07, buffer); - if ((buffer[4] & 0x08) != 0) // BMI1 - { - CPUCompileFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_BMI1); - } - if ((buffer[5] & 0x01) != 0) //BMI2 - { - CPUCompileFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_BMI2); - } - - (void) getcpuid(0x80000001, buffer); - if ((buffer[8] & 0x20) != 0) // LZCNT - { - CPUCompileFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_LZCNT); - } static ConfigDWORD fFeatureSIMD; + if (fFeatureSIMD.val(CLRConfig::EXTERNAL_FeatureSIMD) != 0) { CPUCompileFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_FEATURE_SIMD); @@ -1427,6 +1441,37 @@ void EEJitManager::SetCpuInfo() CPUCompileFlags.Clear(CORJIT_FLAGS::CORJIT_FLAG_USE_AVX2); } } + + if (maxCpuId >= 0x07) + { + (void)getextcpuid(0, 0x07, buffer); + + if ((buffer[4] & 0x08) != 0) // BMI1 + { + CPUCompileFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_BMI1); + } + + if ((buffer[5] & 0x01) != 0) // BMI2 + { + CPUCompileFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_BMI2); + } + } + } + + DWORD maxCpuIdEx = getcpuid(0x80000000, buffer); + + if (maxCpuIdEx >= 0x80000001) + { + // getcpuid executes cpuid with eax set to its first argument, and ecx cleared. + // It returns the resulting eax in buffer[0-3], ebx in buffer[4-7], ecx in buffer[8-11], + // and edx in buffer[12-15]. + + (void) getcpuid(0x80000001, buffer); + + if ((buffer[8] & 0x20) != 0) // LZCNT + { + CPUCompileFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_LZCNT); + } } #endif // defined(_TARGET_X86_) || defined(_TARGET_AMD64_) diff --git a/src/zap/zapper.cpp b/src/zap/zapper.cpp index 5b4b23c..6d1cbd9 100644 --- a/src/zap/zapper.cpp +++ b/src/zap/zapper.cpp @@ -1289,12 +1289,12 @@ void Zapper::InitializeCompilerFlags(CORCOMPILE_VERSION_INFO * pVersionInfo) switch (CPU_X86_FAMILY(pVersionInfo->cpuInfo.dwCPUType)) { - case CPU_X86_PENTIUM_4: - m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_TARGET_P4); - break; + case CPU_X86_PENTIUM_4: + m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_TARGET_P4); + break; - default: - break; + default: + break; } if (CPU_X86_USE_CMOV(pVersionInfo->cpuInfo.dwFeatures))