From 3f0ab1fd8394754d6cbc7c4810a6873ea37453fa Mon Sep 17 00:00:00 2001 From: Jan Kotas Date: Fri, 1 May 2015 13:24:30 -0700 Subject: [PATCH] Fix AVX OS support detection In order to check for OS support for AVX, we need to add a check for the OSXSAVE bit in SetCpuInfo in the EEJitManager. In addition, the arch spec indicates that bits 1 and 2 (mask 0x6) of xcr0 (obtained by executing the xgetbv instruction with ecx == 0) must be set. This fix adds both of these, although the CRT uses only the check of the two bits from SetCpuInfo. (Change made by @CarolEidt) [tfs-changeset: 1463578] --- src/vm/amd64/AsmHelpers.asm | 16 ++++++++++++++++ src/vm/codeman.cpp | 19 ++++++++++++------- 2 files changed, 28 insertions(+), 7 deletions(-) diff --git a/src/vm/amd64/AsmHelpers.asm b/src/vm/amd64/AsmHelpers.asm index a6d397c..7687574 100644 --- a/src/vm/amd64/AsmHelpers.asm +++ b/src/vm/amd64/AsmHelpers.asm @@ -737,6 +737,22 @@ NESTED_ENTRY getcpuid, _TEXT ret NESTED_END getcpuid, _TEXT + +;; extern "C" DWORD __stdcall xmmYmmStateSupport(); +LEAF_ENTRY xmmYmmStateSupport, _TEXT + mov ecx, 0 ; Specify xcr0 + xgetbv ; result in EDX:EAX + and eax, 06H + cmp eax, 06H ; check OS has enabled both XMM and YMM state support + jne not_supported + mov eax, 1 + jmp done + not_supported: + mov eax, 0 + done: + ret +LEAF_END xmmYmmStateSupport, _TEXT + ;The following function uses Deterministic Cache Parameter leafs to determine the cache hierarchy information on Prescott & Above platforms. ; This function takes 3 arguments: ; Arg1 is an input to ECX. Used as index to specify which cache level to return information on by CPUID. 
diff --git a/src/vm/codeman.cpp b/src/vm/codeman.cpp index 1175f63..dc6937f 100644 --- a/src/vm/codeman.cpp +++ b/src/vm/codeman.cpp @@ -1176,6 +1176,7 @@ EEJitManager::EEJitManager() #if defined(_TARGET_AMD64_) extern "C" DWORD __stdcall getcpuid(DWORD arg, unsigned char result[16]); +extern "C" DWORD __stdcall xmmYmmStateSupport(); #endif // defined(_TARGET_AMD64_) void EEJitManager::SetCpuInfo() @@ -1227,7 +1228,8 @@ void EEJitManager::SetCpuInfo() // SSSE3 - ECX bit 9 (buffer[9] & 0x02) // SSE4.1 - ECX bit 19 (buffer[10] & 0x08) // SSE4.2 - ECX bit 20 (buffer[10] & 0x10) - // CORJIT_FLG_USE_AVX if the following feature bit is set (input EAX of 1): + // CORJIT_FLG_USE_AVX if the following feature bits are set (input EAX of 1), and xmmYmmStateSupport returns 1: + // OSXSAVE - ECX bit 27 (buffer[11] & 0x08) // AVX - ECX bit 28 (buffer[11] & 0x10) // CORJIT_FLG_USE_AVX2 if the following feature bit is set (input EAX of 0x07 and input ECX of 0): // AVX2 - EBX bit 5 (buffer[4] & 0x20) @@ -1246,15 +1248,18 @@ void EEJitManager::SetCpuInfo() { dwCPUCompileFlags |= CORJIT_FLG_USE_SSE3_4; } - if ((buffer[11] & 0x10) != 0) + if ((buffer[11] & 0x18) == 0x18) { - dwCPUCompileFlags |= CORJIT_FLG_USE_AVX; - if (maxCpuId >= 0x07) + if (xmmYmmStateSupport() == 1) { - (void) getcpuid(0x07, buffer); - if ((buffer[4] & 0x20) != 0) + dwCPUCompileFlags |= CORJIT_FLG_USE_AVX; + if (maxCpuId >= 0x07) { - dwCPUCompileFlags |= CORJIT_FLG_USE_AVX2; + (void) getcpuid(0x07, buffer); + if ((buffer[4] & 0x20) != 0) + { + dwCPUCompileFlags |= CORJIT_FLG_USE_AVX2; + } } } } -- 2.7.4