Unify hardware feature detection between CoreCLR JIT and AOT (#89342)
author Jan Kotas <jkotas@microsoft.com>
Sun, 23 Jul 2023 04:13:29 +0000 (21:13 -0700)
committer GitHub <noreply@github.com>
Sun, 23 Jul 2023 04:13:29 +0000 (21:13 -0700)
Follow up on #87865

25 files changed:
src/coreclr/CMakeLists.txt
src/coreclr/classlibnative/bcltype/system.cpp
src/coreclr/gc/vxsort/isa_detection.cpp
src/coreclr/minipal/Unix/CMakeLists.txt
src/coreclr/minipal/Windows/CMakeLists.txt
src/coreclr/nativeaot/Runtime/amd64/MemClrForGC.asm [deleted file]
src/coreclr/nativeaot/Runtime/i386/MemClrForGC.asm [deleted file]
src/coreclr/pal/inc/pal.h
src/coreclr/pal/src/CMakeLists.txt
src/coreclr/pal/src/arch/amd64/processor.cpp [deleted file]
src/coreclr/pal/src/arch/arm/processor.cpp [deleted file]
src/coreclr/pal/src/arch/arm64/processor.cpp [deleted file]
src/coreclr/pal/src/arch/i386/processor.cpp [deleted file]
src/coreclr/pal/src/arch/loongarch64/processor.cpp [deleted file]
src/coreclr/pal/src/arch/ppc64le/processor.cpp [deleted file]
src/coreclr/pal/src/arch/riscv64/processor.cpp [deleted file]
src/coreclr/pal/src/arch/s390x/processor.cpp [deleted file]
src/coreclr/pal/src/include/pal/context.h
src/coreclr/pal/src/misc/jitsupport.cpp [deleted file]
src/coreclr/vm/amd64/AsmHelpers.asm
src/coreclr/vm/amd64/unixstubs.cpp
src/coreclr/vm/codeman.cpp
src/coreclr/vm/i386/jitinterfacex86.cpp
src/native/minipal/cpufeatures.c
src/native/minipal/cpuid.h

index d21c158..925fc6d 100644 (file)
@@ -191,7 +191,6 @@ include_directories("debug/inc/${ARCH_SOURCES_DIR}")
 include_directories("debug/inc/dump")
 include_directories("md/inc")
 include_directories("classlibnative/bcltype")
-include_directories("classlibnative/cryptography")
 include_directories("classlibnative/inc")
 include_directories("${GENERATED_INCLUDE_DIR}")
 include_directories("hosts/inc")
index 325afdc..ef02743 100644 (file)
@@ -30,7 +30,7 @@
 #include "array.h"
 #include "eepolicy.h"
 
-
+#include <minipal/cpuid.h>
 
 
 FCIMPL0(UINT32, SystemNative::GetTickCount)
@@ -50,8 +50,6 @@ FCIMPL0(UINT64, SystemNative::GetTickCount64)
 FCIMPLEND;
 
 
-
-
 extern "C" VOID QCALLTYPE Environment_Exit(INT32 exitcode)
 {
     QCALL_CONTRACT;
index 1dcb791..93c7288 100644 (file)
@@ -19,28 +19,6 @@ enum class SupportedISA
 
 #if defined(TARGET_AMD64) && defined(TARGET_WINDOWS)
 
-static DWORD64 GetEnabledXStateFeaturesHelper()
-{
-    // On Windows we have an api(GetEnabledXStateFeatures) to check if AVX is supported
-    typedef DWORD64(WINAPI* PGETENABLEDXSTATEFEATURES)();
-    PGETENABLEDXSTATEFEATURES pfnGetEnabledXStateFeatures = NULL;
-
-    HMODULE hMod = LoadLibraryExW(L"kernel32.dll", NULL, LOAD_LIBRARY_SEARCH_SYSTEM32);
-    if (hMod == NULL)
-        return 0;
-
-    pfnGetEnabledXStateFeatures = (PGETENABLEDXSTATEFEATURES)GetProcAddress(hMod, "GetEnabledXStateFeatures");
-
-    if (pfnGetEnabledXStateFeatures == NULL)
-    {
-        return 0;
-    }
-
-    DWORD64 FeatureMask = pfnGetEnabledXStateFeatures();
-
-    return FeatureMask;
-}
-
 SupportedISA DetermineSupportedISA()
 {
     // register definitions to make the following code more readable
@@ -78,7 +56,7 @@ SupportedISA DetermineSupportedISA()
     DWORD64 xcr0 = _xgetbv(0);
 
     // get OS XState info
-    DWORD64 FeatureMask = GetEnabledXStateFeaturesHelper();
+    DWORD64 FeatureMask = GetEnabledXStateFeatures();
 
     // get processor extended feature flag info
     __cpuidex(reg, 7, 0);
index 0c09c76..ca41eb4 100644 (file)
@@ -1,5 +1,18 @@
-add_library(coreclrminipal
-    STATIC
+set(SOURCES
     doublemapping.cpp
     dn-u16.cpp
 )
+
+if(NOT CLR_CROSS_COMPONENTS_BUILD)
+    list(APPEND SOURCES
+        ${CLR_SRC_NATIVE_DIR}/minipal/cpufeatures.c
+    )
+endif()
+
+add_library(coreclrminipal
+    STATIC
+    ${SOURCES}
+)
+
+include(${CLR_SRC_NATIVE_DIR}/minipal/configure.cmake)
+include_directories(${CMAKE_CURRENT_BINARY_DIR})
index 0c09c76..0c83eea 100644 (file)
@@ -1,5 +1,15 @@
-add_library(coreclrminipal
-    STATIC
+set(SOURCES
     doublemapping.cpp
     dn-u16.cpp
 )
+
+if(NOT CLR_CROSS_COMPONENTS_BUILD)
+    list(APPEND SOURCES
+        ${CLR_SRC_NATIVE_DIR}/minipal/cpufeatures.c
+    )
+endif()
+
+add_library(coreclrminipal
+    STATIC
+    ${SOURCES}
+)
diff --git a/src/coreclr/nativeaot/Runtime/amd64/MemClrForGC.asm b/src/coreclr/nativeaot/Runtime/amd64/MemClrForGC.asm
deleted file mode 100644 (file)
index de5476c..0000000
+++ /dev/null
@@ -1,99 +0,0 @@
-;; Licensed to the .NET Foundation under one or more agreements.
-;; The .NET Foundation licenses this file to you under the MIT license.
-
-include AsmMacros.inc
-
-
-LEAF_ENTRY memclr_for_gc, _TEXT
-
-;    x64 version
-
-;   we get the following parameters
-;   rcx = destination address
-;   rdx = size to clear
-
-    ; save rdi - this should be faster than a push
-    mov     r11,rdi
-
-    xor     eax, eax
-
-    ; check alignment of destination
-    test    cl,7
-    jnz     alignDest
-alignDone:
-    ; now destination is qword aligned
-    ; move it to rdi for rep stos
-    mov     rdi,rcx
-
-    ; compute number of bytes to clear non-temporally
-    ; we wish to clear the first 8k or so with rep stos,
-    ; anything above that non-temporally
-
-    xor     r8,r8
-    cmp     rdx,8*1024
-    jbe     noNonTempClear
-
-    ; compute the number of bytes above 8k
-    ; and round down to a multiple of 64
-    mov     r8,rdx
-    sub     r8,8*1024
-    and     r8,not 63
-
-    ; compute remaining size to clear temporally
-    sub     rdx,r8
-
-noNonTempClear:
-
-    ; do the temporal clear
-    mov     rcx,rdx
-    shr     rcx,3
-    rep     stosq
-
-    ; do the non-temporal clear
-    test    r8,r8
-    jne     nonTempClearLoop
-
-nonTempClearDone:
-
-    ; clear any remaining bytes
-    mov     rcx,rdx
-    and     rcx,7
-    rep     stosb
-
-    ; restore rdi
-    mov     rdi,r11
-
-    ret
-
-    ; this is the infrequent case, hence out of line
-nonTempClearLoop:
-    movnti  [rdi+ 0],rax
-    movnti  [rdi+ 8],rax
-    movnti  [rdi+16],rax
-    movnti  [rdi+24],rax
-
-    movnti  [rdi+32],rax
-    movnti  [rdi+40],rax
-    movnti  [rdi+48],rax
-    movnti  [rdi+56],rax
-
-    add     rdi,64
-    sub     r8,64
-    ja      nonTempClearLoop
-    jmp     nonTempClearDone
-
-alignDest:
-    test    rdx,rdx
-    je      alignDone
-alignLoop:
-    mov     [rcx],al
-    add     rcx,1
-    sub     rdx,1
-    jz      alignDone
-    test    cl,7
-    jnz     alignLoop
-    jmp     alignDone
-
-LEAF_END memclr_for_gc, _TEXT
-
-    end
diff --git a/src/coreclr/nativeaot/Runtime/i386/MemClrForGC.asm b/src/coreclr/nativeaot/Runtime/i386/MemClrForGC.asm
deleted file mode 100644 (file)
index d093e0a..0000000
+++ /dev/null
@@ -1,148 +0,0 @@
-;; Licensed to the .NET Foundation under one or more agreements.
-;; The .NET Foundation licenses this file to you under the MIT license.
-
-    .586
-    .xmm
-    .model  flat
-    option  casemap:none
-
-
-EXTERN  _IsProcessorFeaturePresent@4 : PROC
-
-PF_XMMI64_INSTRUCTIONS_AVAILABLE equ 10
-
-    .data
-canUseSSE2  db  0
-
-    .code
-
-_memclr_for_gc@8 proc public
-
-;    x86 version
-
-;   we get the following parameters
-;   ecx = destination address
-;   edx = size to clear
-
-    push    ebx
-    push    edi
-
-    xor     eax, eax
-
-    ; load destination
-    mov     edi,[esp+8+4]
-
-    ; load size
-    mov     ebx,[esp+8+8]
-
-    ; check alignment of destination
-    test    edi,3
-    jnz     alignDest
-alignDone:
-    ; now destination is dword aligned
-
-    ; compute number of bytes to clear non-temporally
-    ; we wish to clear the first 8k or so with rep stos,
-    ; anything above that non-temporally
-
-    xor     edx,edx
-    cmp     ebx,8*1024
-    jbe     noNonTempClear
-
-    ; can we use SSE2 instructions?
-    cmp     canUseSSE2,0
-    js      noNonTempClear
-    jz      computeCanUseSSE2
-
-computeNonTempClear:
-
-    ; compute the number of bytes above 8k
-    ; and round down to a multiple of 64
-    mov     edx,ebx
-    sub     edx,8*1024
-    and     edx,not 63
-
-    ; compute remaining size to clear temporally
-    sub     ebx,edx
-
-noNonTempClear:
-    ; do the temporal clear
-    mov     ecx,ebx
-    shr     ecx,2
-    rep     stosd
-
-    ; do the non-temporal clear
-    test    edx,edx
-    jne     nonTempClearLoop
-
-nonTempClearDone:
-
-    ; clear any remaining bytes
-    mov     ecx,ebx
-    and     ecx,3
-    rep     stosb
-
-    pop     edi
-    pop     ebx
-    ret     8
-
-    ; this is the infrequent case, hence out of line
-nonTempClearLoop:
-    movnti  [edi+ 0],eax
-    movnti  [edi+ 4],eax
-    movnti  [edi+ 8],eax
-    movnti  [edi+12],eax
-
-    movnti  [edi+16],eax
-    movnti  [edi+20],eax
-    movnti  [edi+24],eax
-    movnti  [edi+28],eax
-
-    movnti  [edi+32],eax
-    movnti  [edi+36],eax
-    movnti  [edi+40],eax
-    movnti  [edi+44],eax
-
-    movnti  [edi+48],eax
-    movnti  [edi+52],eax
-    movnti  [edi+56],eax
-    movnti  [edi+60],eax
-
-    add     edi,64
-    sub     edx,64
-    ja      nonTempClearLoop
-    jmp     nonTempClearDone
-
-alignDest:
-    test    ebx,ebx
-    je      alignDone
-alignLoop:
-    mov     [edi],al
-    add     edi,1
-    sub     ebx,1
-    jz      alignDone
-    test    edi,3
-    jnz     alignLoop
-    jmp     alignDone
-
-computeCanUseSSE2:
-    ; we are not using the sse2 register set,
-    ; just sse2 instructions (movnti),
-    ; thus we just ask the OS about the usability of the instructions
-    ; OS bugs about saving/restoring registers like in early versions
-    ; of Vista etc. in the WoW shouldn't matter
-
-    push    PF_XMMI64_INSTRUCTIONS_AVAILABLE
-    call    _IsProcessorFeaturePresent@4
-    mov     ecx,eax
-    xor     eax,eax         ; reset eax to 0
-    test    ecx,ecx
-    mov     canUseSSE2,1
-    jne     computeNonTempClear
-    mov     canUseSSE2,-1
-    xor     edx,edx
-    jmp     noNonTempClear
-
-_memclr_for_gc@8 endp
-
-    end
index 936b410..494e562 100644 (file)
@@ -1325,47 +1325,6 @@ QueueUserAPC(
          IN HANDLE hThread,
          IN ULONG_PTR dwData);
 
-#ifndef __has_builtin
-#define __has_builtin(x) 0
-#endif
-
-#if defined(HOST_X86) || defined(HOST_AMD64)
-// MSVC directly defines intrinsics for __cpuid and __cpuidex matching the below signatures
-// We define matching signatures for use on Unix platforms.
-//
-// IMPORTANT: Unlike MSVC, Unix does not explicitly zero ECX for __cpuid
-
-#if __has_builtin(__cpuid)
-extern "C" void __cpuid(int cpuInfo[4], int function_id);
-#else
-inline void __cpuid(int cpuInfo[4], int function_id)
-{
-    // Based on the Clang implementation provided in cpuid.h:
-    // https://github.com/llvm/llvm-project/blob/main/clang/lib/Headers/cpuid.h
-
-    __asm("  cpuid\n" \
-        : "=a"(cpuInfo[0]), "=b"(cpuInfo[1]), "=c"(cpuInfo[2]), "=d"(cpuInfo[3]) \
-        : "0"(function_id)
-    );
-}
-#endif // __cpuid
-
-#if __has_builtin(__cpuidex)
-extern "C" void __cpuidex(int cpuInfo[4], int function_id, int subFunction_id);
-#else
-inline void __cpuidex(int cpuInfo[4], int function_id, int subFunction_id)
-{
-    // Based on the Clang implementation provided in cpuid.h:
-    // https://github.com/llvm/llvm-project/blob/main/clang/lib/Headers/cpuid.h
-
-    __asm("  cpuid\n" \
-        : "=a"(cpuInfo[0]), "=b"(cpuInfo[1]), "=c"(cpuInfo[2]), "=d"(cpuInfo[3]) \
-        : "0"(function_id), "2"(subFunction_id)
-    );
-}
-#endif // __cpuidex
-#endif // HOST_X86 || HOST_AMD64
-
 #ifdef HOST_X86
 
 //
@@ -4518,19 +4477,6 @@ void _mm_setcsr(unsigned int i);
 
 /******************* PAL functions for CPU capability detection *******/
 
-#ifdef  __cplusplus
-
-#if defined(HOST_ARM64) && defined(TARGET_ARM64)
-class CORJIT_FLAGS;
-
-PALIMPORT
-VOID
-PALAPI
-PAL_GetJitCpuCapabilityFlags(CORJIT_FLAGS *flags);
-#endif // HOST_ARM64 && TARGET_ARM64
-
-#endif
-
 #ifdef __cplusplus
 
 PALIMPORT
index 804a712..26f0fa0 100644 (file)
@@ -106,7 +106,6 @@ set(ARCH_SOURCES
   arch/${PAL_ARCH_SOURCES_DIR}/context2.S
   arch/${PAL_ARCH_SOURCES_DIR}/debugbreak.S
   arch/${PAL_ARCH_SOURCES_DIR}/exceptionhelper.S
-  arch/${PAL_ARCH_SOURCES_DIR}/processor.cpp
 )
 
 if(NOT CLR_CMAKE_TARGET_OSX)
@@ -162,7 +161,6 @@ set(SOURCES
   misc/error.cpp
   misc/errorstrings.cpp
   misc/fmtmessage.cpp
-  misc/jitsupport.cpp
   misc/miscpalapi.cpp
   misc/perfjitdump.cpp
   misc/strutil.cpp
diff --git a/src/coreclr/pal/src/arch/amd64/processor.cpp b/src/coreclr/pal/src/arch/amd64/processor.cpp
deleted file mode 100644 (file)
index 0fe9ff7..0000000
+++ /dev/null
@@ -1,79 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*++
-
-
-
-Module Name:
-
-    processor.cpp
-
-Abstract:
-
-    Implementation of processor related functions for the Intel x86/x64
-    platforms. These functions are processor dependent.
-
-
-
---*/
-
-#include "pal/palinternal.h"
-
-/*++
-Function:
-XmmYmmStateSupport
-
-Check if OS has enabled both XMM and YMM state support
-
-Return value:
-1 if XMM and YMM are enabled, 0 otherwise
---*/
-extern "C" unsigned int XmmYmmStateSupport()
-{
-    unsigned int eax;
-    __asm("  mov $1, %%eax\n" \
-          "  cpuid\n" \
-          "  xor %%eax, %%eax\n" \
-          "  and $0x18000000, %%ecx\n" /* check for xsave feature set and that it is enabled by the OS */ \
-          "  cmp $0x18000000, %%ecx\n" \
-          "  jne end\n" \
-          "  xor %%ecx, %%ecx\n" \
-          "  xgetbv\n" \
-          "end:\n" \
-        : "=a"(eax) /* output in eax */ \
-        : /* no inputs */ \
-        : "ebx", "ecx", "edx" /* registers that are clobbered */
-      );
-    // Check OS has enabled both XMM and YMM state support
-    return ((eax & 0x06) == 0x06) ? 1 : 0;
-}
-
-/*++
-Function:
-Avx512StateSupport
-
-Check if OS has enabled XMM, YMM and ZMM state support
-
-Return value:
-1 if XMM, YMM and ZMM are enabled, 0 otherwise
---*/
-extern "C" unsigned int Avx512StateSupport()
-{
-    unsigned int eax;
-    __asm("  mov $1, %%eax\n" \
-          "  cpuid\n" \
-          "  xor %%eax, %%eax\n" \
-          "  and $0x18000000, %%ecx\n" /* check for xsave feature set and that it is enabled by the OS */ \
-          "  cmp $0x18000000, %%ecx\n" \
-          "  jne endz\n" \
-          "  xor %%ecx, %%ecx\n" \
-          "  xgetbv\n" \
-          "endz:\n" \
-        : "=a"(eax) /* output in eax */ \
-        : /* no inputs */ \
-        : "ebx", "ecx", "edx" /* registers that are clobbered */
-      );
-    // Check OS has enabled XMM, YMM and ZMM state support
-    return ((eax & 0x0E6) == 0x0E6) ? 1 : 0;
-}
diff --git a/src/coreclr/pal/src/arch/arm/processor.cpp b/src/coreclr/pal/src/arch/arm/processor.cpp
deleted file mode 100644 (file)
index 7048a5b..0000000
+++ /dev/null
@@ -1,21 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*++
-
-
-
-Module Name:
-
-    processor.cpp
-
-Abstract:
-
-    Implementation of processor related functions for the ARM
-    platform. These functions are processor dependent.
-
-
-
---*/
-
-#include "pal/palinternal.h"
diff --git a/src/coreclr/pal/src/arch/arm64/processor.cpp b/src/coreclr/pal/src/arch/arm64/processor.cpp
deleted file mode 100644 (file)
index ab4b84f..0000000
+++ /dev/null
@@ -1,21 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*++
-
-
-
-Module Name:
-
-    processor.cpp
-
-Abstract:
-
-    Implementation of processor related functions for the ARM64
-    platform. These functions are processor dependent.
-
-
-
---*/
-
-#include "pal/palinternal.h"
diff --git a/src/coreclr/pal/src/arch/i386/processor.cpp b/src/coreclr/pal/src/arch/i386/processor.cpp
deleted file mode 100644 (file)
index 7f60b75..0000000
+++ /dev/null
@@ -1,21 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*++
-
-
-
-Module Name:
-
-    processor.cpp
-
-Abstract:
-
-    Implementation of processor related functions for the Intel x86/x64
-    platforms. These functions are processor dependent.
-
-
-
---*/
-
-#include "pal/palinternal.h"
diff --git a/src/coreclr/pal/src/arch/loongarch64/processor.cpp b/src/coreclr/pal/src/arch/loongarch64/processor.cpp
deleted file mode 100644 (file)
index c0cc5be..0000000
+++ /dev/null
@@ -1,21 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*++
-
-
-
-Module Name:
-
-    processor.cpp
-
-Abstract:
-
-    Implementation of processor related functions for the LOONGARCH64
-    platform. These functions are processor dependent.
-
-
-
---*/
-
-#include "pal/palinternal.h"
diff --git a/src/coreclr/pal/src/arch/ppc64le/processor.cpp b/src/coreclr/pal/src/arch/ppc64le/processor.cpp
deleted file mode 100644 (file)
index 6680dbc..0000000
+++ /dev/null
@@ -1,21 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*++
-
-
-
-Module Name:
-
-    processor.cpp
-
-Abstract:
-
-    Implementation of processor related functions for the IBM PowerPC (ppc64le)
-    platforms. These functions are processor dependent.
-
-
-
---*/
-
-#include "pal/palinternal.h"
diff --git a/src/coreclr/pal/src/arch/riscv64/processor.cpp b/src/coreclr/pal/src/arch/riscv64/processor.cpp
deleted file mode 100644 (file)
index 4e89986..0000000
+++ /dev/null
@@ -1,21 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*++
-
-
-
-Module Name:
-
-    processor.cpp
-
-Abstract:
-
-    Implementation of processor related functions for the RISCV64
-    platform. These functions are processor dependent.
-
-
-
---*/
-
-#include "pal/palinternal.h"
diff --git a/src/coreclr/pal/src/arch/s390x/processor.cpp b/src/coreclr/pal/src/arch/s390x/processor.cpp
deleted file mode 100644 (file)
index d2d0fdd..0000000
+++ /dev/null
@@ -1,22 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-/*++
-
-
-
-Module Name:
-
-    processor.cpp
-
-Abstract:
-
-    Implementation of processor related functions for the IBM s390x
-    platforms. These functions are processor dependent.
-
-
-
---*/
-
-#include "pal/palinternal.h"
-
index 011860c..8856673 100644 (file)
@@ -28,6 +28,8 @@ extern "C"
 #include <signal.h>
 #include <pthread.h>
 
+#include <minipal/cpuid.h>
+
 /* A type to wrap the native context type, which is ucontext_t on some
  * platforms and another type elsewhere. */
 #if HAVE_UCONTEXT_T
diff --git a/src/coreclr/pal/src/misc/jitsupport.cpp b/src/coreclr/pal/src/misc/jitsupport.cpp
deleted file mode 100644 (file)
index 3042629..0000000
+++ /dev/null
@@ -1,302 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-
-#include "pal/palinternal.h"
-#include "pal/dbgmsg.h"
-SET_DEFAULT_DEBUG_CHANNEL(MISC);
-
-#include "../../../inc/corjitflags.h"
-
-#if HAVE_AUXV_HWCAP_H
-#include <sys/auxv.h>
-#include <asm/hwcap.h>
-#endif
-
-#if HAVE_SYSCTLBYNAME
-#include <sys/sysctl.h>
-#endif
-
-#if defined(HOST_ARM64) && defined(__linux__)
-struct CpuCapability
-{
-    const char* name;
-    unsigned long hwCapFlag;
-};
-
-static const CpuCapability CpuCapabilities[] = {
-    //{ "fp", HWCAP_FP },
-#ifdef HWCAP_ASIMD
-    { "asimd", HWCAP_ASIMD },
-#endif
-    //{ "evtstrm", HWCAP_EVTSTRM },
-#ifdef HWCAP_AES
-    { "aes", HWCAP_AES },
-#endif
-    //{ "pmull", HWCAP_PMULL },
-#ifdef HWCAP_SHA1
-    { "sha1", HWCAP_SHA1 },
-#endif
-#ifdef HWCAP_SHA2
-    { "sha2", HWCAP_SHA2 },
-#endif
-#ifdef HWCAP_CRC32
-    { "crc32", HWCAP_CRC32 },
-#endif
-#ifdef HWCAP_ATOMICS
-    { "atomics", HWCAP_ATOMICS },
-#endif
-    //{ "fphp", HWCAP_FPHP },
-    //{ "asimdhp", HWCAP_ASIMDHP },
-    //{ "cpuid", HWCAP_CPUID },
-#ifdef HWCAP_ASIMDRDM
-    { "asimdrdm", HWCAP_ASIMDRDM },
-#endif
-    //{ "jscvt", HWCAP_JSCVT },
-    //{ "fcma", HWCAP_FCMA },
-#ifdef HWCAP_LRCPC
-    { "lrcpc", HWCAP_LRCPC },
-#endif
-    //{ "dcpop", HWCAP_DCPOP },
-    //{ "sha3", HWCAP_SHA3 },
-    //{ "sm3", HWCAP_SM3 },
-    //{ "sm4", HWCAP_SM4 },
-#ifdef HWCAP_ASIMDDP
-    { "asimddp", HWCAP_ASIMDDP },
-#endif
-    //{ "sha512", HWCAP_SHA512 },
-    //{ "sve", HWCAP_SVE },
-    //{ "asimdfhm", HWCAP_ASIMDFHM },
-    //{ "dit", HWCAP_DIT },
-    //{ "uscat", HWCAP_USCAT },
-    //{ "ilrcpc", HWCAP_ILRCPC },
-    //{ "flagm", HWCAP_FLAGM },
-    //{ "ssbs", HWCAP_SSBS },
-    //{ "sb", HWCAP_SB },
-    //{ "paca", HWCAP_PACA },
-    //{ "pacg", HWCAP_PACG },
-
-    // Ensure the array is never empty
-    { "", 0 }
-};
-
-// Returns the HWCAP_* flag corresponding to the given capability name.
-// If the capability name is not recognized or unused at present, zero is returned.
-static unsigned long LookupCpuCapabilityFlag(const char* start, size_t length)
-{
-    for (size_t i = 0; i < ARRAY_SIZE(CpuCapabilities); i++)
-    {
-        const char* capabilityName = CpuCapabilities[i].name;
-        if ((length == strlen(capabilityName)) && (memcmp(start, capabilityName, length) == 0))
-        {
-            return CpuCapabilities[i].hwCapFlag;
-        }
-    }
-    return 0;
-}
-
-// Reads the first Features entry from /proc/cpuinfo (assuming other entries are essentially
-// identical) and translates it into a set of HWCAP_* flags.
-static unsigned long GetCpuCapabilityFlagsFromCpuInfo()
-{
-    unsigned long capabilityFlags = 0;
-    FILE* cpuInfoFile = fopen("/proc/cpuinfo", "r");
-
-    if (cpuInfoFile != NULL)
-    {
-        char* line = nullptr;
-        size_t lineLen = 0;
-
-        while (getline(&line, &lineLen, cpuInfoFile) != -1)
-        {
-            char* p = line;
-            while (isspace(*p)) p++;
-
-            if (memcmp(p, "Features", 8) != 0)
-                continue;
-
-            // Skip "Features" and look for ':'
-            p += 8;
-
-            while (isspace(*p)) p++;
-            if (*p != ':')
-                continue;
-
-            // Skip ':' and parse the list
-            p++;
-
-            while (true)
-            {
-                while (isspace(*p)) p++;
-                if (*p == 0)
-                    break;
-
-                char* start = p++;
-                while ((*p != 0) && !isspace(*p)) p++;
-
-                capabilityFlags |= LookupCpuCapabilityFlag(start, p - start);
-            }
-
-            break;
-        }
-
-        free(line);
-        fclose(cpuInfoFile);
-    }
-
-    return capabilityFlags;
-}
-#endif // defined(HOST_ARM64) && defined(__linux__)
-
-#if defined(HOST_ARM64) && defined(TARGET_ARM64)
-PALIMPORT
-VOID
-PALAPI
-PAL_GetJitCpuCapabilityFlags(CORJIT_FLAGS *flags)
-{
-    _ASSERTE(flags);
-
-#if HAVE_AUXV_HWCAP_H
-    unsigned long hwCap = getauxval(AT_HWCAP);
-
-#if defined(__linux__)
-    // getauxval(AT_HWCAP) returns zero on WSL1 (https://github.com/microsoft/WSL/issues/3682),
-    // fall back to reading capabilities from /proc/cpuinfo.
-    if (hwCap == 0)
-        hwCap = GetCpuCapabilityFlagsFromCpuInfo();
-#endif
-
-// HWCAP_* flags are introduced by ARM into the Linux kernel as new extensions are published.
-// For a given kernel, some of these flags may not be present yet.
-// Use ifdef for each to allow for compilation with any vintage kernel.
-// From a single binary distribution perspective, compiling with latest kernel asm/hwcap.h should
-// include all published flags.  Given flags are merged to kernel and published before silicon is
-// available, using the latest kernel for release should be sufficient.
-    flags->Set(InstructionSet_ArmBase);
-#ifdef HWCAP_AES
-    if (hwCap & HWCAP_AES)
-        flags->Set(InstructionSet_Aes);
-#endif
-#ifdef HWCAP_ATOMICS
-    if (hwCap & HWCAP_ATOMICS)
-        flags->Set(InstructionSet_Atomics);
-#endif
-#ifdef HWCAP_CRC32
-    if (hwCap & HWCAP_CRC32)
-        flags->Set(InstructionSet_Crc32);
-#endif
-#ifdef HWCAP_DCPOP
-//    if (hwCap & HWCAP_DCPOP)
-//        flags->Set(CORJIT_FLAGS::CORJIT_FLAG_HAS_ARM64_DCPOP);
-#endif
-#ifdef HWCAP_ASIMDDP
-    if (hwCap & HWCAP_ASIMDDP)
-        flags->Set(InstructionSet_Dp);
-#endif
-#ifdef HWCAP_FCMA
-//    if (hwCap & HWCAP_FCMA)
-//        flags->Set(CORJIT_FLAGS::CORJIT_FLAG_HAS_ARM64_FCMA);
-#endif
-#ifdef HWCAP_FP
-//    if (hwCap & HWCAP_FP)
-//        flags->Set(CORJIT_FLAGS::CORJIT_FLAG_HAS_ARM64_FP);
-#endif
-#ifdef HWCAP_FPHP
-//    if (hwCap & HWCAP_FPHP)
-//        flags->Set(CORJIT_FLAGS::CORJIT_FLAG_HAS_ARM64_FP16);
-#endif
-#ifdef HWCAP_JSCVT
-//    if (hwCap & HWCAP_JSCVT)
-//        flags->Set(CORJIT_FLAGS::CORJIT_FLAG_HAS_ARM64_JSCVT);
-#endif
-#ifdef HWCAP_LRCPC
-      if (hwCap & HWCAP_LRCPC)
-          flags->Set(InstructionSet_Rcpc);
-#endif
-#ifdef HWCAP_PMULL
-//    if (hwCap & HWCAP_PMULL)
-//        flags->Set(CORJIT_FLAGS::CORJIT_FLAG_HAS_ARM64_PMULL);
-#endif
-#ifdef HWCAP_SHA1
-    if (hwCap & HWCAP_SHA1)
-        flags->Set(InstructionSet_Sha1);
-#endif
-#ifdef HWCAP_SHA2
-    if (hwCap & HWCAP_SHA2)
-        flags->Set(InstructionSet_Sha256);
-#endif
-#ifdef HWCAP_SHA512
-//    if (hwCap & HWCAP_SHA512)
-//        flags->Set(CORJIT_FLAGS::CORJIT_FLAG_HAS_ARM64_SHA512);
-#endif
-#ifdef HWCAP_SHA3
-//    if (hwCap & HWCAP_SHA3)
-//        flags->Set(CORJIT_FLAGS::CORJIT_FLAG_HAS_ARM64_SHA3);
-#endif
-#ifdef HWCAP_ASIMD
-    if (hwCap & HWCAP_ASIMD)
-    {
-        flags->Set(InstructionSet_AdvSimd);
-        flags->Set(InstructionSet_VectorT128);
-    }
-#endif
-#ifdef HWCAP_ASIMDRDM
-    if (hwCap & HWCAP_ASIMDRDM)
-        flags->Set(InstructionSet_Rdm);
-#endif
-#ifdef HWCAP_ASIMDHP
-//    if (hwCap & HWCAP_ASIMDHP)
-//        flags->Set(CORJIT_FLAGS::CORJIT_FLAG_HAS_ARM64_ADVSIMD_FP16);
-#endif
-#ifdef HWCAP_SM3
-//    if (hwCap & HWCAP_SM3)
-//        flags->Set(CORJIT_FLAGS::CORJIT_FLAG_HAS_ARM64_SM3);
-#endif
-#ifdef HWCAP_SM4
-//    if (hwCap & HWCAP_SM4)
-//        flags->Set(CORJIT_FLAGS::CORJIT_FLAG_HAS_ARM64_SM4);
-#endif
-#ifdef HWCAP_SVE
-//    if (hwCap & HWCAP_SVE)
-//        flags->Set(CORJIT_FLAGS::CORJIT_FLAG_HAS_ARM64_SVE);
-#endif
-#else // !HAVE_AUXV_HWCAP_H
-#if HAVE_SYSCTLBYNAME
-    int64_t valueFromSysctl = 0;
-    size_t sz = sizeof(valueFromSysctl);
-
-    if ((sysctlbyname("hw.optional.arm.FEAT_AES", &valueFromSysctl, &sz, nullptr, 0) == 0) && (valueFromSysctl != 0))
-        flags->Set(InstructionSet_Aes);
-
-    if ((sysctlbyname("hw.optional.armv8_crc32", &valueFromSysctl, &sz, nullptr, 0) == 0) && (valueFromSysctl != 0))
-        flags->Set(InstructionSet_Crc32);
-
-    if ((sysctlbyname("hw.optional.arm.FEAT_DotProd", &valueFromSysctl, &sz, nullptr, 0) == 0) && (valueFromSysctl != 0))
-        flags->Set(InstructionSet_Dp);
-
-    if ((sysctlbyname("hw.optional.arm.FEAT_RDM", &valueFromSysctl, &sz, nullptr, 0) == 0) && (valueFromSysctl != 0))
-        flags->Set(InstructionSet_Rdm);
-
-    if ((sysctlbyname("hw.optional.arm.FEAT_SHA1", &valueFromSysctl, &sz, nullptr, 0) == 0) && (valueFromSysctl != 0))
-        flags->Set(InstructionSet_Sha1);
-
-    if ((sysctlbyname("hw.optional.arm.FEAT_SHA256", &valueFromSysctl, &sz, nullptr, 0) == 0) && (valueFromSysctl != 0))
-        flags->Set(InstructionSet_Sha256);
-
-    if ((sysctlbyname("hw.optional.armv8_1_atomics", &valueFromSysctl, &sz, nullptr, 0) == 0) && (valueFromSysctl != 0))
-        flags->Set(InstructionSet_Atomics);
-
-    if ((sysctlbyname("hw.optional.arm.FEAT_LRCPC", &valueFromSysctl, &sz, nullptr, 0) == 0) && (valueFromSysctl != 0))
-        flags->Set(InstructionSet_Rcpc);
-#endif // HAVE_SYSCTLBYNAME
-    // CoreCLR SIMD and FP support is included in ARM64 baseline
-    // On exceptional basis platforms may leave out support, but CoreCLR does not
-    // yet support such platforms
-    // Set baseline flags if OS has not exposed mechanism for us to determine CPU capabilities
-    flags->Set(InstructionSet_ArmBase);
-    flags->Set(InstructionSet_AdvSimd);
-    flags->Set(InstructionSet_VectorT128);
-    //    flags->Set(CORJIT_FLAGS::CORJIT_FLAG_HAS_ARM64_FP);
-#endif // HAVE_AUXV_HWCAP_H
-}
-#endif // HOST_ARM64 && TARGET_ARM64
index c450154..aa1c443 100644 (file)
@@ -634,38 +634,6 @@ NESTED_ENTRY ProfileTailcallNaked, _TEXT
 NESTED_END ProfileTailcallNaked, _TEXT
 
 
-;; extern "C" DWORD __stdcall xmmYmmStateSupport();
-LEAF_ENTRY xmmYmmStateSupport, _TEXT
-        mov     ecx, 0                  ; Specify xcr0
-        xgetbv                          ; result in EDX:EAX
-        and eax, 06H
-        cmp eax, 06H                    ; check OS has enabled both XMM and YMM state support
-        jne     not_supported
-        mov     eax, 1
-        jmp     done
-    not_supported:
-        mov     eax, 0
-    done:
-        ret
-LEAF_END xmmYmmStateSupport, _TEXT
-
-;; extern "C" DWORD __stdcall avx512StateSupport();
-LEAF_ENTRY avx512StateSupport, _TEXT
-        mov     ecx, 0                  ; Specify xcr0
-        xgetbv                          ; result in EDX:EAX
-        and eax, 0E6H
-        cmp eax, 0E6H                    ; check OS has enabled XMM, YMM and ZMM state support
-        jne     not_supported
-        mov     eax, 1
-        jmp     done
-    not_supported:
-        mov     eax, 0
-    done:
-        ret
-LEAF_END avx512StateSupport, _TEXT
-
-
-
 ; EXTERN_C void moveOWord(LPVOID* src, LPVOID* target);
 ; <NOTE>
 ; MOVDQA is not an atomic operation.  You need to call this function in a crst.
index d5bb054..e818594 100644 (file)
@@ -10,49 +10,6 @@ extern "C"
         PORTABILITY_ASSERT("Implement for PAL");
     }
 
-    DWORD xmmYmmStateSupport()
-    {
-        DWORD eax;
-        __asm("  xgetbv\n" \
-            : "=a"(eax) /*output in eax*/\
-            : "c"(0) /*inputs - 0 in ecx*/\
-            : "edx" /* registers that are clobbered*/
-          );
-        // check OS has enabled both XMM and YMM state support
-        return ((eax & 0x06) == 0x06) ? 1 : 0;
-    }
-
-#ifndef XSTATE_MASK_AVX512
-#define XSTATE_MASK_AVX512 (0xE0) /* 0b1110_0000 */
-#endif // XSTATE_MASK_AVX512
-
-    DWORD avx512StateSupport()
-    {
-#if defined(TARGET_OSX)
-        // MacOS has specialized behavior where it reports AVX512 support but doesnt
-        // actually enable AVX512 until the first instruction is executed and does so
-        // on a per thread basis. It does this by catching the faulting instruction and
-        // checking for the EVEX encoding. The kmov instructions, despite being part
-        // of the AVX512 instruction set are VEX encoded and dont trigger the enablement
-        //
-        // See https://github.com/apple/darwin-xnu/blob/main/osfmk/i386/fpu.c#L174
-
-        // TODO-AVX512: Enabling this for OSX requires ensuring threads explicitly trigger
-        // the AVX-512 enablement so that arbitrary usage doesn't cause downstream problems
-
-        return false;
-#else
-        DWORD eax;
-        __asm("  xgetbv\n" \
-            : "=a"(eax) /*output in eax*/\
-            : "c"(0) /*inputs - 0 in ecx*/\
-            : "edx" /* registers that are clobbered*/
-          );
-        // check OS has enabled XMM, YMM and ZMM state support
-        return ((eax & 0x0E6) == 0x0E6) ? 1 : 0;
-#endif
-    }
-
     void STDMETHODCALLTYPE JIT_ProfilerEnterLeaveTailcallStub(UINT_PTR ProfilerHandle)
     {
     }
index 51eec2b..defe0ca 100644 (file)
@@ -30,6 +30,9 @@
 
 #include "configuration.h"
 
+#include <minipal/cpufeatures.h>
+#include <minipal/cpuid.h>
+
 #ifdef HOST_64BIT
 #define CHECK_DUPLICATED_STRUCT_LAYOUTS
 #include "../debug/daccess/fntableaccess.h"
@@ -1260,70 +1263,6 @@ EEJitManager::EEJitManager()
     SetCpuInfo();
 }
 
-#if defined(TARGET_X86) || defined(TARGET_AMD64)
-
-bool DoesOSSupportAVX()
-{
-    LIMITED_METHOD_CONTRACT;
-
-#ifndef TARGET_UNIX
-    // On Windows we have an api(GetEnabledXStateFeatures) to check if AVX is supported
-    typedef DWORD64 (WINAPI *PGETENABLEDXSTATEFEATURES)();
-    PGETENABLEDXSTATEFEATURES pfnGetEnabledXStateFeatures = NULL;
-
-    HMODULE hMod = WszLoadLibraryEx(WINDOWS_KERNEL32_DLLNAME_W, NULL, LOAD_LIBRARY_SEARCH_SYSTEM32);
-    if(hMod == NULL)
-        return FALSE;
-
-    pfnGetEnabledXStateFeatures = (PGETENABLEDXSTATEFEATURES)GetProcAddress(hMod, "GetEnabledXStateFeatures");
-
-    if (pfnGetEnabledXStateFeatures == NULL)
-    {
-        return FALSE;
-    }
-
-    DWORD64 FeatureMask = pfnGetEnabledXStateFeatures();
-    if ((FeatureMask & XSTATE_MASK_AVX) == 0)
-    {
-        return FALSE;
-    }
-#endif // !TARGET_UNIX
-
-    return TRUE;
-}
-
-bool DoesOSSupportAVX512()
-{
-    LIMITED_METHOD_CONTRACT;
-
-#ifndef TARGET_UNIX
-    // On Windows we have an api(GetEnabledXStateFeatures) to check if AVX512 is supported
-    typedef DWORD64 (WINAPI *PGETENABLEDXSTATEFEATURES)();
-    PGETENABLEDXSTATEFEATURES pfnGetEnabledXStateFeatures = NULL;
-
-    HMODULE hMod = WszLoadLibraryEx(WINDOWS_KERNEL32_DLLNAME_W, NULL, LOAD_LIBRARY_SEARCH_SYSTEM32);
-    if(hMod == NULL)
-        return FALSE;
-
-    pfnGetEnabledXStateFeatures = (PGETENABLEDXSTATEFEATURES)GetProcAddress(hMod, "GetEnabledXStateFeatures");
-
-    if (pfnGetEnabledXStateFeatures == NULL)
-    {
-        return FALSE;
-    }
-
-    DWORD64 FeatureMask = pfnGetEnabledXStateFeatures();
-    if ((FeatureMask & XSTATE_MASK_AVX512) == 0)
-    {
-        return FALSE;
-    }
-#endif // !TARGET_UNIX
-
-    return TRUE;
-}
-
-#endif // defined(TARGET_X86) || defined(TARGET_AMD64)
-
 #ifdef TARGET_ARM64
 extern "C" DWORD64 __stdcall GetDataCacheZeroIDReg();
 #endif
@@ -1338,531 +1277,262 @@ void EEJitManager::SetCpuInfo()
 
     CORJIT_FLAGS CPUCompileFlags;
 
-    // Get the maximum bitwidth of Vector<T>, rounding down to the nearest multiple of 128-bits
-    uint32_t maxVectorTBitWidth = (CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_MaxVectorTBitWidth) / 128) * 128;
+    int cpuFeatures = minipal_getcpufeatures();
 
 #if defined(TARGET_X86) || defined(TARGET_AMD64)
-    CPUCompileFlags.Set(InstructionSet_X86Base);
-
-    // NOTE: The below checks are based on the information reported by
-    //   Intel® 64 and IA-32 Architectures Software Developer’s Manual. Volume 2
-    //   and
-    //   AMD64 Architecture Programmer’s Manual. Volume 3
-    // For more information, please refer to the CPUID instruction in the respective manuals
-
-    union XarchCpuInfo
-    {
-        struct {
-            uint32_t SteppingId       : 4;
-            uint32_t Model            : 4;
-            uint32_t FamilyId         : 4;
-            uint32_t ProcessorType    : 2;
-            uint32_t Reserved1        : 2; // Unused bits in the CPUID result
-            uint32_t ExtendedModelId  : 4;
-            uint32_t ExtendedFamilyId : 8;
-            uint32_t Reserved         : 4; // Unused bits in the CPUID result
-        };
-
-        uint32_t Value;
-    } xarchCpuInfo;
-
-    int cpuidInfo[4];
-
-    const int CPUID_EAX = 0;
-    const int CPUID_EBX = 1;
-    const int CPUID_ECX = 2;
-    const int CPUID_EDX = 3;
-
-    __cpuid(cpuidInfo, 0x00000000);
-
-    uint32_t maxCpuId = static_cast<uint32_t>(cpuidInfo[CPUID_EAX]);
-    _ASSERTE(maxCpuId >= 1);
-
-    bool isGenuineIntel = (cpuidInfo[CPUID_EBX] == 0x756E6547) && // Genu
-                          (cpuidInfo[CPUID_EDX] == 0x49656E69) && // ineI
-                          (cpuidInfo[CPUID_ECX] == 0x6C65746E);   // ntel
-
-    __cpuid(cpuidInfo, 0x00000001);
-    _ASSERTE((cpuidInfo[CPUID_EDX] & (1 << 15)) != 0);                                                    // CMOV
-
-    xarchCpuInfo.Value = cpuidInfo[CPUID_EAX];
 
 #if defined(TARGET_X86) && !defined(TARGET_WINDOWS)
     // Linux may still support no SSE/SSE2 for 32-bit
-    if ((cpuidInfo[CPUID_EDX] & (1 << 25)) != 0)
+    if ((cpuFeatures & XArchIntrinsicConstants_VectorT128) == 0)
     {
-        EEPOLICY_HANDLE_FATAL_ERROR_WITH_MESSAGE(COR_E_EXECUTIONENGINE, W("SSE is not supported on the processor."));
-    }
-    if ((cpuidInfo[CPUID_EDX] & (1 << 26)) != 0)
-    {
-        EEPOLICY_HANDLE_FATAL_ERROR_WITH_MESSAGE(COR_E_EXECUTIONENGINE, W("SSE2 is not supported on the processor."));
+        EEPOLICY_HANDLE_FATAL_ERROR_WITH_MESSAGE(COR_E_EXECUTIONENGINE, W("SSE and SSE2 processor support required."));
     }
 #else
-    _ASSERTE((cpuidInfo[CPUID_EDX] & (1 << 25)) != 0);                                                    // SSE
-    _ASSERTE((cpuidInfo[CPUID_EDX] & (1 << 26)) != 0);                                                    // SSE2
+    _ASSERTE((cpuFeatures & XArchIntrinsicConstants_VectorT128) != 0);
 #endif
 
-    CPUCompileFlags.Set(InstructionSet_SSE);
-    CPUCompileFlags.Set(InstructionSet_SSE2);
     CPUCompileFlags.Set(InstructionSet_VectorT128);
 
-    if ((cpuidInfo[CPUID_ECX] & (1 << 25)) != 0)                                                          // AESNI
-    {
-        CPUCompileFlags.Set(InstructionSet_AES);
-    }
-
-    if ((cpuidInfo[CPUID_ECX] & (1 << 1)) != 0)                                                           // PCLMULQDQ
-    {
-        CPUCompileFlags.Set(InstructionSet_PCLMULQDQ);
-    }
-
-    if ((cpuidInfo[CPUID_ECX] & (1 << 0)) != 0)                                                           // SSE3
-    {
-        CPUCompileFlags.Set(InstructionSet_SSE3);
-
-        if ((cpuidInfo[CPUID_ECX] & (1 << 9)) != 0)                                                       // SSSE3
-        {
-            CPUCompileFlags.Set(InstructionSet_SSSE3);
-
-            if ((cpuidInfo[CPUID_ECX] & (1 << 19)) != 0)                                                  // SSE4.1
-            {
-                CPUCompileFlags.Set(InstructionSet_SSE41);
-
-                if ((cpuidInfo[CPUID_ECX] & (1 << 20)) != 0)                                              // SSE4.2
-                {
-                    CPUCompileFlags.Set(InstructionSet_SSE42);
-
-                    if ((cpuidInfo[CPUID_ECX] & (1 << 22)) != 0)                                          // MOVBE
-                    {
-                        CPUCompileFlags.Set(InstructionSet_MOVBE);
-                    }
-
-                    if ((cpuidInfo[CPUID_ECX] & (1 << 23)) != 0)                                          // POPCNT
-                    {
-                        CPUCompileFlags.Set(InstructionSet_POPCNT);
-                    }
-
-                    const int requiredAvxEcxFlags = (1 << 27)                                             // OSXSAVE
-                                                  | (1 << 28);                                            // AVX
-
-                    if ((cpuidInfo[CPUID_ECX] & requiredAvxEcxFlags) == requiredAvxEcxFlags)
-                    {
-                        if(DoesOSSupportAVX() && (xmmYmmStateSupport() == 1))                             // XGETBV == 11
-                        {
-                            CPUCompileFlags.Set(InstructionSet_AVX);
-
-                            if ((cpuidInfo[CPUID_ECX] & (1 << 12)) != 0)                                  // FMA
-                            {
-                                CPUCompileFlags.Set(InstructionSet_FMA);
-                            }
-
-                            if (maxCpuId >= 0x07)
-                            {
-                                __cpuidex(cpuidInfo, 0x00000007, 0x00000000);
-
-                                if ((cpuidInfo[CPUID_EBX] & (1 << 5)) != 0)                               // AVX2
-                                {
-                                    CPUCompileFlags.Set(InstructionSet_AVX2);
-
-                                    if ((maxVectorTBitWidth == 0) || (maxVectorTBitWidth >= 256))
-                                    {
-                                        // We allow 256-bit Vector<T> by default
-                                        CPUCompileFlags.Clear(InstructionSet_VectorT128);
-                                        CPUCompileFlags.Set(InstructionSet_VectorT256);
-                                    }
-
-                                    if (DoesOSSupportAVX512() && (avx512StateSupport() == 1))             // XGETBV XRC0[7:5] == 111
-                                    {
-                                        if ((cpuidInfo[CPUID_EBX] & (1 << 16)) != 0)                      // AVX512F
-                                        {
-                                            CPUCompileFlags.Set(InstructionSet_AVX512F);
-
-                                            // TODO-XArch: Add support for 512-bit Vector<T>
-                                            assert(!CPUCompileFlags.IsSet(InstructionSet_VectorT512));
-
-                                            bool isAVX512_VLSupported = false;
-                                            if ((cpuidInfo[CPUID_EBX] & (1 << 31)) != 0)                  // AVX512VL
-                                            {
-                                                CPUCompileFlags.Set(InstructionSet_AVX512F_VL);
-                                                isAVX512_VLSupported = true;
-                                            }
-
-                                            if ((cpuidInfo[CPUID_EBX] & (1 << 30)) != 0)                  // AVX512BW
-                                            {
-                                                CPUCompileFlags.Set(InstructionSet_AVX512BW);
-                                                if (isAVX512_VLSupported)                                 // AVX512BW_VL
-                                                {
-                                                    CPUCompileFlags.Set(InstructionSet_AVX512BW_VL);
-                                                }
-                                            }
-
-                                            if ((cpuidInfo[CPUID_EBX] & (1 << 28)) != 0)                  // AVX512CD
-                                            {
-                                                CPUCompileFlags.Set(InstructionSet_AVX512CD);
-                                                if (isAVX512_VLSupported)                                 // AVX512CD_VL
-                                                {
-                                                    CPUCompileFlags.Set(InstructionSet_AVX512CD_VL);
-                                                }
-                                            }
-
-                                            if ((cpuidInfo[CPUID_EBX] & (1 << 17)) != 0)                  // AVX512DQ
-                                            {
-                                                CPUCompileFlags.Set(InstructionSet_AVX512DQ);
-                                                if (isAVX512_VLSupported)                                 // AVX512DQ_VL
-                                                {
-                                                    CPUCompileFlags.Set(InstructionSet_AVX512DQ_VL);
-                                                }
-                                            }
-
-                                            if ((cpuidInfo[CPUID_ECX] & (1 << 1)) != 0)                   // AVX512VBMI
-                                            {
-                                                CPUCompileFlags.Set(InstructionSet_AVX512VBMI);
-                                                if (isAVX512_VLSupported)                                 // AVX512VBMI_VL
-                                                {
-                                                    CPUCompileFlags.Set(InstructionSet_AVX512VBMI_VL);
-                                                }
-                                            }
-                                        }
-                                    }
-
-                                    __cpuidex(cpuidInfo, 0x00000007, 0x00000001);
-
-                                    if ((cpuidInfo[CPUID_EAX] & (1 << 4)) != 0)                           // AVX-VNNI
-                                    {
-                                        CPUCompileFlags.Set(InstructionSet_AVXVNNI);
-                                    }
-                                }
-                            }
-                        }
-                    }
-                }
-            }
-        }
-    }
-
-    if (maxCpuId >= 0x07)
-    {
-        __cpuidex(cpuidInfo, 0x00000007, 0x00000000);
-
-        if ((cpuidInfo[CPUID_EBX] & (1 << 3)) != 0)                                                       // BMI1
-        {
-            CPUCompileFlags.Set(InstructionSet_BMI1);
-        }
-
-        if ((cpuidInfo[CPUID_EBX] & (1 << 8)) != 0)                                                       // BMI2
-        {
-            CPUCompileFlags.Set(InstructionSet_BMI2);
-        }
-
-        if ((cpuidInfo[CPUID_EDX] & (1 << 14)) != 0)
-        {
-            CPUCompileFlags.Set(InstructionSet_X86Serialize);                                             // SERIALIZE
-        }
-    }
-
-    __cpuid(cpuidInfo, 0x80000000);
-    uint32_t maxCpuIdEx = static_cast<uint32_t>(cpuidInfo[CPUID_EAX]);
-
-    if (maxCpuIdEx >= 0x80000001)
-    {
-        __cpuid(cpuidInfo, 0x80000001);
-
-        if ((cpuidInfo[CPUID_ECX] & (1 << 5)) != 0)                                                       // LZCNT
-        {
-            CPUCompileFlags.Set(InstructionSet_LZCNT);
-        }
-    }
-#endif // defined(TARGET_X86) || defined(TARGET_AMD64)
-
-#if defined(TARGET_ARM64)
-#if defined(TARGET_UNIX)
-    PAL_GetJitCpuCapabilityFlags(&CPUCompileFlags);
-
-    // For HOST_ARM64, if OS has exposed mechanism to detect CPU capabilities, make sure it has AdvSimd capability.
-    // For other cases i.e. if !HOST_ARM64 but TARGET_ARM64 or HOST_ARM64 but OS doesn't expose way to detect
-    // CPU capabilities, we always enable AdvSimd flags by default.
-    //
-    if (!CPUCompileFlags.IsSet(InstructionSet_AdvSimd))
-    {
-        EEPOLICY_HANDLE_FATAL_ERROR_WITH_MESSAGE(COR_E_EXECUTIONENGINE, W("AdvSimd is not supported on the processor."));
-    }
-#elif defined(HOST_64BIT)
-    // FP and SIMD support are enabled by default
-    CPUCompileFlags.Set(InstructionSet_ArmBase);
-    CPUCompileFlags.Set(InstructionSet_AdvSimd);
-    CPUCompileFlags.Set(InstructionSet_VectorT128);
+    // Get the maximum bitwidth of Vector<T>, rounding down to the nearest multiple of 128-bits
+    uint32_t maxVectorTBitWidth = (CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_MaxVectorTBitWidth) / 128) * 128;
 
-    // PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE (30)
-    if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE))
+    if (((cpuFeatures & XArchIntrinsicConstants_VectorT256) != 0) && ((maxVectorTBitWidth == 0) || (maxVectorTBitWidth >= 256)))
     {
-        CPUCompileFlags.Set(InstructionSet_Aes);
-        CPUCompileFlags.Set(InstructionSet_Sha1);
-        CPUCompileFlags.Set(InstructionSet_Sha256);
-    }
-    // PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE (31)
-    if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE))
-    {
-        CPUCompileFlags.Set(InstructionSet_Crc32);
+        // We allow 256-bit Vector<T> by default
+        CPUCompileFlags.Clear(InstructionSet_VectorT128);
+        CPUCompileFlags.Set(InstructionSet_VectorT256);
     }
 
-// Older version of SDK would return false for these intrinsics
-// but make sure we pass the right values to the APIs
-#ifndef PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE
-#define PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE 34
-#endif
-#ifndef PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE
-#define PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE 43
-#endif
-#ifndef PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE
-#define PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE 45
-#endif
-
-    // PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE (34)
-    if (IsProcessorFeaturePresent(PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE))
-    {
-        CPUCompileFlags.Set(InstructionSet_Atomics);
-    }
+    // TODO-XArch: Add support for 512-bit Vector<T>
+    _ASSERTE(!CPUCompileFlags.IsSet(InstructionSet_VectorT512));
 
-    // PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE (43)
-    if (IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE))
+    if (CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableHWIntrinsic))
     {
-        CPUCompileFlags.Set(InstructionSet_Dp);
+        CPUCompileFlags.Set(InstructionSet_X86Base);
     }
 
-    // PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE (45)
-    if (IsProcessorFeaturePresent(PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE))
+    if (CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableSSE))
     {
-        CPUCompileFlags.Set(InstructionSet_Rcpc);
+        CPUCompileFlags.Set(InstructionSet_SSE);
     }
 
-#endif // HOST_64BIT
-    if (GetDataCacheZeroIDReg() == 4)
+    if (CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableSSE2))
     {
-        // DCZID_EL0<4> (DZP) indicates whether use of DC ZVA instructions is permitted (0) or prohibited (1).
-        // DCZID_EL0<3:0> (BS) specifies Log2 of the block size in words.
-        //
-        // We set the flag when the instruction is permitted and the block size is 64 bytes.
-        CPUCompileFlags.Set(InstructionSet_Dczva);
+        CPUCompileFlags.Set(InstructionSet_SSE2);
     }
 
-    if (CPUCompileFlags.IsSet(InstructionSet_Atomics))
+    if (((cpuFeatures & XArchIntrinsicConstants_Aes) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAES))
     {
-        g_arm64_atomics_present = true;
+        CPUCompileFlags.Set(InstructionSet_AES);
     }
-#endif // TARGET_ARM64
-
-    // Now that we've queried the actual hardware support, we need to adjust what is actually supported based
-    // on some externally available config switches that exist so users can test code for downlevel hardware.
 
-#if defined(TARGET_X86) || defined(TARGET_AMD64)
-    if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableHWIntrinsic))
+    if (((cpuFeatures & XArchIntrinsicConstants_Avx) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX))
     {
-        CPUCompileFlags.Clear(InstructionSet_X86Base);
+        CPUCompileFlags.Set(InstructionSet_AVX);
     }
 
-    if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAES))
+    if (((cpuFeatures & XArchIntrinsicConstants_Avx2) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX2))
     {
-        CPUCompileFlags.Clear(InstructionSet_AES);
+        CPUCompileFlags.Set(InstructionSet_AVX2);
     }
 
-    if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX))
+    if (((cpuFeatures & XArchIntrinsicConstants_Avx512f) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512F))
     {
-        CPUCompileFlags.Clear(InstructionSet_AVX);
+        CPUCompileFlags.Set(InstructionSet_AVX512F);
     }
 
-    if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX2))
+    if (((cpuFeatures & XArchIntrinsicConstants_Avx512f_vl) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512F_VL))
     {
-        CPUCompileFlags.Clear(InstructionSet_AVX2);
+        CPUCompileFlags.Set(InstructionSet_AVX512F_VL);
     }
 
-    if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512F))
+    if (((cpuFeatures & XArchIntrinsicConstants_Avx512bw) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512BW))
     {
-        CPUCompileFlags.Clear(InstructionSet_AVX512F);
+        CPUCompileFlags.Set(InstructionSet_AVX512BW);
     }
 
-    if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512F_VL))
+    if (((cpuFeatures & XArchIntrinsicConstants_Avx512bw_vl) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512BW_VL))
     {
-        CPUCompileFlags.Clear(InstructionSet_AVX512F_VL);
+        CPUCompileFlags.Set(InstructionSet_AVX512BW_VL);
     }
 
-    if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512BW))
+    if (((cpuFeatures & XArchIntrinsicConstants_Avx512cd) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512CD))
     {
-        CPUCompileFlags.Clear(InstructionSet_AVX512BW);
+        CPUCompileFlags.Set(InstructionSet_AVX512CD);
     }
 
-    if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512BW_VL))
+    if (((cpuFeatures & XArchIntrinsicConstants_Avx512cd_vl) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512CD_VL))
     {
-        CPUCompileFlags.Clear(InstructionSet_AVX512BW_VL);
+        CPUCompileFlags.Set(InstructionSet_AVX512CD_VL);
     }
 
-    if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512CD))
+    if (((cpuFeatures & XArchIntrinsicConstants_Avx512dq) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512DQ))
     {
-        CPUCompileFlags.Clear(InstructionSet_AVX512CD);
+        CPUCompileFlags.Set(InstructionSet_AVX512DQ);
     }
 
-    if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512CD_VL))
+    if (((cpuFeatures & XArchIntrinsicConstants_Avx512dq_vl) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512DQ_VL))
     {
-        CPUCompileFlags.Clear(InstructionSet_AVX512CD_VL);
+        CPUCompileFlags.Set(InstructionSet_AVX512DQ_VL);
     }
 
-    if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512DQ))
+    if (((cpuFeatures & XArchIntrinsicConstants_Avx512Vbmi) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512VBMI))
     {
-        CPUCompileFlags.Clear(InstructionSet_AVX512DQ);
+        CPUCompileFlags.Set(InstructionSet_AVX512VBMI);
     }
 
-    if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512DQ_VL))
+    if (((cpuFeatures & XArchIntrinsicConstants_Avx512Vbmi_vl) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512VBMI_VL))
     {
-        CPUCompileFlags.Clear(InstructionSet_AVX512DQ_VL);
+        CPUCompileFlags.Set(InstructionSet_AVX512VBMI_VL);
     }
 
-    if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512VBMI))
+    if (((cpuFeatures & XArchIntrinsicConstants_AvxVnni) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVXVNNI))
     {
-        CPUCompileFlags.Clear(InstructionSet_AVX512VBMI);
+        CPUCompileFlags.Set(InstructionSet_AVXVNNI);
     }
 
-    if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512VBMI_VL))
+    if (((cpuFeatures & XArchIntrinsicConstants_Bmi1) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableBMI1))
     {
-        CPUCompileFlags.Clear(InstructionSet_AVX512VBMI_VL);
+        CPUCompileFlags.Set(InstructionSet_BMI1);
     }
 
-    if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVXVNNI))
+    if (((cpuFeatures & XArchIntrinsicConstants_Bmi2) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableBMI2))
     {
-        CPUCompileFlags.Clear(InstructionSet_AVXVNNI);
+        CPUCompileFlags.Set(InstructionSet_BMI2);
     }
 
-    if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableBMI1))
+    if (((cpuFeatures & XArchIntrinsicConstants_Fma) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableFMA))
     {
-        CPUCompileFlags.Clear(InstructionSet_BMI1);
+        CPUCompileFlags.Set(InstructionSet_FMA);
     }
 
-    if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableBMI2))
+    if (((cpuFeatures & XArchIntrinsicConstants_Lzcnt) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableLZCNT))
     {
-        CPUCompileFlags.Clear(InstructionSet_BMI2);
+        CPUCompileFlags.Set(InstructionSet_LZCNT);
     }
 
-    if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableFMA))
+    if (((cpuFeatures & XArchIntrinsicConstants_Pclmulqdq) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnablePCLMULQDQ))
     {
-        CPUCompileFlags.Clear(InstructionSet_FMA);
+        CPUCompileFlags.Set(InstructionSet_PCLMULQDQ);
     }
 
-    if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableLZCNT))
+    if (((cpuFeatures & XArchIntrinsicConstants_Movbe) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableMOVBE))
     {
-        CPUCompileFlags.Clear(InstructionSet_LZCNT);
+        CPUCompileFlags.Set(InstructionSet_MOVBE);
     }
 
-    if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnablePCLMULQDQ))
+    if (((cpuFeatures & XArchIntrinsicConstants_Popcnt) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnablePOPCNT))
     {
-        CPUCompileFlags.Clear(InstructionSet_PCLMULQDQ);
+        CPUCompileFlags.Set(InstructionSet_POPCNT);
     }
 
-    if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableMOVBE))
+    // We need to additionally check that EXTERNAL_EnableSSE3_4 is set, as that
+    // is a preexisting config flag that controls the SSE3+ ISAs
+    if (((cpuFeatures & XArchIntrinsicConstants_Sse3) != 0) &&
+        CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableSSE3) &&
+        CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableSSE3_4))
     {
-        CPUCompileFlags.Clear(InstructionSet_MOVBE);
+        CPUCompileFlags.Set(InstructionSet_SSE3);
     }
 
-    if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnablePOPCNT))
+    if (((cpuFeatures & XArchIntrinsicConstants_Sse41) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableSSE41))
     {
-        CPUCompileFlags.Clear(InstructionSet_POPCNT);
+        CPUCompileFlags.Set(InstructionSet_SSE41);
     }
 
-    if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableSSE))
+    if (((cpuFeatures & XArchIntrinsicConstants_Sse42) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableSSE42))
     {
-        CPUCompileFlags.Clear(InstructionSet_SSE);
+        CPUCompileFlags.Set(InstructionSet_SSE42);
     }
 
-    if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableSSE2))
+    if (((cpuFeatures & XArchIntrinsicConstants_Ssse3) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableSSSE3))
     {
-        CPUCompileFlags.Clear(InstructionSet_SSE2);
+        CPUCompileFlags.Set(InstructionSet_SSSE3);
     }
 
-    // We need to additionally check that EXTERNAL_EnableSSE3_4 is set, as that
-    // is a prexisting config flag that controls the SSE3+ ISAs
-    if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableSSE3) ||
-        !CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableSSE3_4))
+    if (((cpuFeatures & XArchIntrinsicConstants_Serialize) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableX86Serialize))
     {
-        CPUCompileFlags.Clear(InstructionSet_SSE3);
+        CPUCompileFlags.Set(InstructionSet_X86Serialize);
     }
+#elif defined(TARGET_ARM64)
 
-    if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableSSE41))
+#if !defined(TARGET_WINDOWS)
+    // Linux may still support no AdvSimd
+    if ((cpuFeatures & ARM64IntrinsicConstants_VectorT128) == 0)
     {
-        CPUCompileFlags.Clear(InstructionSet_SSE41);
+        EEPOLICY_HANDLE_FATAL_ERROR_WITH_MESSAGE(COR_E_EXECUTIONENGINE, W("AdvSimd processor support required."));
     }
+#else
+    _ASSERTE((cpuFeatures & ARM64IntrinsicConstants_VectorT128) != 0);
+#endif
 
-    if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableSSE42))
-    {
-        CPUCompileFlags.Clear(InstructionSet_SSE42);
-    }
+    CPUCompileFlags.Set(InstructionSet_VectorT128);
 
-    if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableSSSE3))
+    if (CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableHWIntrinsic))
     {
-        CPUCompileFlags.Clear(InstructionSet_SSSE3);
+        CPUCompileFlags.Set(InstructionSet_ArmBase);
     }
 
-    if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableX86Serialize))
-    {
-        CPUCompileFlags.Clear(InstructionSet_X86Serialize);
-    }
-#elif defined(TARGET_ARM64)
-    if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableHWIntrinsic))
+    if (((cpuFeatures & ARM64IntrinsicConstants_AdvSimd) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableArm64AdvSimd))
     {
-        CPUCompileFlags.Clear(InstructionSet_ArmBase);
+        CPUCompileFlags.Set(InstructionSet_AdvSimd);
     }
 
-    if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableArm64AdvSimd))
+    if (((cpuFeatures & ARM64IntrinsicConstants_Aes) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableArm64Aes))
     {
-        CPUCompileFlags.Clear(InstructionSet_AdvSimd);
+        CPUCompileFlags.Set(InstructionSet_Aes);
     }
 
-    if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableArm64Aes))
+    if (((cpuFeatures & ARM64IntrinsicConstants_Atomics) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableArm64Atomics))
     {
-        CPUCompileFlags.Clear(InstructionSet_Aes);
+        CPUCompileFlags.Set(InstructionSet_Atomics);
     }
 
-    if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableArm64Atomics))
+    if (((cpuFeatures & ARM64IntrinsicConstants_Rcpc) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableArm64Rcpc))
     {
-        CPUCompileFlags.Clear(InstructionSet_Atomics);
+        CPUCompileFlags.Set(InstructionSet_Rcpc);
     }
 
-    if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableArm64Rcpc))
+    if (((cpuFeatures & ARM64IntrinsicConstants_Crc32) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableArm64Crc32))
     {
-        CPUCompileFlags.Clear(InstructionSet_Rcpc);
+        CPUCompileFlags.Set(InstructionSet_Crc32);
     }
 
-    if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableArm64Crc32))
+    if (((cpuFeatures & ARM64IntrinsicConstants_Dp) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableArm64Dp))
     {
-        CPUCompileFlags.Clear(InstructionSet_Crc32);
+        CPUCompileFlags.Set(InstructionSet_Dp);
     }
 
-    if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableArm64Dczva))
+    if (((cpuFeatures & ARM64IntrinsicConstants_Rdm) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableArm64Rdm))
     {
-        CPUCompileFlags.Clear(InstructionSet_Dczva);
+        CPUCompileFlags.Set(InstructionSet_Rdm);
     }
 
-    if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableArm64Dp))
+    if (((cpuFeatures & ARM64IntrinsicConstants_Sha1) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableArm64Sha1))
     {
-        CPUCompileFlags.Clear(InstructionSet_Dp);
+        CPUCompileFlags.Set(InstructionSet_Sha1);
     }
 
-    if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableArm64Rdm))
+    if (((cpuFeatures & ARM64IntrinsicConstants_Sha256) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableArm64Sha256))
     {
-        CPUCompileFlags.Clear(InstructionSet_Rdm);
+        CPUCompileFlags.Set(InstructionSet_Sha256);
     }
 
-    if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableArm64Sha1))
+    // DCZID_EL0<4> (DZP) indicates whether use of DC ZVA instructions is permitted (0) or prohibited (1).
+    // DCZID_EL0<3:0> (BS) specifies Log2 of the block size in words.
+    //
+    // We set the flag when the instruction is permitted and the block size is 64 bytes.
+    if ((GetDataCacheZeroIDReg() == 4) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableArm64Dczva))
     {
-        CPUCompileFlags.Clear(InstructionSet_Sha1);
+        CPUCompileFlags.Set(InstructionSet_Dczva);
     }
 
-    if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableArm64Sha256))
+    if ((cpuFeatures & ARM64IntrinsicConstants_Atomics) != 0)
     {
-        CPUCompileFlags.Clear(InstructionSet_Sha256);
+        g_arm64_atomics_present = true;
     }
 #endif
 
-#if defined(TARGET_LOONGARCH64)
-    // TODO-LoongArch64: set LoongArch64's InstructionSet features !
-#endif // TARGET_LOONGARCH64
-
     // These calls are very important as it ensures the flags are consistent with any
     // removals specified above. This includes removing corresponding 64-bit ISAs
     // and any other implications such as SSE2 depending on SSE or AdvSimd on ArmBase
@@ -1871,8 +1541,42 @@ void EEJitManager::SetCpuInfo()
     CPUCompileFlags.EnsureValidInstructionSetSupport();
 
 #if defined(TARGET_X86) || defined(TARGET_AMD64)
+    int cpuidInfo[4];
+
+    const int CPUID_EAX = 0;
+    const int CPUID_EBX = 1;
+    const int CPUID_ECX = 2;
+    const int CPUID_EDX = 3;
+
+    __cpuid(cpuidInfo, 0x00000000);
+
+    bool isGenuineIntel = (cpuidInfo[CPUID_EBX] == 0x756E6547) && // Genu
+                          (cpuidInfo[CPUID_EDX] == 0x49656E69) && // ineI
+                          (cpuidInfo[CPUID_ECX] == 0x6C65746E);   // ntel
+
     if (isGenuineIntel)
     {
+        union XarchCpuInfo
+        {
+            struct {
+                uint32_t SteppingId       : 4;
+                uint32_t Model            : 4;
+                uint32_t FamilyId         : 4;
+                uint32_t ProcessorType    : 2;
+                uint32_t Reserved1        : 2; // Unused bits in the CPUID result
+                uint32_t ExtendedModelId  : 4;
+                uint32_t ExtendedFamilyId : 8;
+                uint32_t Reserved         : 4; // Unused bits in the CPUID result
+            };
+
+            uint32_t Value;
+        } xarchCpuInfo;
+
+        __cpuid(cpuidInfo, 0x00000001);
+        _ASSERTE((cpuidInfo[CPUID_EDX] & (1 << 15)) != 0);                                                    // CMOV
+
+        xarchCpuInfo.Value = cpuidInfo[CPUID_EAX];
+
         // Some architectures can experience frequency throttling when executing
         // 512-bit width instructions. To account for this we set the
         // default preferred vector width to 256-bits in some scenarios. Power
index e51a324..08360e9 100644 (file)
@@ -22,6 +22,8 @@
 #include "eventtrace.h"
 #include "threadsuspend.h"
 
+#include <minipal/cpuid.h>
+
 #if defined(_DEBUG) && !defined (WRITE_BARRIER_CHECK)
 #define WRITE_BARRIER_CHECK 1
 #endif
index b91b37b..a606bfb 100644 (file)
@@ -99,60 +99,18 @@ static uint32_t avx512StateSupport()
     return ((_xgetbv(0) & 0xE6) == 0x0E6) ? 1 : 0;
 }
 
-static HMODULE LoadKernel32dll()
-{
-    return LoadLibraryExW(L"kernel32", NULL, LOAD_LIBRARY_SEARCH_SYSTEM32);
-}
-
 static bool IsAvxEnabled()
 {
-    typedef DWORD64(WINAPI* PGETENABLEDXSTATEFEATURES)();
-    PGETENABLEDXSTATEFEATURES pfnGetEnabledXStateFeatures = NULL;
-
-    HMODULE hMod = LoadKernel32dll();
-    if (hMod == NULL)
-        return FALSE;
-
-    pfnGetEnabledXStateFeatures = (PGETENABLEDXSTATEFEATURES)GetProcAddress(hMod, "GetEnabledXStateFeatures");
-
-    if (pfnGetEnabledXStateFeatures == NULL)
-    {
-        return FALSE;
-    }
-
-    DWORD64 FeatureMask = pfnGetEnabledXStateFeatures();
-    if ((FeatureMask & XSTATE_MASK_AVX) == 0)
-    {
-        return FALSE;
-    }
-
-    return TRUE;
+    DWORD64 FeatureMask = GetEnabledXStateFeatures();
+    return ((FeatureMask & XSTATE_MASK_AVX) != 0);
 }
 
 static bool IsAvx512Enabled()
 {
-    typedef DWORD64(WINAPI* PGETENABLEDXSTATEFEATURES)();
-    PGETENABLEDXSTATEFEATURES pfnGetEnabledXStateFeatures = NULL;
-
-    HMODULE hMod = LoadKernel32dll();
-    if (hMod == NULL)
-        return FALSE;
-
-    pfnGetEnabledXStateFeatures = (PGETENABLEDXSTATEFEATURES)GetProcAddress(hMod, "GetEnabledXStateFeatures");
-
-    if (pfnGetEnabledXStateFeatures == NULL)
-    {
-        return FALSE;
-    }
-
-    DWORD64 FeatureMask = pfnGetEnabledXStateFeatures();
-    if ((FeatureMask & XSTATE_MASK_AVX512) == 0)
-    {
-        return FALSE;
-    }
-
-    return TRUE;
+    DWORD64 FeatureMask = GetEnabledXStateFeatures();
+    return ((FeatureMask & XSTATE_MASK_AVX512) != 0);
 }
+
 #endif // defined(TARGET_X86) || defined(TARGET_AMD64)
 #endif // TARGET_WINDOWS
 
@@ -524,18 +482,6 @@ int minipal_getcpufeatures(void)
 #endif // TARGET_UNIX
 
 #if defined(TARGET_WINDOWS)
-// Older version of SDK would return false for these intrinsics
-// but make sure we pass the right values to the APIs
-#ifndef PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE
-#define PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE 34
-#endif
-#ifndef PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE
-#define PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE 43
-#endif
-#ifndef PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE
-#define PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE 45
-#endif
-
     // FP and SIMD support are enabled by default
     result |= ARM64IntrinsicConstants_AdvSimd | ARM64IntrinsicConstants_VectorT128;
 
index b8a6bd2..f6d39ba 100644 (file)
@@ -4,15 +4,15 @@
 #ifndef HAVE_MINIPAL_CPUID_H
 #define HAVE_MINIPAL_CPUID_H
 
-#if defined(TARGET_X86) || defined(TARGET_AMD64)
+#if defined(HOST_X86) || defined(HOST_AMD64)
 
-#if defined(TARGET_WINDOWS)
+#if defined(HOST_WINDOWS)
 
 #include <intrin.h>
 
-#endif // TARGET_WINDOWS
+#endif // HOST_WINDOWS
 
-#if defined(TARGET_UNIX)
+#if defined(HOST_UNIX)
 // MSVC directly defines intrinsics for __cpuid and __cpuidex matching the below signatures
 // We define matching signatures for use on Unix platforms.
 //
@@ -48,7 +48,7 @@ static void __cpuidex(int cpuInfo[4], int function_id, int subFunction_id)
 void __cpuidex(int cpuInfo[4], int function_id, int subFunction_id);
 #endif
 
-#endif // TARGET_UNIX
-#endif // defined(TARGET_X86) || defined(TARGET_AMD64)
+#endif // HOST_UNIX
+#endif // defined(HOST_X86) || defined(HOST_AMD64)
 
 #endif