Allow the user to control the MaxVectorTBitWidth (#85551)
author Tanner Gooding <tagoo@outlook.com>
Mon, 5 Jun 2023 21:58:28 +0000 (14:58 -0700)
committer GitHub <noreply@github.com>
Mon, 5 Jun 2023 21:58:28 +0000 (14:58 -0700)
* Expose DOTNET_MaxVectorTBitWidth and an undocumented DOTNET_PreferredVectorBitWidth

* Ensure SPMI keeps a getMaxVectorTBitWidth implementation

* Fix the non-xarch vm build

* Remove getMaxVectorTBitWidth from the JIT/EE interface, it's no longer needed

* Move SetCpuInfo down into the EEJitManager constructor

* Remove getXarchCpuInfo in favor of passing `JIT_FLAG_VECTOR512_THROTTLING`

* Make sure CORINFO_XARCH_CPU is fully removed

* Have ENCODE_VERIFY_TYPE_LAYOUT not fail-fast for Vector<T> size differences

* Only encode types containing Vector<T> as check, not verify

* Remove changes that were extracted to separate PRs

* Ensure that the optimistic flags are a strict superset of the supported flags

* Make VectorT128/256/512 proper instruction sets and only allow one to be active at a time

* Don't allow avxvnni to be "optimistic" since that brings in avx2

* Ensure we handle HWIntrinsics being disabled

* Ensure that the Vector<T> size ISAs are covered by FromInstructionSet

* Ensure that `getMaxVectorByteLength` being 0 is handled

* Ensure NAOT startup can correctly check for the VectorT size bits

* Have BlkOpKindUnroll account for SIMD being disabled

* Ensure InstructionSet_VectorT128 is set in the fallback path for PAL_GetJitCpuCapabilityFlags

42 files changed:
src/coreclr/inc/clrconfigvalues.h
src/coreclr/inc/corinfoinstructionset.h
src/coreclr/inc/jiteeversionguid.h
src/coreclr/inc/readytoruninstructionset.h
src/coreclr/jit/codegenxarch.cpp
src/coreclr/jit/compiler.h
src/coreclr/jit/hwintrinsicxarch.cpp
src/coreclr/jit/lclvars.cpp
src/coreclr/jit/lsraxarch.cpp
src/coreclr/jit/simd.cpp
src/coreclr/jit/simdashwintrinsic.cpp
src/coreclr/nativeaot/BuildIntegration/Microsoft.NETCore.Native.targets
src/coreclr/nativeaot/Runtime/IntrinsicConstants.h
src/coreclr/nativeaot/Runtime/startup.cpp
src/coreclr/nativeaot/Runtime/unix/PalRedhawkUnix.cpp
src/coreclr/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp
src/coreclr/nativeaot/docs/optimizing.md
src/coreclr/pal/src/misc/jitsupport.cpp
src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs
src/coreclr/tools/Common/InstructionSetHelpers.cs
src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs
src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs
src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs
src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt
src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetGenerator.cs
src/coreclr/tools/Common/TypeSystem/Common/DefType.FieldLayout.cs
src/coreclr/tools/Common/TypeSystem/Common/FieldLayoutAlgorithm.cs
src/coreclr/tools/Common/TypeSystem/Common/MetadataFieldLayoutAlgorithm.cs
src/coreclr/tools/Common/TypeSystem/Interop/IL/MarshalHelpers.cs
src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/HardwareIntrinsicHelpers.Aot.cs
src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/VectorOfTFieldLayoutAlgorithm.cs
src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/TypeFixupSignature.cs
src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/ReadyToRunCodegenCompilation.cs
src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/ReadyToRunCompilerContext.cs
src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs
src/coreclr/tools/aot/ILCompiler/ILCompilerRootCommand.cs
src/coreclr/tools/aot/ILCompiler/Program.cs
src/coreclr/tools/aot/crossgen2/Crossgen2RootCommand.cs
src/coreclr/tools/aot/crossgen2/Program.cs
src/coreclr/tools/aot/crossgen2/Properties/Resources.resx
src/coreclr/vm/codeman.cpp
src/coreclr/vm/methodtablebuilder.cpp

index 33241f4..824be81 100644 (file)
@@ -346,12 +346,6 @@ RETAIL_CONFIG_DWORD_INFO(EXTERNAL_JitRegisterFP, W("JitRegisterFP"), 3, "Control
 RETAIL_CONFIG_DWORD_INFO(INTERNAL_JitELTHookEnabled, W("JitELTHookEnabled"), 0, "On ARM, setting this will emit Enter/Leave/TailCall callbacks")
 RETAIL_CONFIG_DWORD_INFO(INTERNAL_JitMemStats, W("JitMemStats"), 0, "Display JIT memory usage statistics")
 RETAIL_CONFIG_DWORD_INFO(INTERNAL_JitVNMapSelBudget, W("JitVNMapSelBudget"), 100, "Max # of MapSelect's considered for a particular top-level invocation.")
-#if defined(TARGET_AMD64) || defined(TARGET_X86) || defined(TARGET_ARM64)
-#define EXTERNAL_FeatureSIMD_Default 1
-#else // !(defined(TARGET_AMD64) || defined(TARGET_X86) || defined(TARGET_ARM64))
-#define EXTERNAL_FeatureSIMD_Default 0
-#endif // !(defined(TARGET_AMD64) || defined(TARGET_X86) || defined(TARGET_ARM64))
-RETAIL_CONFIG_DWORD_INFO(INTERNAL_SIMD16ByteOnly, W("SIMD16ByteOnly"), 0, "Limit maximum SIMD vector length to 16 bytes (used by x64_arm64_altjit)")
 RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_TrackDynamicMethodDebugInfo, W("TrackDynamicMethodDebugInfo"), 0, "Specifies whether debug info should be generated and tracked for dynamic methods")
 
 #ifdef FEATURE_MULTICOREJIT
@@ -734,15 +728,17 @@ RETAIL_CONFIG_DWORD_INFO(INTERNAL_GDBJitEmitDebugFrame, W("GDBJitEmitDebugFrame"
 #endif
 #endif
 
+RETAIL_CONFIG_DWORD_INFO(EXTERNAL_MaxVectorTBitWidth,           W("MaxVectorTBitWidth"),        0, "The maximum width, in bits, that Vector<T> is allowed to be. A value less than 128 is treated as the system default.")
+
 //
 // Hardware Intrinsic ISAs; keep in sync with jitconfigvalues.h
 //
 #if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
 //TODO: should implement LoongArch64's features.
 //TODO-RISCV64-CQ: should implement RISCV64's features.
-RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableHWIntrinsic,  W("EnableHWIntrinsic"),  0, "Allows Base+ hardware intrinsics to be disabled")
+RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableHWIntrinsic,            W("EnableHWIntrinsic"),         0, "Allows Base+ hardware intrinsics to be disabled")
 #else
-RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableHWIntrinsic,  W("EnableHWIntrinsic"),  1, "Allows Base+ hardware intrinsics to be disabled")
+RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableHWIntrinsic,            W("EnableHWIntrinsic"),         1, "Allows Base+ hardware intrinsics to be disabled")
 #endif // defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
 
 #if defined(TARGET_AMD64) || defined(TARGET_X86)
index d19a6ad..5aecf46 100644 (file)
@@ -14,7 +14,7 @@
 enum CORINFO_InstructionSet
 {
     InstructionSet_ILLEGAL = 0,
-    InstructionSet_NONE = 63,
+    InstructionSet_NONE = 127,
 #ifdef TARGET_ARM64
     InstructionSet_ArmBase=1,
     InstructionSet_AdvSimd=2,
@@ -29,14 +29,15 @@ enum CORINFO_InstructionSet
     InstructionSet_Vector128=11,
     InstructionSet_Dczva=12,
     InstructionSet_Rcpc=13,
-    InstructionSet_ArmBase_Arm64=14,
-    InstructionSet_AdvSimd_Arm64=15,
-    InstructionSet_Aes_Arm64=16,
-    InstructionSet_Crc32_Arm64=17,
-    InstructionSet_Dp_Arm64=18,
-    InstructionSet_Rdm_Arm64=19,
-    InstructionSet_Sha1_Arm64=20,
-    InstructionSet_Sha256_Arm64=21,
+    InstructionSet_VectorT128=14,
+    InstructionSet_ArmBase_Arm64=15,
+    InstructionSet_AdvSimd_Arm64=16,
+    InstructionSet_Aes_Arm64=17,
+    InstructionSet_Crc32_Arm64=18,
+    InstructionSet_Dp_Arm64=19,
+    InstructionSet_Rdm_Arm64=20,
+    InstructionSet_Sha1_Arm64=21,
+    InstructionSet_Sha256_Arm64=22,
 #endif // TARGET_ARM64
 #ifdef TARGET_AMD64
     InstructionSet_X86Base=1,
@@ -71,35 +72,38 @@ enum CORINFO_InstructionSet
     InstructionSet_AVX512DQ_VL=30,
     InstructionSet_AVX512VBMI=31,
     InstructionSet_AVX512VBMI_VL=32,
-    InstructionSet_X86Base_X64=33,
-    InstructionSet_SSE_X64=34,
-    InstructionSet_SSE2_X64=35,
-    InstructionSet_SSE3_X64=36,
-    InstructionSet_SSSE3_X64=37,
-    InstructionSet_SSE41_X64=38,
-    InstructionSet_SSE42_X64=39,
-    InstructionSet_AVX_X64=40,
-    InstructionSet_AVX2_X64=41,
-    InstructionSet_AES_X64=42,
-    InstructionSet_BMI1_X64=43,
-    InstructionSet_BMI2_X64=44,
-    InstructionSet_FMA_X64=45,
-    InstructionSet_LZCNT_X64=46,
-    InstructionSet_PCLMULQDQ_X64=47,
-    InstructionSet_POPCNT_X64=48,
-    InstructionSet_AVXVNNI_X64=49,
-    InstructionSet_MOVBE_X64=50,
-    InstructionSet_X86Serialize_X64=51,
-    InstructionSet_AVX512F_X64=52,
-    InstructionSet_AVX512F_VL_X64=53,
-    InstructionSet_AVX512BW_X64=54,
-    InstructionSet_AVX512BW_VL_X64=55,
-    InstructionSet_AVX512CD_X64=56,
-    InstructionSet_AVX512CD_VL_X64=57,
-    InstructionSet_AVX512DQ_X64=58,
-    InstructionSet_AVX512DQ_VL_X64=59,
-    InstructionSet_AVX512VBMI_X64=60,
-    InstructionSet_AVX512VBMI_VL_X64=61,
+    InstructionSet_VectorT128=33,
+    InstructionSet_VectorT256=34,
+    InstructionSet_VectorT512=35,
+    InstructionSet_X86Base_X64=36,
+    InstructionSet_SSE_X64=37,
+    InstructionSet_SSE2_X64=38,
+    InstructionSet_SSE3_X64=39,
+    InstructionSet_SSSE3_X64=40,
+    InstructionSet_SSE41_X64=41,
+    InstructionSet_SSE42_X64=42,
+    InstructionSet_AVX_X64=43,
+    InstructionSet_AVX2_X64=44,
+    InstructionSet_AES_X64=45,
+    InstructionSet_BMI1_X64=46,
+    InstructionSet_BMI2_X64=47,
+    InstructionSet_FMA_X64=48,
+    InstructionSet_LZCNT_X64=49,
+    InstructionSet_PCLMULQDQ_X64=50,
+    InstructionSet_POPCNT_X64=51,
+    InstructionSet_AVXVNNI_X64=52,
+    InstructionSet_MOVBE_X64=53,
+    InstructionSet_X86Serialize_X64=54,
+    InstructionSet_AVX512F_X64=55,
+    InstructionSet_AVX512F_VL_X64=56,
+    InstructionSet_AVX512BW_X64=57,
+    InstructionSet_AVX512BW_VL_X64=58,
+    InstructionSet_AVX512CD_X64=59,
+    InstructionSet_AVX512CD_VL_X64=60,
+    InstructionSet_AVX512DQ_X64=61,
+    InstructionSet_AVX512DQ_VL_X64=62,
+    InstructionSet_AVX512VBMI_X64=63,
+    InstructionSet_AVX512VBMI_VL_X64=64,
 #endif // TARGET_AMD64
 #ifdef TARGET_X86
     InstructionSet_X86Base=1,
@@ -134,35 +138,38 @@ enum CORINFO_InstructionSet
     InstructionSet_AVX512DQ_VL=30,
     InstructionSet_AVX512VBMI=31,
     InstructionSet_AVX512VBMI_VL=32,
-    InstructionSet_X86Base_X64=33,
-    InstructionSet_SSE_X64=34,
-    InstructionSet_SSE2_X64=35,
-    InstructionSet_SSE3_X64=36,
-    InstructionSet_SSSE3_X64=37,
-    InstructionSet_SSE41_X64=38,
-    InstructionSet_SSE42_X64=39,
-    InstructionSet_AVX_X64=40,
-    InstructionSet_AVX2_X64=41,
-    InstructionSet_AES_X64=42,
-    InstructionSet_BMI1_X64=43,
-    InstructionSet_BMI2_X64=44,
-    InstructionSet_FMA_X64=45,
-    InstructionSet_LZCNT_X64=46,
-    InstructionSet_PCLMULQDQ_X64=47,
-    InstructionSet_POPCNT_X64=48,
-    InstructionSet_AVXVNNI_X64=49,
-    InstructionSet_MOVBE_X64=50,
-    InstructionSet_X86Serialize_X64=51,
-    InstructionSet_AVX512F_X64=52,
-    InstructionSet_AVX512F_VL_X64=53,
-    InstructionSet_AVX512BW_X64=54,
-    InstructionSet_AVX512BW_VL_X64=55,
-    InstructionSet_AVX512CD_X64=56,
-    InstructionSet_AVX512CD_VL_X64=57,
-    InstructionSet_AVX512DQ_X64=58,
-    InstructionSet_AVX512DQ_VL_X64=59,
-    InstructionSet_AVX512VBMI_X64=60,
-    InstructionSet_AVX512VBMI_VL_X64=61,
+    InstructionSet_VectorT128=33,
+    InstructionSet_VectorT256=34,
+    InstructionSet_VectorT512=35,
+    InstructionSet_X86Base_X64=36,
+    InstructionSet_SSE_X64=37,
+    InstructionSet_SSE2_X64=38,
+    InstructionSet_SSE3_X64=39,
+    InstructionSet_SSSE3_X64=40,
+    InstructionSet_SSE41_X64=41,
+    InstructionSet_SSE42_X64=42,
+    InstructionSet_AVX_X64=43,
+    InstructionSet_AVX2_X64=44,
+    InstructionSet_AES_X64=45,
+    InstructionSet_BMI1_X64=46,
+    InstructionSet_BMI2_X64=47,
+    InstructionSet_FMA_X64=48,
+    InstructionSet_LZCNT_X64=49,
+    InstructionSet_PCLMULQDQ_X64=50,
+    InstructionSet_POPCNT_X64=51,
+    InstructionSet_AVXVNNI_X64=52,
+    InstructionSet_MOVBE_X64=53,
+    InstructionSet_X86Serialize_X64=54,
+    InstructionSet_AVX512F_X64=55,
+    InstructionSet_AVX512F_VL_X64=56,
+    InstructionSet_AVX512BW_X64=57,
+    InstructionSet_AVX512BW_VL_X64=58,
+    InstructionSet_AVX512CD_X64=59,
+    InstructionSet_AVX512CD_VL_X64=60,
+    InstructionSet_AVX512DQ_X64=61,
+    InstructionSet_AVX512DQ_VL_X64=62,
+    InstructionSet_AVX512VBMI_X64=63,
+    InstructionSet_AVX512VBMI_VL_X64=64,
 #endif // TARGET_X86
 
 };
@@ -170,7 +177,7 @@ enum CORINFO_InstructionSet
 struct CORINFO_InstructionSetFlags
 {
 private:
-    static const int32_t FlagsFieldCount = 1;
+    static const int32_t FlagsFieldCount = 2;
     static const int32_t BitsPerFlagsField = sizeof(uint64_t) * 8;
     uint64_t _flags[FlagsFieldCount] = { };
 
@@ -404,6 +411,8 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins
             resultflags.RemoveInstructionSet(InstructionSet_Vector64);
         if (resultflags.HasInstructionSet(InstructionSet_Vector128) && !resultflags.HasInstructionSet(InstructionSet_AdvSimd))
             resultflags.RemoveInstructionSet(InstructionSet_Vector128);
+        if (resultflags.HasInstructionSet(InstructionSet_VectorT128) && !resultflags.HasInstructionSet(InstructionSet_AdvSimd))
+            resultflags.RemoveInstructionSet(InstructionSet_VectorT128);
 #endif // TARGET_ARM64
 #ifdef TARGET_AMD64
         if (resultflags.HasInstructionSet(InstructionSet_X86Base) && !resultflags.HasInstructionSet(InstructionSet_X86Base_X64))
@@ -594,6 +603,12 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins
             resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI_VL);
         if (resultflags.HasInstructionSet(InstructionSet_AVX512VBMI_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL))
             resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI_VL);
+        if (resultflags.HasInstructionSet(InstructionSet_VectorT128) && !resultflags.HasInstructionSet(InstructionSet_SSE2))
+            resultflags.RemoveInstructionSet(InstructionSet_VectorT128);
+        if (resultflags.HasInstructionSet(InstructionSet_VectorT256) && !resultflags.HasInstructionSet(InstructionSet_AVX2))
+            resultflags.RemoveInstructionSet(InstructionSet_VectorT256);
+        if (resultflags.HasInstructionSet(InstructionSet_VectorT512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F))
+            resultflags.RemoveInstructionSet(InstructionSet_VectorT512);
         if (resultflags.HasInstructionSet(InstructionSet_AVX512F) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL))
             resultflags.RemoveInstructionSet(InstructionSet_AVX512F);
         if (resultflags.HasInstructionSet(InstructionSet_AVX512F) && !resultflags.HasInstructionSet(InstructionSet_AVX512CD_VL))
@@ -674,6 +689,12 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins
             resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI_VL);
         if (resultflags.HasInstructionSet(InstructionSet_AVX512VBMI_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL))
             resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI_VL);
+        if (resultflags.HasInstructionSet(InstructionSet_VectorT128) && !resultflags.HasInstructionSet(InstructionSet_SSE2))
+            resultflags.RemoveInstructionSet(InstructionSet_VectorT128);
+        if (resultflags.HasInstructionSet(InstructionSet_VectorT256) && !resultflags.HasInstructionSet(InstructionSet_AVX2))
+            resultflags.RemoveInstructionSet(InstructionSet_VectorT256);
+        if (resultflags.HasInstructionSet(InstructionSet_VectorT512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F))
+            resultflags.RemoveInstructionSet(InstructionSet_VectorT512);
         if (resultflags.HasInstructionSet(InstructionSet_AVX512F) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL))
             resultflags.RemoveInstructionSet(InstructionSet_AVX512F);
         if (resultflags.HasInstructionSet(InstructionSet_AVX512F) && !resultflags.HasInstructionSet(InstructionSet_AVX512CD_VL))
@@ -738,6 +759,8 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet)
             return "Dczva";
         case InstructionSet_Rcpc :
             return "Rcpc";
+        case InstructionSet_VectorT128 :
+            return "VectorT128";
 #endif // TARGET_ARM64
 #ifdef TARGET_AMD64
         case InstructionSet_X86Base :
@@ -862,6 +885,12 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet)
             return "AVX512VBMI_VL";
         case InstructionSet_AVX512VBMI_VL_X64 :
             return "AVX512VBMI_VL_X64";
+        case InstructionSet_VectorT128 :
+            return "VectorT128";
+        case InstructionSet_VectorT256 :
+            return "VectorT256";
+        case InstructionSet_VectorT512 :
+            return "VectorT512";
 #endif // TARGET_AMD64
 #ifdef TARGET_X86
         case InstructionSet_X86Base :
@@ -928,6 +957,12 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet)
             return "AVX512VBMI";
         case InstructionSet_AVX512VBMI_VL :
             return "AVX512VBMI_VL";
+        case InstructionSet_VectorT128 :
+            return "VectorT128";
+        case InstructionSet_VectorT256 :
+            return "VectorT256";
+        case InstructionSet_VectorT512 :
+            return "VectorT512";
 #endif // TARGET_X86
 
         default:
@@ -958,6 +993,7 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst
         case READYTORUN_INSTRUCTION_Sha256: return InstructionSet_Sha256;
         case READYTORUN_INSTRUCTION_Atomics: return InstructionSet_Atomics;
         case READYTORUN_INSTRUCTION_Rcpc: return InstructionSet_Rcpc;
+        case READYTORUN_INSTRUCTION_VectorT128: return InstructionSet_VectorT128;
 #endif // TARGET_ARM64
 #ifdef TARGET_AMD64
         case READYTORUN_INSTRUCTION_X86Base: return InstructionSet_X86Base;
@@ -989,6 +1025,9 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst
         case READYTORUN_INSTRUCTION_Avx512DQ_VL: return InstructionSet_AVX512DQ_VL;
         case READYTORUN_INSTRUCTION_Avx512Vbmi: return InstructionSet_AVX512VBMI;
         case READYTORUN_INSTRUCTION_Avx512Vbmi_VL: return InstructionSet_AVX512VBMI_VL;
+        case READYTORUN_INSTRUCTION_VectorT128: return InstructionSet_VectorT128;
+        case READYTORUN_INSTRUCTION_VectorT256: return InstructionSet_VectorT256;
+        case READYTORUN_INSTRUCTION_VectorT512: return InstructionSet_VectorT512;
 #endif // TARGET_AMD64
 #ifdef TARGET_X86
         case READYTORUN_INSTRUCTION_X86Base: return InstructionSet_X86Base;
@@ -1020,6 +1059,9 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst
         case READYTORUN_INSTRUCTION_Avx512DQ_VL: return InstructionSet_AVX512DQ_VL;
         case READYTORUN_INSTRUCTION_Avx512Vbmi: return InstructionSet_AVX512VBMI;
         case READYTORUN_INSTRUCTION_Avx512Vbmi_VL: return InstructionSet_AVX512VBMI_VL;
+        case READYTORUN_INSTRUCTION_VectorT128: return InstructionSet_VectorT128;
+        case READYTORUN_INSTRUCTION_VectorT256: return InstructionSet_VectorT256;
+        case READYTORUN_INSTRUCTION_VectorT512: return InstructionSet_VectorT512;
 #endif // TARGET_X86
 
         default:
index 9bb39b0..ebcf491 100644 (file)
@@ -43,11 +43,11 @@ typedef const GUID *LPCGUID;
 #define GUID_DEFINED
 #endif // !GUID_DEFINED
 
-constexpr GUID JITEEVersionIdentifier = { /* d4414be1-70e4-46ac-8866-ca3a6c2f8422 */
-    0xd4414be1,
-    0x70e4,
-    0x46ac,
-    {0x88, 0x66, 0xca, 0x3a, 0x6c, 0x2f, 0x84, 0x22}
+constexpr GUID JITEEVersionIdentifier = { /* fda2f9dd-6b3e-4ecd-a7b8-79e5edf1f072 */
+    0xfda2f9dd,
+    0x6b3e,
+    0x4ecd,
+    {0xa7, 0xb8, 0x79, 0xe5, 0xed, 0xf1, 0xf0, 0x72}
   };
 
 //////////////////////////////////////////////////////////////////////////////////////////////////////////
index faf4a50..0a9a78e 100644 (file)
@@ -47,6 +47,9 @@ enum ReadyToRunInstructionSet
     READYTORUN_INSTRUCTION_Avx512DQ_VL=36,
     READYTORUN_INSTRUCTION_Avx512Vbmi=37,
     READYTORUN_INSTRUCTION_Avx512Vbmi_VL=38,
+    READYTORUN_INSTRUCTION_VectorT128=39,
+    READYTORUN_INSTRUCTION_VectorT256=40,
+    READYTORUN_INSTRUCTION_VectorT512=41,
 
 };
 
index 3d6291e..6d7a973 100644 (file)
@@ -3401,15 +3401,15 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* node)
     assert(srcOffset < (INT32_MAX - static_cast<int>(size)));
     assert(dstOffset < (INT32_MAX - static_cast<int>(size)));
 
-    if (size >= XMM_REGSIZE_BYTES)
+    // Get the largest SIMD register available if the size is large enough
+    unsigned regSize = compiler->roundDownSIMDSize(size);
+
+    if ((size >= regSize) && (regSize > 0))
     {
         regNumber tempReg = node->GetSingleTempReg(RBM_ALLFLOAT);
 
         instruction simdMov = simdUnalignedMovIns();
 
-        // Get the largest SIMD register available if the size is large enough
-        unsigned regSize = compiler->roundDownSIMDSize(size);
-
         auto emitSimdMovs = [&]() {
             if (srcLclNum != BAD_VAR_NUM)
             {
index 6ba5e07..65379c6 100644 (file)
@@ -8662,17 +8662,31 @@ private:
         CLANG_FORMAT_COMMENT_ANCHOR;
 
 #if defined(TARGET_XARCH)
-        if (compExactlyDependsOn(InstructionSet_AVX2))
+        // TODO-XArch: Add support for 512-bit Vector<T>
+        assert(!compIsaSupportedDebugOnly(InstructionSet_VectorT512));
+
+        if (compExactlyDependsOn(InstructionSet_VectorT256))
         {
-            // TODO-XArch-AVX512 : Return ZMM_REGSIZE_BYTES once Vector<T> supports AVX512.
+            assert(!compIsaSupportedDebugOnly(InstructionSet_VectorT128));
             return YMM_REGSIZE_BYTES;
         }
-        else
+        else if (compExactlyDependsOn(InstructionSet_VectorT128))
         {
             return XMM_REGSIZE_BYTES;
         }
+        else
+        {
+            return 0;
+        }
 #elif defined(TARGET_ARM64)
-        return FP_REGSIZE_BYTES;
+        if (compExactlyDependsOn(InstructionSet_VectorT128))
+        {
+            return FP_REGSIZE_BYTES;
+        }
+        else
+        {
+            return 0;
+        }
 #else
         assert(!"getVectorTByteLength() unimplemented on target arch");
         unreached();
@@ -8691,23 +8705,33 @@ private:
     uint32_t getMaxVectorByteLength() const
     {
 #if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_XARCH)
-        if (compOpportunisticallyDependsOn(InstructionSet_AVX))
+        if (compOpportunisticallyDependsOn(InstructionSet_AVX512F))
         {
-            if (compOpportunisticallyDependsOn(InstructionSet_AVX512F))
-            {
-                return ZMM_REGSIZE_BYTES;
-            }
-            else
-            {
-                return YMM_REGSIZE_BYTES;
-            }
+            return ZMM_REGSIZE_BYTES;
         }
-        else
+        else if (compOpportunisticallyDependsOn(InstructionSet_AVX))
+        {
+            return YMM_REGSIZE_BYTES;
+        }
+        else if (compOpportunisticallyDependsOn(InstructionSet_SSE))
         {
             return XMM_REGSIZE_BYTES;
         }
+        else
+        {
+            assert((JitConfig.EnableHWIntrinsic() == 0) || (JitConfig.EnableSSE() == 0));
+            return 0;
+        }
 #elif defined(TARGET_ARM64)
-        return FP_REGSIZE_BYTES;
+        if (compOpportunisticallyDependsOn(InstructionSet_AdvSimd))
+        {
+            return FP_REGSIZE_BYTES;
+        }
+        else
+        {
+            assert((JitConfig.EnableHWIntrinsic() == 0) || (JitConfig.EnableArm64AdvSimd() == 0));
+            return 0;
+        }
 #else
         assert(!"getMaxVectorByteLength() unimplemented on target arch");
         unreached();
index 379509a..9c38e45 100644 (file)
@@ -783,10 +783,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic        intrinsic,
                 return impSpecialIntrinsic(NI_Vector128_ToVector256, clsHnd, method, sig, simdBaseJitType, retType,
                                            simdSize);
             }
-            else
+            else if (vectorTByteLength == XMM_REGSIZE_BYTES)
             {
-                assert(vectorTByteLength == XMM_REGSIZE_BYTES);
-
                 // We fold away the cast here, as it only exists to satisfy
                 // the type system. It is safe to do this here since the retNode type
                 // and the signature return type are both the same TYP_SIMD.
@@ -795,6 +793,10 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic        intrinsic,
                 SetOpLclRelatedToSIMDIntrinsic(retNode);
                 assert(retNode->gtType == getSIMDTypeForSize(getSIMDTypeSizeInBytes(sig->retTypeSigClass)));
             }
+            else
+            {
+                assert(vectorTByteLength == 0);
+            }
             break;
         }
 
@@ -919,10 +921,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic        intrinsic,
 
                 break;
             }
-            else
+            else if (vectorTByteLength == XMM_REGSIZE_BYTES)
             {
-                assert(vectorTByteLength == XMM_REGSIZE_BYTES);
-
                 if (compExactlyDependsOn(InstructionSet_AVX))
                 {
                     // We support Vector256 but Vector<T> is only 16-bytes, so we should
@@ -941,6 +941,10 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic        intrinsic,
                     }
                 }
             }
+            else
+            {
+                assert(vectorTByteLength == 0);
+            }
             break;
         }
 
@@ -969,10 +973,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic        intrinsic,
                 }
                 break;
             }
-            else
+            else if (vectorTByteLength == XMM_REGSIZE_BYTES)
             {
-                assert(vectorTByteLength == XMM_REGSIZE_BYTES);
-
                 if (compExactlyDependsOn(InstructionSet_AVX512F))
                 {
                     // We support Vector512 but Vector<T> is only 16-bytes, so we should
@@ -991,6 +993,10 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic        intrinsic,
                     }
                 }
             }
+            else
+            {
+                assert(vectorTByteLength == 0);
+            }
             break;
         }
 
index ff91e42..80d9493 100644 (file)
@@ -1757,7 +1757,8 @@ bool Compiler::StructPromotionHelper::CanPromoteStructType(CORINFO_CLASS_HANDLE
 
 #if defined(FEATURE_SIMD)
     // getMaxVectorByteLength() represents the size of the largest primitive type that we can struct promote.
-    const unsigned maxSize = MAX_NumOfFieldsInPromotableStruct * compiler->getMaxVectorByteLength();
+    const unsigned maxSize =
+        MAX_NumOfFieldsInPromotableStruct * max(compiler->getMaxVectorByteLength(), sizeof(double));
 #else  // !FEATURE_SIMD
     // sizeof(double) represents the size of the largest primitive type that we can struct promote.
     const unsigned maxSize = MAX_NumOfFieldsInPromotableStruct * sizeof(double);
index 765de40..08ae13c 100644 (file)
@@ -1166,7 +1166,7 @@ int LinearScan::BuildCall(GenTreeCall* call)
         // The return value will be on the X87 stack, and we will need to move it.
         dstCandidates = allRegs(registerType);
 #else  // !TARGET_X86
-        dstCandidates                     = RBM_FLOATRET;
+        dstCandidates = RBM_FLOATRET;
 #endif // !TARGET_X86
     }
     else
@@ -1378,12 +1378,10 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
         {
             case GenTreeBlk::BlkOpKindUnroll:
             {
-#ifdef TARGET_AMD64
-                const bool canUse16BytesSimdMov = !blkNode->IsOnHeapAndContainsReferences();
-                const bool willUseSimdMov       = canUse16BytesSimdMov && (size >= 16);
-#else
-                const bool willUseSimdMov = (size >= 16);
-#endif
+                const bool canUse16BytesSimdMov =
+                    !blkNode->IsOnHeapAndContainsReferences() && compiler->IsBaselineSimdIsaSupported();
+                const bool willUseSimdMov = canUse16BytesSimdMov && (size >= XMM_REGSIZE_BYTES);
+
                 if (willUseSimdMov)
                 {
                     buildInternalFloatRegisterDefForNode(blkNode, internalFloatRegCandidates());
@@ -1440,8 +1438,26 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
                 break;
 
             case GenTreeBlk::BlkOpKindUnroll:
-                if ((size % XMM_REGSIZE_BYTES) != 0)
+            {
+                unsigned regSize   = compiler->roundDownSIMDSize(size);
+                unsigned remainder = size;
+
+                if ((size >= regSize) && (regSize > 0))
+                {
+                    // We need a float temporary if we're doing SIMD operations
+
+                    buildInternalFloatRegisterDefForNode(blkNode, internalFloatRegCandidates());
+                    SetContainsAVXFlags(size);
+
+                    remainder %= regSize;
+                }
+
+                if ((remainder > 0) && ((regSize == 0) || (isPow2(remainder) && (remainder <= REGSIZE_BYTES))))
                 {
+                    // We need an int temporary if we're not doing SIMD operations,
+                    // or if we are but the remainder is a power of 2 that is no
+                    // larger than the size of a register
+
                     regMaskTP regMask = availableIntRegs;
 #ifdef TARGET_X86
                     if ((size & 1) != 0)
@@ -1453,13 +1469,8 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
 #endif
                     internalIntDef = buildInternalIntRegisterDefForNode(blkNode, regMask);
                 }
-
-                if (size >= XMM_REGSIZE_BYTES)
-                {
-                    buildInternalFloatRegisterDefForNode(blkNode, internalFloatRegCandidates());
-                    SetContainsAVXFlags(size);
-                }
                 break;
+            }
 
             case GenTreeBlk::BlkOpKindUnrollMemmove:
             {
index 97b7d06..2e10faa 100644 (file)
@@ -299,7 +299,10 @@ CorInfoType Compiler::getBaseJitTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeH
                         JITDUMP(" Found Vector<%s>\n", varTypeName(JitType2PreciseVarType(simdBaseJitType)));
                         size = getVectorTByteLength();
 
-                        assert(size != 0);
+                        if (size == 0)
+                        {
+                            return CORINFO_TYPE_UNDEF;
+                        }
                         break;
                     }
 
index 0cb64b3..7f71f1c 100644 (file)
@@ -219,8 +219,14 @@ SimdAsHWIntrinsicClassId SimdAsHWIntrinsicInfo::lookupClassId(Compiler*   comp,
                 }
 #endif // TARGET_XARCH
 
-                assert(vectorTByteLength == 16);
-                return SimdAsHWIntrinsicClassId::VectorT128;
+                if (vectorTByteLength == 16)
+                {
+                    return SimdAsHWIntrinsicClassId::VectorT128;
+                }
+                else
+                {
+                    return SimdAsHWIntrinsicClassId::Unknown;
+                }
             }
             break;
         }
index e304054..e11fac5 100644 (file)
@@ -229,6 +229,7 @@ The .NET Foundation licenses this file to you under the MIT license.
       <IlcArg Condition="$(IlcInstructionSet) != ''" Include="--instruction-set:$(IlcInstructionSet)" />
       <IlcArg Condition="$(IlcDisableReflection) == 'true'" Include="--reflectiondata:none" />
       <IlcArg Condition="$(IlcDisableReflection) == 'true'" Include="--feature:System.Collections.Generic.DefaultComparers=false" />
+      <IlcArg Condition="$(IlcMaxVectorTBitWidth) != ''" Include="--max-vectort-bitwidth:$(IlcMaxVectorTBitWidth)" />
       <IlcArg Condition="$(IlcSingleThreaded) == 'true'" Include="--parallelism:1" />
       <IlcArg Condition="$(IlcSystemModule) != ''" Include="--systemmodule:$(IlcSystemModule)" />
       <IlcArg Condition="$(IlcDumpIL) == 'true'" Include="--ildump:$(NativeIntermediateOutputPath)%(ManagedBinary.Filename).il" />
index ad7d2e1..41ec8de 100644 (file)
@@ -35,6 +35,9 @@ enum XArchIntrinsicConstants
     XArchIntrinsicConstants_Avx512Vbmi = 0x800000,
     XArchIntrinsicConstants_Avx512Vbmi_vl = 0x1000000,
     XArchIntrinsicConstants_Serialize = 0x2000000,
+    XArchIntrinsicConstants_VectorT128 = 0x4000000,
+    XArchIntrinsicConstants_VectorT256 = 0x8000000,
+    XArchIntrinsicConstants_VectorT512 = 0x10000000,
 };
 #endif //HOST_X86 || HOST_AMD64
 
@@ -50,6 +53,7 @@ enum ARM64IntrinsicConstants
     ARM64IntrinsicConstants_Sha256 = 0x0040,
     ARM64IntrinsicConstants_Atomics = 0x0080,
     ARM64IntrinsicConstants_Rcpc = 0x0100,
+    ARM64IntrinsicConstants_VectorT128 = 0x0200,
 };
 
 // Bit position for the ARM64IntrinsicConstants_Atomics flags, to be used with tbz / tbnz instructions
index 7ce9836..5874015 100644 (file)
@@ -202,6 +202,8 @@ bool DetectCPUFeatures()
 
         if ((cpuidInfo[CPUID_EDX] & requiredBaselineEdxFlags) == requiredBaselineEdxFlags)
         {
+            g_cpuFeatures |= XArchIntrinsicConstants_VectorT128;
+
             if ((cpuidInfo[CPUID_ECX] & (1 << 25)) != 0)                                                                // AESNI
             {
                 g_cpuFeatures |= XArchIntrinsicConstants_Aes;
@@ -259,12 +261,14 @@ bool DetectCPUFeatures()
                                         if ((cpuidInfo[CPUID_EBX] & (1 << 5)) != 0)                                     // AVX2
                                         {
                                             g_cpuFeatures |= XArchIntrinsicConstants_Avx2;
+                                            g_cpuFeatures |= XArchIntrinsicConstants_VectorT256;
 
                                             if (PalIsAvx512Enabled() && (avx512StateSupport() == 1))                    // XGETBV XRC0[7:5] == 111
                                             {
                                                 if ((cpuidInfo[CPUID_EBX] & (1 << 16)) != 0)                            // AVX512F
                                                 {
                                                     g_cpuFeatures |= XArchIntrinsicConstants_Avx512f;
+                                                    g_cpuFeatures |= XArchIntrinsicConstants_VectorT512;
 
                                                     bool isAVX512_VLSupported = false;
                                                     if ((cpuidInfo[CPUID_EBX] & (1 << 31)) != 0)                        // AVX512VL
index a856be4..69ee3da 100644 (file)
@@ -1446,7 +1446,7 @@ REDHAWK_PALEXPORT void REDHAWK_PALAPI PAL_GetCpuCapabilityFlags(int* flags)
 #endif
 #ifdef HWCAP_ASIMD
     if (hwCap & HWCAP_ASIMD)
-        *flags |= ARM64IntrinsicConstants_AdvSimd;
+        *flags |= ARM64IntrinsicConstants_AdvSimd | ARM64IntrinsicConstants_VectorT128;
 #endif
 #ifdef HWCAP_ASIMDRDM
     if (hwCap & HWCAP_ASIMDRDM)
@@ -1545,7 +1545,7 @@ REDHAWK_PALEXPORT void REDHAWK_PALAPI PAL_GetCpuCapabilityFlags(int* flags)
     // Every ARM64 CPU should support SIMD and FP
     // If the OS have no function to query for CPU capabilities we set just these
 
-    *flags |= ARM64IntrinsicConstants_AdvSimd;
+    *flags |= ARM64IntrinsicConstants_AdvSimd | ARM64IntrinsicConstants_VectorT128;
 #endif // HAVE_AUXV_HWCAP_H
 }
 #endif
index c7b1f3e..0c591bd 100644 (file)
@@ -796,7 +796,7 @@ REDHAWK_PALIMPORT void REDHAWK_PALAPI PAL_GetCpuCapabilityFlags(int* flags)
 #endif
 
     // FP and SIMD support are enabled by default
-    *flags |= ARM64IntrinsicConstants_AdvSimd;
+    *flags |= ARM64IntrinsicConstants_AdvSimd | ARM64IntrinsicConstants_VectorT128;
 
     if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE))
     {
index 9f2c9ae..9a90eb9 100644 (file)
@@ -40,4 +40,5 @@ Since `PublishTrimmed` is implied to be true with Native AOT, some framework fea
 * `<OptimizationPreference>Speed</OptimizationPreference>`: when generating optimized code, favor code execution speed.
 * `<OptimizationPreference>Size</OptimizationPreference>`: when generating optimized code, favor smaller code size.
 * `<IlcInstructionSet>`: By default, the compiler targets the minimum instruction set supported by the target OS and architecture. This option allows targeting newer instruction sets for better performance. The native binary will require the instruction sets to be supported by the hardware in order to run. For example, `<IlcInstructionSet>avx2,bmi2,fma,pclmul,popcnt,aes</IlcInstructionSet>` will produce binary that takes advantage of instruction sets that are typically present on current Intel and AMD processors. Run `ilc --help` for the full list of available instruction sets. `ilc` can be executed from the NativeAOT package in your local nuget cache e.g. `%USERPROFILE%\.nuget\packages\runtime.win-x64.microsoft.dotnet.ilcompiler\8.0.0-...\tools\ilc.exe` on Windows or `~/.nuget/packages/runtime.linux-arm64.microsoft.dotnet.ilcompiler/8.0.0-.../tools/ilc` on Linux.
+* `<IlcMaxVectorTBitWidth>`: By default, the compiler targets a `Vector<T>` size of `16` or `32` bytes, depending on the underlying instruction sets supported. This option allows specifying a different maximum bit width. For example, by default on x64 hardware `Vector<T>` will be 16-bytes. However, if `AVX2` is targeted then `Vector<T>` will automatically grow to be 32-bytes instead; setting `<IlcMaxVectorTBitWidth>128</IlcMaxVectorTBitWidth>` would keep the size as 16-bytes. Alternatively, even if `AVX512F` is targeted, by default `Vector<T>` will not grow larger than 32-bytes; setting `<IlcMaxVectorTBitWidth>512</IlcMaxVectorTBitWidth>` would allow it to grow to 64-bytes.
 
index f7ca5c3..3042629 100644 (file)
@@ -235,7 +235,10 @@ PAL_GetJitCpuCapabilityFlags(CORJIT_FLAGS *flags)
 #endif
 #ifdef HWCAP_ASIMD
     if (hwCap & HWCAP_ASIMD)
+    {
         flags->Set(InstructionSet_AdvSimd);
+        flags->Set(InstructionSet_VectorT128);
+    }
 #endif
 #ifdef HWCAP_ASIMDRDM
     if (hwCap & HWCAP_ASIMDRDM)
@@ -292,6 +295,7 @@ PAL_GetJitCpuCapabilityFlags(CORJIT_FLAGS *flags)
     // Set baseline flags if OS has not exposed mechanism for us to determine CPU capabilities
     flags->Set(InstructionSet_ArmBase);
     flags->Set(InstructionSet_AdvSimd);
+    flags->Set(InstructionSet_VectorT128);
     //    flags->Set(CORJIT_FLAGS::CORJIT_FLAG_HAS_ARM64_FP);
 #endif // HAVE_AUXV_HWCAP_H
 }
index 6d124bd..4652838 100644 (file)
@@ -96,18 +96,37 @@ namespace ILCompiler
         {
             if ((_targetArchitecture == TargetArchitecture.X64) || (_targetArchitecture == TargetArchitecture.X86))
             {
-                Debug.Assert(InstructionSet.X64_AVX2 == InstructionSet.X86_AVX2);
-                Debug.Assert(InstructionSet.X64_SSE2 == InstructionSet.X86_SSE2);
-                if (IsInstructionSetSupported(InstructionSet.X86_AVX2))
+                Debug.Assert(InstructionSet.X64_VectorT128 == InstructionSet.X86_VectorT128);
+                Debug.Assert(InstructionSet.X64_VectorT256 == InstructionSet.X86_VectorT256);
+                Debug.Assert(InstructionSet.X64_VectorT512 == InstructionSet.X86_VectorT512);
+
+                // TODO-XArch: Add support for 512-bit Vector<T>
+                Debug.Assert(!IsInstructionSetSupported(InstructionSet.X64_VectorT512));
+
+                if (IsInstructionSetSupported(InstructionSet.X64_VectorT256))
+                {
+                    Debug.Assert(!IsInstructionSetSupported(InstructionSet.X64_VectorT128));
                     return SimdVectorLength.Vector256Bit;
-                else if (IsInstructionSetExplicitlyUnsupported(InstructionSet.X86_AVX2) && IsInstructionSetSupported(InstructionSet.X64_SSE2))
+                }
+                else if (IsInstructionSetSupported(InstructionSet.X64_VectorT128))
+                {
                     return SimdVectorLength.Vector128Bit;
+                }
                 else
+                {
                     return SimdVectorLength.None;
+                }
             }
             else if (_targetArchitecture == TargetArchitecture.ARM64)
             {
-                return SimdVectorLength.Vector128Bit;
+                if (IsInstructionSetSupported(InstructionSet.ARM64_VectorT128))
+                {
+                    return SimdVectorLength.Vector128Bit;
+                }
+                else
+                {
+                    return SimdVectorLength.None;
+                }
             }
             else if (_targetArchitecture == TargetArchitecture.ARM)
             {
@@ -183,15 +202,24 @@ namespace ILCompiler
             return s_nonSpecifiableInstructionSets[architecture];
         }
 
-        private readonly SortedSet<string> _supportedInstructionSets = new SortedSet<string>();
-        private readonly SortedSet<string> _unsupportedInstructionSets = new SortedSet<string>();
+        private readonly SortedSet<string> _supportedInstructionSets;
+        private readonly SortedSet<string> _unsupportedInstructionSets;
         private readonly TargetArchitecture _architecture;
 
         public InstructionSetSupportBuilder(TargetArchitecture architecture)
         {
+            _supportedInstructionSets = new SortedSet<string>();
+            _unsupportedInstructionSets = new SortedSet<string>();
             _architecture = architecture;
         }
 
+        public InstructionSetSupportBuilder(InstructionSetSupportBuilder other)
+        {
+            _supportedInstructionSets = new SortedSet<string>(other._supportedInstructionSets);
+            _unsupportedInstructionSets = new SortedSet<string>(other._unsupportedInstructionSets);
+            _architecture = other._architecture;
+        }
+
         /// <summary>
         /// Add a supported instruction set to the specified list.
         /// </summary>
@@ -245,9 +273,10 @@ namespace ILCompiler
         /// Seal modifications to instruction set support
         /// </summary>
         /// <returns>returns "false" if instruction set isn't valid on this architecture</returns>
-        public bool ComputeInstructionSetFlags(out InstructionSetFlags supportedInstructionSets,
-                                                              out InstructionSetFlags unsupportedInstructionSets,
-                                                              Action<string, string> invalidInstructionSetImplication)
+        public bool ComputeInstructionSetFlags(int maxVectorTBitWidth,
+                                               out InstructionSetFlags supportedInstructionSets,
+                                               out InstructionSetFlags unsupportedInstructionSets,
+                                               Action<string, string> invalidInstructionSetImplication)
         {
             supportedInstructionSets = new InstructionSetFlags();
             unsupportedInstructionSets = new InstructionSetFlags();
@@ -288,6 +317,51 @@ namespace ILCompiler
                 }
             }
 
+            switch (_architecture)
+            {
+                case TargetArchitecture.X64:
+                case TargetArchitecture.X86:
+                {
+                    Debug.Assert(InstructionSet.X86_SSE2 == InstructionSet.X64_SSE2);
+                    Debug.Assert(InstructionSet.X86_AVX2 == InstructionSet.X64_AVX2);
+                    Debug.Assert(InstructionSet.X86_AVX512F == InstructionSet.X64_AVX512F);
+
+                    Debug.Assert(InstructionSet.X86_VectorT128 == InstructionSet.X64_VectorT128);
+                    Debug.Assert(InstructionSet.X86_VectorT256 == InstructionSet.X64_VectorT256);
+                    Debug.Assert(InstructionSet.X86_VectorT512 == InstructionSet.X64_VectorT512);
+
+                    // We only want one size supported for Vector<T> and we want the other sizes explicitly
+                    // unsupported to ensure we throw away the given methods if runtime picks a larger size
+
+                    Debug.Assert(supportedInstructionSets.HasInstructionSet(InstructionSet.X86_SSE2));
+                    Debug.Assert((maxVectorTBitWidth == 0) || (maxVectorTBitWidth >= 128));
+                    supportedInstructionSets.AddInstructionSet(InstructionSet.X86_VectorT128);
+
+                    if (supportedInstructionSets.HasInstructionSet(InstructionSet.X86_AVX2))
+                    {
+                        if ((maxVectorTBitWidth == 0) || (maxVectorTBitWidth >= 256))
+                        {
+                            supportedInstructionSets.RemoveInstructionSet(InstructionSet.X86_VectorT128);
+                            supportedInstructionSets.AddInstructionSet(InstructionSet.X86_VectorT256);
+
+                            unsupportedInstructionSets.AddInstructionSet(InstructionSet.X86_VectorT128);
+                            unsupportedInstructionSets.AddInstructionSet(InstructionSet.X86_VectorT512);
+                        }
+
+                        // TODO-XArch: Add support for 512-bit Vector<T>
+                    }
+                    break;
+                }
+
+                case TargetArchitecture.ARM64:
+                {
+                    Debug.Assert(supportedInstructionSets.HasInstructionSet(InstructionSet.ARM64_AdvSimd));
+                    Debug.Assert((maxVectorTBitWidth == 0) || (maxVectorTBitWidth >= 128));
+                    supportedInstructionSets.AddInstructionSet(InstructionSet.ARM64_VectorT128);
+                    break;
+                }
+            }
+
             return true;
         }
     }
index 93b04a1..c969927 100644 (file)
@@ -11,7 +11,7 @@ namespace System.CommandLine
 {
     internal static partial class Helpers
     {
-        public static InstructionSetSupport ConfigureInstructionSetSupport(string instructionSet, TargetArchitecture targetArchitecture, TargetOS targetOS,
+        public static InstructionSetSupport ConfigureInstructionSetSupport(string instructionSet, int maxVectorTBitWidth, TargetArchitecture targetArchitecture, TargetOS targetOS,
             string mustNotBeMessage, string invalidImplicationMessage)
         {
             InstructionSetSupportBuilder instructionSetSupportBuilder = new(targetArchitecture);
@@ -74,11 +74,16 @@ namespace System.CommandLine
                 }
             }
 
-            instructionSetSupportBuilder.ComputeInstructionSetFlags(out var supportedInstructionSet, out var unsupportedInstructionSet,
+            instructionSetSupportBuilder.ComputeInstructionSetFlags(maxVectorTBitWidth, out var supportedInstructionSet, out var unsupportedInstructionSet,
                 (string specifiedInstructionSet, string impliedInstructionSet) =>
                     throw new CommandLineException(string.Format(invalidImplicationMessage, specifiedInstructionSet, impliedInstructionSet)));
 
-            InstructionSetSupportBuilder optimisticInstructionSetSupportBuilder = new InstructionSetSupportBuilder(targetArchitecture);
+            // Due to expansion by implication, the optimistic set is most often a pure superset of the supported set
+            //
+            // However, there are some gaps in cases like Arm64 neon where none of the optimistic sets imply it. Likewise,
+            // the optimistic set would be missing the explicitly unsupported sets. So we effectively clone the list and
+            // tack on the additional optimistic bits after. This ensures the optimistic set remains an accurate superset
+            InstructionSetSupportBuilder optimisticInstructionSetSupportBuilder = new InstructionSetSupportBuilder(instructionSetSupportBuilder);
 
             // Optimistically assume some instruction sets are present.
             if (targetArchitecture == TargetArchitecture.X86 || targetArchitecture == TargetArchitecture.X64)
@@ -112,10 +117,6 @@ namespace System.CommandLine
                     optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("bmi2");
                 }
 
-                if (supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX2))
-                {
-                }
-
                 Debug.Assert(InstructionSet.X64_AVX512F == InstructionSet.X86_AVX512F);
                 if (supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX512F))
                 {
@@ -143,7 +144,7 @@ namespace System.CommandLine
                 optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("rcpc");
             }
 
-            optimisticInstructionSetSupportBuilder.ComputeInstructionSetFlags(out var optimisticInstructionSet, out _,
+            optimisticInstructionSetSupportBuilder.ComputeInstructionSetFlags(maxVectorTBitWidth, out var optimisticInstructionSet, out _,
                 (string specifiedInstructionSet, string impliedInstructionSet) => throw new NotSupportedException());
             optimisticInstructionSet.Remove(unsupportedInstructionSet);
             optimisticInstructionSet.Add(supportedInstructionSet);
index 0068f0b..32b60ec 100644 (file)
@@ -50,6 +50,9 @@ namespace Internal.ReadyToRunConstants
         Avx512DQ_VL=36,
         Avx512Vbmi=37,
         Avx512Vbmi_VL=38,
+        VectorT128=39,
+        VectorT256=40,
+        VectorT512=41,
 
     }
 }
index b387eed..f593808 100644 (file)
@@ -44,6 +44,7 @@ namespace Internal.ReadyToRunConstants
                             case InstructionSet.ARM64_Vector128: return null;
                             case InstructionSet.ARM64_Dczva: return null;
                             case InstructionSet.ARM64_Rcpc: return ReadyToRunInstructionSet.Rcpc;
+                            case InstructionSet.ARM64_VectorT128: return ReadyToRunInstructionSet.VectorT128;
 
                             default: throw new Exception("Unknown instruction set");
                         }
@@ -114,6 +115,9 @@ namespace Internal.ReadyToRunConstants
                             case InstructionSet.X64_AVX512VBMI_X64: return ReadyToRunInstructionSet.Avx512Vbmi;
                             case InstructionSet.X64_AVX512VBMI_VL: return ReadyToRunInstructionSet.Avx512Vbmi_VL;
                             case InstructionSet.X64_AVX512VBMI_VL_X64: return ReadyToRunInstructionSet.Avx512Vbmi_VL;
+                            case InstructionSet.X64_VectorT128: return ReadyToRunInstructionSet.VectorT128;
+                            case InstructionSet.X64_VectorT256: return ReadyToRunInstructionSet.VectorT256;
+                            case InstructionSet.X64_VectorT512: return ReadyToRunInstructionSet.VectorT512;
 
                             default: throw new Exception("Unknown instruction set");
                         }
@@ -184,6 +188,9 @@ namespace Internal.ReadyToRunConstants
                             case InstructionSet.X86_AVX512VBMI_X64: return null;
                             case InstructionSet.X86_AVX512VBMI_VL: return ReadyToRunInstructionSet.Avx512Vbmi_VL;
                             case InstructionSet.X86_AVX512VBMI_VL_X64: return null;
+                            case InstructionSet.X86_VectorT128: return ReadyToRunInstructionSet.VectorT128;
+                            case InstructionSet.X86_VectorT256: return ReadyToRunInstructionSet.VectorT256;
+                            case InstructionSet.X86_VectorT512: return ReadyToRunInstructionSet.VectorT512;
 
                             default: throw new Exception("Unknown instruction set");
                         }
index 1557059..f26abd6 100644 (file)
@@ -15,7 +15,7 @@ namespace Internal.JitInterface
     public enum InstructionSet
     {
         ILLEGAL = 0,
-        NONE = 63,
+        NONE = 127,
         ARM64_ArmBase = InstructionSet_ARM64.ArmBase,
         ARM64_AdvSimd = InstructionSet_ARM64.AdvSimd,
         ARM64_Aes = InstructionSet_ARM64.Aes,
@@ -29,6 +29,7 @@ namespace Internal.JitInterface
         ARM64_Vector128 = InstructionSet_ARM64.Vector128,
         ARM64_Dczva = InstructionSet_ARM64.Dczva,
         ARM64_Rcpc = InstructionSet_ARM64.Rcpc,
+        ARM64_VectorT128 = InstructionSet_ARM64.VectorT128,
         ARM64_ArmBase_Arm64 = InstructionSet_ARM64.ArmBase_Arm64,
         ARM64_AdvSimd_Arm64 = InstructionSet_ARM64.AdvSimd_Arm64,
         ARM64_Aes_Arm64 = InstructionSet_ARM64.Aes_Arm64,
@@ -69,6 +70,9 @@ namespace Internal.JitInterface
         X64_AVX512DQ_VL = InstructionSet_X64.AVX512DQ_VL,
         X64_AVX512VBMI = InstructionSet_X64.AVX512VBMI,
         X64_AVX512VBMI_VL = InstructionSet_X64.AVX512VBMI_VL,
+        X64_VectorT128 = InstructionSet_X64.VectorT128,
+        X64_VectorT256 = InstructionSet_X64.VectorT256,
+        X64_VectorT512 = InstructionSet_X64.VectorT512,
         X64_X86Base_X64 = InstructionSet_X64.X86Base_X64,
         X64_SSE_X64 = InstructionSet_X64.SSE_X64,
         X64_SSE2_X64 = InstructionSet_X64.SSE2_X64,
@@ -130,6 +134,9 @@ namespace Internal.JitInterface
         X86_AVX512DQ_VL = InstructionSet_X86.AVX512DQ_VL,
         X86_AVX512VBMI = InstructionSet_X86.AVX512VBMI,
         X86_AVX512VBMI_VL = InstructionSet_X86.AVX512VBMI_VL,
+        X86_VectorT128 = InstructionSet_X86.VectorT128,
+        X86_VectorT256 = InstructionSet_X86.VectorT256,
+        X86_VectorT512 = InstructionSet_X86.VectorT512,
         X86_X86Base_X64 = InstructionSet_X86.X86Base_X64,
         X86_SSE_X64 = InstructionSet_X86.SSE_X64,
         X86_SSE2_X64 = InstructionSet_X86.SSE2_X64,
@@ -177,14 +184,15 @@ namespace Internal.JitInterface
         Vector128 = 11,
         Dczva = 12,
         Rcpc = 13,
-        ArmBase_Arm64 = 14,
-        AdvSimd_Arm64 = 15,
-        Aes_Arm64 = 16,
-        Crc32_Arm64 = 17,
-        Dp_Arm64 = 18,
-        Rdm_Arm64 = 19,
-        Sha1_Arm64 = 20,
-        Sha256_Arm64 = 21,
+        VectorT128 = 14,
+        ArmBase_Arm64 = 15,
+        AdvSimd_Arm64 = 16,
+        Aes_Arm64 = 17,
+        Crc32_Arm64 = 18,
+        Dp_Arm64 = 19,
+        Rdm_Arm64 = 20,
+        Sha1_Arm64 = 21,
+        Sha256_Arm64 = 22,
     }
 
     public enum InstructionSet_X64
@@ -223,35 +231,38 @@ namespace Internal.JitInterface
         AVX512DQ_VL = 30,
         AVX512VBMI = 31,
         AVX512VBMI_VL = 32,
-        X86Base_X64 = 33,
-        SSE_X64 = 34,
-        SSE2_X64 = 35,
-        SSE3_X64 = 36,
-        SSSE3_X64 = 37,
-        SSE41_X64 = 38,
-        SSE42_X64 = 39,
-        AVX_X64 = 40,
-        AVX2_X64 = 41,
-        AES_X64 = 42,
-        BMI1_X64 = 43,
-        BMI2_X64 = 44,
-        FMA_X64 = 45,
-        LZCNT_X64 = 46,
-        PCLMULQDQ_X64 = 47,
-        POPCNT_X64 = 48,
-        AVXVNNI_X64 = 49,
-        MOVBE_X64 = 50,
-        X86Serialize_X64 = 51,
-        AVX512F_X64 = 52,
-        AVX512F_VL_X64 = 53,
-        AVX512BW_X64 = 54,
-        AVX512BW_VL_X64 = 55,
-        AVX512CD_X64 = 56,
-        AVX512CD_VL_X64 = 57,
-        AVX512DQ_X64 = 58,
-        AVX512DQ_VL_X64 = 59,
-        AVX512VBMI_X64 = 60,
-        AVX512VBMI_VL_X64 = 61,
+        VectorT128 = 33,
+        VectorT256 = 34,
+        VectorT512 = 35,
+        X86Base_X64 = 36,
+        SSE_X64 = 37,
+        SSE2_X64 = 38,
+        SSE3_X64 = 39,
+        SSSE3_X64 = 40,
+        SSE41_X64 = 41,
+        SSE42_X64 = 42,
+        AVX_X64 = 43,
+        AVX2_X64 = 44,
+        AES_X64 = 45,
+        BMI1_X64 = 46,
+        BMI2_X64 = 47,
+        FMA_X64 = 48,
+        LZCNT_X64 = 49,
+        PCLMULQDQ_X64 = 50,
+        POPCNT_X64 = 51,
+        AVXVNNI_X64 = 52,
+        MOVBE_X64 = 53,
+        X86Serialize_X64 = 54,
+        AVX512F_X64 = 55,
+        AVX512F_VL_X64 = 56,
+        AVX512BW_X64 = 57,
+        AVX512BW_VL_X64 = 58,
+        AVX512CD_X64 = 59,
+        AVX512CD_VL_X64 = 60,
+        AVX512DQ_X64 = 61,
+        AVX512DQ_VL_X64 = 62,
+        AVX512VBMI_X64 = 63,
+        AVX512VBMI_VL_X64 = 64,
     }
 
     public enum InstructionSet_X86
@@ -290,40 +301,43 @@ namespace Internal.JitInterface
         AVX512DQ_VL = 30,
         AVX512VBMI = 31,
         AVX512VBMI_VL = 32,
-        X86Base_X64 = 33,
-        SSE_X64 = 34,
-        SSE2_X64 = 35,
-        SSE3_X64 = 36,
-        SSSE3_X64 = 37,
-        SSE41_X64 = 38,
-        SSE42_X64 = 39,
-        AVX_X64 = 40,
-        AVX2_X64 = 41,
-        AES_X64 = 42,
-        BMI1_X64 = 43,
-        BMI2_X64 = 44,
-        FMA_X64 = 45,
-        LZCNT_X64 = 46,
-        PCLMULQDQ_X64 = 47,
-        POPCNT_X64 = 48,
-        AVXVNNI_X64 = 49,
-        MOVBE_X64 = 50,
-        X86Serialize_X64 = 51,
-        AVX512F_X64 = 52,
-        AVX512F_VL_X64 = 53,
-        AVX512BW_X64 = 54,
-        AVX512BW_VL_X64 = 55,
-        AVX512CD_X64 = 56,
-        AVX512CD_VL_X64 = 57,
-        AVX512DQ_X64 = 58,
-        AVX512DQ_VL_X64 = 59,
-        AVX512VBMI_X64 = 60,
-        AVX512VBMI_VL_X64 = 61,
+        VectorT128 = 33,
+        VectorT256 = 34,
+        VectorT512 = 35,
+        X86Base_X64 = 36,
+        SSE_X64 = 37,
+        SSE2_X64 = 38,
+        SSE3_X64 = 39,
+        SSSE3_X64 = 40,
+        SSE41_X64 = 41,
+        SSE42_X64 = 42,
+        AVX_X64 = 43,
+        AVX2_X64 = 44,
+        AES_X64 = 45,
+        BMI1_X64 = 46,
+        BMI2_X64 = 47,
+        FMA_X64 = 48,
+        LZCNT_X64 = 49,
+        PCLMULQDQ_X64 = 50,
+        POPCNT_X64 = 51,
+        AVXVNNI_X64 = 52,
+        MOVBE_X64 = 53,
+        X86Serialize_X64 = 54,
+        AVX512F_X64 = 55,
+        AVX512F_VL_X64 = 56,
+        AVX512BW_X64 = 57,
+        AVX512BW_VL_X64 = 58,
+        AVX512CD_X64 = 59,
+        AVX512CD_VL_X64 = 60,
+        AVX512DQ_X64 = 61,
+        AVX512DQ_VL_X64 = 62,
+        AVX512VBMI_X64 = 63,
+        AVX512VBMI_VL_X64 = 64,
     }
 
     public unsafe struct InstructionSetFlags : IEnumerable<InstructionSet>
     {
-        private const int FlagsFieldCount = 1;
+        private const int FlagsFieldCount = 2;
         private const int BitsPerFlagsField = 64;
         private fixed ulong _flags[FlagsFieldCount];
         public IEnumerable<InstructionSet_ARM64> ARM64Flags => this.Select((x) => (InstructionSet_ARM64)x);
@@ -527,6 +541,8 @@ namespace Internal.JitInterface
                         resultflags.AddInstructionSet(InstructionSet.ARM64_AdvSimd);
                     if (resultflags.HasInstructionSet(InstructionSet.ARM64_Vector128))
                         resultflags.AddInstructionSet(InstructionSet.ARM64_AdvSimd);
+                    if (resultflags.HasInstructionSet(InstructionSet.ARM64_VectorT128))
+                        resultflags.AddInstructionSet(InstructionSet.ARM64_AdvSimd);
                     break;
 
                 case TargetArchitecture.X64:
@@ -718,6 +734,12 @@ namespace Internal.JitInterface
                         resultflags.AddInstructionSet(InstructionSet.X64_AVX512VBMI);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512VBMI_VL))
                         resultflags.AddInstructionSet(InstructionSet.X64_AVX512BW_VL);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_VectorT128))
+                        resultflags.AddInstructionSet(InstructionSet.X64_SSE2);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_VectorT256))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX2);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_VectorT512))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVX512F);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F))
                         resultflags.AddInstructionSet(InstructionSet.X64_AVX512BW_VL);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F))
@@ -799,6 +821,12 @@ namespace Internal.JitInterface
                         resultflags.AddInstructionSet(InstructionSet.X86_AVX512VBMI);
                     if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512VBMI_VL))
                         resultflags.AddInstructionSet(InstructionSet.X86_AVX512BW_VL);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_VectorT128))
+                        resultflags.AddInstructionSet(InstructionSet.X86_SSE2);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_VectorT256))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AVX2);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_VectorT512))
+                        resultflags.AddInstructionSet(InstructionSet.X86_AVX512F);
                     if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F))
                         resultflags.AddInstructionSet(InstructionSet.X86_AVX512BW_VL);
                     if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F))
@@ -862,6 +890,8 @@ namespace Internal.JitInterface
                         resultflags.AddInstructionSet(InstructionSet.ARM64_Vector64);
                     if (resultflags.HasInstructionSet(InstructionSet.ARM64_AdvSimd))
                         resultflags.AddInstructionSet(InstructionSet.ARM64_Vector128);
+                    if (resultflags.HasInstructionSet(InstructionSet.ARM64_AdvSimd))
+                        resultflags.AddInstructionSet(InstructionSet.ARM64_VectorT128);
                     break;
 
                 case TargetArchitecture.X64:
@@ -995,6 +1025,12 @@ namespace Internal.JitInterface
                         resultflags.AddInstructionSet(InstructionSet.X64_AVX512VBMI_VL);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512BW_VL))
                         resultflags.AddInstructionSet(InstructionSet.X64_AVX512VBMI_VL);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_SSE2))
+                        resultflags.AddInstructionSet(InstructionSet.X64_VectorT128);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX2))
+                        resultflags.AddInstructionSet(InstructionSet.X64_VectorT256);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F))
+                        resultflags.AddInstructionSet(InstructionSet.X64_VectorT512);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512BW_VL))
                         resultflags.AddInstructionSet(InstructionSet.X64_AVX512F);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512CD_VL))
@@ -1076,6 +1112,12 @@ namespace Internal.JitInterface
                         resultflags.AddInstructionSet(InstructionSet.X86_AVX512VBMI_VL);
                     if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512BW_VL))
                         resultflags.AddInstructionSet(InstructionSet.X86_AVX512VBMI_VL);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_SSE2))
+                        resultflags.AddInstructionSet(InstructionSet.X86_VectorT128);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX2))
+                        resultflags.AddInstructionSet(InstructionSet.X86_VectorT256);
+                    if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F))
+                        resultflags.AddInstructionSet(InstructionSet.X86_VectorT512);
                     if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512BW_VL))
                         resultflags.AddInstructionSet(InstructionSet.X86_AVX512F);
                     if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512CD_VL))
@@ -1152,6 +1194,7 @@ namespace Internal.JitInterface
                     yield return new InstructionSetInfo("Vector128", "", InstructionSet.ARM64_Vector128, false);
                     yield return new InstructionSetInfo("Dczva", "", InstructionSet.ARM64_Dczva, false);
                     yield return new InstructionSetInfo("rcpc", "", InstructionSet.ARM64_Rcpc, true);
+                    yield return new InstructionSetInfo("vectort128", "VectorT128", InstructionSet.ARM64_VectorT128, true);
                     break;
 
                 case TargetArchitecture.X64:
@@ -1187,6 +1230,9 @@ namespace Internal.JitInterface
                     yield return new InstructionSetInfo("avx512dq_vl", "Avx512DQ_VL", InstructionSet.X64_AVX512DQ_VL, true);
                     yield return new InstructionSetInfo("avx512vbmi", "Avx512Vbmi", InstructionSet.X64_AVX512VBMI, true);
                     yield return new InstructionSetInfo("avx512vbmi_vl", "Avx512Vbmi_VL", InstructionSet.X64_AVX512VBMI_VL, true);
+                    yield return new InstructionSetInfo("vectort128", "VectorT128", InstructionSet.X64_VectorT128, true);
+                    yield return new InstructionSetInfo("vectort256", "VectorT256", InstructionSet.X64_VectorT256, true);
+                    yield return new InstructionSetInfo("vectort512", "VectorT512", InstructionSet.X64_VectorT512, true);
                     break;
 
                 case TargetArchitecture.X86:
@@ -1222,6 +1268,9 @@ namespace Internal.JitInterface
                     yield return new InstructionSetInfo("avx512dq_vl", "Avx512DQ_VL", InstructionSet.X86_AVX512DQ_VL, true);
                     yield return new InstructionSetInfo("avx512vbmi", "Avx512Vbmi", InstructionSet.X86_AVX512VBMI, true);
                     yield return new InstructionSetInfo("avx512vbmi_vl", "Avx512Vbmi_VL", InstructionSet.X86_AVX512VBMI_VL, true);
+                    yield return new InstructionSetInfo("vectort128", "VectorT128", InstructionSet.X86_VectorT128, true);
+                    yield return new InstructionSetInfo("vectort256", "VectorT256", InstructionSet.X86_VectorT256, true);
+                    yield return new InstructionSetInfo("vectort512", "VectorT512", InstructionSet.X86_VectorT512, true);
                     break;
             }
         }
@@ -1496,6 +1545,9 @@ namespace Internal.JitInterface
                         else
                         { return InstructionSet.ARM64_Sha256; }
 
+                    case "VectorT128":
+                        { return InstructionSet.ARM64_VectorT128; }
+
                 }
                 break;
 
@@ -1662,6 +1714,15 @@ namespace Internal.JitInterface
                         else
                         { return InstructionSet.X64_AVX512VBMI; }
 
+                    case "VectorT128":
+                        { return InstructionSet.X64_VectorT128; }
+
+                    case "VectorT256":
+                        { return InstructionSet.X64_VectorT256; }
+
+                    case "VectorT512":
+                        { return InstructionSet.X64_VectorT512; }
+
                 }
                 break;
 
@@ -1756,6 +1817,15 @@ namespace Internal.JitInterface
                         else
                         { return InstructionSet.X86_AVX512VBMI; }
 
+                    case "VectorT128":
+                        { return InstructionSet.X86_VectorT128; }
+
+                    case "VectorT256":
+                        { return InstructionSet.X86_VectorT256; }
+
+                    case "VectorT512":
+                        { return InstructionSet.X86_VectorT512; }
+
                 }
                 break;
 
index 1b8b2c1..3c669e1 100644 (file)
@@ -57,7 +57,9 @@ instructionset     ,X86   ,Avx512DQ             ,        ,35 ,AVX512DQ
 instructionset     ,X86   ,Avx512DQ_VL          ,        ,36 ,AVX512DQ_VL           ,avx512dq_vl
 instructionset     ,X86   ,Avx512Vbmi           ,        ,37 ,AVX512VBMI            ,avx512vbmi
 instructionset     ,X86   ,Avx512Vbmi_VL        ,        ,38 ,AVX512VBMI_VL         ,avx512vbmi_vl
-
+instructionset     ,X86   ,VectorT128           ,        ,39 ,VectorT128            ,vectort128
+instructionset     ,X86   ,VectorT256           ,        ,40 ,VectorT256            ,vectort256
+instructionset     ,X86   ,VectorT512           ,        ,41 ,VectorT512            ,vectort512
 
 instructionset64bit,X86   ,X86Base
 instructionset64bit,X86   ,SSE
@@ -129,6 +131,9 @@ implication        ,X86   ,AVX512DQ_VL          ,AVX512F_VL
 implication        ,X86   ,AVX512VBMI           ,AVX512BW
 implication        ,X86   ,AVX512VBMI_VL        ,AVX512VBMI
 implication        ,X86   ,AVX512VBMI_VL        ,AVX512BW_VL
+implication        ,X86   ,VectorT128           ,SSE2
+implication        ,X86   ,VectorT256           ,AVX2
+implication        ,X86   ,VectorT512           ,AVX512F
 
 ; While the AVX-512 ISAs can be individually lit-up, they really
 ; need F, BW, CD, DQ, and VL to be fully functional without adding
@@ -148,19 +153,20 @@ copyinstructionsets,X86   ,X64
 ; Definition of Arm64 instruction sets
 definearch         ,ARM64 ,64Bit     ,Arm64, Arm64
 
-instructionset     ,ARM64 ,ArmBase   ,        ,16 ,ArmBase  ,base
-instructionset     ,ARM64 ,AdvSimd   ,        ,17 ,AdvSimd  ,neon
-instructionset     ,ARM64 ,Aes       ,        ,9  ,Aes      ,aes
-instructionset     ,ARM64 ,Crc32     ,        ,18 ,Crc32    ,crc
-instructionset     ,ARM64 ,Dp        ,        ,23 ,Dp       ,dotprod
-instructionset     ,ARM64 ,Rdm       ,        ,24 ,Rdm      ,rdma
-instructionset     ,ARM64 ,Sha1      ,        ,19 ,Sha1     ,sha1
-instructionset     ,ARM64 ,Sha256    ,        ,20 ,Sha256   ,sha2
-instructionset     ,ARM64 ,          ,Atomics ,21 ,Atomics  ,lse
-instructionset     ,ARM64 ,          ,        ,   ,Vector64 ,
-instructionset     ,ARM64 ,          ,        ,   ,Vector128,
-instructionset     ,ARM64 ,          ,        ,   ,Dczva    ,
-instructionset     ,ARM64 ,          ,Rcpc    ,26 ,Rcpc     ,rcpc
+instructionset     ,ARM64 ,ArmBase               ,        ,16 ,ArmBase               ,base
+instructionset     ,ARM64 ,AdvSimd               ,        ,17 ,AdvSimd               ,neon
+instructionset     ,ARM64 ,Aes                   ,        ,9  ,Aes                   ,aes
+instructionset     ,ARM64 ,Crc32                 ,        ,18 ,Crc32                 ,crc
+instructionset     ,ARM64 ,Dp                    ,        ,23 ,Dp                    ,dotprod
+instructionset     ,ARM64 ,Rdm                   ,        ,24 ,Rdm                   ,rdma
+instructionset     ,ARM64 ,Sha1                  ,        ,19 ,Sha1                  ,sha1
+instructionset     ,ARM64 ,Sha256                ,        ,20 ,Sha256                ,sha2
+instructionset     ,ARM64 ,                      ,Atomics ,21 ,Atomics               ,lse
+instructionset     ,ARM64 ,                      ,        ,   ,Vector64              ,
+instructionset     ,ARM64 ,                      ,        ,   ,Vector128             ,
+instructionset     ,ARM64 ,                      ,        ,   ,Dczva                 ,
+instructionset     ,ARM64 ,                      ,Rcpc    ,26 ,Rcpc                  ,rcpc
+instructionset     ,ARM64 ,VectorT128            ,        ,39 ,VectorT128            ,vectort128
 
 instructionset64bit,ARM64 ,ArmBase
 instructionset64bit,ARM64 ,AdvSimd
@@ -174,16 +180,16 @@ instructionset64bit,ARM64 ,Sha256
 vectorinstructionset,ARM64,Vector64
 vectorinstructionset,ARM64,Vector128
 
-implication        ,ARM64 ,AdvSimd   ,ArmBase
-implication        ,ARM64 ,Aes       ,ArmBase
-implication        ,ARM64 ,Crc32     ,ArmBase
-implication        ,ARM64 ,Dp        ,AdvSimd
-implication        ,ARM64 ,Rdm       ,AdvSimd
-implication        ,ARM64 ,Sha1      ,ArmBase
-implication        ,ARM64 ,Sha256    ,ArmBase
-implication        ,ARM64 ,Vector64  ,AdvSimd
-implication        ,ARM64 ,Vector128 ,AdvSimd
-
+implication        ,ARM64 ,AdvSimd    ,ArmBase
+implication        ,ARM64 ,Aes        ,ArmBase
+implication        ,ARM64 ,Crc32      ,ArmBase
+implication        ,ARM64 ,Dp         ,AdvSimd
+implication        ,ARM64 ,Rdm        ,AdvSimd
+implication        ,ARM64 ,Sha1       ,ArmBase
+implication        ,ARM64 ,Sha256     ,ArmBase
+implication        ,ARM64 ,Vector64   ,AdvSimd
+implication        ,ARM64 ,Vector128  ,AdvSimd
+implication        ,ARM64 ,VectorT128 ,AdvSimd
 
 ;                    ,name and aliases           ,archs    ,lower baselines included by implication
 ;
index 4547b91..3ce3bf5 100644 (file)
@@ -95,7 +95,7 @@ namespace Thunkerator
         private Dictionary<string, string> _64BitVariantArchitectureManagedNameSuffix = new Dictionary<string, string>();
 
         // This represents the number of flags fields we currently track
-        private const int FlagsFieldCount = 1;
+        private const int FlagsFieldCount = 2;
 
         private void ArchitectureEncountered(string arch)
         {
index 9930580..fb4cd6b 100644 (file)
@@ -73,6 +73,11 @@ namespace Internal.TypeSystem
             /// True if the type transitively has an Int128 in it or is an Int128
             /// </summary>
             public const int IsInt128OrHasInt128Fields = 0x800;
+
+            /// <summary>
+            /// True if the type transitively has a Vector&lt;T&gt; in it or is Vector&lt;T&gt;
+            /// </summary>
+            public const int IsVectorTOrHasVectorTFields = 0x1000;
         }
 
         private sealed class StaticBlockInfo
@@ -154,6 +159,21 @@ namespace Internal.TypeSystem
         }
 
         /// <summary>
+        /// Gets whether this type is Vector&lt;T&gt; or transitively has any fields of type Vector&lt;T&gt;; the instance layout is computed first if it has not been computed yet.
+        /// </summary>
+        public virtual bool IsVectorTOrHasVectorTFields
+        {
+            get
+            {
+                if (!_fieldLayoutFlags.HasFlags(FieldLayoutFlags.ComputedInstanceTypeLayout))
+                {
+                    ComputeInstanceLayout(InstanceLayoutKind.TypeAndFields);
+                }
+                return _fieldLayoutFlags.HasFlags(FieldLayoutFlags.IsVectorTOrHasVectorTFields);
+            }
+        }
+
+        /// <summary>
         /// The number of bytes required to hold a field of this type
         /// </summary>
         public LayoutInt InstanceFieldSize
@@ -451,6 +471,10 @@ namespace Internal.TypeSystem
             {
                 _fieldLayoutFlags.AddFlags(FieldLayoutFlags.IsInt128OrHasInt128Fields);
             }
+            if (computedLayout.IsVectorTOrHasVectorTFields)
+            {
+                _fieldLayoutFlags.AddFlags(FieldLayoutFlags.IsVectorTOrHasVectorTFields);
+            }
 
             if (computedLayout.Offsets != null)
             {
index 53388c9..31a46ec 100644 (file)
@@ -84,6 +84,7 @@ namespace Internal.TypeSystem
         public bool LayoutAbiStable; // Is the layout stable such that it can safely be used in function calling conventions
         public bool IsAutoLayoutOrHasAutoLayoutFields;
         public bool IsInt128OrHasInt128Fields;
+        public bool IsVectorTOrHasVectorTFields;
 
         /// <summary>
         /// If Offsets is non-null, then all field based layout is complete.
index 0fc9064..d56bfa1 100644 (file)
@@ -110,6 +110,7 @@ namespace Internal.TypeSystem
                     LayoutAbiStable = true,
                     IsAutoLayoutOrHasAutoLayoutFields = false,
                     IsInt128OrHasInt128Fields = false,
+                    IsVectorTOrHasVectorTFields = false,
                 };
 
                 if (numInstanceFields > 0)
@@ -211,7 +212,7 @@ namespace Internal.TypeSystem
                 }
 
                 ref StaticsBlock block = ref GetStaticsBlockForField(ref result, field);
-                SizeAndAlignment sizeAndAlignment = ComputeFieldSizeAndAlignment(fieldType, hasLayout: false, context.Target.DefaultPackingSize, out bool _, out bool _, out bool _);
+                SizeAndAlignment sizeAndAlignment = ComputeFieldSizeAndAlignment(fieldType, hasLayout: false, context.Target.DefaultPackingSize, out bool _, out bool _, out bool _, out bool _);
 
                 block.Size = LayoutInt.AlignUp(block.Size, sizeAndAlignment.Alignment, context.Target);
                 result.Offsets[index] = new FieldAndOffset(field, block.Size);
@@ -303,18 +304,27 @@ namespace Internal.TypeSystem
             int fieldOrdinal = 0;
             bool layoutAbiStable = true;
             bool hasAutoLayoutField = false;
-            bool hasInt128Field = type.BaseType == null ? false : type.BaseType.IsInt128OrHasInt128Fields;
+            bool hasInt128Field = false;
+            bool hasVectorTField = false;
+
+            if (type.BaseType is not null)
+            {
+                hasInt128Field = type.BaseType.IsInt128OrHasInt128Fields;
+                hasVectorTField = type.BaseType.IsVectorTOrHasVectorTFields;
+            }
 
             foreach (var fieldAndOffset in layoutMetadata.Offsets)
             {
                 TypeDesc fieldType = fieldAndOffset.Field.FieldType;
-                var fieldSizeAndAlignment = ComputeFieldSizeAndAlignment(fieldType.UnderlyingType, hasLayout: true, packingSize, out bool fieldLayoutAbiStable, out bool fieldHasAutoLayout, out bool fieldHasInt128Field);
+                var fieldSizeAndAlignment = ComputeFieldSizeAndAlignment(fieldType.UnderlyingType, hasLayout: true, packingSize, out bool fieldLayoutAbiStable, out bool fieldHasAutoLayout, out bool fieldHasInt128Field, out bool fieldHasVectorTField);
                 if (!fieldLayoutAbiStable)
                     layoutAbiStable = false;
                 if (fieldHasAutoLayout)
                     hasAutoLayoutField = true;
                 if (fieldHasInt128Field)
                     hasInt128Field = true;
+                if (fieldHasVectorTField)
+                    hasVectorTField = true;
 
                 largestAlignmentRequired = LayoutInt.Max(fieldSizeAndAlignment.Alignment, largestAlignmentRequired);
 
@@ -367,6 +377,7 @@ namespace Internal.TypeSystem
             {
                 IsAutoLayoutOrHasAutoLayoutFields = hasAutoLayoutField,
                 IsInt128OrHasInt128Fields = hasInt128Field,
+                IsVectorTOrHasVectorTFields = hasVectorTField,
             };
             computedLayout.FieldAlignment = instanceSizeAndAlignment.Alignment;
             computedLayout.FieldSize = instanceSizeAndAlignment.Size;
@@ -402,20 +413,29 @@ namespace Internal.TypeSystem
             int packingSize = ComputePackingSize(type, layoutMetadata);
             bool layoutAbiStable = true;
             bool hasAutoLayoutField = false;
-            bool hasInt128Field = type.BaseType == null ? false : type.BaseType.IsInt128OrHasInt128Fields;
+            bool hasInt128Field = false;
+            bool hasVectorTField = false;
+
+            if (type.BaseType is not null)
+            {
+                hasInt128Field = type.BaseType.IsInt128OrHasInt128Fields;
+                hasVectorTField = type.BaseType.IsVectorTOrHasVectorTFields;
+            }
 
             foreach (var field in type.GetFields())
             {
                 if (field.IsStatic)
                     continue;
 
-                var fieldSizeAndAlignment = ComputeFieldSizeAndAlignment(field.FieldType.UnderlyingType, hasLayout: true, packingSize, out bool fieldLayoutAbiStable, out bool fieldHasAutoLayout, out bool fieldHasInt128Field);
+                var fieldSizeAndAlignment = ComputeFieldSizeAndAlignment(field.FieldType.UnderlyingType, hasLayout: true, packingSize, out bool fieldLayoutAbiStable, out bool fieldHasAutoLayout, out bool fieldHasInt128Field, out bool fieldHasVectorTField);
                 if (!fieldLayoutAbiStable)
                     layoutAbiStable = false;
                 if (fieldHasAutoLayout)
                     hasAutoLayoutField = true;
                 if (fieldHasInt128Field)
                     hasInt128Field = true;
+                if (fieldHasVectorTField)
+                    hasVectorTField = true;
 
                 largestAlignmentRequirement = LayoutInt.Max(fieldSizeAndAlignment.Alignment, largestAlignmentRequirement);
 
@@ -443,6 +463,7 @@ namespace Internal.TypeSystem
             {
                 IsAutoLayoutOrHasAutoLayoutFields = hasAutoLayoutField,
                 IsInt128OrHasInt128Fields = hasInt128Field,
+                IsVectorTOrHasVectorTFields = hasVectorTField,
             };
             computedLayout.FieldAlignment = instanceSizeAndAlignment.Alignment;
             computedLayout.FieldSize = instanceSizeAndAlignment.Size;
@@ -517,6 +538,7 @@ namespace Internal.TypeSystem
             int instanceGCPointerFieldsCount = 0;
             int[] instanceNonGCPointerFieldsCount = new int[maxLog2Size + 1];
             bool hasInt128Field = false;
+            bool hasVectorTField = false;
 
             foreach (var field in type.GetFields())
             {
@@ -531,6 +553,8 @@ namespace Internal.TypeSystem
                     instanceValueClassFieldCount++;
                     if (((DefType)fieldType).IsInt128OrHasInt128Fields)
                         hasInt128Field = true;
+                    if (((DefType)fieldType).IsVectorTOrHasVectorTFields)
+                        hasVectorTField = true;
                 }
                 else if (fieldType.IsGCPointer)
                 {
@@ -540,7 +564,7 @@ namespace Internal.TypeSystem
                 {
                     Debug.Assert(fieldType.IsPrimitive || fieldType.IsPointer || fieldType.IsFunctionPointer || fieldType.IsEnum || fieldType.IsByRef);
 
-                    var fieldSizeAndAlignment = ComputeFieldSizeAndAlignment(fieldType, hasLayout, packingSize, out bool _, out bool _, out bool _);
+                    var fieldSizeAndAlignment = ComputeFieldSizeAndAlignment(fieldType, hasLayout, packingSize, out bool _, out bool _, out bool _, out bool _);
                     instanceNonGCPointerFieldsCount[CalculateLog2(fieldSizeAndAlignment.Size.AsInt)]++;
                 }
             }
@@ -577,7 +601,7 @@ namespace Internal.TypeSystem
 
                 TypeDesc fieldType = field.FieldType;
 
-                var fieldSizeAndAlignment = ComputeFieldSizeAndAlignment(fieldType, hasLayout, packingSize, out bool fieldLayoutAbiStable, out bool _, out bool _);
+                var fieldSizeAndAlignment = ComputeFieldSizeAndAlignment(fieldType, hasLayout, packingSize, out bool fieldLayoutAbiStable, out bool _, out bool _, out bool _);
                 if (!fieldLayoutAbiStable)
                     layoutAbiStable = false;
 
@@ -747,7 +771,7 @@ namespace Internal.TypeSystem
             for (int i = 0; i < instanceValueClassFieldsArr.Length; i++)
             {
                 // Align the cumulative field offset to the indeterminate value
-                var fieldSizeAndAlignment = ComputeFieldSizeAndAlignment(instanceValueClassFieldsArr[i].FieldType, hasLayout, packingSize, out bool fieldLayoutAbiStable, out bool _, out bool _);
+                var fieldSizeAndAlignment = ComputeFieldSizeAndAlignment(instanceValueClassFieldsArr[i].FieldType, hasLayout, packingSize, out bool fieldLayoutAbiStable, out bool _, out bool _, out bool _);
                 if (!fieldLayoutAbiStable)
                     layoutAbiStable = false;
 
@@ -804,6 +828,7 @@ namespace Internal.TypeSystem
             {
                 IsAutoLayoutOrHasAutoLayoutFields = true,
                 IsInt128OrHasInt128Fields = hasInt128Field,
+                IsVectorTOrHasVectorTFields = hasVectorTField,
             };
             computedLayout.FieldAlignment = instanceSizeAndAlignment.Alignment;
             computedLayout.FieldSize = instanceSizeAndAlignment.Size;
@@ -817,7 +842,7 @@ namespace Internal.TypeSystem
 
         private static void PlaceInstanceField(FieldDesc field, bool hasLayout, int packingSize, FieldAndOffset[] offsets, ref LayoutInt instanceFieldPos, ref int fieldOrdinal, LayoutInt offsetBias)
         {
-            var fieldSizeAndAlignment = ComputeFieldSizeAndAlignment(field.FieldType, hasLayout, packingSize, out bool _, out bool _, out bool _);
+            var fieldSizeAndAlignment = ComputeFieldSizeAndAlignment(field.FieldType, hasLayout, packingSize, out bool _, out bool _, out bool _, out bool _);
 
             instanceFieldPos = AlignUpInstanceFieldOffset(instanceFieldPos, fieldSizeAndAlignment.Alignment, field.Context.Target);
             offsets[fieldOrdinal] = new FieldAndOffset(field, instanceFieldPos + offsetBias);
@@ -877,12 +902,13 @@ namespace Internal.TypeSystem
             return cumulativeInstanceFieldPos;
         }
 
-        private static SizeAndAlignment ComputeFieldSizeAndAlignment(TypeDesc fieldType, bool hasLayout, int packingSize, out bool layoutAbiStable, out bool fieldTypeHasAutoLayout, out bool fieldTypeHasInt128Field)
+        private static SizeAndAlignment ComputeFieldSizeAndAlignment(TypeDesc fieldType, bool hasLayout, int packingSize, out bool layoutAbiStable, out bool fieldTypeHasAutoLayout, out bool fieldTypeHasInt128Field, out bool fieldTypeHasVectorTField)
         {
             SizeAndAlignment result;
             layoutAbiStable = true;
             fieldTypeHasAutoLayout = true;
             fieldTypeHasInt128Field = false;
+            fieldTypeHasVectorTField = false;
 
             if (fieldType.IsDefType)
             {
@@ -894,6 +920,7 @@ namespace Internal.TypeSystem
                     layoutAbiStable = defType.LayoutAbiStable;
                     fieldTypeHasAutoLayout = defType.IsAutoLayoutOrHasAutoLayoutFields;
                     fieldTypeHasInt128Field = defType.IsInt128OrHasInt128Fields;
+                    fieldTypeHasVectorTField = defType.IsVectorTOrHasVectorTFields;
                 }
                 else
                 {
index cb80143..3aa6ca2 100644 (file)
@@ -422,6 +422,12 @@ namespace Internal.TypeSystem.Interop
                     return MarshallerKind.Invalid;
                 }
 
+                if (!isField && ((DefType)type).IsVectorTOrHasVectorTFields)
+                {
+                    // Vector<T> types or structs that contain them cannot be passed by value
+                    return MarshallerKind.Invalid;
+                }
+
                 if (MarshalUtils.IsBlittableType(type))
                 {
                     if (nativeType != NativeTypeKind.Default && nativeType != NativeTypeKind.Struct)
index b7e4ee1..cd7a616 100644 (file)
@@ -112,6 +112,9 @@ namespace ILCompiler
             public const int Avx512Vbmi = 0x800000;
             public const int Avx512Vbmi_vl = 0x1000000;
             public const int Serialize = 0x2000000;
+            public const int VectorT128 = 0x4000000;
+            public const int VectorT256 = 0x8000000;
+            public const int VectorT512 = 0x10000000;
 
             public static int FromInstructionSet(InstructionSet instructionSet)
             {
@@ -121,6 +124,7 @@ namespace ILCompiler
 
                 return instructionSet switch
                 {
+                    // Optional ISAs - only available via opt-in or opportunistic light-up
                     InstructionSet.X64_AES => Aes,
                     InstructionSet.X64_AES_X64 => Aes,
                     InstructionSet.X64_PCLMULQDQ => Pclmulqdq,
@@ -174,7 +178,7 @@ namespace ILCompiler
                     InstructionSet.X64_X86Serialize => Serialize,
                     InstructionSet.X64_X86Serialize_X64 => Serialize,
 
-                    // SSE and SSE2 are baseline ISAs - they're always available
+                    // Baseline ISAs - they're always available
                     InstructionSet.X64_SSE => 0,
                     InstructionSet.X64_SSE_X64 => 0,
                     InstructionSet.X64_SSE2 => 0,
@@ -183,6 +187,11 @@ namespace ILCompiler
                     InstructionSet.X64_X86Base => 0,
                     InstructionSet.X64_X86Base_X64 => 0,
 
+                    // Vector<T> Sizes
+                    InstructionSet.X64_VectorT128 => VectorT128,
+                    InstructionSet.X64_VectorT256 => VectorT256,
+                    InstructionSet.X64_VectorT512 => VectorT512,
+
                     _ => throw new NotSupportedException(((InstructionSet_X64)instructionSet).ToString())
                 };
             }
@@ -199,13 +208,20 @@ namespace ILCompiler
             public const int Sha256 = 0x0040;
             public const int Atomics = 0x0080;
             public const int Rcpc = 0x0100;
+            public const int VectorT128 = 0x0200;
 
             public static int FromInstructionSet(InstructionSet instructionSet)
             {
                 return instructionSet switch
                 {
+
+                    // Baseline ISAs - they're always available
+                    InstructionSet.ARM64_ArmBase => 0,
+                    InstructionSet.ARM64_ArmBase_Arm64 => 0,
                     InstructionSet.ARM64_AdvSimd => AdvSimd,
                     InstructionSet.ARM64_AdvSimd_Arm64 => AdvSimd,
+
+                    // Optional ISAs - only available via opt-in or opportunistic light-up
                     InstructionSet.ARM64_Aes => Aes,
                     InstructionSet.ARM64_Aes_Arm64 => Aes,
                     InstructionSet.ARM64_Crc32 => Crc32,
@@ -221,8 +237,8 @@ namespace ILCompiler
                     InstructionSet.ARM64_Atomics => Atomics,
                     InstructionSet.ARM64_Rcpc => Rcpc,
 
-                    InstructionSet.ARM64_ArmBase => 0,
-                    InstructionSet.ARM64_ArmBase_Arm64 => 0,
+                    // Vector<T> Sizes
+                    InstructionSet.ARM64_VectorT128 => VectorT128,
 
                     _ => throw new NotSupportedException(((InstructionSet_ARM64)instructionSet).ToString())
                 };
index 1a1eef1..cfa4dc2 100644 (file)
@@ -25,6 +25,7 @@ namespace ILCompiler
             TargetDetails targetDetails = defType.Context.Target;
 
             ComputedInstanceFieldLayout layoutFromMetadata = _fallbackAlgorithm.ComputeInstanceLayout(defType, layoutKind);
+            layoutFromMetadata.IsVectorTOrHasVectorTFields = true;
 
             LayoutInt instanceFieldSize;
 
@@ -53,6 +54,7 @@ namespace ILCompiler
                 FieldAlignment = layoutFromMetadata.FieldAlignment,
                 FieldSize = instanceFieldSize,
                 Offsets = layoutFromMetadata.Offsets,
+                IsVectorTOrHasVectorTFields = true,
             };
         }
 
index a0b9e80..e379651 100644 (file)
@@ -36,14 +36,20 @@ namespace ILCompiler.DependencyAnalysis.ReadyToRun
 
             if (!relocsOnly)
             {
+                ReadyToRunFixupKind fixupKind = _fixupKind;
                 dataBuilder.AddSymbol(this);
 
+                if ((fixupKind == ReadyToRunFixupKind.Verify_TypeLayout) && ((MetadataType)_typeDesc).IsVectorTOrHasVectorTFields)
+                {
+                    fixupKind = ReadyToRunFixupKind.Check_TypeLayout;
+                }
+
                 IEcmaModule targetModule = factory.SignatureContext.GetTargetModule(_typeDesc);
-                SignatureContext innerContext = dataBuilder.EmitFixup(factory, _fixupKind, targetModule, factory.SignatureContext);
+                SignatureContext innerContext = dataBuilder.EmitFixup(factory, fixupKind, targetModule, factory.SignatureContext);
                 dataBuilder.EmitTypeSignature(_typeDesc, innerContext);
 
-                if ((_fixupKind == ReadyToRunFixupKind.Check_TypeLayout) ||
-                    (_fixupKind == ReadyToRunFixupKind.Verify_TypeLayout))
+                if ((fixupKind == ReadyToRunFixupKind.Check_TypeLayout) ||
+                    (fixupKind == ReadyToRunFixupKind.Verify_TypeLayout))
                 {
                     EncodeTypeLayout(dataBuilder, _typeDesc);
                 }
@@ -92,7 +98,7 @@ namespace ILCompiler.DependencyAnalysis.ReadyToRun
                 };
                 dataBuilder.EmitUInt((uint)hfaElementType);
             }
-            
+
             if (alignment != pointerSize)
             {
                 dataBuilder.EmitUInt((uint)alignment);
index 8b1b8d7..3644241 100644 (file)
@@ -475,12 +475,18 @@ namespace ILCompiler
                 return true;
             }
 
-            if (!(type is MetadataType defType))
+            if (type is not MetadataType defType)
             {
                 // Non metadata backed types have layout defined in all version bubbles
                 return true;
             }
 
+            if (VectorOfTFieldLayoutAlgorithm.IsVectorOfTType(defType))
+            {
+                // Vector<T> always needs a layout check
+                return false;
+            }
+
             if (!NodeFactory.CompilationModuleGroup.VersionsWithModule(defType.Module))
             {
                 // Valuetypes with non-versionable attribute are candidates for fixed layout. Reject the rest.
index 6eed362..c6d40d4 100644 (file)
@@ -220,6 +220,7 @@ namespace ILCompiler
                     ByteCountAlignment = LayoutInt.Indeterminate,
                     Offsets = fieldsAndOffsets.ToArray(),
                     LayoutAbiStable = false,
+                    IsVectorTOrHasVectorTFields = true,
                 };
                 return instanceLayout;
             }
@@ -238,6 +239,7 @@ namespace ILCompiler
                     FieldSize = layoutFromSimilarIntrinsicVector.FieldSize,
                     Offsets = layoutFromMetadata.Offsets,
                     LayoutAbiStable = _vectorAbiIsStable,
+                    IsVectorTOrHasVectorTFields = true,
                 };
 #else
                 return new ComputedInstanceFieldLayout
@@ -248,6 +250,7 @@ namespace ILCompiler
                     FieldSize = layoutFromSimilarIntrinsicVector.FieldSize,
                     Offsets = layoutFromMetadata.Offsets,
                     LayoutAbiStable = _vectorAbiIsStable,
+                    IsVectorTOrHasVectorTFields = true,
                 };
 #endif
             }
index bb23738..6744603 100644 (file)
@@ -3128,7 +3128,7 @@ namespace Internal.JitInterface
         {
             return false;
         }
-        
+
         private CORINFO_OBJECT_STRUCT_* getRuntimeTypePointer(CORINFO_CLASS_STRUCT_* cls)
         {
             return null;
index 7a27f04..574bfb7 100644 (file)
@@ -112,6 +112,8 @@ namespace ILCompiler
             }, true, "Maximum number of threads to use during compilation");
         public Option<string> InstructionSet { get; } =
             new(new[] { "--instruction-set" }, "Instruction set to allow or disallow");
+        public Option<int> MaxVectorTBitWidth { get; } =
+            new(new[] { "--max-vectort-bitwidth" }, "Maximum width, in bits, that Vector<T> is allowed to be");
         public Option<string> Guard { get; } =
             new(new[] { "--guard" }, "Enable mitigations. Options: 'cf': CFG (Control Flow Guard, Windows only)");
         public Option<bool> Dehydrate { get; } =
@@ -210,6 +212,7 @@ namespace ILCompiler
             AddOption(RuntimeKnobs);
             AddOption(Parallelism);
             AddOption(InstructionSet);
+            AddOption(MaxVectorTBitWidth);
             AddOption(Guard);
             AddOption(Dehydrate);
             AddOption(PreinitStatics);
index da1aa1a..f83ad61 100644 (file)
@@ -67,7 +67,7 @@ namespace ILCompiler
 
             TargetArchitecture targetArchitecture = Get(_command.TargetArchitecture);
             TargetOS targetOS = Get(_command.TargetOS);
-            InstructionSetSupport instructionSetSupport = Helpers.ConfigureInstructionSetSupport(Get(_command.InstructionSet), targetArchitecture, targetOS,
+            InstructionSetSupport instructionSetSupport = Helpers.ConfigureInstructionSetSupport(Get(_command.InstructionSet), Get(_command.MaxVectorTBitWidth), targetArchitecture, targetOS,
                 "Unrecognized instruction set {0}", "Unsupported combination of instruction sets: {0}/{1}");
 
             string systemModuleName = Get(_command.SystemModuleName);
index e391837..ef384a1 100644 (file)
@@ -23,6 +23,8 @@ namespace ILCompiler
             new(new[] { "--reference", "-r" }, result => Helpers.BuildPathDictionary(result.Tokens, false), true, SR.ReferenceFiles);
         public Option<string> InstructionSet { get; } =
             new(new[] { "--instruction-set" }, SR.InstructionSets);
+        public Option<int> MaxVectorTBitWidth { get; } =
+            new(new[] { "--max-vectort-bitwidth" }, SR.MaxVectorTBitWidths);
         public Option<string[]> MibcFilePaths { get; } =
             new(new[] { "--mibc", "-m" }, Array.Empty<string>, SR.MibcFiles);
         public Option<string> OutputFilePath { get; } =
@@ -193,6 +195,7 @@ namespace ILCompiler
             AddOption(UnrootedInputFilePaths);
             AddOption(ReferenceFilePaths);
             AddOption(InstructionSet);
+            AddOption(MaxVectorTBitWidth);
             AddOption(MibcFilePaths);
             AddOption(OutputFilePath);
             AddOption(CompositeRootPath);
index 639e555..1d43fc8 100644 (file)
@@ -76,7 +76,7 @@ namespace ILCompiler
 
             TargetArchitecture targetArchitecture = Get(_command.TargetArchitecture);
             TargetOS targetOS = Get(_command.TargetOS);
-            InstructionSetSupport instructionSetSupport = Helpers.ConfigureInstructionSetSupport(Get(_command.InstructionSet), targetArchitecture, targetOS,
+            InstructionSetSupport instructionSetSupport = Helpers.ConfigureInstructionSetSupport(Get(_command.InstructionSet), Get(_command.MaxVectorTBitWidth), targetArchitecture, targetOS,
                 SR.InstructionSetMustNotBe, SR.InstructionSetInvalidImplication);
             SharedGenericsMode genericsMode = SharedGenericsMode.CanonicalReferenceTypes;
             var targetDetails = new TargetDetails(targetArchitecture, targetOS, Crossgen2RootCommand.IsArmel ? TargetAbi.NativeAotArmel : TargetAbi.NativeAot, instructionSetSupport.GetVectorTSimdVector());
index b899f77..a737ea6 100644 (file)
   <data name="InstructionSetInvalidImplication" xml:space="preserve">
     <value>Instruction set '{0}' implies support for instruction set '{1}'</value>
   </data>
+  <data name="MaxVectorTBitWidths" xml:space="preserve">
+    <value>The maximum width, in bits, for System.Numerics.Vector&lt;T&gt;. For example '128', '256', or '512'.</value>
+  </data>
   <data name="UnrootedInputFilesToCompile" xml:space="preserve">
     <value>Input files without automatic rooting of all methods</value>
   </data>
index cea5890..51eec2b 100644 (file)
@@ -1338,6 +1338,9 @@ void EEJitManager::SetCpuInfo()
 
     CORJIT_FLAGS CPUCompileFlags;
 
+    // Get the maximum bitwidth of Vector<T>, rounding down to the nearest multiple of 128-bits
+    uint32_t maxVectorTBitWidth = (CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_MaxVectorTBitWidth) / 128) * 128;
+
 #if defined(TARGET_X86) || defined(TARGET_AMD64)
     CPUCompileFlags.Set(InstructionSet_X86Base);
 
@@ -1401,6 +1404,7 @@ void EEJitManager::SetCpuInfo()
 
     CPUCompileFlags.Set(InstructionSet_SSE);
     CPUCompileFlags.Set(InstructionSet_SSE2);
+    CPUCompileFlags.Set(InstructionSet_VectorT128);
 
     if ((cpuidInfo[CPUID_ECX] & (1 << 25)) != 0)                                                          // AESNI
     {
@@ -1460,12 +1464,22 @@ void EEJitManager::SetCpuInfo()
                                 {
                                     CPUCompileFlags.Set(InstructionSet_AVX2);
 
+                                    if ((maxVectorTBitWidth == 0) || (maxVectorTBitWidth >= 256))
+                                    {
+                                        // We allow 256-bit Vector<T> by default
+                                        CPUCompileFlags.Clear(InstructionSet_VectorT128);
+                                        CPUCompileFlags.Set(InstructionSet_VectorT256);
+                                    }
+
                                     if (DoesOSSupportAVX512() && (avx512StateSupport() == 1))             // XGETBV XCR0[7:5] == 111
                                     {
                                         if ((cpuidInfo[CPUID_EBX] & (1 << 16)) != 0)                      // AVX512F
                                         {
                                             CPUCompileFlags.Set(InstructionSet_AVX512F);
 
+                                            // TODO-XArch: Add support for 512-bit Vector<T>
+                                            assert(!CPUCompileFlags.IsSet(InstructionSet_VectorT512));
+
                                             bool isAVX512_VLSupported = false;
                                             if ((cpuidInfo[CPUID_EBX] & (1 << 31)) != 0)                  // AVX512VL
                                             {
@@ -1526,11 +1540,6 @@ void EEJitManager::SetCpuInfo()
         }
     }
 
-    if (CLRConfig::GetConfigValue(CLRConfig::INTERNAL_SIMD16ByteOnly) != 0)
-    {
-        CPUCompileFlags.Clear(InstructionSet_AVX2);
-    }
-
     if (maxCpuId >= 0x07)
     {
         __cpuidex(cpuidInfo, 0x00000007, 0x00000000);
@@ -1581,6 +1590,7 @@ void EEJitManager::SetCpuInfo()
     // FP and SIMD support are enabled by default
     CPUCompileFlags.Set(InstructionSet_ArmBase);
     CPUCompileFlags.Set(InstructionSet_AdvSimd);
+    CPUCompileFlags.Set(InstructionSet_VectorT128);
 
     // PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE (30)
     if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE))
@@ -1792,7 +1802,6 @@ void EEJitManager::SetCpuInfo()
     {
         CPUCompileFlags.Clear(InstructionSet_X86Serialize);
     }
-
 #elif defined(TARGET_ARM64)
     if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableHWIntrinsic))
     {
index 0349ca2..45c585d 100644 (file)
@@ -1135,6 +1135,7 @@ BOOL MethodTableBuilder::CheckIfSIMDAndUpdateSize()
 
     LPCUTF8 className;
     LPCUTF8 nameSpace;
+
     if (FAILED(GetMDImport()->GetNameOfTypeDef(bmtInternal->pType->GetTypeDefToken(), &className, &nameSpace)))
         return false;
 
@@ -1144,7 +1145,12 @@ BOOL MethodTableBuilder::CheckIfSIMDAndUpdateSize()
     CORJIT_FLAGS CPUCompileFlags       = ExecutionManager::GetEEJitManager()->GetCPUCompileFlags();
     uint32_t     numInstanceFieldBytes = 16;
 
-    if (CPUCompileFlags.IsSet(InstructionSet_AVX2))
+    if (CPUCompileFlags.IsSet(InstructionSet_VectorT512))
+    {
+        // TODO-XARCH: The JIT needs to be updated to support 64-byte Vector<T>
+        numInstanceFieldBytes = 32;
+    }
+    else if (CPUCompileFlags.IsSet(InstructionSet_VectorT256))
     {
         numInstanceFieldBytes = 32;
     }