* Expose DOTNET_MaxVectorTBitWidth and an undocumented DOTNET_PreferredVectorBitWidth
* Ensure SPMI keeps a getMaxVectorTBitWidth implementation
* Fix the non-xarch vm build
* Remove getMaxVectorTBitWidth from the JIT/EE interface, it's no longer needed
* Move SetCpuInfo down into the EEJitManager constructor
* Remove getXarchCpuInfo in favor of passing `JIT_FLAG_VECTOR512_THROTTLING`
* Make sure CORINFO_XARCH_CPU is fully removed
* Have ENCODE_VERIFY_TYPE_LAYOUT not fail-fast for Vector<T> size differences
* Only encode types containing Vector<T> as check, not verify
* Remove changes that were extracted to separate PRs
* Ensure that the optimistic flags are a strict superset of the supported flags
* Make VectorT128/256/512 proper instruction sets and only allow one to be active at a time
* Don't allow avxvnni to be "optimistic" since that brings in avx2
* Ensure we handle HWIntrinsics being disabled
* Ensure that the Vector<T> size ISAs are covered by FromInstructionSet
* Ensure that `getMaxVectorByteLength` being 0 is handled
* Ensure NAOT startup can correctly check for the VectorT size bits
* Have BlkOpKindUnroll account for SIMD being disabled
* Ensure InstructionSet_VectorT128 is set in the fallback path for PAL_GetJitCpuCapabilityFlags
RETAIL_CONFIG_DWORD_INFO(INTERNAL_JitELTHookEnabled, W("JitELTHookEnabled"), 0, "On ARM, setting this will emit Enter/Leave/TailCall callbacks")
RETAIL_CONFIG_DWORD_INFO(INTERNAL_JitMemStats, W("JitMemStats"), 0, "Display JIT memory usage statistics")
RETAIL_CONFIG_DWORD_INFO(INTERNAL_JitVNMapSelBudget, W("JitVNMapSelBudget"), 100, "Max # of MapSelect's considered for a particular top-level invocation.")
-#if defined(TARGET_AMD64) || defined(TARGET_X86) || defined(TARGET_ARM64)
-#define EXTERNAL_FeatureSIMD_Default 1
-#else // !(defined(TARGET_AMD64) || defined(TARGET_X86) || defined(TARGET_ARM64))
-#define EXTERNAL_FeatureSIMD_Default 0
-#endif // !(defined(TARGET_AMD64) || defined(TARGET_X86) || defined(TARGET_ARM64))
-RETAIL_CONFIG_DWORD_INFO(INTERNAL_SIMD16ByteOnly, W("SIMD16ByteOnly"), 0, "Limit maximum SIMD vector length to 16 bytes (used by x64_arm64_altjit)")
RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_TrackDynamicMethodDebugInfo, W("TrackDynamicMethodDebugInfo"), 0, "Specifies whether debug info should be generated and tracked for dynamic methods")
#ifdef FEATURE_MULTICOREJIT
#endif
#endif
+RETAIL_CONFIG_DWORD_INFO(EXTERNAL_MaxVectorTBitWidth, W("MaxVectorTBitWidth"), 0, "The maximum width, in bits, that Vector<T> is allowed to be. A value less than 128 is treated as the system default.")
+
//
// Hardware Intrinsic ISAs; keep in sync with jitconfigvalues.h
//
#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
//TODO: should implement LoongArch64's features.
//TODO-RISCV64-CQ: should implement RISCV64's features.
-RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableHWIntrinsic, W("EnableHWIntrinsic"), 0, "Allows Base+ hardware intrinsics to be disabled")
+RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableHWIntrinsic, W("EnableHWIntrinsic"), 0, "Allows Base+ hardware intrinsics to be disabled")
#else
-RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableHWIntrinsic, W("EnableHWIntrinsic"), 1, "Allows Base+ hardware intrinsics to be disabled")
+RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableHWIntrinsic, W("EnableHWIntrinsic"), 1, "Allows Base+ hardware intrinsics to be disabled")
#endif // defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
#if defined(TARGET_AMD64) || defined(TARGET_X86)
enum CORINFO_InstructionSet
{
InstructionSet_ILLEGAL = 0,
- InstructionSet_NONE = 63,
+ InstructionSet_NONE = 127,
#ifdef TARGET_ARM64
InstructionSet_ArmBase=1,
InstructionSet_AdvSimd=2,
InstructionSet_Vector128=11,
InstructionSet_Dczva=12,
InstructionSet_Rcpc=13,
- InstructionSet_ArmBase_Arm64=14,
- InstructionSet_AdvSimd_Arm64=15,
- InstructionSet_Aes_Arm64=16,
- InstructionSet_Crc32_Arm64=17,
- InstructionSet_Dp_Arm64=18,
- InstructionSet_Rdm_Arm64=19,
- InstructionSet_Sha1_Arm64=20,
- InstructionSet_Sha256_Arm64=21,
+ InstructionSet_VectorT128=14,
+ InstructionSet_ArmBase_Arm64=15,
+ InstructionSet_AdvSimd_Arm64=16,
+ InstructionSet_Aes_Arm64=17,
+ InstructionSet_Crc32_Arm64=18,
+ InstructionSet_Dp_Arm64=19,
+ InstructionSet_Rdm_Arm64=20,
+ InstructionSet_Sha1_Arm64=21,
+ InstructionSet_Sha256_Arm64=22,
#endif // TARGET_ARM64
#ifdef TARGET_AMD64
InstructionSet_X86Base=1,
InstructionSet_AVX512DQ_VL=30,
InstructionSet_AVX512VBMI=31,
InstructionSet_AVX512VBMI_VL=32,
- InstructionSet_X86Base_X64=33,
- InstructionSet_SSE_X64=34,
- InstructionSet_SSE2_X64=35,
- InstructionSet_SSE3_X64=36,
- InstructionSet_SSSE3_X64=37,
- InstructionSet_SSE41_X64=38,
- InstructionSet_SSE42_X64=39,
- InstructionSet_AVX_X64=40,
- InstructionSet_AVX2_X64=41,
- InstructionSet_AES_X64=42,
- InstructionSet_BMI1_X64=43,
- InstructionSet_BMI2_X64=44,
- InstructionSet_FMA_X64=45,
- InstructionSet_LZCNT_X64=46,
- InstructionSet_PCLMULQDQ_X64=47,
- InstructionSet_POPCNT_X64=48,
- InstructionSet_AVXVNNI_X64=49,
- InstructionSet_MOVBE_X64=50,
- InstructionSet_X86Serialize_X64=51,
- InstructionSet_AVX512F_X64=52,
- InstructionSet_AVX512F_VL_X64=53,
- InstructionSet_AVX512BW_X64=54,
- InstructionSet_AVX512BW_VL_X64=55,
- InstructionSet_AVX512CD_X64=56,
- InstructionSet_AVX512CD_VL_X64=57,
- InstructionSet_AVX512DQ_X64=58,
- InstructionSet_AVX512DQ_VL_X64=59,
- InstructionSet_AVX512VBMI_X64=60,
- InstructionSet_AVX512VBMI_VL_X64=61,
+ InstructionSet_VectorT128=33,
+ InstructionSet_VectorT256=34,
+ InstructionSet_VectorT512=35,
+ InstructionSet_X86Base_X64=36,
+ InstructionSet_SSE_X64=37,
+ InstructionSet_SSE2_X64=38,
+ InstructionSet_SSE3_X64=39,
+ InstructionSet_SSSE3_X64=40,
+ InstructionSet_SSE41_X64=41,
+ InstructionSet_SSE42_X64=42,
+ InstructionSet_AVX_X64=43,
+ InstructionSet_AVX2_X64=44,
+ InstructionSet_AES_X64=45,
+ InstructionSet_BMI1_X64=46,
+ InstructionSet_BMI2_X64=47,
+ InstructionSet_FMA_X64=48,
+ InstructionSet_LZCNT_X64=49,
+ InstructionSet_PCLMULQDQ_X64=50,
+ InstructionSet_POPCNT_X64=51,
+ InstructionSet_AVXVNNI_X64=52,
+ InstructionSet_MOVBE_X64=53,
+ InstructionSet_X86Serialize_X64=54,
+ InstructionSet_AVX512F_X64=55,
+ InstructionSet_AVX512F_VL_X64=56,
+ InstructionSet_AVX512BW_X64=57,
+ InstructionSet_AVX512BW_VL_X64=58,
+ InstructionSet_AVX512CD_X64=59,
+ InstructionSet_AVX512CD_VL_X64=60,
+ InstructionSet_AVX512DQ_X64=61,
+ InstructionSet_AVX512DQ_VL_X64=62,
+ InstructionSet_AVX512VBMI_X64=63,
+ InstructionSet_AVX512VBMI_VL_X64=64,
#endif // TARGET_AMD64
#ifdef TARGET_X86
InstructionSet_X86Base=1,
InstructionSet_AVX512DQ_VL=30,
InstructionSet_AVX512VBMI=31,
InstructionSet_AVX512VBMI_VL=32,
- InstructionSet_X86Base_X64=33,
- InstructionSet_SSE_X64=34,
- InstructionSet_SSE2_X64=35,
- InstructionSet_SSE3_X64=36,
- InstructionSet_SSSE3_X64=37,
- InstructionSet_SSE41_X64=38,
- InstructionSet_SSE42_X64=39,
- InstructionSet_AVX_X64=40,
- InstructionSet_AVX2_X64=41,
- InstructionSet_AES_X64=42,
- InstructionSet_BMI1_X64=43,
- InstructionSet_BMI2_X64=44,
- InstructionSet_FMA_X64=45,
- InstructionSet_LZCNT_X64=46,
- InstructionSet_PCLMULQDQ_X64=47,
- InstructionSet_POPCNT_X64=48,
- InstructionSet_AVXVNNI_X64=49,
- InstructionSet_MOVBE_X64=50,
- InstructionSet_X86Serialize_X64=51,
- InstructionSet_AVX512F_X64=52,
- InstructionSet_AVX512F_VL_X64=53,
- InstructionSet_AVX512BW_X64=54,
- InstructionSet_AVX512BW_VL_X64=55,
- InstructionSet_AVX512CD_X64=56,
- InstructionSet_AVX512CD_VL_X64=57,
- InstructionSet_AVX512DQ_X64=58,
- InstructionSet_AVX512DQ_VL_X64=59,
- InstructionSet_AVX512VBMI_X64=60,
- InstructionSet_AVX512VBMI_VL_X64=61,
+ InstructionSet_VectorT128=33,
+ InstructionSet_VectorT256=34,
+ InstructionSet_VectorT512=35,
+ InstructionSet_X86Base_X64=36,
+ InstructionSet_SSE_X64=37,
+ InstructionSet_SSE2_X64=38,
+ InstructionSet_SSE3_X64=39,
+ InstructionSet_SSSE3_X64=40,
+ InstructionSet_SSE41_X64=41,
+ InstructionSet_SSE42_X64=42,
+ InstructionSet_AVX_X64=43,
+ InstructionSet_AVX2_X64=44,
+ InstructionSet_AES_X64=45,
+ InstructionSet_BMI1_X64=46,
+ InstructionSet_BMI2_X64=47,
+ InstructionSet_FMA_X64=48,
+ InstructionSet_LZCNT_X64=49,
+ InstructionSet_PCLMULQDQ_X64=50,
+ InstructionSet_POPCNT_X64=51,
+ InstructionSet_AVXVNNI_X64=52,
+ InstructionSet_MOVBE_X64=53,
+ InstructionSet_X86Serialize_X64=54,
+ InstructionSet_AVX512F_X64=55,
+ InstructionSet_AVX512F_VL_X64=56,
+ InstructionSet_AVX512BW_X64=57,
+ InstructionSet_AVX512BW_VL_X64=58,
+ InstructionSet_AVX512CD_X64=59,
+ InstructionSet_AVX512CD_VL_X64=60,
+ InstructionSet_AVX512DQ_X64=61,
+ InstructionSet_AVX512DQ_VL_X64=62,
+ InstructionSet_AVX512VBMI_X64=63,
+ InstructionSet_AVX512VBMI_VL_X64=64,
#endif // TARGET_X86
};
struct CORINFO_InstructionSetFlags
{
private:
- static const int32_t FlagsFieldCount = 1;
+ static const int32_t FlagsFieldCount = 2;
static const int32_t BitsPerFlagsField = sizeof(uint64_t) * 8;
uint64_t _flags[FlagsFieldCount] = { };
resultflags.RemoveInstructionSet(InstructionSet_Vector64);
if (resultflags.HasInstructionSet(InstructionSet_Vector128) && !resultflags.HasInstructionSet(InstructionSet_AdvSimd))
resultflags.RemoveInstructionSet(InstructionSet_Vector128);
+ if (resultflags.HasInstructionSet(InstructionSet_VectorT128) && !resultflags.HasInstructionSet(InstructionSet_AdvSimd))
+ resultflags.RemoveInstructionSet(InstructionSet_VectorT128);
#endif // TARGET_ARM64
#ifdef TARGET_AMD64
if (resultflags.HasInstructionSet(InstructionSet_X86Base) && !resultflags.HasInstructionSet(InstructionSet_X86Base_X64))
resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI_VL);
if (resultflags.HasInstructionSet(InstructionSet_AVX512VBMI_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL))
resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI_VL);
+ if (resultflags.HasInstructionSet(InstructionSet_VectorT128) && !resultflags.HasInstructionSet(InstructionSet_SSE2))
+ resultflags.RemoveInstructionSet(InstructionSet_VectorT128);
+ if (resultflags.HasInstructionSet(InstructionSet_VectorT256) && !resultflags.HasInstructionSet(InstructionSet_AVX2))
+ resultflags.RemoveInstructionSet(InstructionSet_VectorT256);
+ if (resultflags.HasInstructionSet(InstructionSet_VectorT512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F))
+ resultflags.RemoveInstructionSet(InstructionSet_VectorT512);
if (resultflags.HasInstructionSet(InstructionSet_AVX512F) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL))
resultflags.RemoveInstructionSet(InstructionSet_AVX512F);
if (resultflags.HasInstructionSet(InstructionSet_AVX512F) && !resultflags.HasInstructionSet(InstructionSet_AVX512CD_VL))
resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI_VL);
if (resultflags.HasInstructionSet(InstructionSet_AVX512VBMI_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL))
resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI_VL);
+ if (resultflags.HasInstructionSet(InstructionSet_VectorT128) && !resultflags.HasInstructionSet(InstructionSet_SSE2))
+ resultflags.RemoveInstructionSet(InstructionSet_VectorT128);
+ if (resultflags.HasInstructionSet(InstructionSet_VectorT256) && !resultflags.HasInstructionSet(InstructionSet_AVX2))
+ resultflags.RemoveInstructionSet(InstructionSet_VectorT256);
+ if (resultflags.HasInstructionSet(InstructionSet_VectorT512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F))
+ resultflags.RemoveInstructionSet(InstructionSet_VectorT512);
if (resultflags.HasInstructionSet(InstructionSet_AVX512F) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL))
resultflags.RemoveInstructionSet(InstructionSet_AVX512F);
if (resultflags.HasInstructionSet(InstructionSet_AVX512F) && !resultflags.HasInstructionSet(InstructionSet_AVX512CD_VL))
return "Dczva";
case InstructionSet_Rcpc :
return "Rcpc";
+ case InstructionSet_VectorT128 :
+ return "VectorT128";
#endif // TARGET_ARM64
#ifdef TARGET_AMD64
case InstructionSet_X86Base :
return "AVX512VBMI_VL";
case InstructionSet_AVX512VBMI_VL_X64 :
return "AVX512VBMI_VL_X64";
+ case InstructionSet_VectorT128 :
+ return "VectorT128";
+ case InstructionSet_VectorT256 :
+ return "VectorT256";
+ case InstructionSet_VectorT512 :
+ return "VectorT512";
#endif // TARGET_AMD64
#ifdef TARGET_X86
case InstructionSet_X86Base :
return "AVX512VBMI";
case InstructionSet_AVX512VBMI_VL :
return "AVX512VBMI_VL";
+ case InstructionSet_VectorT128 :
+ return "VectorT128";
+ case InstructionSet_VectorT256 :
+ return "VectorT256";
+ case InstructionSet_VectorT512 :
+ return "VectorT512";
#endif // TARGET_X86
default:
case READYTORUN_INSTRUCTION_Sha256: return InstructionSet_Sha256;
case READYTORUN_INSTRUCTION_Atomics: return InstructionSet_Atomics;
case READYTORUN_INSTRUCTION_Rcpc: return InstructionSet_Rcpc;
+ case READYTORUN_INSTRUCTION_VectorT128: return InstructionSet_VectorT128;
#endif // TARGET_ARM64
#ifdef TARGET_AMD64
case READYTORUN_INSTRUCTION_X86Base: return InstructionSet_X86Base;
case READYTORUN_INSTRUCTION_Avx512DQ_VL: return InstructionSet_AVX512DQ_VL;
case READYTORUN_INSTRUCTION_Avx512Vbmi: return InstructionSet_AVX512VBMI;
case READYTORUN_INSTRUCTION_Avx512Vbmi_VL: return InstructionSet_AVX512VBMI_VL;
+ case READYTORUN_INSTRUCTION_VectorT128: return InstructionSet_VectorT128;
+ case READYTORUN_INSTRUCTION_VectorT256: return InstructionSet_VectorT256;
+ case READYTORUN_INSTRUCTION_VectorT512: return InstructionSet_VectorT512;
#endif // TARGET_AMD64
#ifdef TARGET_X86
case READYTORUN_INSTRUCTION_X86Base: return InstructionSet_X86Base;
case READYTORUN_INSTRUCTION_Avx512DQ_VL: return InstructionSet_AVX512DQ_VL;
case READYTORUN_INSTRUCTION_Avx512Vbmi: return InstructionSet_AVX512VBMI;
case READYTORUN_INSTRUCTION_Avx512Vbmi_VL: return InstructionSet_AVX512VBMI_VL;
+ case READYTORUN_INSTRUCTION_VectorT128: return InstructionSet_VectorT128;
+ case READYTORUN_INSTRUCTION_VectorT256: return InstructionSet_VectorT256;
+ case READYTORUN_INSTRUCTION_VectorT512: return InstructionSet_VectorT512;
#endif // TARGET_X86
default:
#define GUID_DEFINED
#endif // !GUID_DEFINED
-constexpr GUID JITEEVersionIdentifier = { /* d4414be1-70e4-46ac-8866-ca3a6c2f8422 */
- 0xd4414be1,
- 0x70e4,
- 0x46ac,
- {0x88, 0x66, 0xca, 0x3a, 0x6c, 0x2f, 0x84, 0x22}
+constexpr GUID JITEEVersionIdentifier = { /* fda2f9dd-6b3e-4ecd-a7b8-79e5edf1f072 */
+ 0xfda2f9dd,
+ 0x6b3e,
+ 0x4ecd,
+ {0xa7, 0xb8, 0x79, 0xe5, 0xed, 0xf1, 0xf0, 0x72}
};
//////////////////////////////////////////////////////////////////////////////////////////////////////////
READYTORUN_INSTRUCTION_Avx512DQ_VL=36,
READYTORUN_INSTRUCTION_Avx512Vbmi=37,
READYTORUN_INSTRUCTION_Avx512Vbmi_VL=38,
+ READYTORUN_INSTRUCTION_VectorT128=39,
+ READYTORUN_INSTRUCTION_VectorT256=40,
+ READYTORUN_INSTRUCTION_VectorT512=41,
};
assert(srcOffset < (INT32_MAX - static_cast<int>(size)));
assert(dstOffset < (INT32_MAX - static_cast<int>(size)));
- if (size >= XMM_REGSIZE_BYTES)
+ // Get the largest SIMD register available if the size is large enough
+ unsigned regSize = compiler->roundDownSIMDSize(size);
+
+ if ((size >= regSize) && (regSize > 0))
{
regNumber tempReg = node->GetSingleTempReg(RBM_ALLFLOAT);
instruction simdMov = simdUnalignedMovIns();
- // Get the largest SIMD register available if the size is large enough
- unsigned regSize = compiler->roundDownSIMDSize(size);
-
auto emitSimdMovs = [&]() {
if (srcLclNum != BAD_VAR_NUM)
{
CLANG_FORMAT_COMMENT_ANCHOR;
#if defined(TARGET_XARCH)
- if (compExactlyDependsOn(InstructionSet_AVX2))
+ // TODO-XArch: Add support for 512-bit Vector<T>
+ assert(!compIsaSupportedDebugOnly(InstructionSet_VectorT512));
+
+ if (compExactlyDependsOn(InstructionSet_VectorT256))
{
- // TODO-XArch-AVX512 : Return ZMM_REGSIZE_BYTES once Vector<T> supports AVX512.
+ assert(!compIsaSupportedDebugOnly(InstructionSet_VectorT128));
return YMM_REGSIZE_BYTES;
}
- else
+ else if (compExactlyDependsOn(InstructionSet_VectorT128))
{
return XMM_REGSIZE_BYTES;
}
+ else
+ {
+ return 0;
+ }
#elif defined(TARGET_ARM64)
- return FP_REGSIZE_BYTES;
+ if (compExactlyDependsOn(InstructionSet_VectorT128))
+ {
+ return FP_REGSIZE_BYTES;
+ }
+ else
+ {
+ return 0;
+ }
#else
assert(!"getVectorTByteLength() unimplemented on target arch");
unreached();
uint32_t getMaxVectorByteLength() const
{
#if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_XARCH)
- if (compOpportunisticallyDependsOn(InstructionSet_AVX))
+ if (compOpportunisticallyDependsOn(InstructionSet_AVX512F))
{
- if (compOpportunisticallyDependsOn(InstructionSet_AVX512F))
- {
- return ZMM_REGSIZE_BYTES;
- }
- else
- {
- return YMM_REGSIZE_BYTES;
- }
+ return ZMM_REGSIZE_BYTES;
}
- else
+ else if (compOpportunisticallyDependsOn(InstructionSet_AVX))
+ {
+ return YMM_REGSIZE_BYTES;
+ }
+ else if (compOpportunisticallyDependsOn(InstructionSet_SSE))
{
return XMM_REGSIZE_BYTES;
}
+ else
+ {
+ assert((JitConfig.EnableHWIntrinsic() == 0) || (JitConfig.EnableSSE() == 0));
+ return 0;
+ }
#elif defined(TARGET_ARM64)
- return FP_REGSIZE_BYTES;
+ if (compOpportunisticallyDependsOn(InstructionSet_AdvSimd))
+ {
+ return FP_REGSIZE_BYTES;
+ }
+ else
+ {
+ assert((JitConfig.EnableHWIntrinsic() == 0) || (JitConfig.EnableArm64AdvSimd() == 0));
+ return 0;
+ }
#else
assert(!"getMaxVectorByteLength() unimplemented on target arch");
unreached();
return impSpecialIntrinsic(NI_Vector128_ToVector256, clsHnd, method, sig, simdBaseJitType, retType,
simdSize);
}
- else
+ else if (vectorTByteLength == XMM_REGSIZE_BYTES)
{
- assert(vectorTByteLength == XMM_REGSIZE_BYTES);
-
// We fold away the cast here, as it only exists to satisfy
// the type system. It is safe to do this here since the retNode type
// and the signature return type are both the same TYP_SIMD.
SetOpLclRelatedToSIMDIntrinsic(retNode);
assert(retNode->gtType == getSIMDTypeForSize(getSIMDTypeSizeInBytes(sig->retTypeSigClass)));
}
+ else
+ {
+ assert(vectorTByteLength == 0);
+ }
break;
}
break;
}
- else
+ else if (vectorTByteLength == XMM_REGSIZE_BYTES)
{
- assert(vectorTByteLength == XMM_REGSIZE_BYTES);
-
if (compExactlyDependsOn(InstructionSet_AVX))
{
// We support Vector256 but Vector<T> is only 16-bytes, so we should
}
}
}
+ else
+ {
+ assert(vectorTByteLength == 0);
+ }
break;
}
}
break;
}
- else
+ else if (vectorTByteLength == XMM_REGSIZE_BYTES)
{
- assert(vectorTByteLength == XMM_REGSIZE_BYTES);
-
if (compExactlyDependsOn(InstructionSet_AVX512F))
{
// We support Vector512 but Vector<T> is only 16-bytes, so we should
}
}
}
+ else
+ {
+ assert(vectorTByteLength == 0);
+ }
break;
}
#if defined(FEATURE_SIMD)
// getMaxVectorByteLength() represents the size of the largest primitive type that we can struct promote.
- const unsigned maxSize = MAX_NumOfFieldsInPromotableStruct * compiler->getMaxVectorByteLength();
+ const unsigned maxSize =
+ MAX_NumOfFieldsInPromotableStruct * max(compiler->getMaxVectorByteLength(), sizeof(double));
#else // !FEATURE_SIMD
// sizeof(double) represents the size of the largest primitive type that we can struct promote.
const unsigned maxSize = MAX_NumOfFieldsInPromotableStruct * sizeof(double);
// The return value will be on the X87 stack, and we will need to move it.
dstCandidates = allRegs(registerType);
#else // !TARGET_X86
- dstCandidates = RBM_FLOATRET;
+ dstCandidates = RBM_FLOATRET;
#endif // !TARGET_X86
}
else
{
case GenTreeBlk::BlkOpKindUnroll:
{
-#ifdef TARGET_AMD64
- const bool canUse16BytesSimdMov = !blkNode->IsOnHeapAndContainsReferences();
- const bool willUseSimdMov = canUse16BytesSimdMov && (size >= 16);
-#else
- const bool willUseSimdMov = (size >= 16);
-#endif
+ const bool canUse16BytesSimdMov =
+ !blkNode->IsOnHeapAndContainsReferences() && compiler->IsBaselineSimdIsaSupported();
+ const bool willUseSimdMov = canUse16BytesSimdMov && (size >= XMM_REGSIZE_BYTES);
+
if (willUseSimdMov)
{
buildInternalFloatRegisterDefForNode(blkNode, internalFloatRegCandidates());
break;
case GenTreeBlk::BlkOpKindUnroll:
- if ((size % XMM_REGSIZE_BYTES) != 0)
+ {
+ unsigned regSize = compiler->roundDownSIMDSize(size);
+ unsigned remainder = size;
+
+ if ((size >= regSize) && (regSize > 0))
+ {
+ // We need a float temporary if we're doing SIMD operations
+
+ buildInternalFloatRegisterDefForNode(blkNode, internalFloatRegCandidates());
+ SetContainsAVXFlags(size);
+
+ remainder %= regSize;
+ }
+
+ if ((remainder > 0) && ((regSize == 0) || (isPow2(remainder) && (remainder <= REGSIZE_BYTES))))
{
+ // We need an int temporary if we're not doing SIMD operations,
+ // or if we are but the remainder is a power of 2 that is no larger
+ // than the size of a register
+
regMaskTP regMask = availableIntRegs;
#ifdef TARGET_X86
if ((size & 1) != 0)
#endif
internalIntDef = buildInternalIntRegisterDefForNode(blkNode, regMask);
}
-
- if (size >= XMM_REGSIZE_BYTES)
- {
- buildInternalFloatRegisterDefForNode(blkNode, internalFloatRegCandidates());
- SetContainsAVXFlags(size);
- }
break;
+ }
case GenTreeBlk::BlkOpKindUnrollMemmove:
{
JITDUMP(" Found Vector<%s>\n", varTypeName(JitType2PreciseVarType(simdBaseJitType)));
size = getVectorTByteLength();
- assert(size != 0);
+ if (size == 0)
+ {
+ return CORINFO_TYPE_UNDEF;
+ }
break;
}
}
#endif // TARGET_XARCH
- assert(vectorTByteLength == 16);
- return SimdAsHWIntrinsicClassId::VectorT128;
+ if (vectorTByteLength == 16)
+ {
+ return SimdAsHWIntrinsicClassId::VectorT128;
+ }
+ else
+ {
+ return SimdAsHWIntrinsicClassId::Unknown;
+ }
}
break;
}
<IlcArg Condition="$(IlcInstructionSet) != ''" Include="--instruction-set:$(IlcInstructionSet)" />
<IlcArg Condition="$(IlcDisableReflection) == 'true'" Include="--reflectiondata:none" />
<IlcArg Condition="$(IlcDisableReflection) == 'true'" Include="--feature:System.Collections.Generic.DefaultComparers=false" />
+ <IlcArg Condition="$(IlcMaxVectorTBitWidth) != ''" Include="--max-vectort-bitwidth:$(IlcMaxVectorTBitWidth)" />
<IlcArg Condition="$(IlcSingleThreaded) == 'true'" Include="--parallelism:1" />
<IlcArg Condition="$(IlcSystemModule) != ''" Include="--systemmodule:$(IlcSystemModule)" />
<IlcArg Condition="$(IlcDumpIL) == 'true'" Include="--ildump:$(NativeIntermediateOutputPath)%(ManagedBinary.Filename).il" />
XArchIntrinsicConstants_Avx512Vbmi = 0x800000,
XArchIntrinsicConstants_Avx512Vbmi_vl = 0x1000000,
XArchIntrinsicConstants_Serialize = 0x2000000,
+ XArchIntrinsicConstants_VectorT128 = 0x4000000,
+ XArchIntrinsicConstants_VectorT256 = 0x8000000,
+ XArchIntrinsicConstants_VectorT512 = 0x10000000,
};
#endif //HOST_X86 || HOST_AMD64
ARM64IntrinsicConstants_Sha256 = 0x0040,
ARM64IntrinsicConstants_Atomics = 0x0080,
ARM64IntrinsicConstants_Rcpc = 0x0100,
+ ARM64IntrinsicConstants_VectorT128 = 0x0200,
};
// Bit position for the ARM64IntrinsicConstants_Atomics flags, to be used with tbz / tbnz instructions
if ((cpuidInfo[CPUID_EDX] & requiredBaselineEdxFlags) == requiredBaselineEdxFlags)
{
+ g_cpuFeatures |= XArchIntrinsicConstants_VectorT128;
+
if ((cpuidInfo[CPUID_ECX] & (1 << 25)) != 0) // AESNI
{
g_cpuFeatures |= XArchIntrinsicConstants_Aes;
if ((cpuidInfo[CPUID_EBX] & (1 << 5)) != 0) // AVX2
{
g_cpuFeatures |= XArchIntrinsicConstants_Avx2;
+ g_cpuFeatures |= XArchIntrinsicConstants_VectorT256;
if (PalIsAvx512Enabled() && (avx512StateSupport() == 1)) // XGETBV XRC0[7:5] == 111
{
if ((cpuidInfo[CPUID_EBX] & (1 << 16)) != 0) // AVX512F
{
g_cpuFeatures |= XArchIntrinsicConstants_Avx512f;
+ g_cpuFeatures |= XArchIntrinsicConstants_VectorT512;
bool isAVX512_VLSupported = false;
if ((cpuidInfo[CPUID_EBX] & (1 << 31)) != 0) // AVX512VL
#endif
#ifdef HWCAP_ASIMD
if (hwCap & HWCAP_ASIMD)
- *flags |= ARM64IntrinsicConstants_AdvSimd;
+ *flags |= ARM64IntrinsicConstants_AdvSimd | ARM64IntrinsicConstants_VectorT128;
#endif
#ifdef HWCAP_ASIMDRDM
if (hwCap & HWCAP_ASIMDRDM)
// Every ARM64 CPU should support SIMD and FP
// If the OS have no function to query for CPU capabilities we set just these
- *flags |= ARM64IntrinsicConstants_AdvSimd;
+ *flags |= ARM64IntrinsicConstants_AdvSimd | ARM64IntrinsicConstants_VectorT128;
#endif // HAVE_AUXV_HWCAP_H
}
#endif
#endif
// FP and SIMD support are enabled by default
- *flags |= ARM64IntrinsicConstants_AdvSimd;
+ *flags |= ARM64IntrinsicConstants_AdvSimd | ARM64IntrinsicConstants_VectorT128;
if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE))
{
* `<OptimizationPreference>Speed</OptimizationPreference>`: when generating optimized code, favor code execution speed.
* `<OptimizationPreference>Size</OptimizationPreference>`: when generating optimized code, favor smaller code size.
* `<IlcInstructionSet>`: By default, the compiler targets the minimum instruction set supported by the target OS and architecture. This option allows targeting newer instruction sets for better performance. The native binary will require the instruction sets to be supported by the hardware in order to run. For example, `<IlcInstructionSet>avx2,bmi2,fma,pclmul,popcnt,aes</IlcInstructionSet>` will produce binary that takes advantage of instruction sets that are typically present on current Intel and AMD processors. Run `ilc --help` for the full list of available instruction sets. `ilc` can be executed from the NativeAOT package in your local nuget cache e.g. `%USERPROFILE%\.nuget\packages\runtime.win-x64.microsoft.dotnet.ilcompiler\8.0.0-...\tools\ilc.exe` on Windows or `~/.nuget/packages/runtime.linux-arm64.microsoft.dotnet.ilcompiler/8.0.0-.../tools/ilc` on Linux.
* `<IlcMaxVectorTBitWidth>`: By default, the compiler targets a `Vector<T>` size of `16` or `32` bytes, depending on the underlying instruction sets supported. This option allows specifying a different maximum bit width. For example, by default on x64 hardware `Vector<T>` will be 16-bytes; however, if `AVX2` is targeted then `Vector<T>` will automatically grow to be 32-bytes instead, and setting `<IlcMaxVectorTBitWidth>128</IlcMaxVectorTBitWidth>` would keep the size at 16-bytes. Alternatively, even if `AVX512F` is targeted, by default `Vector<T>` will not grow larger than 32-bytes; setting `<IlcMaxVectorTBitWidth>512</IlcMaxVectorTBitWidth>` would allow it to grow to 64-bytes.
#endif
#ifdef HWCAP_ASIMD
if (hwCap & HWCAP_ASIMD)
+ {
flags->Set(InstructionSet_AdvSimd);
+ flags->Set(InstructionSet_VectorT128);
+ }
#endif
#ifdef HWCAP_ASIMDRDM
if (hwCap & HWCAP_ASIMDRDM)
// Set baseline flags if OS has not exposed mechanism for us to determine CPU capabilities
flags->Set(InstructionSet_ArmBase);
flags->Set(InstructionSet_AdvSimd);
+ flags->Set(InstructionSet_VectorT128);
// flags->Set(CORJIT_FLAGS::CORJIT_FLAG_HAS_ARM64_FP);
#endif // HAVE_AUXV_HWCAP_H
}
{
if ((_targetArchitecture == TargetArchitecture.X64) || (_targetArchitecture == TargetArchitecture.X86))
{
- Debug.Assert(InstructionSet.X64_AVX2 == InstructionSet.X86_AVX2);
- Debug.Assert(InstructionSet.X64_SSE2 == InstructionSet.X86_SSE2);
- if (IsInstructionSetSupported(InstructionSet.X86_AVX2))
+ Debug.Assert(InstructionSet.X64_VectorT128 == InstructionSet.X86_VectorT128);
+ Debug.Assert(InstructionSet.X64_VectorT256 == InstructionSet.X86_VectorT256);
+ Debug.Assert(InstructionSet.X64_VectorT512 == InstructionSet.X86_VectorT512);
+
+ // TODO-XArch: Add support for 512-bit Vector<T>
+ Debug.Assert(!IsInstructionSetSupported(InstructionSet.X64_VectorT512));
+
+ if (IsInstructionSetSupported(InstructionSet.X64_VectorT256))
+ {
+ Debug.Assert(!IsInstructionSetSupported(InstructionSet.X64_VectorT128));
return SimdVectorLength.Vector256Bit;
- else if (IsInstructionSetExplicitlyUnsupported(InstructionSet.X86_AVX2) && IsInstructionSetSupported(InstructionSet.X64_SSE2))
+ }
+ else if (IsInstructionSetSupported(InstructionSet.X64_VectorT128))
+ {
return SimdVectorLength.Vector128Bit;
+ }
else
+ {
return SimdVectorLength.None;
+ }
}
else if (_targetArchitecture == TargetArchitecture.ARM64)
{
- return SimdVectorLength.Vector128Bit;
+ if (IsInstructionSetSupported(InstructionSet.ARM64_VectorT128))
+ {
+ return SimdVectorLength.Vector128Bit;
+ }
+ else
+ {
+ return SimdVectorLength.None;
+ }
}
else if (_targetArchitecture == TargetArchitecture.ARM)
{
return s_nonSpecifiableInstructionSets[architecture];
}
- private readonly SortedSet<string> _supportedInstructionSets = new SortedSet<string>();
- private readonly SortedSet<string> _unsupportedInstructionSets = new SortedSet<string>();
+ private readonly SortedSet<string> _supportedInstructionSets;
+ private readonly SortedSet<string> _unsupportedInstructionSets;
private readonly TargetArchitecture _architecture;
/// <summary>
/// Creates an empty builder for the given target architecture, with no
/// instruction sets yet marked as supported or unsupported.
/// </summary>
public InstructionSetSupportBuilder(TargetArchitecture architecture)
{
+ _supportedInstructionSets = new SortedSet<string>();
+ _unsupportedInstructionSets = new SortedSet<string>();
_architecture = architecture;
}
+ /// <summary>
+ /// Copy constructor: clones another builder's supported/unsupported
+ /// instruction-set lists and its target architecture, so the new builder
+ /// can be modified independently of the original.
+ /// </summary>
+ public InstructionSetSupportBuilder(InstructionSetSupportBuilder other)
+ {
+ _supportedInstructionSets = new SortedSet<string>(other._supportedInstructionSets);
+ _unsupportedInstructionSets = new SortedSet<string>(other._unsupportedInstructionSets);
+ _architecture = other._architecture;
+ }
+
/// <summary>
/// Add a supported instruction set to the specified list.
/// </summary>
/// Seal modifications to instruction set support
/// </summary>
/// <returns>returns "false" if instruction set isn't valid on this architecture</returns>
- public bool ComputeInstructionSetFlags(out InstructionSetFlags supportedInstructionSets,
- out InstructionSetFlags unsupportedInstructionSets,
- Action<string, string> invalidInstructionSetImplication)
+ public bool ComputeInstructionSetFlags(int maxVectorTBitWidth,
+ out InstructionSetFlags supportedInstructionSets,
+ out InstructionSetFlags unsupportedInstructionSets,
+ Action<string, string> invalidInstructionSetImplication)
{
supportedInstructionSets = new InstructionSetFlags();
unsupportedInstructionSets = new InstructionSetFlags();
}
}
+ switch (_architecture)
+ {
+ case TargetArchitecture.X64:
+ case TargetArchitecture.X86:
+ {
+ Debug.Assert(InstructionSet.X86_SSE2 == InstructionSet.X64_SSE2);
+ Debug.Assert(InstructionSet.X86_AVX2 == InstructionSet.X64_AVX2);
+ Debug.Assert(InstructionSet.X86_AVX512F == InstructionSet.X64_AVX512F);
+
+ Debug.Assert(InstructionSet.X86_VectorT128 == InstructionSet.X64_VectorT128);
+ Debug.Assert(InstructionSet.X86_VectorT256 == InstructionSet.X64_VectorT256);
+ Debug.Assert(InstructionSet.X86_VectorT512 == InstructionSet.X64_VectorT512);
+
+ // We only want one size supported for Vector<T> and we want the other sizes explicitly
+ // unsupported to ensure we throw away the given methods if runtime picks a larger size
+
+ Debug.Assert(supportedInstructionSets.HasInstructionSet(InstructionSet.X86_SSE2));
+ Debug.Assert((maxVectorTBitWidth == 0) || (maxVectorTBitWidth >= 128));
+ supportedInstructionSets.AddInstructionSet(InstructionSet.X86_VectorT128);
+
+ if (supportedInstructionSets.HasInstructionSet(InstructionSet.X86_AVX2))
+ {
+ if ((maxVectorTBitWidth == 0) || (maxVectorTBitWidth >= 256))
+ {
+ supportedInstructionSets.RemoveInstructionSet(InstructionSet.X86_VectorT128);
+ supportedInstructionSets.AddInstructionSet(InstructionSet.X86_VectorT256);
+
+ unsupportedInstructionSets.AddInstructionSet(InstructionSet.X86_VectorT128);
+ unsupportedInstructionSets.AddInstructionSet(InstructionSet.X86_VectorT512);
+ }
+
+ // TODO-XArch: Add support for 512-bit Vector<T>
+ }
+ break;
+ }
+
+ case TargetArchitecture.ARM64:
+ {
+ Debug.Assert(supportedInstructionSets.HasInstructionSet(InstructionSet.ARM64_AdvSimd));
+ Debug.Assert((maxVectorTBitWidth == 0) || (maxVectorTBitWidth >= 128));
+ supportedInstructionSets.AddInstructionSet(InstructionSet.ARM64_VectorT128);
+ break;
+ }
+ }
+
return true;
}
}
{
internal static partial class Helpers
{
- public static InstructionSetSupport ConfigureInstructionSetSupport(string instructionSet, TargetArchitecture targetArchitecture, TargetOS targetOS,
+ public static InstructionSetSupport ConfigureInstructionSetSupport(string instructionSet, int maxVectorTBitWidth, TargetArchitecture targetArchitecture, TargetOS targetOS,
string mustNotBeMessage, string invalidImplicationMessage)
{
InstructionSetSupportBuilder instructionSetSupportBuilder = new(targetArchitecture);
}
}
- instructionSetSupportBuilder.ComputeInstructionSetFlags(out var supportedInstructionSet, out var unsupportedInstructionSet,
+ instructionSetSupportBuilder.ComputeInstructionSetFlags(maxVectorTBitWidth, out var supportedInstructionSet, out var unsupportedInstructionSet,
(string specifiedInstructionSet, string impliedInstructionSet) =>
throw new CommandLineException(string.Format(invalidImplicationMessage, specifiedInstructionSet, impliedInstructionSet)));
- InstructionSetSupportBuilder optimisticInstructionSetSupportBuilder = new InstructionSetSupportBuilder(targetArchitecture);
+ // Due to expansion by implication, the optimistic set is most often a pure superset of the supported set
+ //
+ // However, there are some gaps in cases like Arm64 neon where none of the optimistic sets imply it. Likewise,
+ // the optimistic set would be missing the explicitly unsupported sets. So we effectively clone the list and
+ // tack on the additional optimistic bits after. This ensures the optimistic set remains an accurate superset
+ InstructionSetSupportBuilder optimisticInstructionSetSupportBuilder = new InstructionSetSupportBuilder(instructionSetSupportBuilder);
// Optimistically assume some instruction sets are present.
if (targetArchitecture == TargetArchitecture.X86 || targetArchitecture == TargetArchitecture.X64)
optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("bmi2");
}
- if (supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX2))
- {
- }
-
Debug.Assert(InstructionSet.X64_AVX512F == InstructionSet.X86_AVX512F);
if (supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX512F))
{
optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("rcpc");
}
- optimisticInstructionSetSupportBuilder.ComputeInstructionSetFlags(out var optimisticInstructionSet, out _,
+ optimisticInstructionSetSupportBuilder.ComputeInstructionSetFlags(maxVectorTBitWidth, out var optimisticInstructionSet, out _,
(string specifiedInstructionSet, string impliedInstructionSet) => throw new NotSupportedException());
optimisticInstructionSet.Remove(unsupportedInstructionSet);
optimisticInstructionSet.Add(supportedInstructionSet);
Avx512DQ_VL=36,
Avx512Vbmi=37,
Avx512Vbmi_VL=38,
+ VectorT128=39,
+ VectorT256=40,
+ VectorT512=41,
}
}
case InstructionSet.ARM64_Vector128: return null;
case InstructionSet.ARM64_Dczva: return null;
case InstructionSet.ARM64_Rcpc: return ReadyToRunInstructionSet.Rcpc;
+ case InstructionSet.ARM64_VectorT128: return ReadyToRunInstructionSet.VectorT128;
default: throw new Exception("Unknown instruction set");
}
case InstructionSet.X64_AVX512VBMI_X64: return ReadyToRunInstructionSet.Avx512Vbmi;
case InstructionSet.X64_AVX512VBMI_VL: return ReadyToRunInstructionSet.Avx512Vbmi_VL;
case InstructionSet.X64_AVX512VBMI_VL_X64: return ReadyToRunInstructionSet.Avx512Vbmi_VL;
+ case InstructionSet.X64_VectorT128: return ReadyToRunInstructionSet.VectorT128;
+ case InstructionSet.X64_VectorT256: return ReadyToRunInstructionSet.VectorT256;
+ case InstructionSet.X64_VectorT512: return ReadyToRunInstructionSet.VectorT512;
default: throw new Exception("Unknown instruction set");
}
case InstructionSet.X86_AVX512VBMI_X64: return null;
case InstructionSet.X86_AVX512VBMI_VL: return ReadyToRunInstructionSet.Avx512Vbmi_VL;
case InstructionSet.X86_AVX512VBMI_VL_X64: return null;
+ case InstructionSet.X86_VectorT128: return ReadyToRunInstructionSet.VectorT128;
+ case InstructionSet.X86_VectorT256: return ReadyToRunInstructionSet.VectorT256;
+ case InstructionSet.X86_VectorT512: return ReadyToRunInstructionSet.VectorT512;
default: throw new Exception("Unknown instruction set");
}
public enum InstructionSet
{
ILLEGAL = 0,
- NONE = 63,
+ NONE = 127,
ARM64_ArmBase = InstructionSet_ARM64.ArmBase,
ARM64_AdvSimd = InstructionSet_ARM64.AdvSimd,
ARM64_Aes = InstructionSet_ARM64.Aes,
ARM64_Vector128 = InstructionSet_ARM64.Vector128,
ARM64_Dczva = InstructionSet_ARM64.Dczva,
ARM64_Rcpc = InstructionSet_ARM64.Rcpc,
+ ARM64_VectorT128 = InstructionSet_ARM64.VectorT128,
ARM64_ArmBase_Arm64 = InstructionSet_ARM64.ArmBase_Arm64,
ARM64_AdvSimd_Arm64 = InstructionSet_ARM64.AdvSimd_Arm64,
ARM64_Aes_Arm64 = InstructionSet_ARM64.Aes_Arm64,
X64_AVX512DQ_VL = InstructionSet_X64.AVX512DQ_VL,
X64_AVX512VBMI = InstructionSet_X64.AVX512VBMI,
X64_AVX512VBMI_VL = InstructionSet_X64.AVX512VBMI_VL,
+ X64_VectorT128 = InstructionSet_X64.VectorT128,
+ X64_VectorT256 = InstructionSet_X64.VectorT256,
+ X64_VectorT512 = InstructionSet_X64.VectorT512,
X64_X86Base_X64 = InstructionSet_X64.X86Base_X64,
X64_SSE_X64 = InstructionSet_X64.SSE_X64,
X64_SSE2_X64 = InstructionSet_X64.SSE2_X64,
X86_AVX512DQ_VL = InstructionSet_X86.AVX512DQ_VL,
X86_AVX512VBMI = InstructionSet_X86.AVX512VBMI,
X86_AVX512VBMI_VL = InstructionSet_X86.AVX512VBMI_VL,
+ X86_VectorT128 = InstructionSet_X86.VectorT128,
+ X86_VectorT256 = InstructionSet_X86.VectorT256,
+ X86_VectorT512 = InstructionSet_X86.VectorT512,
X86_X86Base_X64 = InstructionSet_X86.X86Base_X64,
X86_SSE_X64 = InstructionSet_X86.SSE_X64,
X86_SSE2_X64 = InstructionSet_X86.SSE2_X64,
Vector128 = 11,
Dczva = 12,
Rcpc = 13,
- ArmBase_Arm64 = 14,
- AdvSimd_Arm64 = 15,
- Aes_Arm64 = 16,
- Crc32_Arm64 = 17,
- Dp_Arm64 = 18,
- Rdm_Arm64 = 19,
- Sha1_Arm64 = 20,
- Sha256_Arm64 = 21,
+ VectorT128 = 14,
+ ArmBase_Arm64 = 15,
+ AdvSimd_Arm64 = 16,
+ Aes_Arm64 = 17,
+ Crc32_Arm64 = 18,
+ Dp_Arm64 = 19,
+ Rdm_Arm64 = 20,
+ Sha1_Arm64 = 21,
+ Sha256_Arm64 = 22,
}
public enum InstructionSet_X64
AVX512DQ_VL = 30,
AVX512VBMI = 31,
AVX512VBMI_VL = 32,
- X86Base_X64 = 33,
- SSE_X64 = 34,
- SSE2_X64 = 35,
- SSE3_X64 = 36,
- SSSE3_X64 = 37,
- SSE41_X64 = 38,
- SSE42_X64 = 39,
- AVX_X64 = 40,
- AVX2_X64 = 41,
- AES_X64 = 42,
- BMI1_X64 = 43,
- BMI2_X64 = 44,
- FMA_X64 = 45,
- LZCNT_X64 = 46,
- PCLMULQDQ_X64 = 47,
- POPCNT_X64 = 48,
- AVXVNNI_X64 = 49,
- MOVBE_X64 = 50,
- X86Serialize_X64 = 51,
- AVX512F_X64 = 52,
- AVX512F_VL_X64 = 53,
- AVX512BW_X64 = 54,
- AVX512BW_VL_X64 = 55,
- AVX512CD_X64 = 56,
- AVX512CD_VL_X64 = 57,
- AVX512DQ_X64 = 58,
- AVX512DQ_VL_X64 = 59,
- AVX512VBMI_X64 = 60,
- AVX512VBMI_VL_X64 = 61,
+ VectorT128 = 33,
+ VectorT256 = 34,
+ VectorT512 = 35,
+ X86Base_X64 = 36,
+ SSE_X64 = 37,
+ SSE2_X64 = 38,
+ SSE3_X64 = 39,
+ SSSE3_X64 = 40,
+ SSE41_X64 = 41,
+ SSE42_X64 = 42,
+ AVX_X64 = 43,
+ AVX2_X64 = 44,
+ AES_X64 = 45,
+ BMI1_X64 = 46,
+ BMI2_X64 = 47,
+ FMA_X64 = 48,
+ LZCNT_X64 = 49,
+ PCLMULQDQ_X64 = 50,
+ POPCNT_X64 = 51,
+ AVXVNNI_X64 = 52,
+ MOVBE_X64 = 53,
+ X86Serialize_X64 = 54,
+ AVX512F_X64 = 55,
+ AVX512F_VL_X64 = 56,
+ AVX512BW_X64 = 57,
+ AVX512BW_VL_X64 = 58,
+ AVX512CD_X64 = 59,
+ AVX512CD_VL_X64 = 60,
+ AVX512DQ_X64 = 61,
+ AVX512DQ_VL_X64 = 62,
+ AVX512VBMI_X64 = 63,
+ AVX512VBMI_VL_X64 = 64,
}
public enum InstructionSet_X86
AVX512DQ_VL = 30,
AVX512VBMI = 31,
AVX512VBMI_VL = 32,
- X86Base_X64 = 33,
- SSE_X64 = 34,
- SSE2_X64 = 35,
- SSE3_X64 = 36,
- SSSE3_X64 = 37,
- SSE41_X64 = 38,
- SSE42_X64 = 39,
- AVX_X64 = 40,
- AVX2_X64 = 41,
- AES_X64 = 42,
- BMI1_X64 = 43,
- BMI2_X64 = 44,
- FMA_X64 = 45,
- LZCNT_X64 = 46,
- PCLMULQDQ_X64 = 47,
- POPCNT_X64 = 48,
- AVXVNNI_X64 = 49,
- MOVBE_X64 = 50,
- X86Serialize_X64 = 51,
- AVX512F_X64 = 52,
- AVX512F_VL_X64 = 53,
- AVX512BW_X64 = 54,
- AVX512BW_VL_X64 = 55,
- AVX512CD_X64 = 56,
- AVX512CD_VL_X64 = 57,
- AVX512DQ_X64 = 58,
- AVX512DQ_VL_X64 = 59,
- AVX512VBMI_X64 = 60,
- AVX512VBMI_VL_X64 = 61,
+ VectorT128 = 33,
+ VectorT256 = 34,
+ VectorT512 = 35,
+ X86Base_X64 = 36,
+ SSE_X64 = 37,
+ SSE2_X64 = 38,
+ SSE3_X64 = 39,
+ SSSE3_X64 = 40,
+ SSE41_X64 = 41,
+ SSE42_X64 = 42,
+ AVX_X64 = 43,
+ AVX2_X64 = 44,
+ AES_X64 = 45,
+ BMI1_X64 = 46,
+ BMI2_X64 = 47,
+ FMA_X64 = 48,
+ LZCNT_X64 = 49,
+ PCLMULQDQ_X64 = 50,
+ POPCNT_X64 = 51,
+ AVXVNNI_X64 = 52,
+ MOVBE_X64 = 53,
+ X86Serialize_X64 = 54,
+ AVX512F_X64 = 55,
+ AVX512F_VL_X64 = 56,
+ AVX512BW_X64 = 57,
+ AVX512BW_VL_X64 = 58,
+ AVX512CD_X64 = 59,
+ AVX512CD_VL_X64 = 60,
+ AVX512DQ_X64 = 61,
+ AVX512DQ_VL_X64 = 62,
+ AVX512VBMI_X64 = 63,
+ AVX512VBMI_VL_X64 = 64,
}
public unsafe struct InstructionSetFlags : IEnumerable<InstructionSet>
{
- private const int FlagsFieldCount = 1;
+ private const int FlagsFieldCount = 2;
private const int BitsPerFlagsField = 64;
private fixed ulong _flags[FlagsFieldCount];
public IEnumerable<InstructionSet_ARM64> ARM64Flags => this.Select((x) => (InstructionSet_ARM64)x);
resultflags.AddInstructionSet(InstructionSet.ARM64_AdvSimd);
if (resultflags.HasInstructionSet(InstructionSet.ARM64_Vector128))
resultflags.AddInstructionSet(InstructionSet.ARM64_AdvSimd);
+ if (resultflags.HasInstructionSet(InstructionSet.ARM64_VectorT128))
+ resultflags.AddInstructionSet(InstructionSet.ARM64_AdvSimd);
break;
case TargetArchitecture.X64:
resultflags.AddInstructionSet(InstructionSet.X64_AVX512VBMI);
if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512VBMI_VL))
resultflags.AddInstructionSet(InstructionSet.X64_AVX512BW_VL);
+ if (resultflags.HasInstructionSet(InstructionSet.X64_VectorT128))
+ resultflags.AddInstructionSet(InstructionSet.X64_SSE2);
+ if (resultflags.HasInstructionSet(InstructionSet.X64_VectorT256))
+ resultflags.AddInstructionSet(InstructionSet.X64_AVX2);
+ if (resultflags.HasInstructionSet(InstructionSet.X64_VectorT512))
+ resultflags.AddInstructionSet(InstructionSet.X64_AVX512F);
if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F))
resultflags.AddInstructionSet(InstructionSet.X64_AVX512BW_VL);
if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F))
resultflags.AddInstructionSet(InstructionSet.X86_AVX512VBMI);
if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512VBMI_VL))
resultflags.AddInstructionSet(InstructionSet.X86_AVX512BW_VL);
+ if (resultflags.HasInstructionSet(InstructionSet.X86_VectorT128))
+ resultflags.AddInstructionSet(InstructionSet.X86_SSE2);
+ if (resultflags.HasInstructionSet(InstructionSet.X86_VectorT256))
+ resultflags.AddInstructionSet(InstructionSet.X86_AVX2);
+ if (resultflags.HasInstructionSet(InstructionSet.X86_VectorT512))
+ resultflags.AddInstructionSet(InstructionSet.X86_AVX512F);
if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F))
resultflags.AddInstructionSet(InstructionSet.X86_AVX512BW_VL);
if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F))
resultflags.AddInstructionSet(InstructionSet.ARM64_Vector64);
if (resultflags.HasInstructionSet(InstructionSet.ARM64_AdvSimd))
resultflags.AddInstructionSet(InstructionSet.ARM64_Vector128);
+ if (resultflags.HasInstructionSet(InstructionSet.ARM64_AdvSimd))
+ resultflags.AddInstructionSet(InstructionSet.ARM64_VectorT128);
break;
case TargetArchitecture.X64:
resultflags.AddInstructionSet(InstructionSet.X64_AVX512VBMI_VL);
if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512BW_VL))
resultflags.AddInstructionSet(InstructionSet.X64_AVX512VBMI_VL);
+ if (resultflags.HasInstructionSet(InstructionSet.X64_SSE2))
+ resultflags.AddInstructionSet(InstructionSet.X64_VectorT128);
+ if (resultflags.HasInstructionSet(InstructionSet.X64_AVX2))
+ resultflags.AddInstructionSet(InstructionSet.X64_VectorT256);
+ if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F))
+ resultflags.AddInstructionSet(InstructionSet.X64_VectorT512);
if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512BW_VL))
resultflags.AddInstructionSet(InstructionSet.X64_AVX512F);
if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512CD_VL))
resultflags.AddInstructionSet(InstructionSet.X86_AVX512VBMI_VL);
if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512BW_VL))
resultflags.AddInstructionSet(InstructionSet.X86_AVX512VBMI_VL);
+ if (resultflags.HasInstructionSet(InstructionSet.X86_SSE2))
+ resultflags.AddInstructionSet(InstructionSet.X86_VectorT128);
+ if (resultflags.HasInstructionSet(InstructionSet.X86_AVX2))
+ resultflags.AddInstructionSet(InstructionSet.X86_VectorT256);
+ if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F))
+ resultflags.AddInstructionSet(InstructionSet.X86_VectorT512);
if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512BW_VL))
resultflags.AddInstructionSet(InstructionSet.X86_AVX512F);
if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512CD_VL))
yield return new InstructionSetInfo("Vector128", "", InstructionSet.ARM64_Vector128, false);
yield return new InstructionSetInfo("Dczva", "", InstructionSet.ARM64_Dczva, false);
yield return new InstructionSetInfo("rcpc", "", InstructionSet.ARM64_Rcpc, true);
+ yield return new InstructionSetInfo("vectort128", "VectorT128", InstructionSet.ARM64_VectorT128, true);
break;
case TargetArchitecture.X64:
yield return new InstructionSetInfo("avx512dq_vl", "Avx512DQ_VL", InstructionSet.X64_AVX512DQ_VL, true);
yield return new InstructionSetInfo("avx512vbmi", "Avx512Vbmi", InstructionSet.X64_AVX512VBMI, true);
yield return new InstructionSetInfo("avx512vbmi_vl", "Avx512Vbmi_VL", InstructionSet.X64_AVX512VBMI_VL, true);
+ yield return new InstructionSetInfo("vectort128", "VectorT128", InstructionSet.X64_VectorT128, true);
+ yield return new InstructionSetInfo("vectort256", "VectorT256", InstructionSet.X64_VectorT256, true);
+ yield return new InstructionSetInfo("vectort512", "VectorT512", InstructionSet.X64_VectorT512, true);
break;
case TargetArchitecture.X86:
yield return new InstructionSetInfo("avx512dq_vl", "Avx512DQ_VL", InstructionSet.X86_AVX512DQ_VL, true);
yield return new InstructionSetInfo("avx512vbmi", "Avx512Vbmi", InstructionSet.X86_AVX512VBMI, true);
yield return new InstructionSetInfo("avx512vbmi_vl", "Avx512Vbmi_VL", InstructionSet.X86_AVX512VBMI_VL, true);
+ yield return new InstructionSetInfo("vectort128", "VectorT128", InstructionSet.X86_VectorT128, true);
+ yield return new InstructionSetInfo("vectort256", "VectorT256", InstructionSet.X86_VectorT256, true);
+ yield return new InstructionSetInfo("vectort512", "VectorT512", InstructionSet.X86_VectorT512, true);
break;
}
}
else
{ return InstructionSet.ARM64_Sha256; }
+ case "VectorT128":
+ { return InstructionSet.ARM64_VectorT128; }
+
}
break;
else
{ return InstructionSet.X64_AVX512VBMI; }
+ case "VectorT128":
+ { return InstructionSet.X64_VectorT128; }
+
+ case "VectorT256":
+ { return InstructionSet.X64_VectorT256; }
+
+ case "VectorT512":
+ { return InstructionSet.X64_VectorT512; }
+
}
break;
else
{ return InstructionSet.X86_AVX512VBMI; }
+ case "VectorT128":
+ { return InstructionSet.X86_VectorT128; }
+
+ case "VectorT256":
+ { return InstructionSet.X86_VectorT256; }
+
+ case "VectorT512":
+ { return InstructionSet.X86_VectorT512; }
+
}
break;
instructionset ,X86 ,Avx512DQ_VL , ,36 ,AVX512DQ_VL ,avx512dq_vl
instructionset ,X86 ,Avx512Vbmi , ,37 ,AVX512VBMI ,avx512vbmi
instructionset ,X86 ,Avx512Vbmi_VL , ,38 ,AVX512VBMI_VL ,avx512vbmi_vl
-
+instructionset ,X86 ,VectorT128 , ,39 ,VectorT128 ,vectort128
+instructionset ,X86 ,VectorT256 , ,40 ,VectorT256 ,vectort256
+instructionset ,X86 ,VectorT512 , ,41 ,VectorT512 ,vectort512
instructionset64bit,X86 ,X86Base
instructionset64bit,X86 ,SSE
implication ,X86 ,AVX512VBMI ,AVX512BW
implication ,X86 ,AVX512VBMI_VL ,AVX512VBMI
implication ,X86 ,AVX512VBMI_VL ,AVX512BW_VL
+implication ,X86 ,VectorT128 ,SSE2
+implication ,X86 ,VectorT256 ,AVX2
+implication ,X86 ,VectorT512 ,AVX512F
; While the AVX-512 ISAs can be individually lit-up, they really
; need F, BW, CD, DQ, and VL to be fully functional without adding
; Definition of Arm64 instruction sets
definearch ,ARM64 ,64Bit ,Arm64, Arm64
-instructionset ,ARM64 ,ArmBase , ,16 ,ArmBase ,base
-instructionset ,ARM64 ,AdvSimd , ,17 ,AdvSimd ,neon
-instructionset ,ARM64 ,Aes , ,9 ,Aes ,aes
-instructionset ,ARM64 ,Crc32 , ,18 ,Crc32 ,crc
-instructionset ,ARM64 ,Dp , ,23 ,Dp ,dotprod
-instructionset ,ARM64 ,Rdm , ,24 ,Rdm ,rdma
-instructionset ,ARM64 ,Sha1 , ,19 ,Sha1 ,sha1
-instructionset ,ARM64 ,Sha256 , ,20 ,Sha256 ,sha2
-instructionset ,ARM64 , ,Atomics ,21 ,Atomics ,lse
-instructionset ,ARM64 , , , ,Vector64 ,
-instructionset ,ARM64 , , , ,Vector128,
-instructionset ,ARM64 , , , ,Dczva ,
-instructionset ,ARM64 , ,Rcpc ,26 ,Rcpc ,rcpc
+instructionset ,ARM64 ,ArmBase , ,16 ,ArmBase ,base
+instructionset ,ARM64 ,AdvSimd , ,17 ,AdvSimd ,neon
+instructionset ,ARM64 ,Aes , ,9 ,Aes ,aes
+instructionset ,ARM64 ,Crc32 , ,18 ,Crc32 ,crc
+instructionset ,ARM64 ,Dp , ,23 ,Dp ,dotprod
+instructionset ,ARM64 ,Rdm , ,24 ,Rdm ,rdma
+instructionset ,ARM64 ,Sha1 , ,19 ,Sha1 ,sha1
+instructionset ,ARM64 ,Sha256 , ,20 ,Sha256 ,sha2
+instructionset ,ARM64 , ,Atomics ,21 ,Atomics ,lse
+instructionset ,ARM64 , , , ,Vector64 ,
+instructionset ,ARM64 , , , ,Vector128 ,
+instructionset ,ARM64 , , , ,Dczva ,
+instructionset ,ARM64 , ,Rcpc ,26 ,Rcpc ,rcpc
+instructionset ,ARM64 ,VectorT128 , ,39 ,VectorT128 ,vectort128
instructionset64bit,ARM64 ,ArmBase
instructionset64bit,ARM64 ,AdvSimd
vectorinstructionset,ARM64,Vector64
vectorinstructionset,ARM64,Vector128
-implication ,ARM64 ,AdvSimd ,ArmBase
-implication ,ARM64 ,Aes ,ArmBase
-implication ,ARM64 ,Crc32 ,ArmBase
-implication ,ARM64 ,Dp ,AdvSimd
-implication ,ARM64 ,Rdm ,AdvSimd
-implication ,ARM64 ,Sha1 ,ArmBase
-implication ,ARM64 ,Sha256 ,ArmBase
-implication ,ARM64 ,Vector64 ,AdvSimd
-implication ,ARM64 ,Vector128 ,AdvSimd
-
+implication ,ARM64 ,AdvSimd ,ArmBase
+implication ,ARM64 ,Aes ,ArmBase
+implication ,ARM64 ,Crc32 ,ArmBase
+implication ,ARM64 ,Dp ,AdvSimd
+implication ,ARM64 ,Rdm ,AdvSimd
+implication ,ARM64 ,Sha1 ,ArmBase
+implication ,ARM64 ,Sha256 ,ArmBase
+implication ,ARM64 ,Vector64 ,AdvSimd
+implication ,ARM64 ,Vector128 ,AdvSimd
+implication ,ARM64 ,VectorT128 ,AdvSimd
; ,name and aliases ,archs ,lower baselines included by implication
;
private Dictionary<string, string> _64BitVariantArchitectureManagedNameSuffix = new Dictionary<string, string>();
// This represents the number of flags fields we currently track
- private const int FlagsFieldCount = 1;
+ private const int FlagsFieldCount = 2;
private void ArchitectureEncountered(string arch)
{
/// True if the type transitively has an Int128 in it or is an Int128
/// </summary>
public const int IsInt128OrHasInt128Fields = 0x800;
+
+ /// <summary>
+ /// True if the type transitively has a Vector<T> in it or is Vector<T>
+ /// </summary>
+ public const int IsVectorTOrHasVectorTFields = 0x1000;
}
private sealed class StaticBlockInfo
}
/// <summary>
+ /// Is a type Vector<T> or transitively have any fields of a type Vector<T>.
+ /// </summary>
+ public virtual bool IsVectorTOrHasVectorTFields
+ {
+ get
+ {
+ if (!_fieldLayoutFlags.HasFlags(FieldLayoutFlags.ComputedInstanceTypeLayout))
+ {
+ ComputeInstanceLayout(InstanceLayoutKind.TypeAndFields);
+ }
+ return _fieldLayoutFlags.HasFlags(FieldLayoutFlags.IsVectorTOrHasVectorTFields);
+ }
+ }
+
+ /// <summary>
/// The number of bytes required to hold a field of this type
/// </summary>
public LayoutInt InstanceFieldSize
{
_fieldLayoutFlags.AddFlags(FieldLayoutFlags.IsInt128OrHasInt128Fields);
}
+ if (computedLayout.IsVectorTOrHasVectorTFields)
+ {
+ _fieldLayoutFlags.AddFlags(FieldLayoutFlags.IsVectorTOrHasVectorTFields);
+ }
if (computedLayout.Offsets != null)
{
public bool LayoutAbiStable; // Is the layout stable such that it can safely be used in function calling conventions
public bool IsAutoLayoutOrHasAutoLayoutFields;
public bool IsInt128OrHasInt128Fields;
+ public bool IsVectorTOrHasVectorTFields;
/// <summary>
/// If Offsets is non-null, then all field based layout is complete.
LayoutAbiStable = true,
IsAutoLayoutOrHasAutoLayoutFields = false,
IsInt128OrHasInt128Fields = false,
+ IsVectorTOrHasVectorTFields = false,
};
if (numInstanceFields > 0)
}
ref StaticsBlock block = ref GetStaticsBlockForField(ref result, field);
- SizeAndAlignment sizeAndAlignment = ComputeFieldSizeAndAlignment(fieldType, hasLayout: false, context.Target.DefaultPackingSize, out bool _, out bool _, out bool _);
+ SizeAndAlignment sizeAndAlignment = ComputeFieldSizeAndAlignment(fieldType, hasLayout: false, context.Target.DefaultPackingSize, out bool _, out bool _, out bool _, out bool _);
block.Size = LayoutInt.AlignUp(block.Size, sizeAndAlignment.Alignment, context.Target);
result.Offsets[index] = new FieldAndOffset(field, block.Size);
int fieldOrdinal = 0;
bool layoutAbiStable = true;
bool hasAutoLayoutField = false;
- bool hasInt128Field = type.BaseType == null ? false : type.BaseType.IsInt128OrHasInt128Fields;
+ bool hasInt128Field = false;
+ bool hasVectorTField = false;
+
+ if (type.BaseType is not null)
+ {
+ hasInt128Field = type.BaseType.IsInt128OrHasInt128Fields;
+ hasVectorTField = type.BaseType.IsVectorTOrHasVectorTFields;
+ }
foreach (var fieldAndOffset in layoutMetadata.Offsets)
{
TypeDesc fieldType = fieldAndOffset.Field.FieldType;
- var fieldSizeAndAlignment = ComputeFieldSizeAndAlignment(fieldType.UnderlyingType, hasLayout: true, packingSize, out bool fieldLayoutAbiStable, out bool fieldHasAutoLayout, out bool fieldHasInt128Field);
+ var fieldSizeAndAlignment = ComputeFieldSizeAndAlignment(fieldType.UnderlyingType, hasLayout: true, packingSize, out bool fieldLayoutAbiStable, out bool fieldHasAutoLayout, out bool fieldHasInt128Field, out bool fieldHasVectorTField);
if (!fieldLayoutAbiStable)
layoutAbiStable = false;
if (fieldHasAutoLayout)
hasAutoLayoutField = true;
if (fieldHasInt128Field)
hasInt128Field = true;
+ if (fieldHasVectorTField)
+ hasVectorTField = true;
largestAlignmentRequired = LayoutInt.Max(fieldSizeAndAlignment.Alignment, largestAlignmentRequired);
{
IsAutoLayoutOrHasAutoLayoutFields = hasAutoLayoutField,
IsInt128OrHasInt128Fields = hasInt128Field,
+ IsVectorTOrHasVectorTFields = hasVectorTField,
};
computedLayout.FieldAlignment = instanceSizeAndAlignment.Alignment;
computedLayout.FieldSize = instanceSizeAndAlignment.Size;
int packingSize = ComputePackingSize(type, layoutMetadata);
bool layoutAbiStable = true;
bool hasAutoLayoutField = false;
- bool hasInt128Field = type.BaseType == null ? false : type.BaseType.IsInt128OrHasInt128Fields;
+ bool hasInt128Field = false;
+ bool hasVectorTField = false;
+
+ if (type.BaseType is not null)
+ {
+ hasInt128Field = type.BaseType.IsInt128OrHasInt128Fields;
+ hasVectorTField = type.BaseType.IsVectorTOrHasVectorTFields;
+ }
foreach (var field in type.GetFields())
{
if (field.IsStatic)
continue;
- var fieldSizeAndAlignment = ComputeFieldSizeAndAlignment(field.FieldType.UnderlyingType, hasLayout: true, packingSize, out bool fieldLayoutAbiStable, out bool fieldHasAutoLayout, out bool fieldHasInt128Field);
+ var fieldSizeAndAlignment = ComputeFieldSizeAndAlignment(field.FieldType.UnderlyingType, hasLayout: true, packingSize, out bool fieldLayoutAbiStable, out bool fieldHasAutoLayout, out bool fieldHasInt128Field, out bool fieldHasVectorTField);
if (!fieldLayoutAbiStable)
layoutAbiStable = false;
if (fieldHasAutoLayout)
hasAutoLayoutField = true;
if (fieldHasInt128Field)
hasInt128Field = true;
+ if (fieldHasVectorTField)
+ hasVectorTField = true;
largestAlignmentRequirement = LayoutInt.Max(fieldSizeAndAlignment.Alignment, largestAlignmentRequirement);
{
IsAutoLayoutOrHasAutoLayoutFields = hasAutoLayoutField,
IsInt128OrHasInt128Fields = hasInt128Field,
+ IsVectorTOrHasVectorTFields = hasVectorTField,
};
computedLayout.FieldAlignment = instanceSizeAndAlignment.Alignment;
computedLayout.FieldSize = instanceSizeAndAlignment.Size;
int instanceGCPointerFieldsCount = 0;
int[] instanceNonGCPointerFieldsCount = new int[maxLog2Size + 1];
bool hasInt128Field = false;
+ bool hasVectorTField = false;
foreach (var field in type.GetFields())
{
instanceValueClassFieldCount++;
if (((DefType)fieldType).IsInt128OrHasInt128Fields)
hasInt128Field = true;
+ if (((DefType)fieldType).IsVectorTOrHasVectorTFields)
+ hasVectorTField = true;
}
else if (fieldType.IsGCPointer)
{
{
Debug.Assert(fieldType.IsPrimitive || fieldType.IsPointer || fieldType.IsFunctionPointer || fieldType.IsEnum || fieldType.IsByRef);
- var fieldSizeAndAlignment = ComputeFieldSizeAndAlignment(fieldType, hasLayout, packingSize, out bool _, out bool _, out bool _);
+ var fieldSizeAndAlignment = ComputeFieldSizeAndAlignment(fieldType, hasLayout, packingSize, out bool _, out bool _, out bool _, out bool _);
instanceNonGCPointerFieldsCount[CalculateLog2(fieldSizeAndAlignment.Size.AsInt)]++;
}
}
TypeDesc fieldType = field.FieldType;
- var fieldSizeAndAlignment = ComputeFieldSizeAndAlignment(fieldType, hasLayout, packingSize, out bool fieldLayoutAbiStable, out bool _, out bool _);
+ var fieldSizeAndAlignment = ComputeFieldSizeAndAlignment(fieldType, hasLayout, packingSize, out bool fieldLayoutAbiStable, out bool _, out bool _, out bool _);
if (!fieldLayoutAbiStable)
layoutAbiStable = false;
for (int i = 0; i < instanceValueClassFieldsArr.Length; i++)
{
// Align the cumulative field offset to the indeterminate value
- var fieldSizeAndAlignment = ComputeFieldSizeAndAlignment(instanceValueClassFieldsArr[i].FieldType, hasLayout, packingSize, out bool fieldLayoutAbiStable, out bool _, out bool _);
+ var fieldSizeAndAlignment = ComputeFieldSizeAndAlignment(instanceValueClassFieldsArr[i].FieldType, hasLayout, packingSize, out bool fieldLayoutAbiStable, out bool _, out bool _, out bool _);
if (!fieldLayoutAbiStable)
layoutAbiStable = false;
{
IsAutoLayoutOrHasAutoLayoutFields = true,
IsInt128OrHasInt128Fields = hasInt128Field,
+ IsVectorTOrHasVectorTFields = hasVectorTField,
};
computedLayout.FieldAlignment = instanceSizeAndAlignment.Alignment;
computedLayout.FieldSize = instanceSizeAndAlignment.Size;
private static void PlaceInstanceField(FieldDesc field, bool hasLayout, int packingSize, FieldAndOffset[] offsets, ref LayoutInt instanceFieldPos, ref int fieldOrdinal, LayoutInt offsetBias)
{
- var fieldSizeAndAlignment = ComputeFieldSizeAndAlignment(field.FieldType, hasLayout, packingSize, out bool _, out bool _, out bool _);
+ var fieldSizeAndAlignment = ComputeFieldSizeAndAlignment(field.FieldType, hasLayout, packingSize, out bool _, out bool _, out bool _, out bool _);
instanceFieldPos = AlignUpInstanceFieldOffset(instanceFieldPos, fieldSizeAndAlignment.Alignment, field.Context.Target);
offsets[fieldOrdinal] = new FieldAndOffset(field, instanceFieldPos + offsetBias);
return cumulativeInstanceFieldPos;
}
- private static SizeAndAlignment ComputeFieldSizeAndAlignment(TypeDesc fieldType, bool hasLayout, int packingSize, out bool layoutAbiStable, out bool fieldTypeHasAutoLayout, out bool fieldTypeHasInt128Field)
+ private static SizeAndAlignment ComputeFieldSizeAndAlignment(TypeDesc fieldType, bool hasLayout, int packingSize, out bool layoutAbiStable, out bool fieldTypeHasAutoLayout, out bool fieldTypeHasInt128Field, out bool fieldTypeHasVectorTField)
{
SizeAndAlignment result;
layoutAbiStable = true;
fieldTypeHasAutoLayout = true;
fieldTypeHasInt128Field = false;
+ fieldTypeHasVectorTField = false;
if (fieldType.IsDefType)
{
layoutAbiStable = defType.LayoutAbiStable;
fieldTypeHasAutoLayout = defType.IsAutoLayoutOrHasAutoLayoutFields;
fieldTypeHasInt128Field = defType.IsInt128OrHasInt128Fields;
+ fieldTypeHasVectorTField = defType.IsVectorTOrHasVectorTFields;
}
else
{
return MarshallerKind.Invalid;
}
+ if (!isField && ((DefType)type).IsVectorTOrHasVectorTFields)
+ {
+ // Vector<T> types or structs that contain them cannot be passed by value
+ return MarshallerKind.Invalid;
+ }
+
if (MarshalUtils.IsBlittableType(type))
{
if (nativeType != NativeTypeKind.Default && nativeType != NativeTypeKind.Struct)
public const int Avx512Vbmi = 0x800000;
public const int Avx512Vbmi_vl = 0x1000000;
public const int Serialize = 0x2000000;
+ public const int VectorT128 = 0x4000000;
+ public const int VectorT256 = 0x8000000;
+ public const int VectorT512 = 0x10000000;
public static int FromInstructionSet(InstructionSet instructionSet)
{
return instructionSet switch
{
+ // Optional ISAs - only available via opt-in or opportunistic light-up
InstructionSet.X64_AES => Aes,
InstructionSet.X64_AES_X64 => Aes,
InstructionSet.X64_PCLMULQDQ => Pclmulqdq,
InstructionSet.X64_X86Serialize => Serialize,
InstructionSet.X64_X86Serialize_X64 => Serialize,
- // SSE and SSE2 are baseline ISAs - they're always available
+ // Baseline ISAs - they're always available
InstructionSet.X64_SSE => 0,
InstructionSet.X64_SSE_X64 => 0,
InstructionSet.X64_SSE2 => 0,
InstructionSet.X64_X86Base => 0,
InstructionSet.X64_X86Base_X64 => 0,
+ // Vector<T> Sizes
+ InstructionSet.X64_VectorT128 => VectorT128,
+ InstructionSet.X64_VectorT256 => VectorT256,
+ InstructionSet.X64_VectorT512 => VectorT512,
+
_ => throw new NotSupportedException(((InstructionSet_X64)instructionSet).ToString())
};
}
public const int Sha256 = 0x0040;
public const int Atomics = 0x0080;
public const int Rcpc = 0x0100;
+ public const int VectorT128 = 0x0200;
public static int FromInstructionSet(InstructionSet instructionSet)
{
return instructionSet switch
{
+
+ // Baseline ISAs - they're always available
+ InstructionSet.ARM64_ArmBase => 0,
+ InstructionSet.ARM64_ArmBase_Arm64 => 0,
InstructionSet.ARM64_AdvSimd => AdvSimd,
InstructionSet.ARM64_AdvSimd_Arm64 => AdvSimd,
+
+ // Optional ISAs - only available via opt-in or opportunistic light-up
InstructionSet.ARM64_Aes => Aes,
InstructionSet.ARM64_Aes_Arm64 => Aes,
InstructionSet.ARM64_Crc32 => Crc32,
InstructionSet.ARM64_Atomics => Atomics,
InstructionSet.ARM64_Rcpc => Rcpc,
- InstructionSet.ARM64_ArmBase => 0,
- InstructionSet.ARM64_ArmBase_Arm64 => 0,
+ // Vector<T> Sizes
+ InstructionSet.ARM64_VectorT128 => VectorT128,
_ => throw new NotSupportedException(((InstructionSet_ARM64)instructionSet).ToString())
};
TargetDetails targetDetails = defType.Context.Target;
ComputedInstanceFieldLayout layoutFromMetadata = _fallbackAlgorithm.ComputeInstanceLayout(defType, layoutKind);
+ layoutFromMetadata.IsVectorTOrHasVectorTFields = true;
LayoutInt instanceFieldSize;
FieldAlignment = layoutFromMetadata.FieldAlignment,
FieldSize = instanceFieldSize,
Offsets = layoutFromMetadata.Offsets,
+ IsVectorTOrHasVectorTFields = true,
};
}
if (!relocsOnly)
{
+ ReadyToRunFixupKind fixupKind = _fixupKind;
dataBuilder.AddSymbol(this);
+ if ((fixupKind == ReadyToRunFixupKind.Verify_TypeLayout) && ((MetadataType)_typeDesc).IsVectorTOrHasVectorTFields)
+ {
+ fixupKind = ReadyToRunFixupKind.Check_TypeLayout;
+ }
+
IEcmaModule targetModule = factory.SignatureContext.GetTargetModule(_typeDesc);
- SignatureContext innerContext = dataBuilder.EmitFixup(factory, _fixupKind, targetModule, factory.SignatureContext);
+ SignatureContext innerContext = dataBuilder.EmitFixup(factory, fixupKind, targetModule, factory.SignatureContext);
dataBuilder.EmitTypeSignature(_typeDesc, innerContext);
- if ((_fixupKind == ReadyToRunFixupKind.Check_TypeLayout) ||
- (_fixupKind == ReadyToRunFixupKind.Verify_TypeLayout))
+ if ((fixupKind == ReadyToRunFixupKind.Check_TypeLayout) ||
+ (fixupKind == ReadyToRunFixupKind.Verify_TypeLayout))
{
EncodeTypeLayout(dataBuilder, _typeDesc);
}
};
dataBuilder.EmitUInt((uint)hfaElementType);
}
-
+
if (alignment != pointerSize)
{
dataBuilder.EmitUInt((uint)alignment);
return true;
}
- if (!(type is MetadataType defType))
+ if (type is not MetadataType defType)
{
// Non metadata backed types have layout defined in all version bubbles
return true;
}
+ if (VectorOfTFieldLayoutAlgorithm.IsVectorOfTType(defType))
+ {
+ // Vector<T> always needs a layout check
+ return false;
+ }
+
if (!NodeFactory.CompilationModuleGroup.VersionsWithModule(defType.Module))
{
// Valuetypes with non-versionable attribute are candidates for fixed layout. Reject the rest.
ByteCountAlignment = LayoutInt.Indeterminate,
Offsets = fieldsAndOffsets.ToArray(),
LayoutAbiStable = false,
+ IsVectorTOrHasVectorTFields = true,
};
return instanceLayout;
}
FieldSize = layoutFromSimilarIntrinsicVector.FieldSize,
Offsets = layoutFromMetadata.Offsets,
LayoutAbiStable = _vectorAbiIsStable,
+ IsVectorTOrHasVectorTFields = true,
};
#else
return new ComputedInstanceFieldLayout
FieldSize = layoutFromSimilarIntrinsicVector.FieldSize,
Offsets = layoutFromMetadata.Offsets,
LayoutAbiStable = _vectorAbiIsStable,
+ IsVectorTOrHasVectorTFields = true,
};
#endif
}
{
return false;
}
-
+
private CORINFO_OBJECT_STRUCT_* getRuntimeTypePointer(CORINFO_CLASS_STRUCT_* cls)
{
return null;
}, true, "Maximum number of threads to use during compilation");
public Option<string> InstructionSet { get; } =
new(new[] { "--instruction-set" }, "Instruction set to allow or disallow");
+ public Option<int> MaxVectorTBitWidth { get; } =
+ new(new[] { "--max-vectort-bitwidth" }, "Maximum width, in bits, that Vector<T> is allowed to be");
public Option<string> Guard { get; } =
new(new[] { "--guard" }, "Enable mitigations. Options: 'cf': CFG (Control Flow Guard, Windows only)");
public Option<bool> Dehydrate { get; } =
AddOption(RuntimeKnobs);
AddOption(Parallelism);
AddOption(InstructionSet);
+ AddOption(MaxVectorTBitWidth);
AddOption(Guard);
AddOption(Dehydrate);
AddOption(PreinitStatics);
TargetArchitecture targetArchitecture = Get(_command.TargetArchitecture);
TargetOS targetOS = Get(_command.TargetOS);
- InstructionSetSupport instructionSetSupport = Helpers.ConfigureInstructionSetSupport(Get(_command.InstructionSet), targetArchitecture, targetOS,
+ InstructionSetSupport instructionSetSupport = Helpers.ConfigureInstructionSetSupport(Get(_command.InstructionSet), Get(_command.MaxVectorTBitWidth), targetArchitecture, targetOS,
"Unrecognized instruction set {0}", "Unsupported combination of instruction sets: {0}/{1}");
string systemModuleName = Get(_command.SystemModuleName);
new(new[] { "--reference", "-r" }, result => Helpers.BuildPathDictionary(result.Tokens, false), true, SR.ReferenceFiles);
public Option<string> InstructionSet { get; } =
new(new[] { "--instruction-set" }, SR.InstructionSets);
+ public Option<int> MaxVectorTBitWidth { get; } =
+ new(new[] { "--max-vectort-bitwidth" }, SR.MaxVectorTBitWidths);
public Option<string[]> MibcFilePaths { get; } =
new(new[] { "--mibc", "-m" }, Array.Empty<string>, SR.MibcFiles);
public Option<string> OutputFilePath { get; } =
AddOption(UnrootedInputFilePaths);
AddOption(ReferenceFilePaths);
AddOption(InstructionSet);
+ AddOption(MaxVectorTBitWidth);
AddOption(MibcFilePaths);
AddOption(OutputFilePath);
AddOption(CompositeRootPath);
TargetArchitecture targetArchitecture = Get(_command.TargetArchitecture);
TargetOS targetOS = Get(_command.TargetOS);
- InstructionSetSupport instructionSetSupport = Helpers.ConfigureInstructionSetSupport(Get(_command.InstructionSet), targetArchitecture, targetOS,
+ InstructionSetSupport instructionSetSupport = Helpers.ConfigureInstructionSetSupport(Get(_command.InstructionSet), Get(_command.MaxVectorTBitWidth), targetArchitecture, targetOS,
SR.InstructionSetMustNotBe, SR.InstructionSetInvalidImplication);
SharedGenericsMode genericsMode = SharedGenericsMode.CanonicalReferenceTypes;
var targetDetails = new TargetDetails(targetArchitecture, targetOS, Crossgen2RootCommand.IsArmel ? TargetAbi.NativeAotArmel : TargetAbi.NativeAot, instructionSetSupport.GetVectorTSimdVector());
<data name="InstructionSetInvalidImplication" xml:space="preserve">
<value>Instruction set '{0}' implies support for instruction set '{1}'</value>
</data>
+ <data name="MaxVectorTBitWidths" xml:space="preserve">
+ <value>The maximum width, in bits, for System.Numerics.Vector&lt;T&gt;. For example '128', '256', or '512'.</value>
+ </data>
<data name="UnrootedInputFilesToCompile" xml:space="preserve">
<value>Input files without automatic rooting of all methods</value>
</data>
CORJIT_FLAGS CPUCompileFlags;
+ // Get the maximum bit width of Vector<T>, rounding down to the nearest multiple of 128 bits
+ uint32_t maxVectorTBitWidth = (CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_MaxVectorTBitWidth) / 128) * 128;
+
#if defined(TARGET_X86) || defined(TARGET_AMD64)
CPUCompileFlags.Set(InstructionSet_X86Base);
CPUCompileFlags.Set(InstructionSet_SSE);
CPUCompileFlags.Set(InstructionSet_SSE2);
+ CPUCompileFlags.Set(InstructionSet_VectorT128);
if ((cpuidInfo[CPUID_ECX] & (1 << 25)) != 0) // AESNI
{
{
CPUCompileFlags.Set(InstructionSet_AVX2);
+ if ((maxVectorTBitWidth == 0) || (maxVectorTBitWidth >= 256))
+ {
+ // We allow 256-bit Vector<T> by default
+ CPUCompileFlags.Clear(InstructionSet_VectorT128);
+ CPUCompileFlags.Set(InstructionSet_VectorT256);
+ }
+
if (DoesOSSupportAVX512() && (avx512StateSupport() == 1)) // XGETBV XRC0[7:5] == 111
{
if ((cpuidInfo[CPUID_EBX] & (1 << 16)) != 0) // AVX512F
{
CPUCompileFlags.Set(InstructionSet_AVX512F);
+ // TODO-XArch: Add support for 512-bit Vector<T>
+ assert(!CPUCompileFlags.IsSet(InstructionSet_VectorT512));
+
bool isAVX512_VLSupported = false;
if ((cpuidInfo[CPUID_EBX] & (1 << 31)) != 0) // AVX512VL
{
}
}
- if (CLRConfig::GetConfigValue(CLRConfig::INTERNAL_SIMD16ByteOnly) != 0)
- {
- CPUCompileFlags.Clear(InstructionSet_AVX2);
- }
-
if (maxCpuId >= 0x07)
{
__cpuidex(cpuidInfo, 0x00000007, 0x00000000);
// FP and SIMD support are enabled by default
CPUCompileFlags.Set(InstructionSet_ArmBase);
CPUCompileFlags.Set(InstructionSet_AdvSimd);
+ CPUCompileFlags.Set(InstructionSet_VectorT128);
// PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE (30)
if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE))
{
CPUCompileFlags.Clear(InstructionSet_X86Serialize);
}
-
#elif defined(TARGET_ARM64)
if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableHWIntrinsic))
{
LPCUTF8 className;
LPCUTF8 nameSpace;
+
if (FAILED(GetMDImport()->GetNameOfTypeDef(bmtInternal->pType->GetTypeDefToken(), &className, &nameSpace)))
return false;
CORJIT_FLAGS CPUCompileFlags = ExecutionManager::GetEEJitManager()->GetCPUCompileFlags();
uint32_t numInstanceFieldBytes = 16;
- if (CPUCompileFlags.IsSet(InstructionSet_AVX2))
+ if (CPUCompileFlags.IsSet(InstructionSet_VectorT512))
+ {
+ // TODO-XARCH: The JIT needs to be updated to support 64-byte Vector<T>
+ numInstanceFieldBytes = 32;
+ }
+ else if (CPUCompileFlags.IsSet(InstructionSet_VectorT256))
{
numInstanceFieldBytes = 32;
}