From 42742cd261a203dcd18cffb3c9ac8ce86df706d9 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sat, 8 Jul 2023 10:39:10 -0700 Subject: [PATCH] Have jitstress_isas_x86_evex set PreferredVectorBitWidth=512 (#88531) * Have jitstress_isas_x86_evex set PreferredVectorBitWidth=512 * Ensure that IsHardwareAccelerated correctly takes the preferred width into account * Ensure the tests take DOTNET_PreferredVectorBitWidth into account * Ensure PreferredVectorBitWidth is interpreted as a decimal * Fix a build failure where the default value wasn't passed in * Minor whitespace fix to trigger CI --- eng/pipelines/coreclr/jitstress-isas-avx512.yml | 3 + src/coreclr/jit/compiler.cpp | 4 +- src/coreclr/jit/compiler.h | 10 ---- src/coreclr/jit/hwintrinsic.cpp | 7 ++- src/coreclr/jit/hwintrinsiclistxarch.h | 2 +- src/coreclr/jit/jitconfigvalues.h | 2 +- src/tests/Common/testenvironment.proj | 3 +- .../JIT/HardwareIntrinsics/X86/X86Base/CpuId.cs | 69 ++++++++++++++-------- .../readytorun/HardwareIntrinsics/X86/CpuId.cs | 69 ++++++++++++++-------- 9 files changed, 102 insertions(+), 67 deletions(-) diff --git a/eng/pipelines/coreclr/jitstress-isas-avx512.yml b/eng/pipelines/coreclr/jitstress-isas-avx512.yml index d961b058..47d8e39 100644 --- a/eng/pipelines/coreclr/jitstress-isas-avx512.yml +++ b/eng/pipelines/coreclr/jitstress-isas-avx512.yml @@ -8,6 +8,9 @@ pr: - main paths: include: + - src/coreclr/jit/hwintrinsiccodegenxarch.cpp + - src/coreclr/jit/hwintrinsiclistxarch.h + - src/coreclr/jit/hwintrinsicxarch.cpp - src/coreclr/jit/instrsxarch.h - src/coreclr/jit/emitxarch.cpp - src/coreclr/jit/emitxarch.h diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 7b2fdb8..9129d5d 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -2266,7 +2266,7 @@ void Compiler::compSetProcessor() // the total sum of flags is still valid. #if defined(TARGET_XARCH) // Get the preferred vector bitwidth, rounding down to the nearest multiple of 128-bits - uint32_t preferredVectorBitWidth = (JitConfig.PreferredVectorBitWidth() / 128) * 128; + uint32_t preferredVectorBitWidth = (ReinterpretHexAsDecimal(JitConfig.PreferredVectorBitWidth()) / 128) * 128; uint32_t preferredVectorByteLength = preferredVectorBitWidth / 8; if (instructionSetFlags.HasInstructionSet(InstructionSet_SSE)) @@ -2298,7 +2298,7 @@ void Compiler::compSetProcessor() instructionSetFlags.AddInstructionSet(InstructionSet_Vector512); - if ((preferredVectorByteLength == 0) && opts.Vector512Throttling()) + if ((preferredVectorByteLength == 0) && jitFlags.IsSet(JitFlags::JIT_FLAG_VECTOR512_THROTTLING)) { // Some architectures can experience frequency throttling when // executing 512-bit width instructions. To account for this we set the diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 1d42520..8a2d862 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -9526,16 +9526,6 @@ public: return jitFlags->IsSet(JitFlags::JIT_FLAG_REVERSE_PINVOKE); } - // true if JitFlags::JIT_FLAG_VECTOR512_THROTTLING is set to true - bool Vector512Throttling() - { -#if defined(TARGET_XARCH) - return jitFlags->IsSet(JitFlags::JIT_FLAG_VECTOR512_THROTTLING); -#else - return false; -#endif - } - bool compScopeInfo; // Generate the LocalVar info ? bool compDbgCode; // Generate debugger-friendly code? bool compDbgInfo; // Gather debugging info? diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index 9e1351a..ed58894 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -511,12 +511,15 @@ NamedIntrinsic HWIntrinsicInfo::lookupId(Compiler* comp, } else if (strcmp(className, "Vector256") == 0) { + if (comp->getPreferredVectorByteLength() < 32) + { + return NI_IsSupported_False; + } isa = InstructionSet_AVX2; } else if (strcmp(className, "Vector512") == 0) { - // If the JitFlags::JIT_FLAG_VECTOR512_THROTTLING flag is set, we do not need to do any further checks. - if (comp->opts.Vector512Throttling()) + if (comp->getPreferredVectorByteLength() < 64) { return NI_IsSupported_False; } diff --git a/src/coreclr/jit/hwintrinsiclistxarch.h b/src/coreclr/jit/hwintrinsiclistxarch.h index 0bfcc48..4c09aa7 100644 --- a/src/coreclr/jit/hwintrinsiclistxarch.h +++ b/src/coreclr/jit/hwintrinsiclistxarch.h @@ -1292,7 +1292,7 @@ HARDWARE_INTRINSIC(POPCNT_X64, PopCount, // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // X86Serialize Intrinsics -HARDWARE_INTRINSIC(X86Serialize, Serialize, 0, 0, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Barrier) +HARDWARE_INTRINSIC(X86Serialize, Serialize, 0, 0, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Barrier) // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 73575fb..8c61e43 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -299,7 +299,7 @@ CONFIG_INTEGER(JitStressEvexEncoding, W("JitStressEvexEncoding"), 0) // Enable E // clang-format off -CONFIG_INTEGER(PreferredVectorBitWidth, W("PreferredVectorBitWidth"), 0) // The preferred width, in bits, to use for any implicit vectorization emitted. A value less than 128 is treated as the system default. +CONFIG_INTEGER(PreferredVectorBitWidth, W("PreferredVectorBitWidth"), 0) // The preferred decimal width, in bits, to use for any implicit vectorization emitted. A value less than 128 is treated as the system default. // // Hardware Intrinsic ISAs; keep in sync with clrconfigvalues.h diff --git a/src/tests/Common/testenvironment.proj b/src/tests/Common/testenvironment.proj index 4fb1c57..c262bd8 100644 --- a/src/tests/Common/testenvironment.proj +++ b/src/tests/Common/testenvironment.proj @@ -38,6 +38,7 @@ DOTNET_EnableSSE42; DOTNET_EnableSSSE3; DOTNET_JitStressEvexEncoding; + DOTNET_PreferredVectorBitWidth; DOTNET_ForceRelocs; DOTNET_GCStress; DOTNET_GCName; @@ -107,7 +108,7 @@ - + diff --git a/src/tests/JIT/HardwareIntrinsics/X86/X86Base/CpuId.cs b/src/tests/JIT/HardwareIntrinsics/X86/X86Base/CpuId.cs index d02c7f4..954a7cb 100644 --- a/src/tests/JIT/HardwareIntrinsics/X86/X86Base/CpuId.cs +++ b/src/tests/JIT/HardwareIntrinsics/X86/X86Base/CpuId.cs @@ -216,36 +216,50 @@ namespace XarchHardwareIntrinsicTest._CpuId } bool isAvx512HierarchyDisabled = isHierarchyDisabled; - if (isGenuineIntel && !isAvx512HierarchyDisabled) + + int preferredVectorBitWidth = (GetDotnetEnvVar("PreferredVectorBitWidth", defaultValue: 0) / 128) * 128; + int preferredVectorByteLength = preferredVectorBitWidth / 8; + + if (preferredVectorByteLength == 0) { - int steppingId = xarchCpuInfo & (int)0b1111; - int model = (xarchCpuInfo >> 4) & (int)0b1111; - int familyID = (xarchCpuInfo >> 8) & (int)0b1111; - int extendedModelID = (xarchCpuInfo >> 16) & (int)0b1111; - if (familyID == 0x06) + bool isVector512Throttling = false; + + if (isGenuineIntel) { - if (extendedModelID == 0x05) + int steppingId = xarchCpuInfo & 0b1111; + int model = (xarchCpuInfo >> 4) & 0b1111; + int familyID = (xarchCpuInfo >> 8) & 0b1111; + int extendedModelID = (xarchCpuInfo >> 16) & 0b1111; + + if (familyID == 0x06) { - if (model == 0x05) + if (extendedModelID == 0x05) { - // * Skylake (Server) - // * Cascade Lake - // * Cooper Lake - - isAvx512HierarchyDisabled = true; + if (model == 0x05) + { + // * Skylake (Server) + // * Cascade Lake + // * Cooper Lake + + isVector512Throttling = true; + } } - } - else if (extendedModelID == 0x06) - { - if (model == 0x06) + else if (extendedModelID == 0x06) { - // * Cannon Lake + if (model == 0x06) + { + // * Cannon Lake - isAvx512HierarchyDisabled = true; + isVector512Throttling = true; + } } } } + if (isVector512Throttling) + { + preferredVectorByteLength = 256 / 8; + } } if (IsBitIncorrect(ecx, 1, typeof(Avx512Vbmi), Avx512Vbmi.IsSupported, "AVX512VBMI", ref isHierarchyDisabled)) @@ -305,12 +319,12 @@ namespace XarchHardwareIntrinsicTest._CpuId testResult = Fail; } - if (IsIncorrect(typeof(Vector256), Vector256.IsHardwareAccelerated, isAvx2HierarchyDisabled)) + if (IsIncorrect(typeof(Vector256), Vector256.IsHardwareAccelerated, isAvx2HierarchyDisabled || (preferredVectorByteLength < 32))) { testResult = Fail; } - if (IsIncorrect(typeof(Vector512), Vector512.IsHardwareAccelerated, isAvx512HierarchyDisabled)) + if (IsIncorrect(typeof(Vector512), Vector512.IsHardwareAccelerated, isAvx512HierarchyDisabled || (preferredVectorByteLength < 64))) { testResult = Fail; } @@ -421,15 +435,20 @@ namespace XarchHardwareIntrinsicTest._CpuId static bool GetDotnetEnable(string name) { - string? stringValue = Environment.GetEnvironmentVariable($"DOTNET_Enable{name}"); + // Hardware Intrinsic configuration knobs default to true + return GetDotnetEnvVar($"Enable{name}", defaultValue: 1) != 0; + } + + static int GetDotnetEnvVar(string name, int defaultValue) + { + string? stringValue = Environment.GetEnvironmentVariable($"DOTNET_{name}"); if ((stringValue is null) || !int.TryParse(stringValue, out int value)) { - // Hardware Intrinsic configuration knobs default to true - return true; + return defaultValue; } - return value != 0; + return value; } } } diff --git a/src/tests/readytorun/HardwareIntrinsics/X86/CpuId.cs b/src/tests/readytorun/HardwareIntrinsics/X86/CpuId.cs index 3861b4d..605bfbf 100644 --- a/src/tests/readytorun/HardwareIntrinsics/X86/CpuId.cs +++ b/src/tests/readytorun/HardwareIntrinsics/X86/CpuId.cs @@ -211,36 +211,50 @@ namespace XarchHardwareIntrinsicTest._CpuId } bool isAvx512HierarchyDisabled = isHierarchyDisabled; - if (isGenuineIntel && !isAvx512HierarchyDisabled) + + int preferredVectorBitWidth = (GetDotnetEnvVar("PreferredVectorBitWidth", defaultValue: 0) / 128) * 128; + int preferredVectorByteLength = preferredVectorBitWidth / 8; + + if (preferredVectorByteLength == 0) { - int steppingId = xarchCpuInfo & (int)0b1111; - int model = (xarchCpuInfo >> 4) & (int)0b1111; - int familyID = (xarchCpuInfo >> 8) & (int)0b1111; - int extendedModelID = (xarchCpuInfo >> 16) & (int)0b1111; - if (familyID == 0x06) + bool isVector512Throttling = false; + + if (isGenuineIntel) { - if (extendedModelID == 0x05) + int steppingId = xarchCpuInfo & 0b1111; + int model = (xarchCpuInfo >> 4) & 0b1111; + int familyID = (xarchCpuInfo >> 8) & 0b1111; + int extendedModelID = (xarchCpuInfo >> 16) & 0b1111; + + if (familyID == 0x06) { - if (model == 0x05) + if (extendedModelID == 0x05) { - // * Skylake (Server) - // * Cascade Lake - // * Cooper Lake - - isAvx512HierarchyDisabled = true; + if (model == 0x05) + { + // * Skylake (Server) + // * Cascade Lake + // * Cooper Lake + + isVector512Throttling = true; + } } - } - else if (extendedModelID == 0x06) - { - if (model == 0x06) + else if (extendedModelID == 0x06) { - // * Cannon Lake + if (model == 0x06) + { + // * Cannon Lake - isAvx512HierarchyDisabled = true; + isVector512Throttling = true; + } } } } + if (isVector512Throttling) + { + preferredVectorByteLength = 256 / 8; + } } if (IsBitIncorrect(ecx, 1, typeof(Avx512Vbmi), Avx512Vbmi.IsSupported, "AVX512VBMI", ref isHierarchyDisabled)) @@ -299,12 +313,12 @@ namespace XarchHardwareIntrinsicTest._CpuId testResult = Fail; } - if (IsIncorrect(typeof(Vector256), Vector256.IsHardwareAccelerated, isAvx2HierarchyDisabled)) + if (IsIncorrect(typeof(Vector256), Vector256.IsHardwareAccelerated, isAvx2HierarchyDisabled || (preferredVectorByteLength < 32))) { testResult = Fail; } - if (IsIncorrect(typeof(Vector512), Vector512.IsHardwareAccelerated, isAvx512HierarchyDisabled)) + if (IsIncorrect(typeof(Vector512), Vector512.IsHardwareAccelerated, isAvx512HierarchyDisabled || (preferredVectorByteLength < 64))) { testResult = Fail; } @@ -414,15 +428,20 @@ namespace XarchHardwareIntrinsicTest._CpuId static bool GetDotnetEnable(string name) { - string? stringValue = Environment.GetEnvironmentVariable($"DOTNET_Enable{name}"); + // Hardware Intrinsic configuration knobs default to true + return GetDotnetEnvVar($"Enable{name}", defaultValue: 1) != 0; + } + + static int GetDotnetEnvVar(string name, int defaultValue) + { + string? stringValue = Environment.GetEnvironmentVariable($"DOTNET_{name}"); if ((stringValue is null) || !int.TryParse(stringValue, out int value)) { - // Hardware Intrinsic configuration knobs default to true - return true; + return defaultValue; } - return value != 0; + return value; } } } -- 2.7.4