From 4ef3cc68ccdf88af9bb39a491032b68ddf7f38ae Mon Sep 17 00:00:00 2001 From: Brian Sullivan Date: Wed, 26 Feb 2020 15:28:29 -0800 Subject: [PATCH] Add ValueNumbering support for GT_SIMD and GT_HWINTRINSIC tree nodes (#31834) * Added ValueNumbering support for GT_SIMD and GT_HWINTRINSIC tree nodes * Allow SIMD and HW Intrinsics to be CSE candidates * Correctness fix for optAssertionPropMain - Zero out the bbAssertionIn values, as these can be referenced in RangeCheck::MergeAssertion and this is shared state with the CSE phase: bbCseIn * Improve the VNFOA_ArityMask * Update to use the new TARGET macros * Include node type when value numbering SIMDIntrinsicInit Mutate the global heap when performing a HW_INTRINSIC memory store operation Printing of SIMD constants only supports 0 * Disable CSE's for some special HW_INTRINSIC categories * Code review feedback * Record csdStructHnd; // The class handle, currently needed to create a SIMD LclVar in PerformCSE * Instead of asserting on a struct handle mismatch, we record it in csdStructHndMismatch and avoid making the candidate into a CSE * Fix the JITDUMP messages to print the CseIndex * add check for (newElemStructHnd != NO_CLASS_HANDLE) * Additional checks for SIMD struct types when setting csdStructHnd Added Mismatched Struct Handle assert in ConsiderCandidates * fix GenTreeSIMD::OperIsMemoryLoad for ARM64 Removed mismatched Struct Handle assert * Fix the printing of BitSets on Linux, change the printf format specifier * Added check for simdNode->OperIsMemoryLoad() to fgValueNumberSimd Added bool OperIsMemoryLoad() to GenTreeSIMD, returns true for SIMDIntrinsicInitArray Added valuenumfuncs.h to src/coreclr/src/jit/CMakeLists.txt * Avoid calling gtGetStructHandleIfPresent to set csdStructHnd when we have a GT_IND node * Fix check for (newElemStructHnd != hashDsc->csdStructHnd) * added extra value number argument VNF_SimdType for Most SIMD operations added VNF_SimdType // A value number function to compose a SIMD type 
added vnDumpSimdType * Added bool methods vnEncodesResultTypeForSIMDIntrinsic and vnEncodesResultTypeForHWIntrinsic these return true when a SIMD or HW Intrinsic will use an extra arg to record the result type during value numbering Changes the ValueNumFuncDef to set the arity to zero when a -1 value is passed in Updated InitValueNumStoreStatics to fixup the arity of SIMD or HW Intrinsic that have an extra arg to record the result type Allow a type mismatch when we have a GT_BLK as the lhs of an assignment, as it is used to zero out Simd structs * Fix for SIMD_WidenLo arg count * fix typo * Fix x86 build breaks Fix SIMD_WidenHi * Added method header comment for vnEncodesResultTypeForHWIntrinsic Added & VNFOA_ArityMask when assigning to vnfOpAttribs[] * Code review feedback and some more comments * fix typo * Moved the code that sets the arg count for the three SIMD intrinsics * clang-format * Adjust CSE for SIMD types that are live across a call * Proposed fix for #32085 * Revert "Proposed fix for #32085" This reverts commit 169c24eeafb072a79ffbdb2d1d36960ebc0c17b4. 
* Added better comments for optcse SIMD caller saved register heuristics * Added CONFIG_INTEGER: JitDisableSimdVN, Default 0, ValueNumbering of SIMD nodes and HW Intrinsic nodes enabled If 1, then disable ValueNumbering of SIMD nodes If 2, then disable ValueNumbering of HW Intrinsic nodes If 3, disable both SIMD and HW Intrinsic nodes * Moved JitDisableSimdVN from DEBUG to RETAIL --- src/coreclr/src/jit/CMakeLists.txt | 1 + src/coreclr/src/jit/assertionprop.cpp | 9 +- src/coreclr/src/jit/bitsetasshortlong.h | 4 +- src/coreclr/src/jit/compiler.h | 26 ++ src/coreclr/src/jit/gentree.cpp | 25 +- src/coreclr/src/jit/gentree.h | 11 +- src/coreclr/src/jit/hwintrinsic.cpp | 63 ++++ src/coreclr/src/jit/hwintrinsiclistxarch.h | 8 +- src/coreclr/src/jit/jitconfigvalues.h | 9 + src/coreclr/src/jit/optcse.cpp | 149 +++++++- src/coreclr/src/jit/simd.cpp | 40 +++ src/coreclr/src/jit/valuenum.cpp | 534 ++++++++++++++++++++++++++--- src/coreclr/src/jit/valuenum.h | 13 +- src/coreclr/src/jit/valuenumfuncs.h | 29 +- 14 files changed, 843 insertions(+), 78 deletions(-) diff --git a/src/coreclr/src/jit/CMakeLists.txt b/src/coreclr/src/jit/CMakeLists.txt index b63e08a..9d1dbe0 100644 --- a/src/coreclr/src/jit/CMakeLists.txt +++ b/src/coreclr/src/jit/CMakeLists.txt @@ -186,6 +186,7 @@ if (CLR_CMAKE_TARGET_WIN32) unwind.h utils.h valuenum.h + valuenumfuncs.h valuenumtype.h varset.h vartype.h diff --git a/src/coreclr/src/jit/assertionprop.cpp b/src/coreclr/src/jit/assertionprop.cpp index f3f95ff..916921f 100644 --- a/src/coreclr/src/jit/assertionprop.cpp +++ b/src/coreclr/src/jit/assertionprop.cpp @@ -5193,8 +5193,15 @@ void Compiler::optAssertionPropMain() } } - if (!optAssertionCount) + if (optAssertionCount == 0) { + // Zero out the bbAssertionIn values, as these can be referenced in RangeCheck::MergeAssertion + // and this is sharedstate with the CSE phase: bbCseIn + // + for (BasicBlock* block = fgFirstBB; block; block = block->bbNext) + { + block->bbAssertionIn = 
BitVecOps::MakeEmpty(apTraits); + } return; } diff --git a/src/coreclr/src/jit/bitsetasshortlong.h b/src/coreclr/src/jit/bitsetasshortlong.h index c22eefc..27a8c1d 100644 --- a/src/coreclr/src/jit/bitsetasshortlong.h +++ b/src/coreclr/src/jit/bitsetasshortlong.h @@ -412,12 +412,12 @@ public: char* ptr = res; if (sizeof(size_t) == sizeof(int64_t)) { - sprintf_s(ptr, remaining, "%016zX", bits); + sprintf_s(ptr, remaining, "%016llX", bits); } else { assert(sizeof(size_t) == sizeof(int)); - sprintf_s(ptr, remaining, "%08zX", bits); + sprintf_s(ptr, remaining, "%08X", bits); } return res; } diff --git a/src/coreclr/src/jit/compiler.h b/src/coreclr/src/jit/compiler.h index 3bac311..6028877 100644 --- a/src/coreclr/src/jit/compiler.h +++ b/src/coreclr/src/jit/compiler.h @@ -4674,6 +4674,16 @@ public: // Does value-numbering for an intrinsic tree. void fgValueNumberIntrinsic(GenTree* tree); +#ifdef FEATURE_SIMD + // Does value-numbering for a GT_SIMD tree + void fgValueNumberSimd(GenTree* tree); +#endif // FEATURE_SIMD + +#ifdef FEATURE_HW_INTRINSICS + // Does value-numbering for a GT_HWINTRINSIC tree + void fgValueNumberHWIntrinsic(GenTree* tree); +#endif // FEATURE_HW_INTRINSICS + // Does value-numbering for a call. We interpret some helper calls. 
void fgValueNumberCall(GenTreeCall* call); @@ -4698,6 +4708,9 @@ public: // Adds the exception set for the current tree node which is performing a overflow checking operation void fgValueNumberAddExceptionSetForOverflow(GenTree* tree); + // Adds the exception set for the current tree node which is performing a bounds check operation + void fgValueNumberAddExceptionSetForBoundsCheck(GenTree* tree); + // Adds the exception set for the current tree node which is performing a ckfinite operation void fgValueNumberAddExceptionSetForCkFinite(GenTree* tree); @@ -6190,6 +6203,12 @@ protected: treeStmtLst* csdTreeList; // list of matching tree nodes: head treeStmtLst* csdTreeLast; // list of matching tree nodes: tail + // ToDo: This can be removed when gtGetStructHandleIfPresent stops guessing + // and GT_IND nodes always have valid struct handle. + // + CORINFO_CLASS_HANDLE csdStructHnd; // The class handle, currently needed to create a SIMD LclVar in PerformCSE + bool csdStructHndMismatch; + ValueNum defExcSetPromise; // The exception set that is now required for all defs of this CSE. // This will be set to NoVN if we decide to abandon this CSE @@ -8189,6 +8208,13 @@ public: #endif // !FEATURE_SIMD } +#ifdef FEATURE_SIMD + static bool vnEncodesResultTypeForSIMDIntrinsic(SIMDIntrinsicID intrinsicId); +#endif // !FEATURE_SIMD +#ifdef FEATURE_HW_INTRINSICS + static bool vnEncodesResultTypeForHWIntrinsic(NamedIntrinsic hwIntrinsicID); +#endif // FEATURE_HW_INTRINSICS + private: // These routines need not be enclosed under FEATURE_SIMD since lvIsSIMDType() // is defined for both FEATURE_SIMD and !FEATURE_SIMD apropriately. 
The use diff --git a/src/coreclr/src/jit/gentree.cpp b/src/coreclr/src/jit/gentree.cpp index 9c78cc1..053482e 100644 --- a/src/coreclr/src/jit/gentree.cpp +++ b/src/coreclr/src/jit/gentree.cpp @@ -5405,11 +5405,16 @@ bool GenTree::OperIsImplicitIndir() const case GT_ARR_ELEM: case GT_ARR_OFFSET: return true; +#ifdef FEATURE_SIMD + case GT_SIMD: + { + return AsSIMD()->OperIsMemoryLoad(); + } +#endif // FEATURE_SIMD #ifdef FEATURE_HW_INTRINSICS case GT_HWINTRINSIC: { - GenTreeHWIntrinsic* hwIntrinsicNode = (const_cast(this))->AsHWIntrinsic(); - return hwIntrinsicNode->OperIsMemoryLoadOrStore(); + return AsHWIntrinsic()->OperIsMemoryLoadOrStore(); } #endif // FEATURE_HW_INTRINSICS default: @@ -18224,6 +18229,16 @@ bool GenTree::isCommutativeSIMDIntrinsic() return false; } } + +// Returns true for the SIMD Instrinsic instructions that have MemoryLoad semantics, false otherwise +bool GenTreeSIMD::OperIsMemoryLoad() const +{ + if (gtSIMDIntrinsicID == SIMDIntrinsicInitArray) + { + return true; + } + return false; +} #endif // FEATURE_SIMD #ifdef FEATURE_HW_INTRINSICS @@ -18433,7 +18448,7 @@ GenTree* Compiler::gtNewMustThrowException(unsigned helper, var_types type, CORI } // Returns true for the HW Instrinsic instructions that have MemoryLoad semantics, false otherwise -bool GenTreeHWIntrinsic::OperIsMemoryLoad() +bool GenTreeHWIntrinsic::OperIsMemoryLoad() const { #ifdef TARGET_XARCH // Some xarch instructions have MemoryLoad sematics @@ -18475,7 +18490,7 @@ bool GenTreeHWIntrinsic::OperIsMemoryLoad() } // Returns true for the HW Instrinsic instructions that have MemoryStore semantics, false otherwise -bool GenTreeHWIntrinsic::OperIsMemoryStore() +bool GenTreeHWIntrinsic::OperIsMemoryStore() const { #ifdef TARGET_XARCH // Some xarch instructions have MemoryStore sematics @@ -18510,7 +18525,7 @@ bool GenTreeHWIntrinsic::OperIsMemoryStore() } // Returns true for the HW Instrinsic instructions that have MemoryLoad semantics, false otherwise -bool 
GenTreeHWIntrinsic::OperIsMemoryLoadOrStore() +bool GenTreeHWIntrinsic::OperIsMemoryLoadOrStore() const { #ifdef TARGET_XARCH return OperIsMemoryLoad() || OperIsMemoryStore(); diff --git a/src/coreclr/src/jit/gentree.h b/src/coreclr/src/jit/gentree.h index 32c81b4..a67fd15 100644 --- a/src/coreclr/src/jit/gentree.h +++ b/src/coreclr/src/jit/gentree.h @@ -4536,6 +4536,9 @@ struct GenTreeSIMD : public GenTreeJitIntrinsic { } + bool OperIsMemoryLoad() const; // Returns true for the SIMD Instrinsic instructions that have MemoryLoad semantics, + // false otherwise + #if DEBUGGABLE_GENTREE GenTreeSIMD() : GenTreeJitIntrinsic() { @@ -4584,12 +4587,12 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic // However there are HW Instrinsic instructions that have 3 or even 4 operands and this is // supported using a single op1 and using an ArgList for it: gtNewArgList(op1, op2, op3) - bool OperIsMemoryLoad(); // Returns true for the HW Instrinsic instructions that have MemoryLoad semantics, + bool OperIsMemoryLoad() const; // Returns true for the HW Instrinsic instructions that have MemoryLoad semantics, // false otherwise - bool OperIsMemoryStore(); // Returns true for the HW Instrinsic instructions that have MemoryStore semantics, + bool OperIsMemoryStore() const; // Returns true for the HW Instrinsic instructions that have MemoryStore semantics, // false otherwise - bool OperIsMemoryLoadOrStore(); // Returns true for the HW Instrinsic instructions that have MemoryLoad or - // MemoryStore semantics, false otherwise + bool OperIsMemoryLoadOrStore() const; // Returns true for the HW Instrinsic instructions that have MemoryLoad or + // MemoryStore semantics, false otherwise #if DEBUGGABLE_GENTREE GenTreeHWIntrinsic() : GenTreeJitIntrinsic() diff --git a/src/coreclr/src/jit/hwintrinsic.cpp b/src/coreclr/src/jit/hwintrinsic.cpp index 158130b..0ab533d 100644 --- a/src/coreclr/src/jit/hwintrinsic.cpp +++ b/src/coreclr/src/jit/hwintrinsic.cpp @@ -173,6 +173,69 @@ 
CORINFO_CLASS_HANDLE Compiler::gtGetStructHandleForHWSIMD(var_types simdType, va return NO_CLASS_HANDLE; } +#ifdef FEATURE_HW_INTRINSICS +//------------------------------------------------------------------------ +// vnEncodesResultTypeForHWIntrinsic(NamedIntrinsic hwIntrinsicID): +// +// Arguments: +// hwIntrinsicID -- The id for the HW intrinsic +// +// Return Value: +// Returns true if this intrinsic requires value numbering to add an +// extra SimdType argument that encodes the resulting type. +// If we don't do this overloaded versions can return the same VN +// leading to incorrect CSE subsitutions. +// +/* static */ bool Compiler::vnEncodesResultTypeForHWIntrinsic(NamedIntrinsic hwIntrinsicID) +{ + int numArgs = HWIntrinsicInfo::lookupNumArgs(hwIntrinsicID); + + // HW Instrinsic's with -1 for numArgs have a varying number of args, so we currently + // give themm a unique value number them, and don't add an extra argument. + // + if (numArgs == -1) + { + return false; + } + + // We iterate over all of the different baseType's for this instrinsic in the HWIntrinsicInfo table + // We set diffInsCount to the number of instructions that can execute differently. 
+ // + unsigned diffInsCount = 0; +#ifdef TARGET_XARCH + instruction lastIns = INS_invalid; +#endif + for (var_types baseType = TYP_BYTE; (baseType <= TYP_DOUBLE); baseType = (var_types)(baseType + 1)) + { + instruction curIns = HWIntrinsicInfo::lookupIns(hwIntrinsicID, baseType); + if (curIns != INS_invalid) + { +#ifdef TARGET_XARCH + if (curIns != lastIns) + { + diffInsCount++; + // remember the last valid instruction that we saw + lastIns = curIns; + } +#elif defined(TARGET_ARM64) + // On ARM64 we use the same instruction and specify an insOpt arrangement + // so we always consider the instruction operation to be different + // + diffInsCount++; +#endif // TARGET + if (diffInsCount >= 2) + { + // We can early exit the loop now + break; + } + } + } + + // If we see two (or more) different instructions we need the extra VNF_SimdType arg + return (diffInsCount >= 2); +} +#endif // FEATURE_HW_INTRINSICS + //------------------------------------------------------------------------ // lookupId: Gets the NamedIntrinsic for a given method name and InstructionSet // diff --git a/src/coreclr/src/jit/hwintrinsiclistxarch.h b/src/coreclr/src/jit/hwintrinsiclistxarch.h index 42eda73..7bd8c15 100644 --- a/src/coreclr/src/jit/hwintrinsiclistxarch.h +++ b/src/coreclr/src/jit/hwintrinsiclistxarch.h @@ -483,10 +483,10 @@ HARDWARE_INTRINSIC(AVX2_ConvertToUInt32, "ConvertToUI HARDWARE_INTRINSIC(AVX2_ConvertToVector256Int16, "ConvertToVector256Int16", AVX2, -1, 32, 1, {INS_pmovsxbw, INS_pmovzxbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(AVX2_ConvertToVector256Int32, "ConvertToVector256Int32", AVX2, -1, 32, 1, {INS_pmovsxbd, INS_pmovzxbd, INS_pmovsxwd, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg) 
HARDWARE_INTRINSIC(AVX2_ConvertToVector256Int64, "ConvertToVector256Int64", AVX2, -1, 32, 1, {INS_pmovsxbq, INS_pmovzxbq, INS_pmovsxwq, INS_pmovzxwq, INS_pmovsxdq, INS_pmovzxdq, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX2_GatherVector128, "GatherVector128", AVX2, -1, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd}, HW_Category_IMM, HW_Flag_SpecialCodeGen|HW_Flag_NoContainment) -HARDWARE_INTRINSIC(AVX2_GatherVector256, "GatherVector256", AVX2, -1, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd}, HW_Category_IMM, HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_NoContainment) -HARDWARE_INTRINSIC(AVX2_GatherMaskVector128, "GatherMaskVector128", AVX2, -1, 16, 5, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd}, HW_Category_IMM, HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NoContainment) -HARDWARE_INTRINSIC(AVX2_GatherMaskVector256, "GatherMaskVector256", AVX2, -1, 32, 5, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd}, HW_Category_IMM, HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NoContainment) +HARDWARE_INTRINSIC(AVX2_GatherVector128, "GatherVector128", AVX2, -1, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd}, HW_Category_IMM, HW_Flag_SpecialCodeGen|HW_Flag_NoContainment) +HARDWARE_INTRINSIC(AVX2_GatherVector256, "GatherVector256", AVX2, -1, 32, 3, {INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd}, HW_Category_IMM, HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_NoContainment) +HARDWARE_INTRINSIC(AVX2_GatherMaskVector128, "GatherMaskVector128", AVX2, -1, 16, 5, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd}, HW_Category_IMM, HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NoContainment) +HARDWARE_INTRINSIC(AVX2_GatherMaskVector256, "GatherMaskVector256", AVX2, -1, 32, 5, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd}, HW_Category_IMM, HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NoContainment) HARDWARE_INTRINSIC(AVX2_HorizontalAdd, "HorizontalAdd", AVX2, -1, 32, 2, {INS_invalid, INS_invalid, INS_phaddw, INS_invalid, INS_phaddd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX2_HorizontalAddSaturate, "HorizontalAddSaturate", AVX2, -1, 32, 2, {INS_invalid, INS_invalid, INS_phaddsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX2_HorizontalSubtract, "HorizontalSubtract", AVX2, -1, 32, 2, {INS_invalid, INS_invalid, INS_phsubw, INS_invalid, INS_phsubd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) diff --git a/src/coreclr/src/jit/jitconfigvalues.h b/src/coreclr/src/jit/jitconfigvalues.h index f852fe2..70f65eb 100644 --- a/src/coreclr/src/jit/jitconfigvalues.h +++ b/src/coreclr/src/jit/jitconfigvalues.h @@ -135,6 +135,7 @@ CONFIG_INTEGER(ShouldInjectFault, W("InjectFault"), 0) CONFIG_INTEGER(StressCOMCall, W("StressCOMCall"), 0) 
CONFIG_INTEGER(TailcallStress, W("TailcallStress"), 0) CONFIG_INTEGER(TreesBeforeAfterMorph, W("JitDumpBeforeAfterMorph"), 0) // If 1, display each tree before/after morphing + CONFIG_METHODSET(JitBreak, W("JitBreak")) // Stops in the importer when compiling a specified method CONFIG_METHODSET(JitDebugBreak, W("JitDebugBreak")) CONFIG_METHODSET(JitDisasm, W("JitDisasm")) // Dumps disassembly for specified method @@ -275,6 +276,14 @@ CONFIG_INTEGER(EnableArm64Sve, W("EnableArm64Sve"), 1) // clang-format on +#ifdef FEATURE_SIMD +CONFIG_INTEGER(JitDisableSimdVN, W("JitDisableSimdVN"), 0) // Default 0, ValueNumbering of SIMD nodes and HW Intrinsic + // nodes enabled + // If 1, then disable ValueNumbering of SIMD nodes + // If 2, then disable ValueNumbering of HW Intrinsic nodes + // If 3, disable both SIMD and HW Intrinsic nodes +#endif // FEATURE_SIMD + /// /// JIT /// diff --git a/src/coreclr/src/jit/optcse.cpp b/src/coreclr/src/jit/optcse.cpp index 4a9a405..efb4ed3 100644 --- a/src/coreclr/src/jit/optcse.cpp +++ b/src/coreclr/src/jit/optcse.cpp @@ -498,8 +498,22 @@ unsigned Compiler::optValnumCSE_Index(GenTree* tree, Statement* stmt) /* Start the list with the first CSE candidate recorded */ - hashDsc->csdTreeList = newElem; - hashDsc->csdTreeLast = newElem; + hashDsc->csdTreeList = newElem; + hashDsc->csdTreeLast = newElem; + hashDsc->csdStructHnd = NO_CLASS_HANDLE; + + hashDsc->csdStructHndMismatch = false; + + if (varTypeIsStruct(tree->gtType)) + { + // When we have a GT_IND node with a SIMD type then we don't have a reliable + // struct handle and gtGetStructHandleIfPresent returns a guess that can be wrong + // + if ((hashDsc->csdTree->OperGet() != GT_IND) || !varTypeIsSIMD(tree)) + { + hashDsc->csdStructHnd = gtGetStructHandleIfPresent(hashDsc->csdTree); + } + } } noway_assert(hashDsc->csdTreeList); @@ -516,6 +530,38 @@ unsigned Compiler::optValnumCSE_Index(GenTree* tree, Statement* stmt) hashDsc->csdTreeLast->tslNext = newElem; hashDsc->csdTreeLast = newElem; 
+ if (varTypeIsStruct(newElem->tslTree->gtType)) + { + // When we have a GT_IND node with a SIMD type then we don't have a reliable + // struct handle and gtGetStructHandleIfPresent returns a guess that can be wrong + // + if ((newElem->tslTree->OperGet() != GT_IND) || !varTypeIsSIMD(newElem->tslTree)) + { + CORINFO_CLASS_HANDLE newElemStructHnd = gtGetStructHandleIfPresent(newElem->tslTree); + if (newElemStructHnd != NO_CLASS_HANDLE) + { + if (hashDsc->csdStructHnd == NO_CLASS_HANDLE) + { + // The previous node(s) were GT_IND's and didn't carry the struct handle info + // The current node does have the struct handle info, so record it now + // + hashDsc->csdStructHnd = newElemStructHnd; + } + else if (newElemStructHnd != hashDsc->csdStructHnd) + { + hashDsc->csdStructHndMismatch = true; +#ifdef DEBUG + if (verbose) + { + printf("Abandoned - CSE candidate has mismatching struct handles!\n"); + printTreeID(newElem->tslTree); + } +#endif // DEBUG + } + } + } + } + optDoCSE = true; // Found a duplicate CSE tree /* Have we assigned a CSE index? */ @@ -2415,6 +2461,36 @@ public: extra_yes_cost *= 2; // full cost if we are being Conservative } } + +#ifdef FEATURE_SIMD + // SIMD types may cause a SIMD register to be spilled/restored in the prolog and epilog. + // + if (varTypeIsSIMD(candidate->Expr()->TypeGet())) + { + // We don't have complete information about when these extra spilled/restore will be needed. + // Instead we are conservative and assume that each SIMD CSE that is live across a call + // will cause an additional spill/restore in the prolog and epilog. + // + int spillSimdRegInProlog = 1; + + // If we have a SIMD32 that is live across a call we have even higher spill costs + // + if (candidate->Expr()->TypeGet() == TYP_SIMD32) + { + // Additionally for a simd32 CSE candidate we assume that and second spilled/restore will be needed. 
+ // (to hold the upper half of the simd32 register that isn't preserved across the call) + // + spillSimdRegInProlog++; + + // We also increase the CSE use cost here to because we may have to generate instructions + // to move the upper half of the simd32 before and after a call. + // + cse_use_cost += 2; + } + + extra_yes_cost = (BB_UNITY_WEIGHT * spillSimdRegInProlog) * 3; + } +#endif // FEATURE_SIMD } // estimate the cost from lost codesize reduction if we do not perform the CSE @@ -2558,9 +2634,10 @@ public: var_types cseLclVarTyp = genActualType(successfulCandidate->Expr()->TypeGet()); if (varTypeIsStruct(cseLclVarTyp)) { - // After call args have been morphed, we don't need a handle for SIMD types. - // They are only required where the size is not implicit in the type and/or there are GC refs. - CORINFO_CLASS_HANDLE structHnd = m_pCompiler->gtGetStructHandleIfPresent(successfulCandidate->Expr()); + // Retrieve the struct handle that we recorded while bulding the list of CSE candidates. 
+ // If all occurances were in GT_IND nodes it could still be NO_CLASS_HANDLE + // + CORINFO_CLASS_HANDLE structHnd = successfulCandidate->CseDsc()->csdStructHnd; assert((structHnd != NO_CLASS_HANDLE) || (cseLclVarTyp != TYP_STRUCT)); if (structHnd != NO_CLASS_HANDLE) { @@ -2974,13 +3051,19 @@ public: for (; (cnt > 0); cnt--, ptr++) { Compiler::CSEdsc* dsc = *ptr; + CSE_Candidate candidate(this, dsc); + if (dsc->defExcSetPromise == ValueNumStore::NoVN) { - JITDUMP("Abandoned CSE #%02u because we had defs with different Exc sets\n"); + JITDUMP("Abandoned CSE #%02u because we had defs with different Exc sets\n", candidate.CseIndex()); continue; } - CSE_Candidate candidate(this, dsc); + if (dsc->csdStructHndMismatch) + { + JITDUMP("Abandoned CSE #%02u because we had mismatching struct handles\n", candidate.CseIndex()); + continue; + } candidate.InitializeCounts(); @@ -3280,10 +3363,58 @@ bool Compiler::optIsCSEcandidate(GenTree* tree) case GT_LE: case GT_GE: case GT_GT: - return true; // Also CSE these Comparison Operators + return true; // Allow the CSE of Comparison operators + +#ifdef FEATURE_SIMD + case GT_SIMD: + return true; // allow SIMD intrinsics to be CSE-ed + +#endif // FEATURE_SIMD + +#ifdef FEATURE_HW_INTRINSICS + case GT_HWINTRINSIC: + { + GenTreeHWIntrinsic* hwIntrinsicNode = tree->AsHWIntrinsic(); + assert(hwIntrinsicNode != nullptr); + HWIntrinsicCategory category = HWIntrinsicInfo::lookupCategory(hwIntrinsicNode->gtHWIntrinsicId); + + switch (category) + { + case HW_Category_SimpleSIMD: + case HW_Category_IMM: + case HW_Category_Scalar: + case HW_Category_SIMDScalar: + case HW_Category_Helper: + break; + + case HW_Category_MemoryLoad: + case HW_Category_MemoryStore: + case HW_Category_Special: + default: + return false; + } + + if (hwIntrinsicNode->OperIsMemoryStore()) + { + // NI_BMI2_MultiplyNoFlags, etc... + return false; + } + if (hwIntrinsicNode->OperIsMemoryLoad()) + { + // NI_AVX2_BroadcastScalarToVector128, NI_AVX2_GatherVector128, etc... 
+ return false; + } + + return true; // allow Hardware Intrinsics to be CSE-ed + } + +#endif // FEATURE_HW_INTRINSICS case GT_INTRINSIC: - return true; // Intrinsics + return true; // allow Intrinsics to be CSE-ed + + case GT_OBJ: + return varTypeIsEnregisterable(type); // Allow enregisterable GT_OBJ's to be CSE-ed. (i.e. SIMD types) case GT_COMMA: return true; // Allow GT_COMMA nodes to be CSE-ed. diff --git a/src/coreclr/src/jit/simd.cpp b/src/coreclr/src/jit/simd.cpp index f436f34..d970ca2 100644 --- a/src/coreclr/src/jit/simd.cpp +++ b/src/coreclr/src/jit/simd.cpp @@ -1051,6 +1051,46 @@ const SIMDIntrinsicInfo* Compiler::getSIMDIntrinsicInfo(CORINFO_CLASS_HANDLE* in return nullptr; } +/* static */ bool Compiler::vnEncodesResultTypeForSIMDIntrinsic(SIMDIntrinsicID intrinsicId) +{ + switch (intrinsicId) + { + case SIMDIntrinsicInit: + case SIMDIntrinsicGetItem: + case SIMDIntrinsicAdd: + case SIMDIntrinsicSub: + case SIMDIntrinsicMul: + case SIMDIntrinsicDiv: + case SIMDIntrinsicSqrt: + case SIMDIntrinsicMin: + case SIMDIntrinsicMax: + case SIMDIntrinsicAbs: + case SIMDIntrinsicEqual: + case SIMDIntrinsicLessThan: + case SIMDIntrinsicLessThanOrEqual: + case SIMDIntrinsicGreaterThan: + case SIMDIntrinsicGreaterThanOrEqual: + case SIMDIntrinsicBitwiseAnd: + case SIMDIntrinsicBitwiseAndNot: + case SIMDIntrinsicBitwiseOr: + case SIMDIntrinsicBitwiseXor: + case SIMDIntrinsicDotProduct: + case SIMDIntrinsicCast: + case SIMDIntrinsicConvertToSingle: + case SIMDIntrinsicConvertToDouble: + case SIMDIntrinsicConvertToInt32: + case SIMDIntrinsicConvertToInt64: + case SIMDIntrinsicNarrow: + case SIMDIntrinsicWidenHi: + case SIMDIntrinsicWidenLo: + return true; + + default: + break; + } + return false; +} + // Pops and returns GenTree node from importer's type stack. // Normalizes TYP_STRUCT value in case of GT_CALL, GT_RET_EXPR and arg nodes. 
// diff --git a/src/coreclr/src/jit/valuenum.cpp b/src/coreclr/src/jit/valuenum.cpp index c1d799f..66f5454 100644 --- a/src/coreclr/src/jit/valuenum.cpp +++ b/src/coreclr/src/jit/valuenum.cpp @@ -342,6 +342,15 @@ VNFunc GetVNFuncForNode(GenTree* node) } break; +#ifdef FEATURE_SIMD + case GT_SIMD: + return VNFunc(VNF_SIMD_FIRST + node->AsSIMD()->gtSIMDIntrinsicID); +#endif // FEATURE_SIMD +#ifdef FEATURE_HW_INTRINSICS + case GT_HWINTRINSIC: + return VNFunc(VNF_HWI_FIRST + (node->AsHWIntrinsic()->gtHWIntrinsicId - NI_HW_INTRINSIC_START - 1)); +#endif // FEATURE_HW_INTRINSICS + case GT_CAST: // GT_CAST can overflow but it has special handling and it should not appear here. unreached(); @@ -1739,14 +1748,15 @@ ValueNum ValueNumStore::VNZeroForType(var_types typ) case TYP_BYREF: return VNForByrefCon(0); case TYP_STRUCT: + return VNForZeroMap(); // Recursion! + #ifdef FEATURE_SIMD - // TODO-CQ: Improve value numbering for SIMD types. case TYP_SIMD8: case TYP_SIMD12: case TYP_SIMD16: case TYP_SIMD32: -#endif // FEATURE_SIMD - return VNForZeroMap(); // Recursion! + return VNForLongCon(0); +#endif // FEATURE_SIMD // These should be unreached. default: @@ -5009,14 +5019,23 @@ void ValueNumStore::vnDump(Compiler* comp, ValueNum vn, bool isPtr) printf("byrefVal"); break; case TYP_STRUCT: + printf("structVal(zero)"); + break; + #ifdef FEATURE_SIMD case TYP_SIMD8: case TYP_SIMD12: case TYP_SIMD16: case TYP_SIMD32: + { + // Only the zero constant is currently allowed for SIMD types + // + INT64 val = ConstantValue(vn); + assert(val == 0); + printf(" 0"); + } + break; #endif // FEATURE_SIMD - printf("structVal"); - break; // These should be unreached. 
default: @@ -5054,6 +5073,12 @@ void ValueNumStore::vnDump(Compiler* comp, ValueNum vn, bool isPtr) case VNF_ValWithExc: vnDumpValWithExc(comp, &funcApp); break; +#ifdef FEATURE_SIMD + case VNF_SimdType: + vnDumpSimdType(comp, &funcApp); + break; +#endif // FEATURE_SIMD + default: printf("%s(", VNFuncName(funcApp.m_func)); for (unsigned i = 0; i < funcApp.m_arity; i++) @@ -5205,6 +5230,21 @@ void ValueNumStore::vnDumpMapStore(Compiler* comp, VNFuncApp* mapStore) comp->vnPrint(newValVN, 0); printf("]"); } + +#ifdef FEATURE_SIMD +void ValueNumStore::vnDumpSimdType(Compiler* comp, VNFuncApp* simdType) +{ + assert(simdType->m_func == VNF_SimdType); // Preconditions. + assert(IsVNConstant(simdType->m_args[0])); + assert(IsVNConstant(simdType->m_args[1])); + + int simdSize = ConstantValue(simdType->m_args[0]); + var_types baseType = (var_types)ConstantValue(simdType->m_args[1]); + + printf("%s(simd%d, %s)", VNFuncName(simdType->m_func), simdSize, varTypeName(baseType)); +} +#endif // FEATURE_SIMD + #endif // DEBUG // Static fields, methods. @@ -5227,9 +5267,9 @@ UINT8* ValueNumStore::s_vnfOpAttribs = nullptr; void ValueNumStore::InitValueNumStoreStatics() { - // Make sure we've gotten constants right... - assert(unsigned(VNFOA_Arity) == (1 << VNFOA_ArityShift)); - assert(unsigned(VNFOA_AfterArity) == (unsigned(VNFOA_Arity) << VNFOA_ArityBits)); + // Make sure we have the constants right... 
+ assert(unsigned(VNFOA_Arity1) == (1 << VNFOA_ArityShift)); + assert(VNFOA_ArityMask == (VNFOA_MaxArity << VNFOA_ArityShift)); s_vnfOpAttribs = &vnfOpAttribs[0]; for (unsigned i = 0; i < GT_COUNT; i++) @@ -5249,7 +5289,7 @@ void ValueNumStore::InitValueNumStoreStatics() { arity = 2; } - vnfOpAttribs[i] |= (arity << VNFOA_ArityShift); + vnfOpAttribs[i] |= ((arity << VNFOA_ArityShift) & VNFOA_ArityMask); if (GenTree::OperIsCommutative(gtOper)) { @@ -5268,12 +5308,72 @@ void ValueNumStore::InitValueNumStoreStatics() vnfOpAttribs[vnfNum] |= VNFOA_KnownNonNull; \ if (sharedStatic) \ vnfOpAttribs[vnfNum] |= VNFOA_SharedStatic; \ - vnfOpAttribs[vnfNum] |= (arity << VNFOA_ArityShift); \ + if (arity > 0) \ + vnfOpAttribs[vnfNum] |= ((arity << VNFOA_ArityShift) & VNFOA_ArityMask); \ vnfNum++; #include "valuenumfuncs.h" #undef ValueNumFuncDef + assert(vnfNum == VNF_COUNT); + +#define ValueNumFuncSetArity(vnfNum, arity) \ + vnfOpAttribs[vnfNum] &= ~VNFOA_ArityMask; /* clear old arity value */ \ + vnfOpAttribs[vnfNum] |= ((arity << VNFOA_ArityShift) & VNFOA_ArityMask) /* set the new arity value */ + +#ifdef FEATURE_SIMD + + // SIMDIntrinsicInit has an entry of 2 for numArgs, but it only has one normal arg + ValueNumFuncSetArity(VNF_SIMD_Init, 1); + // SIMDIntrinsicWidenHi has an entry of 2 for numArgs, but it only has one normal arg + ValueNumFuncSetArity(VNF_SIMD_WidenHi, 1); + // SIMDIntrinsicWidenLo has an entry of 2 for numArgs, but it only has one normal arg + ValueNumFuncSetArity(VNF_SIMD_WidenLo, 1); + + // Some SIMD intrinsic nodes have an extra VNF_SimdType arg + // + for (SIMDIntrinsicID id = SIMDIntrinsicID::SIMDIntrinsicNone; (id < SIMDIntrinsicID::SIMDIntrinsicInvalid); + id = (SIMDIntrinsicID)(id + 1)) + { + bool encodeResultType = Compiler::vnEncodesResultTypeForSIMDIntrinsic(id); + + if (encodeResultType) + { + // These SIMDIntrinsic's have an extra VNF_SimdType arg. 
+ // + VNFunc func = VNFunc(VNF_SIMD_FIRST + id); + unsigned oldArity = VNFuncArity(func); + unsigned newArity = oldArity + 1; + + ValueNumFuncSetArity(func, newArity); + } + } + +#endif // FEATURE_SIMD + +#ifdef FEATURE_HW_INTRINSICS + + for (NamedIntrinsic id = (NamedIntrinsic)(NI_HW_INTRINSIC_START + 1); (id < NI_HW_INTRINSIC_END); + id = (NamedIntrinsic)(id + 1)) + { + bool encodeResultType = Compiler::vnEncodesResultTypeForHWIntrinsic(id); + + if (encodeResultType) + { + // These HW_Intrinsic's have an extra VNF_SimdType arg. + // + VNFunc func = VNFunc(VNF_HWI_FIRST + (id - NI_HW_INTRINSIC_START - 1)); + unsigned oldArity = VNFuncArity(func); + unsigned newArity = oldArity + 1; + + ValueNumFuncSetArity(func, newArity); + } + } + +#endif // FEATURE_HW_INTRINSICS + +#undef ValueNumFuncSetArity + for (unsigned i = 0; i < _countof(genTreeOpsIllegalAsVNFunc); i++) { vnfOpAttribs[genTreeOpsIllegalAsVNFunc[i]] |= VNFOA_IllegalGenTreeOp; @@ -6313,6 +6413,24 @@ void Compiler::fgValueNumberTreeConst(GenTree* tree) } break; +#ifdef FEATURE_SIMD + case TYP_SIMD8: + case TYP_SIMD12: + case TYP_SIMD16: + case TYP_SIMD32: + +#ifdef TARGET_64BIT + // Only the zero constant is currently allowed for SIMD types + // + assert(tree->AsIntConCommon()->LngValue() == 0); + tree->gtVNPair.SetBoth(vnStore->VNForLongCon(tree->AsIntConCommon()->LngValue())); +#else // 32BIT + assert(tree->AsIntConCommon()->IconValue() == 0); + tree->gtVNPair.SetBoth(vnStore->VNForIntCon(tree->AsIntConCommon()->IconValue())); +#endif + break; +#endif // FEATURE_SIMD + case TYP_FLOAT: tree->gtVNPair.SetBoth(vnStore->VNForFloatCon((float)tree->AsDblCon()->gtDconVal)); break; @@ -6680,40 +6798,46 @@ void Compiler::fgValueNumberTree(GenTree* tree) genTreeOps oper = tree->OperGet(); #ifdef FEATURE_SIMD - // TODO-CQ: For now TYP_SIMD values are not handled by value numbering to be amenable for CSE'ing. 
- if (oper == GT_SIMD) + if ((JitConfig.JitDisableSimdVN() & 1) == 1) { - tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, TYP_UNKNOWN)); - return; + // This Jit Config forces the previous behavior of value numbering for SIMD nodes + if (oper == GT_SIMD) + { + tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, TYP_UNKNOWN)); + return; + } } -#endif +#endif // FEATURE_SIMD #ifdef FEATURE_HW_INTRINSICS - if (oper == GT_HWINTRINSIC) + if ((JitConfig.JitDisableSimdVN() & 2) == 2) { - // TODO-CQ: For now hardware intrinsics are not handled by value numbering to be amenable for CSE'ing. - tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, TYP_UNKNOWN)); + // This Jit Config forces the previous behavior of value numbering for HW Intrinsic nodes + if (oper == GT_HWINTRINSIC) + { + tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, TYP_UNKNOWN)); - GenTreeHWIntrinsic* hwIntrinsicNode = tree->AsHWIntrinsic(); - assert(hwIntrinsicNode != nullptr); + GenTreeHWIntrinsic* hwIntrinsicNode = tree->AsHWIntrinsic(); + assert(hwIntrinsicNode != nullptr); - // For safety/correctness we must mutate the global heap valuenumber - // for any HW intrinsic that performs a memory store operation - if (hwIntrinsicNode->OperIsMemoryStore()) - { - fgMutateGcHeap(tree DEBUGARG("HWIntrinsic - MemoryStore")); - } + // For safety/correctness we must mutate the global heap valuenumber + // for any HW intrinsic that performs a memory store operation + if (hwIntrinsicNode->OperIsMemoryStore()) + { + fgMutateGcHeap(tree DEBUGARG("HWIntrinsic - MemoryStore")); + } - return; + return; + } } #endif // FEATURE_HW_INTRINSICS var_types typ = tree->TypeGet(); if (GenTree::OperIsConst(oper)) { - // If this is a struct assignment, with a constant rhs, it is an initBlk, and it is not - // really useful to value number the constant. - if (!varTypeIsStruct(tree)) + // If this is a struct assignment, with a constant rhs, (i,.e. an initBlk), + // it is not useful to value number the constant. 
+ if (tree->TypeGet() != TYP_STRUCT) { fgValueNumberTreeConst(tree); } @@ -7024,7 +7148,8 @@ void Compiler::fgValueNumberTree(GenTree* tree) unsigned memorySsaNum; #endif - if ((oper == GT_ASG) && !varTypeIsStruct(tree)) + // Allow assignments for all enregisterable types to be value numbered (SIMD types) + if ((oper == GT_ASG) && varTypeIsEnregisterable(tree)) { GenTree* lhs = tree->AsOp()->gtOp1; GenTree* rhs = tree->AsOp()->gtOp2; @@ -7032,7 +7157,7 @@ void Compiler::fgValueNumberTree(GenTree* tree) ValueNumPair rhsVNPair = rhs->gtVNPair; // Is the type being stored different from the type computed by the rhs? - if (rhs->TypeGet() != lhs->TypeGet()) + if ((rhs->TypeGet() != lhs->TypeGet()) && (lhs->OperGet() != GT_BLK)) { // This means that there is an implicit cast on the rhs value // @@ -7229,11 +7354,13 @@ void Compiler::fgValueNumberTree(GenTree* tree) noway_assert(!"Phi arg cannot be LHS."); break; - case GT_BLK: case GT_OBJ: - noway_assert(!"GT_BLK/GT_OBJ can not be LHS when !varTypeIsStruct(tree) is true!"); + noway_assert(!"GT_OBJ can not be LHS when (tree->TypeGet() != TYP_STRUCT)!"); break; + case GT_BLK: + __fallthrough; + case GT_IND: { bool isVolatile = (lhs->gtFlags & GTF_IND_VOLATILE) != 0; @@ -7594,7 +7721,7 @@ void Compiler::fgValueNumberTree(GenTree* tree) } } // Other kinds of assignment: initblk and copyblk. 
- else if (oper == GT_ASG && varTypeIsStruct(tree)) + else if (oper == GT_ASG && (tree->TypeGet() == TYP_STRUCT)) { fgValueNumberBlockAssignment(tree); } @@ -7923,6 +8050,21 @@ void Compiler::fgValueNumberTree(GenTree* tree) { fgValueNumberIntrinsic(tree); } + +#ifdef FEATURE_SIMD + else if (tree->OperGet() == GT_SIMD) + { + fgValueNumberSimd(tree); + } +#endif // FEATURE_SIMD + +#ifdef FEATURE_HW_INTRINSICS + else if (tree->OperGet() == GT_HWINTRINSIC) + { + fgValueNumberHWIntrinsic(tree); + } +#endif // FEATURE_HW_INTRINSICS + else // Look up the VNFunc for the node { VNFunc vnf = GetVNFuncForNode(tree); @@ -7996,7 +8138,7 @@ void Compiler::fgValueNumberTree(GenTree* tree) ValueNumPair op2vnp; ValueNumPair op2Xvnp; vnStore->VNPUnpackExc(op2VNPair, &op2vnp, &op2Xvnp); - ValueNumPair excSet = vnStore->VNPExcSetUnion(op1Xvnp, op2Xvnp); + ValueNumPair excSetPair = vnStore->VNPExcSetUnion(op1Xvnp, op2Xvnp); ValueNum newVN = ValueNumStore::NoVN; @@ -8010,14 +8152,14 @@ void Compiler::fgValueNumberTree(GenTree* tree) if (newVN != ValueNumStore::NoVN) { // We don't care about differences between liberal and conservative for pointer values. 
- newVN = vnStore->VNWithExc(newVN, excSet.GetLiberal()); + newVN = vnStore->VNWithExc(newVN, excSetPair.GetLiberal()); tree->gtVNPair.SetBoth(newVN); } else { VNFunc vnf = GetVNFuncForNode(tree); ValueNumPair normalPair = vnStore->VNPairForFunc(tree->TypeGet(), vnf, op1vnp, op2vnp); - tree->gtVNPair = vnStore->VNPWithExc(normalPair, excSet); + tree->gtVNPair = vnStore->VNPWithExc(normalPair, excSetPair); // For overflow checking operations the VNF_OverflowExc will be added below // by fgValueNumberAddExceptionSet } @@ -8145,10 +8287,7 @@ void Compiler::fgValueNumberTree(GenTree* tree) ValueNumPair vnpIndex = tree->AsBoundsChk()->gtIndex->gtVNPair; ValueNumPair vnpArrLen = tree->AsBoundsChk()->gtArrLen->gtVNPair; - // Construct the exception set for bounds check - ValueNumPair vnpExcSet = vnStore->VNPExcSetSingleton( - vnStore->VNPairForFunc(TYP_REF, VNF_IndexOutOfRangeExc, vnStore->VNPNormalPair(vnpIndex), - vnStore->VNPNormalPair(vnpArrLen))); + ValueNumPair vnpExcSet = ValueNumStore::VNPForEmptyExcSet(); // And collect the exceptions from Index and ArrLen vnpExcSet = vnStore->VNPUnionExcSet(vnpIndex, vnpExcSet); @@ -8157,6 +8296,9 @@ void Compiler::fgValueNumberTree(GenTree* tree) // A bounds check node has no value, but may throw exceptions. tree->gtVNPair = vnStore->VNPWithExc(vnStore->VNPForVoid(), vnpExcSet); + // next add the bounds check exception set for the current tree node + fgValueNumberAddExceptionSet(tree); + // Record non-constant value numbers that are used as the length argument to bounds checks, so that // assertion prop will know that comparisons against them are worth analyzing. ValueNum lengthVN = tree->AsBoundsChk()->gtArrLen->gtVNPair.GetConservative(); @@ -8276,6 +8418,242 @@ void Compiler::fgValueNumberIntrinsic(GenTree* tree) } } +#ifdef FEATURE_SIMD +// Does value-numbering for a GT_SIMD node. 
+void Compiler::fgValueNumberSimd(GenTree* tree) +{ + assert(tree->OperGet() == GT_SIMD); + GenTreeSIMD* simdNode = tree->AsSIMD(); + assert(simdNode != nullptr); + ValueNumPair excSetPair; + ValueNumPair normalPair; + + // There are some SIMD operations that have zero args, i.e. NI_Vector128_Zero + if (tree->AsOp()->gtOp1 == nullptr) + { + excSetPair = ValueNumStore::VNPForEmptyExcSet(); + normalPair = vnStore->VNPairForFunc(tree->TypeGet(), GetVNFuncForNode(tree)); + } + else if (tree->AsOp()->gtOp1->OperIs(GT_LIST)) + { + assert(tree->AsOp()->gtOp2 == nullptr); + + // We have a SIMD node in the GT_LIST form with 3 or more args + // For now we will generate a unique value number for this case. + + // Generate a unique VN + tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, tree->TypeGet())); + return; + } + else // SIMD unary or binary operator. + { + ValueNumPair resvnp = ValueNumPair(); + ValueNumPair op1vnp; + ValueNumPair op1Xvnp; + vnStore->VNPUnpackExc(tree->AsOp()->gtOp1->gtVNPair, &op1vnp, &op1Xvnp); + + if (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicInitArray) + { + // rationalize rewrites this as an explicit load with op1 as the base address + + ValueNumPair op2vnp; + if (tree->AsOp()->gtOp2 == nullptr) + { + // a nullptr for op2 means that we have an impicit index of zero + op2vnp = ValueNumPair(vnStore->VNZeroForType(TYP_INT), vnStore->VNZeroForType(TYP_INT)); + + excSetPair = op1Xvnp; + } + else // We have an explicit index in op2 + { + ValueNumPair op2Xvnp; + vnStore->VNPUnpackExc(tree->AsOp()->gtOp2->gtVNPair, &op2vnp, &op2Xvnp); + + excSetPair = vnStore->VNPExcSetUnion(op1Xvnp, op2Xvnp); + } + + ValueNum addrVN = + vnStore->VNForFunc(TYP_BYREF, GetVNFuncForNode(tree), op1vnp.GetLiberal(), op2vnp.GetLiberal()); +#ifdef DEBUG + if (verbose) + { + printf("Treating GT_SIMD InitArray as a ByrefExposed load , addrVN is "); + vnPrint(addrVN, 0); + } +#endif // DEBUG + + // The address points into the heap, so it is an ByrefExposed load. 
+ // + ValueNum loadVN = fgValueNumberByrefExposedLoad(tree->TypeGet(), addrVN); + tree->gtVNPair.SetLiberal(loadVN); + tree->gtVNPair.SetConservative(vnStore->VNForExpr(compCurBB, tree->TypeGet())); + tree->gtVNPair = vnStore->VNPWithExc(tree->gtVNPair, excSetPair); + fgValueNumberAddExceptionSetForIndirection(tree, tree->AsOp()->gtOp1); + return; + } + + bool encodeResultType = vnEncodesResultTypeForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID); + + if (encodeResultType) + { + ValueNum vnSize = vnStore->VNForIntCon(simdNode->gtSIMDSize); + ValueNum vnBaseType = vnStore->VNForIntCon(INT32(simdNode->gtSIMDBaseType)); + ValueNum simdTypeVN = vnStore->VNForFunc(TYP_REF, VNF_SimdType, vnSize, vnBaseType); + resvnp.SetBoth(simdTypeVN); + +#ifdef DEBUG + if (verbose) + { + printf(" simdTypeVN is "); + vnPrint(simdTypeVN, 1); + printf("\n"); + } +#endif + } + + VNFunc simdFunc = GetVNFuncForNode(tree); + + if (tree->AsOp()->gtOp2 == nullptr) + { + // Unary SIMD nodes have a nullptr for op2. + excSetPair = op1Xvnp; + if (encodeResultType) + { + normalPair = vnStore->VNPairForFunc(tree->TypeGet(), simdFunc, op1vnp, resvnp); + assert(vnStore->VNFuncArity(simdFunc) == 2); + } + else + { + normalPair = vnStore->VNPairForFunc(tree->TypeGet(), simdFunc, op1vnp); + assert(vnStore->VNFuncArity(simdFunc) == 1); + } + } + else + { + ValueNumPair op2vnp; + ValueNumPair op2Xvnp; + vnStore->VNPUnpackExc(tree->AsOp()->gtOp2->gtVNPair, &op2vnp, &op2Xvnp); + + excSetPair = vnStore->VNPExcSetUnion(op1Xvnp, op2Xvnp); + if (encodeResultType) + { + normalPair = vnStore->VNPairForFunc(tree->TypeGet(), simdFunc, op1vnp, op2vnp, resvnp); + assert(vnStore->VNFuncArity(simdFunc) == 3); + } + else + { + normalPair = vnStore->VNPairForFunc(tree->TypeGet(), simdFunc, op1vnp, op2vnp); + assert(vnStore->VNFuncArity(simdFunc) == 2); + } + } + } + tree->gtVNPair = vnStore->VNPWithExc(normalPair, excSetPair); +} +#endif // FEATURE_SIMD + +#ifdef FEATURE_HW_INTRINSICS +// Does value-numbering for a 
GT_HWINTRINSIC node +void Compiler::fgValueNumberHWIntrinsic(GenTree* tree) +{ + assert(tree->OperGet() == GT_HWINTRINSIC); + GenTreeHWIntrinsic* hwIntrinsicNode = tree->AsHWIntrinsic(); + assert(hwIntrinsicNode != nullptr); + + // For safety/correctness we must mutate the global heap valuenumber + // for any HW intrinsic that performs a memory store operation + if (hwIntrinsicNode->OperIsMemoryStore()) + { + fgMutateGcHeap(tree DEBUGARG("HWIntrinsic - MemoryStore")); + } + + int lookupNumArgs = HWIntrinsicInfo::lookupNumArgs(hwIntrinsicNode->gtHWIntrinsicId); + VNFunc func = GetVNFuncForNode(tree); + unsigned fixedArity = vnStore->VNFuncArity(func); + + ValueNumPair excSetPair = ValueNumStore::VNPForEmptyExcSet(); + ValueNumPair normalPair; + + // There are some HWINTRINSICS operations that have zero args, i.e. NI_Vector128_Zero + if (tree->AsOp()->gtOp1 == nullptr) + { + assert(fixedArity == 0); + + normalPair = vnStore->VNPairForFunc(tree->TypeGet(), func); + assert(lookupNumArgs == 0); + } + else if (tree->AsOp()->gtOp1->OperIs(GT_LIST) || (lookupNumArgs == -1)) + { + // We have a HWINTRINSIC node in the GT_LIST form with 3 or more args + // Or the numArgs was specified as -1 in the numArgs column in "hwinstrinsiclistxarch.h" + // For now we will generate a unique value number for this case. + + // Generate unique VN + tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, tree->TypeGet())); + return; + } + else // HWINTRINSIC unary or binary operator. 
+ { + ValueNumPair resvnp = ValueNumPair(); + bool encodeResultType = vnEncodesResultTypeForHWIntrinsic(hwIntrinsicNode->gtHWIntrinsicId); + + if (encodeResultType) + { + ValueNum vnSize = vnStore->VNForIntCon(hwIntrinsicNode->gtSIMDSize); + ValueNum vnBaseType = vnStore->VNForIntCon(INT32(hwIntrinsicNode->gtSIMDBaseType)); + ValueNum simdTypeVN = vnStore->VNForFunc(TYP_REF, VNF_SimdType, vnSize, vnBaseType); + resvnp.SetBoth(simdTypeVN); + +#ifdef DEBUG + if (verbose) + { + printf(" simdTypeVN is "); + vnPrint(simdTypeVN, 1); + printf("\n"); + } +#endif + } + ValueNumPair op1vnp; + ValueNumPair op1Xvnp; + vnStore->VNPUnpackExc(tree->AsOp()->gtOp1->gtVNPair, &op1vnp, &op1Xvnp); + + if (tree->AsOp()->gtOp2 == nullptr) + { + excSetPair = op1Xvnp; + + if (encodeResultType) + { + normalPair = vnStore->VNPairForFunc(tree->TypeGet(), func, op1vnp, resvnp); + assert(vnStore->VNFuncArity(func) == 2); + } + else + { + normalPair = vnStore->VNPairForFunc(tree->TypeGet(), func, op1vnp); + assert(vnStore->VNFuncArity(func) == 1); + } + } + else + { + ValueNumPair op2vnp; + ValueNumPair op2Xvnp; + vnStore->VNPUnpackExc(tree->AsOp()->gtOp2->gtVNPair, &op2vnp, &op2Xvnp); + + excSetPair = vnStore->VNPExcSetUnion(op1Xvnp, op2Xvnp); + if (encodeResultType) + { + normalPair = vnStore->VNPairForFunc(tree->TypeGet(), func, op1vnp, op2vnp, resvnp); + assert(vnStore->VNFuncArity(func) == 3); + } + else + { + normalPair = vnStore->VNPairForFunc(tree->TypeGet(), func, op1vnp, op2vnp); + assert(vnStore->VNFuncArity(func) == 2); + } + } + } + tree->gtVNPair = vnStore->VNPWithExc(normalPair, excSetPair); +} +#endif // FEATURE_HW_INTRINSICS + void Compiler::fgValueNumberCastTree(GenTree* tree) { assert(tree->OperGet() == GT_CAST); @@ -9355,6 +9733,47 @@ void Compiler::fgValueNumberAddExceptionSetForOverflow(GenTree* tree) } //-------------------------------------------------------------------------------- +// fgValueNumberAddExceptionSetForBoundsCheck +// - Adds the exception set for the 
current tree node +// which is performing an bounds check operation +// +// Arguments: +// tree - The current GenTree node, +// It must be a node that performs a bounds check operation +// +// Return Value: +// - The tree's gtVNPair is updated to include the +// VNF_IndexOutOfRangeExc exception set. +// +void Compiler::fgValueNumberAddExceptionSetForBoundsCheck(GenTree* tree) +{ + GenTreeBoundsChk* node = tree->AsBoundsChk(); + assert(node != nullptr); + + ValueNumPair vnpIndex = node->gtIndex->gtVNPair; + ValueNumPair vnpArrLen = node->gtArrLen->gtVNPair; + + // Unpack, Norm,Exc for the tree's VN + // + ValueNumPair vnpTreeNorm; + ValueNumPair vnpTreeExc; + + vnStore->VNPUnpackExc(tree->gtVNPair, &vnpTreeNorm, &vnpTreeExc); + + // Construct the exception set for bounds check + ValueNumPair boundsChkExcSet = vnStore->VNPExcSetSingleton( + vnStore->VNPairForFunc(TYP_REF, VNF_IndexOutOfRangeExc, vnStore->VNPNormalPair(vnpIndex), + vnStore->VNPNormalPair(vnpArrLen))); + + // Combine the new Overflow exception with the original exception set of tree + ValueNumPair newExcSet = vnStore->VNPExcSetUnion(vnpTreeExc, boundsChkExcSet); + + // Update the VN for the tree it, the updated VN for tree + // now includes the IndexOutOfRange exception. 
+ tree->gtVNPair = vnStore->VNPWithExc(vnpTreeNorm, newExcSet); +} + +//-------------------------------------------------------------------------------- // fgValueNumberAddExceptionSetForCkFinite // - Adds the exception set for the current tree node // which is a CkFinite operation @@ -9418,9 +9837,27 @@ void Compiler::fgValueNumberAddExceptionSet(GenTree* tree) case GT_ADD: // An Overflow checking ALU operation case GT_SUB: case GT_MUL: + assert(tree->gtOverflowEx()); fgValueNumberAddExceptionSetForOverflow(tree); break; + case GT_DIV: + case GT_UDIV: + case GT_MOD: + case GT_UMOD: + fgValueNumberAddExceptionSetForDivision(tree); + break; + +#ifdef FEATURE_SIMD + case GT_SIMD_CHK: +#endif // FEATURE_SIMD +#ifdef FEATURE_HW_INTRINSICS + case GT_HW_INTRINSIC_CHK: +#endif // FEATURE_HW_INTRINSICS + case GT_ARR_BOUNDS_CHECK: + fgValueNumberAddExceptionSetForBoundsCheck(tree); + break; + case GT_LCLHEAP: // It is not necessary to model the StackOverflow exception for GT_LCLHEAP break; @@ -9460,17 +9897,16 @@ void Compiler::fgValueNumberAddExceptionSet(GenTree* tree) fgValueNumberAddExceptionSetForIndirection(tree, tree->AsArrOffs()->gtArrObj); break; - case GT_DIV: - case GT_UDIV: - case GT_MOD: - case GT_UMOD: - fgValueNumberAddExceptionSetForDivision(tree); - break; - case GT_CKFINITE: fgValueNumberAddExceptionSetForCkFinite(tree); break; +#ifdef FEATURE_HW_INTRINSICS + case GT_HWINTRINSIC: + // ToDo: model the exceptions for Intrinsics + break; +#endif // FEATURE_HW_INTRINSICS + default: assert(!"Handle this oper in fgValueNumberAddExceptionSet"); break; diff --git a/src/coreclr/src/jit/valuenum.h b/src/coreclr/src/jit/valuenum.h index fdb6268..d4167a0 100644 --- a/src/coreclr/src/jit/valuenum.h +++ b/src/coreclr/src/jit/valuenum.h @@ -134,8 +134,9 @@ private: { VNFOA_IllegalGenTreeOp = 0x1, // corresponds to a genTreeOps value that is not a legal VN func. VNFOA_Commutative = 0x2, // 1 iff the function is commutative. 
- VNFOA_Arity = 0x4, // Bits 2..3 encode the arity. - VNFOA_AfterArity = 0x20, // Makes it clear what value the next flag(s) after Arity should have. + VNFOA_Arity1 = 0x4, // Bits 2,3,4 encode the arity. + VNFOA_Arity2 = 0x8, // Bits 2,3,4 encode the arity. + VNFOA_Arity4 = 0x10, // Bits 2,3,4 encode the arity. VNFOA_KnownNonNull = 0x20, // 1 iff the result is known to be non-null. VNFOA_SharedStatic = 0x40, // 1 iff this VNF is represent one of the shared static jit helpers }; @@ -143,7 +144,7 @@ private: static const unsigned VNFOA_ArityShift = 2; static const unsigned VNFOA_ArityBits = 3; static const unsigned VNFOA_MaxArity = (1 << VNFOA_ArityBits) - 1; // Max arity we can represent. - static const unsigned VNFOA_ArityMask = VNFOA_AfterArity - VNFOA_Arity; + static const unsigned VNFOA_ArityMask = (VNFOA_Arity4 | VNFOA_Arity2 | VNFOA_Arity1); // These enum constants are used to encode the cast operation in the lowest bits by VNForCastOper enum VNFCastAttrib @@ -878,6 +879,12 @@ public: // Prints a representation of the set of exceptions on standard out. void vnDumpExcSeq(Compiler* comp, VNFuncApp* excSeq, bool isHead); +#ifdef FEATURE_SIMD + // Requires "simdType" to be a VNF_SimdType VNFuncApp. + // Prints a representation (comma-separated list of field names) on standard out. + void vnDumpSimdType(Compiler* comp, VNFuncApp* simdType); +#endif // FEATURE_SIMD + // Returns the string name of "vnf". static const char* VNFuncName(VNFunc vnf); // Used in the implementation of the above. 
diff --git a/src/coreclr/src/jit/valuenumfuncs.h b/src/coreclr/src/jit/valuenumfuncs.h index 7e96d43..00fa690 100644 --- a/src/coreclr/src/jit/valuenumfuncs.h +++ b/src/coreclr/src/jit/valuenumfuncs.h @@ -147,7 +147,7 @@ ValueNumFuncDef(LE_UN, 2, false, false, false) ValueNumFuncDef(GE_UN, 2, false, false, false) ValueNumFuncDef(GT_UN, 2, false, false, false) -// currently we won't constant fold the next six +// currently we don't constant fold the next six ValueNumFuncDef(ADD_OVF, 2, true, false, false) // overflow checking operations ValueNumFuncDef(SUB_OVF, 2, false, false, false) @@ -157,6 +157,33 @@ ValueNumFuncDef(ADD_UN_OVF, 2, true, false, false) // unsigned overflow checkin ValueNumFuncDef(SUB_UN_OVF, 2, false, false, false) ValueNumFuncDef(MUL_UN_OVF, 2, true, false, false) +#ifdef FEATURE_SIMD +ValueNumFuncDef(SimdType, 2, false, false, false) // A value number function to compose a SIMD type +#endif + +#define SIMD_INTRINSIC(m, i, id, n, r, argCount, arg1, arg2, arg3, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10) \ +ValueNumFuncDef(SIMD_##id, argCount, false, false, false) // All of the SIMD intrinsic (Consider isCommutativeSIMDIntrinsic) +#include "simdintrinsiclist.h" +#define VNF_SIMD_FIRST VNF_SIMD_None + +#if defined(TARGET_XARCH) +#define HARDWARE_INTRINSIC(id, name, isa, ival, size, argCount, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, flag) \ +ValueNumFuncDef(HWI_##id, argCount, false, false, false) // All of the HARDWARE_INTRINSICS for x86/x64 +#include "hwintrinsiclistxarch.h" +#define VNF_HWI_FIRST VNF_HWI_Vector128_As + +#elif defined (TARGET_ARM64) +#define HARDWARE_INTRINSIC(isa, name, ival, size, argCount, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, flag) \ +ValueNumFuncDef(HWI_##isa##_##name, argCount, false, false, false) // All of the HARDWARE_INTRINSICS for arm64 +#include "hwintrinsiclistarm64.h" +#define VNF_HWI_FIRST VNF_HWI_Vector64_AsByte + +#elif defined (TARGET_ARM) +// No Hardware Intrinsics on ARM32 +#else +#error 
Unsupported platform +#endif + // clang-format on #undef ValueNumFuncDef -- 2.7.4