From 146fb87bffd3e03014785548d322b228fa9499fd Mon Sep 17 00:00:00 2001
From: Eugene Rozenfeld
Date: Fri, 5 Aug 2016 13:14:58 -0700
Subject: [PATCH] Null check folding.

1. Added a null check folding optimization to early prop.

The optimization tries to fold GT_NULLCHECK(y) nodes into GT_IND(x) nodes
where x = y + const in the same block (where const is sufficiently small).
The algorithm uses SSA use-def info to go from x to its def and then tries
to match the pattern x = COMMA(NULLCHECK(y), ADD(y, const)). If such a
pattern is found, the algorithm checks the trees and statements that are
between the use and the def in execution order to see if they have unsafe
side effects: calls, exception sources, and assignments (all assignments
if we are in a try and assignments to global memory if we are not). If
there are no nodes with unsafe side effects, the null check is removed.
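For readers outside the JIT, the shape of the match can be modeled in a few
lines of standalone C++. Everything below (Node, Kind, the 0x1000 threshold)
is a hypothetical stand-in for the real GenTree/fgIsBigOffset machinery, not
the JIT's actual API; 'y' is matched by node identity here, where the real
code compares local variable numbers:

    #include <cstdint>
    #include <cstdio>

    // Hypothetical stand-ins for GenTree nodes; not the real JIT types.
    enum class Kind { LclVar, Const, Add, NullCheck, Comma };

    struct Node
    {
        Kind    kind;
        Node*   op1 = nullptr;
        Node*   op2 = nullptr;
        int64_t cns = 0; // constant value when kind == Kind::Const
    };

    // Model of the pattern match: does 'def' look like
    // COMMA(NULLCHECK(y), ADD(y, const)) with a suitably small constant?
    static bool MatchesNullCheckPattern(Node* def, Node** y)
    {
        const int64_t maxOffset = 0x1000; // made-up stand-in for the fgIsBigOffset bound

        if (def->kind != Kind::Comma)
            return false;

        Node* check = def->op1;
        Node* add   = def->op2;
        if ((check->kind != Kind::NullCheck) || (add->kind != Kind::Add))
            return false;

        if ((add->op2->kind != Kind::Const) || (add->op2->cns >= maxOffset))
            return false;

        if (check->op1 != add->op1) // both sides must refer to the same 'y'
            return false;

        *y = check->op1;
        return true;
    }

    int main()
    {
        // x = COMMA(NULLCHECK(y), ADD(y, 8))
        Node y{Kind::LclVar};
        Node cns{Kind::Const};
        cns.cns = 8;
        Node add{Kind::Add, &y, &cns};
        Node check{Kind::NullCheck, &y};
        Node def{Kind::Comma, &check, &add};

        Node* base = nullptr;
        printf("pattern matches: %s\n", MatchesNullCheckPattern(&def, &base) ? "yes" : "no");
        return 0;
    }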
2. Made several improvements to null check elimination in assertion propagation.

  * Added a new kind for op1: O1K_VALUE_NUMBER.
  * Non-null assertions can now be made about arbitrary value numbers, not just locals.
  * Fixed the code that was trying to find a ref given a byref: the code now
    handles an arbitrary number of offsets and checks whether the total offset
    is small enough (a standalone sketch of this loop follows after this list).
  * Added similar code that tries to find a ref VN given a byref VN.
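A standalone sketch of the offset accumulation described in the third bullet,
with a hypothetical Expr node type as before and fgIsBigOffset replaced by a
fixed, made-up threshold:

    #include <cstdint>
    #include <cstdio>

    // Hypothetical expression node: a local, a constant, or an ADD.
    struct Expr
    {
        enum { Lcl, Cns, Add } kind;
        Expr*   op1 = nullptr;
        Expr*   op2 = nullptr;
        int64_t cns = 0;
    };

    // Peel any number of ADD(expr, const) / ADD(const, expr) layers off an
    // address expression, accumulating the total offset, and return the
    // underlying base. The old code looked through at most two ADD nodes.
    static Expr* PeelConstantOffsets(Expr* addr, int64_t* totalOffset)
    {
        while (addr->kind == Expr::Add)
        {
            if (addr->op2->kind == Expr::Cns)
            {
                *totalOffset += addr->op2->cns;
                addr = addr->op1;
            }
            else if (addr->op1->kind == Expr::Cns)
            {
                *totalOffset += addr->op1->cns;
                addr = addr->op2;
            }
            else
            {
                break; // neither operand is a constant; stop peeling
            }
        }
        return addr;
    }

    int main()
    {
        // ADD(ADD(base, 8), 16): two layers of constant offsets.
        Expr base{Expr::Lcl};
        Expr c8{Expr::Cns};  c8.cns = 8;
        Expr c16{Expr::Cns}; c16.cns = 16;
        Expr inner{Expr::Add, &base, &c8};
        Expr outer{Expr::Add, &inner, &c16};

        int64_t offset = 0;
        Expr*   found  = PeelConstantOffsets(&outer, &offset);

        // A real implementation would now reject a "big" total offset (one that
        // may straddle into a following object); 0x1000 is a made-up bound.
        printf("peeled to base: %s, total offset: %lld, small enough: %s\n",
               (found == &base) ? "yes" : "no",
               (long long)offset,
               (offset < 0x1000) ? "yes" : "no");
        return 0;
    }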
This addresses part of the suboptimal code generated for dotnet/coreclr#1226:
the null check is no longer emitted.

Correctness: ran full desktop and CoreCLR testing.

Throughput: no measurable throughput impact (verified by running internal
CQNgenTP several times).

Code size in CoreCLR:

Framework assemblies:

Total bytes of diff: -805 (-0.01 % of base)
    diff is an improvement.
Total byte diff includes 0 bytes from reconciling methods
    Base had 0 unique methods, 0 unique bytes
    Diff had 0 unique methods, 0 unique bytes
Top file improvements by size (bytes):
    -352 : System.Private.CoreLib.dasm (-0.01 % of base)
    -306 : Microsoft.CodeAnalysis.CSharp.dasm (-0.01 % of base)
    -58 : Microsoft.CodeAnalysis.dasm (-0.01 % of base)
    -48 : System.Numerics.Vectors.dasm (-0.08 % of base)
    -14 : System.Xml.XmlDocument.dasm (-0.01 % of base)
7 total files with size differences.
Top method improvements by size (bytes):
    -30 : System.Numerics.Vectors.dasm - System.Numerics.Matrix4x4:ToString():ref:this
    -30 : System.Private.CoreLib.dasm - System.DateTimeParse:ParseByFormat(byref,byref,byref,ref,byref):bool
    -24 : Microsoft.CodeAnalysis.CSharp.dasm - d__68:MoveNext():bool:this
    -18 : System.Private.CoreLib.dasm - System.DateTimeParse:Lex(int,byref,byref,byref,byref,byref,int):bool
    -18 : System.Private.CoreLib.dasm - System.DateTimeParse:ProcessDateTimeSuffix(byref,byref,byref):bool
243 total methods with size differences.

JIT Code quality benchmarks in CoreCLR:

Total bytes of diff: -29 (-0.01 % of base)
    diff is an improvement.
Total byte diff includes 0 bytes from reconciling methods
    Base had 0 unique methods, 0 unique bytes
    Diff had 0 unique methods, 0 unique bytes
Top file improvements by size (bytes):
    -25 : Bytemark\Bytemark\Bytemark.dasm (-0.03 % of base)
    -4 : BenchmarksGame\pidigits\pi-digits\pi-digits.dasm (-0.21 % of base)
2 total files with size differences.
Top method improvements by size (bytes):
    -9 : Bytemark\Bytemark\Bytemark.dasm - AssignJagged:second_assignments(ref,ref)
    -6 : Bytemark\Bytemark\Bytemark.dasm - EMFloat:MultiplyInternalFPF(byref,byref,byref)
    -4 : Bytemark\Bytemark\Bytemark.dasm - EMFloat:AddSubInternalFPF(ubyte,byref,byref,byref)
    -2 : Bytemark\Bytemark\Bytemark.dasm - EMFloat:denormalize(byref,int)
    -2 : Bytemark\Bytemark\Bytemark.dasm - EMFloat:DivideInternalFPF(byref,byref,byref)
8 total methods with size differences.

In internal SPMI: 3915 methods with diffs, almost everything -- code size improvements.
13,715 bytes code size reduction overall, 0.51% on affected methods.

CQ_Perf: 85 methods with diffs, 84 code size improvements, no regressions.
Benchmarks with code size diffs:
    Roslyn 59
    TrueTypeBench 19
    mono-pi-digits 2
    mono-chameneos-redux 2
    ByteMark\assign_jagged 1
    Json_Serialize 1
    SharpChess 1
    BenchI\mulmtx 1

Internal CQPerf didn't report any runtime wins for these benchmarks.

Commit migrated from https://github.com/dotnet/coreclr/commit/47eec64cdd246244a7dee1e7edd054e9e579be02
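The "unsafe side effects" test described in point 1 splits on whether the
block is inside a try region. Here is a minimal model of that decision; the
flag names are invented for illustration, where the real code tests
GTF_SIDE_EFFECT and GTF_GLOBALLY_VISIBLE_SIDE_EFFECTS:

    #include <cstdio>

    // Invented flag bits standing in for the relevant GenTree flags.
    enum : unsigned
    {
        FLAG_CALL      = 0x1, // node is a call
        FLAG_EXCEPTION = 0x2, // node can throw
        FLAG_ASG       = 0x4, // node is an assignment
        FLAG_GLOB_ASG  = 0x8, // node assigns to global (heap) memory
    };

    // Can a null check be moved past a node with these flags?
    // Inside a try, any assignment blocks the motion, because even a local
    // assigned here may be live in the handler. Outside a try, only calls,
    // exception sources, and assignments to global memory block it.
    static bool CanMoveNullCheckPast(unsigned flags, bool isInsideTry)
    {
        if (isInsideTry)
        {
            return (flags & (FLAG_CALL | FLAG_EXCEPTION | FLAG_ASG | FLAG_GLOB_ASG)) == 0;
        }
        return (flags & (FLAG_CALL | FLAG_EXCEPTION | FLAG_GLOB_ASG)) == 0;
    }

    int main()
    {
        unsigned localAsg = FLAG_ASG;
        printf("past local assignment, outside try: %d\n", CanMoveNullCheckPast(localAsg, false)); // 1
        printf("past local assignment, inside try:  %d\n", CanMoveNullCheckPast(localAsg, true));  // 0
        return 0;
    }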
---
 src/coreclr/src/jit/assertionprop.cpp | 133 ++++++++++++++----
 src/coreclr/src/jit/block.h           |   1 +
 src/coreclr/src/jit/compiler.h        |   7 +-
 src/coreclr/src/jit/earlyprop.cpp     | 258 +++++++++++++++++++++++++++++++---
 src/coreclr/src/jit/morph.cpp         |   7 +-
 5 files changed, 356 insertions(+), 50 deletions(-)

diff --git a/src/coreclr/src/jit/assertionprop.cpp b/src/coreclr/src/jit/assertionprop.cpp
index 750e2e3..e859cd7 100644
--- a/src/coreclr/src/jit/assertionprop.cpp
+++ b/src/coreclr/src/jit/assertionprop.cpp
@@ -621,6 +621,11 @@ void Compiler::optPrintAssertion(AssertionDsc* curAssertion, AssertionIndex ass
         printf("Loop_Bnd");
         vnStore->vnDump(this, curAssertion->op1.vn);
     }
+    else if (curAssertion->op1.kind == O1K_VALUE_NUMBER)
+    {
+        printf("Value_Number");
+        vnStore->vnDump(this, curAssertion->op1.vn);
+    }
     else
     {
         printf("?op1.kind?");
@@ -694,9 +699,20 @@ void Compiler::optPrintAssertion(AssertionDsc* curAssertion, AssertionIndex ass
     }
     else
     {
-        unsigned lclNum = curAssertion->op1.lcl.lclNum; assert(lclNum < lvaCount);
-        LclVarDsc * varDsc = lvaTable + lclNum;
-        if (varDsc->lvType == TYP_REF)
+        var_types op1Type;
+
+        if (curAssertion->op1.kind == O1K_VALUE_NUMBER)
+        {
+            op1Type = vnStore->TypeOfVN(curAssertion->op1.vn);
+        }
+        else
+        {
+            unsigned lclNum = curAssertion->op1.lcl.lclNum; assert(lclNum < lvaCount);
+            LclVarDsc * varDsc = lvaTable + lclNum;
+            op1Type = varDsc->lvType;
+        }
+
+        if (op1Type == TYP_REF)
         {
             assert(curAssertion->op2.u1.iconVal == 0);
             printf("null");
@@ -821,7 +837,7 @@ Compiler::AssertionIndex Compiler::optCreateAssertion(GenTreePtr op1, GenTreePtr
     }

     //
-    //  Did we recieve Helper call args?
+    //  Did we receive Helper call args?
     //
     if (op1->gtOper == GT_LIST)
     {
@@ -848,17 +864,27 @@ Compiler::AssertionIndex Compiler::optCreateAssertion(GenTreePtr op1, GenTreePtr
     //
     // Set op1 to the instance pointer of the indirection
    //
-    if ((op1->gtOper == GT_ADD) && (op1->gtType == TYP_BYREF))
-    {
-        op1 = op1->gtOp.gtOp1;
-        if ((op1->gtOper == GT_ADD) && (op1->gtType == TYP_BYREF))
+    ssize_t offset = 0;
+    while ((op1->gtOper == GT_ADD) && (op1->gtType == TYP_BYREF))
+    {
+        if (op1->gtGetOp2()->IsCnsIntOrI())
+        {
+            offset += op1->gtGetOp2()->gtIntCon.gtIconVal;
+            op1 = op1->gtGetOp1();
+        }
+        else if (op1->gtGetOp1()->IsCnsIntOrI())
         {
-            op1 = op1->gtOp.gtOp1;
+            offset += op1->gtGetOp1()->gtIntCon.gtIconVal;
+            op1 = op1->gtGetOp2();
+        }
+        else
+        {
+            break;
         }
     }

-    if (op1->gtOper != GT_LCL_VAR)
+    if (fgIsBigOffset(offset) || op1->gtOper != GT_LCL_VAR)
     {
         goto DONE_ASSERTION; // Don't make an assertion
     }
@@ -866,25 +892,65 @@ Compiler::AssertionIndex Compiler::optCreateAssertion(GenTreePtr op1, GenTreePtr
     unsigned lclNum = op1->gtLclVarCommon.gtLclNum; noway_assert(lclNum < lvaCount);
     LclVarDsc * lclVar = &lvaTable[lclNum];

+    ValueNum vn;
+
     //
     // We only perform null-checks on GC refs
     // so only make non-null assertions about GC refs
     //
     if (lclVar->TypeGet() != TYP_REF)
     {
-        goto DONE_ASSERTION; // Don't make an assertion
-    }
+        if (optLocalAssertionProp || (lclVar->TypeGet() != TYP_BYREF))
+        {
+            goto DONE_ASSERTION; // Don't make an assertion
+        }
+
+        vn = op1->gtVNPair.GetConservative();
+        VNFuncApp funcAttr;

-    // If the local variable has its address exposed then bail
-    if (lclVar->lvAddrExposed)
+        // Try to get value number corresponding to the GC ref of the indirection
+        while (vnStore->GetVNFunc(vn, &funcAttr) &&
+               (funcAttr.m_func == (VNFunc)GT_ADD) &&
+               (vnStore->TypeOfVN(vn) == TYP_BYREF))
+        {
+            if (vnStore->IsVNConstant(funcAttr.m_args[1]))
+            {
+                offset += vnStore->CoercedConstantValue<ssize_t>(funcAttr.m_args[1]);
+                vn = funcAttr.m_args[0];
+            }
+            else if (vnStore->IsVNConstant(funcAttr.m_args[0]))
+            {
+                offset += vnStore->CoercedConstantValue<ssize_t>(funcAttr.m_args[0]);
+                vn = funcAttr.m_args[1];
+            }
+            else
+            {
+                break;
+            }
+        }
+
+        if (fgIsBigOffset(offset) || (vnStore->TypeOfVN(vn) != TYP_REF))
+        {
+            goto DONE_ASSERTION; // Don't make an assertion
+        }
+
+        assertion->op1.kind = O1K_VALUE_NUMBER;
+    }
+    else
     {
-        goto DONE_ASSERTION; // Don't make an assertion
+        // If the local variable has its address exposed then bail
+        if (lclVar->lvAddrExposed)
+        {
+            goto DONE_ASSERTION; // Don't make an assertion
+        }
+
+        assertion->op1.kind = O1K_LCLVAR;
+        assertion->op1.lcl.lclNum = lclNum;
+        assertion->op1.lcl.ssaNum = op1->AsLclVarCommon()->GetSsaNum();
+        vn = op1->gtVNPair.GetConservative();
     }

-    assertion->op1.kind = O1K_LCLVAR;
-    assertion->op1.lcl.lclNum = lclNum;
-    assertion->op1.vn = op1->gtVNPair.GetConservative();
-    assertion->op1.lcl.ssaNum = op1->AsLclVarCommon()->GetSsaNum();
+    assertion->op1.vn = vn;
     assertion->assertionKind = assertionKind;
     assertion->op2.kind = O2K_CONST_INT;
     assertion->op2.vn = ValueNumStore::VNForNull();
@@ -1272,11 +1338,16 @@ DONE_ASSERTION:

     if (!optLocalAssertionProp)
     {
-        if (assertion->op1.vn == ValueNumStore::NoVN ||
-            assertion->op2.vn == ValueNumStore::NoVN ||
-            assertion->op1.vn == ValueNumStore::VNForVoid() ||
-            assertion->op2.vn == ValueNumStore::VNForVoid() ||
-            assertion->op1.lcl.ssaNum == SsaConfig::RESERVED_SSA_NUM)
+        if ((assertion->op1.vn == ValueNumStore::NoVN) ||
+            (assertion->op2.vn == ValueNumStore::NoVN) ||
+            (assertion->op1.vn == ValueNumStore::VNForVoid()) ||
+            (assertion->op2.vn == ValueNumStore::VNForVoid()))
+        {
+            return NO_ASSERTION_INDEX;
+        }
+
+        // TODO: only copy assertions rely on a valid SSA number, so we could generate more assertions here.
+        if ((assertion->op1.kind != O1K_VALUE_NUMBER) && (assertion->op1.lcl.ssaNum == SsaConfig::RESERVED_SSA_NUM))
         {
             return NO_ASSERTION_INDEX;
         }
@@ -1513,6 +1584,7 @@ void Compiler::optDebugCheckAssertion(AssertionDsc* assertion)
         case O1K_ARRLEN_OPER_BND:
         case O1K_ARRLEN_LOOP_BND:
         case O1K_CONSTANT_LOOP_BND:
+        case O1K_VALUE_NUMBER:
             assert(!optLocalAssertionProp);
             break;
         default:
@@ -1534,7 +1606,10 @@ void Compiler::optDebugCheckAssertion(AssertionDsc* assertion)
             break;
         case O1K_LCLVAR:
         case O1K_ARR_BND:
-            assert(lvaTable[assertion->op1.lcl.lclNum].lvType != TYP_REF || assertion->op2.u1.iconVal == 0);
+            assert((lvaTable[assertion->op1.lcl.lclNum].lvType != TYP_REF) || (assertion->op2.u1.iconVal == 0));
+            break;
+        case O1K_VALUE_NUMBER:
+            assert((vnStore->TypeOfVN(assertion->op1.vn) != TYP_REF) || (assertion->op2.u1.iconVal == 0));
             break;
         default:
             break;
@@ -3830,10 +3905,10 @@ void Compiler::optImpliedByTypeOfAssertions(ASSERT_TP& activeAssertions)
         }

         // impAssertion must be a Non Null assertion on lclNum
-        if (impAssertion->assertionKind != OAK_NOT_EQUAL ||
-            impAssertion->op1.kind != O1K_LCLVAR ||
-            impAssertion->op2.kind != O2K_CONST_INT ||
-            impAssertion->op1.vn != chkAssertion->op1.vn)
+        if ((impAssertion->assertionKind != OAK_NOT_EQUAL) ||
+            ((impAssertion->op1.kind != O1K_LCLVAR) && (impAssertion->op1.kind != O1K_VALUE_NUMBER)) ||
+            (impAssertion->op2.kind != O2K_CONST_INT) ||
+            (impAssertion->op1.vn != chkAssertion->op1.vn))
         {
             continue;
         }
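The new value-number walk in optCreateAssertion above mirrors the tree walk,
but over the VN graph: starting from a TYP_BYREF VN, it repeatedly looks the
VN up as an ADD application and accumulates constant operands until it
reaches (hopefully) a TYP_REF base. A toy model with a hand-rolled "VN store"
follows; all names here are hypothetical, not the real ValueNumStore API:

    #include <cstdint>
    #include <cstdio>
    #include <map>
    #include <utility>

    // Toy value-number store: a VN is just an integer id. Some VNs are
    // constants, some are ADDs of two other VNs, the rest are opaque
    // (e.g., a TYP_REF base).
    using VN = int;

    struct VNStore
    {
        std::map<VN, int64_t>           constants; // VN -> constant value
        std::map<VN, std::pair<VN, VN>> adds;      // VN -> ADD(vn1, vn2)

        bool IsConst(VN vn) const { return constants.count(vn) != 0; }
        bool IsAdd(VN vn)   const { return adds.count(vn) != 0; }
    };

    // Walk ADD applications, accumulating constant operands, and return the base VN.
    static VN PeelAddVNs(const VNStore& store, VN vn, int64_t* offset)
    {
        while (store.IsAdd(vn))
        {
            auto [vn1, vn2] = store.adds.at(vn);
            if (store.IsConst(vn2))
            {
                *offset += store.constants.at(vn2);
                vn = vn1;
            }
            else if (store.IsConst(vn1))
            {
                *offset += store.constants.at(vn1);
                vn = vn2;
            }
            else
            {
                break;
            }
        }
        return vn;
    }

    int main()
    {
        VNStore store;
        // VN 1: opaque base ref; VN 2: const 8; VN 3: ADD(1, 2); VN 4: const 16; VN 5: ADD(3, 4)
        store.constants[2] = 8;
        store.constants[4] = 16;
        store.adds[3] = {1, 2};
        store.adds[5] = {3, 4};

        int64_t offset = 0;
        VN base = PeelAddVNs(store, 5, &offset);
        printf("base VN: %d, offset: %lld\n", base, (long long)offset); // base VN: 1, offset: 24
        return 0;
    }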
diff --git a/src/coreclr/src/jit/block.h b/src/coreclr/src/jit/block.h
index bf7f820..beba28d 100644
--- a/src/coreclr/src/jit/block.h
+++ b/src/coreclr/src/jit/block.h
@@ -299,6 +299,7 @@ struct BasicBlock

 #define BBF_TRY_BEG         0x00000100  // BB starts a 'try' block
 #define BBF_FUNCLET_BEG     0x00000200  // BB is the beginning of a funclet
+#define BBF_HAS_NULLCHECK   0x00000400  // BB contains a null check
 #define BBF_NEEDS_GCPOLL    0x00000800  // This BB is the source of a back edge and needs a GC Poll
 #define BBF_RUN_RARELY      0x00001000  // BB is rarely run (catch clauses, blocks with throws etc)

diff --git a/src/coreclr/src/jit/compiler.h b/src/coreclr/src/jit/compiler.h
index b0125d9..7b2c0a8 100644
--- a/src/coreclr/src/jit/compiler.h
+++ b/src/coreclr/src/jit/compiler.h
@@ -5534,6 +5534,7 @@ public:
 #define OMF_HAS_NEWOBJ    0x00000002  // Method contains 'new' of an object type.
 #define OMF_HAS_ARRAYREF  0x00000004  // Method contains array element loads or stores.
 #define OMF_HAS_VTABLEREF 0x00000008  // Method contains method table reference.
+#define OMF_HAS_NULLCHECK 0x00000010  // Method contains null check.

     unsigned optMethodFlags;

@@ -5545,7 +5546,8 @@ public:
     {
         OPK_INVALID,
         OPK_ARRAYLEN,
-        OPK_OBJ_GETTYPE
+        OPK_OBJ_GETTYPE,
+        OPK_NULLCHECK
     };

     bool gtIsVtableRef(GenTreePtr tree);
@@ -5557,6 +5559,8 @@ public:
     bool optDoEarlyPropForBlock(BasicBlock* block);
     bool optDoEarlyPropForFunc();
     void optEarlyProp();
+    void optFoldNullCheck(GenTreePtr tree);
+    bool optCanMoveNullCheckPastTree(GenTreePtr tree, bool isInsideTry);

 #if ASSERTION_PROP
     /**************************************************************************
@@ -5584,6 +5588,7 @@ public:
         O1K_CONSTANT_LOOP_BND,
         O1K_EXACT_TYPE,
         O1K_SUBTYPE,
+        O1K_VALUE_NUMBER,
         O1K_COUNT
     };
     enum optOp2Kind { O2K_INVALID,

diff --git a/src/coreclr/src/jit/earlyprop.cpp b/src/coreclr/src/jit/earlyprop.cpp
index ff74d3a..c5a313b 100644
--- a/src/coreclr/src/jit/earlyprop.cpp
+++ b/src/coreclr/src/jit/earlyprop.cpp
@@ -4,11 +4,12 @@
 //
 // Early Value Propagation
 //
-// This phase performs an SSA-based value propagation optimization, currently only applies to array
-// lengths and runtime type handles. An SSA-based backwards tracking of local variables is performed
-// at each point of interest, e.g., an array length reference site or a method table reference site.
+// This phase performs an SSA-based value propagation optimization that currently only applies to array
+// lengths, runtime type handles, and explicit null checks. An SSA-based backwards tracking of local variables
+// is performed at each point of interest, e.g., an array length reference site, a method table reference site, or
+// an indirection.
 // The tracking continues until an interesting value is encountered. The value is then used to rewrite
-// the source site.
+// the source site or the value.
 //
///////////////////////////////////////////////////////////////////////////////////////

@@ -20,14 +21,16 @@
 bool Compiler::optDoEarlyPropForFunc()
 {
     bool propArrayLen  = (optMethodFlags & OMF_HAS_NEWARRAY) && (optMethodFlags & OMF_HAS_ARRAYREF);
     bool propGetType   = (optMethodFlags & OMF_HAS_NEWOBJ) && (optMethodFlags & OMF_HAS_VTABLEREF);
-    return propArrayLen || propGetType;
+    bool propNullCheck = (optMethodFlags & OMF_HAS_NULLCHECK) != 0;
+    return propArrayLen || propGetType || propNullCheck;
 }

 bool Compiler::optDoEarlyPropForBlock(BasicBlock* block)
 {
     bool bbHasArrayRef  = (block->bbFlags & BBF_HAS_INDX) != 0;
     bool bbHasVtableRef = (block->bbFlags & BBF_HAS_VTABREF) != 0;
-    return bbHasArrayRef || bbHasVtableRef;
+    bool bbHasNullCheck = (block->bbFlags & BBF_HAS_NULLCHECK) != 0;
+    return bbHasArrayRef || bbHasVtableRef || bbHasNullCheck;
 }

@@ -136,6 +139,7 @@ GenTreePtr Compiler::getObjectHandleNodeFromAllocation(GenTreePtr tree)
 // This phase performs an SSA-based value propagation, including
 //      1. Array length propagation.
 //      2. Runtime type handle propagation.
+//      3. Null check folding.
 //
 // For array length propagation, a demand-driven SSA-based backwards tracking of constant
 // array lengths is performed at each array length reference site which is in form of a
 // GT_ARR_LENGTH node. When a GT_ARR_LENGTH node is seen, the array ref pointer which is
 // the only child node of the GT_ARR_LENGTH is tracked. This is only done for array ref
 // pointers that have valid SSA forms.
 //
 // Similarly, the same algorithm also applies to rewriting a method table (also known as
 // vtable) reference site which is in form of GT_INDIR node. The base pointer, which is
 // an object reference pointer, is treated in the same way as an array reference pointer.
+//
+// Null check folding tries to find GT_INDIR(obj + const) that GT_NULLCHECK(obj) can be folded into
+// and removed. Currently, the algorithm only matches GT_INDIR and GT_NULLCHECK in the same basic block.

 void Compiler::optEarlyProp()
 {
@@ -183,6 +190,7 @@ void Compiler::optEarlyProp()
             // Walk the stmt tree in linear order to rewrite any array length reference with a
             // constant array length.
             bool isRewritten = false;
+            bool bbHasNullCheck = (block->bbFlags & BBF_HAS_NULLCHECK) != 0;
             for (GenTreePtr tree = stmt->gtStmt.gtStmtList; tree != nullptr; tree = tree->gtNext)
             {
                 if (optEarlyPropRewriteTree(tree))
@@ -230,20 +238,30 @@ bool Compiler::optEarlyPropRewriteTree(GenTreePtr tree)
         objectRefPtr = tree->gtOp.gtOp1;
         propKind = optPropKind::OPK_ARRAYLEN;
     }
-    else if (gtIsVtableRef(tree))
+    else if (tree->OperGet() == GT_IND)
     {
-        // Don't propagate type handles that are used as null checks, which are usually in
-        // form of
-        //      * stmtExpr void (top level)
-        //      \--* indir int
-        //          \--* lclVar ref V02 loc0
-        if (compCurStmt->gtStmt.gtStmtExpr == tree)
+        // optFoldNullCheck takes care of updating statement info if a null check is removed.
+        optFoldNullCheck(tree);
+
+        if (gtIsVtableRef(tree))
+        {
+            // Don't propagate type handles that are used as null checks, which are usually in
+            // form of
+            //      * stmtExpr void (top level)
+            //      \--* indir int
+            //          \--* lclVar ref V02 loc0
+            if (compCurStmt->gtStmt.gtStmtExpr == tree)
+            {
+                return false;
+            }
+
+            objectRefPtr = tree->gtOp.gtOp1;
+            propKind = optPropKind::OPK_OBJ_GETTYPE;
+        }
+        else
         {
             return false;
         }
-
-        objectRefPtr = tree->gtOp.gtOp1;
-        propKind = optPropKind::OPK_OBJ_GETTYPE;
     }
     else
     {
@@ -269,7 +287,7 @@ bool Compiler::optEarlyPropRewriteTree(GenTreePtr tree)
     if (propKind == optPropKind::OPK_ARRAYLEN)
     {
         assert(actualVal->IsCnsIntOrI());
-        
+
         if (actualVal->gtIntCon.gtIconVal > INT32_MAX)
         {
             // Don't propagate array lengths that are beyond the maximum value of a GT_ARR_LENGTH.
@@ -282,7 +300,7 @@ bool Compiler::optEarlyPropRewriteTree(GenTreePtr tree)
     {
         assert(actualVal->IsCnsIntOrI());
     }
-    
+
 #ifdef DEBUG
     if (verbose)
     {
@@ -310,7 +328,7 @@ bool Compiler::optEarlyPropRewriteTree(GenTreePtr tree)
         actualValCopy->gtType = origType;

         fgWalkTreePre(&actualValCopy, Compiler::lvaIncRefCntsCB, (void*)this, true);
-        
+
         if (actualValCopy != tree)
         {
             gtReplaceTree(root, tree, actualValCopy);
@@ -435,3 +453,205 @@ GenTreePtr Compiler::optPropGetValueRec(unsigned lclNum, unsigned ssaNum, optPro

     return value;
 }
+
+//----------------------------------------------------------------
+// optFoldNullCheck: Try to find a GT_NULLCHECK node that can be folded into the GT_INDIR node.
+//
+// Arguments:
+//    tree - The input GT_INDIR tree.
+//
+
+void Compiler::optFoldNullCheck(GenTreePtr tree)
+{
+    //
+    // Check for a pattern like this:
+    //
+    //                         =
+    //                       /   \
+    //                      x    comma
+    //                           /   \
+    //                     nullcheck  +
+    //                         |     / \
+    //                         y    y  const
+    //
+    //
+    //                    some trees in the same
+    //                    basic block with
+    //                    no unsafe side effects
+    //
+    //                      indir
+    //                        |
+    //                        x
+    //
+    // where the const is suitably small
+    // and transform it into
+    //
+    //                         =
+    //                       /   \
+    //                      x     +
+    //                           / \
+    //                          y  const
+    //
+    //
+    //                    some trees with no unsafe side effects here
+    //
+    //                      indir
+    //                        |
+    //                        x
+
+    assert(tree->OperGet() == GT_IND);
+    if (tree->gtGetOp1()->OperGet() == GT_LCL_VAR)
+    {
+        // Check if we have the pattern above and find the nullcheck node if we do.
+
+        // Find the definition of the indirected local (x in the picture)
+        GenTreePtr indLocalTree = tree->gtGetOp1();
+        unsigned lclNum = indLocalTree->AsLclVarCommon()->GetLclNum();
+        unsigned ssaNum = indLocalTree->AsLclVarCommon()->GetSsaNum();
+
+        if (ssaNum != SsaConfig::RESERVED_SSA_NUM)
+        {
+            DefLoc      defLoc   = lvaTable[lclNum].GetPerSsaData(ssaNum)->m_defLoc;
+            BasicBlock* defBlock = defLoc.m_blk;
+
+            if (compCurBB == defBlock)
+            {
+                GenTreePtr defTree   = defLoc.m_tree;
+                GenTreePtr defParent = defTree->gtGetParent(nullptr);
+
+                if ((defParent->OperGet() == GT_ASG) && (defParent->gtNext == nullptr))
+                {
+                    GenTreePtr defRHS = defParent->gtGetOp2();
+                    if (defRHS->OperGet() == GT_COMMA)
+                    {
+                        if (defRHS->gtGetOp1()->OperGet() == GT_NULLCHECK)
+                        {
+                            GenTreePtr nullCheckTree = defRHS->gtGetOp1();
+                            if (nullCheckTree->gtGetOp1()->OperGet() == GT_LCL_VAR)
+                            {
+                                // We found a candidate for 'y' in the picture
+                                unsigned nullCheckLclNum = nullCheckTree->gtGetOp1()->AsLclVarCommon()->GetLclNum();
+
+                                if (defRHS->gtGetOp2()->OperGet() == GT_ADD)
+                                {
+                                    GenTreePtr additionNode = defRHS->gtGetOp2();
+                                    if ((additionNode->gtGetOp1()->OperGet() == GT_LCL_VAR) &&
+                                        (additionNode->gtGetOp1()->gtLclVarCommon.gtLclNum == nullCheckLclNum))
+                                    {
+                                        GenTreePtr offset = additionNode->gtGetOp2();
+                                        if (offset->IsCnsIntOrI())
+                                        {
+                                            if (!fgIsBigOffset(offset->gtIntConCommon.IconValue()))
+                                            {
+                                                // Walk from the use to the def in reverse execution order to see
+                                                // if any nodes have unsafe side effects.
+                                                GenTreePtr currentTree        = indLocalTree->gtPrev;
+                                                bool       isInsideTry        = compCurBB->hasTryIndex();
+                                                bool       canRemoveNullCheck = true;
+                                                const unsigned maxNodesWalked = 25;
+                                                unsigned       nodesWalked    = 0;
+
+                                                // First walk the nodes in the statement containing the indirection
+                                                // in reverse execution order starting with the indirection's predecessor.
+                                                while (canRemoveNullCheck && (currentTree != nullptr))
+                                                {
+                                                    if ((nodesWalked++ > maxNodesWalked) ||
+                                                        !optCanMoveNullCheckPastTree(currentTree, isInsideTry))
+                                                    {
+                                                        canRemoveNullCheck = false;
+                                                    }
+                                                    else
+                                                    {
+                                                        currentTree = currentTree->gtPrev;
+                                                    }
+                                                }
+
+                                                // Then walk the statement list in reverse execution order
+                                                // until we get to the statement containing the null check.
+                                                // We only need to check the side effects at the root of each statement.
+                                                GenTreePtr curStmt = compCurStmt->gtPrev;
+                                                currentTree = curStmt->gtStmt.gtStmtExpr;
+                                                while (canRemoveNullCheck && (currentTree != defParent))
+                                                {
+                                                    if ((nodesWalked++ > maxNodesWalked) ||
+                                                        !optCanMoveNullCheckPastTree(currentTree, isInsideTry))
+                                                    {
+                                                        canRemoveNullCheck = false;
+                                                    }
+                                                    else
+                                                    {
+                                                        curStmt = curStmt->gtStmt.gtPrevStmt;
+                                                        assert(curStmt != nullptr);
+                                                        currentTree = curStmt->gtStmt.gtStmtExpr;
+                                                    }
+                                                }
+
+                                                if (canRemoveNullCheck)
+                                                {
+                                                    // Remove the null check
+                                                    nullCheckTree->gtFlags &= ~(GTF_EXCEPT | GTF_DONT_CSE);
+
+                                                    // Set this flag to prevent reordering
+                                                    nullCheckTree->gtFlags |= GTF_ORDER_SIDEEFF;
+
+                                                    defRHS->gtFlags &= ~(GTF_EXCEPT | GTF_DONT_CSE);
+                                                    defRHS->gtFlags |= additionNode->gtFlags & (GTF_EXCEPT | GTF_DONT_CSE);
+
+                                                    // Re-morph the statement.
+                                                    fgMorphBlockStmt(compCurBB, curStmt DEBUGARG("optFoldNullCheck"));
+
+                                                    // Recalculate the gtCostSz, etc...
+                                                    gtSetStmtInfo(curStmt);
+
+                                                    // Re-thread the nodes.
+                                                    fgSetStmtSeq(curStmt);
+                                                }
+                                            }
+                                        }
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+//----------------------------------------------------------------
+// optCanMoveNullCheckPastTree: Check if GT_NULLCHECK can be folded into a node that
+//                              is after tree in execution order.
+//
+// Arguments:
+//    tree        - The tree to check.
+//    isInsideTry - True if tree is inside try, false otherwise.
+//
+// Return Value:
+//    True if GT_NULLCHECK can be folded into a node that is after tree in execution order,
+//    false otherwise.
+
+bool Compiler::optCanMoveNullCheckPastTree(GenTreePtr tree, bool isInsideTry)
+{
+    bool result = true;
+    if (isInsideTry)
+    {
+        // We disallow calls, exception sources, and all assignments.
+        // Assignments to locals are disallowed inside try because
+        // they may be live in the handler.
+        if ((tree->gtFlags & GTF_SIDE_EFFECT) != 0)
+        {
+            result = false;
+        }
+    }
+    else
+    {
+        // We disallow calls, exception sources, and assignments to
+        // global memory.
+        if (GTF_GLOBALLY_VISIBLE_SIDE_EFFECTS(tree->gtFlags))
+        {
+            result = false;
+        }
+    }
+    return result;
+}
\ No newline at end of file
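The two-phase backwards walk in optFoldNullCheck above (first the nodes of the
current statement, then the roots of the preceding statements) can be modeled
with plain doubly linked lists. The types below are stand-ins, not the JIT's,
and sideEffect stands in for the optCanMoveNullCheckPastTree test; only the
25-node budget is taken from the real code:

    #include <cstdio>

    // Stand-ins for threaded GenTree nodes and statement lists.
    struct Node
    {
        Node* prev       = nullptr; // previous node in execution order
        bool  sideEffect = false;   // models "has unsafe side effects"
    };

    struct Stmt
    {
        Stmt* prev = nullptr; // previous statement in the block
        Node* root = nullptr; // root node of the statement's tree
    };

    // Walk backwards from 'use' to the statement rooted at 'defRoot', giving up
    // on any unsafe node or after a fixed budget.
    static bool CanRemoveNullCheck(Node* use, Stmt* useStmt, Node* defRoot)
    {
        const unsigned maxNodesWalked = 25;
        unsigned       nodesWalked    = 0;

        // Phase 1: nodes before the use inside the use's own statement.
        for (Node* n = use->prev; n != nullptr; n = n->prev)
        {
            if ((nodesWalked++ > maxNodesWalked) || n->sideEffect)
                return false;
        }

        // Phase 2: roots of the statements between the use's statement and the
        // def. Only the root needs checking: its flags summarize the statement.
        for (Stmt* s = useStmt->prev; s != nullptr && s->root != defRoot; s = s->prev)
        {
            if ((nodesWalked++ > maxNodesWalked) || s->root->sideEffect)
                return false;
        }
        return true;
    }

    int main()
    {
        // A def statement, one harmless statement, then the statement with the use.
        Node defRoot, middleRoot, useNode;
        Stmt defStmt{nullptr, &defRoot};
        Stmt midStmt{&defStmt, &middleRoot};
        Stmt useStmt{&midStmt, &useNode};

        printf("can remove: %d\n", CanRemoveNullCheck(&useNode, &useStmt, &defRoot)); // 1
        middleRoot.sideEffect = true;
        printf("can remove: %d\n", CanRemoveNullCheck(&useNode, &useStmt, &defRoot)); // 0
        return 0;
    }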
diff --git a/src/coreclr/src/jit/morph.cpp b/src/coreclr/src/jit/morph.cpp
index 5572d8c..db80948 100755
--- a/src/coreclr/src/jit/morph.cpp
+++ b/src/coreclr/src/jit/morph.cpp
@@ -6129,6 +6129,9 @@ GenTreePtr Compiler::fgMorphField(GenTreePtr tree, MorphAddrContext* ma
             // An indirection will cause a GPF if the address is null.
             nullchk->gtFlags |= GTF_EXCEPT;

+            compCurBB->bbFlags |= BBF_HAS_NULLCHECK;
+            optMethodFlags |= OMF_HAS_NULLCHECK;
+
             if (asg)
             {
                 // Create the "comma" node.
@@ -12115,6 +12118,9 @@ CM_ADD_OP:
             // Originally, I gave all the comma nodes type "byref". But the ADDR(IND(x)) == x transform
             // might give op1 a type different from byref (like, say, native int). So now go back and give
             // all the comma nodes the type of op1.
+            // TODO: the comma flag update below is conservative and can be improved.
+            // For example, if we made the ADDR(IND(x)) == x transformation, we may be able to
+            // get rid of some of the IND flags on the COMMA nodes (e.g., GTF_GLOB_REF).
             commaNode = tree;
             while (commaNode->gtOper == GT_COMMA)
             {
@@ -12975,7 +12981,6 @@ ASG_OP:

         return tree;
     }
-
     // code to generate a magic number and shift amount for the magic number division
     // optimization. This code is previously from UTC where it notes it was taken from
     // _The_PowerPC_Compiler_Writer's_Guide_, pages 57-58.
-- 
2.7.4