From b8283098f698bf844299f8ca817a3b1c497c724c Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Thu, 23 Apr 2020 14:48:16 -0700 Subject: [PATCH] Jit: run throw helper merge phase before morph (#35255) Now that we have pred lists before morph, we can move the throw helper tail merge phase earlier in the phase list. This has two benefits: * we can now merge a few more cases, because morph can introduce unique temps for otherwise identical calls; * it saves some throughput, because we no longer need to morph duplicate calls. There is more opportunity here to reduce code size if we can find the right heuristic in morph to decide if throw helpers should be called or tail-called, though the overall benefit is small (~600 methods, ~2000k bytes). I left the current heuristic in place as I couldn't come up with anything better. Fixes #35135. --- src/coreclr/src/jit/compiler.cpp | 12 +++++++---- src/coreclr/src/jit/flowgraph.cpp | 44 ++++++++++++++++++++++++++++----------- src/coreclr/src/jit/gentree.cpp | 9 ++++++++ src/coreclr/src/jit/morph.cpp | 4 ++++ 4 files changed, 53 insertions(+), 16 deletions(-) diff --git a/src/coreclr/src/jit/compiler.cpp b/src/coreclr/src/jit/compiler.cpp index c526f82..61e4c14 100644 --- a/src/coreclr/src/jit/compiler.cpp +++ b/src/coreclr/src/jit/compiler.cpp @@ -4463,9 +4463,16 @@ void Compiler::compCompile(void** methodCodePtr, ULONG* methodCodeSize, JitFlags }; DoPhase(this, PHASE_COMPUTE_PREDS, computePredsPhase); - // Run an early flow graph simplification pass + // Now that we have pred lists, do some flow-related optimizations + // if (opts.OptimizationEnabled()) { + // Merge common throw blocks + // + DoPhase(this, PHASE_MERGE_THROWS, &Compiler::fgTailMergeThrows); + + // Run an early flow graph simplification pass + // auto earlyUpdateFlowGraphPhase = [this]() { const bool doTailDup = false; fgUpdateFlowGraph(doTailDup); @@ -4580,9 +4587,6 @@ void Compiler::compCompile(void** methodCodePtr, ULONG* methodCodeSize, JitFlags if (opts.OptimizationEnabled()) { - // Merge common throw blocks - // - DoPhase(this, PHASE_MERGE_THROWS, &Compiler::fgTailMergeThrows); // Optimize block order // DoPhase(this, PHASE_OPTIMIZE_LAYOUT, &Compiler::optOptimizeLayout); diff --git a/src/coreclr/src/jit/flowgraph.cpp b/src/coreclr/src/jit/flowgraph.cpp index bf0ee4d..af6f834 100644 --- a/src/coreclr/src/jit/flowgraph.cpp +++ b/src/coreclr/src/jit/flowgraph.cpp @@ -25866,6 +25866,10 @@ PhaseStatus Compiler::fgTailMergeThrows() JITDUMP("Method does not have multiple noreturn calls.\n"); return PhaseStatus::MODIFIED_NOTHING; } + else + { + JITDUMP("Scanning the %u candidates\n", optNoReturnCallCount); + } // This transformation requires block pred lists to be built // so that flow can be safely updated. @@ -25931,11 +25935,16 @@ PhaseStatus Compiler::fgTailMergeThrows() continue; } - // For throw helpers the block should have exactly one statement.... - // (this isn't guaranteed, but seems likely) - Statement* stmt = block->firstStmt(); + // We only look at the first statement for throw helper calls. + // Remainder of the block will be dead code. + // + // Throw helper calls could show up later in the block; we + // won't try merging those as we'd need to match up all the + // prior statements or split the block at this point, etc. + // + Statement* const stmt = block->firstStmt(); - if ((stmt == nullptr) || (stmt->GetNextStmt() != nullptr)) + if (stmt == nullptr) { continue; } @@ -25997,13 +26006,14 @@ PhaseStatus Compiler::fgTailMergeThrows() // We walk the map rather than the block list, to save a bit of time. BlockToBlockMap::KeyIterator iter(blockMap.Begin()); BlockToBlockMap::KeyIterator end(blockMap.End()); - int updateCount = 0; + unsigned updateCount = 0; for (; !iter.Equal(end); iter++) { BasicBlock* const nonCanonicalBlock = iter.Get(); BasicBlock* const canonicalBlock = iter.GetValue(); flowList* nextPredEdge = nullptr; + bool updated = false; // Walk pred list of the non canonical block, updating flow to target // the canonical block instead. @@ -26017,14 +26027,14 @@ PhaseStatus Compiler::fgTailMergeThrows() case BBJ_NONE: { fgTailMergeThrowsFallThroughHelper(predBlock, nonCanonicalBlock, canonicalBlock, predEdge); - updateCount++; + updated = true; } break; case BBJ_ALWAYS: { fgTailMergeThrowsJumpToHelper(predBlock, nonCanonicalBlock, canonicalBlock, predEdge); - updateCount++; + updated = true; } break; @@ -26040,7 +26050,7 @@ PhaseStatus Compiler::fgTailMergeThrows() { fgTailMergeThrowsJumpToHelper(predBlock, nonCanonicalBlock, canonicalBlock, predEdge); } - updateCount++; + updated = true; } break; @@ -26048,7 +26058,7 @@ PhaseStatus Compiler::fgTailMergeThrows() { JITDUMP("*** " FMT_BB " now branching to " FMT_BB "\n", predBlock->bbNum, canonicalBlock->bbNum); fgReplaceSwitchJumpTarget(predBlock, canonicalBlock, nonCanonicalBlock); - updateCount++; + updated = true; } break; @@ -26058,6 +26068,11 @@ PhaseStatus Compiler::fgTailMergeThrows() break; } } + + if (updated) + { + updateCount++; + } } if (updateCount == 0) @@ -26065,13 +26080,18 @@ PhaseStatus Compiler::fgTailMergeThrows() return PhaseStatus::MODIFIED_NOTHING; } + // TODO: Update the count of noreturn call sites -- this feeds a heuristic in morph + // to determine if these noreturn calls should be tail called. + // + // Updating the count does not lead to better results, so deferring for now. + // + JITDUMP("Made %u updates\n", updateCount); + assert(updateCount < optNoReturnCallCount); + // If we altered flow, reset fgModified. Given where we sit in the // phase list, flow-dependent side data hasn't been built yet, so // nothing needs invalidation. // - // Note we could invoke a cleanup pass here, but optOptimizeFlow - // seems to be missing some safety checks and doesn't expect to - // see an already cleaned-up flow graph. assert(fgModified); fgModified = false; return PhaseStatus::MODIFIED_EVERYTHING; diff --git a/src/coreclr/src/jit/gentree.cpp b/src/coreclr/src/jit/gentree.cpp index c474818..4813115 100644 --- a/src/coreclr/src/jit/gentree.cpp +++ b/src/coreclr/src/jit/gentree.cpp @@ -1296,6 +1296,15 @@ AGAIN: return true; } break; + + case GT_CNS_STR: + if ((op1->AsStrCon()->gtSconCPX == op2->AsStrCon()->gtSconCPX) && + (op1->AsStrCon()->gtScpHnd == op2->AsStrCon()->gtScpHnd)) + { + return true; + } + break; + #if 0 // TODO-CQ: Enable this in the future case GT_CNS_LNG: diff --git a/src/coreclr/src/jit/morph.cpp b/src/coreclr/src/jit/morph.cpp index 8d50f1d..cb23535 100644 --- a/src/coreclr/src/jit/morph.cpp +++ b/src/coreclr/src/jit/morph.cpp @@ -7106,6 +7106,10 @@ GenTree* Compiler::fgMorphPotentialTailCall(GenTreeCall* call) // Heuristic: regular calls to noreturn methods can sometimes be // merged, so if we have multiple such calls, we defer tail calling. + // + // TODO: re-examine this; now that we're merging before morph we + // don't need to worry about interfering with merges. + // if (call->IsNoReturn() && (optNoReturnCallCount > 1)) { failTailCall("Defer tail calling throw helper; anticipating merge"); -- 2.7.4