JIT: Add a pass of early liveness and use it for forward sub and last-use copy elision
author Jakob Botsch Nielsen <Jakob.botsch.nielsen@gmail.com>
Wed, 11 Jan 2023 08:19:46 +0000 (09:19 +0100)
committer GitHub <noreply@github.com>
Wed, 11 Jan 2023 08:19:46 +0000 (09:19 +0100)
This runs a pass of liveness right after local morph and uses it for forward sub and to omit copies of structs when passed as implicit byrefs at their last use.

Fix #76069
Fix #75206
Fix #65025
Fix #9839

This PR introduces the following new JIT invariants:

* When optimizing, local morph will now thread all locals into a tree list accessed by Statement::LocalsTreeList. This tree list is kept valid starting from local morph and ending with forward sub. There is no memory impact of this since we reuse the GenTree::gtPrev and GenTree::gtNext fields.
* Early liveness information (GTF_VAR_DEATH and the promoted struct death vars map) is kept valid (sound) starting from early liveness and ending with morph.
  There are asserts that the tree list is up to date when it is accessed. This is done through a new member fgNodeThreading that replaces the preexisting fgStmtListThreaded and keeps information about what the current kind of node threading is.

The benefits are large, -2 MB on win-x64 collections (-0.85% on libraries.pmi that only has optimized contexts), with a number of regressions as expected when removing locals.
The improvements primarily come from the omission of copies for implicit byrefs, so the benefits on platforms with fewer implicit byrefs are smaller, but the forward sub change alone is still very impactful (e.g. -300K on linux-x64).

The throughput impact is around 1% in optimized contexts and below 0.1% in unoptimized contexts, the latter due to local morph needing to check if it should be threading nodes.

29 files changed:
src/coreclr/jit/assertionprop.cpp
src/coreclr/jit/block.h
src/coreclr/jit/compiler.cpp
src/coreclr/jit/compiler.h
src/coreclr/jit/compphases.h
src/coreclr/jit/earlyprop.cpp
src/coreclr/jit/fgbasic.cpp
src/coreclr/jit/fgdiagnostic.cpp
src/coreclr/jit/fgopt.cpp
src/coreclr/jit/fgstmt.cpp
src/coreclr/jit/flowgraph.cpp
src/coreclr/jit/forwardsub.cpp
src/coreclr/jit/gentree.cpp
src/coreclr/jit/gentree.h
src/coreclr/jit/importercalls.cpp
src/coreclr/jit/jitconfigvalues.h
src/coreclr/jit/lclmorph.cpp
src/coreclr/jit/lclvars.cpp
src/coreclr/jit/lir.cpp
src/coreclr/jit/lir.h
src/coreclr/jit/liveness.cpp
src/coreclr/jit/loopcloning.cpp
src/coreclr/jit/morph.cpp
src/coreclr/jit/optimizer.cpp
src/coreclr/jit/phase.cpp
src/tests/JIT/Directed/debugging/debuginfo/tester.csproj
src/tests/JIT/Directed/debugging/debuginfo/tests.il
src/tests/JIT/opt/ForwardSub/earlyLiveness.cs [new file with mode: 0644]
src/tests/JIT/opt/ForwardSub/earlyLiveness.csproj [new file with mode: 0644]

index cc7fc272a8d655b27d1f80839e93b3cbe36018d8..648a01774dbbed7fc2015005de1ac4239f18a4db 100644 (file)
@@ -3651,6 +3651,15 @@ GenTree* Compiler::optCopyAssertionProp(AssertionDsc*        curAssertion,
     tree->SetLclNum(copyLclNum);
     tree->SetSsaNum(copySsaNum);
 
+    // Copy prop and last-use copy elision happen at the same time in morph.
+    // This node may potentially not be a last use of the new local.
+    //
+    // TODO-CQ: It is probably better to avoid doing this propagation if we
+    // would otherwise omit an implicit byref copy since this propagation will
+    // force us to create another copy anyway.
+    //
+    tree->gtFlags &= ~GTF_VAR_DEATH;
+
 #ifdef DEBUG
     if (verbose)
     {
index 67df48d1972c63ac440ac0c26d2dfdc1f042da3e..ad33295181010420640e1c6ad89de8cfcfef3ce1 100644 (file)
@@ -542,6 +542,8 @@ enum BasicBlockFlags : unsigned __int64
     BBF_BACKWARD_JUMP_SOURCE           = MAKE_BBFLAG(41), // Block is a source of a backward jump
     BBF_HAS_MDARRAYREF                 = MAKE_BBFLAG(42), // Block has a multi-dimensional array reference
 
+    BBF_RECURSIVE_TAILCALL             = MAKE_BBFLAG(43), // Block has recursive tailcall that may turn into a loop
+
     // The following are sets of flags.
 
     // Flags that relate blocks to loop structure.
@@ -562,7 +564,7 @@ enum BasicBlockFlags : unsigned __int64
     // For example, the top block might or might not have BBF_GC_SAFE_POINT,
     // but we assume it does not have BBF_GC_SAFE_POINT any more.
 
-    BBF_SPLIT_LOST = BBF_GC_SAFE_POINT | BBF_HAS_JMP | BBF_KEEP_BBJ_ALWAYS | BBF_CLONED_FINALLY_END,
+    BBF_SPLIT_LOST = BBF_GC_SAFE_POINT | BBF_HAS_JMP | BBF_KEEP_BBJ_ALWAYS | BBF_CLONED_FINALLY_END | BBF_RECURSIVE_TAILCALL,
 
     // Flags gained by the bottom block when a block is split.
     // Note, this is a conservative guess.
index 68bea7458492bc46335ac1e984f14f52583b40d1..f86e1923f89dc307743534c71524da75b86cb54e 100644 (file)
@@ -4391,8 +4391,8 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
 
     // Enable the post-phase checks that use internal logic to decide when checking makes sense.
     //
-    activePhaseChecks =
-        PhaseChecks::CHECK_EH | PhaseChecks::CHECK_LOOPS | PhaseChecks::CHECK_UNIQUE | PhaseChecks::CHECK_PROFILE;
+    activePhaseChecks = PhaseChecks::CHECK_EH | PhaseChecks::CHECK_LOOPS | PhaseChecks::CHECK_UNIQUE |
+                        PhaseChecks::CHECK_PROFILE | PhaseChecks::CHECK_LINKED_LOCALS;
 
     // Import: convert the instrs in each basic block to a tree based intermediate representation
     //
@@ -4604,10 +4604,23 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
     //
     DoPhase(this, PHASE_STR_ADRLCL, &Compiler::fgMarkAddressExposedLocals);
 
+    if (opts.OptimizationEnabled())
+    {
+        fgNodeThreading = NodeThreading::AllLocals;
+    }
+
+    // Do an early pass of liveness for forward sub and morph. This data is
+    // valid until after morph.
+    //
+    DoPhase(this, PHASE_EARLY_LIVENESS, &Compiler::fgEarlyLiveness);
+
     // Run a simple forward substitution pass.
     //
     DoPhase(this, PHASE_FWD_SUB, &Compiler::fgForwardSub);
 
+    // Locals tree list is no longer kept valid.
+    fgNodeThreading = NodeThreading::None;
+
     // Apply the type update to implicit byref parameters; also choose (based on address-exposed
     // analysis) which implicit byref promotions to keep (requires copy to initialize) or discard.
     //
@@ -4750,6 +4763,8 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
     //
     DoPhase(this, PHASE_SET_BLOCK_ORDER, &Compiler::fgSetBlockOrder);
 
+    fgNodeThreading = NodeThreading::AllTrees;
+
     // At this point we know if we are fully interruptible or not
     if (opts.OptimizationEnabled())
     {
@@ -4942,6 +4957,8 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
     Rationalizer rat(this); // PHASE_RATIONALIZE
     rat.Run();
 
+    fgNodeThreading = NodeThreading::LIR;
+
     // Here we do "simple lowering".  When the RyuJIT backend works for all
     // platforms, this will be part of the more general lowering phase.  For now, though, we do a separate
     // pass of "final lowering."  We must do this before (final) liveness analysis, because this creates
index 7c0e26c8f58250fe5caea9f8aba300a595e8c7c8..46413b54eb59fe885f97a60ca9a6e22e057d6485 100644 (file)
@@ -1454,13 +1454,14 @@ extern const char* PhaseEnums[];
 // clang-format off
 enum class PhaseChecks : unsigned int
 {
-    CHECK_NONE    = 0,
-    CHECK_IR      = 1 << 0, // ir flags, etc
-    CHECK_UNIQUE  = 1 << 1, // tree node uniqueness
-    CHECK_FG      = 1 << 2, // flow graph integrity
-    CHECK_EH      = 1 << 3, // eh table integrity
-    CHECK_LOOPS   = 1 << 4, // loop table integrity
-    CHECK_PROFILE = 1 << 5, // profile data integrity
+    CHECK_NONE          = 0,
+    CHECK_IR            = 1 << 0, // ir flags, etc
+    CHECK_UNIQUE        = 1 << 1, // tree node uniqueness
+    CHECK_FG            = 1 << 2, // flow graph integrity
+    CHECK_EH            = 1 << 3, // eh table integrity
+    CHECK_LOOPS         = 1 << 4, // loop table integrity
+    CHECK_PROFILE       = 1 << 5, // profile data integrity
+    CHECK_LINKED_LOCALS = 1 << 6, // check linked list of locals
 };
 
 inline constexpr PhaseChecks operator ~(PhaseChecks a)
@@ -1860,6 +1861,16 @@ struct RichIPMapping
     DebugInfo    debugInfo;
 };
 
+// Current kind of node threading stored in GenTree::gtPrev and GenTree::gtNext.
+// See fgNodeThreading for more information.
+enum class NodeThreading
+{
+    None,      // No node threading is currently kept valid
+    AllLocals, // Locals are threaded (after local morph when optimizing)
+    AllTrees,  // All nodes are threaded (after fgSetBlockOrder)
+    LIR,       // Nodes are in LIR form (after rationalization)
+};
+
 /*
 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
@@ -1910,6 +1921,7 @@ class Compiler
     friend class LIR;
     friend class ObjectAllocator;
     friend class LocalAddressVisitor;
+    friend struct Statement;
     friend struct GenTree;
     friend class MorphInitBlockHelper;
     friend class MorphCopyBlockHelper;
@@ -2789,7 +2801,7 @@ public:
     // is #of nodes in subtree) of "tree" is greater than "limit".
     // (This is somewhat redundant with the "GetCostEx()/GetCostSz()" fields, but can be used
     // before they have been set.)
-    bool gtComplexityExceeds(GenTree** tree, unsigned limit);
+    bool gtComplexityExceeds(GenTree* tree, unsigned limit);
 
     GenTree* gtReverseCond(GenTree* tree);
 
@@ -4448,30 +4460,47 @@ public:
     bool fgRemoveRestOfBlock; // true if we know that we will throw
     bool fgStmtRemoved;       // true if we remove statements -> need new DFA
 
-    // There are two modes for ordering of the trees.
-    //  - In FGOrderTree, the dominant ordering is the tree order, and the nodes contained in
-    //    each tree and sub-tree are contiguous, and can be traversed (in gtNext/gtPrev order)
-    //    by traversing the tree according to the order of the operands.
-    //  - In FGOrderLinear, the dominant ordering is the linear order.
-
     enum FlowGraphOrder
     {
         FGOrderTree,
         FGOrderLinear
     };
+    // There are two modes for ordering of the trees.
+    //  - In FGOrderTree, the dominant ordering is the tree order, and the nodes contained in
+    //    each tree and sub-tree are contiguous, and can be traversed (in gtNext/gtPrev order)
+    //    by traversing the tree according to the order of the operands.
+    //  - In FGOrderLinear, the dominant ordering is the linear order.
     FlowGraphOrder fgOrder;
 
-    // The following are boolean flags that keep track of the state of internal data structures
-
-    bool     fgStmtListThreaded;       // true if the node list is now threaded
-    bool     fgCanRelocateEHRegions;   // true if we are allowed to relocate the EH regions
-    bool     fgEdgeWeightsComputed;    // true after we have called fgComputeEdgeWeights
-    bool     fgHaveValidEdgeWeights;   // true if we were successful in computing all of the edge weights
-    bool     fgSlopUsedInEdgeWeights;  // true if their was some slop used when computing the edge weights
-    bool     fgRangeUsedInEdgeWeights; // true if some of the edgeWeight are expressed in Min..Max form
-    weight_t fgCalledCount;            // count of the number of times this method was called
-                                       // This is derived from the profile data
-                                       // or is BB_UNITY_WEIGHT when we don't have profile data
+    // The following are flags that keep track of the state of internal data structures
+
+    // Even in tree form (fgOrder == FGOrderTree) the trees are threaded into
+    // doubly linked lists during certain phases of the compilation.
+    // - Local morph threads all locals to be used for early liveness and
+    //   forward sub when optimizing. This is kept valid until after forward sub.
+    //   The first local is kept in Statement::GetRootNode()->gtNext and the last
+    //   local in Statement::GetRootNode()->gtPrev. fgSequenceLocals can be used
+    //   to (re-)sequence a statement into this form, and
+    //   Statement::LocalsTreeList for range-based iteration. The order must
+    //   match tree order.
+    //
+    // - fgSetBlockOrder threads all nodes. This is kept valid until LIR form.
+    //   In this form the first node is given by Statement::GetTreeList and the
+    //   last node is given by Statement::GetRootNode(). fgSetStmtSeq can be used
+    //   to (re-)sequence a statement into this form, and Statement::TreeList for
+    //   range-based iteration. The order must match tree order.
+    //
+    // - Rationalization links all nodes into linear form which is kept until
+    //   the end of compilation. The first and last nodes are stored in the block.
+    NodeThreading fgNodeThreading;
+    bool          fgCanRelocateEHRegions;   // true if we are allowed to relocate the EH regions
+    bool          fgEdgeWeightsComputed;    // true after we have called fgComputeEdgeWeights
+    bool          fgHaveValidEdgeWeights;   // true if we were successful in computing all of the edge weights
+    bool          fgSlopUsedInEdgeWeights;  // true if there was some slop used when computing the edge weights
+    bool          fgRangeUsedInEdgeWeights; // true if some of the edgeWeight are expressed in Min..Max form
+    weight_t      fgCalledCount;            // count of the number of times this method was called
+                                            // This is derived from the profile data
+                                            // or is BB_UNITY_WEIGHT when we don't have profile data
 
 #if defined(FEATURE_EH_FUNCLETS)
     bool fgFuncletsCreated; // true if the funclet creation phase has been run
@@ -4724,6 +4753,8 @@ public:
                                      GenTreeLclVarCommon* lclVarNode);
     bool fgComputeLifeLocal(VARSET_TP& life, VARSET_VALARG_TP keepAliveVars, GenTree* lclVarNode);
 
+    GenTree* fgTryRemoveDeadStoreEarly(Statement* stmt, GenTreeLclVarCommon* dst);
+
     void fgComputeLife(VARSET_TP&       life,
                        GenTree*         startNode,
                        GenTree*         endNode,
@@ -5419,6 +5450,7 @@ public:
     void fgDebugCheckLinks(bool morphTrees = false);
     void fgDebugCheckStmtsList(BasicBlock* block, bool morphTrees);
     void fgDebugCheckNodeLinks(BasicBlock* block, Statement* stmt);
+    void fgDebugCheckLinkedLocals();
     void fgDebugCheckNodesUniqueness();
     void fgDebugCheckLoopTable();
     void fgDebugCheckSsa();
@@ -5835,6 +5867,8 @@ private:
 
     bool byrefStatesMatchGcHeapStates; // True iff GcHeap and ByrefExposed memory have all the same def points.
 
+    PhaseStatus fgEarlyLiveness();
+
     void fgMarkUseDef(GenTreeLclVarCommon* tree);
 
     void fgBeginScopeLife(VARSET_TP* inScope, VarScopeDsc* var);
@@ -5926,11 +5960,12 @@ private:
     void fgMarkDemotedImplicitByRefArgs();
 
     PhaseStatus fgMarkAddressExposedLocals();
-    void fgMarkAddressExposedLocals(Statement* stmt);
+    void fgSequenceLocals(Statement* stmt);
 
     PhaseStatus fgForwardSub();
     bool fgForwardSubBlock(BasicBlock* block);
     bool fgForwardSubStatement(Statement* statement);
+    void fgForwardSubUpdateLiveness(GenTree* newSubListFirst, GenTree* newSubListLast);
 
     // The given local variable, required to be a struct variable, is being assigned via
     // a "lclField", to make it masquerade as an integral type in the ABI.  Make sure that
@@ -9049,6 +9084,8 @@ public:
 
     bool fgLocalVarLivenessDone; // Note that this one is used outside of debug.
     bool fgLocalVarLivenessChanged;
+    bool fgIsDoingEarlyLiveness;
+    bool fgDidEarlyLiveness;
     bool compLSRADone;
     bool compRationalIRForm;
 
@@ -10991,7 +11028,6 @@ public:
                 if (TVisitor::UseExecutionOrder && node->IsReverseOp())
                 {
                     assert(node->AsMultiOp()->GetOperandCount() == 2);
-
                     result = WalkTree(&node->AsMultiOp()->Op(2), node);
                     if (result == fgWalkResult::WALK_ABORT)
                     {
index 2fb14b058a31211c4a3c8b30d63b8e5fa57eb219..4d9d39cca25cf96a609ae7e42cf64c34f1554b17 100644 (file)
@@ -43,6 +43,7 @@ CompPhaseNameMacro(PHASE_UPDATE_FINALLY_FLAGS,       "Update finally target flag
 CompPhaseNameMacro(PHASE_COMPUTE_PREDS,              "Compute preds",                  false, -1, false)
 CompPhaseNameMacro(PHASE_EARLY_UPDATE_FLOW_GRAPH,    "Update flow graph early pass",   false, -1, false)
 CompPhaseNameMacro(PHASE_STR_ADRLCL,                 "Morph - Structs/AddrExp",        false, -1, false)
+CompPhaseNameMacro(PHASE_EARLY_LIVENESS,             "Early liveness",                 false, -1, false)
 CompPhaseNameMacro(PHASE_FWD_SUB,                    "Forward Substitution",           false, -1, false)
 CompPhaseNameMacro(PHASE_MORPH_IMPBYREF,             "Morph - ByRefs",                 false, -1, false)
 CompPhaseNameMacro(PHASE_PROMOTE_STRUCTS,            "Morph - Promote Structs",        false, -1, false)
index 22d8d6c5924bde5522424656763c1d5b659fb575..ab858d96fb8705bba6e01641cfaa295a78af93b6 100644 (file)
@@ -650,7 +650,7 @@ bool Compiler::optIsNullCheckFoldingLegal(GenTree*    tree,
     // until we get to the indirection or process the statement root.
     GenTree* previousTree = nullCheckTree;
     GenTree* currentTree  = nullCheckTree->gtNext;
-    assert(fgStmtListThreaded);
+    assert(fgNodeThreading == NodeThreading::AllTrees);
     while (canRemoveNullCheck && (currentTree != tree) && (currentTree != nullptr))
     {
         if ((*nullCheckParent == nullptr) && currentTree->TryGetUse(nullCheckTree))
index e56843264cd6a20f57e9875c0748f6ed8322730c..dc27ecc47cf52e50df3bd70fe0a046b5f4ae13c9 100644 (file)
@@ -87,10 +87,12 @@ void Compiler::fgInit()
 #endif // DEBUG
 
     fgLocalVarLivenessDone = false;
+    fgIsDoingEarlyLiveness = false;
+    fgDidEarlyLiveness     = false;
 
     /* Statement list is not threaded yet */
 
-    fgStmtListThreaded = false;
+    fgNodeThreading = NodeThreading::None;
 
     // Initialize the logic for adding code. This is used to insert code such
     // as the code that raises an exception when an array range check fails.
index c3fb8711721062c910d63d88f394243ef488688a..d900f532bce0f48c2ffd146bc22d0f37043f09a3 100644 (file)
@@ -2800,6 +2800,8 @@ void Compiler::fgDebugCheckBBlist(bool checkBBNum /* = false */, bool checkBBRef
         block->bbTraversalStamp = curTraversalStamp;
     }
 
+    bool allNodesLinked = (fgNodeThreading == NodeThreading::AllTrees) || (fgNodeThreading == NodeThreading::LIR);
+
     for (BasicBlock* const block : Blocks())
     {
         if (checkBBNum)
@@ -2814,11 +2816,12 @@ void Compiler::fgDebugCheckBBlist(bool checkBBNum /* = false */, bool checkBBRef
 
         if (block->bbJumpKind == BBJ_COND)
         {
-            assert(block->lastNode()->gtNext == nullptr && block->lastNode()->OperIsConditionalJump());
+            assert((!allNodesLinked || (block->lastNode()->gtNext == nullptr)) &&
+                   block->lastNode()->OperIsConditionalJump());
         }
         else if (block->bbJumpKind == BBJ_SWITCH)
         {
-            assert(block->lastNode()->gtNext == nullptr &&
+            assert((!allNodesLinked || (block->lastNode()->gtNext == nullptr)) &&
                    (block->lastNode()->gtOper == GT_SWITCH || block->lastNode()->gtOper == GT_SWITCH_TABLE));
         }
 
@@ -3237,7 +3240,7 @@ void Compiler::fgDebugCheckNodeLinks(BasicBlock* block, Statement* stmt)
         // TODO: return?
     }
 
-    assert(fgStmtListThreaded);
+    assert(fgNodeThreading != NodeThreading::None);
 
     noway_assert(stmt->GetTreeList());
 
@@ -3326,6 +3329,136 @@ void Compiler::fgDebugCheckNodeLinks(BasicBlock* block, Statement* stmt)
     }
 }
 
+//------------------------------------------------------------------------------
+// fgDebugCheckLinkedLocals: For every statement, check that the linked list of locals (starting at the root node's gtNext) holds exactly the local nodes found by an execution-order walk of the tree, in the same order.
+//
+void Compiler::fgDebugCheckLinkedLocals()
+{
+    if (fgNodeThreading != NodeThreading::AllLocals) // Nothing to check unless locals are the current threading kind
+    {
+        return;
+    }
+
+    class DebugLocalSequencer : public GenTreeVisitor<DebugLocalSequencer> // Recomputes the expected order of locals for one statement
+    {
+        ArrayStack<GenTree*> m_locals; // Expected sequence, bottom of the stack first
+
+        bool ShouldLink(GenTree* node)
+        {
+            return node->OperIsLocal() || node->OperIsLocalAddr();
+        }
+
+    public:
+        enum
+        {
+            DoPostOrder       = true,
+            UseExecutionOrder = true,
+        };
+
+        DebugLocalSequencer(Compiler* comp) : GenTreeVisitor(comp), m_locals(comp->getAllocator(CMK_DebugOnly))
+        {
+        }
+
+        void Sequence(Statement* stmt)
+        {
+            m_locals.Reset(); // The same sequencer is reused for every statement
+            WalkTree(stmt->GetRootNodePointer(), nullptr);
+        }
+
+        ArrayStack<GenTree*>* GetSequence()
+        {
+            return &m_locals;
+        }
+
+        fgWalkResult PostOrderVisit(GenTree** use, GenTree* user)
+        {
+            GenTree* node = *use;
+            if (ShouldLink(node))
+            {
+                if ((user != nullptr) && user->OperIs(GT_ASG) && (node == user->gtGetOp1()))
+                { // Intentionally empty: an assignment destination is pushed when the GT_ASG itself is visited below
+                }
+                else if ((user != nullptr) && user->IsCall() &&
+                         (node == m_compiler->gtCallGetDefinedRetBufLclAddr(user->AsCall())))
+                { // Intentionally empty: the defined retbuf local address is pushed when the call itself is visited below
+                }
+                else
+                {
+                    m_locals.Push(node);
+                }
+            }
+
+            if (node->OperIs(GT_ASG) && ShouldLink(node->gtGetOp1())) // Deferred push of the assignment destination
+            {
+                m_locals.Push(node->gtGetOp1());
+            }
+
+            if (node->IsCall()) // Deferred push of the local address reported by gtCallGetDefinedRetBufLclAddr
+            {
+                GenTree* defined = m_compiler->gtCallGetDefinedRetBufLclAddr(node->AsCall());
+                if (defined != nullptr)
+                {
+                    assert(ShouldLink(defined));
+                    m_locals.Push(defined);
+                }
+            }
+
+            return WALK_CONTINUE;
+        }
+    };
+
+    DebugLocalSequencer seq(this);
+    for (BasicBlock* block : Blocks())
+    {
+        for (Statement* stmt : block->Statements())
+        {
+            GenTree* first = stmt->GetRootNode()->gtNext; // Head of the actual locals list for this statement
+            CheckDoublyLinkedList<GenTree, &GenTree::gtPrev, &GenTree::gtNext>(first);
+
+            seq.Sequence(stmt);
+
+            ArrayStack<GenTree*>* expected = seq.GetSequence();
+
+            bool success   = true;
+            int  nodeIndex = 0;
+            for (GenTree* cur = first; cur != nullptr; cur = cur->gtNext)
+            {
+                success &= cur->OperIsLocal() || cur->OperIsLocalAddr(); // Only locals and local addresses may be linked
+                success &= (nodeIndex < expected->Height()) && (cur == expected->Bottom(nodeIndex)); // Same node at the same position
+                nodeIndex++;
+            }
+
+            success &= nodeIndex == expected->Height(); // The actual list must not be shorter than the expected one
+
+            if (!success && verbose) // Dump both sequences before asserting, but only in verbose mode
+            {
+                printf("Locals are improperly linked in the following statement:\n");
+                DISPSTMT(stmt);
+
+                printf("\nExpected:\n");
+                const char* pref = "  ";
+                for (int i = 0; i < expected->Height(); i++)
+                {
+                    printf("%s[%06u]", pref, dspTreeID(expected->Bottom(i)));
+                    pref = " -> ";
+                }
+
+                printf("\n\nActual:\n");
+                pref = "  ";
+                for (GenTree* cur = first; cur != nullptr; cur = cur->gtNext)
+                {
+                    printf("%s[%06u]", pref, dspTreeID(cur));
+                    pref = " -> ";
+                }
+
+                printf("\n");
+            }
+
+            assert(success && "Locals are improperly linked!");
+        }
+    }
+}
+
 /*****************************************************************************
  *
  * A DEBUG routine to check the correctness of the links between statements
@@ -3431,7 +3564,7 @@ void Compiler::fgDebugCheckStmtsList(BasicBlock* block, bool morphTrees)
         }
 
         // For each statement check that the nodes are threaded correctly - m_treeList.
-        if (fgStmtListThreaded)
+        if (fgNodeThreading != NodeThreading::None)
         {
             fgDebugCheckNodeLinks(block, stmt);
         }
index 16eb52cf3823e3b9359ce00a50ae0776d9a2f745..1c5c9758313a675ab7ad6b3815233caf8a4f50bf 100644 (file)
@@ -2633,7 +2633,7 @@ void Compiler::fgRemoveConditionalJump(BasicBlock* block)
         {
             test->SetRootNode(sideEffList);
 
-            if (fgStmtListThreaded)
+            if (fgNodeThreading != NodeThreading::None)
             {
                 gtSetStmtInfo(test);
                 fgSetStmtSeq(test);
@@ -2950,7 +2950,10 @@ bool Compiler::fgOptimizeEmptyBlock(BasicBlock* block)
                         else
                         {
                             Statement* nopStmt = fgNewStmtAtEnd(block, nop);
-                            fgSetStmtSeq(nopStmt);
+                            if (fgNodeThreading == NodeThreading::AllTrees)
+                            {
+                                fgSetStmtSeq(nopStmt);
+                            }
                             gtSetStmtInfo(nopStmt);
                         }
 
@@ -3216,7 +3219,7 @@ bool Compiler::fgOptimizeSwitchBranches(BasicBlock* block)
 
                 switchStmt->SetRootNode(sideEffList);
 
-                if (fgStmtListThreaded)
+                if (fgNodeThreading != NodeThreading::None)
                 {
                     compCurBB = block;
 
@@ -3297,7 +3300,7 @@ bool Compiler::fgOptimizeSwitchBranches(BasicBlock* block)
             LIR::ReadOnlyRange range(zeroConstNode, switchTree);
             m_pLowering->LowerRange(block, range);
         }
-        else if (fgStmtListThreaded)
+        else if (fgNodeThreading != NodeThreading::None)
         {
             gtSetStmtInfo(switchStmt);
             fgSetStmtSeq(switchStmt);
@@ -3722,7 +3725,7 @@ bool Compiler::fgOptimizeUncondBranchToSimpleCond(BasicBlock* block, BasicBlock*
         noway_assert(clone);
         Statement* cloneStmt = gtNewStmt(clone);
 
-        if (fgStmtListThreaded)
+        if (fgNodeThreading != NodeThreading::None)
         {
             gtSetStmtInfo(cloneStmt);
         }
@@ -3880,7 +3883,7 @@ bool Compiler::fgOptimizeBranchToNext(BasicBlock* block, BasicBlock* bNext, Basi
 
                     condStmt->SetRootNode(sideEffList);
 
-                    if (fgStmtListThreaded)
+                    if (fgNodeThreading == NodeThreading::AllTrees)
                     {
                         compCurBB = block;
 
@@ -3995,7 +3998,7 @@ bool Compiler::fgOptimizeBranch(BasicBlock* bJump)
         // links. We don't know if it does or doesn't reorder nodes, so we end up always re-threading the links.
 
         gtSetStmtInfo(stmt);
-        if (fgStmtListThreaded)
+        if (fgNodeThreading == NodeThreading::AllTrees)
         {
             fgSetStmtSeq(stmt);
         }
@@ -4109,7 +4112,7 @@ bool Compiler::fgOptimizeBranch(BasicBlock* bJump)
             return false;
         }
 
-        if (fgStmtListThreaded)
+        if (fgNodeThreading == NodeThreading::AllTrees)
         {
             gtSetStmtInfo(stmt);
             fgSetStmtSeq(stmt);
@@ -4389,7 +4392,7 @@ bool Compiler::fgOptimizeSwitchJumps()
         //
         newBlock->bbJumpSwt->bbsHasDominantCase = false;
 
-        if (fgStmtListThreaded)
+        if (fgNodeThreading == NodeThreading::AllTrees)
         {
             // The switch tree has been modified.
             JITDUMP("Rethreading " FMT_STMT "\n", switchStmt->GetID());
@@ -5800,7 +5803,7 @@ bool Compiler::fgReorderBlocks(bool useProfile)
 
             // may need to rethread
             //
-            if (fgStmtListThreaded)
+            if (fgNodeThreading == NodeThreading::AllTrees)
             {
                 JITDUMP("Rethreading " FMT_STMT "\n", condTestStmt->GetID());
                 gtSetStmtInfo(condTestStmt);
index 0730540d175e673c3addb22379d3846ce9e8ced7..57124f6f3d306a09202b97ee20d09206abf48b8c 100644 (file)
@@ -386,7 +386,7 @@ Statement* Compiler::fgNewStmtFromTree(GenTree* tree, BasicBlock* block, const D
 {
     Statement* stmt = gtNewStmt(tree, di);
 
-    if (fgStmtListThreaded)
+    if (fgNodeThreading != NodeThreading::None)
     {
         gtSetStmtInfo(stmt);
         fgSetStmtSeq(stmt);
index ae72354b00070792647290c48a27917549bb40e2..3c6f32905570442736fd32d285ac398757893c3a 100644 (file)
@@ -258,7 +258,7 @@ BasicBlock* Compiler::fgCreateGCPoll(GCPollType pollType, BasicBlock* block)
             }
         }
 
-        if (fgStmtListThreaded)
+        if (fgNodeThreading != NodeThreading::None)
         {
             gtSetStmtInfo(newStmt);
             fgSetStmtSeq(newStmt);
@@ -333,7 +333,7 @@ BasicBlock* Compiler::fgCreateGCPoll(GCPollType pollType, BasicBlock* block)
 
         // Add the GC_CALL node to Poll.
         Statement* pollStmt = fgNewStmtAtEnd(poll, call);
-        if (fgStmtListThreaded)
+        if (fgNodeThreading != NodeThreading::None)
         {
             gtSetStmtInfo(pollStmt);
             fgSetStmtSeq(pollStmt);
@@ -393,7 +393,7 @@ BasicBlock* Compiler::fgCreateGCPoll(GCPollType pollType, BasicBlock* block)
         GenTree* trapCheck = gtNewOperNode(GT_JTRUE, TYP_VOID, trapRelop);
         gtSetEvalOrder(trapCheck);
         Statement* trapCheckStmt = fgNewStmtAtEnd(top, trapCheck);
-        if (fgStmtListThreaded)
+        if (fgNodeThreading != NodeThreading::None)
         {
             gtSetStmtInfo(trapCheckStmt);
             fgSetStmtSeq(trapCheckStmt);
@@ -4008,6 +4008,16 @@ GenTree* Compiler::fgSetTreeSeq(GenTree* tree, bool isLIR)
         }
     };
 
+#ifdef DEBUG
+    if (isLIR)
+    {
+        assert((fgNodeThreading == NodeThreading::LIR) || (mostRecentlyActivePhase == PHASE_RATIONALIZE));
+    }
+    else
+    {
+        assert((fgNodeThreading == NodeThreading::AllTrees) || (mostRecentlyActivePhase == PHASE_SET_BLOCK_ORDER));
+    }
+#endif
     return SetTreeSeqVisitor(this, tree, isLIR).Sequence();
 }
 
@@ -4112,10 +4122,6 @@ PhaseStatus Compiler::fgSetBlockOrder()
         fgSetBlockOrder(block);
     }
 
-    /* Remember that now the tree list is threaded */
-
-    fgStmtListThreaded = true;
-
 #ifdef DEBUG
     if (verbose)
     {
index d7b321fbc25c0165b3df6badc0a9eaf02ce16b4c..28a202d54a7caf4b333ba56d1af8dce39fe5eb6d 100644 (file)
@@ -88,8 +88,6 @@
 //   and in the same EH region.
 // * Rerun this later, after we have built SSA, and handle single-def single-use
 //   from SSA perspective.
-// * Fix issue in morph that can unsoundly reorder call args, and remove
-//   extra effects computation from ForwardSubVisitor.
 // * We can be more aggressive with GTF_IND_INVARIANT / GTF_IND_NONFAULTING
 //   nodes--even though they may be marked GTF_GLOB_REF, they can be freely
 //   reordered. See if this offers any benefit.
@@ -191,17 +189,23 @@ public:
         UseExecutionOrder = true
     };
 
-    ForwardSubVisitor(Compiler* compiler, unsigned lclNum)
-        : GenTreeVisitor<ForwardSubVisitor>(compiler)
+    ForwardSubVisitor(Compiler* compiler, unsigned lclNum, bool livenessBased)
+        : GenTreeVisitor(compiler)
         , m_use(nullptr)
         , m_node(nullptr)
         , m_parentNode(nullptr)
         , m_lclNum(lclNum)
-        , m_useCount(0)
+        , m_parentLclNum(BAD_VAR_NUM)
         , m_useFlags(GTF_EMPTY)
         , m_accumulatedFlags(GTF_EMPTY)
         , m_treeSize(0)
+        , m_livenessBased(livenessBased)
     {
+        LclVarDsc* dsc = compiler->lvaGetDesc(m_lclNum);
+        if (dsc->lvIsStructField)
+        {
+            m_parentLclNum = dsc->lvParentLcl;
+        }
     }
 
     Compiler::fgWalkResult PostOrderVisit(GenTree** use, GenTree* user)
@@ -217,8 +221,6 @@ public:
 
             if (lclNum == m_lclNum)
             {
-                m_useCount++;
-
                 // Screen out contextual "uses"
                 //
                 GenTree* const parent = user;
@@ -234,7 +236,7 @@ public:
                     isCallTarget = (parentCall->gtCallType == CT_INDIRECT) && (parentCall->gtCallAddr == node);
                 }
 
-                if (!isDef && !isCallTarget)
+                if (!isDef && !isCallTarget && IsLastUse(node->AsLclVar()))
                 {
                     m_node       = node;
                     m_use        = use;
@@ -246,17 +248,27 @@ public:
 
         // Stores to and uses of address-exposed locals are modelled as global refs.
         //
-        GenTree* lclNode = nullptr;
-        if (node->OperIsLocal() && !isDef)
+        LclVarDsc* lclDsc = nullptr;
+        if (node->OperIsLocal())
         {
-            lclNode = node;
+#ifdef DEBUG
+            if (IsUse(node->AsLclVarCommon()))
+            {
+                m_useCount++;
+            }
+#endif
+
+            if (!isDef)
+            {
+                lclDsc = m_compiler->lvaGetDesc(node->AsLclVarCommon());
+            }
         }
         else if (node->OperIs(GT_ASG) && node->gtGetOp1()->OperIsLocal())
         {
-            lclNode = node->gtGetOp1();
+            lclDsc = m_compiler->lvaGetDesc(node->gtGetOp1()->AsLclVarCommon());
         }
 
-        if ((lclNode != nullptr) && m_compiler->lvaGetDesc(lclNode->AsLclVarCommon())->IsAddressExposed())
+        if ((lclDsc != nullptr) && lclDsc->IsAddressExposed())
         {
             m_accumulatedFlags |= GTF_GLOB_REF;
         }
@@ -266,10 +278,12 @@ public:
         return fgWalkResult::WALK_CONTINUE;
     }
 
+#ifdef DEBUG
     unsigned GetUseCount() const
     {
         return m_useCount;
     }
+#endif
 
     GenTree* GetNode() const
     {
@@ -301,15 +315,74 @@ public:
         return m_treeSize;
     }
 
+    //------------------------------------------------------------------------
+    // IsUse: Check if a local is considered a use of the forward sub candidate
+    // while taking promotion into account.
+    //
+    // Arguments:
+    //    lcl - the local
+    //
+    // Returns:
+    //    true if the node refers to the candidate local, its parent struct local, or one of its fields.
+    //
+    bool IsUse(GenTreeLclVarCommon* lcl)
+    {
+        unsigned lclNum = lcl->GetLclNum();
+        if ((lclNum == m_lclNum) || (lclNum == m_parentLclNum))
+        {
+            return true;
+        }
+
+        LclVarDsc* dsc = m_compiler->lvaGetDesc(lclNum);
+        return dsc->lvIsStructField && (dsc->lvParentLcl == m_lclNum);
+    }
+
+    //------------------------------------------------------------------------
+    // IsLastUse: Check if the local node is a last use. The local node is expected
+    // to be a GT_LCL_VAR of the local being forward subbed.
+    //
+    // Arguments:
+    //    lcl - the GT_LCL_VAR of the current local.
+    //
+    // Returns:
+    //    true if the expression is a last use of the local; otherwise false.
+    //
+    bool IsLastUse(GenTreeLclVar* lcl)
+    {
+        assert(lcl->OperIs(GT_LCL_VAR) && (lcl->GetLclNum() == m_lclNum));
+
+        if (!m_livenessBased)
+        {
+            // When not liveness based we can only get here when we have
+            // exactly 2 global references, and we should have already seen the
+            // def.
+            assert(m_compiler->lvaGetDesc(lcl)->lvRefCnt(RCS_EARLY) == 2);
+            return true;
+        }
+
+        if ((lcl->gtFlags & GTF_VAR_DEATH) == 0)
+        {
+            return false;
+        }
+
+        LclVarDsc* dsc = m_compiler->lvaGetDesc(lcl);
+        VARSET_TP* deadFields;
+        return !dsc->lvPromoted || !m_compiler->LookupPromotedStructDeathVars(lcl, &deadFields);
+    }
+
 private:
-    GenTree**    m_use;
-    GenTree*     m_node;
-    GenTree*     m_parentNode;
-    unsigned     m_lclNum;
-    unsigned     m_useCount;
+    GenTree** m_use;
+    GenTree*  m_node;
+    GenTree*  m_parentNode;
+    unsigned  m_lclNum;
+    unsigned  m_parentLclNum;
+#ifdef DEBUG
+    unsigned m_useCount = 0;
+#endif
     GenTreeFlags m_useFlags;
     GenTreeFlags m_accumulatedFlags;
     unsigned     m_treeSize;
+    bool         m_livenessBased;
 };
 
 //------------------------------------------------------------------------
@@ -400,12 +473,23 @@ bool Compiler::fgForwardSubStatement(Statement* stmt)
     }
 
     // Only fwd sub if we expect no code duplication
-    // We expect one def and one use.
     //
+    bool livenessBased = false;
     if (varDsc->lvRefCnt(RCS_EARLY) != 2)
     {
-        JITDUMP(" not asg (single-use lcl)\n");
-        return false;
+        if (!fgDidEarlyLiveness)
+        {
+            JITDUMP(" not asg (single-use lcl)\n");
+            return false;
+        }
+
+        if (varDsc->lvRefCnt(RCS_EARLY) < 2)
+        {
+            JITDUMP(" not asg (no use)\n");
+            return false;
+        }
+
+        livenessBased = true;
     }
 
     // And local is unalised
@@ -465,6 +549,40 @@ bool Compiler::fgForwardSubStatement(Statement* stmt)
         return false;
     }
 
+    // Local and tree to substitute seem suitable.
+    // See if the next statement contains the one and only use.
+    //
+    Statement* const nextStmt = stmt->GetNextStmt();
+
+    ForwardSubVisitor fsv(this, lclNum, livenessBased);
+    assert(fgNodeThreading == NodeThreading::AllLocals);
+    // Do a quick scan through the linked locals list to see if there is a last
+    // use.
+    bool found = false;
+    for (GenTreeLclVarCommon* lcl : nextStmt->LocalsTreeList())
+    {
+        if (lcl->OperIs(GT_LCL_VAR) && (lcl->GetLclNum() == lclNum))
+        {
+            if (fsv.IsLastUse(lcl->AsLclVar()))
+            {
+                found = true;
+                break;
+            }
+        }
+
+        if (fsv.IsUse(lcl))
+        {
+            JITDUMP(" next stmt has non-last use\n");
+            return false;
+        }
+    }
+
+    if (!found)
+    {
+        JITDUMP(" no next stmt use\n");
+        return false;
+    }
+
     // Don't fwd sub overly large trees.
     // Size limit here is ad-hoc. Need to tune.
     //
@@ -472,40 +590,40 @@ bool Compiler::fgForwardSubStatement(Statement* stmt)
     //
     unsigned const nodeLimit = 16;
 
-    if (gtComplexityExceeds(&fwdSubNode, nodeLimit))
+    if (gtComplexityExceeds(fwdSubNode, nodeLimit))
     {
         JITDUMP(" tree to sub has more than %u nodes\n", nodeLimit);
         return false;
     }
 
-    // Local and tree to substitute seem suitable.
-    // See if the next statement contains the one and only use.
-    //
-    Statement* const nextStmt = stmt->GetNextStmt();
-
     // We often see stale flags, eg call flags after inlining.
     // Try and clean these up.
     //
     gtUpdateStmtSideEffects(nextStmt);
     gtUpdateStmtSideEffects(stmt);
 
-    // Scan for the (single) use.
+    // Scan for the (last) use.
     //
-    ForwardSubVisitor fsv(this, lclNum);
     fsv.WalkTree(nextStmt->GetRootNodePointer(), nullptr);
 
-    // LclMorph (via RCS_Early) said there was just one use.
-    // It had better have gotten this right.
-    //
-    assert(fsv.GetUseCount() <= 1);
+    if (!livenessBased)
+    {
+        // LclMorph (via RCS_Early) said there was just one use.
+        // It had better have gotten this right.
+        //
+        assert(fsv.GetUseCount() == 1);
+    }
 
-    if ((fsv.GetUseCount() == 0) || (fsv.GetNode() == nullptr))
+    // The visitor has more contextual information and may not actually deem
+    // the use we found above as a valid forward sub destination so we must
+    // recheck it here.
+    if (fsv.GetNode() == nullptr)
     {
         JITDUMP(" no next stmt use\n");
         return false;
     }
 
-    JITDUMP(" [%06u] is only use of [%06u] (V%02u) ", dspTreeID(fsv.GetNode()), dspTreeID(lhsNode), lclNum);
+    JITDUMP(" [%06u] is last use of [%06u] (V%02u) ", dspTreeID(fsv.GetNode()), dspTreeID(lhsNode), lclNum);
 
     // Qmarks must replace top-level uses. Also, restrict to GT_ASG.
     // And also to where neither local is normalize on store, otherwise
@@ -553,7 +671,7 @@ bool Compiler::fgForwardSubStatement(Statement* stmt)
     // height of the fwdSubNode.
     //
     unsigned const nextTreeLimit = 200;
-    if ((fsv.GetComplexity() > nextTreeLimit) && gtComplexityExceeds(&fwdSubNode, 1))
+    if ((fsv.GetComplexity() > nextTreeLimit) && gtComplexityExceeds(fwdSubNode, 1))
     {
         JITDUMP(" next stmt tree is too large (%u)\n", fsv.GetComplexity());
         return false;
@@ -743,8 +861,26 @@ bool Compiler::fgForwardSubStatement(Statement* stmt)
 
     // Looks good, forward sub!
     //
-    GenTree** use = fsv.GetUse();
-    *use          = fwdSubNode;
+    GenTree**            use    = fsv.GetUse();
+    GenTreeLclVarCommon* useLcl = (*use)->AsLclVarCommon();
+    *use                        = fwdSubNode;
+
+    // We expect the last local in the def statement's list to be the defined
+    // local; the use is replaced by the locals that precede it in that list.
+    assert(lhsNode->gtNext == nullptr);
+
+    GenTreeLclVarCommon* firstLcl = *stmt->LocalsTreeList().begin();
+
+    if (firstLcl == lhsNode)
+    {
+        nextStmt->LocalsTreeList().Remove(useLcl);
+    }
+    else
+    {
+        nextStmt->LocalsTreeList().Replace(useLcl, useLcl, firstLcl, lhsNode->gtPrev->AsLclVarCommon());
+
+        fgForwardSubUpdateLiveness(firstLcl, lhsNode->gtPrev);
+    }
 
     if (!fwdSubNodeInvariant)
     {
@@ -756,3 +892,66 @@ bool Compiler::fgForwardSubStatement(Statement* stmt)
 
     return true;
 }
+
+//------------------------------------------------------------------------
+// fgForwardSubUpdateLiveness: correct liveness after performing a forward
+// substitution that added a new sub list of locals in a statement.
+//
+// Arguments:
+//    newSubListFirst - the first local in the new sub list.
+//    newSubListLast - the last local in the new sub list.
+//
+// Remarks:
+//    Forward substitution may add new uses of other locals; these may be
+//    inserted at arbitrary points in the statement, so previous last uses may
+//    be invalidated. This function will conservatively unmark last uses that
+//    may no longer be correct.
+//
+//    The function is not as precise as it could be, in particular it does not
+//    mark any of the new later uses as a last use, and it does not care about
+//    defs. However, currently the only user of last use information after
+//    forward sub is last-use copy omission, and diffs indicate that being
+//    conservative here does not have a large impact.
+//
+void Compiler::fgForwardSubUpdateLiveness(GenTree* newSubListFirst, GenTree* newSubListLast)
+{
+    for (GenTree* node = newSubListFirst->gtPrev; node != nullptr; node = node->gtPrev)
+    {
+        if ((node->gtFlags & GTF_VAR_DEATH) == 0)
+        {
+            continue;
+        }
+
+        unsigned   lclNum = node->AsLclVarCommon()->GetLclNum();
+        LclVarDsc* dsc    = lvaGetDesc(lclNum);
+        // Last-use copy omission does not work for promoted structs today, so
+        // we can always unmark these which saves us from having to update the
+        // promoted struct death vars map.
+        if (dsc->lvPromoted)
+        {
+            node->gtFlags &= ~GTF_VAR_DEATH;
+            continue;
+        }
+
+        unsigned parentLclNum = dsc->lvIsStructField ? dsc->lvParentLcl : BAD_VAR_NUM;
+
+        GenTree* candidate = newSubListFirst;
+        // See if a new instance of this local or its parent appeared.
+        while (true)
+        {
+            unsigned newUseLclNum = candidate->AsLclVarCommon()->GetLclNum();
+            if ((newUseLclNum == lclNum) || (newUseLclNum == parentLclNum))
+            {
+                node->gtFlags &= ~GTF_VAR_DEATH;
+                break;
+            }
+
+            if (candidate == newSubListLast)
+            {
+                break;
+            }
+
+            candidate = candidate->gtNext;
+        }
+    }
+}
index aa5b4f4e3fa17621957078258b592a5e98c8a754..74c9c3fdba9acfb140a502a20cbb48bf127dd069 100644 (file)
@@ -554,6 +554,139 @@ void GenTree::DumpNodeSizes(FILE* fp)
 
 #endif // MEASURE_NODE_SIZE
 
+//-----------------------------------------------------------
+// begin: Get the iterator for the beginning of the locals list.
+//
+// Return Value:
+//     Iterator representing the beginning.
+//
+LocalsGenTreeList::iterator LocalsGenTreeList::begin() const
+{
+    GenTree* first = m_stmt->GetRootNode()->gtNext;
+    assert((first == nullptr) || first->OperIsLocal() || first->OperIsLocalAddr());
+    return iterator(static_cast<GenTreeLclVarCommon*>(first));
+}
+
+//-----------------------------------------------------------
+// GetForwardEdge: Get the edge that points forward to a node.
+//
+// Arguments:
+//     node - The node the edge should be pointing at.
+//
+// Return Value:
+//     The edge, such that *edge == node.
+//
+GenTree** LocalsGenTreeList::GetForwardEdge(GenTreeLclVarCommon* node)
+{
+    if (node->gtPrev == nullptr)
+    {
+        assert(m_stmt->GetRootNode()->gtNext == node);
+        return &m_stmt->GetRootNode()->gtNext;
+    }
+    else
+    {
+        assert(node->gtPrev->gtNext == node);
+        return &node->gtPrev->gtNext;
+    }
+}
+
+//-----------------------------------------------------------
+// GetBackwardEdge: Get the edge that points backwards to a node.
+//
+// Arguments:
+//     node - The node the edge should be pointing at.
+//
+// Return Value:
+//     The edge, such that *edge == node.
+//
+GenTree** LocalsGenTreeList::GetBackwardEdge(GenTreeLclVarCommon* node)
+{
+    if (node->gtNext == nullptr)
+    {
+        assert(m_stmt->GetRootNode()->gtPrev == node);
+        return &m_stmt->GetRootNode()->gtPrev;
+    }
+    else
+    {
+        assert(node->gtNext->gtPrev == node);
+        return &node->gtNext->gtPrev;
+    }
+}
+
+//-----------------------------------------------------------
+// Remove: Remove a specified node from the locals tree list.
+//
+// Arguments:
+//     node - the local node to remove; it must currently be linked into this list.
+//
+void LocalsGenTreeList::Remove(GenTreeLclVarCommon* node)
+{
+    GenTree** forwardEdge  = GetForwardEdge(node);
+    GenTree** backwardEdge = GetBackwardEdge(node);
+
+    *forwardEdge  = node->gtNext;
+    *backwardEdge = node->gtPrev;
+}
+
+//-----------------------------------------------------------
+// Replace: Replace a sequence of nodes with another (already linked) sequence of nodes.
+//
+// Arguments:
+//     firstNode - The first node, part of this locals tree list, to be replaced.
+//     lastNode - The last node, part of this locals tree list, to be replaced.
+//     newFirstNode - The start of the replacement sub list.
+//     newLastNode - The last node of the replacement sub list.
+//
+void LocalsGenTreeList::Replace(GenTreeLclVarCommon* firstNode,
+                                GenTreeLclVarCommon* lastNode,
+                                GenTreeLclVarCommon* newFirstNode,
+                                GenTreeLclVarCommon* newLastNode)
+{
+    assert((newFirstNode != nullptr) && (newLastNode != nullptr));
+
+    GenTree** forwardEdge  = GetForwardEdge(firstNode);
+    GenTree** backwardEdge = GetBackwardEdge(lastNode);
+
+    GenTree* prev = firstNode->gtPrev;
+    GenTree* next = lastNode->gtNext;
+
+    *forwardEdge         = newFirstNode;
+    *backwardEdge        = newLastNode;
+    newFirstNode->gtPrev = prev;
+    newLastNode->gtNext  = next;
+}
+
+//-----------------------------------------------------------
+// TreeList: convenience method for enabling range-based `for` iteration over the
+// execution order of the GenTree linked list, e.g.:
+//    for (GenTree* const tree : stmt->TreeList()) ...
+//
+// Only valid between fgSetBlockOrder and rationalization. See fgNodeThreading.
+//
+// Return Value:
+//   The tree list.
+//
+GenTreeList Statement::TreeList() const
+{
+    assert(JitTls::GetCompiler()->fgNodeThreading == NodeThreading::AllTrees);
+    return GenTreeList(GetTreeList());
+}
+
+//-----------------------------------------------------------
+// LocalsTreeList: Manages the locals tree list and allows for range-based
+// iteration.
+//
+// Only valid between local morph and forward sub. See fgNodeThreading.
+//
+// Return Value:
+//    The locals tree list.
+//
+LocalsGenTreeList Statement::LocalsTreeList()
+{
+    assert(JitTls::GetCompiler()->fgNodeThreading == NodeThreading::AllLocals);
+    return LocalsGenTreeList(this);
+}
+
 /*****************************************************************************
  *
  *  Walk all basic blocks and call the given function pointer for all tree
@@ -9085,12 +9218,13 @@ GenTreeCall* Compiler::gtCloneCandidateCall(GenTreeCall* call)
 
 void Compiler::gtUpdateSideEffects(Statement* stmt, GenTree* tree)
 {
-    if (fgStmtListThreaded)
+    if (fgNodeThreading == NodeThreading::AllTrees)
     {
         gtUpdateTreeAncestorsSideEffects(tree);
     }
     else
     {
+        assert(fgNodeThreading != NodeThreading::LIR);
         gtUpdateStmtSideEffects(stmt);
     }
 }
@@ -9104,7 +9238,7 @@ void Compiler::gtUpdateSideEffects(Statement* stmt, GenTree* tree)
 //
 void Compiler::gtUpdateTreeAncestorsSideEffects(GenTree* tree)
 {
-    assert(fgStmtListThreaded);
+    assert(fgNodeThreading == NodeThreading::AllTrees);
     while (tree != nullptr)
     {
         gtUpdateNodeSideEffects(tree);
@@ -14168,7 +14302,7 @@ GenTree* Compiler::gtTryRemoveBoxUpstreamEffects(GenTree* op, BoxRemovalOptions
         }
     }
 
-    if (fgStmtListThreaded)
+    if (fgNodeThreading == NodeThreading::AllTrees)
     {
         fgSetStmtSeq(asgStmt);
         fgSetStmtSeq(copyStmt);
@@ -16047,6 +16181,45 @@ void Compiler::gtExtractSideEffList(GenTree*     expr,
                     return Compiler::WALK_SKIP_SUBTREES;
                 }
 
+                if (node->OperIs(GT_QMARK))
+                {
+                    GenTree* prevSideEffects = m_result;
+                    // Visit children out of order so we know if we can
+                    // completely remove the qmark. We cannot modify the
+                    // condition if we cannot completely remove the qmark, so
+                    // we cannot visit it first.
+
+                    GenTreeQmark* qmark = node->AsQmark();
+                    GenTreeColon* colon = qmark->gtGetOp2()->AsColon();
+
+                    m_result = nullptr;
+                    WalkTree(&colon->gtOp1, colon);
+                    GenTree* thenSideEffects = m_result;
+
+                    m_result = nullptr;
+                    WalkTree(&colon->gtOp2, colon);
+                    GenTree* elseSideEffects = m_result;
+
+                    m_result = prevSideEffects;
+
+                    if ((thenSideEffects == nullptr) && (elseSideEffects == nullptr))
+                    {
+                        WalkTree(&qmark->gtOp1, qmark);
+                    }
+                    else
+                    {
+                        colon->gtOp1  = (thenSideEffects != nullptr) ? thenSideEffects : m_compiler->gtNewNothingNode();
+                        colon->gtOp2  = (elseSideEffects != nullptr) ? elseSideEffects : m_compiler->gtNewNothingNode();
+                        qmark->gtType = TYP_VOID;
+                        colon->gtType = TYP_VOID;
+
+                        qmark->gtFlags &= ~GTF_QMARK_CAST_INSTOF;
+                        Append(qmark);
+                    }
+
+                    return Compiler::WALK_SKIP_SUBTREES;
+                }
+
                 // Generally all GT_CALL nodes are considered to have side-effects.
                 // So if we get here it must be a helper call that we decided it does
                 // not have side effects that we needed to keep.
@@ -16532,41 +16705,47 @@ ExceptionSetFlags Compiler::gtCollectExceptions(GenTree* tree)
     return walker.GetFlags();
 }
 
-/*****************************************************************************/
-
-struct ComplexityStruct
+//-----------------------------------------------------------
+// gtComplexityExceeds: Check if a tree exceeds a specified complexity in terms
+// of number of sub nodes.
+//
+// Arguments:
+//     tree  - The tree to check
+//     limit - The limit in terms of number of nodes
+//
+// Return Value:
+//     True if the tree contains more than 'limit' nodes; otherwise false.
+//
+bool Compiler::gtComplexityExceeds(GenTree* tree, unsigned limit)
 {
-    unsigned m_numNodes;
-    unsigned m_nodeLimit;
-    ComplexityStruct(unsigned nodeLimit) : m_numNodes(0), m_nodeLimit(nodeLimit)
+    struct ComplexityVisitor : GenTreeVisitor<ComplexityVisitor>
     {
-    }
-};
+        enum
+        {
+            DoPreOrder = true,
+        };
 
-static Compiler::fgWalkResult ComplexityExceedsWalker(GenTree** pTree, Compiler::fgWalkData* data)
-{
-    ComplexityStruct* pComplexity = (ComplexityStruct*)data->pCallbackData;
-    if (++pComplexity->m_numNodes > pComplexity->m_nodeLimit)
-    {
-        return Compiler::WALK_ABORT;
-    }
-    else
-    {
-        return Compiler::WALK_CONTINUE;
-    }
-}
+        ComplexityVisitor(Compiler* comp, unsigned limit) : GenTreeVisitor(comp), m_limit(limit)
+        {
+        }
 
-bool Compiler::gtComplexityExceeds(GenTree** tree, unsigned limit)
-{
-    ComplexityStruct complexity(limit);
-    if (fgWalkTreePre(tree, &ComplexityExceedsWalker, &complexity) == WALK_ABORT)
-    {
-        return true;
-    }
-    else
-    {
-        return false;
-    }
+        fgWalkResult PreOrderVisit(GenTree** use, GenTree* user)
+        {
+            if (++m_numNodes > m_limit)
+            {
+                return WALK_ABORT;
+            }
+
+            return WALK_CONTINUE;
+        }
+
+    private:
+        unsigned m_limit;
+        unsigned m_numNodes = 0;
+    };
+
+    ComplexityVisitor visitor(this, limit);
+    return visitor.WalkTree(&tree, nullptr) == WALK_ABORT;
 }
 
 bool GenTree::IsPhiNode()
index c3420514603fcadae6eee877b718a7cc2487123b..790d14f2841779d9a48ca6550cc52b5f8e919665 100644 (file)
@@ -7334,6 +7334,7 @@ class GenTreeList
 {
     GenTree* m_trees;
 
+public:
     // Forward iterator for the execution order GenTree linked list (using `gtNext` pointer).
     //
     class iterator
@@ -7341,7 +7342,7 @@ class GenTreeList
         GenTree* m_tree;
 
     public:
-        iterator(GenTree* tree) : m_tree(tree)
+        explicit iterator(GenTree* tree) : m_tree(tree)
         {
         }
 
@@ -7362,8 +7363,7 @@ class GenTreeList
         }
     };
 
-public:
-    GenTreeList(GenTree* trees) : m_trees(trees)
+    explicit GenTreeList(GenTree* trees) : m_trees(trees)
     {
     }
 
@@ -7378,6 +7378,67 @@ public:
     }
 };
 
+class LocalsGenTreeList
+{
+    Statement* m_stmt;
+
+public:
+    class iterator
+    {
+        GenTreeLclVarCommon* m_tree;
+
+    public:
+        explicit iterator(GenTreeLclVarCommon* tree) : m_tree(tree)
+        {
+        }
+
+        GenTreeLclVarCommon* operator*() const
+        {
+            return m_tree;
+        }
+
+        iterator& operator++()
+        {
+            assert((m_tree->gtNext == nullptr) || m_tree->gtNext->OperIsLocal() || m_tree->gtNext->OperIsLocalAddr());
+            m_tree = static_cast<GenTreeLclVarCommon*>(m_tree->gtNext);
+            return *this;
+        }
+
+        iterator& operator--()
+        {
+            assert((m_tree->gtPrev == nullptr) || m_tree->gtPrev->OperIsLocal() || m_tree->gtPrev->OperIsLocalAddr());
+            m_tree = static_cast<GenTreeLclVarCommon*>(m_tree->gtPrev);
+            return *this;
+        }
+
+        bool operator!=(const iterator& i) const
+        {
+            return m_tree != i.m_tree;
+        }
+    };
+
+    explicit LocalsGenTreeList(Statement* stmt) : m_stmt(stmt)
+    {
+    }
+
+    iterator begin() const;
+
+    iterator end() const
+    {
+        return iterator(nullptr);
+    }
+
+    void Remove(GenTreeLclVarCommon* node);
+    void Replace(GenTreeLclVarCommon* firstNode,
+                 GenTreeLclVarCommon* lastNode,
+                 GenTreeLclVarCommon* newFirstNode,
+                 GenTreeLclVarCommon* newLastNode);
+
+private:
+    GenTree** GetForwardEdge(GenTreeLclVarCommon* node);
+    GenTree** GetBackwardEdge(GenTreeLclVarCommon* node);
+};
+
 // We use the following format when printing the Statement number: Statement->GetID()
 // This define is used with string concatenation to put this in printf format strings  (Note that %u means unsigned int)
 #define FMT_STMT "STMT%05u"
@@ -7422,14 +7483,8 @@ public:
         m_treeList = treeHead;
     }
 
-    // TreeList: convenience method for enabling range-based `for` iteration over the
-    // execution order of the GenTree linked list, e.g.:
-    //    for (GenTree* const tree : stmt->TreeList()) ...
-    //
-    GenTreeList TreeList() const
-    {
-        return GenTreeList(GetTreeList());
-    }
+    GenTreeList       TreeList() const;
+    LocalsGenTreeList LocalsTreeList();
 
     const DebugInfo& GetDebugInfo() const
     {
index 3016b0d498322307bde19d3fa4e119451415b34b..51da418d15853b5969e9423e3d3880c56290932c 100644 (file)
@@ -1273,6 +1273,7 @@ DONE:
             fgMarkBackwardJump(loopHead, compCurBB);
 
             compMayConvertTailCallToLoop = true;
+            compCurBB->bbFlags |= BBF_RECURSIVE_TAILCALL;
         }
 
         // We only do these OSR checks in the root method because:
index 90bea49d546a8b29f1fe98036ca7b11e288b2fb8..edc9ccdb1bd172f5cfa7d8578374621a02eddc72 100644 (file)
@@ -429,6 +429,7 @@ CONFIG_INTEGER(JitDoRedundantBranchOpts, W("JitDoRedundantBranchOpts"), 1) // Pe
 CONFIG_STRING(JitEnableRboRange, W("JitEnableRboRange"))
 CONFIG_STRING(JitEnableTailMergeRange, W("JitEnableTailMergeRange"))
 CONFIG_STRING(JitEnableVNBasedDeadStoreRemovalRange, W("JitEnableVNBasedDeadStoreRemovalRange"))
+CONFIG_STRING(JitEnableEarlyLivenessRange, W("JitEnableEarlyLivenessRange"))
 
 CONFIG_INTEGER(JitDoSsa, W("JitDoSsa"), 1) // Perform Static Single Assignment (SSA) numbering on the variables
 CONFIG_INTEGER(JitDoValueNumber, W("JitDoValueNumber"), 1) // Perform value numbering on method expressions
index a5bd33398128b85b1db7e19a21064ac1d73cd2aa..7793f1d2e5348312cd3ce4d7aacd21e85f192d85 100644 (file)
@@ -3,6 +3,210 @@
 
 #include "jitpch.h"
 
+class LocalSequencer final : public GenTreeVisitor<LocalSequencer>
+{
+    GenTree* m_rootNode;
+    GenTree* m_prevNode;
+
+public:
+    enum
+    {
+        DoPostOrder       = true,
+        UseExecutionOrder = true,
+    };
+
+    LocalSequencer(Compiler* comp) : GenTreeVisitor(comp), m_rootNode(nullptr), m_prevNode(nullptr)
+    {
+    }
+
+    //-------------------------------------------------------------------
+    // Start: Start sequencing a statement. Must be called before other members
+    // are called for a specified statement.
+    //
+    // Arguments:
+    //     stmt - the statement
+    //
+    void Start(Statement* stmt)
+    {
+        // We use the root node as a 'sentinel' node that will keep the head
+        // and tail of the sequenced list.
+        m_rootNode = stmt->GetRootNode();
+        assert(!m_rootNode->OperIsLocal() && !m_rootNode->OperIsLocalAddr());
+
+        m_rootNode->gtPrev = nullptr;
+        m_rootNode->gtNext = nullptr;
+        m_prevNode         = m_rootNode;
+    }
+
+    //-------------------------------------------------------------------
+    // Finish: Finish sequencing a statement. Should be called after sub nodes
+    // of the statement have been visited and sequenced.
+    //
+    // Arguments:
+    //     stmt - the statement
+    //
+    void Finish(Statement* stmt)
+    {
+        assert(stmt->GetRootNode() == m_rootNode);
+
+        GenTree* firstNode = m_rootNode->gtNext;
+        if (firstNode == nullptr)
+        {
+            assert(m_rootNode->gtPrev == nullptr);
+        }
+        else
+        {
+            GenTree* lastNode = m_prevNode;
+
+            // We only sequence leaf nodes that we shouldn't see as standalone
+            // statements here.
+            assert(m_rootNode != firstNode);
+            assert((m_rootNode->gtPrev == nullptr) && (lastNode->gtNext == nullptr));
+
+            assert(lastNode->OperIsLocal() || lastNode->OperIsLocalAddr());
+            firstNode->gtPrev  = nullptr;
+            m_rootNode->gtPrev = lastNode;
+        }
+    }
+
+    fgWalkResult PostOrderVisit(GenTree** use, GenTree* user)
+    {
+        GenTree* node = *use;
+        if (node->OperIsLocal() || node->OperIsLocalAddr())
+        {
+            SequenceLocal(node->AsLclVarCommon());
+        }
+
+        if (node->OperIs(GT_ASG))
+        {
+            SequenceAssignment(node->AsOp());
+        }
+
+        if (node->IsCall())
+        {
+            SequenceCall(node->AsCall());
+        }
+
+        return fgWalkResult::WALK_CONTINUE;
+    }
+
+    //-------------------------------------------------------------------
+    // SequenceLocal: Add a local to the list.
+    //
+    // Arguments:
+    //     lcl - the local
+    //
+    void SequenceLocal(GenTreeLclVarCommon* lcl)
+    {
+        lcl->gtPrev        = m_prevNode;
+        lcl->gtNext        = nullptr;
+        m_prevNode->gtNext = lcl;
+        m_prevNode         = lcl;
+    }
+
+    //-------------------------------------------------------------------
+    // SequenceAssignment: Post-process an assignment that may have a local on the LHS.
+    //
+    // Arguments:
+    //     asg - the assignment
+    //
+    // Remarks:
+    //     In execution order the LHS of an assignment is normally visited
+    //     before the RHS. However, for our purposes, we would like to see the
+    //     LHS local which is considered the def after the nodes on the RHS, so
+    //     this function corrects where that local appears in the list.
+    //
+    //     This is handled in later liveness by guaranteeing GTF_REVERSE_OPS is
+    //     set for assignments with tracked locals on the LHS.
+    //
+    void SequenceAssignment(GenTreeOp* asg)
+    {
+        if (asg->gtGetOp1()->OperIsLocal())
+        {
+            // Correct the point at which the definition of the local on the LHS appears.
+            MoveNodeToEnd(asg->gtGetOp1());
+        }
+    }
+
+    //-------------------------------------------------------------------
+    // SequenceCall: Post-process a call that may define a local.
+    //
+    // Arguments:
+    //     call - the call
+    //
+    // Remarks:
+    //     Like above, but calls may also define a local that we would like to
+    //     see after all other operands of the call have been evaluated.
+    //
+    void SequenceCall(GenTreeCall* call)
+    {
+        if (call->IsOptimizingRetBufAsLocal())
+        {
+            // Correct the point at which the definition of the retbuf local appears.
+            MoveNodeToEnd(m_compiler->gtCallGetDefinedRetBufLclAddr(call));
+        }
+    }
+
+    //-------------------------------------------------------------------
+    // Sequence: Fully sequence a statement.
+    //
+    // Arguments:
+    //     stmt - The statement
+    //
+    void Sequence(Statement* stmt)
+    {
+        Start(stmt);
+        WalkTree(stmt->GetRootNodePointer(), nullptr);
+        Finish(stmt);
+    }
+
+private:
+    //-------------------------------------------------------------------
+    // MoveNodeToEnd: Move a node from its current position in the linked list
+    // to the end.
+    //
+    // Arguments:
+    //     node - The node
+    //
+    void MoveNodeToEnd(GenTree* node)
+    {
+        if (node->gtNext == nullptr)
+        {
+            return;
+        }
+
+        assert(m_prevNode != node);
+
+        GenTree* prev = node->gtPrev;
+        GenTree* next = node->gtNext;
+
+        assert(prev != nullptr); // Should have sentinel always, even as the first local.
+        prev->gtNext = next;
+        next->gtPrev = prev;
+
+        m_prevNode->gtNext = node;
+        node->gtPrev       = m_prevNode;
+        node->gtNext       = nullptr;
+        m_prevNode         = node;
+    }
+};
+
+//-------------------------------------------------------------------
+// fgSequenceLocals: Sequence the locals in a statement.
+//
+// Arguments:
+//     stmt - The statement.
+//
+// Remarks:
+//     This is the locals-only (see fgNodeThreading) counterpart to fgSetStmtSeq.
+//
+void Compiler::fgSequenceLocals(Statement* stmt)
+{
+    assert((fgNodeThreading == NodeThreading::AllLocals) || (mostRecentlyActivePhase == PHASE_STR_ADRLCL));
+    LocalSequencer seq(this);
+    seq.Sequence(stmt);
+}
+
 class LocalAddressVisitor final : public GenTreeVisitor<LocalAddressVisitor>
 {
     // During tree traversal every GenTree node produces a "value" that represents:
@@ -265,6 +469,7 @@ class LocalAddressVisitor final : public GenTreeVisitor<LocalAddressVisitor>
     ArrayStack<Value> m_valueStack;
     bool              m_stmtModified;
     bool              m_madeChanges;
+    LocalSequencer*   m_sequencer;
 
 public:
     enum
@@ -273,14 +478,15 @@ public:
         DoPostOrder       = true,
         ComputeStack      = true,
         DoLclVarsOnly     = false,
-        UseExecutionOrder = false,
+        UseExecutionOrder = true,
     };
 
-    LocalAddressVisitor(Compiler* comp)
+    LocalAddressVisitor(Compiler* comp, LocalSequencer* sequencer)
         : GenTreeVisitor<LocalAddressVisitor>(comp)
         , m_valueStack(comp->getAllocator(CMK_LocalAddressVisitor))
         , m_stmtModified(false)
         , m_madeChanges(false)
+        , m_sequencer(sequencer)
     {
     }
 
@@ -300,6 +506,12 @@ public:
 #endif // DEBUG
 
         m_stmtModified = false;
+
+        if (m_sequencer != nullptr)
+        {
+            m_sequencer->Start(stmt);
+        }
+
         WalkTree(stmt->GetRootNodePointer(), nullptr);
 
         // We could have something a statement like IND(ADDR(LCL_VAR)) so we need to escape
@@ -321,6 +533,18 @@ public:
         assert(m_valueStack.Empty());
         m_madeChanges |= m_stmtModified;
 
+        if (m_sequencer != nullptr)
+        {
+            if (m_stmtModified)
+            {
+                m_sequencer->Sequence(stmt);
+            }
+            else
+            {
+                m_sequencer->Finish(stmt);
+            }
+        }
+
 #ifdef DEBUG
         if (m_compiler->verbose)
         {
@@ -400,24 +624,28 @@ public:
                 assert(TopValue(0).Node() == node);
 
                 TopValue(0).Location(node->AsLclVar());
+                SequenceLocal(node->AsLclVarCommon());
                 break;
 
             case GT_LCL_VAR_ADDR:
                 assert(TopValue(0).Node() == node);
 
                 TopValue(0).Address(node->AsLclVar());
+                SequenceLocal(node->AsLclVarCommon());
                 break;
 
             case GT_LCL_FLD:
                 assert(TopValue(0).Node() == node);
 
                 TopValue(0).Location(node->AsLclFld());
+                SequenceLocal(node->AsLclVarCommon());
                 break;
 
             case GT_LCL_FLD_ADDR:
                 assert(TopValue(0).Node() == node);
 
                 TopValue(0).Address(node->AsLclFld());
+                SequenceLocal(node->AsLclVarCommon());
                 break;
 
             case GT_ADD:
@@ -542,6 +770,26 @@ public:
                 }
                 break;
 
+            case GT_ASG:
+                EscapeValue(TopValue(0), node);
+                PopValue();
+                EscapeValue(TopValue(0), node);
+                PopValue();
+                assert(TopValue(0).Node() == node);
+
+                SequenceAssignment(node->AsOp());
+                break;
+
+            case GT_CALL:
+                while (TopValue(0).Node() != node)
+                {
+                    EscapeValue(TopValue(0), node);
+                    PopValue();
+                }
+
+                SequenceCall(node->AsCall());
+                break;
+
             default:
                 while (TopValue(0).Node() != node)
                 {
@@ -609,6 +857,7 @@ private:
         unsigned   lclNum = val.LclNum();
         LclVarDsc* varDsc = m_compiler->lvaGetDesc(lclNum);
 
+        GenTreeFlags defFlag            = GTF_EMPTY;
         GenTreeCall* callUser           = user->IsCall() ? user->AsCall() : nullptr;
         bool         hasHiddenStructArg = false;
         if (m_compiler->opts.compJitOptimizeStructHiddenBuffer && (callUser != nullptr) &&
@@ -633,6 +882,13 @@ private:
                 m_compiler->lvaSetHiddenBufferStructArg(lclNum);
                 hasHiddenStructArg = true;
                 callUser->gtCallMoreFlags |= GTF_CALL_M_RETBUFFARG_LCLOPT;
+                defFlag = GTF_VAR_DEF;
+
+                if ((val.Offset() != 0) ||
+                    (varDsc->lvExactSize != m_compiler->typGetObjLayout(callUser->gtRetClsHnd)->GetSize()))
+                {
+                    defFlag |= GTF_VAR_USEASG;
+                }
             }
         }
 
@@ -657,6 +913,7 @@ private:
 #endif // TARGET_64BIT
 
         MorphLocalAddress(val.Node(), lclNum, val.Offset());
+        val.Node()->gtFlags |= defFlag;
 
         INDEBUG(val.Consume();)
     }
@@ -1347,6 +1604,30 @@ private:
 
         return node->AsLclVar();
     }
+
+    void SequenceLocal(GenTreeLclVarCommon* lcl)
+    {
+        if (m_sequencer != nullptr) // No-op when the visitor was constructed without a sequencer.
+        {
+            m_sequencer->SequenceLocal(lcl);
+        }
+    }
+
+    void SequenceAssignment(GenTreeOp* asg)
+    {
+        if (m_sequencer != nullptr) // No-op when the visitor was constructed without a sequencer.
+        {
+            m_sequencer->SequenceAssignment(asg);
+        }
+    }
+
+    void SequenceCall(GenTreeCall* call)
+    {
+        if (m_sequencer != nullptr) // No-op when the visitor was constructed without a sequencer.
+        {
+            m_sequencer->SequenceCall(call);
+        }
+    }
 };
 
 //------------------------------------------------------------------------
@@ -1364,7 +1645,8 @@ private:
 PhaseStatus Compiler::fgMarkAddressExposedLocals()
 {
     bool                madeChanges = false;
-    LocalAddressVisitor visitor(this);
+    LocalSequencer      sequencer(this);
+    LocalAddressVisitor visitor(this, opts.OptimizationEnabled() ? &sequencer : nullptr);
 
     for (BasicBlock* const block : Blocks())
     {
index f9b1faf7709c1e2e8482d225e0f68dad653ab516..04411de16624c5c7c552c41c3ae8a7e27e8d87c5 100644 (file)
@@ -3437,11 +3437,13 @@ weight_t BasicBlock::getBBWeight(Compiler* comp)
 class LclVarDsc_SmallCode_Less
 {
     const LclVarDsc* m_lvaTable;
+    RefCountState    m_rcs;
     INDEBUG(unsigned m_lvaCount;)
 
 public:
-    LclVarDsc_SmallCode_Less(const LclVarDsc* lvaTable DEBUGARG(unsigned lvaCount))
+    LclVarDsc_SmallCode_Less(const LclVarDsc* lvaTable, RefCountState rcs DEBUGARG(unsigned lvaCount))
         : m_lvaTable(lvaTable)
+        , m_rcs(rcs)
 #ifdef DEBUG
         , m_lvaCount(lvaCount)
 #endif
@@ -3463,8 +3465,8 @@ public:
         assert(!dsc1->lvRegister);
         assert(!dsc2->lvRegister);
 
-        unsigned weight1 = dsc1->lvRefCnt();
-        unsigned weight2 = dsc2->lvRefCnt();
+        unsigned weight1 = dsc1->lvRefCnt(m_rcs);
+        unsigned weight2 = dsc2->lvRefCnt(m_rcs);
 
 #ifndef TARGET_ARM
         // ARM-TODO: this was disabled for ARM under !FEATURE_FP_REGALLOC; it was probably a left-over from
@@ -3546,11 +3548,13 @@ public:
 class LclVarDsc_BlendedCode_Less
 {
     const LclVarDsc* m_lvaTable;
+    RefCountState    m_rcs;
     INDEBUG(unsigned m_lvaCount;)
 
 public:
-    LclVarDsc_BlendedCode_Less(const LclVarDsc* lvaTable DEBUGARG(unsigned lvaCount))
+    LclVarDsc_BlendedCode_Less(const LclVarDsc* lvaTable, RefCountState rcs DEBUGARG(unsigned lvaCount))
         : m_lvaTable(lvaTable)
+        , m_rcs(rcs)
 #ifdef DEBUG
         , m_lvaCount(lvaCount)
 #endif
@@ -3572,8 +3576,8 @@ public:
         assert(!dsc1->lvRegister);
         assert(!dsc2->lvRegister);
 
-        weight_t weight1 = dsc1->lvRefCntWtd();
-        weight_t weight2 = dsc2->lvRefCntWtd();
+        weight_t weight1 = dsc1->lvRefCntWtd(m_rcs);
+        weight_t weight2 = dsc2->lvRefCntWtd(m_rcs);
 
 #ifndef TARGET_ARM
         // ARM-TODO: this was disabled for ARM under !FEATURE_FP_REGALLOC; it was probably a left-over from
@@ -3613,9 +3617,9 @@ public:
         }
 
         // If the weighted ref counts are different then try the unweighted ref counts.
-        if (dsc1->lvRefCnt() != dsc2->lvRefCnt())
+        if (dsc1->lvRefCnt(m_rcs) != dsc2->lvRefCnt(m_rcs))
         {
-            return dsc1->lvRefCnt() > dsc2->lvRefCnt();
+            return dsc1->lvRefCnt(m_rcs) > dsc2->lvRefCnt(m_rcs);
         }
 
         // If one is a GC type and the other is not the GC type wins.
@@ -3656,8 +3660,8 @@ void Compiler::lvaSortByRefCount()
         lvaTrackedToVarNum     = new (getAllocator(CMK_LvaTable)) unsigned[lvaTrackedToVarNumSize];
     }
 
-    unsigned  trackedCount = 0;
-    unsigned* tracked      = lvaTrackedToVarNum;
+    unsigned  trackedCandidateCount = 0;
+    unsigned* trackedCandidates     = lvaTrackedToVarNum;
 
     // Fill in the table used for sorting
 
@@ -3668,11 +3672,11 @@ void Compiler::lvaSortByRefCount()
         // Start by assuming that the variable will be tracked.
         varDsc->lvTracked = 1;
 
-        if (varDsc->lvRefCnt() == 0)
+        if (varDsc->lvRefCnt(lvaRefCountState) == 0)
         {
             // Zero ref count, make this untracked.
             varDsc->lvTracked = 0;
-            varDsc->setLvRefCntWtd(0);
+            varDsc->setLvRefCntWtd(0, lvaRefCountState);
         }
 
 #if !defined(TARGET_64BIT)
@@ -3800,42 +3804,54 @@ void Compiler::lvaSortByRefCount()
 
         if (varDsc->lvTracked)
         {
-            tracked[trackedCount++] = lclNum;
+            trackedCandidates[trackedCandidateCount++] = lclNum;
         }
     }
 
-    // Now sort the tracked variable table by ref-count
-    if (compCodeOpt() == SMALL_CODE)
-    {
-        jitstd::sort(tracked, tracked + trackedCount, LclVarDsc_SmallCode_Less(lvaTable DEBUGARG(lvaCount)));
-    }
-    else
+    lvaTrackedCount = min(trackedCandidateCount, (unsigned)JitConfig.JitMaxLocalsToTrack());
+
+    // Sort the candidates. In the late liveness passes we want lower tracked
+    // indices to be more important variables, so we always do this. In early
+    // liveness it does not matter, so we can skip it when we are going to
+    // track everything.
+    // TODO-TP: For early liveness we could do a partial sort for the large
+    // case.
+    if (!fgIsDoingEarlyLiveness || (lvaTrackedCount < trackedCandidateCount))
     {
-        jitstd::sort(tracked, tracked + trackedCount, LclVarDsc_BlendedCode_Less(lvaTable DEBUGARG(lvaCount)));
+        // Now sort the tracked variable table by ref-count
+        if (compCodeOpt() == SMALL_CODE)
+        {
+            jitstd::sort(trackedCandidates, trackedCandidates + trackedCandidateCount,
+                         LclVarDsc_SmallCode_Less(lvaTable, lvaRefCountState DEBUGARG(lvaCount)));
+        }
+        else
+        {
+            jitstd::sort(trackedCandidates, trackedCandidates + trackedCandidateCount,
+                         LclVarDsc_BlendedCode_Less(lvaTable, lvaRefCountState DEBUGARG(lvaCount)));
+        }
     }
 
-    lvaTrackedCount = min((unsigned)JitConfig.JitMaxLocalsToTrack(), trackedCount);
-
     JITDUMP("Tracked variable (%u out of %u) table:\n", lvaTrackedCount, lvaCount);
 
     // Assign indices to all the variables we've decided to track
     for (unsigned varIndex = 0; varIndex < lvaTrackedCount; varIndex++)
     {
-        LclVarDsc* varDsc = lvaGetDesc(tracked[varIndex]);
+        LclVarDsc* varDsc = lvaGetDesc(trackedCandidates[varIndex]);
         assert(varDsc->lvTracked);
         varDsc->lvVarIndex = static_cast<unsigned short>(varIndex);
 
-        INDEBUG(if (verbose) { gtDispLclVar(tracked[varIndex]); })
-        JITDUMP(" [%6s]: refCnt = %4u, refCntWtd = %6s\n", varTypeName(varDsc->TypeGet()), varDsc->lvRefCnt(),
-                refCntWtd2str(varDsc->lvRefCntWtd(), /* padForDecimalPlaces */ true));
+        INDEBUG(if (verbose) { gtDispLclVar(trackedCandidates[varIndex]); })
+        JITDUMP(" [%6s]: refCnt = %4u, refCntWtd = %6s\n", varTypeName(varDsc->TypeGet()),
+                varDsc->lvRefCnt(lvaRefCountState),
+                refCntWtd2str(varDsc->lvRefCntWtd(lvaRefCountState), /* padForDecimalPlaces */ true));
     }
 
     JITDUMP("\n");
 
     // Mark all variables past the first 'lclMAX_TRACKED' as untracked
-    for (unsigned varIndex = lvaTrackedCount; varIndex < trackedCount; varIndex++)
+    for (unsigned varIndex = lvaTrackedCount; varIndex < trackedCandidateCount; varIndex++)
     {
-        LclVarDsc* varDsc = lvaGetDesc(tracked[varIndex]);
+        LclVarDsc* varDsc = lvaGetDesc(trackedCandidates[varIndex]);
         assert(varDsc->lvTracked);
         varDsc->lvTracked = 0;
     }
@@ -7652,8 +7668,8 @@ void Compiler::lvaDumpEntry(unsigned lclNum, FrameLayoutState curState, size_t r
             printf("    ]");
         }
 
-        printf(" (%3u,%*s)", varDsc->lvRefCnt(), (int)refCntWtdWidth,
-               refCntWtd2str(varDsc->lvRefCntWtd(), /* padForDecimalPlaces */ true));
+        printf(" (%3u,%*s)", varDsc->lvRefCnt(lvaRefCountState), (int)refCntWtdWidth,
+               refCntWtd2str(varDsc->lvRefCntWtd(lvaRefCountState), /* padForDecimalPlaces */ true));
 
         printf(" %7s ", varTypeName(type));
         if (genTypeSize(type) == 0)
@@ -7666,7 +7682,7 @@ void Compiler::lvaDumpEntry(unsigned lclNum, FrameLayoutState curState, size_t r
         }
 
         // The register or stack location field is 11 characters wide.
-        if ((varDsc->lvRefCnt() == 0) && !varDsc->lvImplicitlyReferenced)
+        if ((varDsc->lvRefCnt(lvaRefCountState) == 0) && !varDsc->lvImplicitlyReferenced)
         {
             printf("zero-ref   ");
         }
@@ -7914,7 +7930,7 @@ void Compiler::lvaTableDump(FrameLayoutState curState)
     {
         for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
         {
-            size_t width = strlen(refCntWtd2str(varDsc->lvRefCntWtd(), /* padForDecimalPlaces */ true));
+            size_t width = strlen(refCntWtd2str(varDsc->lvRefCntWtd(lvaRefCountState), /* padForDecimalPlaces */ true));
             if (width > refCntWtdWidth)
             {
                 refCntWtdWidth = width;
index 5adef46eff7930babbab335602b0e9913981e639..90a92d7770959ec609c92f9409f54762bf87859e 100644 (file)
@@ -1525,37 +1525,7 @@ bool LIR::Range::CheckLIR(Compiler* compiler, bool checkUnusedValues) const
         return true;
     }
 
-    // Check the gtNext/gtPrev links: (1) ensure there are no circularities, (2) ensure the gtPrev list is
-    // precisely the inverse of the gtNext list.
-    //
-    // To detect circularity, use the "tortoise and hare" 2-pointer algorithm.
-
-    GenTree* slowNode = FirstNode();
-    assert(slowNode != nullptr); // because it's a non-empty range
-    GenTree* fastNode1    = nullptr;
-    GenTree* fastNode2    = slowNode;
-    GenTree* prevSlowNode = nullptr;
-    while (((fastNode1 = fastNode2->gtNext) != nullptr) && ((fastNode2 = fastNode1->gtNext) != nullptr))
-    {
-        if ((slowNode == fastNode1) || (slowNode == fastNode2))
-        {
-            assert(!"gtNext nodes have a circularity!");
-        }
-        assert(slowNode->gtPrev == prevSlowNode);
-        prevSlowNode = slowNode;
-        slowNode     = slowNode->gtNext;
-        assert(slowNode != nullptr); // the fastNodes would have gone null first.
-    }
-    // If we get here, the list had no circularities, so either fastNode1 or fastNode2 must be nullptr.
-    assert((fastNode1 == nullptr) || (fastNode2 == nullptr));
-
-    // Need to check the rest of the gtPrev links.
-    while (slowNode != nullptr)
-    {
-        assert(slowNode->gtPrev == prevSlowNode);
-        prevSlowNode = slowNode;
-        slowNode     = slowNode->gtNext;
-    }
+    CheckDoublyLinkedList<GenTree, &GenTree::gtPrev, &GenTree::gtNext>(FirstNode());
 
     SmallHashTable<GenTree*, bool, 32> unusedDefs(compiler->getAllocatorDebugOnly());
 
index e589c3e8f953b56f96dde34c36013fc156d8d92d..80c8e5a432e85fecf6ef51eeb080e8327c1396b3 100644 (file)
@@ -337,4 +337,45 @@ inline bool GenTree::IsRegOptional() const
     return (gtLIRFlags & LIR::Flags::RegOptional) != 0;
 }
 
+template <typename T, T* T::*prev, T* T::*next>
+static void CheckDoublyLinkedList(T* first)
+{
+    // (1) ensure there are no circularities, (2) ensure the prev list is
+    // precisely the inverse of the gtNext list.
+    //
+    // To detect circularity, use the "tortoise and hare" 2-pointer algorithm.
+
+    if (first == nullptr)
+    {
+        return;
+    }
+
+    GenTree* slowNode = first;
+    assert(slowNode != nullptr);
+    GenTree* fastNode1    = nullptr;
+    GenTree* fastNode2    = slowNode;
+    GenTree* prevSlowNode = nullptr;
+    while (((fastNode1 = fastNode2->*next) != nullptr) && ((fastNode2 = fastNode1->*next) != nullptr))
+    {
+        if ((slowNode == fastNode1) || (slowNode == fastNode2))
+        {
+            assert(!"Circularity detected");
+        }
+        assert(slowNode->*prev == prevSlowNode && "Invalid prev link");
+        prevSlowNode = slowNode;
+        slowNode     = slowNode->*next;
+        assert(slowNode != nullptr); // the fastNodes would have gone null first.
+    }
+    // If we get here, the list had no circularities, so either fastNode1 or fastNode2 must be nullptr.
+    assert((fastNode1 == nullptr) || (fastNode2 == nullptr));
+
+    // Need to check the rest of the gtPrev links.
+    while (slowNode != nullptr)
+    {
+        assert(slowNode->*prev == prevSlowNode && "Invalid prev link");
+        prevSlowNode = slowNode;
+        slowNode     = slowNode->*next;
+    }
+}
+
 #endif // _LIR_H_
index e1f0a9d02658ee7b116e7366dd1ae93e4620a285..ad958d63f89091a036a6f8b91d5bf53bf0219bad 100644 (file)
@@ -28,7 +28,7 @@ void Compiler::fgMarkUseDef(GenTreeLclVarCommon* tree)
     LclVarDsc* const varDsc = lvaGetDesc(lclNum);
 
     // We should never encounter a reference to a lclVar that has a zero refCnt.
-    if (varDsc->lvRefCnt() == 0 && (!varTypeIsPromotable(varDsc) || !varDsc->lvPromoted))
+    if (varDsc->lvRefCnt(lvaRefCountState) == 0 && (!varTypeIsPromotable(varDsc) || !varDsc->lvPromoted))
     {
         JITDUMP("Found reference to V%02u with zero refCnt.\n", lclNum);
         assert(!"We should never encounter a reference to a lclVar that has a zero refCnt.");
@@ -469,7 +469,7 @@ void Compiler::fgPerBlockLocalVarLiveness()
                 fgPerNodeLocalVarLiveness(node);
             }
         }
-        else
+        else if (fgNodeThreading == NodeThreading::AllTrees)
         {
             for (Statement* const stmt : block->NonPhiStatements())
             {
@@ -480,6 +480,59 @@ void Compiler::fgPerBlockLocalVarLiveness()
                 }
             }
         }
+        else
+        {
+            assert(fgIsDoingEarlyLiveness && (fgNodeThreading == NodeThreading::AllLocals));
+
+            if (compQmarkUsed)
+            {
+                for (Statement* stmt : block->Statements())
+                {
+                    GenTree* dst;
+                    GenTree* qmark = fgGetTopLevelQmark(stmt->GetRootNode(), &dst);
+                    if (qmark == nullptr)
+                    {
+                        for (GenTreeLclVarCommon* lcl : stmt->LocalsTreeList())
+                        {
+                            fgMarkUseDef(lcl);
+                        }
+                    }
+                    else
+                    {
+                        // Assigned local should be the very last local.
+                        assert((dst == nullptr) ||
+                               ((stmt->GetRootNode()->gtPrev == dst) && ((dst->gtFlags & GTF_VAR_DEF) != 0)));
+
+                        // Conservatively ignore defs that may be conditional
+                        // but would otherwise still interfere with the
+                        // lifetimes we compute here. We generally do not
+                        // handle qmarks very precisely here -- last uses may
+                        // not be marked as such due to interference with other
+                        // qmark arms.
+                        for (GenTreeLclVarCommon* lcl : stmt->LocalsTreeList())
+                        {
+                            bool isUse = ((lcl->gtFlags & GTF_VAR_DEF) == 0) || ((lcl->gtFlags & GTF_VAR_USEASG) != 0);
+                            // We can still handle the pure def at the top level.
+                            bool conditional = lcl != dst;
+                            if (isUse || !conditional)
+                            {
+                                fgMarkUseDef(lcl);
+                            }
+                        }
+                    }
+                }
+            }
+            else
+            {
+                for (Statement* stmt : block->Statements())
+                {
+                    for (GenTreeLclVarCommon* lcl : stmt->LocalsTreeList())
+                    {
+                        fgMarkUseDef(lcl);
+                    }
+                }
+            }
+        }
 
         // Mark the FrameListRoot as used, if applicable.
 
@@ -1225,6 +1278,27 @@ class LiveVarAnalysis
             }
         }
 
+        if (m_compiler->fgIsDoingEarlyLiveness && m_compiler->opts.IsOSR() &&
+            ((block->bbFlags & BBF_RECURSIVE_TAILCALL) != 0))
+        {
+            // Early liveness happens between import and morph where we may
+            // have identified a tailcall-to-loop candidate but not yet
+            // expanded it. In OSR compilations we need to model the potential
+            // backedge.
+            //
+            // Technically we would need to do this in normal compilations too,
+            // but given that the tailcall-to-loop optimization is sound we can
+            // rely on the call node we will see in this block having all the
+            // necessary dependencies. That's not the case in OSR where the OSR
+            // state index variable may be live at this point without appearing
+            // as an explicit use anywhere.
+            VarSetOps::UnionD(m_compiler, m_liveOut, m_compiler->fgEntryBB->bbLiveIn);
+            if (m_compiler->fgEntryBB->bbNum <= block->bbNum)
+            {
+                m_hasPossibleBackEdge = true;
+            }
+        }
+
         // Additionally, union in all the live-in tracked vars of successors.
         for (BasicBlock* succ : block->GetAllSuccs(m_compiler))
         {
@@ -1734,6 +1808,7 @@ bool Compiler::fgComputeLifeUntrackedLocal(VARSET_TP&           life,
 //
 // Returns:
 //    `true` if the local var node corresponds to a dead store; `false` otherwise.
+//
 bool Compiler::fgComputeLifeLocal(VARSET_TP& life, VARSET_VALARG_TP keepAliveVars, GenTree* lclVarNode)
 {
     unsigned lclNum = lclVarNode->AsLclVarCommon()->GetLclNum();
@@ -1762,6 +1837,53 @@ bool Compiler::fgComputeLifeLocal(VARSET_TP& life, VARSET_VALARG_TP keepAliveVar
     return false;
 }
 
+//------------------------------------------------------------------------
+// Compiler::fgTryRemoveDeadStoreEarly:
+//    Try to remove a dead store during early liveness.
+//
+// Arguments:
+//    stmt - The statement containing the dead store.
+//    cur  - The destination local of the dead store.
+//
+// Remarks:
+//    We only handle the simple top level case since dead embedded stores are
+//    extremely rare in early liveness.
+//
+// Returns:
+//    The next node to compute liveness for (in a backwards traversal); nullptr if the statement was removed.
+//
+GenTree* Compiler::fgTryRemoveDeadStoreEarly(Statement* stmt, GenTreeLclVarCommon* cur)
+{
+    if (!stmt->GetRootNode()->OperIs(GT_ASG) || (stmt->GetRootNode()->gtGetOp1() != cur))
+    {
+        return cur->gtPrev; // Not a top-level ASG to 'cur': leave the store alone and keep walking.
+    }
+
+    JITDUMP("Store [%06u] is dead", dspTreeID(stmt->GetRootNode()));
+    // The def ought to be the last thing.
+    assert(stmt->GetRootNode()->gtPrev == cur);
+
+    GenTree* sideEffects = nullptr;
+    gtExtractSideEffList(stmt->GetRootNode()->gtGetOp2(), &sideEffects);
+
+    if (sideEffects == nullptr)
+    {
+        JITDUMP(" and has no side effects, removing statement\n");
+        fgRemoveStmt(compCurBB, stmt DEBUGARG(false));
+        return nullptr;
+    }
+    else
+    {
+        JITDUMP(" but has side effects. Replacing with:\n\n");
+        stmt->SetRootNode(sideEffects);
+        fgSequenceLocals(stmt); // The root changed, so the statement's locals are sequenced again.
+        DISPTREE(sideEffects);
+        JITDUMP("\n");
+        // continue at tail of the side effects
+        return stmt->GetRootNode()->gtPrev;
+    }
+}
+
 /*****************************************************************************
  *
  * Compute the set of live variables at each node in a given statement
@@ -1780,6 +1902,7 @@ void Compiler::fgComputeLife(VARSET_TP&       life,
     noway_assert(VarSetOps::IsSubset(this, keepAliveVars, life));
     noway_assert(endNode || (startNode == compCurStmt->GetRootNode()));
 
+    assert(!fgIsDoingEarlyLiveness);
     // NOTE: Live variable analysis will not work if you try
     // to use the result of an assignment node directly!
     for (GenTree* tree = startNode; tree != endNode; tree = tree->gtPrev)
@@ -2570,52 +2693,55 @@ void Compiler::fgInterBlockLocalVarLiveness()
         }
     }
 
-    LclVarDsc* varDsc;
-    unsigned   varNum;
-
-    for (varNum = 0, varDsc = lvaTable; varNum < lvaCount; varNum++, varDsc++)
+    if (!fgIsDoingEarlyLiveness)
     {
-        // Ignore the variable if it's not tracked
+        LclVarDsc* varDsc;
+        unsigned   varNum;
 
-        if (!varDsc->lvTracked)
+        for (varNum = 0, varDsc = lvaTable; varNum < lvaCount; varNum++, varDsc++)
         {
-            continue;
-        }
-
-        // Fields of dependently promoted structs may be tracked. We shouldn't set lvMustInit on them since
-        // the whole parent struct will be initialized; however, lvLiveInOutOfHndlr should be set on them
-        // as appropriate.
+            // Ignore the variable if it's not tracked
 
-        bool fieldOfDependentlyPromotedStruct = lvaIsFieldOfDependentlyPromotedStruct(varDsc);
+            if (!varDsc->lvTracked)
+            {
+                continue;
+            }
 
-        // Un-init locals may need auto-initialization. Note that the
-        // liveness of such locals will bubble to the top (fgFirstBB)
-        // in fgInterBlockLocalVarLiveness()
+            // Fields of dependently promoted structs may be tracked. We shouldn't set lvMustInit on them since
+            // the whole parent struct will be initialized; however, lvLiveInOutOfHndlr should be set on them
+            // as appropriate.
 
-        if (!varDsc->lvIsParam && VarSetOps::IsMember(this, fgFirstBB->bbLiveIn, varDsc->lvVarIndex) &&
-            (info.compInitMem || varTypeIsGC(varDsc->TypeGet())) && !fieldOfDependentlyPromotedStruct)
-        {
-            varDsc->lvMustInit = true;
-        }
+            bool fieldOfDependentlyPromotedStruct = lvaIsFieldOfDependentlyPromotedStruct(varDsc);
 
-        // Mark all variables that are live on entry to an exception handler
-        // or on exit from a filter handler or finally.
+            // Un-init locals may need auto-initialization. Note that the
+            // liveness of such locals will bubble to the top (fgFirstBB)
+            // in fgInterBlockLocalVarLiveness()
 
-        bool isFinallyVar = VarSetOps::IsMember(this, finallyVars, varDsc->lvVarIndex);
-        if (isFinallyVar || VarSetOps::IsMember(this, exceptVars, varDsc->lvVarIndex))
-        {
-            // Mark the variable appropriately.
-            lvaSetVarLiveInOutOfHandler(varNum);
+            if (!varDsc->lvIsParam && VarSetOps::IsMember(this, fgFirstBB->bbLiveIn, varDsc->lvVarIndex) &&
+                (info.compInitMem || varTypeIsGC(varDsc->TypeGet())) && !fieldOfDependentlyPromotedStruct)
+            {
+                varDsc->lvMustInit = true;
+            }
 
-            // Mark all pointer variables live on exit from a 'finally' block as
-            // 'explicitly initialized' (must-init) for GC-ref types.
+            // Mark all variables that are live on entry to an exception handler
+            // or on exit from a filter handler or finally.
 
-            if (isFinallyVar)
+            bool isFinallyVar = VarSetOps::IsMember(this, finallyVars, varDsc->lvVarIndex);
+            if (isFinallyVar || VarSetOps::IsMember(this, exceptVars, varDsc->lvVarIndex))
             {
-                // Set lvMustInit only if we have a non-arg, GC pointer.
-                if (!varDsc->lvIsParam && varTypeIsGC(varDsc->TypeGet()))
+                // Mark the variable appropriately.
+                lvaSetVarLiveInOutOfHandler(varNum);
+
+                // Mark all pointer variables live on exit from a 'finally' block as
+                // 'explicitly initialized' (must-init) for GC-ref types.
+
+                if (isFinallyVar)
                 {
-                    varDsc->lvMustInit = true;
+                    // Set lvMustInit only if we have a non-arg, GC pointer.
+                    if (!varDsc->lvIsParam && varTypeIsGC(varDsc->TypeGet()))
+                    {
+                        varDsc->lvMustInit = true;
+                    }
                 }
             }
         }
@@ -2654,7 +2780,7 @@ void Compiler::fgInterBlockLocalVarLiveness()
         {
             fgComputeLifeLIR(life, block, volatileVars);
         }
-        else
+        else if (fgNodeThreading == NodeThreading::AllTrees)
         {
             /* Get the first statement in the block */
 
@@ -2702,11 +2828,85 @@ void Compiler::fgInterBlockLocalVarLiveness()
 #endif // DEBUG
             } while (compCurStmt != firstStmt);
         }
+        else
+        {
+            assert(fgIsDoingEarlyLiveness && (fgNodeThreading == NodeThreading::AllLocals));
+            compCurStmt = nullptr;
+            VARSET_TP keepAliveVars(VarSetOps::Union(this, volatileVars, compCurBB->bbScope));
+
+            Statement* firstStmt = block->firstStmt();
+
+            if (firstStmt == nullptr)
+            {
+                continue;
+            }
+
+            Statement* stmt = block->lastStmt();
+
+            while (true)
+            {
+                Statement* prevStmt = stmt->GetPrevStmt();
+
+                GenTree* dst   = nullptr;
+                GenTree* qmark = nullptr;
+                if (compQmarkUsed)
+                {
+                    qmark = fgGetTopLevelQmark(stmt->GetRootNode(), &dst);
+                }
+
+                if (qmark != nullptr)
+                {
+                    for (GenTree* cur = stmt->GetRootNode()->gtPrev; cur != nullptr;)
+                    {
+                        assert(cur->OperIsLocal() || cur->OperIsLocalAddr());
+                        bool isDef = ((cur->gtFlags & GTF_VAR_DEF) != 0) && ((cur->gtFlags & GTF_VAR_USEASG) == 0);
+                        bool conditional = cur != dst;
+                        // Ignore conditional defs that would otherwise
+                        // (incorrectly) interfere with liveness in other
+                        // branches of the qmark.
+                        if (isDef && conditional)
+                        {
+                            cur = cur->gtPrev;
+                            continue;
+                        }
+
+                        if (!fgComputeLifeLocal(life, keepAliveVars, cur))
+                        {
+                            cur = cur->gtPrev;
+                            continue;
+                        }
+
+                        assert(cur == dst);
+                        cur = fgTryRemoveDeadStoreEarly(stmt, cur->AsLclVarCommon());
+                    }
+                }
+                else
+                {
+                    for (GenTree* cur = stmt->GetRootNode()->gtPrev; cur != nullptr;)
+                    {
+                        assert(cur->OperIsLocal() || cur->OperIsLocalAddr());
+                        if (!fgComputeLifeLocal(life, keepAliveVars, cur))
+                        {
+                            cur = cur->gtPrev;
+                            continue;
+                        }
+
+                        cur = fgTryRemoveDeadStoreEarly(stmt, cur->AsLclVarCommon());
+                    }
+                }
+
+                if (stmt == firstStmt)
+                {
+                    break;
+                }
+
+                stmt = prevStmt;
+            }
+        }
 
         /* Done with the current block - if we removed any statements, some
          * variables may have become dead at the beginning of the block
          * -> have to update bbLiveIn */
-
         if (!VarSetOps::Equal(this, life, block->bbLiveIn))
         {
             /* some variables have become dead all across the block
@@ -2772,3 +2972,52 @@ void Compiler::fgDispBBLiveness()
 }
 
 #endif // DEBUG
+
+//------------------------------------------------------------------------
+// fgEarlyLiveness: Run the early liveness pass.
+//
+// Return Value:
+//     Returns MODIFIED_EVERYTHING when liveness was computed and DCE was run, MODIFIED_NOTHING if the pass was skipped.
+//
+PhaseStatus Compiler::fgEarlyLiveness()
+{
+    if (!opts.OptimizationEnabled()) // The pass is skipped entirely when not optimizing.
+    {
+        return PhaseStatus::MODIFIED_NOTHING;
+    }
+
+#ifdef DEBUG
+    static ConfigMethodRange JitEnableEarlyLivenessRange;
+    JitEnableEarlyLivenessRange.EnsureInit(JitConfig.JitEnableEarlyLivenessRange());
+    const unsigned hash = info.compMethodHash();
+    if (!JitEnableEarlyLivenessRange.Contains(hash)) // DEBUG-only knob: skip methods whose hash is not in the range.
+    {
+        return PhaseStatus::MODIFIED_NOTHING;
+    }
+#endif // DEBUG
+
+    fgIsDoingEarlyLiveness = true; // Cleared again below, once the analysis loop has finished.
+    lvaSortByRefCount();
+
+    ClearPromotedStructDeathVars();
+
+    // Initialize the per-block var sets.
+    fgInitBlockVarSets();
+
+    fgLocalVarLivenessChanged = false; // Reset once here; it is not reset again between loop iterations.
+    do
+    {
+        // Figure out use/def info for all basic blocks.
+        fgPerBlockLocalVarLiveness();
+        EndPhase(PHASE_LCLVARLIVENESS_PERBLOCK);
+
+        // Live variable analysis; presumably sets fgStmtRemoved when dead code is removed (confirm in callee).
+
+        fgStmtRemoved = false;
+        fgInterBlockLocalVarLiveness();
+    } while (fgStmtRemoved && fgLocalVarLivenessChanged); // Recompute only while both flags are set.
+
+    fgIsDoingEarlyLiveness = false;
+    fgDidEarlyLiveness     = true; // Read later, e.g. by the outgoing struct arg copy elision in morph.
+    return PhaseStatus::MODIFIED_EVERYTHING;
+}
index d99e25ac45c1a8db068429248897389553d49782..4d7351d2011b71bbef98cb144dbd20b03dd98ebb 100644 (file)
@@ -1700,7 +1700,7 @@ void Compiler::optPerformStaticOptimizations(unsigned loopNum, LoopCloneContext*
 
                 indir->gtFlags |= GTF_ORDER_SIDEEFF | GTF_IND_NONFAULTING;
                 indir->gtFlags &= ~GTF_EXCEPT;
-                assert(!fgStmtListThreaded);
+                assert(fgNodeThreading == NodeThreading::None);
                 gtUpdateStmtSideEffects(stmt);
 
                 JITDUMP("After:\n");
index c1b21f20315a460a69f7b71cf33a96b879911fd7..32c278b330178c5f5ef72cfdd714dabeabe26431 100644 (file)
@@ -3943,11 +3943,17 @@ void Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, CallArg* arg)
 
     // If we're optimizing, see if we can avoid making a copy.
     //
-    // We don't need a copy if this is the last use of an implicit by-ref local.
+    // We don't need a copy if this is the last use of the local.
     //
     if (opts.OptimizationEnabled() && arg->AbiInfo.PassedByRef)
     {
-        GenTreeLclVar* const lcl = argx->IsImplicitByrefParameterValue(this);
+        GenTreeLclVarCommon* implicitByRefLcl = argx->IsImplicitByrefParameterValue(this);
+
+        GenTreeLclVarCommon* lcl = implicitByRefLcl;
+        if ((lcl == nullptr) && argx->OperIsLocal())
+        {
+            lcl = argx->AsLclVarCommon();
+        }
 
         if (lcl != nullptr)
         {
@@ -3955,9 +3961,9 @@ void Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, CallArg* arg)
             LclVarDsc* const     varDsc           = lvaGetDesc(varNum);
             const unsigned short totalAppearances = varDsc->lvRefCnt(RCS_EARLY);
 
-            // We don't have liveness so we rely on other indications of last use.
-            //
-            // We handle these cases:
+            // We generally use liveness to figure out if we can omit creating
+            // this copy. However, even without liveness (e.g. due to too many
+            // tracked locals), we also handle some other cases:
             //
             // * (must not copy) If the call is a tail call, the use is a last use.
             //   We must skip the copy if we have a fast tail call.
@@ -3970,14 +3976,46 @@ void Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, CallArg* arg)
             // * (may not copy) if there is exactly one use of the local in the method,
             //   and the call is not in loop, this is a last use.
             //
-            // fgMightHaveLoop() is expensive; check it last, only if necessary.
-            //
-            if (call->IsTailCall() ||                              //
-                ((totalAppearances == 1) && call->IsNoReturn()) || //
-                ((totalAppearances == 1) && !fgMightHaveLoop()))
+            bool omitCopy = call->IsTailCall();
+
+            if (!omitCopy && fgDidEarlyLiveness)
             {
-                arg->SetEarlyNode(lcl);
-                JITDUMP("did not need to make outgoing copy for last use of implicit byref V%2d\n", varNum);
+                omitCopy = !varDsc->lvPromoted && ((lcl->gtFlags & GTF_VAR_DEATH) != 0);
+            }
+
+            if (!omitCopy && (totalAppearances == 1))
+            {
+                // fgMightHaveLoop() is expensive; check it last, only if necessary.
+                omitCopy = call->IsNoReturn() || !fgMightHaveLoop();
+            }
+
+            if (omitCopy)
+            {
+                if (implicitByRefLcl != nullptr)
+                {
+                    arg->SetEarlyNode(lcl);
+                }
+                else
+                {
+                    uint16_t offs = lcl->GetLclOffs();
+                    if (offs == 0)
+                    {
+                        lcl->ChangeOper(GT_LCL_VAR_ADDR);
+                    }
+                    else
+                    {
+                        lcl->ChangeOper(GT_LCL_FLD_ADDR);
+                        lcl->AsLclFld()->SetLclOffs(offs);
+                    }
+
+                    lcl->gtType = TYP_I_IMPL;
+                    lvaSetVarAddrExposed(varNum DEBUGARG(AddressExposedReason::ESCAPE_ADDRESS));
+
+                    // Copy prop could allow creating another later use of lcl if there are live assertions about it.
+                    fgKillDependentAssertions(varNum DEBUGARG(lcl));
+                }
+
+                JITDUMP("did not need to make outgoing copy for last use of V%02d\n", varNum);
                 return;
             }
         }
@@ -4385,7 +4423,7 @@ GenTree* Compiler::fgMorphIndexAddr(GenTreeIndexAddr* indexAddr)
         // do this here. Likewise for implicit byrefs.
 
         if (((arrRef->gtFlags & (GTF_ASG | GTF_CALL | GTF_GLOB_REF)) != 0) ||
-            gtComplexityExceeds(&arrRef, MAX_ARR_COMPLEXITY) || arrRef->OperIs(GT_FIELD, GT_LCL_FLD) ||
+            gtComplexityExceeds(arrRef, MAX_ARR_COMPLEXITY) || arrRef->OperIs(GT_FIELD, GT_LCL_FLD) ||
             (arrRef->OperIs(GT_LCL_VAR) && lvaIsLocalImplicitlyAccessedByRef(arrRef->AsLclVar()->GetLclNum())))
         {
             unsigned arrRefTmpNum = lvaGrabTemp(true DEBUGARG("arr expr"));
@@ -4400,7 +4438,7 @@ GenTree* Compiler::fgMorphIndexAddr(GenTreeIndexAddr* indexAddr)
         }
 
         if (((index->gtFlags & (GTF_ASG | GTF_CALL | GTF_GLOB_REF)) != 0) ||
-            gtComplexityExceeds(&index, MAX_ARR_COMPLEXITY) || index->OperIs(GT_FIELD, GT_LCL_FLD) ||
+            gtComplexityExceeds(index, MAX_ARR_COMPLEXITY) || index->OperIs(GT_FIELD, GT_LCL_FLD) ||
             (index->OperIs(GT_LCL_VAR) && lvaIsLocalImplicitlyAccessedByRef(index->AsLclVar()->GetLclNum())))
         {
             unsigned indexTmpNum = lvaGrabTemp(true DEBUGARG("index expr"));
@@ -4744,10 +4782,12 @@ GenTree* Compiler::fgMorphExpandImplicitByRefArg(GenTreeLclVarCommon* lclNode)
     JITDUMP("\nRewriting an implicit by-ref parameter %s:\n", isAddress ? "address" : "reference");
     DISPTREE(lclNode);
 
+    GenTreeFlags lastUse = lclNode->gtFlags & GTF_VAR_DEATH;
     lclNode->ChangeType(TYP_BYREF);
     lclNode->ChangeOper(GT_LCL_VAR);
     lclNode->SetLclNum(newLclNum);
     lclNode->SetAllEffectsFlags(GTF_EMPTY); // Implicit by-ref parameters cannot be address-exposed.
+    lclNode->gtFlags |= lastUse;
 
     GenTree* addrNode = lclNode;
     if (offset != 0)
@@ -13526,8 +13566,11 @@ bool Compiler::fgMorphBlockStmt(BasicBlock* block, Statement* stmt DEBUGARG(cons
         // Have to re-do the evaluation order since for example some later code does not expect constants as op1
         gtSetStmtInfo(stmt);
 
-        // Have to re-link the nodes for this statement
-        fgSetStmtSeq(stmt);
+        // This may be called both when the nodes are linked and when they aren't.
+        if (fgNodeThreading == NodeThreading::AllTrees)
+        {
+            fgSetStmtSeq(stmt);
+        }
     }
 
 #ifdef DEBUG
@@ -14244,13 +14287,18 @@ void Compiler::fgPostExpandQmarkChecks()
 
 #endif // DEBUG
 
-/*****************************************************************************
- *
- *  Get the top level GT_QMARK node in a given "expr", return NULL if such a
- *  node is not present. If the top level GT_QMARK node is assigned to a
- *  GT_LCL_VAR, then return the lcl node in ppDst.
- *
- */
+//------------------------------------------------------------------------
+// fgGetTopLevelQmark:
+//    Get the top level GT_QMARK node in a given expression.
+//
+// Arguments:
+//    expr  - the tree, a root node that may contain a top level qmark.
+//    ppDst - [optional] if the top level GT_QMARK node is assigned to a
+//            GT_LCL_VAR, then this is that local node. Otherwise nullptr.
+//
+// Returns:
+//    The GT_QMARK node, or nullptr if there is no top level qmark.
+//
 GenTree* Compiler::fgGetTopLevelQmark(GenTree* expr, GenTree** ppDst /* = NULL */)
 {
     if (ppDst != nullptr)
index 2075b601d3bddb6987e668314127fe6ab91299f7..088f7134de2e05b1479a8451936faf5cc956cf7a 100644 (file)
@@ -6455,7 +6455,7 @@ void Compiler::optPerformHoistExpr(GenTree* origExpr, BasicBlock* exprBb, unsign
     }
 #endif
 
-    if (fgStmtListThreaded)
+    if (fgNodeThreading == NodeThreading::AllTrees)
     {
         gtSetStmtInfo(hoistStmt);
         fgSetStmtSeq(hoistStmt);
@@ -9446,7 +9446,7 @@ void OptBoolsDsc::optOptimizeBoolsUpdateTrees()
 
     // Recost/rethread the tree if necessary
     //
-    if (m_comp->fgStmtListThreaded)
+    if (m_comp->fgNodeThreading != NodeThreading::None)
     {
         m_comp->gtSetStmtInfo(m_testInfo1.testStmt);
         m_comp->fgSetStmtSeq(m_testInfo1.testStmt);
@@ -9770,7 +9770,7 @@ void OptBoolsDsc::optOptimizeBoolsGcStress()
 
     // Recost/rethread the tree if necessary
     //
-    if (m_comp->fgStmtListThreaded)
+    if (m_comp->fgNodeThreading != NodeThreading::None)
     {
         m_comp->gtSetStmtInfo(test.testStmt);
         m_comp->fgSetStmtSeq(test.testStmt);
@@ -10103,7 +10103,7 @@ void Compiler::optRemoveRedundantZeroInits()
     bool            hasGCSafePoint = false;
     bool            canThrow       = false;
 
-    assert(fgStmtListThreaded);
+    assert(fgNodeThreading == NodeThreading::AllTrees);
 
     for (BasicBlock* block = fgFirstBB; (block != nullptr) && ((block->bbFlags & BBF_MARKED) == 0);
          block             = block->GetUniqueSucc())
index 5ec8d3e6e84fc7d399f4adc5107227e0f2bbb229..578d805e1b07deba59f319a2031f0ae8235fc70f 100644 (file)
@@ -161,6 +161,11 @@ void Phase::PostPhase(PhaseStatus status)
         {
             comp->fgDebugCheckProfileWeights();
         }
+
+        if ((comp->activePhaseChecks & PhaseChecks::CHECK_LINKED_LOCALS) == PhaseChecks::CHECK_LINKED_LOCALS)
+        {
+            comp->fgDebugCheckLinkedLocals();
+        }
     }
 #endif // DEBUG
 
index ce05fb14146301e3da357dfcf2835f1cc24b1eba..b1c92515a0d946fdf999bb4d9172e8688cb78f0c 100644 (file)
@@ -8,6 +8,10 @@
     <GCStressIncompatible>true</GCStressIncompatible>
   </PropertyGroup>
   <ItemGroup>
+    <!-- These expectedly interfere with debug info -->
+    <CLRTestEnvironmentVariable Include="DOTNET_JitNoForwardSub" Value="1" />
+    <CLRTestEnvironmentVariable Include="DOTNET_JitEnableTailMerge" Value="0" />
+
     <ProjectReference Include="tests_d.ilproj" Aliases="tests_d" />
     <ProjectReference Include="tests_r.ilproj" Aliases="tests_r" />
     <ProjectReference Include="attribute.csproj" />
index 02aa82b42b40fcc3ee382b7a08013900e7fce63a..963479f6cd45fa4b0d989fc247d771e44e73c4c9 100644 (file)
@@ -99,7 +99,7 @@
       // as this is used for the managed-ret-val feature, but the debugger filters out these mappings and does not
       // report them in the ETW event. We should probably change this, those mappings should be useful in any case.
       property int32[] Debug = int32[10]( 0x0 0x6 0xe 0x12 0x1a 0x1c 0x24 0x28 0x2c 0x34 ) 
-      property int32[] Opts = int32[4]( 0x0 0x6 0x12 0x1c )
+      property int32[] Opts = int32[6]( 0x0 0x6 0xe 0x12 0x1c 0x2c )
     }
     .maxstack  2
     .locals init (int32 V_0)
diff --git a/src/tests/JIT/opt/ForwardSub/earlyLiveness.cs b/src/tests/JIT/opt/ForwardSub/earlyLiveness.cs
new file mode 100644 (file)
index 0000000..061bb04
--- /dev/null
@@ -0,0 +1,46 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Runtime.CompilerServices;
+
+class EarlyLiveness_ForwardSub
+{
+    static int Main(string[] args)
+    {
+        int result = 100; // Stays 100 unless Test1 reports a nonzero value below.
+        int test1 = Test1();
+        if (test1 != 0) // Nonzero means some Foo call read a nonzero S1.A.
+        {
+            Console.WriteLine("Test1 returned {0}", test1);
+            result = -1;
+        }
+
+        return result;
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    private static int Test1()
+    {
+        S1 s1 = new();
+        S1 s2 = s1; // Value copy of s1; s1 itself is still passed to Foo below.
+        return Foo(s1) + Foo(s2); // Each Foo returns the A it received, so the expected sum is 0.
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    private static int Foo(S1 s)
+    {
+        int result = s.A; // Capture A before the parameter is mutated.
+        s.A = 1234; // Writes to the by-value parameter; the caller's struct must not observe this.
+        Consume(s);
+        return result;
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    private static void Consume<T>(T value) { } // Empty, non-inlined sink for the struct argument.
+
+    private struct S1
+    {
+        public int A, B, C, D, E; // NOTE(review): presumably large enough to be passed by implicit byref on some ABIs - confirm.
+    }
+}
diff --git a/src/tests/JIT/opt/ForwardSub/earlyLiveness.csproj b/src/tests/JIT/opt/ForwardSub/earlyLiveness.csproj
new file mode 100644 (file)
index 0000000..19781e2
--- /dev/null
@@ -0,0 +1,10 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <DebugType />
+    <Optimize>True</Optimize>
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="$(MSBuildProjectName).cs" />
+  </ItemGroup>
+</Project>