JIT: change loop inversion edge weight updates and add phase (#48364)
authorAndy Ayers <andya@microsoft.com>
Thu, 18 Feb 2021 20:34:35 +0000 (12:34 -0800)
committerGitHub <noreply@github.com>
Thu, 18 Feb 2021 20:34:35 +0000 (12:34 -0800)
Rename `fgOptWhileLoop` to `optInvertWhileLoop` (using terminology from
Muchnick). Split off this transformation into a new phase so it is easier
to see its impact. Make the block updates / reorderings that follow into
a proper phase as well.

Use the test block exit likelihoods to update the profile weights for the
edges involved in loop inversion. Because edge weight updates are now
consistent we no longer need to recompute edge weights afterwards.

Rename `optOptimizeLoops` to `optFindLoops` since it no longer optimizes.

src/coreclr/jit/compiler.cpp
src/coreclr/jit/compiler.h
src/coreclr/jit/compphases.h
src/coreclr/jit/fgopt.cpp
src/coreclr/jit/fgprofile.cpp
src/coreclr/jit/optimizer.cpp
src/coreclr/jit/phase.cpp

index a8b144f..17b86c8 100644 (file)
@@ -4838,19 +4838,23 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
 
     if (opts.OptimizationEnabled())
     {
+        // Invert loops
+        //
+        DoPhase(this, PHASE_INVERT_LOOPS, &Compiler::optInvertLoops);
+
         // Optimize block order
         //
         DoPhase(this, PHASE_OPTIMIZE_LAYOUT, &Compiler::optOptimizeLayout);
+
         // Compute reachability sets and dominators.
         //
         DoPhase(this, PHASE_COMPUTE_REACHABILITY, &Compiler::fgComputeReachability);
 
-        // Perform loop inversion (i.e. transform "while" loops into
-        // "repeat" loops) and discover and classify natural loops
+        // Discover and classify natural loops
         // (e.g. mark iterative loops as such). Also marks loop blocks
         // and sets bbWeight to the loop nesting levels
         //
-        DoPhase(this, PHASE_OPTIMIZE_LOOPS, &Compiler::optOptimizeLoops);
+        DoPhase(this, PHASE_FIND_LOOPS, &Compiler::optFindLoops);
 
         // Clone loops with optimization opportunities, and
         // choose the one based on dynamic condition evaluation.
@@ -5299,10 +5303,8 @@ void Compiler::RecomputeLoopInfo()
         block->bbFlags &= ~BBF_LOOP_FLAGS;
     }
     fgComputeReachability();
-    // Rebuild the loop tree annotations themselves.  Since this is performed as
-    // part of 'optOptimizeLoops', this will also re-perform loop rotation, but
-    // not other optimizations, as the others are not part of 'optOptimizeLoops'.
-    optOptimizeLoops();
+    // Rebuild the loop tree annotations themselves
+    optFindLoops();
 }
 
 /*****************************************************************************/
index c6b532e..46d1580 100644 (file)
@@ -5383,7 +5383,7 @@ public:
     void fgComputeCalledCount(BasicBlock::weight_t returnWeight);
     void fgComputeEdgeWeights();
 
-    void fgReorderBlocks();
+    bool fgReorderBlocks();
 
     void fgDetermineFirstColdBlock();
 
@@ -5452,8 +5452,13 @@ public:
     void fgDebugCheckFlagsHelper(GenTree* tree, unsigned treeFlags, unsigned chkFlags);
     void fgDebugCheckTryFinallyExits();
     void fgDebugCheckProfileData();
+    bool fgDebugCheckIncomingProfileData(BasicBlock* block);
+    bool fgDebugCheckOutgoingProfileData(BasicBlock* block);
 #endif
 
+    bool fgProfileWeightsEqual(BasicBlock::weight_t weight1, BasicBlock::weight_t weight2);
+    bool fgProfileWeightsConsistent(BasicBlock::weight_t weight1, BasicBlock::weight_t weight2);
+
     static GenTree* fgGetFirstNode(GenTree* tree);
 
     //--------------------- Walking the trees in the IR -----------------------
@@ -6120,11 +6125,9 @@ private:
     void optOptimizeBoolsGcStress(BasicBlock* condBlock);
 #endif
 public:
-    void optOptimizeLayout(); // Optimize the BasicBlock layout of the method
-
-    void optOptimizeLoops(); // for "while-do" loops duplicates simple loop conditions and transforms
-                             // the loop into a "do-while" loop
-                             // Also finds all natural loops and records them in the loop table
+    PhaseStatus optInvertLoops();    // Invert loops so they're entered at top and tested at bottom.
+    PhaseStatus optOptimizeLayout(); // Optimize the BasicBlock layout of the method
+    PhaseStatus optFindLoops();      // Finds loops and records them in the loop table
 
     // Optionally clone loops in the loop table.
     void optCloneLoops();
@@ -6450,7 +6453,7 @@ protected:
         }
     }
 
-    void fgOptWhileLoop(BasicBlock* block);
+    void optInvertWhileLoop(BasicBlock* block);
 
     bool optComputeLoopRep(int        constInit,
                            int        constLimit,
index fc6df58..49303ec 100644 (file)
@@ -53,10 +53,11 @@ CompPhaseNameMacro(PHASE_COMPUTE_EDGE_WEIGHTS,   "Compute edge weights (1, false
 CompPhaseNameMacro(PHASE_CREATE_FUNCLETS,        "Create EH funclets",             "EH-FUNC",  false, -1, false)
 #endif // FEATURE_EH_FUNCLETS
 CompPhaseNameMacro(PHASE_MERGE_THROWS,           "Merge throw blocks",             "MRGTHROW", false, -1, false)
+CompPhaseNameMacro(PHASE_INVERT_LOOPS,           "Invert loops",                   "LOOP-INV", false, -1, false)
 CompPhaseNameMacro(PHASE_OPTIMIZE_LAYOUT,        "Optimize layout",                "LAYOUT",   false, -1, false)
 CompPhaseNameMacro(PHASE_COMPUTE_REACHABILITY,   "Compute blocks reachability",    "BL_REACH", false, -1, false)
 CompPhaseNameMacro(PHASE_ZERO_INITS,             "Redundant zero Inits",           "ZERO-INIT", false, -1, false)
-CompPhaseNameMacro(PHASE_OPTIMIZE_LOOPS,         "Optimize loops",                 "LOOP-OPT", false, -1, false)
+CompPhaseNameMacro(PHASE_FIND_LOOPS,             "Find loops",                     "LOOP-FND", false, -1, false)
 CompPhaseNameMacro(PHASE_CLONE_LOOPS,            "Clone loops",                    "LP-CLONE", false, -1, false)
 CompPhaseNameMacro(PHASE_UNROLL_LOOPS,           "Unroll loops",                   "UNROLL",   false, -1, false)
 CompPhaseNameMacro(PHASE_HOIST_LOOP_CODE,        "Hoist loop code",                "LP-HOIST", false, -1, false)
index 8941c5b..5fb1255 100644 (file)
@@ -3661,15 +3661,17 @@ bool Compiler::fgOptimizeSwitchJumps()
 #pragma warning(push)
 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
 #endif
-/*****************************************************************************
- *
- *  Function called to reorder the flowgraph of BasicBlocks such that any
- *  rarely run blocks are placed at the end of the block list.
- *  If we have profile information we also use that information to reverse
- *  all conditional jumps that would benefit.
- */
 
-void Compiler::fgReorderBlocks()
+//-----------------------------------------------------------------------------
+// fgReorderBlocks: reorder blocks to favor frequent fall through paths,
+//     move rare blocks to the end of the method/eh region, and move
+//     funclets to the ends of methods.
+//
+// Returns:
+//    True if anything got reordered. Reordering blocks may require changing
+//    IR to reverse branch conditions.
+//
+bool Compiler::fgReorderBlocks()
 {
     noway_assert(opts.compDbgCode == false);
 
@@ -3680,12 +3682,13 @@ void Compiler::fgReorderBlocks()
     // We can't relocate anything if we only have one block
     if (fgFirstBB->bbNext == nullptr)
     {
-        return;
+        return false;
     }
 
     bool newRarelyRun      = false;
     bool movedBlocks       = false;
     bool optimizedSwitches = false;
+    bool optimizedBranches = false;
 
     // First let us expand the set of run rarely blocks
     newRarelyRun |= fgExpandRarelyRunBlocks();
@@ -4094,9 +4097,11 @@ void Compiler::fgReorderBlocks()
             // Check for an unconditional branch to a conditional branch
             // which also branches back to our next block
             //
-            if (fgOptimizeBranch(bPrev))
+            const bool optimizedBranch = fgOptimizeBranch(bPrev);
+            if (optimizedBranch)
             {
                 noway_assert(bPrev->bbJumpKind == BBJ_COND);
+                optimizedBranches = true;
             }
             continue;
         }
@@ -4816,7 +4821,7 @@ void Compiler::fgReorderBlocks()
 
     } // end of for loop(bPrev,block)
 
-    bool changed = movedBlocks || newRarelyRun || optimizedSwitches;
+    const bool changed = movedBlocks || newRarelyRun || optimizedSwitches || optimizedBranches;
 
     if (changed)
     {
@@ -4829,6 +4834,8 @@ void Compiler::fgReorderBlocks()
         }
 #endif // DEBUG
     }
+
+    return changed;
 }
 #ifdef _PREFAST_
 #pragma warning(pop)
index d2c567f..6481095 100644 (file)
@@ -3129,6 +3129,11 @@ void Compiler::fgComputeEdgeWeights()
                     if (!assignOK)
                     {
                         // Here we have inconsistent profile data
+                        JITDUMP("Inconsistent profile data at " FMT_BB " -> " FMT_BB
+                                ": dest weight %f, min/max into dest is %f/%f, edge %f/%f\n",
+                                bSrc->bbNum, bDst->bbNum, bDstWeight, minEdgeWeightSum, maxEdgeWeightSum,
+                                edge->edgeWeightMin(), edge->edgeWeightMax());
+
                         inconsistentProfileData = true;
                         // No point in continuing
                         goto EARLY_EXIT;
@@ -3222,6 +3227,43 @@ EARLY_EXIT:;
     fgEdgeWeightsComputed  = true;
 }
 
+//------------------------------------------------------------------------
+// fgProfileWeightsEqual: check if two profile weights are equal
+//   (or nearly so)
+//
+// Arguments:
+//   weight1 -- first weight
+//   weight2 -- second weight
+//
+// Notes:
+//   In most cases you should probably call fgProfileWeightsConsistent instead
+//   of this method.
+//
+bool Compiler::fgProfileWeightsEqual(BasicBlock::weight_t weight1, BasicBlock::weight_t weight2)
+{
+    return fabs(weight1 - weight2) < 0.01;
+}
+
+//------------------------------------------------------------------------
+// fgProfileWeightsConsistentEqual: check if two profile weights are within
+//   some small percentage of one another.
+//
+// Arguments:
+//   weight1 -- first weight
+//   weight2 -- second weight
+//
+bool Compiler::fgProfileWeightsConsistent(BasicBlock::weight_t weight1, BasicBlock::weight_t weight2)
+{
+    if (weight2 == 0)
+    {
+        return fgProfileWeightsEqual(weight1, weight2);
+    }
+
+    BasicBlock::weight_t const relativeDiff = (weight2 - weight1) / weight2;
+
+    return fgProfileWeightsEqual(relativeDiff, 0.0f);
+}
+
 #ifdef DEBUG
 
 //------------------------------------------------------------------------
@@ -3312,118 +3354,22 @@ void Compiler::fgDebugCheckProfileData()
         // But we have two edge counts... so for now we simply check if the block
         // count falls within the [min,max] range.
         //
+        bool incomingConsistent = true;
+        bool outgoingConsistent = true;
+
         if (verifyIncoming)
         {
-            BasicBlock::weight_t incomingWeightMin = 0;
-            BasicBlock::weight_t incomingWeightMax = 0;
-            bool                 foundPreds        = false;
-
-            for (flowList* predEdge = block->bbPreds; predEdge != nullptr; predEdge = predEdge->flNext)
-            {
-                incomingWeightMin += predEdge->edgeWeightMin();
-                incomingWeightMax += predEdge->edgeWeightMax();
-                foundPreds = true;
-            }
-
-            if (!foundPreds)
-            {
-                // Might need to tone this down as we could see unreachable blocks?
-                problemBlocks++;
-                JITDUMP("  " FMT_BB " - expected to see predecessors\n", block->bbNum);
-            }
-            else
-            {
-                if (incomingWeightMin > incomingWeightMax)
-                {
-                    problemBlocks++;
-                    JITDUMP("  " FMT_BB " - incoming min %d > incoming max %d\n", block->bbNum, incomingWeightMin,
-                            incomingWeightMax);
-                }
-                else if (blockWeight < incomingWeightMin)
-                {
-                    problemBlocks++;
-                    JITDUMP("  " FMT_BB " - block weight %d < incoming min %d\n", block->bbNum, blockWeight,
-                            incomingWeightMin);
-                }
-                else if (blockWeight > incomingWeightMax)
-                {
-                    problemBlocks++;
-                    JITDUMP("  " FMT_BB " - block weight %d > incoming max %d\n", block->bbNum, blockWeight,
-                            incomingWeightMax);
-                }
-            }
+            incomingConsistent = fgDebugCheckIncomingProfileData(block);
         }
 
         if (verifyOutgoing)
         {
-            const unsigned numSuccs = block->NumSucc();
-
-            if (numSuccs == 0)
-            {
-                problemBlocks++;
-                JITDUMP("  " FMT_BB " - expected to see successors\n", block->bbNum);
-            }
-            else
-            {
-                BasicBlock::weight_t outgoingWeightMin = 0;
-                BasicBlock::weight_t outgoingWeightMax = 0;
-
-                // Walking successor edges is a bit wonky. Seems like it should be easier.
-                // Note this can also fail to enumerate all the edges, if we have a multigraph
-                //
-                int missingEdges = 0;
-
-                for (unsigned i = 0; i < numSuccs; i++)
-                {
-                    BasicBlock* succBlock = block->GetSucc(i);
-                    flowList*   succEdge  = nullptr;
-
-                    for (flowList* edge = succBlock->bbPreds; edge != nullptr; edge = edge->flNext)
-                    {
-                        if (edge->getBlock() == block)
-                        {
-                            succEdge = edge;
-                            break;
-                        }
-                    }
-
-                    if (succEdge == nullptr)
-                    {
-                        missingEdges++;
-                        JITDUMP("  " FMT_BB " can't find successor edge to " FMT_BB "\n", block->bbNum,
-                                succBlock->bbNum);
-                    }
-                    else
-                    {
-                        outgoingWeightMin += succEdge->edgeWeightMin();
-                        outgoingWeightMax += succEdge->edgeWeightMax();
-                    }
-                }
+            outgoingConsistent = fgDebugCheckOutgoingProfileData(block);
+        }
 
-                if (missingEdges > 0)
-                {
-                    JITDUMP("  " FMT_BB " - missing %d successor edges\n", block->bbNum, missingEdges);
-                    problemBlocks++;
-                }
-                if (outgoingWeightMin > outgoingWeightMax)
-                {
-                    problemBlocks++;
-                    JITDUMP("  " FMT_BB " - outgoing min %d > outgoing max %d\n", block->bbNum, outgoingWeightMin,
-                            outgoingWeightMax);
-                }
-                else if (blockWeight < outgoingWeightMin)
-                {
-                    problemBlocks++;
-                    JITDUMP("  " FMT_BB " - block weight %d < outgoing min %d\n", block->bbNum, blockWeight,
-                            outgoingWeightMin);
-                }
-                else if (blockWeight > outgoingWeightMax)
-                {
-                    problemBlocks++;
-                    JITDUMP("  " FMT_BB " - block weight %d > outgoing max %d\n", block->bbNum, blockWeight,
-                            outgoingWeightMax);
-                }
-            }
+        if (!incomingConsistent || !outgoingConsistent)
+        {
+            problemBlocks++;
         }
     }
 
@@ -3434,7 +3380,7 @@ void Compiler::fgDebugCheckProfileData()
         if (entryWeight != exitWeight)
         {
             problemBlocks++;
-            JITDUMP("  Entry %d exit %d mismatch\n", entryWeight, exitWeight);
+            JITDUMP("  Entry %f exit %f weight mismatch\n", entryWeight, exitWeight);
         }
     }
 
@@ -3464,4 +3410,145 @@ void Compiler::fgDebugCheckProfileData()
     }
 }
 
+//------------------------------------------------------------------------
+// fgDebugCheckIncomingProfileData: verify profile data flowing into a
+//   block matches the profile weight of the block.
+//
+// Arguments:
+//   block - block to check
+//
+// Returns:
+//   true if counts consistent, false otherwise.
+//
+// Notes:
+//   Only useful to call on blocks with predecessors.
+//
+bool Compiler::fgDebugCheckIncomingProfileData(BasicBlock* block)
+{
+    BasicBlock::weight_t const blockWeight       = block->bbWeight;
+    BasicBlock::weight_t       incomingWeightMin = 0;
+    BasicBlock::weight_t       incomingWeightMax = 0;
+    bool                       foundPreds        = false;
+
+    for (flowList* predEdge = block->bbPreds; predEdge != nullptr; predEdge = predEdge->flNext)
+    {
+        incomingWeightMin += predEdge->edgeWeightMin();
+        incomingWeightMax += predEdge->edgeWeightMax();
+        foundPreds = true;
+    }
+
+    if (!foundPreds)
+    {
+        // Assume this is ok.
+        //
+        return true;
+    }
+
+    if (incomingWeightMin > incomingWeightMax)
+    {
+        JITDUMP("  " FMT_BB " - incoming min %f > incoming max %f\n", block->bbNum, incomingWeightMin,
+                incomingWeightMax);
+        return false;
+    }
+
+    if (blockWeight < incomingWeightMin)
+    {
+        JITDUMP("  " FMT_BB " - block weight %f < incoming min %f\n", block->bbNum, blockWeight, incomingWeightMin);
+        return false;
+    }
+
+    if (blockWeight > incomingWeightMax)
+    {
+        JITDUMP("  " FMT_BB " - block weight %f > incoming max %f\n", block->bbNum, blockWeight, incomingWeightMax);
+        return false;
+    }
+
+    return true;
+}
+
+//------------------------------------------------------------------------
+// fgDebugCheckOutgoingProfileData: verify profile data flowing out of
+//   a block matches the profile weight of the block.
+//
+// Arguments:
+//   block - block to check
+//
+// Returns:
+//   true if counts consistent, false otherwise.
+//
+// Notes:
+//   Only useful to call on blocks with successors.
+//
+bool Compiler::fgDebugCheckOutgoingProfileData(BasicBlock* block)
+{
+    const unsigned numSuccs = block->NumSucc();
+
+    if (numSuccs == 0)
+    {
+        // Assume this is ok.
+        //
+        return true;
+    }
+
+    BasicBlock::weight_t const blockWeight       = block->bbWeight;
+    BasicBlock::weight_t       outgoingWeightMin = 0;
+    BasicBlock::weight_t       outgoingWeightMax = 0;
+
+    // Walking successor edges is a bit wonky. Seems like it should be easier.
+    // Note this can also fail to enumerate all the edges, if we have a multigraph
+    //
+    int missingEdges = 0;
+
+    for (unsigned i = 0; i < numSuccs; i++)
+    {
+        BasicBlock* succBlock = block->GetSucc(i);
+        flowList*   succEdge  = nullptr;
+
+        for (flowList* edge = succBlock->bbPreds; edge != nullptr; edge = edge->flNext)
+        {
+            if (edge->getBlock() == block)
+            {
+                succEdge = edge;
+                break;
+            }
+        }
+
+        if (succEdge == nullptr)
+        {
+            missingEdges++;
+            JITDUMP("  " FMT_BB " can't find successor edge to " FMT_BB "\n", block->bbNum, succBlock->bbNum);
+            continue;
+        }
+
+        outgoingWeightMin += succEdge->edgeWeightMin();
+        outgoingWeightMax += succEdge->edgeWeightMax();
+    }
+
+    if (missingEdges > 0)
+    {
+        JITDUMP("  " FMT_BB " - missing %d successor edges\n", block->bbNum, missingEdges);
+    }
+
+    if (outgoingWeightMin > outgoingWeightMax)
+    {
+        JITDUMP("  " FMT_BB " - outgoing min %f > outgoing max %f\n", block->bbNum, outgoingWeightMin,
+                outgoingWeightMax);
+        return false;
+    }
+
+    if (blockWeight < outgoingWeightMin)
+    {
+        JITDUMP("  " FMT_BB " - block weight %f < outgoing min %f\n", block->bbNum, blockWeight, outgoingWeightMin);
+        return false;
+    }
+
+    if (blockWeight > outgoingWeightMax)
+    {
+        JITDUMP("  " FMT_BB " - block weight %f > outgoing max %f\n", block->bbNum, blockWeight, outgoingWeightMax);
+        return false;
+    }
+
+    return missingEdges == 0;
+}
+
 #endif // DEBUG
index 5a4d50b..4c174f1 100644 (file)
@@ -4085,11 +4085,23 @@ static Statement* optFindLoopTermTest(BasicBlock* bottom)
     return result;
 }
 
-/*****************************************************************************
- * Optimize "jmp C; do{} C:while(cond);" loops to "if (cond){ do{}while(cond}; }"
- */
-
-void Compiler::fgOptWhileLoop(BasicBlock* block)
+//-----------------------------------------------------------------------------
+// optInvertWhileLoop: modify flow and duplicate code so that for/while loops are
+//   entered at top and tested at bottom (aka loop rotation or bottom testing).
+//
+// Arguments:
+//   block -- block that may be the predecessor of the un-rotated loop's test block.
+//
+// Notes:
+//  Optimizes "jmp C; do{} C:while(cond);" loops to "if (cond){ do{}while(cond}; }"
+//  Does not modify every loop
+//
+//  Makes no changes if the flow pattern match fails.
+//
+//  May not modify a loop if profile is unfavorable, if the cost of duplicating
+//  code is large (factoring in potential CSEs)
+//
+void Compiler::optInvertWhileLoop(BasicBlock* block)
 {
     noway_assert(opts.OptimizationEnabled());
     noway_assert(compCodeOpt() != SMALL_CODE);
@@ -4211,12 +4223,12 @@ void Compiler::fgOptWhileLoop(BasicBlock* block)
     gtPrepareCost(condTree);
     unsigned estDupCostSz = condTree->GetCostSz();
 
-    double loopIterations = (double)BB_LOOP_WEIGHT_SCALE;
+    BasicBlock::weight_t loopIterations = BB_LOOP_WEIGHT_SCALE;
 
-    bool                 allProfileWeightsAreValid = false;
-    BasicBlock::weight_t weightBlock               = block->bbWeight;
-    BasicBlock::weight_t weightTest                = bTest->bbWeight;
-    BasicBlock::weight_t weightNext                = block->bbNext->bbWeight;
+    bool                       allProfileWeightsAreValid = false;
+    BasicBlock::weight_t const weightBlock               = block->bbWeight;
+    BasicBlock::weight_t const weightTest                = bTest->bbWeight;
+    BasicBlock::weight_t const weightNext                = block->bbNext->bbWeight;
 
     // If we have profile data then we calculate the number of time
     // the loop will iterate into loopIterations
@@ -4226,26 +4238,39 @@ void Compiler::fgOptWhileLoop(BasicBlock* block)
         // have good profile weights
         if (block->hasProfileWeight() && bTest->hasProfileWeight() && block->bbNext->hasProfileWeight())
         {
-            allProfileWeightsAreValid = true;
-
             // If this while loop never iterates then don't bother transforming
+            //
             if (weightNext == 0)
             {
                 return;
             }
 
-            // with (weighNext > 0) we should also have (weightTest >= weightBlock)
-            // if the profile weights are all valid.
+            // We generally expect weightTest == weightNext + weightBlock.
             //
-            //   weightNext is the number of time this loop iterates
-            //   weightBlock is the number of times that we enter the while loop
-            //   loopIterations is the average number of times that this loop iterates
+            // Tolerate small inconsistencies...
             //
-            if (weightTest >= weightBlock)
+            if (!fgProfileWeightsConsistent(weightBlock + weightNext, weightTest))
             {
-                loopIterations = (double)block->bbNext->bbWeight / (double)block->bbWeight;
+                JITDUMP("Profile weights locally inconsistent: block %f, next %f, test %f\n", weightBlock, weightNext,
+                        weightTest);
+            }
+            else
+            {
+                allProfileWeightsAreValid = true;
+
+                // Determine iteration count
+                //
+                //   weightNext is the number of time this loop iterates
+                //   weightBlock is the number of times that we enter the while loop
+                //   loopIterations is the average number of times that this loop iterates
+                //
+                loopIterations = weightNext / weightBlock;
             }
         }
+        else
+        {
+            JITDUMP("Missing profile data for loop!\n");
+        }
     }
 
     unsigned maxDupCostSz = 32;
@@ -4335,44 +4360,6 @@ void Compiler::fgOptWhileLoop(BasicBlock* block)
         block->bbFlags |= copyFlags;
     }
 
-    // If we have profile data for all blocks and we know that we are cloning the
-    //  bTest block into block and thus changing the control flow from block so
-    //  that it no longer goes directly to bTest anymore, we have to adjust the
-    //  weight of bTest by subtracting out the weight of block.
-    //
-    if (allProfileWeightsAreValid)
-    {
-        //
-        // Some additional sanity checks before adjusting the weight of bTest
-        //
-        if ((weightNext > 0) && (weightTest >= weightBlock) && (weightTest != BB_MAX_WEIGHT))
-        {
-            // Get the two edge that flow out of bTest
-            flowList* edgeToNext = fgGetPredForBlock(bTest->bbNext, bTest);
-            flowList* edgeToJump = fgGetPredForBlock(bTest->bbJumpDest, bTest);
-
-            // Calculate the new weight for block bTest
-
-            BasicBlock::weight_t newWeightTest =
-                (weightTest > weightBlock) ? (weightTest - weightBlock) : BB_ZERO_WEIGHT;
-            bTest->bbWeight = newWeightTest;
-
-            if (newWeightTest == BB_ZERO_WEIGHT)
-            {
-                bTest->bbFlags |= BBF_RUN_RARELY;
-                // All out edge weights are set to zero
-                edgeToNext->setEdgeWeights(BB_ZERO_WEIGHT, BB_ZERO_WEIGHT);
-                edgeToJump->setEdgeWeights(BB_ZERO_WEIGHT, BB_ZERO_WEIGHT);
-            }
-            else
-            {
-                // Update the our edge weights
-                edgeToNext->setEdgeWeights(BB_ZERO_WEIGHT, min(edgeToNext->edgeWeightMax(), newWeightTest));
-                edgeToJump->setEdgeWeights(BB_ZERO_WEIGHT, min(edgeToJump->edgeWeightMax(), newWeightTest));
-            }
-        }
-    }
-
     /* Change the block to end with a conditional jump */
 
     block->bbJumpKind = BBJ_COND;
@@ -4391,7 +4378,7 @@ void Compiler::fgOptWhileLoop(BasicBlock* block)
 #ifdef DEBUG
     if (verbose)
     {
-        printf("\nDuplicating loop condition in " FMT_BB " for loop (" FMT_BB " - " FMT_BB ")", block->bbNum,
+        printf("\nDuplicated loop condition in " FMT_BB " for loop (" FMT_BB " - " FMT_BB ")", block->bbNum,
                block->bbNext->bbNum, bTest->bbNum);
         printf("\nEstimated code size expansion is %d\n ", estDupCostSz);
 
@@ -4399,34 +4386,91 @@ void Compiler::fgOptWhileLoop(BasicBlock* block)
     }
 
 #endif
-}
 
-/*****************************************************************************
- *
- *  Optimize the BasicBlock layout of the method
- */
+    // If we have profile data for all blocks and we know that we are cloning the
+    // bTest block into block and thus changing the control flow from block so
+    // that it no longer goes directly to bTest anymore, we have to adjust
+    // various weights.
+    //
+    if (allProfileWeightsAreValid)
+    {
+        // Update the weight for bTest
+        //
+        JITDUMP("Reducing profile weight of " FMT_BB " from %f to %f\n", bTest->bbNum, weightTest, weightNext);
+        bTest->bbWeight = weightNext;
 
-void Compiler::optOptimizeLayout()
-{
-    noway_assert(opts.OptimizationEnabled());
+        // Determine the new edge weights.
+        //
+        // We project the next/jump ratio for block and bTest by using
+        // the original likelihoods out of bTest.
+        //
+        // Note "next" is the loop top block, not bTest's bbNext,
+        // we'll call this latter block "after".
+        //
+        BasicBlock::weight_t const testToNextLikelihood  = weightNext / weightTest;
+        BasicBlock::weight_t const testToAfterLikelihood = 1.0f - testToNextLikelihood;
 
-#ifdef DEBUG
-    if (verbose)
-    {
-        printf("*************** In optOptimizeLayout()\n");
-        fgDispHandlerTab();
-    }
+        // Adjust edges out of bTest (which now has weight weightNext)
+        //
+        BasicBlock::weight_t const testToNextWeight  = weightNext * testToNextLikelihood;
+        BasicBlock::weight_t const testToAfterWeight = weightNext * testToAfterLikelihood;
 
-    /* Check that the flowgraph data (bbNum, bbRefs, bbPreds) is up-to-date */
-    fgDebugCheckBBlist();
+        flowList* const edgeTestToNext  = fgGetPredForBlock(bTest->bbJumpDest, bTest);
+        flowList* const edgeTestToAfter = fgGetPredForBlock(bTest->bbNext, bTest);
+
+        JITDUMP("Setting weight of " FMT_BB " -> " FMT_BB " to %f (iterate loop)\n", bTest->bbNum,
+                bTest->bbJumpDest->bbNum, testToNextWeight);
+        JITDUMP("Setting weight of " FMT_BB " -> " FMT_BB " to %f (exit loop)\n", bTest->bbNum, bTest->bbNext->bbNum,
+                testToAfterWeight);
+
+        edgeTestToNext->setEdgeWeights(testToNextWeight, testToNextWeight);
+        edgeTestToAfter->setEdgeWeights(testToAfterWeight, testToAfterWeight);
+
+        // Adjust edges out of block, using the same distribution.
+        //
+        JITDUMP("Profile weight of " FMT_BB " remains unchanged at %f\n", block->bbNum, weightBlock);
+
+        BasicBlock::weight_t const blockToNextLikelihood  = testToNextLikelihood;
+        BasicBlock::weight_t const blockToAfterLikelihood = testToAfterLikelihood;
+
+        BasicBlock::weight_t const blockToNextWeight  = weightBlock * blockToNextLikelihood;
+        BasicBlock::weight_t const blockToAfterWeight = weightBlock * blockToAfterLikelihood;
+
+        flowList* const edgeBlockToNext  = fgGetPredForBlock(block->bbNext, block);
+        flowList* const edgeBlockToAfter = fgGetPredForBlock(block->bbJumpDest, block);
+
+        JITDUMP("Setting weight of " FMT_BB " -> " FMT_BB " to %f (enter loop)\n", block->bbNum, block->bbNext->bbNum,
+                blockToNextWeight);
+        JITDUMP("Setting weight of " FMT_BB " -> " FMT_BB " to %f (avoid loop)\n", block->bbNum,
+                block->bbJumpDest->bbNum, blockToAfterWeight);
+
+        edgeBlockToNext->setEdgeWeights(blockToNextWeight, blockToNextWeight);
+        edgeBlockToAfter->setEdgeWeights(blockToAfterWeight, blockToAfterWeight);
+
+#ifdef DEBUG
+        // Verify profile for the two target blocks is consistent.
+        //
+        fgDebugCheckIncomingProfileData(block->bbNext);
+        fgDebugCheckIncomingProfileData(block->bbJumpDest);
 #endif
+    }
+}
 
+//-----------------------------------------------------------------------------
+// optInvertLoops: invert while loops in the method
+//
+// Returns:
+//   suitable phase status
+//
+PhaseStatus Compiler::optInvertLoops()
+{
+    noway_assert(opts.OptimizationEnabled());
     noway_assert(fgModified == false);
 
     for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
     {
-        /* Make sure the appropriate fields are initialized */
-
+        // Make sure the appropriate fields are initialized
+        //
         if (block->bbWeight == BB_ZERO_WEIGHT)
         {
             /* Zero weighted block can't have a LOOP_HEAD flag */
@@ -4436,36 +4480,58 @@ void Compiler::optOptimizeLayout()
 
         if (compCodeOpt() != SMALL_CODE)
         {
-            /* Optimize "while(cond){}" loops to "cond; do{}while(cond);" */
-
-            fgOptWhileLoop(block);
+            optInvertWhileLoop(block);
         }
     }
 
-    if (fgModified)
+    const bool madeChanges = fgModified;
+
+    if (madeChanges)
     {
-        // Recompute the edge weight if we have modified the flow graph in fgOptWhileLoop
-        fgComputeEdgeWeights();
+        // Reset fgModified here as we've done a consistent set of edits.
+        //
+        fgModified = false;
     }
 
-    fgUpdateFlowGraph(true);
-    fgReorderBlocks();
-    fgUpdateFlowGraph();
+    return madeChanges ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING;
 }
 
-/*****************************************************************************
- *
- *  Perform loop inversion, find and classify natural loops
- */
+//-----------------------------------------------------------------------------
+// optOptimizeLayout: reorder blocks to reduce cost of control flow
+//
+// Returns:
+//   suitable phase status
+//
+PhaseStatus Compiler::optOptimizeLayout()
+{
+    noway_assert(opts.OptimizationEnabled());
+    noway_assert(fgModified == false);
 
-void Compiler::optOptimizeLoops()
+    bool       madeChanges          = false;
+    const bool allowTailDuplication = true;
+
+    madeChanges |= fgUpdateFlowGraph(allowTailDuplication);
+    madeChanges |= fgReorderBlocks();
+    madeChanges |= fgUpdateFlowGraph();
+
+    return madeChanges ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING;
+}
+
+//-----------------------------------------------------------------------------
+// optFindLoops: find and classify natural loops
+//
+// Notes:
+//  Also (re)sets all non-IBC block weights, and marks loops potentially needing
+//  alignment padding.
+//
+PhaseStatus Compiler::optFindLoops()
 {
     noway_assert(opts.OptimizationEnabled());
 
 #ifdef DEBUG
     if (verbose)
     {
-        printf("*************** In optOptimizeLoops()\n");
+        printf("*************** In optFindLoops()\n");
     }
 #endif
 
@@ -4581,6 +4647,8 @@ void Compiler::optOptimizeLoops()
 #endif
         optLoopsMarked = true;
     }
+
+    return PhaseStatus::MODIFIED_EVERYTHING;
 }
 
 //------------------------------------------------------------------------
index 860f7d6..b56d29b 100644 (file)
@@ -157,15 +157,16 @@ void Phase::PostPhase(PhaseStatus status)
     // well as the new-style phases that have been updated to return
     // PhaseStatus from their DoPhase methods.
     //
-    static Phases s_allowlist[] = {PHASE_IMPORTATION,       PHASE_IBCINSTR,
-                                   PHASE_IBCPREP,           PHASE_INCPROFILE,
-                                   PHASE_INDXCALL,          PHASE_MORPH_INLINE,
-                                   PHASE_ALLOCATE_OBJECTS,  PHASE_EMPTY_TRY,
-                                   PHASE_EMPTY_FINALLY,     PHASE_MERGE_FINALLY_CHAINS,
-                                   PHASE_CLONE_FINALLY,     PHASE_MERGE_THROWS,
-                                   PHASE_MORPH_GLOBAL,      PHASE_BUILD_SSA,
-                                   PHASE_RATIONALIZE,       PHASE_LOWERING,
-                                   PHASE_STACK_LEVEL_SETTER};
+    static Phases s_allowlist[] = {PHASE_IMPORTATION,      PHASE_IBCINSTR,
+                                   PHASE_IBCPREP,          PHASE_INCPROFILE,
+                                   PHASE_INDXCALL,         PHASE_MORPH_INLINE,
+                                   PHASE_ALLOCATE_OBJECTS, PHASE_EMPTY_TRY,
+                                   PHASE_EMPTY_FINALLY,    PHASE_MERGE_FINALLY_CHAINS,
+                                   PHASE_CLONE_FINALLY,    PHASE_MERGE_THROWS,
+                                   PHASE_MORPH_GLOBAL,     PHASE_INVERT_LOOPS,
+                                   PHASE_OPTIMIZE_LAYOUT,  PHASE_FIND_LOOPS,
+                                   PHASE_BUILD_SSA,        PHASE_RATIONALIZE,
+                                   PHASE_LOWERING,         PHASE_STACK_LEVEL_SETTER};
 
     if (madeChanges)
     {