From: Andy Ayers
Date: Sun, 6 Mar 2022 16:26:43 +0000 (-0800)
Subject: JIT: add OSR patchpoint strategy, inhibit tail duplication (#66208)
X-Git-Tag: accepted/tizen/unified/riscv/20231226.055536~10483
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=f9da3db92420d15b5bba283a44271cd81d83ad1a;p=platform%2Fupstream%2Fdotnet%2Fruntime.git

JIT: add OSR patchpoint strategy, inhibit tail duplication (#66208)

Two changes for OSR:

* add new strategies for placing patchpoints -- either at backedge sources
  (instead of targets) or adaptive, depending on the number of backedges.
  Change the default to adaptive, since this works better with the flow we
  see from C# `for` loops.

* inhibit tail duplication for OSR, as it may end up interfering with loop
  recognition.

We may not be able to place patchpoints at sources, for various reasons;
if so, we fall back to placing them at targets. We also can't place
patchpoints unless block entries are also stack-empty points. This means
forgoing patchpoints in some IL cases.
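For a rough picture of the policy described above, here is a small standalone
C++ sketch of the placement decision. It is illustrative only: it is not the
JIT code in this change, and the types and names (Strategy, BlockInfo,
choosePatchpoint, and so on) are invented for the sketch. The real logic lives
in impImportBlockCode in the importer.cpp hunk below.

#include <cstdio>

// Which blocks get Tier0 OSR patchpoints (values mirror TC_PatchpointStrategy).
enum class Strategy
{
    BackedgeSources = 0, // patchpoint where the backward branch originates
    BackedgeTargets = 1, // patchpoint where the backward branch lands (loop top)
    Adaptive        = 2  // default: target when the loop top has several preds, else source
};

// A few facts about a basic block; stand-ins for the real BasicBlock fields.
struct BlockInfo
{
    bool isBackedgeSource;  // block ends in a lexically backward branch
    bool isBackedgeTarget;  // block is the target of a lexically backward branch
    int  refCount;          // predecessor count, used as a proxy for backedge count
    int  stackDepthOnEntry; // IL evaluation stack depth at block entry
    bool inHandler;         // block lives inside an EH handler
};

struct Decision
{
    bool addPatchpoint        = false; // place a patchpoint for this block
    bool mustUseTargetInstead = false; // wanted a source patchpoint; fall back to the target(s)
};

Decision choosePatchpoint(const BlockInfo& b, Strategy s)
{
    Decision d;
    switch (s)
    {
        case Strategy::BackedgeSources:
            d.addPatchpoint = b.isBackedgeSource;
            // Source patchpoints need an empty stack and no enclosing handler;
            // otherwise the patchpoint moves to the backedge target(s).
            d.mustUseTargetInstead = (b.stackDepthOnEntry != 0) || b.inHandler;
            break;

        case Strategy::BackedgeTargets:
            // Only stack-empty loop tops can host a patchpoint.
            d.addPatchpoint = b.isBackedgeTarget && (b.stackDepthOnEntry == 0);
            break;

        case Strategy::Adaptive:
            // Prefer the target when it has multiple preds (a proxy for multiple
            // backedges); otherwise fall back to the source, as above.
            if (b.isBackedgeTarget && (b.refCount > 1) && (b.stackDepthOnEntry == 0))
            {
                d.addPatchpoint = true;
            }
            else if (b.isBackedgeSource)
            {
                d.addPatchpoint        = true;
                d.mustUseTargetInstead = (b.stackDepthOnEntry != 0) || b.inHandler;
            }
            break;
    }
    return d;
}

int main()
{
    // Bottom block of a simple `for` loop: source of one backedge, stack empty.
    BlockInfo loopBottom{true, false, 1, 0, false};
    Decision  d = choosePatchpoint(loopBottom, Strategy::Adaptive);
    std::printf("addPatchpoint=%d mustUseTargetInstead=%d\n", d.addPatchpoint, d.mustUseTargetInstead);
    return 0;
}

In the change itself the same decision is driven by JitConfig.TC_PatchpointStrategy()
(see the jitconfigvalues.h hunk below), with 2 (adaptive) as the default.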
---
diff --git a/src/coreclr/jit/block.cpp b/src/coreclr/jit/block.cpp
index 9073d3d..3972a5c 100644
--- a/src/coreclr/jit/block.cpp
+++ b/src/coreclr/jit/block.cpp
@@ -464,6 +464,10 @@ void BasicBlock::dspFlags()
     {
         printf("bwd-target ");
     }
+    if (bbFlags & BBF_BACKWARD_JUMP_SOURCE)
+    {
+        printf("bwd-src ");
+    }
     if (bbFlags & BBF_PATCHPOINT)
     {
         printf("ppoint ");
diff --git a/src/coreclr/jit/block.h b/src/coreclr/jit/block.h
index 81a511f..092bad7 100644
--- a/src/coreclr/jit/block.h
+++ b/src/coreclr/jit/block.h
@@ -551,6 +551,8 @@ enum BasicBlockFlags : unsigned __int64
     BBF_HAS_ALIGN = MAKE_BBFLAG(39),          // BB ends with 'align' instruction
     BBF_TAILCALL_SUCCESSOR = MAKE_BBFLAG(40), // BB has pred that has potential tail call

+    BBF_BACKWARD_JUMP_SOURCE = MAKE_BBFLAG(41), // Block is a source of a backward jump
+
     // The following are sets of flags.

     // Flags that relate blocks to loop structure.
diff --git a/src/coreclr/jit/fgbasic.cpp b/src/coreclr/jit/fgbasic.cpp
index 7f8bd7b..bf6eba0 100644
--- a/src/coreclr/jit/fgbasic.cpp
+++ b/src/coreclr/jit/fgbasic.cpp
@@ -2385,6 +2385,7 @@ void Compiler::fgMarkBackwardJump(BasicBlock* targetBlock, BasicBlock* sourceBlo
         }
     }

+    sourceBlock->bbFlags |= BBF_BACKWARD_JUMP_SOURCE;
     targetBlock->bbFlags |= BBF_BACKWARD_JUMP_TARGET;
 }

diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp
index eebe8a8..58f8962 100644
--- a/src/coreclr/jit/fgopt.cpp
+++ b/src/coreclr/jit/fgopt.cpp
@@ -3477,6 +3477,31 @@ bool Compiler::fgOptimizeUncondBranchToSimpleCond(BasicBlock* block, BasicBlock*
         return false;
     }

+    // At this point we know target is BBJ_COND.
+    //
+    // Bail out if OSR, as we can have unusual flow into loops. If one
+    // of target's successors is also a backedge target, this optimization
+    // may mess up loop recognition by creating too many non-loop preds.
+    //
+    if (opts.IsOSR())
+    {
+        assert(target->bbJumpKind == BBJ_COND);
+
+        if ((target->bbNext->bbFlags & BBF_BACKWARD_JUMP_TARGET) != 0)
+        {
+            JITDUMP("Deferring: " FMT_BB " --> " FMT_BB "; latter looks like loop top\n", target->bbNum,
+                    target->bbNext->bbNum);
+            return false;
+        }
+
+        if ((target->bbJumpDest->bbFlags & BBF_BACKWARD_JUMP_TARGET) != 0)
+        {
+            JITDUMP("Deferring: " FMT_BB " --> " FMT_BB "; latter looks like loop top\n", target->bbNum,
+                    target->bbJumpDest->bbNum);
+            return false;
+        }
+    }
+
     // See if this block assigns constant or other interesting tree to that same local.
     //
     if (!fgBlockEndFavorsTailDuplication(block, lclNum))
diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp
index 0759c4f..54ad01ea 100644
--- a/src/coreclr/jit/importer.cpp
+++ b/src/coreclr/jit/importer.cpp
@@ -11873,35 +11873,151 @@ void Compiler::impImportBlockCode(BasicBlock* block)
         // OSR is not yet supported for methods with explicit tail calls.
         //
-        // But we also do not have to switch these methods to be optimized as we should be
+        // But we also do not have to switch these methods to be optimized, as we should be
         // able to avoid getting trapped in Tier0 code by normal call counting.
         // So instead, just suppress adding patchpoints.
         //
         if (!compTailPrefixSeen)
         {
-            // The normaly policy is only to add patchpoints to the targets of lexically
-            // backwards branches.
+            // We only need to add patchpoints if the method can loop.
             //
             if (compHasBackwardJump)
             {
                 assert(compCanHavePatchpoints());
-                // Is the start of this block a suitable patchpoint?
+                // By default we use the "adaptive" strategy.
                 //
-                if (((block->bbFlags & BBF_BACKWARD_JUMP_TARGET) != 0) && (verCurrentState.esStackDepth == 0))
+                // This can create both source and target patchpoints within a given
+                // loop structure, which isn't ideal, but is not incorrect. We will
+                // just have some extra Tier0 overhead.
+                //
+                // Todo: implement support for mid-block patchpoints. If `block`
+                // is truly a backedge source (and not in a handler) then we should be
+                // able to find a stack empty point somewhere in the block.
+                //
+                const int patchpointStrategy = JitConfig.TC_PatchpointStrategy();
+                bool addPatchpoint = false;
+                bool mustUseTargetPatchpoint = false;
+
+                switch (patchpointStrategy)
                 {
-                    // We should have noted this earlier and bailed out of OSR.
-                    //
-                    assert(!block->hasHndIndex());
+                    default:
+                    {
+                        // Patchpoints at backedge sources, if possible, otherwise targets.
+                        //
+                        addPatchpoint = ((block->bbFlags & BBF_BACKWARD_JUMP_SOURCE) == BBF_BACKWARD_JUMP_SOURCE);
+                        mustUseTargetPatchpoint = (verCurrentState.esStackDepth != 0) || block->hasHndIndex();
+                        break;
+                    }
+
+                    case 1:
+                    {
+                        // Patchpoints at stackempty backedge targets.
+                        // Note if we have loops where the IL stack is not empty on the backedge we can't patchpoint
+                        // them.
+                        //
+                        // We should not have allowed OSR if there were backedges in handlers.
+                        //
+                        assert(!block->hasHndIndex());
+                        addPatchpoint = ((block->bbFlags & BBF_BACKWARD_JUMP_TARGET) == BBF_BACKWARD_JUMP_TARGET) &&
+                                        (verCurrentState.esStackDepth == 0);
+                        break;
+                    }
+
+                    case 2:
+                    {
+                        // Adaptive strategy.
+                        //
+                        // Patchpoints at backedge targets if there are multiple backedges,
+                        // otherwise at backedge sources, if possible. Note a block can be both; if so we
+                        // just need one patchpoint.
+                        //
+                        if ((block->bbFlags & BBF_BACKWARD_JUMP_TARGET) == BBF_BACKWARD_JUMP_TARGET)
+                        {
+                            // We don't know backedge count, so just use ref count.
+                            //
+                            addPatchpoint = (block->bbRefs > 1) && (verCurrentState.esStackDepth == 0);
+                        }
+
+                        if (!addPatchpoint && ((block->bbFlags & BBF_BACKWARD_JUMP_SOURCE) == BBF_BACKWARD_JUMP_SOURCE))
+                        {
+                            addPatchpoint = true;
+                            mustUseTargetPatchpoint = (verCurrentState.esStackDepth != 0) || block->hasHndIndex();
+
+                            // Also force target patchpoint if target block has multiple (backedge) preds.
+                            //
+                            if (!mustUseTargetPatchpoint)
+                            {
+                                for (BasicBlock* const succBlock : block->Succs(this))
+                                {
+                                    if ((succBlock->bbNum <= block->bbNum) && (succBlock->bbRefs > 1))
+                                    {
+                                        mustUseTargetPatchpoint = true;
+                                        break;
+                                    }
+                                }
+                            }
+                        }
+                        break;
+                    }
+                }
+
+                if (addPatchpoint)
+                {
+                    if (mustUseTargetPatchpoint)
+                    {
+                        // We wanted a source patchpoint, but could not have one.
+                        // So, add patchpoints to the backedge targets.
+                        //
+                        for (BasicBlock* const succBlock : block->Succs(this))
+                        {
+                            if (succBlock->bbNum <= block->bbNum)
+                            {
+                                // The succBlock had better agree it's a target.
+                                //
+                                assert((succBlock->bbFlags & BBF_BACKWARD_JUMP_TARGET) == BBF_BACKWARD_JUMP_TARGET);
+
+                                // We may already have decided to put a patchpoint in succBlock. If not, add one.
+                                //
+                                if ((succBlock->bbFlags & BBF_PATCHPOINT) != 0)
+                                {
+                                    // In some cases the target may not be stack-empty at entry.
+                                    // If so, we will bypass patchpoints for this backedge.
+                                    //
+                                    if (succBlock->bbStackDepthOnEntry() > 0)
+                                    {
+                                        JITDUMP("\nCan't set source patchpoint at " FMT_BB ", can't use target " FMT_BB
+                                                " as it has non-empty stack on entry.\n",
+                                                block->bbNum, succBlock->bbNum);
+                                    }
+                                    else
+                                    {
+                                        JITDUMP("\nCan't set source patchpoint at " FMT_BB ", using target " FMT_BB
+                                                " instead\n",
+                                                block->bbNum, succBlock->bbNum);
+
+                                        assert(!succBlock->hasHndIndex());
+                                        succBlock->bbFlags |= BBF_PATCHPOINT;
+                                    }
+                                }
+                            }
+                        }
+                    }
+                    else
+                    {
+                        assert(!block->hasHndIndex());
+                        block->bbFlags |= BBF_PATCHPOINT;
+                    }

-                    block->bbFlags |= BBF_PATCHPOINT;
                     setMethodHasPatchpoint();
                 }
             }
             else
             {
-                // Should not see backward branch targets w/o backwards branches
-                assert((block->bbFlags & BBF_BACKWARD_JUMP_TARGET) == 0);
+                // Should not see backward branch targets w/o backwards branches.
+                // So if !compHasBackwardsBranch, these flags should never be set.
+                //
+                assert((block->bbFlags & (BBF_BACKWARD_JUMP_TARGET | BBF_BACKWARD_JUMP_SOURCE)) == 0);
             }
         }
diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h
index 051fcc4..b965577 100644
--- a/src/coreclr/jit/jitconfigvalues.h
+++ b/src/coreclr/jit/jitconfigvalues.h
@@ -464,6 +464,11 @@ CONFIG_INTEGER(TC_OnStackReplacement, W("TC_OnStackReplacement"), 0)
 CONFIG_INTEGER(TC_OnStackReplacement_InitialCounter, W("TC_OnStackReplacement_InitialCounter"), 1000)
 // Enable partial compilation for Tier0 methods
 CONFIG_INTEGER(TC_PartialCompilation, W("TC_PartialCompilation"), 0)
+// Patchpoint strategy:
+// 0 - backedge sources
+// 1 - backedge targets
+// 2 - adaptive (default)
+CONFIG_INTEGER(TC_PatchpointStrategy, W("TC_PatchpointStrategy"), 2)
 #if defined(DEBUG)
 // Randomly sprinkle patchpoints. Value is the likelyhood any given stack-empty point becomes a patchpoint.
 CONFIG_INTEGER(JitRandomOnStackReplacement, W("JitRandomOnStackReplacement"), 0)
diff --git a/src/coreclr/jit/jiteh.cpp b/src/coreclr/jit/jiteh.cpp
index 5cf2d7b..9590279 100644
--- a/src/coreclr/jit/jiteh.cpp
+++ b/src/coreclr/jit/jiteh.cpp
@@ -2350,6 +2350,7 @@ bool Compiler::fgNormalizeEHCase2()
                 BasicBlock* newTryStart = bbNewBasicBlock(BBJ_NONE);
                 fgInsertBBbefore(insertBeforeBlk, newTryStart);
+                insertBeforeBlk->bbRefs++;

 #ifdef DEBUG
                 if (verbose)
@@ -2376,6 +2377,11 @@ bool Compiler::fgNormalizeEHCase2()
                 // start.
                 newTryStart->bbFlags |= (BBF_TRY_BEG | BBF_DONT_REMOVE | BBF_INTERNAL);

+                if (insertBeforeBlk->bbFlags & BBF_BACKWARD_JUMP_TARGET)
+                {
+                    newTryStart->bbFlags |= BBF_BACKWARD_JUMP_TARGET;
+                }
+
                 // Now we need to split any flow edges targetting the old try begin block between the old
                 // and new block. Note that if we are handling a multiply-nested 'try', we may have already
                 // split the inner set. So we need to split again, from the most enclosing block that we've
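For a concrete picture of what the three TC_PatchpointStrategy values mean for
a simple counted loop, here is an annotated example. It is purely illustrative
and not part of the change: the annotations describe the block shape Roslyn
typically emits for a C# `for` loop, written here as equivalent C++ so the
sketch compiles on its own.

#include <cstdio>

// sumTo(10) == 45; the loop is only here to give the blocks a shape to talk about.
int sumTo(int n)
{
    int sum = 0;
    // Roslyn typically emits the IL for a `for` loop with the condition test
    // after the body: an initial unconditional branch jumps to the test, and
    // the test ends with the lexically backward conditional branch to the body.
    // So the test block is the backedge *source* and the body head is the
    // backedge *target*.
    //
    //   TC_PatchpointStrategy=0: patchpoint in the test block (the source)
    //   TC_PatchpointStrategy=1: patchpoint at the body head (the target)
    //   TC_PatchpointStrategy=2: the body head usually has only the backedge
    //                            as a pred here, so adaptive also uses the source
    for (int i = 0; i < n; i++)
    {
        sum += i;
    }
    return sum;
}

int main()
{
    std::printf("%d\n", sumTo(10));
    return 0;
}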