From: Andy Ayers
Date: Sun, 6 Mar 2022 16:26:43 +0000 (-0800)
Subject: JIT: add OSR patchpoint strategy, inhibit tail duplication (#66208)
X-Git-Tag: accepted/tizen/unified/riscv/20231226.055536~10483
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=f9da3db92420d15b5bba283a44271cd81d83ad1a;p=platform%2Fupstream%2Fdotnet%2Fruntime.git

JIT: add OSR patchpoint strategy, inhibit tail duplication (#66208)

Two changes for OSR:

* add new strategies for placing patchpoints -- either at backedge sources
  (instead of targets) or adaptive, depending on the number of backedges.
  Change the default to adaptive, since this works better with the flow we
  see from C# `for` loops.

* inhibit tail duplication for OSR, as it may end up interfering with loop
  recognition.

We may not be able to place patchpoints at sources, for various reasons;
if so, we fall back to placing them at targets. We also can't place
patchpoints unless block entries are also stack-empty points. This means
forgoing patchpoints in some IL cases.
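For a rough picture of the policy described above, here is a small standalone
C++ sketch of the placement decision. It is illustrative only: it is not the
JIT code in this change, and the types and names (Strategy, BlockInfo,
choosePatchpoint, and so on) are invented for the sketch. The real logic lives
in impImportBlockCode in the importer.cpp hunk below.

#include <cstdio>

// Which blocks get Tier0 OSR patchpoints (values mirror TC_PatchpointStrategy).
enum class Strategy
{
    BackedgeSources = 0, // patchpoint where the backward branch originates
    BackedgeTargets = 1, // patchpoint where the backward branch lands (loop top)
    Adaptive        = 2  // default: target when the loop top has several preds, else source
};

// A few facts about a basic block; stand-ins for the real BasicBlock fields.
struct BlockInfo
{
    bool isBackedgeSource;  // block ends in a lexically backward branch
    bool isBackedgeTarget;  // block is the target of a lexically backward branch
    int  refCount;          // predecessor count, used as a proxy for backedge count
    int  stackDepthOnEntry; // IL evaluation stack depth at block entry
    bool inHandler;         // block lives inside an EH handler
};

struct Decision
{
    bool addPatchpoint        = false; // place a patchpoint for this block
    bool mustUseTargetInstead = false; // wanted a source patchpoint; fall back to the target(s)
};

Decision choosePatchpoint(const BlockInfo& b, Strategy s)
{
    Decision d;
    switch (s)
    {
        case Strategy::BackedgeSources:
            d.addPatchpoint = b.isBackedgeSource;
            // Source patchpoints need an empty stack and no enclosing handler;
            // otherwise the patchpoint moves to the backedge target(s).
            d.mustUseTargetInstead = (b.stackDepthOnEntry != 0) || b.inHandler;
            break;

        case Strategy::BackedgeTargets:
            // Only stack-empty loop tops can host a patchpoint.
            d.addPatchpoint = b.isBackedgeTarget && (b.stackDepthOnEntry == 0);
            break;

        case Strategy::Adaptive:
            // Prefer the target when it has multiple preds (a proxy for multiple
            // backedges); otherwise fall back to the source, as above.
            if (b.isBackedgeTarget && (b.refCount > 1) && (b.stackDepthOnEntry == 0))
            {
                d.addPatchpoint = true;
            }
            else if (b.isBackedgeSource)
            {
                d.addPatchpoint        = true;
                d.mustUseTargetInstead = (b.stackDepthOnEntry != 0) || b.inHandler;
            }
            break;
    }
    return d;
}

int main()
{
    // Bottom block of a simple `for` loop: source of one backedge, stack empty.
    BlockInfo loopBottom{true, false, 1, 0, false};
    Decision  d = choosePatchpoint(loopBottom, Strategy::Adaptive);
    std::printf("addPatchpoint=%d mustUseTargetInstead=%d\n", d.addPatchpoint, d.mustUseTargetInstead);
    return 0;
}

In the change itself the same decision is driven by JitConfig.TC_PatchpointStrategy()
(see the jitconfigvalues.h hunk below), with 2 (adaptive) as the default.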
---
diff --git a/src/coreclr/jit/block.cpp b/src/coreclr/jit/block.cpp
index 9073d3d..3972a5c 100644
--- a/src/coreclr/jit/block.cpp
+++ b/src/coreclr/jit/block.cpp
@@ -464,6 +464,10 @@ void BasicBlock::dspFlags()
     {
         printf("bwd-target ");
     }
+    if (bbFlags & BBF_BACKWARD_JUMP_SOURCE)
+    {
+        printf("bwd-src ");
+    }
     if (bbFlags & BBF_PATCHPOINT)
     {
         printf("ppoint ");
diff --git a/src/coreclr/jit/block.h b/src/coreclr/jit/block.h
index 81a511f..092bad7 100644
--- a/src/coreclr/jit/block.h
+++ b/src/coreclr/jit/block.h
@@ -551,6 +551,8 @@ enum BasicBlockFlags : unsigned __int64
     BBF_HAS_ALIGN = MAKE_BBFLAG(39),          // BB ends with 'align' instruction
     BBF_TAILCALL_SUCCESSOR = MAKE_BBFLAG(40), // BB has pred that has potential tail call

+    BBF_BACKWARD_JUMP_SOURCE = MAKE_BBFLAG(41), // Block is a source of a backward jump
+
     // The following are sets of flags.

     // Flags that relate blocks to loop structure.
diff --git a/src/coreclr/jit/fgbasic.cpp b/src/coreclr/jit/fgbasic.cpp
index 7f8bd7b..bf6eba0 100644
--- a/src/coreclr/jit/fgbasic.cpp
+++ b/src/coreclr/jit/fgbasic.cpp
@@ -2385,6 +2385,7 @@ void Compiler::fgMarkBackwardJump(BasicBlock* targetBlock, BasicBlock* sourceBlo
         }
     }

+    sourceBlock->bbFlags |= BBF_BACKWARD_JUMP_SOURCE;
     targetBlock->bbFlags |= BBF_BACKWARD_JUMP_TARGET;
 }

diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp
index eebe8a8..58f8962 100644
--- a/src/coreclr/jit/fgopt.cpp
+++ b/src/coreclr/jit/fgopt.cpp
@@ -3477,6 +3477,31 @@ bool Compiler::fgOptimizeUncondBranchToSimpleCond(BasicBlock* block, BasicBlock*
         return false;
     }

+    // At this point we know target is BBJ_COND.
+    //
+    // Bail out if OSR, as we can have unusual flow into loops. If one
+    // of target's successors is also a backedge target, this optimization
+    // may mess up loop recognition by creating too many non-loop preds.
+    //
+    if (opts.IsOSR())
+    {
+        assert(target->bbJumpKind == BBJ_COND);
+
+        if ((target->bbNext->bbFlags & BBF_BACKWARD_JUMP_TARGET) != 0)
+        {
+            JITDUMP("Deferring: " FMT_BB " --> " FMT_BB "; latter looks like loop top\n", target->bbNum,
+                    target->bbNext->bbNum);
+            return false;
+        }
+
+        if ((target->bbJumpDest->bbFlags & BBF_BACKWARD_JUMP_TARGET) != 0)
+        {
+            JITDUMP("Deferring: " FMT_BB " --> " FMT_BB "; latter looks like loop top\n", target->bbNum,
+                    target->bbJumpDest->bbNum);
+            return false;
+        }
+    }
+
     // See if this block assigns constant or other interesting tree to that same local.
     //
     if (!fgBlockEndFavorsTailDuplication(block, lclNum))
diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp
index 0759c4f..54ad01ea 100644
--- a/src/coreclr/jit/importer.cpp
+++ b/src/coreclr/jit/importer.cpp
@@ -11873,35 +11873,151 @@ void Compiler::impImportBlockCode(BasicBlock* block)
         // OSR is not yet supported for methods with explicit tail calls.
         //
-        // But we also do not have to switch these methods to be optimized as we should be
+        // But we also do not have to switch these methods to be optimized, as we should be
         // able to avoid getting trapped in Tier0 code by normal call counting.
         // So instead, just suppress adding patchpoints.
         //
         if (!compTailPrefixSeen)
         {
-            // The normaly policy is only to add patchpoints to the targets of lexically
-            // backwards branches.
+            // We only need to add patchpoints if the method can loop.
             //
             if (compHasBackwardJump)
             {
                 assert(compCanHavePatchpoints());
-                // Is the start of this block a suitable patchpoint?
+                // By default we use the "adaptive" strategy.
                 //
-                if (((block->bbFlags & BBF_BACKWARD_JUMP_TARGET) != 0) && (verCurrentState.esStackDepth == 0))
+                // This can create both source and target patchpoints within a given
+                // loop structure, which isn't ideal, but is not incorrect. We will
+                // just have some extra Tier0 overhead.
+                //
+                // Todo: implement support for mid-block patchpoints. If `block`
+                // is truly a backedge source (and not in a handler) then we should be
+                // able to find a stack empty point somewhere in the block.
+                //
+                const int patchpointStrategy = JitConfig.TC_PatchpointStrategy();
+                bool addPatchpoint = false;
+                bool mustUseTargetPatchpoint = false;
+
+                switch (patchpointStrategy)
                 {
-                    // We should have noted this earlier and bailed out of OSR.
-                    //
-                    assert(!block->hasHndIndex());
+                    default:
+                    {
+                        // Patchpoints at backedge sources, if possible, otherwise targets.
+                        //
+                        addPatchpoint = ((block->bbFlags & BBF_BACKWARD_JUMP_SOURCE) == BBF_BACKWARD_JUMP_SOURCE);
+                        mustUseTargetPatchpoint = (verCurrentState.esStackDepth != 0) || block->hasHndIndex();
+                        break;
+                    }
+
+                    case 1:
+                    {
+                        // Patchpoints at stackempty backedge targets.
+                        // Note if we have loops where the IL stack is not empty on the backedge we can't patchpoint
+                        // them.
+                        //
+                        // We should not have allowed OSR if there were backedges in handlers.
+                        //
+                        assert(!block->hasHndIndex());
+                        addPatchpoint = ((block->bbFlags & BBF_BACKWARD_JUMP_TARGET) == BBF_BACKWARD_JUMP_TARGET) &&
+                                        (verCurrentState.esStackDepth == 0);
+                        break;
+                    }
+
+                    case 2:
+                    {
+                        // Adaptive strategy.
+                        //
+                        // Patchpoints at backedge targets if there are multiple backedges,
+                        // otherwise at backedge sources, if possible. Note a block can be both; if so we
+                        // just need one patchpoint.
+                        //
+                        if ((block->bbFlags & BBF_BACKWARD_JUMP_TARGET) == BBF_BACKWARD_JUMP_TARGET)
+                        {
+                            // We don't know backedge count, so just use ref count.
+                            //
+                            addPatchpoint = (block->bbRefs > 1) && (verCurrentState.esStackDepth == 0);
+                        }
+
+                        if (!addPatchpoint && ((block->bbFlags & BBF_BACKWARD_JUMP_SOURCE) == BBF_BACKWARD_JUMP_SOURCE))
+                        {
+                            addPatchpoint = true;
+                            mustUseTargetPatchpoint = (verCurrentState.esStackDepth != 0) || block->hasHndIndex();
+
+                            // Also force target patchpoint if target block has multiple (backedge) preds.
+                            //
+                            if (!mustUseTargetPatchpoint)
+                            {
+                                for (BasicBlock* const succBlock : block->Succs(this))
+                                {
+                                    if ((succBlock->bbNum <= block->bbNum) && (succBlock->bbRefs > 1))
+                                    {
+                                        mustUseTargetPatchpoint = true;
+                                        break;
+                                    }
+                                }
+                            }
+                        }
+                        break;
+                    }
+                }
+
+                if (addPatchpoint)
+                {
+                    if (mustUseTargetPatchpoint)
+                    {
+                        // We wanted a source patchpoint, but could not have one.
+                        // So, add patchpoints to the backedge targets.
+                        //
+                        for (BasicBlock* const succBlock : block->Succs(this))
+                        {
+                            if (succBlock->bbNum <= block->bbNum)
+                            {
+                                // The succBlock had better agree it's a target.
+                                //
+                                assert((succBlock->bbFlags & BBF_BACKWARD_JUMP_TARGET) == BBF_BACKWARD_JUMP_TARGET);
+
+                                // We may already have decided to put a patchpoint in succBlock. If not, add one.
+                                //
+                                if ((succBlock->bbFlags & BBF_PATCHPOINT) != 0)
+                                {
+                                    // In some cases the target may not be stack-empty at entry.
+                                    // If so, we will bypass patchpoints for this backedge.
+                                    //
+                                    if (succBlock->bbStackDepthOnEntry() > 0)
+                                    {
+                                        JITDUMP("\nCan't set source patchpoint at " FMT_BB ", can't use target " FMT_BB
+                                                " as it has non-empty stack on entry.\n",
+                                                block->bbNum, succBlock->bbNum);
+                                    }
+                                    else
+                                    {
+                                        JITDUMP("\nCan't set source patchpoint at " FMT_BB ", using target " FMT_BB
+                                                " instead\n",
+                                                block->bbNum, succBlock->bbNum);
+
+                                        assert(!succBlock->hasHndIndex());
+                                        succBlock->bbFlags |= BBF_PATCHPOINT;
+                                    }
+                                }
+                            }
+                        }
+                    }
+                    else
+                    {
+                        assert(!block->hasHndIndex());
+                        block->bbFlags |= BBF_PATCHPOINT;
+                    }

-                    block->bbFlags |= BBF_PATCHPOINT;
                     setMethodHasPatchpoint();
                 }
             }
             else
             {
-                // Should not see backward branch targets w/o backwards branches
-                assert((block->bbFlags & BBF_BACKWARD_JUMP_TARGET) == 0);
+                // Should not see backward branch targets w/o backwards branches.
+                // So if !compHasBackwardsBranch, these flags should never be set.
+                //
+                assert((block->bbFlags & (BBF_BACKWARD_JUMP_TARGET | BBF_BACKWARD_JUMP_SOURCE)) == 0);
             }
         }
diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h
index 051fcc4..b965577 100644
--- a/src/coreclr/jit/jitconfigvalues.h
+++ b/src/coreclr/jit/jitconfigvalues.h
@@ -464,6 +464,11 @@ CONFIG_INTEGER(TC_OnStackReplacement, W("TC_OnStackReplacement"), 0)
 CONFIG_INTEGER(TC_OnStackReplacement_InitialCounter, W("TC_OnStackReplacement_InitialCounter"), 1000)
 // Enable partial compilation for Tier0 methods
 CONFIG_INTEGER(TC_PartialCompilation, W("TC_PartialCompilation"), 0)
+// Patchpoint strategy:
+// 0 - backedge sources
+// 1 - backedge targets
+// 2 - adaptive (default)
+CONFIG_INTEGER(TC_PatchpointStrategy, W("TC_PatchpointStrategy"), 2)
 #if defined(DEBUG)
 // Randomly sprinkle patchpoints. Value is the likelyhood any given stack-empty point becomes a patchpoint.
 CONFIG_INTEGER(JitRandomOnStackReplacement, W("JitRandomOnStackReplacement"), 0)
diff --git a/src/coreclr/jit/jiteh.cpp b/src/coreclr/jit/jiteh.cpp
index 5cf2d7b..9590279 100644
--- a/src/coreclr/jit/jiteh.cpp
+++ b/src/coreclr/jit/jiteh.cpp
@@ -2350,6 +2350,7 @@ bool Compiler::fgNormalizeEHCase2()
                 BasicBlock* newTryStart = bbNewBasicBlock(BBJ_NONE);
                 fgInsertBBbefore(insertBeforeBlk, newTryStart);
+                insertBeforeBlk->bbRefs++;

 #ifdef DEBUG
                 if (verbose)
@@ -2376,6 +2377,11 @@ bool Compiler::fgNormalizeEHCase2()
                 // start.
                 newTryStart->bbFlags |= (BBF_TRY_BEG | BBF_DONT_REMOVE | BBF_INTERNAL);

+                if (insertBeforeBlk->bbFlags & BBF_BACKWARD_JUMP_TARGET)
+                {
+                    newTryStart->bbFlags |= BBF_BACKWARD_JUMP_TARGET;
+                }
+
                 // Now we need to split any flow edges targetting the old try begin block between the old
                 // and new block. Note that if we are handling a multiply-nested 'try', we may have already
                 // split the inner set. So we need to split again, from the most enclosing block that we've
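For a concrete picture of what the three TC_PatchpointStrategy values mean for
a simple counted loop, here is an annotated example. It is purely illustrative
and not part of the change: the annotations describe the block shape Roslyn
typically emits for a C# `for` loop, written here as equivalent C++ so the
sketch compiles on its own.

#include <cstdio>

// sumTo(10) == 45; the loop is only here to give the blocks a shape to talk about.
int sumTo(int n)
{
    int sum = 0;
    // Roslyn typically emits the IL for a `for` loop with the condition test
    // after the body: an initial unconditional branch jumps to the test, and
    // the test ends with the lexically backward conditional branch to the body.
    // So the test block is the backedge *source* and the body head is the
    // backedge *target*.
    //
    //   TC_PatchpointStrategy=0: patchpoint in the test block (the source)
    //   TC_PatchpointStrategy=1: patchpoint at the body head (the target)
    //   TC_PatchpointStrategy=2: the body head usually has only the backedge
    //                            as a pred here, so adaptive also uses the source
    for (int i = 0; i < n; i++)
    {
        sum += i;
    }
    return sum;
}

int main()
{
    std::printf("%d\n", sumTo(10));
    return 0;
}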