// before lvaMarkLocalVars: identifies ref type locals that can get type updates
// after lvaMarkLocalVars: identifies locals that are suitable for optAddCopies
+ unsigned char lvEhWriteThruCandidate : 1; // variable has a single def and hence is a register candidate
+ // if it is an EH variable
+
+ unsigned char lvDisqualifyForEhWriteThru : 1; // tracks variables that are disqualified from register candidacy
+
#if ASSERTION_PROP
unsigned char lvDisqualify : 1; // variable is no longer OK for add copy optimization
unsigned char lvVolatileHint : 1; // hint for AssertionProp
void raMarkStkVars();
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+#if defined(TARGET_AMD64)
+    // Returns true when locals of this type are only partially preserved by the
+    // callee-save convention, so the caller must save/restore the upper bytes
+    // around calls. On AMD64 only TYP_SIMD32 (YMM) has a caller-saved upper half.
+    static bool varTypeNeedsPartialCalleeSave(var_types type)
+    {
+        return (type == TYP_SIMD32);
+    }
+#elif defined(TARGET_ARM64)
+    static bool varTypeNeedsPartialCalleeSave(var_types type)
+    {
+        // ARM64 ABI FP Callee save registers only require Callee to save lower 8 Bytes
+        // For SIMD types longer than 8 bytes Caller is responsible for saving and restoring Upper bytes.
+        return ((type == TYP_SIMD16) || (type == TYP_SIMD12));
+    }
+#else // !defined(TARGET_AMD64) && !defined(TARGET_ARM64)
+#error("Unknown target architecture for FEATURE_SIMD")
+#endif // !defined(TARGET_AMD64) && !defined(TARGET_ARM64)
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+
protected:
// Some things are used by both LSRA and regpredict allocators.
CONFIG_INTEGER(EnableAVX, W("EnableAVX"), 0)
#endif // !defined(TARGET_AMD64) && !defined(TARGET_X86)
-CONFIG_INTEGER(EnableEHWriteThru, W("EnableEHWriteThru"), 0) // Enable the register allocator to support EH-write thru:
+CONFIG_INTEGER(EnableEHWriteThru, W("EnableEHWriteThru"), 1) // Enable the register allocator to support EH-write thru:
// partial enregistration of vars exposed on EH boundaries
CONFIG_INTEGER(EnableMultiRegLocals, W("EnableMultiRegLocals"), 1) // Enable the enregistration of locals that are
// defined or used in a multireg context.
{
noway_assert(lvaTable[i].lvIsStructField);
lvaTable[i].lvLiveInOutOfHndlr = 1;
- if (!lvaEnregEHVars)
+ // For now, only enregister an EH Var if it is a single def and its refCnt > 1.
+ if (!lvaEnregEHVars || !lvaTable[i].lvEhWriteThruCandidate || lvaTable[i].lvRefCnt() <= 1)
{
lvaSetVarDoNotEnregister(i DEBUGARG(DNER_LiveInOutOfHandler));
}
}
}
- if (!lvaEnregEHVars)
+ // For now, only enregister an EH Var if it is a single def and its refCnt > 1.
+ if (!lvaEnregEHVars || !varDsc->lvEhWriteThruCandidate || varDsc->lvRefCnt() <= 1)
{
lvaSetVarDoNotEnregister(varNum DEBUGARG(DNER_LiveInOutOfHandler));
}
/* Record if the variable has a single def or not */
- if (!varDsc->lvDisqualify) // If this variable is already disqualified we can skip this
+ if (!varDsc->lvDisqualify) // If this variable is already disqualified, we can skip this
{
if (tree->gtFlags & GTF_VAR_DEF) // Is this is a def of our variable
{
BlockSetOps::AddElemD(this, varDsc->lvRefBlks, block->bbNum);
}
}
+
+ if (!varDsc->lvDisqualifyForEhWriteThru) // If this EH var is already disqualified, we can skip this
+ {
+ if (tree->gtFlags & GTF_VAR_DEF) // Is this a def of our variable
+ {
+ bool bbInALoop = (block->bbFlags & BBF_BACKWARD_JUMP) != 0;
+ bool bbIsReturn = block->bbJumpKind == BBJ_RETURN;
+ bool needsExplicitZeroInit = fgVarNeedsExplicitZeroInit(lclNum, bbInALoop, bbIsReturn);
+
+ if (varDsc->lvEhWriteThruCandidate || needsExplicitZeroInit)
+ {
+ varDsc->lvEhWriteThruCandidate = false;
+ varDsc->lvDisqualifyForEhWriteThru = true;
+ }
+ else
+ {
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+ // TODO-CQ: If the varType needs partial callee save, conservatively do not enregister
+ // such variables. In the future, we need to enable enregistration for such variables.
+ if (!varTypeNeedsPartialCalleeSave(varDsc->lvType))
+#endif
+ {
+ varDsc->lvEhWriteThruCandidate = true;
+ }
+ }
+ }
+ }
+
#endif // ASSERTION_PROP
bool allowStructs = false;
Compiler::fgWalkResult PreOrderVisit(GenTree** use, GenTree* user)
{
+    // Visitor callback: delegates per-node ref marking to lvaMarkLclRefs.
+    // Recompute walks are expected to go through a different path now.
+    // TODO: Stop passing isRecompute once we are sure that this assert is never hit.
+    assert(!m_isRecompute);
m_compiler->lvaMarkLclRefs(*use, m_block, m_stmt, m_isRecompute);
return WALK_CONTINUE;
}
// Set initial value for lvSingleDef for explicit and implicit
// argument locals as they are "defined" on entry.
- varDsc->lvSingleDef = varDsc->lvIsParam;
+ // However, if we are just recomputing the ref counts, retain the value
+ // that was set by past phases.
+ if (!isRecompute)
+ {
+ varDsc->lvSingleDef = varDsc->lvIsParam;
+ varDsc->lvEhWriteThruCandidate = varDsc->lvIsParam;
+ }
}
// Remember current state of generic context use, and prepare
baseReg = EBPbased ? REG_FPBASE : REG_SPBASE;
#endif
- printf("[%2s%1s0x%02X] ", getRegName(baseReg), (offset < 0 ? "-" : "+"), (offset < 0 ? -offset : offset));
+ printf("[%2s%1s%02XH] ", getRegName(baseReg), (offset < 0 ? "-" : "+"), (offset < 0 ? -offset : offset));
}
/*****************************************************************************
if (varDsc->lvLiveInOutOfHndlr)
{
- newInt->isWriteThru = true;
+ newInt->isWriteThru = varDsc->lvEhWriteThruCandidate;
setIntervalAsSpilled(newInt);
}
// Additionally, when we are generating code for a target with partial SIMD callee-save
// (AVX on non-UNIX amd64 and 16-byte vectors on arm64), we keep a separate set of the
// LargeVectorType vars.
- if (varTypeNeedsPartialCalleeSave(varDsc->lvType))
+ if (Compiler::varTypeNeedsPartialCalleeSave(varDsc->lvType))
{
largeVectorVarCount++;
VarSetOps::AddElemD(compiler, largeVectorVars, varDsc->lvVarIndex);
}
#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
// Ensure that we have no partially-spilled large vector locals.
- assert(!varTypeNeedsPartialCalleeSave(interval->registerType) || !interval->isPartiallySpilled);
+ assert(!Compiler::varTypeNeedsPartialCalleeSave(interval->registerType) || !interval->isPartiallySpilled);
#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
}
INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_END_BB));
}
LclVarDsc* varDsc = compiler->lvaTable + lclVarInterval->varNum;
- assert(varTypeNeedsPartialCalleeSave(varDsc->lvType));
+ assert(Compiler::varTypeNeedsPartialCalleeSave(varDsc->lvType));
// On Arm64, we must always have a register to save the upper half,
// while on x86 we can spill directly to memory.
// lclVar as spilled).
assert(lclVarReg != REG_NA);
LclVarDsc* varDsc = compiler->lvaTable + lclVarInterval->varNum;
- assert(varTypeNeedsPartialCalleeSave(varDsc->lvType));
+ assert(Compiler::varTypeNeedsPartialCalleeSave(varDsc->lvType));
GenTree* restoreLcl = nullptr;
restoreLcl = compiler->gtNewLclvNode(lclVarInterval->varNum, varDsc->lvType);
void resolveConflictingDefAndUse(Interval* interval, RefPosition* defRefPosition);
- void buildRefPositionsForNode(GenTree* tree, BasicBlock* block, LsraLocation loc);
+ void buildRefPositionsForNode(GenTree* tree, LsraLocation loc);
#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
void buildUpperVectorSaveRefPositions(GenTree* tree, LsraLocation currentLoc, regMaskTP fpCalleeKillSet);
#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
#if defined(TARGET_AMD64)
- static bool varTypeNeedsPartialCalleeSave(var_types type)
- {
- return (type == TYP_SIMD32);
- }
static const var_types LargeVectorSaveType = TYP_SIMD16;
#elif defined(TARGET_ARM64)
- static bool varTypeNeedsPartialCalleeSave(var_types type)
- {
- // ARM64 ABI FP Callee save registers only require Callee to save lower 8 Bytes
- // For SIMD types longer than 8 bytes Caller is responsible for saving and restoring Upper bytes.
- return ((type == TYP_SIMD16) || (type == TYP_SIMD12));
- }
- static const var_types LargeVectorSaveType = TYP_DOUBLE;
-#else // !defined(TARGET_AMD64) && !defined(TARGET_ARM64)
-#error("Unknown target architecture for FEATURE_SIMD")
+ static const var_types LargeVectorSaveType = TYP_DOUBLE;
#endif // !defined(TARGET_AMD64) && !defined(TARGET_ARM64)
-
// Set of large vector (TYP_SIMD32 on AVX) variables.
VARSET_TP largeVectorVars;
// Set of large vector (TYP_SIMD32 on AVX) variables to consider for callee-save registers.
{
LclVarDsc* varDsc = compiler->lvaGetDescByTrackedIndex(varIndex);
#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
- if (varTypeNeedsPartialCalleeSave(varDsc->lvType))
+ if (Compiler::varTypeNeedsPartialCalleeSave(varDsc->lvType))
{
if (!VarSetOps::IsMember(compiler, largeVectorCalleeSaveCandidateVars, varIndex))
{
void LinearScan::makeUpperVectorInterval(unsigned varIndex)
{
Interval* lclVarInterval = getIntervalForLocalVar(varIndex);
- assert(varTypeNeedsPartialCalleeSave(lclVarInterval->registerType));
+ assert(Compiler::varTypeNeedsPartialCalleeSave(lclVarInterval->registerType));
Interval* newInt = newInterval(LargeVectorSaveType);
newInt->relatedInterval = lclVarInterval;
newInt->isUpperVector = true;
for (RefInfoListNode *listNode = defList.Begin(), *end = defList.End(); listNode != end;
listNode = listNode->Next())
{
- if (varTypeNeedsPartialCalleeSave(listNode->treeNode->TypeGet()))
+ if (Compiler::varTypeNeedsPartialCalleeSave(listNode->treeNode->TypeGet()))
{
// In the rare case where such an interval is live across nested calls, we don't need to insert another.
if (listNode->ref->getInterval()->recentRefPosition->refType != RefTypeUpperVectorSave)
//
// Arguments:
// tree - The node for which we are building RefPositions
-// block - The BasicBlock in which the node resides
// currentLoc - The LsraLocation of the given node
//
-void LinearScan::buildRefPositionsForNode(GenTree* tree, BasicBlock* block, LsraLocation currentLoc)
+void LinearScan::buildRefPositionsForNode(GenTree* tree, LsraLocation currentLoc)
{
// The LIR traversal doesn't visit GT_LIST or GT_ARGPLACE nodes.
// GT_CLS_VAR nodes should have been eliminated by rationalizer.
node->SetRegNum(node->GetRegNum());
#endif
- buildRefPositionsForNode(node, block, currentLoc);
+ buildRefPositionsForNode(node, currentLoc);
#ifdef DEBUG
if (currentLoc > maxNodeLocation)
def->regOptional = true;
}
#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
- if (varTypeNeedsPartialCalleeSave(varDefInterval->registerType))
+ if (Compiler::varTypeNeedsPartialCalleeSave(varDefInterval->registerType))
{
varDefInterval->isPartiallySpilled = false;
}
shutil.copy2(item, repro_location)
logging.info("")
- logging.info("Repro .mc files created for failures:")
+ logging.info("Repro {} .mc file(s) created for failures:".format(len(repro_files)))
for item in repro_files:
logging.info(item)
ThrowHelper.ThrowArgumentNullException(ExceptionArgument.stateMachine);
}
- // enregistrer variables with 0 post-fix so they can be used in registers without EH forcing them to stack
- // Capture references to Thread Contexts
- Thread currentThread0 = Thread.CurrentThread;
- Thread currentThread = currentThread0;
- ExecutionContext? previousExecutionCtx0 = currentThread0._executionContext;
+ Thread currentThread = Thread.CurrentThread;
// Store current ExecutionContext and SynchronizationContext as "previousXxx".
// This allows us to restore them and undo any Context changes made in stateMachine.MoveNext
// so that they won't "leak" out of the first await.
- ExecutionContext? previousExecutionCtx = previousExecutionCtx0;
- SynchronizationContext? previousSyncCtx = currentThread0._synchronizationContext;
+ ExecutionContext? previousExecutionCtx = currentThread._executionContext;
+ SynchronizationContext? previousSyncCtx = currentThread._synchronizationContext;
try
{
}
finally
{
- // Re-enregistrer variables post EH with 1 post-fix so they can be used in registers rather than from stack
- SynchronizationContext? previousSyncCtx1 = previousSyncCtx;
- Thread currentThread1 = currentThread;
// The common case is that these have not changed, so avoid the cost of a write barrier if not needed.
- if (previousSyncCtx1 != currentThread1._synchronizationContext)
+ if (previousSyncCtx != currentThread._synchronizationContext)
{
// Restore changed SynchronizationContext back to previous
- currentThread1._synchronizationContext = previousSyncCtx1;
+ currentThread._synchronizationContext = previousSyncCtx;
}
- ExecutionContext? previousExecutionCtx1 = previousExecutionCtx;
- ExecutionContext? currentExecutionCtx1 = currentThread1._executionContext;
- if (previousExecutionCtx1 != currentExecutionCtx1)
+ ExecutionContext? currentExecutionCtx = currentThread._executionContext;
+ if (previousExecutionCtx != currentExecutionCtx)
{
- ExecutionContext.RestoreChangedContextToThread(currentThread1, previousExecutionCtx1, currentExecutionCtx1);
+ ExecutionContext.RestoreChangedContextToThread(currentThread, previousExecutionCtx, currentExecutionCtx);
}
}
}
// Note: Manual enregistering may be addressed by "Exception Handling Write Through Optimization"
// https://github.com/dotnet/runtime/blob/main/docs/design/features/eh-writethru.md
- // Enregister variables with 0 post-fix so they can be used in registers without EH forcing them to stack
- // Capture references to Thread Contexts
- Thread currentThread0 = Thread.CurrentThread;
- Thread currentThread = currentThread0;
- ExecutionContext? previousExecutionCtx0 = currentThread0._executionContext;
+ // Enregister previousExecutionCtx0 so it can be used in registers without EH forcing it to stack
+
+ Thread currentThread = Thread.CurrentThread;
+ ExecutionContext? previousExecutionCtx0 = currentThread._executionContext;
if (previousExecutionCtx0 != null && previousExecutionCtx0.m_isDefault)
{
// Default is a null ExecutionContext internally
previousExecutionCtx0 = null;
}
- // Store current ExecutionContext and SynchronizationContext as "previousXxx".
- // This allows us to restore them and undo any Context changes made in callback.Invoke
- // so that they won't "leak" back into caller.
- // These variables will cross EH so be forced to stack
ExecutionContext? previousExecutionCtx = previousExecutionCtx0;
- SynchronizationContext? previousSyncCtx = currentThread0._synchronizationContext;
+ SynchronizationContext? previousSyncCtx = currentThread._synchronizationContext;
if (executionContext != null && executionContext.m_isDefault)
{
executionContext = null;
}
- if (previousExecutionCtx0 != executionContext)
+ if (previousExecutionCtx != executionContext)
{
- RestoreChangedContextToThread(currentThread0, executionContext, previousExecutionCtx0);
+ RestoreChangedContextToThread(currentThread, executionContext, previousExecutionCtx);
}
ExceptionDispatchInfo? edi = null;
edi = ExceptionDispatchInfo.Capture(ex);
}
- // Re-enregistrer variables post EH with 1 post-fix so they can be used in registers rather than from stack
- SynchronizationContext? previousSyncCtx1 = previousSyncCtx;
- Thread currentThread1 = currentThread;
// The common case is that these have not changed, so avoid the cost of a write barrier if not needed.
- if (currentThread1._synchronizationContext != previousSyncCtx1)
+ if (currentThread._synchronizationContext != previousSyncCtx)
{
// Restore changed SynchronizationContext back to previous
- currentThread1._synchronizationContext = previousSyncCtx1;
+ currentThread._synchronizationContext = previousSyncCtx;
}
- ExecutionContext? previousExecutionCtx1 = previousExecutionCtx;
- ExecutionContext? currentExecutionCtx1 = currentThread1._executionContext;
- if (currentExecutionCtx1 != previousExecutionCtx1)
+ ExecutionContext? currentExecutionCtx = currentThread._executionContext;
+ if (currentExecutionCtx != previousExecutionCtx)
{
- RestoreChangedContextToThread(currentThread1, previousExecutionCtx1, currentExecutionCtx1);
+ RestoreChangedContextToThread(currentThread, previousExecutionCtx, currentExecutionCtx);
}
// If exception was thrown by callback, rethrow it now original contexts are restored