From 1b5ec390dde02567f2e2f504eefe917f779fad38 Mon Sep 17 00:00:00 2001 From: Carol Eidt Date: Thu, 27 Apr 2017 15:28:17 -0700 Subject: [PATCH] Avoid over-constrained delayed-use register case When a node has a delayed-use register (either due to an RMW def or due to an internalRegDelayFree), if its result has a fixed-use, don't propagate the register constraint to the def, because there's no associated fixed-reference to ensure that it will not be used by the delay-free register. Fix dotnet/coreclr#11141 Commit migrated from https://github.com/dotnet/coreclr/commit/df3c59c17507f0bb602c2e352a9ca549edaaac49 --- src/coreclr/src/jit/lsra.cpp | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/src/coreclr/src/jit/lsra.cpp b/src/coreclr/src/jit/lsra.cpp index d558bcc..88d287a 100644 --- a/src/coreclr/src/jit/lsra.cpp +++ b/src/coreclr/src/jit/lsra.cpp @@ -737,15 +737,30 @@ void LinearScan::associateRefPosWithInterval(RefPosition* rp) else if (rp->refType == RefTypeUse) { // Ensure that we have consistent def/use on SDSU temps. - // However, in the case of a non-commutative rmw def, we must avoid over-constraining - // the def, so don't propagate a single-register restriction from the consumer to the producer + // However, there are a couple of cases where this may over-constrain allocation: + // 1. In the case of a non-commutative rmw def (in which the rmw source must be delay-free), or + // 2. In the case where the defining node requires a temp distinct from the target (also a + // delay-free case). + // In those cases, if we propagate a single-register restriction from the consumer to the producer + // the delayed uses will not see a fixed reference in the PhysReg at that position, and may + // incorrectly allocate that register. + // TODO-CQ: This means that we may often require a copy at the use of this node's result. + // This case could be moved to BuildRefPositionsForNode, at the point where the def RefPosition is + // created, causing a RefTypeFixedRef to be added at that location. This, however, results in + // more PhysReg RefPositions (a throughput impact), and a large number of diffs that require + // further analysis to determine benefit. + // See Issue #11274. RefPosition* prevRefPosition = theInterval->recentRefPosition; assert(prevRefPosition != nullptr && theInterval->firstRefPosition == prevRefPosition); + // All defs must have a valid treeNode, but we check it below to be conservative. + assert(prevRefPosition->treeNode != nullptr); regMaskTP prevAssignment = prevRefPosition->registerAssignment; regMaskTP newAssignment = (prevAssignment & rp->registerAssignment); if (newAssignment != RBM_NONE) { - if (!theInterval->hasNonCommutativeRMWDef || !isSingleRegister(newAssignment)) + if (!isSingleRegister(newAssignment) || + (!theInterval->hasNonCommutativeRMWDef && (prevRefPosition->treeNode != nullptr) && + !prevRefPosition->treeNode->gtLsraInfo.isInternalRegDelayFree)) { prevRefPosition->registerAssignment = newAssignment; } -- 2.7.4