genConsumeOperands(treeNode->AsOp());
if (varTypeIsFloating(targetType))
{
- // divisor is not contained or if contained is a memory op
- assert(!divisor->isContained() || divisor->isMemoryOp() || divisor->IsCnsFltOrDbl());
+ // The divisor is either not contained or, if contained, is a memory op.
+ // Note that a reg optional operand is treated as a memory op
+ // if no register is allocated to it.
+ assert(!divisor->isContained() ||
+ divisor->isMemoryOp() ||
+ divisor->IsCnsFltOrDbl() ||
+ divisor->IsRegOptional());
// Floating point div/rem operation
assert(oper == GT_DIV || oper == GT_MOD);
if (op1->isContained())
{
assert(treeNode->OperIsCommutative());
- assert(op1->isMemoryOp() || op1->IsCnsNonZeroFltOrDbl() || op1->IsIntCnsFitsInI32());
+ assert(op1->isMemoryOp() ||
+ op1->IsCnsNonZeroFltOrDbl() ||
+ op1->IsIntCnsFitsInI32() ||
+ op1->IsRegOptional());
op1 = treeNode->gtGetOp2();
op2 = treeNode->gtGetOp1();
bool isUnsignedMultiply = ((treeNode->gtFlags & GTF_UNSIGNED) != 0);
bool requiresOverflowCheck = treeNode->gtOverflowEx();
- GenTree *op1 = treeNode->gtOp.gtOp1;
- GenTree *op2 = treeNode->gtOp.gtOp2;
+ GenTree* op1 = treeNode->gtGetOp1();
+ GenTree* op2 = treeNode->gtGetOp2();
// there are 3 forms of x64 multiply:
// 1-op form with 128 result: RDX:RAX = RAX * rm
// Now we need to consume the operands of the GT_AND node.
genConsumeOperands(tree->AsOp());
}
+ else if (tree->OperGet() == GT_LCL_VAR)
+ {
+ // A contained lcl var must be living on the stack and marked as reg optional.
+ unsigned varNum = tree->AsLclVarCommon()->GetLclNum();
+ LclVarDsc* varDsc = compiler->lvaTable + varNum;
+
+ noway_assert(varDsc->lvRegNum == REG_STK);
+ noway_assert(tree->IsRegOptional());
+
+ // Update the life of the reg optional lcl var.
+ genUpdateLife(tree);
+ }
else
{
assert(tree->OperIsLeaf());
rmwDst = data->gtGetOp2();
rmwSrc = data->gtGetOp1();
}
+
+ genConsumeRegs(rmwSrc);
}
else
{
- // For unary RMW ops, src and dst of RMW memory op is the same.
+ // *(p) = oper *(p): Here addr = p, rmwSrc = rmwDst = *(p), i.e. GT_IND(p).
+ // For unary RMW ops, the src and dst of the RMW memory op are the same.
+ // Lowering clears the operand counts on rmwSrc, so we don't need to
+ // perform a genConsumeReg() on it.
assert(storeInd->IsRMWDstOp1());
rmwSrc = data->gtGetOp1();
rmwDst = data->gtGetOp1();
assert(rmwSrc != nullptr);
assert(rmwDst != nullptr);
- assert(Lowering::IndirsAreEquivalent(rmwDst, storeInd));
-
- genConsumeRegs(rmwSrc);
+ assert(Lowering::IndirsAreEquivalent(rmwDst, storeInd));
}
else
{
{
// dst can only be a reg or modrm
assert(!dst->isContained() ||
- dst->isContainedIndir() ||
- dst->isContainedLclField() ||
+ dst->isContainedMemoryOp() ||
instrIs3opImul(ins)); // dst on these isn't really the dst
+#ifdef DEBUG
// src can be anything but both src and dst cannot be addr modes
// or at least cannot be contained addr modes
- if (dst->isContainedIndir())
- assert(!src->isContainedIndir());
+ if (dst->isContainedMemoryOp())
+ {
+ assert(!src->isContainedMemoryOp());
+ }
- if (src->isContainedLclField())
- assert(!dst->isContained());
+ if (src->isContainedMemoryOp())
+ {
+ assert(!dst->isContainedMemoryOp());
+ }
+#endif
// find which operand is a memory op (if any)
// and what its base is
}
// find local field if any
- GenTreeLclFld* lclField = nullptr;
+ GenTreeLclFld* lclField = nullptr;
if (src->isContainedLclField())
{
lclField = src->AsLclFld();
lclField = dst->AsLclFld();
}
+ // find contained lcl var if any
+ GenTreeLclVar* lclVar = nullptr;
+ if (src->isContainedLclVar())
+ {
+ assert(src->IsRegOptional());
+ lclVar = src->AsLclVar();
+ }
+ else if (dst->isContainedLclVar())
+ {
+ assert(dst->IsRegOptional());
+ lclVar = dst->AsLclVar();
+ }
+
// First handle the simple non-memory cases
//
- if ((mem == nullptr) && (lclField == nullptr))
+ if ((mem == nullptr) && (lclField == nullptr) && (lclVar == nullptr))
{
if (intConst != nullptr)
{
return dst->gtRegNum;
}
- // Next handle the cases where we have a stack based local memory operand
+ // Next handle the cases where we have a stack based local memory operand.
//
- if (lclField)
+ unsigned varNum = BAD_VAR_NUM;
+ unsigned offset = (unsigned) -1;
+
+ if (lclField != nullptr)
{
- unsigned offset = lclField->gtLclFld.gtLclOffs;
- unsigned varNum = lclField->gtLclVarCommon.gtLclNum;
+ varNum = lclField->AsLclVarCommon()->GetLclNum();
+ offset = lclField->gtLclFld.gtLclOffs;
+ }
+ else if (lclVar != nullptr)
+ {
+ varNum = lclVar->AsLclVarCommon()->GetLclNum();
+ offset = 0;
+ }
+ if (varNum != BAD_VAR_NUM)
+ {
// Is the memory op in the source position?
- if (src->isContainedLclField())
+ if (src->isContainedLclField() ||
+ src->isContainedLclVar())
{
if (instrHasImplicitRegPairDest(ins))
{
else // The memory op is in the dest position.
{
assert(dst->gtRegNum == REG_NA);
+
// src could be int or reg
if (src->isContainedIntOrIImmed())
{
}
}
+// -------------------------------------------------------------------------
+// IsRegOptional: Returns true if this gentree node is marked by lowering to
+// indicate that codegen can still generate code even if it wasn't allocated
+// a register.
+bool GenTree::IsRegOptional() const
+{
+#ifdef LEGACY_BACKEND
+ return false;
+#else
+ return gtLsraInfo.regOptional;
+#endif
+}
+
bool GenTree::IsPhiDefn()
{
bool res =
bool isContainedLclField() const { return isContained() && isLclField(); }
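+ // A lcl var is contained only when it is marked reg optional and no register was allocated to it.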
+ bool isContainedLclVar() const { return isContained() && (OperGet() == GT_LCL_VAR); }
+
// Indicates whether it is a memory op.
// Right now it includes Indir and LclField ops.
bool isMemoryOp() const { return isIndir() || isLclField(); }
- bool isContainedMemoryOp() const { return isContained() && isMemoryOp(); }
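+ // A contained lcl var (i.e. a reg optional lcl var that was not allocated
+ // a register) is also treated as a contained memory op.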
+ bool isContainedMemoryOp() const
+ {
+ return (isContained() && isMemoryOp()) || isContainedLclVar();
+ }
regNumber GetRegNum() const
{
inline var_types CastFromType();
inline var_types& CastToType();
+ // Returns true if this gentree node is marked by lowering to indicate
+ // that codegen can still generate code even if it wasn't allocated a
+ // register.
+ bool IsRegOptional() const;
+
// Returns "true" iff "*this" is an assignment (GT_ASG) tree that defines an SSA name (lcl = phi(...));
bool IsPhiDefn();
void TreeNodeInfoInit(GenTreePtr* tree, GenTree* parent);
#if defined(_TARGET_XARCH_)
void TreeNodeInfoInitSimple(GenTree* tree);
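+ // Mark operands that codegen can treat as contained memory operands
+ // if LSRA does not allocate them a register.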
+ void SetRegOptionalForBinOp(GenTree* tree);
+ void TryToSetRegOptional(GenTree* operand);
#endif // defined(_TARGET_XARCH_)
void TreeNodeInfoInitReturn(GenTree* tree);
void TreeNodeInfoInitShiftRotate(GenTree* tree);
// overflow operations aren't supported on float/double types.
assert(!tree->gtOverflow());
+ op1 = tree->gtGetOp1();
+ op2 = tree->gtGetOp2();
+
// No implicit conversions at this stage as the expectation is that
// everything is made explicit by adding casts.
- assert(tree->gtOp.gtOp1->TypeGet() == tree->gtOp.gtOp2->TypeGet());
+ assert(op1->TypeGet() == op2->TypeGet());
info->srcCount = 2;
info->dstCount = 1;
-
- op1 = tree->gtOp.gtOp1;
- op2 = tree->gtOp.gtOp2;
+
if (op2->isMemoryOp() || op2->IsCnsNonZeroFltOrDbl())
{
MakeSrcContained(tree, op2);
// movss op1Reg, [memOp]; movaps targetReg, op1Reg, addss/sd targetReg, Op2Reg
MakeSrcContained(tree, op1);
}
+ else
+ {
+ // If there are no containable operands, we can make an operand reg optional.
+ SetRegOptionalForBinOp(tree);
+ }
break;
}
other = node->gtArrLen;
}
- if (other->isMemoryOp() && node->gtIndex->TypeGet() == node->gtArrLen->TypeGet())
+ if (other->isMemoryOp())
+ {
+ if (node->gtIndex->TypeGet() == node->gtArrLen->TypeGet())
+ {
+ MakeSrcContained(tree, other);
+ }
+ }
+ else
{
- MakeSrcContained(tree, other);
+ // Since the 'other' operand is not contained, we can mark it as reg optional.
+ TryToSetRegOptional(other);
}
}
break;
//
// Example2: GT_CAST(int <- bool <- int) - here type of GT_CAST node is int and castToType is bool.
//
+ // Example3: GT_EQ(int, op1 of type ubyte, op2 of type ubyte) - in this case codegen uses
+ // ubyte as the result of the comparison, and if the result needs to be materialized into
+ // a reg it is simply zero extended to TYP_INT size. Here is an example of generated code:
+ // cmp dl, byte ptr[addr mode]
+ // movzx edx, dl
+ //
// Though this looks conservative in theory, in practice we could not think of a case where
// the below logic leads to conservative register specification. In future when or if we find
// one such case, this logic needs to be fine tuned for that case(s).
- if (varTypeIsByte(tree) || ((tree->OperGet() == GT_CAST) && varTypeIsByte(tree->CastToType())))
+ if (varTypeIsByte(tree) ||
+ ((tree->OperGet() == GT_CAST) && varTypeIsByte(tree->CastToType())) ||
+ (tree->OperIsCompare() && varTypeIsByte(tree->gtGetOp1()) && varTypeIsByte(tree->gtGetOp2()))
+ )
{
regMaskTP regMask;
if (info->dstCount > 0)
// for GT_ADD(Constant, SomeTree)
info->srcCount = 2;
info->dstCount = 1;
- GenTree* op1 = tree->gtOp.gtOp1;
- GenTree* op2 = tree->gtOp.gtOp2;
+
+ GenTree* op1 = tree->gtGetOp1();
+ GenTree* op2 = tree->gtGetOp2();
// We can directly encode the second operand if it is either a containable constant or a memory-op.
// In case of memory-op, we can encode it directly provided its type matches with 'tree' type.
// This is because during codegen, type of 'tree' is used to determine emit Type size. If the types
// do not match, they get normalized (i.e. sign/zero extended) on load into a register.
bool directlyEncodable = false;
+ bool binOpInRMW = false;
GenTreePtr operand = nullptr;
if (IsContainableImmed(tree, op2))
directlyEncodable = true;
operand = op2;
}
- else if (!IsBinOpInRMWStoreInd(tree))
+ else
{
- if (op2->isMemoryOp() && tree->TypeGet() == op2->TypeGet())
+ binOpInRMW = IsBinOpInRMWStoreInd(tree);
+ if (!binOpInRMW)
{
- directlyEncodable = true;
- operand = op2;
- }
- else if (tree->OperIsCommutative())
- {
- if (IsContainableImmed(tree, op1) ||
- (op1->isMemoryOp() && tree->TypeGet() == op1->TypeGet() && IsSafeToContainMem(tree, op1)))
+ if (op2->isMemoryOp() && tree->TypeGet() == op2->TypeGet())
{
- // If it is safe, we can reverse the order of operands of commutative operations for efficient codegen
directlyEncodable = true;
- operand = op1;
+ operand = op2;
+ }
+ else if (tree->OperIsCommutative())
+ {
+ if (IsContainableImmed(tree, op1) ||
+ (op1->isMemoryOp() && tree->TypeGet() == op1->TypeGet() && IsSafeToContainMem(tree, op1)))
+ {
+ // If it is safe, we can reverse the order of operands of commutative operations for efficient codegen
+ directlyEncodable = true;
+ operand = op1;
+ }
}
}
}
assert(operand != nullptr);
MakeSrcContained(tree, operand);
}
+ else if (!binOpInRMW)
+ {
+ // If this binary op neither has contained operands nor is a
+ // Read-Modify-Write (RMW) operation, we can mark its operands
+ // as reg optional.
+ SetRegOptionalForBinOp(tree);
+ }
}
TreeNodeInfo* info = &(tree->gtLsraInfo);
LinearScan* l = m_lsra;
+ GenTree* op1 = tree->gtGetOp1();
+ GenTree* op2 = tree->gtGetOp2();
+
+ info->srcCount = 2;
+ info->dstCount = 1;
+
switch (tree->OperGet())
{
case GT_MOD:
{
// No implicit conversions at this stage as the expectation is that
// everything is made explicit by adding casts.
- assert(tree->gtOp.gtOp1->TypeGet() == tree->gtOp.gtOp2->TypeGet());
+ assert(op1->TypeGet() == op2->TypeGet());
- info->srcCount = 2;
- info->dstCount = 1;
-
- GenTree* op2 = tree->gtOp.gtOp2;
if (op2->isMemoryOp() || op2->IsCnsNonZeroFltOrDbl())
{
MakeSrcContained(tree, op2);
}
+ else
+ {
+ // If there are no containable operands, we can make an operand reg optional.
+ SetRegOptionalForBinOp(tree);
+ }
+
return;
}
break;
break;
}
- info->srcCount = 2;
- info->dstCount = 1;
-
- GenTree* op1 = tree->gtOp.gtOp1;
- GenTree* op2 = tree->gtOp.gtOp2;
-
// Amd64 Div/Idiv instruction:
// Dividend in RAX:RDX and computes
// Quotient in RAX, Remainder in RDX
else
{
op2->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~(RBM_RAX | RBM_RDX));
+
+ // If there are no containable operands, we can make an operand reg optional.
+ SetRegOptionalForBinOp(tree);
}
}
LinearScan* l = m_lsra;
// Both operand and its result must be of floating point type.
- GenTree* op1 = tree->gtOp.gtOp1;
+ GenTree* op1 = tree->gtGetOp1();
assert(varTypeIsFloating(op1));
assert(op1->TypeGet() == tree->TypeGet());
{
MakeSrcContained(tree, op1);
}
+ else
+ {
+ // Mark the operand as reg optional since codegen can still
+ // generate code if op1 is on the stack.
+ TryToSetRegOptional(op1);
+ }
break;
case CORINFO_INTRINSIC_Abs:
{
MakeSrcContained(tree, castOp);
}
+ else
+ {
+ // Mark castOp as reg optional to indicate codegen
+ // can still generate code if it is on the stack.
+ TryToSetRegOptional(castOp);
+ }
}
}
MakeSrcContained(tree, otherOp);
}
}
+ else
+ {
+ // Mark otherOp as reg optional to indicate codegen can still generate
+ // code even if otherOp is on the stack.
+ TryToSetRegOptional(otherOp);
+ }
return;
}
{
MakeSrcContained(tree, op1);
}
+ else
+ {
+ // One of op1 or op2 could be marked as reg optional
+ // to indicate that codegen can still generate code
+ // if one of them is on the stack.
+ TryToSetRegOptional(op2);
+
+ if (!op2->IsRegOptional())
+ {
+ TryToSetRegOptional(op1);
+ }
+ }
if (varTypeIsSmall(op1Type) && varTypeIsUnsigned(op1Type))
{
// generate more efficient code sequence for the case of GT_MUL(op1=memOp, op2=non-memOp)
MakeSrcContained(tree, op1);
}
+ else
+ {
+ // If there are no containable operands, we can make an operand reg optional.
+ SetRegOptionalForBinOp(tree);
+ }
return;
}
// To generate an LEA we need to force memOp into a register
// so don't allow memOp to be 'contained'
//
- if ((memOp != nullptr) &&
- !useLeaEncoding &&
- (memOp->TypeGet() == tree->TypeGet()) &&
- IsSafeToContainMem(tree, memOp))
+ if (!useLeaEncoding)
{
- MakeSrcContained(tree, memOp);
+ if (memOp != nullptr)
+ {
+ if ((memOp->TypeGet() == tree->TypeGet()) &&
+ IsSafeToContainMem(tree, memOp))
+ {
+ MakeSrcContained(tree, memOp);
+ }
+ }
+ else
+ {
+ // If there are no containable operands, we can make an operand reg optional.
+ SetRegOptionalForBinOp(tree);
+ }
}
}
return true;
}
+//----------------------------------------------------------------------
+// TryToSetRegOptional - sets a bit to indicate to LSRA that the register
+// for a given tree node is optional for codegen purposes. If no
+// register is allocated to such a tree node, its parent node treats
+// it as a contained memory operand during codegen.
+//
+// Arguments:
+// tree - GenTree node
+//
+// Returns
+// None
+//
+// Note: Right now a tree node is marked as reg optional only
+// if it is a GT_LCL_VAR. This routine needs to be modified if,
+// in future, lower/codegen needs to support other tree node
+// types.
+void Lowering::TryToSetRegOptional(GenTree* tree)
+{
+ if (tree->OperGet() == GT_LCL_VAR)
+ {
+ tree->gtLsraInfo.regOptional = true;
+ }
+}
+
+// ------------------------------------------------------------------
+// SetRegOptionalForBinOp - Indicates which operand of a bin-op has an
+// optional register requirement. The xarch instruction set allows either
+// op1 or op2 of a binary operation (e.g. add, mul, etc.) to be a memory
+// operand. This routine tells the register allocator which of the
+// operands optionally requires a register. LSRA might not allocate a
+// register to the RefTypeUse positions of such operands if it is
+// beneficial. In such a case codegen will treat them as memory
+// operands.
+//
+// Arguments:
+//    tree  -  GenTree of a binary operation.
+//
+// Returns
+// None.
+//
+// Note: On xarch, at most one of the operands will be marked as
+// reg optional, even when both operands could be considered register
+// optional.
+void Lowering::SetRegOptionalForBinOp(GenTree* tree)
+{
+ assert(GenTree::OperIsBinary(tree->OperGet()));
+
+ GenTree* op1 = tree->gtGetOp1();
+ GenTree* op2 = tree->gtGetOp2();
+
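+ // Only an operand whose type matches the node type is considered, since
+ // codegen uses the node type to determine the emit type size; an operand
+ // of a different type would need to be normalized on load into a register.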
+ if (tree->TypeGet() == op2->TypeGet())
+ {
+ TryToSetRegOptional(op2);
+ }
+
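+ // For a commutative op, op1 may be marked instead, but only if op2 was
+ // not already marked reg optional.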
+ if (!op2->IsRegOptional() &&
+ tree->OperIsCommutative() &&
+ tree->TypeGet() == op1->TypeGet())
+ {
+ TryToSetRegOptional(op1);
+ }
+}
+
#endif // _TARGET_XARCH_
#endif // !LEGACY_BACKEND
}
}
+//-------------------------------------------------------------
+// getWeight: Returns the weight of the RefPosition.
+//
+// Arguments:
+// refPos - ref position
+//
+// Returns:
+// Weight of ref position.
+unsigned LinearScan::getWeight(RefPosition* refPos)
+{
+ unsigned weight;
+ GenTreePtr treeNode = refPos->treeNode;
+
+ if (treeNode != nullptr)
+ {
+ if (isCandidateLocalRef(treeNode))
+ {
+ // Tracked locals: use weighted ref cnt as the weight of the
+ // ref position.
+ GenTreeLclVarCommon* lclCommon = treeNode->AsLclVarCommon();
+ LclVarDsc* varDsc = &(compiler->lvaTable[lclCommon->gtLclNum]);
+ weight = varDsc->lvRefCntWtd;
+ }
+ else
+ {
+ // Non-candidate local ref or non-lcl tree node.
+ // These are considered to have two references in the basic block:
+ // a def and a use, and hence their weighted ref count is 2 times
+ // the weight of the basic block in which they appear.
+ weight = 2 * this->blockInfo[refPos->bbNum].weight;
+ }
+ }
+ else
+ {
+ // Non-tree node ref positions. These will have a single
+ // reference in the basic block and hence their weighted
+ // refcount is equal to the weight of the block in which
+ // they appear.
+ weight = this->blockInfo[refPos->bbNum].weight;
+ }
+
+ return weight;
+}
+
// allRegs represents a set of registers that can
// be used to allocate the specified type in any point
// in time (more of a 'bank' of registers).
// and that can be spilled.
//
// Arguments:
-// current: The interval for the current allocation
-// refPosition: The RefPosition of the current Interval for which a register is being allocated
+// current The interval for the current allocation
+// refPosition The RefPosition of the current Interval for which a register is being allocated
+// allocationOptional If true, a reg may not be allocated if all other ref positions currently
+// occupying registers are more important than the 'refPosition'.
//
// Return Value:
// The regNumber allocated to the RefPositon. Returns REG_NA if no free register is found.
-
+//
+// Note: Currently this routine uses weight and farthest distance to the next
+// reference to select a ref position for spilling.
+// a) if allocationOptional = false
+//        The ref position chosen for spilling will have the lowest weight
+//        of all, and if there is more than one ref position with the
+//        same lowest weight, the one with the farthest distance to its
+//        next reference is chosen among them.
+//
+// b) if allocationOptional = true
+//        The ref position chosen for spilling will not only have the lowest
+//        weight of all but also a weight lower than that of 'refPosition'.
+//        If there is no such ref position, no reg will be allocated.
regNumber
-LinearScan::allocateBusyReg(Interval *current, RefPosition *refPosition)
+LinearScan::allocateBusyReg(Interval* current,
+ RefPosition* refPosition,
+ bool allocationOptional)
{
regNumber foundReg = REG_NA;
// TODO-CQ: Determine whether/how to take preferences into account in addition to
// prefering the one with the furthest ref position when considering
// a candidate to spill
- RegRecord * farthestRefPhysRegRecord = nullptr;
+ RegRecord* farthestRefPhysRegRecord = nullptr;
LsraLocation farthestLocation = MinLocation;
LsraLocation refLocation = refPosition->nodeLocation;
+ unsigned farthestRefPosWeight;
+ if (allocationOptional)
+ {
+ // If allocating a reg is optional, we will consider those ref positions
+ // whose weight is less than 'refPosition' for spilling.
+ farthestRefPosWeight = getWeight(refPosition);
+ }
+ else
+ {
+ // If allocating a reg is a must, we start off with max weight so
+ // that the first spill candidate will be selected based on
+ // farthest distance alone. Since we start off with farthestLocation
+ // initialized to MinLocation, the first available ref position
+ // will be selected as the spill candidate, and its weight will
+ // become farthestRefPosWeight.
+ farthestRefPosWeight = BB_MAX_WEIGHT;
+ }
+
for (regNumber regNum : Registers(regType))
{
regMaskTP candidateBit = genRegMask(regNum);
{
continue;
}
- Interval * assignedInterval = physRegRecord->assignedInterval;
+ Interval* assignedInterval = physRegRecord->assignedInterval;
// If there is a fixed reference at the same location (and it's not due to this reference),
// don't use it.
// to avoid a spill - codegen can then insert the copy.
assert(candidates == candidateBit);
physRegNextLocation = MaxLocation;
+ farthestRefPosWeight = BB_MAX_WEIGHT;
}
else
{
physRegNextLocation = physRegRecord->getNextRefLocation();
+
+ // If refPosition requires a fixed register, we should reject all others.
+ // Otherwise, we will still evaluate all physRegs even though their next
+ // location is not better than the farthestLocation found so far.
+ //
+ // TODO: this method should be using an approach similar to tryAllocateFreeReg()
+ // where it uses a regOrder array to avoid iterating over any but the single
+ // fixed candidate.
+ if (refPosition->isFixedRegRef && physRegNextLocation < farthestLocation)
+ {
+ continue;
+ }
}
- if (physRegNextLocation < farthestLocation)
- continue;
-
+
// If this register is not assigned to an interval, either
// - it has a FixedReg reference at the current location that is not this reference, OR
// - this is the special case of a fixed loReg, where this interval has a use at the same location
continue;
}
- RefPosition * recentAssignedRef = assignedInterval->recentRefPosition;
+ RefPosition* recentAssignedRef = assignedInterval->recentRefPosition;
if (!assignedInterval->isActive)
{
RefPosition* nextAssignedRef = recentAssignedRef->nextRefPosition;
assert(nextAssignedRef != nullptr);
assert(nextAssignedRef->nodeLocation == refLocation ||
- (nextAssignedRef->nodeLocation + 1 == refLocation && nextAssignedRef->delayRegFree));
+ (nextAssignedRef->nodeLocation + 1 == refLocation && nextAssignedRef->delayRegFree));
}
}
continue;
}
-
+
// If we have a recentAssignedRef, check that it is going to be OK to spill it
+ //
+ // TODO-Review: Under what conditions would recentAssignedRef be null?
+ unsigned recentAssignedRefWeight = BB_ZERO_WEIGHT;
if (recentAssignedRef != nullptr)
{
if (recentAssignedRef->nodeLocation == refLocation)
{
// We can't spill a register that's being used at the current location
- RefPosition * physRegRef = physRegRecord->recentRefPosition;
+ RefPosition* physRegRef = physRegRecord->recentRefPosition;
continue;
}
-
+
// If the current position has the candidate register marked to be delayed,
// check if the previous location is using this register, if that's the case we have to skip
// since we can't spill this register.
- if(recentAssignedRef->delayRegFree &&
- (refLocation == recentAssignedRef->nodeLocation + 1))
+ if (recentAssignedRef->delayRegFree &&
+ (refLocation == recentAssignedRef->nodeLocation + 1))
+ {
+ continue;
+ }
+
+ // We prefer not to spill a register if the weight of recentAssignedRef is
+ // greater than the weight of the spill candidate found so far. We would
+ // consider spilling a greater weight ref position only if the refPosition
+ // being allocated must get a reg.
+ recentAssignedRefWeight = getWeight(recentAssignedRef);
+ if (recentAssignedRefWeight > farthestRefPosWeight)
{
continue;
}
}
if (nextLocation > physRegNextLocation)
+ {
nextLocation = physRegNextLocation;
+ }
+
+ bool isBetterLocation;
- bool isBetterLocation = (nextLocation > farthestLocation);
#ifdef DEBUG
if (doSelectNearest() && farthestRefPhysRegRecord != nullptr)
{
- isBetterLocation = !isBetterLocation;
+ isBetterLocation = (nextLocation <= farthestLocation);
}
-#endif // DEBUG
+ else
+ // the below if-stmt is associated with this else
+#endif
+ if (recentAssignedRefWeight < farthestRefPosWeight)
+ {
+ isBetterLocation = true;
+ }
+ else
+ {
+ // This would mean the weight of the spill ref position we found so far is
+ // equal to the weight of the ref position that is being evaluated. In this
+ // case we prefer to spill the ref position whose distance to its next
+ // reference is the farthest.
+ assert(recentAssignedRefWeight == farthestRefPosWeight);
+
+ // If allocation is optional, the first spill candidate selected
+ // will be based on weight alone. After we have found a spill
+ // candidate whose weight is less than that of 'refPosition', we will
+ // consider farthest distance when there is a tie in weights.
+ // This is to ensure that we don't spill a ref position whose
+ // weight is equal to the weight of 'refPosition'.
+ if (allocationOptional && farthestRefPhysRegRecord == nullptr)
+ {
+ isBetterLocation = false;
+ }
+ else
+ {
+ isBetterLocation = (nextLocation > farthestLocation);
+ }
+ }
+
if (isBetterLocation)
{
farthestLocation = nextLocation;
farthestRefPhysRegRecord = physRegRecord;
+ farthestRefPosWeight = recentAssignedRefWeight;
}
}
- assert(farthestRefPhysRegRecord != nullptr &&
- (farthestLocation > refLocation || refPosition->isFixedRegRef));
- foundReg = farthestRefPhysRegRecord->regNum;
- unassignPhysReg(farthestRefPhysRegRecord, farthestRefPhysRegRecord->assignedInterval->recentRefPosition);
- assignPhysReg(farthestRefPhysRegRecord, current);
- refPosition->registerAssignment = genRegMask(foundReg);
+#if DEBUG
+ if (allocationOptional)
+ {
+ // There may not be a spill candidate, or if one is found,
+ // its weight must be less than the weight of 'refPosition'.
+ assert((farthestRefPhysRegRecord == nullptr) ||
+ (farthestRefPosWeight < getWeight(refPosition)));
+ }
+ else
+ {
+ // Must have found a spill candidate.
+ assert((farthestRefPhysRegRecord != nullptr) &&
+ (farthestLocation > refLocation || refPosition->isFixedRegRef));
+ }
+#endif
+
+ if (farthestRefPhysRegRecord != nullptr)
+ {
+ foundReg = farthestRefPhysRegRecord->regNum;
+ unassignPhysReg(farthestRefPhysRegRecord, farthestRefPhysRegRecord->assignedInterval->recentRefPosition);
+ assignPhysReg(farthestRefPhysRegRecord, current);
+ refPosition->registerAssignment = genRegMask(foundReg);
+ }
+ else
+ {
+ foundReg = REG_NA;
+ refPosition->registerAssignment = RBM_NONE;
+ }
+
return foundReg;
}
regNumber allocatedReg = tryAllocateFreeReg(currentInterval, refPosition);
if (allocatedReg == REG_NA)
{
- allocatedReg = allocateBusyReg(currentInterval, refPosition);
+ allocatedReg = allocateBusyReg(currentInterval, refPosition, false);
}
// Now restore the old info
if (!fromRefPosition->lastUse)
{
- if (!fromRefPosition->RequiresRegister())
+ if (!fromRefPosition->IsActualRef())
{
fromRefPosition->registerAssignment = RBM_NONE;
}
if (srcInterval->isActive &&
genRegMask(srcInterval->physReg) == currentRefPosition->registerAssignment &&
currentInterval->getNextRefLocation() == physRegRecord->getNextRefLocation())
- {
-
+ {
assert(physRegRecord->regNum == srcInterval->physReg);
- // Is the next use of this lclVar prior to the next kill of the physReg?
- if (srcInterval->getNextRefLocation() <= physRegRecord->getNextRefLocation())
- {
- physRegRecord->isBusyUntilNextKill = true;
- assert(physRegRecord->getNextRefLocation() == currentInterval->getNextRefLocation());
- }
+
+ // A special putarg_reg acts as a pass-thru since both the source lcl var
+ // and the putarg_reg have the same register allocated. The physical reg
+ // record of the register continues to point to the source lcl var's
+ // interval instead of to putarg_reg's interval. So if the reg allocated
+ // to the source lcl var were spilled and reallocated to another tree node
+ // before its use at the call node, the lcl var would be spilled instead of
+ // the putarg_reg, since the physical reg record points to the lcl var's
+ // interval. As a result, the arg reg would get trashed, leading to bad
+ // codegen. The assumption here is that the source lcl var of a special
+ // putarg_reg doesn't get spilled and re-allocated prior to its use at the
+ // call node. This is ensured by marking the physical reg record as busy
+ // until the next kill.
+ physRegRecord->isBusyUntilNextKill = true;
}
else
{
continue;
}
}
+
if (assignedRegister == REG_NA && RefTypeIsUse(refType))
{
currentRefPosition->reload = true;
// code here, but otherwise we may wind up in this register anyway.
keepAssignment = false;
}
+
if (keepAssignment == false)
{
currentRefPosition->registerAssignment = allRegs(currentInterval->registerType);
}
}
}
+
if (assignedRegister == REG_NA)
{
- // Try to allocate a register
- assignedRegister = tryAllocateFreeReg(currentInterval, currentRefPosition);
+ bool allocateReg = true;
+
+ if (currentRefPosition->IsRegOptional())
+ {
+ if (currentRefPosition->lastUse &&
+ currentRefPosition->reload)
+ {
+ // We can avoid allocating a register if it is a last use that would require a reload.
+ allocateReg = false;
+ }
+
+#ifdef DEBUG
+ // Under stress mode, don't attempt to allocate a reg to
+ // a reg optional ref position.
+ if (allocateReg && regOptionalNoAlloc())
+ {
+ allocateReg = false;
+ }
+#endif
+ }
+
+ if (allocateReg)
+ {
+ // Try to allocate a register
+ assignedRegister = tryAllocateFreeReg(currentInterval, currentRefPosition);
+ }
+
// If no register was found, and if the currentRefPosition must have a register,
// then find a register to spill
if (assignedRegister == REG_NA)
}
else
#endif // FEATURE_SIMD
- if (currentRefPosition->RequiresRegister())
+ if (currentRefPosition->IsActualRef())
{
- assignedRegister = allocateBusyReg(currentInterval, currentRefPosition);
- INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_ALLOC_SPILLED_REG, currentInterval, assignedRegister));
+ if (allocateReg)
+ {
+ // Though Lower/Codegen has indicated that it can generate code even if
+ // no reg is allocated to this ref position, we will make an attempt
+ // to get a busy reg if it is allocated to a less important ref position.
+ // If all the ref positions currently occupying registers are more
+ // important than currentRefPosition, no reg will be allocated.
+ assignedRegister = allocateBusyReg(currentInterval, currentRefPosition, currentRefPosition->IsRegOptional());
+ }
+
+ if (assignedRegister != REG_NA)
+ {
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_ALLOC_SPILLED_REG, currentInterval, assignedRegister));
+ }
+ else
+ {
+ // This can happen only if the ref position requires a reg optionally.
+ noway_assert(currentRefPosition->IsRegOptional());
+
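+ // No register is assigned; codegen will treat the tree node of this
+ // ref position as a contained memory operand.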
+ currentRefPosition->registerAssignment = RBM_NONE;
+ currentRefPosition->reload = false;
+
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_REG_ALLOCATED, currentInterval));
+ }
}
else
{
}
}
#endif // DEBUG
+
if (refType == RefTypeDummyDef && assignedRegister != REG_NA)
{
setInVarRegForBB(curBBNum, currentInterval->varNum, assignedRegister);
assert(currentRefPosition->reload);
}
}
+
// If we allocated a register, record it
- if (currentInterval
- && assignedRegister != REG_NA)
+ if (currentInterval != nullptr &&
+ assignedRegister != REG_NA)
{
assignedRegBit = genRegMask(assignedRegister);
currentRefPosition->registerAssignment = assignedRegBit;
// The interval could be dead if this is a user variable, or if the
// node is being evaluated for side effects, or a call whose result
// is not used, etc.
-
if (currentRefPosition->lastUse || currentRefPosition->nextRefPosition == nullptr)
{
assert(currentRefPosition->isIntervalRef());
- if (refType != RefTypeExpUse
- && currentRefPosition->nextRefPosition == nullptr)
+
+ if (refType != RefTypeExpUse &&
+ currentRefPosition->nextRefPosition == nullptr)
{
if (currentRefPosition->delayRegFree)
{
currentInterval->isActive = false;
}
}
+
lastAllocatedRefPosition = currentRefPosition;
}
}
if (currentRefPosition->registerAssignment == RBM_NONE)
{
- assert(!currentRefPosition->RequiresRegister());
+ // Either this ref position requires no register, or its register is optional.
+ assert(!currentRefPosition->IsActualRef() ||
+ currentRefPosition->IsRegOptional());
+
interval->isSpilled = true;
varDsc->lvRegNum = REG_STK;
if (interval->assignedReg != nullptr && interval->assignedReg->assignedInterval == interval)
}
interval->assignedReg = nullptr;
interval->physReg = REG_NA;
+
return;
}
{
interval->physReg = REG_NA;
interval->assignedReg = nullptr;
- regRecord->assignedInterval = nullptr;
+
+ // regRecord could be null if the RefPosition requires a reg optionally
+ if (regRecord != nullptr)
+ {
+ regRecord->assignedInterval = nullptr;
+ }
+ else
+ {
+ assert(currentRefPosition->IsRegOptional());
+ }
}
}
}
LsraSpill getLsraSpill() { return (LsraSpill) (lsraStressMask & LSRA_SPILL_MASK); }
bool spillAlways() { return getLsraSpill() == LSRA_SPILL_ALWAYS; }
+ // This controls whether RefPositions that require a register optionally should
+ // be allocated a reg at all.
+ enum LsraRegOptionalControl { LSRA_REG_OPTIONAL_DEFAULT = 0,
+ LSRA_REG_OPTIONAL_NO_ALLOC = 0x1000,
+ LSRA_REG_OPTIONAL_MASK = 0x1000 };
+
+ LsraRegOptionalControl getLsraRegOptionalControl()
+ {
+ return (LsraRegOptionalControl) (lsraStressMask & LSRA_REG_OPTIONAL_MASK);
+ }
+
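+ // When this stress mode is active, the allocator does not attempt to
+ // allocate a register to reg optional ref positions.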
+ bool regOptionalNoAlloc()
+ {
+ return getLsraRegOptionalControl() == LSRA_REG_OPTIONAL_NO_ALLOC;
+ }
+
// Dump support
void lsraDumpIntervals(const char* msg);
void dumpRefPositions(const char *msg);
void associateRefPosWithRegister(RefPosition *rp);
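+ // Returns the weight of 'refPos', used to choose among spill candidates.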
+ unsigned getWeight(RefPosition* refPos);
+
/*****************************************************************************
* Register management
****************************************************************************/
regNumber tryAllocateFreeReg(Interval *current, RefPosition *refPosition);
RegRecord* findBestPhysicalReg(RegisterType regType, LsraLocation endLocation,
regMaskTP candidates, regMaskTP preferences);
- regNumber allocateBusyReg(Interval *current, RefPosition *refPosition);
+ regNumber allocateBusyReg(Interval* current, RefPosition* refPosition, bool allocationOptional);
regNumber assignCopyReg(RefPosition * refPosition);
void checkAndAssignInterval(RegRecord * regRec, Interval * interval);
LsraLocation nodeLocation;
regMaskTP registerAssignment;
- regNumber assignedReg() { return genRegNumFromMask(registerAssignment); }
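+ // Returns the assigned register, or REG_NA if the register assignment is
+ // empty (e.g. a reg optional use that was not allocated a register).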
+ regNumber assignedReg() {
+ if (registerAssignment == RBM_NONE)
+ {
+ return REG_NA;
+ }
+
+ return genRegNumFromMask(registerAssignment);
+ }
RefType refType;
- bool RequiresRegister()
+ bool IsActualRef()
{
- return (refType == RefTypeDef || refType == RefTypeUse
+ return (refType == RefTypeDef ||
+ refType == RefTypeUse
#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
- || refType == RefTypeUpperVectorSaveDef || refType == RefTypeUpperVectorSaveUse
+ || refType == RefTypeUpperVectorSaveDef
+ || refType == RefTypeUpperVectorSaveUse
#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
);
}
unsigned getMultiRegIdx() { return multiRegIdx; }
+ // Returns true if codegen has indicated that the tree node
+ // referred to by this RefPosition can be treated as a contained
+ // memory operand if no register was allocated.
+ bool IsRegOptional()
+ {
+ // TODO-CQ: Right now if a ref position is marked as
+ // copyreg or movereg, then it is always allocated a
+ // register, though it is marked as reg optional.
+ // This is an implementation limitation that needs to
+ // be addressed.
+ return (refType == RefTypeUse) &&
+ !copyReg &&
+ !moveReg &&
+ (treeNode != nullptr) &&
+ treeNode->IsRegOptional();
+ }
+
// Last Use - this may be true for multiple RefPositions in the same Interval
bool lastUse : 1;
isDelayFree = false;
hasDelayFreeSrc = false;
isTgtPref = false;
+ regOptional = false;
}
// dst
// isTgtPref is set to true when we have a rmw op, where we would like the result to be allocated
// in the same register as op1.
unsigned char isTgtPref:1;
-
+ // Whether this node can be treated as a contained memory operand by its
+ // parent if no register is allocated to it (reg optional).
+ unsigned char regOptional:1;
public: