Generate TreeNodeInfo into the map when building RefPositions.
Add some new methods and flags for former gtLsraInfo functionality that's used outside of LSRA:
- GenTree::GetRegisterDstCount() (number of registers defined by a node)
- LIR::Flags::RegOptional
- gtDebugFlags::GTF_DEBUG_NODE_LSRA_ADDED
Fix #7255
printf("#");
}
- // Munge any pointers if we want diff-able disassembly
+ // Munge any pointers if we want diff-able disassembly.
+ // Since some may be emitted as partial words, print as diffable anything that has
+ // significant bits beyond the lowest 8-bits.
if (emitComp->opts.disDiffable)
{
- ssize_t top44bits = (imm >> 20);
- if ((top44bits != 0) && (top44bits != -1))
+ ssize_t top56bits = (imm >> 8);
+ if ((top56bits != 0) && (top56bits != -1))
imm = 0xD1FFAB1E;
}
return hasReg;
}
+//-----------------------------------------------------------------------------
+// GetRegisterDstCount: Get the number of registers defined by the node.
+//
+// Arguments:
+// None
+//
+// Return Value:
+// The number of registers that this node defines.
+//
+// Notes:
+// This should not be called on a contained node.
+// This does not look at the actual register assignments, if any, and so
+// is valid after Lowering.
+//
+int GenTree::GetRegisterDstCount() const
+{
+ assert(!isContained());
+ if (!IsMultiRegNode())
+ {
+ return (IsValue()) ? 1 : 0;
+ }
+ else if (IsMultiRegCall())
+ {
+ // temporarily cast away const-ness as AsCall() method is not declared const
+ GenTree* temp = const_cast<GenTree*>(this);
+ return temp->AsCall()->GetReturnTypeDesc()->GetReturnRegCount();
+ }
+ else if (IsCopyOrReloadOfMultiRegCall())
+ {
+ // A multi-reg copy or reload, will have valid regs for only those
+ // positions that need to be copied or reloaded. Hence we need
+ // to consider only those registers for computing reg mask.
+
+ GenTree* tree = const_cast<GenTree*>(this);
+ GenTreeCopyOrReload* copyOrReload = tree->AsCopyOrReload();
+ GenTreeCall* call = copyOrReload->gtGetOp1()->AsCall();
+ return call->GetReturnTypeDesc()->GetReturnRegCount();
+ }
+#if !defined(LEGACY_BACKEND) && defined(_TARGET_ARM_)
+ else if (OperIsPutArgSplit())
+ {
+ return (const_cast<GenTree*>(this))->AsPutArgSplit()->gtNumRegs;
+ }
+ // A PUTARG_REG could be a MultiRegOp on ARM since we could move a double register to two int registers
+ // (either for all double parameters w/SoftFP, or for varargs).
+ else
+ {
+ assert(OperIsMultiRegOp());
+ return (TypeGet() == TYP_LONG) ? 2 : 1;
+ }
+#endif // !defined(LEGACY_BACKEND) && defined(_TARGET_ARM_)
+ assert(!"Unexpected multi-reg node");
+ return 0;
+}
+
//---------------------------------------------------------------
// gtGetRegMask: Get the reg mask of the node.
//
}
}
-// -------------------------------------------------------------------------
-// IsRegOptional: Returns true if this gentree node is marked by lowering to
-// indicate that codegen can still generate code even if it wasn't allocated
-// a register.
-bool GenTree::IsRegOptional() const
-{
-#ifdef LEGACY_BACKEND
- return false;
-#else
- return gtLsraInfo.regOptional;
-#endif
-}
-
bool GenTree::IsPhiNode()
{
return (OperGet() == GT_PHI_ARG) || (OperGet() == GT_PHI) || IsPhiDefn();
void CopyReg(GenTreePtr from);
bool gtHasReg() const;
+ int GetRegisterDstCount() const;
+
regMaskTP gtGetRegMask() const;
unsigned gtFlags; // see GTF_xxxx below
regMaskSmall gtUsedRegs; // set of used (trashed) registers
#endif // LEGACY_BACKEND
-#ifndef LEGACY_BACKEND
- TreeNodeInfo gtLsraInfo;
-#endif // !LEGACY_BACKEND
-
void SetVNsFromNode(GenTreePtr tree)
{
gtVNPair = tree->gtVNPair;
#define GTF_DEBUG_NODE_LARGE 0x00000004
#define GTF_DEBUG_NODE_CG_PRODUCED 0x00000008 // genProduceReg has been called on this node
#define GTF_DEBUG_NODE_CG_CONSUMED 0x00000010 // genConsumeReg has been called on this node
+#define GTF_DEBUG_NODE_LSRA_ADDED 0x00000020 // This node was added by LSRA
-#define GTF_DEBUG_NODE_MASK 0x0000001F // These flags are all node (rather than operation) properties.
+#define GTF_DEBUG_NODE_MASK 0x0000003F // These flags are all node (rather than operation) properties.
#define GTF_DEBUG_VAR_CSE_REF 0x00800000 // GT_LCL_VAR -- This is a CSE LCL_VAR node
#endif // defined(DEBUG)
}
}
- // NOTE: the three UnusedValue helpers immediately below are defined in lir.h.
+ // LIR flags
+ // These helper methods, along with the flag values they manipulate, are defined in lir.h
+ //
+ // UnusedValue indicates that, although this node produces a value, it is unused.
inline void SetUnusedValue();
inline void ClearUnusedValue();
inline bool IsUnusedValue() const;
+ // RegOptional indicates that codegen can still generate code even if it isn't allocated a register.
+ inline bool IsRegOptional() const;
+ inline void SetRegOptional();
+ inline void ClearRegOptional();
+#ifdef DEBUG
+ void dumpLIRFlags();
+#endif
bool OperIs(genTreeOps oper) const
{
inline var_types CastFromType();
inline var_types& CastToType();
- // Returns true if this gentree node is marked by lowering to indicate
- // that codegen can still generate code even if it wasn't allocated a
- // register.
- bool IsRegOptional() const;
-#ifndef LEGACY_BACKEND
- void ClearRegOptional()
- {
- gtLsraInfo.regOptional = false;
- }
-#endif
-
// Returns "true" iff "this" is a phi-related node (i.e. a GT_PHI_ARG, GT_PHI, or a PhiDefn).
bool IsPhiNode();
blockRange.InsertBefore(insertionPoint, std::move(range));
}
+
+#ifdef DEBUG
+void GenTree::dumpLIRFlags()
+{
+ JITDUMP("[%c%c]", IsUnusedValue() ? 'U' : '-', IsRegOptional() ? 'O' : '-');
+}
+#endif
// that this bit should not be assumed to be valid
// at all points during compilation: it is currently
// only computed during target-dependent lowering.
+
+ RegOptional = 0x04, // Set on a node if it produces a value, but does not
+ // require a register (i.e. it can be used from memory).
};
};
return (gtLIRFlags & LIR::Flags::UnusedValue) != 0;
}
+inline void GenTree::SetRegOptional()
+{
+ gtLIRFlags |= LIR::Flags::RegOptional;
+}
+
+inline void GenTree::ClearRegOptional()
+{
+ gtLIRFlags &= ~LIR::Flags::RegOptional;
+}
+
+inline bool GenTree::IsRegOptional() const
+{
+ return (gtLIRFlags & LIR::Flags::RegOptional) != 0;
+}
+
#endif // _LIR_H_
// node - pointer to the DIV or MOD node
//
// Returns:
-// The next node to lower.
+// nullptr if no transformation is done, or the next node in the transformed node sequence that
+// needs to be lowered.
//
GenTree* Lowering::LowerConstIntDivOrMod(GenTree* node)
{
assert((node->OperGet() == GT_DIV) || (node->OperGet() == GT_MOD));
- GenTree* next = node->gtNext;
GenTree* divMod = node;
GenTree* dividend = divMod->gtGetOp1();
GenTree* divisor = divMod->gtGetOp2();
if (!divisor->IsCnsIntOrI())
{
- return next; // no transformations to make
+ return nullptr; // no transformations to make
}
const var_types type = divMod->TypeGet();
// We shouldn't see a divmod with constant operands here but if we do then it's likely
// because optimizations are disabled or it's a case that's supposed to throw an exception.
// Don't optimize this.
- return next;
+ return nullptr;
}
ssize_t divisorValue = divisor->gtIntCon.IconValue();
// case so optimizing this case would break C# code.
// A runtime check could be used to handle this case but it's probably too rare to matter.
- return next;
+ return nullptr;
}
bool isDiv = divMod->OperGet() == GT_DIV;
// If the divisor is the minimum representable integer value then we can use a compare,
// the result is 1 iff the dividend equals divisor.
divMod->SetOper(GT_EQ);
- ContainCheckCompare(divMod->AsOp());
- return next;
+ return node;
}
}
{
if (comp->opts.MinOpts())
{
- return next;
+ return nullptr;
}
#if defined(_TARGET_XARCH_) || defined(_TARGET_ARM64_)
return mulhi;
#else
// Currently there's no GT_MULHI for ARM32
- return next;
+ return nullptr;
#endif
}
LIR::Use use;
if (!BlockRange().TryGetUse(node, &use))
{
- return next;
+ return nullptr;
}
// We need to use the dividend node multiple times so its value needs to be
if (!varTypeIsFloating(node->TypeGet()))
#endif // _TARGET_XARCH_
{
- next = LowerConstIntDivOrMod(node);
- }
-
- if ((node->OperGet() == GT_DIV) || (node->OperGet() == GT_MOD))
- {
- ContainCheckDivOrMod(node->AsOp());
+ // LowerConstIntDivOrMod will return nullptr if it doesn't transform the node.
+ GenTree* newNode = LowerConstIntDivOrMod(node);
+ if (newNode != nullptr)
+ {
+ return newNode;
+ }
}
+ ContainCheckDivOrMod(node->AsOp());
return next;
}
{
// If there are no containable operands, we can make an operand reg optional.
// SSE2 allows only divisor to be a memory-op.
- SetRegOptional(divisor);
+ divisor->SetRegOptional();
}
return;
}
{
// If there are no containable operands, we can make an operand reg optional.
// Div instruction allows only divisor to be a memory op.
- SetRegOptional(divisor);
+ divisor->SetRegOptional();
}
#endif // _TARGET_XARCH_
}
bool IsCallTargetInRange(void* addr);
#if defined(_TARGET_XARCH_)
- //----------------------------------------------------------------------
- // SetRegOptional - sets a bit to indicate to LSRA that register
- // for a given tree node is optional for codegen purpose. If no
- // register is allocated to such a tree node, its parent node treats
- // it as a contained memory operand during codegen.
- //
- // Arguments:
- // tree - GenTree node
- //
- // Returns
- // None
- void SetRegOptional(GenTree* tree)
- {
- tree->gtLsraInfo.regOptional = true;
- }
-
GenTree* PreferredRegOptionalOperand(GenTree* tree);
// ------------------------------------------------------------------
const bool op1Legal = tree->OperIsCommutative() && (operatorSize == genTypeSize(op1->TypeGet()));
const bool op2Legal = operatorSize == genTypeSize(op2->TypeGet());
+ GenTree* regOptionalOperand = nullptr;
if (op1Legal)
{
- SetRegOptional(op2Legal ? PreferredRegOptionalOperand(tree) : op1);
+ regOptionalOperand = op2Legal ? PreferredRegOptionalOperand(tree) : op1;
}
else if (op2Legal)
{
- SetRegOptional(op2);
+ regOptionalOperand = op2;
+ }
+ if (regOptionalOperand != nullptr)
+ {
+ regOptionalOperand->SetRegOptional();
}
}
#endif // defined(_TARGET_XARCH_)
head->gtSeqNum = fieldList->gtSeqNum;
#endif // DEBUG
- head->gtLsraInfo = fieldList->gtLsraInfo;
-
BlockRange().InsertAfter(fieldList, head);
BlockRange().Remove(fieldList);
LclVarDsc* varDsc = &(comp->lvaTable[fieldNode->AsLclVarCommon()->gtLclNum]);
if (!varDsc->lvDoNotEnregister)
{
- SetRegOptional(fieldNode);
+ fieldNode->SetRegOptional();
}
else
{
// than spilling, but this situation is not all that common, as most cases of promoted
// structs do not have a large number of fields, and of those most are lclVars or
// copy-propagated constants.
- SetRegOptional(fieldNode);
+ fieldNode->SetRegOptional();
}
}
// Has a contained immediate operand.
// Only 'other' operand can be marked as reg optional.
assert(other != nullptr);
- SetRegOptional(other);
+ other->SetRegOptional();
}
else if (hasImpliedFirstOperand)
{
// Only op2 can be marked as reg optional.
- SetRegOptional(op2);
+ op2->SetRegOptional();
}
else
{
{
// Mark castOp as reg optional to indicate codegen
// can still generate code if it is on stack.
- SetRegOptional(castOp);
+ castOp->SetRegOptional();
}
}
}
{
// SSE2 allows only otherOp to be a memory-op. Since otherOp is not
// contained, we can mark it reg-optional.
- SetRegOptional(otherOp);
+ otherOp->SetRegOptional();
}
return;
}
else
{
- SetRegOptional(op1);
+ op1->SetRegOptional();
}
}
}
}
else if (op1->IsCnsIntOrI())
{
- SetRegOptional(op2);
+ op2->SetRegOptional();
}
else
{
// One of op1 or op2 could be marked as reg optional
// to indicate that codegen can still generate code
// if one of them is on stack.
- SetRegOptional(PreferredRegOptionalOperand(cmp));
+ PreferredRegOptionalOperand(cmp)->SetRegOptional();
}
}
}
else
{
// We can mark 'other' as reg optional, since it is not contained.
- SetRegOptional(other);
+ other->SetRegOptional();
}
}
}
{
// Mark the operand as reg optional since codegen can still
// generate code if op1 is on stack.
- SetRegOptional(op1);
+ op1->SetRegOptional();
}
}
}
Preconditions
- All register requirements are expressed in the code stream, either as destination
registers of tree nodes, or as internal registers. These requirements are
- expressed in the TreeNodeInfo (gtLsraInfo) on each node, which includes:
+ expressed in the TreeNodeInfo computed for each node, which includes:
- The number of register sources and destinations.
- The register restrictions (candidates) of the target register, both from itself,
as producer of the value (dstCandidates), and from its consuming node (srcCandidates).
regMaskTP newAssignment = (prevAssignment & rp->registerAssignment);
if (newAssignment != RBM_NONE)
{
- if (!isSingleRegister(newAssignment) ||
- (!theInterval->hasNonCommutativeRMWDef && (prevRefPosition->treeNode != nullptr) &&
- !prevRefPosition->treeNode->gtLsraInfo.isInternalRegDelayFree))
+ if (!isSingleRegister(newAssignment) || !theInterval->hasInterferingUses)
{
prevRefPosition->registerAssignment = newAssignment;
}
// Return Value:
// None.
//
-// Assumptions:
-// Lowering must have set the NodeInfo (gtLsraInfo) on each node to communicate
-// the register requirements.
void LinearScan::doLinearScan()
{
// usual kill location which is the same as the defs at tree loc+1.
// Note that we don't have to add interference for the live vars, because that
// will be done below, and is not sensitive to the precise location.
- LsraLocation currentLoc = tree->gtLsraInfo.loc;
assert(currentLoc != 0);
addRefsForPhysRegMask(RBM_RDX, currentLoc, RefTypeKill, true);
// Both RAX and RDX are killed by the operation
// DEBUG only arg.
RefPosition* LinearScan::defineNewInternalTemp(GenTree* tree,
RegisterType regType,
- LsraLocation currentLoc,
regMaskTP regMask DEBUGARG(unsigned minRegCandidateCount))
{
Interval* current = newInterval(regType);
//
// Arguments:
// tree - Gentree node that needs internal registers
-// currentLoc - Location at which Def positions need to be defined
// temps - in-out array which is populated with ref positions
// created for Def of internal registers
// minRegCandidateCount - Minimum registers to be ensured in candidate
//
// Returns:
// The total number of Def positions created for internal registers of tree node.
-int LinearScan::buildInternalRegisterDefsForNode(GenTree* tree,
- LsraLocation currentLoc,
- RefPosition* temps[] // populates
+int LinearScan::buildInternalRegisterDefsForNode(GenTree* tree,
+ TreeNodeInfo* info,
+ RefPosition* temps[] // populates
DEBUGARG(unsigned minRegCandidateCount))
{
int count;
- int internalIntCount = tree->gtLsraInfo.internalIntCount;
- regMaskTP internalCands = tree->gtLsraInfo.getInternalCandidates(this);
+ int internalIntCount = info->internalIntCount;
+ regMaskTP internalCands = info->getInternalCandidates(this);
// If the number of internal integer registers required is the same as the number of candidate integer registers in
// the candidate set, then they must be handled as fixed registers.
internalIntCands = genFindLowestBit(internalIntCands);
internalCands &= ~internalIntCands;
}
- temps[count] =
- defineNewInternalTemp(tree, IntRegisterType, currentLoc, internalIntCands DEBUG_ARG(minRegCandidateCount));
+ temps[count] = defineNewInternalTemp(tree, IntRegisterType, internalIntCands DEBUG_ARG(minRegCandidateCount));
}
- int internalFloatCount = tree->gtLsraInfo.internalFloatCount;
+ int internalFloatCount = info->internalFloatCount;
for (int i = 0; i < internalFloatCount; i++)
{
regMaskTP internalFPCands = (internalCands & internalFloatRegCandidates());
temps[count++] =
- defineNewInternalTemp(tree, FloatRegisterType, currentLoc, internalFPCands DEBUG_ARG(minRegCandidateCount));
+ defineNewInternalTemp(tree, FloatRegisterType, internalFPCands DEBUG_ARG(minRegCandidateCount));
}
assert(count < MaxInternalRegisters);
//
// Arguments:
// tree - Gentree node that needs internal registers
-// currentLoc - Location at which Use positions need to be defined
// defs - int array containing Def positions of internal
// registers.
// total - Total number of Def positions in 'defs' array.
//
// Returns:
// Void.
-void LinearScan::buildInternalRegisterUsesForNode(GenTree* tree,
- LsraLocation currentLoc,
- RefPosition* defs[],
+void LinearScan::buildInternalRegisterUsesForNode(GenTree* tree,
+ TreeNodeInfo* info,
+ RefPosition* defs[],
int total DEBUGARG(unsigned minRegCandidateCount))
{
assert(total < MaxInternalRegisters);
RefPosition* newest = newRefPosition(defs[i]->getInterval(), currentLoc, RefTypeUse, tree, mask,
0 DEBUG_ARG(minRegCandidateCount));
- if (tree->gtLsraInfo.isInternalRegDelayFree)
+ if (info->isInternalRegDelayFree)
{
newest->delayRegFree = true;
}
}
}
-regMaskTP LinearScan::getUseCandidates(GenTree* useNode)
-{
- TreeNodeInfo info = useNode->gtLsraInfo;
- return info.getSrcCandidates(this);
-}
-
-regMaskTP LinearScan::getDefCandidates(GenTree* tree)
-{
- TreeNodeInfo info = tree->gtLsraInfo;
- return info.getDstCandidates(this);
-}
-
RegisterType LinearScan::getDefType(GenTree* tree)
{
return tree->TypeGet();
}
//------------------------------------------------------------------------
-// LocationInfoListNode: used to store a single `LocationInfo` value for a
-// node during `buildIntervals`.
-//
-// This is the node type for `LocationInfoList` below.
-//
-class LocationInfoListNode final : public LocationInfo
-{
- friend class LocationInfoList;
- friend class LocationInfoListNodePool;
-
- LocationInfoListNode* m_next; // The next node in the list
-
-public:
- LocationInfoListNode(LsraLocation l, Interval* i, GenTree* t, unsigned regIdx = 0) : LocationInfo(l, i, t, regIdx)
- {
- }
-
- //------------------------------------------------------------------------
- // LocationInfoListNode::Next: Returns the next node in the list.
- LocationInfoListNode* Next() const
- {
- return m_next;
- }
-};
-
-//------------------------------------------------------------------------
-// LocationInfoList: used to store a list of `LocationInfo` values for a
-// node during `buildIntervals`.
-//
-// Given an IR node that either directly defines N registers or that is a
-// contained node with uses that define a total of N registers, that node
-// will map to N `LocationInfo` values. These values are stored as a
-// linked list of `LocationInfoListNode` values.
-//
-class LocationInfoList final
-{
- friend class LocationInfoListNodePool;
-
- LocationInfoListNode* m_head; // The head of the list
- LocationInfoListNode* m_tail; // The tail of the list
-
-public:
- LocationInfoList() : m_head(nullptr), m_tail(nullptr)
- {
- }
-
- LocationInfoList(LocationInfoListNode* node) : m_head(node), m_tail(node)
- {
- assert(m_head->m_next == nullptr);
- }
-
- //------------------------------------------------------------------------
- // LocationInfoList::IsEmpty: Returns true if the list is empty.
- //
- bool IsEmpty() const
- {
- return m_head == nullptr;
- }
-
- //------------------------------------------------------------------------
- // LocationInfoList::Begin: Returns the first node in the list.
- //
- LocationInfoListNode* Begin() const
- {
- return m_head;
- }
-
- //------------------------------------------------------------------------
- // LocationInfoList::End: Returns the position after the last node in the
- // list. The returned value is suitable for use as
- // a sentinel for iteration.
- //
- LocationInfoListNode* End() const
- {
- return nullptr;
- }
-
- //------------------------------------------------------------------------
- // LocationInfoList::Append: Appends a node to the list.
- //
- // Arguments:
- // node - The node to append. Must not be part of an existing list.
- //
- void Append(LocationInfoListNode* node)
- {
- assert(node->m_next == nullptr);
-
- if (m_tail == nullptr)
- {
- assert(m_head == nullptr);
- m_head = node;
- }
- else
- {
- m_tail->m_next = node;
- }
-
- m_tail = node;
- }
-
- //------------------------------------------------------------------------
- // LocationInfoList::Append: Appends another list to this list.
- //
- // Arguments:
- // other - The list to append.
- //
- void Append(LocationInfoList other)
- {
- if (m_tail == nullptr)
- {
- assert(m_head == nullptr);
- m_head = other.m_head;
- }
- else
- {
- m_tail->m_next = other.m_head;
- }
-
- m_tail = other.m_tail;
- }
-};
-
-//------------------------------------------------------------------------
// LocationInfoListNodePool: manages a pool of `LocationInfoListNode`
// values to decrease overall memory usage
// during `buildIntervals`.
m_freeList = head->m_next;
}
- head->loc = l;
- head->interval = i;
- head->treeNode = t;
- head->multiRegIdx = regIdx;
- head->m_next = nullptr;
+ head->loc = l;
+ head->interval = i;
+ head->treeNode = t;
+ head->m_next = nullptr;
return head;
}
LocationInfoListNode* head = m_freeList;
list.m_tail->m_next = head;
m_freeList = list.m_head;
+
+ list.m_head = nullptr;
+ list.m_tail = nullptr;
}
};
//
static int ComputeOperandDstCount(GenTree* operand)
{
- TreeNodeInfo& operandInfo = operand->gtLsraInfo;
-
- if (operandInfo.isLocalDefUse)
+ // GT_ARGPLACE is the only non-LIR node that is currently in the trees at this stage, though
+ // note that it is not in the linear order. It seems best to check for !IsLIR() rather than
+ // GT_ARGPLACE directly, since it's that characteristic that makes it irrelevant for this method.
+ if (!operand->IsLIR())
{
- // Operands that define an unused value do not produce any registers.
return 0;
}
- else if (operandInfo.dstCount != 0)
+ if (operand->isContained())
{
- // Operands that have a specified number of destination registers consume all of their operands
- // and therefore produce exactly that number of registers.
- return operandInfo.dstCount;
- }
- else if (operandInfo.srcCount != 0)
- {
- // If an operand has no destination registers but does have source registers, it must be a store
- // or a compare.
- assert(operand->OperIsStore() || operand->OperIsBlkOp() || operand->OperIsPutArgStk() ||
- operand->OperIsCompare() || operand->OperIs(GT_CMP, GT_JCMP) || operand->IsSIMDEqualityOrInequality());
- return 0;
+ int dstCount = 0;
+ for (GenTree* op : operand->Operands())
+ {
+ dstCount += ComputeOperandDstCount(op);
+ }
+
+ return dstCount;
}
- else if (!operand->OperIsFieldListHead() && (operand->OperIsStore() || operand->TypeGet() == TYP_VOID))
+ if (operand->IsUnusedValue())
{
- // Stores and void-typed operands may be encountered when processing call nodes, which contain
- // pointers to argument setup stores.
+ // Operands that define an unused value do not produce any registers.
return 0;
}
- else if (operand->OperIsPutArgStk())
+ if (operand->IsValue())
{
- // A PUTARG_STK argument is an operand of a call, but is neither contained, nor does it produce
- // a result.
- assert(!operand->isContained());
- return 0;
+ // Operands that are values and are not contained consume all of their operands
+ // and produce one or more registers.
+ return operand->GetRegisterDstCount();
}
else
{
- // If a field list or non-void-typed operand is not an unused value and does not have source registers,
- // that argument is contained within its parent and produces `sum(operand_dst_count)` registers.
- int dstCount = 0;
- for (GenTree* op : operand->Operands())
- {
- dstCount += ComputeOperandDstCount(op);
- }
-
- return dstCount;
+ // This must be one of the operand types that are neither contained nor produce a value.
+ // Stores and void-typed operands may be encountered when processing call nodes, which contain
+ // pointers to argument setup stores.
+ assert(operand->OperIsStore() || operand->OperIsBlkOp() || operand->OperIsPutArgStk() ||
+ operand->OperIsCompare() || operand->OperIs(GT_CMP) || operand->IsSIMDEqualityOrInequality() ||
+ operand->TypeGet() == TYP_VOID);
+ return 0;
}
}
}
#endif // DEBUG
-static GenTree* GetFirstOperand(GenTree* node)
-{
- GenTree* firstOperand = nullptr;
- node->VisitOperands([&firstOperand](GenTree* operand) -> GenTree::VisitResult {
- firstOperand = operand;
- return GenTree::VisitResult::Abort;
- });
- return firstOperand;
-}
-
void LinearScan::buildRefPositionsForNode(GenTree* tree,
BasicBlock* block,
LocationInfoListNodePool& listNodePool,
- HashTableBase<GenTree*, LocationInfoList>& operandToLocationInfoMap,
- LsraLocation currentLoc)
+ LsraLocation currentLoc)
{
#ifdef _TARGET_ARM_
assert(!isRegPairType(tree->TypeGet()));
// gtRsvdRegs register mask. Clear it out.
tree->gtRsvdRegs = RBM_NONE;
- TreeNodeInfo info = tree->gtLsraInfo;
- assert(info.IsValid(this));
- int consume = info.srcCount;
- int produce = info.dstCount;
+#ifdef DEBUG
+ if (VERBOSE)
+ {
+ dumpOperandToLocationInfoMap();
+ compiler->gtDispTree(tree, nullptr, nullptr, true);
+ }
+#endif // DEBUG
+
+ // If the node produces a value that will be consumed by a parent node, its TreeNodeInfo will
+ // be allocated in the LocationInfoListNode. Otherwise, we'll just use a local value that will
+ // be thrown away when we're done.
+ LocationInfoListNode* locationInfo = nullptr;
+ TreeNodeInfo tempInfo;
+ TreeNodeInfo* info = nullptr;
+ if (!tree->isContained() && tree->IsValue())
+ {
+ locationInfo = listNodePool.GetNode(currentLoc, nullptr, tree);
+ info = &locationInfo->info;
+ }
+ else
+ {
+ info = &tempInfo;
+ }
+ info->Initialize(this, tree);
+ TreeNodeInfoInit(tree, info);
+
+#ifdef DEBUG
+ if (VERBOSE)
+ {
+ printf(" +");
+ info->dump(this);
+ tree->dumpLIRFlags();
+ printf("\n");
+ }
+#endif // DEBUG
+
+ assert(info->IsValid(this));
+ int consume = info->srcCount;
+ int produce = info->dstCount;
#ifdef DEBUG
if (VERBOSE)
{
- lsraDispNode(tree, LSRA_DUMP_REFPOS, (produce != 0));
- JITDUMP("\n");
if (tree->isContained())
{
JITDUMP("Contained\n");
}
- else if (tree->OperIs(GT_LCL_VAR, GT_LCL_FLD) && info.isLocalDefUse)
+ else if (tree->OperIs(GT_LCL_VAR, GT_LCL_FLD) && info->isLocalDefUse)
{
JITDUMP("Unused\n");
}
{
JITDUMP(" consume=%d produce=%d\n", consume, produce);
}
-
- if (consume != 0)
- {
- JITDUMP("at start of tree, map contains: { ");
- bool first = true;
- for (auto kvp : operandToLocationInfoMap)
- {
- GenTree* node = kvp.Key();
- LocationInfoList defList = kvp.Value();
-
- JITDUMP("%sN%03u. %s -> (", first ? "" : "; ", node->gtSeqNum, GenTree::OpName(node->OperGet()));
- for (LocationInfoListNode *def = defList.Begin(), *end = defList.End(); def != end; def = def->Next())
- {
- JITDUMP("%s%d.N%03u", def == defList.Begin() ? "" : ", ", def->loc, def->treeNode->gtSeqNum);
- }
- JITDUMP(")");
-
- first = false;
- }
- JITDUMP(" }\n");
- }
}
#endif // DEBUG
VarSetOps::RemoveElemD(compiler, currentLiveVars, varIndex);
}
- if (!info.isLocalDefUse && !tree->isContained())
+ if (!info->isLocalDefUse && !tree->isContained())
{
assert(produce != 0);
- LocationInfoList list(listNodePool.GetNode(currentLoc, getIntervalForLocalVar(varIndex), tree));
- bool added = operandToLocationInfoMap.AddOrUpdate(tree, list);
+ locationInfo->interval = getIntervalForLocalVar(varIndex);
+ bool added = operandToLocationInfoMap->AddOrUpdate(tree, locationInfo);
assert(added);
-
- tree->gtLsraInfo.definesAnyRegisters = true;
}
return;
}
}
- if (tree->isContained())
- {
- assert(!info.isLocalDefUse);
- assert(consume == 0);
- assert(produce == 0);
- assert(info.internalIntCount == 0);
- assert(info.internalFloatCount == 0);
-
- // Contained nodes map to the concatenated lists of their operands.
- LocationInfoList locationInfoList;
- tree->VisitOperands([&](GenTree* op) -> GenTree::VisitResult {
- if (!op->gtLsraInfo.definesAnyRegisters)
- {
- assert(ComputeOperandDstCount(op) == 0);
- return GenTree::VisitResult::Continue;
- }
-
- LocationInfoList operandList;
- bool removed = operandToLocationInfoMap.TryRemove(op, &operandList);
- assert(removed);
-
- locationInfoList.Append(operandList);
- return GenTree::VisitResult::Continue;
- });
-
- if (!locationInfoList.IsEmpty())
- {
- bool added = operandToLocationInfoMap.AddOrUpdate(tree, locationInfoList);
- assert(added);
- tree->gtLsraInfo.definesAnyRegisters = true;
- }
- JITDUMP("\n");
- return;
- }
-
// Handle the case of local variable assignment
Interval* varDefInterval = nullptr;
RefType defRefType = RefTypeDef;
// noAdd means the node creates a def but for purposes of map
// management do not add it because data is not flowing up the
- // tree but over (as in ASG nodes)
+ // tree
- bool noAdd = info.isLocalDefUse;
+ bool noAdd = info->isLocalDefUse;
RefPosition* prevPos = nullptr;
bool isSpecialPutArg = false;
if (consume == 1)
{
// Get the location info for the register defined by the first operand.
- LocationInfoList operandDefs;
- bool found = operandToLocationInfoMap.TryGetValue(GetFirstOperand(tree), &operandDefs);
- assert(found);
-
- // Since we only expect to consume one register, we should only have a single register to
- // consume.
- assert(operandDefs.Begin()->Next() == operandDefs.End());
-
- LocationInfo& operandInfo = *static_cast<LocationInfo*>(operandDefs.Begin());
+ LocationInfoListNode& operandInfo = *(useList.Begin());
+ assert(operandInfo.treeNode == tree->gtGetOp1());
Interval* srcInterval = operandInfo.interval;
if (srcInterval->relatedInterval == nullptr)
store->gtType = store->gtOp1->gtType = store->gtOp1->AsUnOp()->gtOp1->TypeGet();
// Get the location info for the register defined by the first operand.
- LocationInfoList operandDefs;
- bool found = operandToLocationInfoMap.TryGetValue(GetFirstOperand(store), &operandDefs);
- assert(found);
-
- // Since we only expect to consume one register, we should only have a single register to consume.
- assert(operandDefs.Begin()->Next() == operandDefs.End());
-
- LocationInfo& operandInfo = *static_cast<LocationInfo*>(operandDefs.Begin());
+ LocationInfoListNode& operandInfo = *(useList.Begin());
+ assert(operandInfo.treeNode == tree->gtGetOp1());
Interval* srcInterval = operandInfo.interval;
srcInterval->registerType = regType(store->TypeGet());
assert(srcDefPosition->treeNode == store->gtOp1);
srcDefPosition->registerAssignment = allRegs(store->TypeGet());
- store->gtOp1->gtLsraInfo.setSrcCandidates(this, allRegs(store->TypeGet()));
+ operandInfo.info.setSrcCandidates(this, allRegs(store->TypeGet()));
}
}
else if (noAdd && produce == 0)
{
- // This is the case for dead nodes that occur after
- // tree rationalization
+ // Dead nodes may remain after tree rationalization, decomposition or lowering.
+ // They should be marked as UnusedValue.
// TODO-Cleanup: Identify and remove these dead nodes prior to register allocation.
- if (tree->IsMultiRegCall())
- {
- // In case of multi-reg call node, produce = number of return registers
- produce = tree->AsCall()->GetReturnTypeDesc()->GetReturnRegCount();
- }
- else
- {
- produce = 1;
- }
+ assert(!noAdd || (produce != 0));
}
Interval* prefSrcInterval = nullptr;
// If this is a binary operator that will be encoded with 2 operand fields
// (i.e. the target is read-modify-write), preference the dst to op1.
- bool hasDelayFreeSrc = tree->gtLsraInfo.hasDelayFreeSrc;
+ bool hasDelayFreeSrc = info->hasDelayFreeSrc;
#if defined(DEBUG) && defined(_TARGET_X86_)
// On x86, `LSRA_LIMIT_CALLER` is too restrictive to allow the use of special put args: this stress mode
JITDUMP("Setting putarg_reg as a pass-through of a non-last use lclVar\n");
// Get the register information for the first operand of the node.
- LocationInfoList operandDefs;
- bool found = operandToLocationInfoMap.TryGetValue(GetFirstOperand(tree), &operandDefs);
- assert(found);
+ LocationInfoListNode* operandDef = useList.Begin();
+ assert(operandDef->treeNode == tree->gtGetOp1());
// Preference the destination to the interval of the first register defined by the first operand.
- Interval* srcInterval = operandDefs.Begin()->interval;
+ Interval* srcInterval = operandDef->interval;
assert(srcInterval->isLocalVar);
prefSrcInterval = srcInterval;
isSpecialPutArg = true;
// consume + produce + internalCount. This is the minimum
// set of registers that needs to be ensured in candidate
// set of ref positions created.
- unsigned minRegCount = consume + produce + info.internalIntCount + info.internalFloatCount;
+ unsigned minRegCount = consume + produce + info->internalIntCount + info->internalFloatCount;
#endif // DEBUG
- // make intervals for all the 'internal' register requirements for this node
- // where internal means additional registers required temporarily
- int internalCount = buildInternalRegisterDefsForNode(tree, currentLoc, internalRefs DEBUG_ARG(minRegCount));
+ // Make intervals for all the 'internal' register requirements for this node,
+ // where internal means additional registers required temporarily.
+ // Create a RefTypeDef RefPosition for each such interval.
+ int internalCount = buildInternalRegisterDefsForNode(tree, info, internalRefs DEBUG_ARG(minRegCount));
- // pop all ref'd tree temps
- tree->VisitOperands([&](GenTree* operand) -> GenTree::VisitResult {
- // Skip operands that do not define any registers, whether directly or indirectly.
- if (!operand->gtLsraInfo.definesAnyRegisters)
- {
- return GenTree::VisitResult::Continue;
- }
-
- // Remove the list of registers defined by the current operand from the map. Note that this
- // is only correct because tree nodes are singly-used: if this property ever changes (e.g.
- // if tree nodes are eventually allowed to be multiply-used), then the removal is only
- // correct at the last use.
- LocationInfoList operandDefs;
- bool removed = operandToLocationInfoMap.TryRemove(operand, &operandDefs);
- assert(removed);
- assert(!operandDefs.IsEmpty());
+ // Make use RefPositions for all used values.
+ int consumed = 0;
+ for (LocationInfoListNode *listNode = useList.Begin(), *end = useList.End(); listNode != end;
+ listNode = listNode->Next())
+ {
+ LocationInfo& locInfo = *static_cast<LocationInfo*>(listNode);
-#ifdef _TARGET_ARM_
- regMaskTP currCandidates = RBM_NONE;
-#endif // _TARGET_ARM_
+ // For tree temps, a use is always a last use and the end of the range;
+ // this is set by default in newRefPosition
+ GenTree* const useNode = locInfo.treeNode;
+ assert(useNode != nullptr);
- LocationInfoListNode* const operandDefsEnd = operandDefs.End();
- for (LocationInfoListNode* operandDefsIterator = operandDefs.Begin(); operandDefsIterator != operandDefsEnd;
- operandDefsIterator = operandDefsIterator->Next())
+ Interval* srcInterval = locInfo.interval;
+ TreeNodeInfo& useNodeInfo = locInfo.info;
+ if (useNodeInfo.isTgtPref)
{
- LocationInfo& locInfo = *static_cast<LocationInfo*>(operandDefsIterator);
-
- // for interstitial tree temps, a use is always last and end; this is set by default in newRefPosition
- GenTree* const useNode = locInfo.treeNode;
- assert(useNode != nullptr);
-
- Interval* const i = locInfo.interval;
- if (useNode->gtLsraInfo.isTgtPref)
- {
- prefSrcInterval = i;
- }
+ prefSrcInterval = srcInterval;
+ }
- const bool delayRegFree = (hasDelayFreeSrc && useNode->gtLsraInfo.isDelayFree);
+ const bool delayRegFree = (hasDelayFreeSrc && useNodeInfo.isDelayFree);
#ifdef DEBUG
- // If delayRegFree, then Use will interfere with the destination of
- // the consuming node. Therefore, we also need add the kill set of
- // consuming node to minRegCount.
- //
- // For example consider the following IR on x86, where v01 and v02
- // are method args coming in ecx and edx respectively.
- // GT_DIV(v01, v02)
- //
- // For GT_DIV minRegCount will be 3 without adding kill set
- // of GT_DIV node.
- //
- // Assume further JitStressRegs=2, which would constrain
- // candidates to callee trashable regs { eax, ecx, edx } on
- // use positions of v01 and v02. LSRA allocates ecx for v01.
- // Use position of v02 cannot be allocated a regs since it
- // is marked delay-reg free and {eax,edx} are getting killed
- // before the def of GT_DIV. For this reason, minRegCount
- // for Use position of v02 also needs to take into account
- // of kill set of its consuming node.
- unsigned minRegCountForUsePos = minRegCount;
- if (delayRegFree && (lsraStressMask != 0))
+ // If delayRegFree, then Use will interfere with the destination of
+ // the consuming node. Therefore, we also need to add the kill set of
+ // the consuming node to minRegCount.
+ //
+ // For example consider the following IR on x86, where v01 and v02
+ // are method args coming in ecx and edx respectively.
+ // GT_DIV(v01, v02)
+ //
+ // For GT_DIV minRegCount will be 3 without adding kill set
+ // of GT_DIV node.
+ //
+ // Assume further JitStressRegs=2, which would constrain
+ // candidates to callee trashable regs { eax, ecx, edx } on
+ // use positions of v01 and v02. LSRA allocates ecx for v01.
+ // Use position of v02 cannot be allocated a regs since it
+ // is marked delay-reg free and {eax,edx} are getting killed
+ // before the def of GT_DIV. For this reason, minRegCount
+ // for Use position of v02 also needs to take into account
+ // the kill set of its consuming node.
+ unsigned minRegCountForUsePos = minRegCount;
+ if (delayRegFree && (lsraStressMask != 0))
+ {
+ regMaskTP killMask = getKillSetForNode(tree);
+ if (killMask != RBM_NONE)
{
- regMaskTP killMask = getKillSetForNode(tree);
- if (killMask != RBM_NONE)
- {
- minRegCountForUsePos += genCountBits(killMask);
- }
+ minRegCountForUsePos += genCountBits(killMask);
}
+ }
#endif // DEBUG
- regMaskTP candidates = getUseCandidates(useNode);
+ regMaskTP candidates = useNodeInfo.getSrcCandidates(this);
#ifdef _TARGET_ARM_
- if (useNode->OperIsPutArgSplit() || useNode->OperIsMultiRegOp())
- {
- // get i-th candidate, set bits in useCandidates must be in sequential order.
- candidates = genFindLowestReg(candidates & ~currCandidates);
- currCandidates |= candidates;
- }
+ regMaskTP allCandidates = candidates;
+
+ if (useNode->OperIsPutArgSplit() || useNode->OperIsMultiRegOp())
+ {
+ // Get the i-th candidate; the set bits in useCandidates must be in sequential order.
+ candidates = genFindLowestReg(allCandidates);
+ allCandidates &= ~candidates;
+ }
#endif // _TARGET_ARM_
- assert((candidates & allRegs(i->registerType)) != 0);
+ assert((candidates & allRegs(srcInterval->registerType)) != 0);
- // For non-localVar uses we record nothing, as nothing needs to be written back to the tree.
- GenTree* const refPosNode = i->isLocalVar ? useNode : nullptr;
- RefPosition* pos = newRefPosition(i, currentLoc, RefTypeUse, refPosNode, candidates,
- locInfo.multiRegIdx DEBUG_ARG(minRegCountForUsePos));
+ // For non-localVar uses we record nothing, as nothing needs to be written back to the tree.
+ GenTree* const refPosNode = srcInterval->isLocalVar ? useNode : nullptr;
+ RefPosition* pos = newRefPosition(srcInterval, currentLoc, RefTypeUse, refPosNode, candidates,
+ 0 DEBUG_ARG(minRegCountForUsePos));
+ if (delayRegFree)
+ {
+ pos->delayRegFree = true;
+ }
- if (delayRegFree)
- {
- pos->delayRegFree = true;
- }
+ if (useNode->IsRegOptional())
+ {
+ pos->setAllocateIfProfitable(true);
+ }
+ consumed++;
- if (useNode->IsRegOptional())
+ // Create additional use RefPositions for multi-reg nodes.
+ for (int idx = 1; idx < locInfo.info.dstCount; idx++)
+ {
+ noway_assert(srcInterval->relatedInterval != nullptr);
+ srcInterval = srcInterval->relatedInterval;
+#ifdef _TARGET_ARM_
+ if (useNode->OperIsPutArgSplit() ||
+ (compiler->opts.compUseSoftFP && (useNode->OperIsPutArgReg() || useNode->OperGet() == GT_BITCAST)))
{
- pos->setAllocateIfProfitable(true);
+ // Get the first candidate; the set bits in useCandidates must be in sequential order.
+ candidates = genFindLowestReg(allCandidates);
+ allCandidates &= ~candidates;
}
+#endif // _TARGET_ARM_
+ RefPosition* pos = newRefPosition(srcInterval, currentLoc, RefTypeUse, refPosNode, candidates,
+ idx DEBUG_ARG(minRegCountForUsePos));
+ consumed++;
}
+ }
- listNodePool.ReturnNodes(operandDefs);
-
- return GenTree::VisitResult::Continue;
- });
+ assert(consumed == consume);
+ if (consume != 0)
+ {
+ listNodePool.ReturnNodes(useList);
+ }
- buildInternalRegisterUsesForNode(tree, currentLoc, internalRefs, internalCount DEBUG_ARG(minRegCount));
+ buildInternalRegisterUsesForNode(tree, info, internalRefs, internalCount DEBUG_ARG(minRegCount));
RegisterType registerType = getDefType(tree);
- regMaskTP candidates = getDefCandidates(tree);
- regMaskTP useCandidates = getUseCandidates(tree);
+ regMaskTP candidates = info->getDstCandidates(this);
+ regMaskTP useCandidates = info->getSrcCandidates(this);
#ifdef DEBUG
if (VERBOSE && produce)
// push defs
LocationInfoList locationInfoList;
LsraLocation defLocation = currentLoc + 1;
+ Interval* interval = varDefInterval;
+ // For nodes that define multiple registers, subsequent intervals will be linked using the 'relatedInterval' field.
+ // Keep track of the previous interval allocated, for that purpose.
+ Interval* prevInterval = nullptr;
for (int i = 0; i < produce; i++)
{
regMaskTP currCandidates = candidates;
- Interval* interval = varDefInterval;
// In case of multi-reg call node, registerType is given by
// the type of ith position return register.
{
// Make a new interval
interval = newInterval(registerType);
- if (hasDelayFreeSrc)
+ if (hasDelayFreeSrc || info->isInternalRegDelayFree)
{
- interval->hasNonCommutativeRMWDef = true;
+ interval->hasInterferingUses = true;
}
else if (tree->OperIsConst())
{
// but not push it
if (!noAdd)
{
- locationInfoList.Append(listNodePool.GetNode(defLocation, interval, tree, (unsigned)i));
+ if (i == 0)
+ {
+ locationInfo->interval = interval;
+ prevInterval = interval;
+ bool added = operandToLocationInfoMap->AddOrUpdate(tree, locationInfo);
+ assert(added);
+ }
+ else
+ {
+ // This is the 2nd or subsequent register defined by a multi-reg node.
+ // Connect them using 'relatedInterval'.
+ noway_assert((prevInterval != nullptr) && (prevInterval->relatedInterval == nullptr));
+ prevInterval->relatedInterval = interval;
+ prevInterval = interval;
+ prevInterval->isMultiReg = true;
+ interval->isMultiReg = true;
+ }
}
RefPosition* pos = newRefPosition(interval, defLocation, defRefType, defNode, currCandidates,
(unsigned)i DEBUG_ARG(minRegCount));
- if (info.isLocalDefUse)
+ if (info->isLocalDefUse)
{
// This must be an unused value, OR it is a special node for which we allocate
// a target register even though it produces no value.
}
interval->updateRegisterPreferences(currCandidates);
interval->updateRegisterPreferences(useCandidates);
+ interval = nullptr;
}
#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
}
#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
- if (!locationInfoList.IsEmpty())
- {
- bool added = operandToLocationInfoMap.AddOrUpdate(tree, locationInfoList);
- assert(added);
- tree->gtLsraInfo.definesAnyRegisters = true;
- }
JITDUMP("\n");
}
// second part:
JITDUMP("\nbuildIntervals second part ========\n");
- LsraLocation currentLoc = 0;
+ currentLoc = 0;
// TODO-Cleanup: This duplicates prior behavior where entry (ParamDef) RefPositions were
// being assigned the bbNum of the last block traversed in the 2nd phase of Lowering.
// Previously, the block sequencing was done for the (formerly separate) TreeNodeInfoInit pass,
}
LocationInfoListNodePool listNodePool(compiler, 8);
- SmallHashTable<GenTree*, LocationInfoList, 32> operandToLocationInfoMap(compiler);
+ OperandToLocationInfoMap theOperandToLocationInfoMap(compiler);
+ operandToLocationInfoMap = &theOperandToLocationInfoMap;
BasicBlock* predBlock = nullptr;
BasicBlock* prevBlock = nullptr;
node->gtRegNum = node->gtRegNum;
#endif
- node->gtLsraInfo.Initialize(this, node, currentLoc);
-
- TreeNodeInfoInit(node);
-
- // If the node produces an unused value, mark it as a local def-use
- if (node->IsValue() && node->IsUnusedValue())
- {
- node->gtLsraInfo.isLocalDefUse = true;
- node->gtLsraInfo.dstCount = 0;
- }
-
-#ifdef DEBUG
- if (VERBOSE)
- {
- compiler->gtDispTree(node, nullptr, nullptr, true);
- printf(" +");
- node->gtLsraInfo.dump(this);
- }
-#endif // DEBUG
-
- // Only nodes that produce values should have a non-zero dstCount.
- assert((node->gtLsraInfo.dstCount == 0) || node->IsValue());
-
- buildRefPositionsForNode(node, block, listNodePool, operandToLocationInfoMap, currentLoc);
+ buildRefPositionsForNode(node, block, listNodePool, currentLoc);
#ifdef DEBUG
if (currentLoc > maxNodeLocation)
// Note: the visited set is cleared in LinearScan::doLinearScan()
markBlockVisited(block);
+ assert(operandToLocationInfoMap->Count() == 0);
if (enregisterLocalVars)
{
RefPosition* nextRelatedRefPosition = relatedInterval->getNextRefPosition();
if (nextRelatedRefPosition != nullptr)
{
- // Don't use the relatedInterval for preferencing if its next reference is not a new definition.
- if (!RefTypeIsDef(nextRelatedRefPosition->refType))
+ // Don't use the relatedInterval for preferencing if its next reference is not a new definition,
+ // or if it is only related because they are multi-reg targets of the same node.
+ if (!RefTypeIsDef(nextRelatedRefPosition->refType) ||
+ isMultiRegRelated(nextRelatedRefPosition, refPosition->nodeLocation))
{
relatedInterval = nullptr;
}
return isFree;
}
+// isMultiRegRelated: is this RefPosition defining part of a multi-reg value
+// at the given location?
+//
+bool LinearScan::isMultiRegRelated(RefPosition* refPosition, LsraLocation location)
+{
+#ifdef FEATURE_MULTIREG_ARGS_OR_RET
+ return ((refPosition->nodeLocation == location) && refPosition->getInterval()->isMultiReg);
+#else
+ return false;
+#endif
+}
+
//------------------------------------------------------------------------
// LinearScan::freeRegister: Make a register available for use
//
GenTreeCopyOrReload* newNode = new (compiler, oper) GenTreeCopyOrReload(oper, treeType, tree);
assert(refPosition->registerAssignment != RBM_NONE);
+ SetLsraAdded(newNode);
newNode->SetRegNumByIdx(refPosition->assignedReg(), multiRegIdx);
- newNode->gtLsraInfo.isLsraAdded = true;
- newNode->gtLsraInfo.isLocalDefUse = false;
if (refPosition->copyReg)
{
// This is a TEMPORARY copy
// First, insert the save before the call.
- GenTreePtr saveLcl = compiler->gtNewLclvNode(lclVarInterval->varNum, varDsc->lvType);
- saveLcl->gtLsraInfo.isLsraAdded = true;
- saveLcl->gtRegNum = lclVarReg;
- saveLcl->gtLsraInfo.isLocalDefUse = false;
+ GenTreePtr saveLcl = compiler->gtNewLclvNode(lclVarInterval->varNum, varDsc->lvType);
+ saveLcl->gtRegNum = lclVarReg;
+ SetLsraAdded(saveLcl);
GenTreeSIMD* simdNode =
new (compiler, GT_SIMD) GenTreeSIMD(LargeVectorSaveType, saveLcl, nullptr, SIMDIntrinsicUpperSave,
varDsc->lvBaseType, genTypeSize(varDsc->lvType));
- simdNode->gtLsraInfo.isLsraAdded = true;
- simdNode->gtRegNum = spillReg;
+ SetLsraAdded(simdNode);
+ simdNode->gtRegNum = spillReg;
if (spillToMem)
{
simdNode->gtFlags |= GTF_SPILL;
// Now insert the restore after the call.
- GenTreePtr restoreLcl = compiler->gtNewLclvNode(lclVarInterval->varNum, varDsc->lvType);
- restoreLcl->gtLsraInfo.isLsraAdded = true;
- restoreLcl->gtRegNum = lclVarReg;
- restoreLcl->gtLsraInfo.isLocalDefUse = false;
+ GenTreePtr restoreLcl = compiler->gtNewLclvNode(lclVarInterval->varNum, varDsc->lvType);
+ restoreLcl->gtRegNum = lclVarReg;
+ SetLsraAdded(restoreLcl);
simdNode = new (compiler, GT_SIMD) GenTreeSIMD(varDsc->lvType, restoreLcl, nullptr, SIMDIntrinsicUpperRestore,
varDsc->lvBaseType, genTypeSize(varDsc->lvType));
- simdNode->gtLsraInfo.isLsraAdded = true;
- simdNode->gtRegNum = spillReg;
+ simdNode->gtRegNum = spillReg;
+ SetLsraAdded(simdNode);
if (spillToMem)
{
simdNode->gtFlags |= GTF_SPILLED;
continue;
}
- LsraLocation loc = treeNode->gtLsraInfo.loc;
- assert(treeNode->IsLocal() || currentLocation == loc || currentLocation == loc + 1);
-
if (currentRefPosition->isIntervalRef() && currentRefPosition->getInterval()->isInternal)
{
treeNode->gtRsvdRegs |= currentRefPosition->registerAssignment;
// This var can't be marked lvRegister now
varDsc->lvRegNum = REG_STK;
- GenTreePtr src = compiler->gtNewLclvNode(lclNum, varDsc->TypeGet());
- src->gtLsraInfo.isLsraAdded = true;
+ GenTreePtr src = compiler->gtNewLclvNode(lclNum, varDsc->TypeGet());
+ SetLsraAdded(src);
// There are three cases we need to handle:
// - We are loading a lclVar from the stack.
// This is the new home of the lclVar - indicate that by clearing the GTF_VAR_DEATH flag.
// Note that if src is itself a lastUse, this will have no effect.
dst->gtFlags &= ~(GTF_VAR_DEATH);
- src->gtRegNum = fromReg;
- dst->gtRegNum = toReg;
- src->gtLsraInfo.isLocalDefUse = false;
- dst->gtLsraInfo.isLsraAdded = true;
+ src->gtRegNum = fromReg;
+ dst->gtRegNum = toReg;
+ SetLsraAdded(dst);
}
- dst->gtLsraInfo.isLocalDefUse = true;
dst->SetUnusedValue();
LIR::Range treeRange = LIR::SeqTree(compiler, dst);
LclVarDsc* varDsc2 = compiler->lvaTable + lclNum2;
assert(reg1 != REG_STK && reg1 != REG_NA && reg2 != REG_STK && reg2 != REG_NA);
- GenTreePtr lcl1 = compiler->gtNewLclvNode(lclNum1, varDsc1->TypeGet());
- lcl1->gtLsraInfo.isLsraAdded = true;
- lcl1->gtLsraInfo.isLocalDefUse = false;
- lcl1->gtRegNum = reg1;
+ GenTreePtr lcl1 = compiler->gtNewLclvNode(lclNum1, varDsc1->TypeGet());
+ lcl1->gtRegNum = reg1;
+ SetLsraAdded(lcl1);
- GenTreePtr lcl2 = compiler->gtNewLclvNode(lclNum2, varDsc2->TypeGet());
- lcl2->gtLsraInfo.isLsraAdded = true;
- lcl2->gtLsraInfo.isLocalDefUse = false;
- lcl2->gtRegNum = reg2;
+ GenTreePtr lcl2 = compiler->gtNewLclvNode(lclNum2, varDsc2->TypeGet());
+ lcl2->gtRegNum = reg2;
+ SetLsraAdded(lcl2);
- GenTreePtr swap = compiler->gtNewOperNode(GT_SWAP, TYP_VOID, lcl1, lcl2);
- swap->gtLsraInfo.isLsraAdded = true;
- swap->gtLsraInfo.isLocalDefUse = false;
- swap->gtRegNum = REG_NA;
+ GenTreePtr swap = compiler->gtNewOperNode(GT_SWAP, TYP_VOID, lcl1, lcl2);
+ swap->gtRegNum = REG_NA;
+ SetLsraAdded(swap);
lcl1->gtNext = lcl2;
lcl2->gtPrev = lcl1;
}
//------------------------------------------------------------------------
-// GetIndirSourceCount: Get the source registers for an indirection that might be contained.
+// GetIndirInfo: Get the source registers for an indirection that might be contained.
//
// Arguments:
// node - The node of interest
// Return Value:
// The number of source registers used by the *parent* of this node.
//
-int LinearScan::GetIndirSourceCount(GenTreeIndir* indirTree)
+// Notes:
+// Adds the defining node for each register to the useList.
+//
+int LinearScan::GetIndirInfo(GenTreeIndir* indirTree)
{
GenTree* const addr = indirTree->gtOp1;
if (!addr->isContained())
{
+ appendLocationInfoToList(addr);
return 1;
}
if (!addr->OperIs(GT_LEA))
unsigned srcCount = 0;
if ((addrMode->Base() != nullptr) && !addrMode->Base()->isContained())
{
+ appendLocationInfoToList(addrMode->Base());
srcCount++;
}
- if (addrMode->Index() != nullptr)
+ if ((addrMode->Index() != nullptr) && !addrMode->Index()->isContained())
{
- // We never have a contained index.
- assert(!addrMode->Index()->isContained());
+ appendLocationInfoToList(addrMode->Index());
srcCount++;
}
return srcCount;
}
-void TreeNodeInfo::Initialize(LinearScan* lsra, GenTree* node, LsraLocation location)
+//------------------------------------------------------------------------
+// GetOperandInfo: Get the source registers for an operand that might be contained.
+//
+// Arguments:
+// node - The node of interest
+// (Uses are appended to the LinearScan member 'useList'; there is no useList parameter.)
+//
+// Return Value:
+// The number of source registers used by the *parent* of this node.
+//
+// Notes:
+// Adds the defining node for each register to the given useList.
+//
+int LinearScan::GetOperandInfo(GenTree* node)
{
+ if (!node->isContained())
+ {
+ appendLocationInfoToList(node);
+ return 1;
+ }
+
+#if !defined(_TARGET_64BIT_)
+ if (node->OperIs(GT_LONG))
+ {
+ return appendBinaryLocationInfoToList(node->AsOp());
+ }
+#endif // !defined(_TARGET_64BIT_)
+ if (node->OperIsIndir())
+ {
+ const unsigned srcCount = GetIndirInfo(node->AsIndir());
+ return srcCount;
+ }
+
+ return 0;
+}
+
+//------------------------------------------------------------------------
+// GetOperandInfo: Get the source registers for an operand that might be contained.
+//
+// Arguments:
+// node - The node of interest
+// pFirstInfo - Out parameter; set to the first LocationInfoListNode appended to the useList for this operand
+//
+// Return Value:
+// The number of source registers used by the *parent* of this node.
+//
+// Notes:
+// Adds the defining node for each register to the useList.
+//
+int LinearScan::GetOperandInfo(GenTree* node, LocationInfoListNode** pFirstInfo)
+{
+ LocationInfoListNode* prevLast = useList.Last();
+ int srcCount = GetOperandInfo(node);
+ if (prevLast == nullptr)
+ {
+ *pFirstInfo = useList.Begin();
+ }
+ else
+ {
+ *pFirstInfo = prevLast->Next();
+ }
+ return srcCount;
+}
+
+void TreeNodeInfo::Initialize(LinearScan* lsra, GenTree* node)
+{
+ _dstCount = 0;
+ _srcCount = 0;
+ _internalIntCount = 0;
+ _internalFloatCount = 0;
+
+ isLocalDefUse = false;
+ isDelayFree = false;
+ hasDelayFreeSrc = false;
+ isTgtPref = false;
+ isInternalRegDelayFree = false;
+
regMaskTP dstCandidates;
// if there is a reg indicated on the tree node, use that for dstCandidates
dstCandidates = genRegMask(node->gtRegNum);
}
- internalIntCount = 0;
- internalFloatCount = 0;
- isLocalDefUse = false;
- isLsraAdded = false;
- definesAnyRegisters = false;
-
setDstCandidates(lsra, dstCandidates);
srcCandsIndex = dstCandsIndex;
setInternalCandidates(lsra, lsra->allRegs(TYP_INT));
- loc = location;
#ifdef DEBUG
isInitialized = true;
#endif
{
printf(" (struct)");
}
+ if (isPromotedStruct)
+ {
+ printf(" (promoted struct)");
+ }
+ if (hasConflictingDefUse)
+ {
+ printf(" (def-use conflict)");
+ }
+ if (hasInterferingUses)
+ {
+ printf(" (interfering uses)");
+ }
if (isSpecialPutArg)
{
printf(" (specialPutArg)");
{
printf(" (constant)");
}
+ if (isMultiReg)
+ {
+ printf(" (multireg)");
+ }
printf(" RefPositions {");
for (RefPosition* refPosition = this->firstRefPosition; refPosition != nullptr;
void TreeNodeInfo::dump(LinearScan* lsra)
{
- printf("<TreeNodeInfo @ %2u %d=%d %di %df", loc, dstCount, srcCount, internalIntCount, internalFloatCount);
+ printf("<TreeNodeInfo %d=%d %di %df", dstCount, srcCount, internalIntCount, internalFloatCount);
printf(" src=");
dumpRegMask(getSrcCandidates(lsra));
printf(" int=");
{
printf(" I");
}
- if (isLsraAdded)
- {
- printf(" A");
- }
if (isDelayFree)
{
printf(" D");
{
printf(" P");
}
- if (regOptional)
- {
- printf(" O");
- }
if (isInternalRegDelayFree)
{
printf(" ID");
}
- printf(">\n");
+ printf(">");
+}
+
+void LinearScan::dumpOperandToLocationInfoMap()
+{
+ JITDUMP("OperandToLocationInfoMap: { ");
+ bool first = true;
+ for (auto kvp : *operandToLocationInfoMap)
+ {
+ GenTree* node = kvp.Key();
+ LocationInfoListNode* def = kvp.Value();
+
+ JITDUMP("%sN%03u.t%d. %s", first ? "" : "; ", node->gtSeqNum, node->gtTreeID, GenTree::OpName(node->OperGet()));
+
+ first = false;
+ }
+ JITDUMP(" }\n");
}
void LinearScan::lsraDumpIntervals(const char* msg)
{
assert(operand != nullptr);
assert(operandString != nullptr);
-
- if (ComputeOperandDstCount(operand) == 0)
+ if (!operand->IsLIR())
{
return;
}
- if (operand->gtLsraInfo.dstCount != 0)
+ int dstCount = ComputeOperandDstCount(operand);
+
+ if (dstCount != 0)
{
// This operand directly produces registers; print it.
- for (int i = 0; i < operand->gtLsraInfo.dstCount; i++)
+ for (int i = 0; i < dstCount; i++)
{
if (!first)
{
first = false;
}
}
- else
+ else if (operand->isContained())
{
// This is a contained node. Dump the defs produced by its operands.
for (GenTree* op : operand->Operands())
{
GenTree* tree = node;
- genTreeOps oper = tree->OperGet();
- TreeNodeInfo& info = tree->gtLsraInfo;
- if (tree->gtLsraInfo.isLsraAdded)
- {
- // This must be one of the nodes that we add during LSRA
-
- if (oper == GT_LCL_VAR)
- {
- info.srcCount = 0;
- info.dstCount = 1;
- }
- else if (oper == GT_RELOAD || oper == GT_COPY)
- {
- info.srcCount = 1;
- info.dstCount = 1;
- }
-#ifdef FEATURE_SIMD
- else if (oper == GT_SIMD)
- {
- if (tree->gtSIMD.gtSIMDIntrinsicID == SIMDIntrinsicUpperSave)
- {
- info.srcCount = 1;
- info.dstCount = 1;
- }
- else
- {
- assert(tree->gtSIMD.gtSIMDIntrinsicID == SIMDIntrinsicUpperRestore);
- info.srcCount = 2;
- info.dstCount = 0;
- }
- }
-#endif // FEATURE_SIMD
- else
- {
- assert(oper == GT_SWAP);
- info.srcCount = 2;
- info.dstCount = 0;
- }
- info.internalIntCount = 0;
- info.internalFloatCount = 0;
- }
-
- int consume = info.srcCount;
- int produce = info.dstCount;
- regMaskTP killMask = RBM_NONE;
- regMaskTP fixedMask = RBM_NONE;
+ genTreeOps oper = tree->OperGet();
+ int produce = tree->IsValue() ? ComputeOperandDstCount(tree) : 0;
+ int consume = ComputeAvailableSrcCount(tree);
+ regMaskTP killMask = RBM_NONE;
+ regMaskTP fixedMask = RBM_NONE;
lsraDispNode(tree, mode, produce != 0 && mode != LSRA_DUMP_REFPOS);
}
}
printf("\n");
- if (info.internalIntCount != 0 && mode != LSRA_DUMP_REFPOS)
- {
- printf("\tinternal (%d):\t", info.internalIntCount);
- if (mode == LSRA_DUMP_POST)
- {
- dumpRegMask(tree->gtRsvdRegs);
- }
- else if ((info.getInternalCandidates(this) & allRegs(TYP_INT)) != allRegs(TYP_INT))
- {
- dumpRegMask(info.getInternalCandidates(this) & allRegs(TYP_INT));
- }
- printf("\n");
- }
- if (info.internalFloatCount != 0 && mode != LSRA_DUMP_REFPOS)
- {
- printf("\tinternal (%d):\t", info.internalFloatCount);
- if (mode == LSRA_DUMP_POST)
- {
- dumpRegMask(tree->gtRsvdRegs);
- }
- else if ((info.getInternalCandidates(this) & allRegs(TYP_INT)) != allRegs(TYP_INT))
- {
- dumpRegMask(info.getInternalCandidates(this) & allRegs(TYP_INT));
- }
- printf("\n");
- }
}
if (enregisterLocalVars && mode == LSRA_DUMP_POST)
{
//
bool LinearScan::IsResolutionMove(GenTree* node)
{
- if (!node->gtLsraInfo.isLsraAdded)
+ if (!IsLsraAdded(node))
{
return false;
}
{
case GT_LCL_VAR:
case GT_COPY:
- return node->gtLsraInfo.isLocalDefUse;
+ return node->IsUnusedValue();
case GT_SWAP:
return true;
return true;
}
- if (!node->gtLsraInfo.isLsraAdded || (node->OperGet() != GT_LCL_VAR))
+ if (!IsLsraAdded(node) || (node->OperGet() != GT_LCL_VAR))
{
return false;
}
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-// See the LICENSE file in the project root for more information.
-/*****************************************************************************/
-
-#ifndef _LSRA_H_
-#define _LSRA_H_
-
-#include "arraylist.h"
-#include "smallhash.h"
-#include "nodeinfo.h"
-
-// Minor and forward-reference types
-class Interval;
-class RefPosition;
-class LinearScan;
-class RegRecord;
-
-template <class T>
-class ArrayStack;
-
-// LsraLocation tracks the linearized order of the nodes.
-// Each node is assigned two LsraLocations - one for all the uses and all but the last
-// def, and a second location for the last def (if any)
-
-typedef unsigned int LsraLocation;
-const unsigned int MinLocation = 0;
-const unsigned int MaxLocation = UINT_MAX;
-// max number of registers an operation could require internally (in addition to uses and defs)
-const unsigned int MaxInternalRegisters = 8;
-const unsigned int RegisterTypeCount = 2;
-
-typedef var_types RegisterType;
-#define IntRegisterType TYP_INT
-#define FloatRegisterType TYP_FLOAT
-
-inline regMaskTP calleeSaveRegs(RegisterType rt)
-{
- return varTypeIsIntegralOrI(rt) ? RBM_INT_CALLEE_SAVED : RBM_FLT_CALLEE_SAVED;
-}
-
-struct LocationInfo
-{
- LsraLocation loc;
-
- // Reg Index in case of multi-reg result producing call node.
- // Indicates the position of the register that this location refers to.
- // The max bits needed is based on max value of MAX_RET_REG_COUNT value
- // across all targets and that happens 4 on on Arm. Hence index value
- // would be 0..MAX_RET_REG_COUNT-1.
- unsigned multiRegIdx : 2;
-
- Interval* interval;
- GenTree* treeNode;
-
- LocationInfo(LsraLocation l, Interval* i, GenTree* t, unsigned regIdx = 0)
- : loc(l), multiRegIdx(regIdx), interval(i), treeNode(t)
- {
- assert(multiRegIdx == regIdx);
- }
-
- // default constructor for data structures
- LocationInfo()
- {
- }
-};
-
-struct LsraBlockInfo
-{
- // bbNum of the predecessor to use for the register location of live-in variables.
- // 0 for fgFirstBB.
- unsigned int predBBNum;
- BasicBlock::weight_t weight;
- bool hasCriticalInEdge;
- bool hasCriticalOutEdge;
-
-#if TRACK_LSRA_STATS
- // Per block maintained LSRA statistics.
-
- // Number of spills of local vars or tree temps in this basic block.
- unsigned spillCount;
-
- // Number of GT_COPY nodes inserted in this basic block while allocating regs.
- // Note that GT_COPY nodes are also inserted as part of basic block boundary
- // resolution, which are accounted against resolutionMovCount but not
- // against copyRegCount.
- unsigned copyRegCount;
-
- // Number of resolution moves inserted in this basic block.
- unsigned resolutionMovCount;
-
- // Number of critical edges from this block that are split.
- unsigned splitEdgeCount;
-#endif // TRACK_LSRA_STATS
-};
-
-// This is sort of a bit mask
-// The low order 2 bits will be 1 for defs, and 2 for uses
-enum RefType : unsigned char
-{
-#define DEF_REFTYPE(memberName, memberValue, shortName) memberName = memberValue,
-#include "lsra_reftypes.h"
-#undef DEF_REFTYPE
-};
-
-// position in a block (for resolution)
-enum BlockStartOrEnd
-{
- BlockPositionStart = 0,
- BlockPositionEnd = 1,
- PositionCount = 2
-};
-
-inline bool RefTypeIsUse(RefType refType)
-{
- return ((refType & RefTypeUse) == RefTypeUse);
-}
-
-inline bool RefTypeIsDef(RefType refType)
-{
- return ((refType & RefTypeDef) == RefTypeDef);
-}
-
-typedef regNumberSmall* VarToRegMap;
-
-template <typename ElementType, CompMemKind MemKind>
-class ListElementAllocator
-{
-private:
- template <typename U, CompMemKind CMK>
- friend class ListElementAllocator;
-
- Compiler* m_compiler;
-
-public:
- ListElementAllocator(Compiler* compiler) : m_compiler(compiler)
- {
- }
-
- template <typename U>
- ListElementAllocator(const ListElementAllocator<U, MemKind>& other) : m_compiler(other.m_compiler)
- {
- }
-
- ElementType* allocate(size_t count)
- {
- return reinterpret_cast<ElementType*>(m_compiler->compGetMem(sizeof(ElementType) * count, MemKind));
- }
-
- void deallocate(ElementType* pointer, size_t count)
- {
- }
-
- template <typename U>
- struct rebind
- {
- typedef ListElementAllocator<U, MemKind> allocator;
- };
-};
-
-typedef ListElementAllocator<Interval, CMK_LSRA_Interval> LinearScanMemoryAllocatorInterval;
-typedef ListElementAllocator<RefPosition, CMK_LSRA_RefPosition> LinearScanMemoryAllocatorRefPosition;
-
-typedef jitstd::list<Interval, LinearScanMemoryAllocatorInterval> IntervalList;
-typedef jitstd::list<RefPosition, LinearScanMemoryAllocatorRefPosition> RefPositionList;
-
-class Referenceable
-{
-public:
- Referenceable()
- {
- firstRefPosition = nullptr;
- recentRefPosition = nullptr;
- lastRefPosition = nullptr;
- isActive = false;
- }
-
- // A linked list of RefPositions. These are only traversed in the forward
- // direction, and are not moved, so they don't need to be doubly linked
- // (see RefPosition).
-
- RefPosition* firstRefPosition;
- RefPosition* recentRefPosition;
- RefPosition* lastRefPosition;
-
- bool isActive;
-
- // Get the position of the next reference which is at or greater than
- // the current location (relies upon recentRefPosition being udpated
- // during traversal).
- RefPosition* getNextRefPosition();
- LsraLocation getNextRefLocation();
-};
-
-class RegRecord : public Referenceable
-{
-public:
- RegRecord()
- {
- assignedInterval = nullptr;
- previousInterval = nullptr;
- regNum = REG_NA;
- isCalleeSave = false;
- registerType = IntRegisterType;
- isBusyUntilNextKill = false;
- }
-
- void init(regNumber reg)
- {
-#ifdef _TARGET_ARM64_
- // The Zero register, or the SP
- if ((reg == REG_ZR) || (reg == REG_SP))
- {
- // IsGeneralRegister returns false for REG_ZR and REG_SP
- regNum = reg;
- registerType = IntRegisterType;
- }
- else
-#endif
- if (emitter::isFloatReg(reg))
- {
- registerType = FloatRegisterType;
- }
- else
- {
- // The constructor defaults to IntRegisterType
- assert(emitter::isGeneralRegister(reg) && registerType == IntRegisterType);
- }
- regNum = reg;
- isCalleeSave = ((RBM_CALLEE_SAVED & genRegMask(reg)) != 0);
- }
-
-#ifdef DEBUG
- // print out representation
- void dump();
- // concise representation for embedding
- void tinyDump();
-#endif // DEBUG
-
- bool isFree();
-
- // RefPosition * getNextRefPosition();
- // LsraLocation getNextRefLocation();
-
- // DATA
-
- // interval to which this register is currently allocated.
- // If the interval is inactive (isActive == false) then it is not currently live,
- // and the register call be unassigned (i.e. setting assignedInterval to nullptr)
- // without spilling the register.
- Interval* assignedInterval;
- // Interval to which this register was previously allocated, and which was unassigned
- // because it was inactive. This register will be reassigned to this Interval when
- // assignedInterval becomes inactive.
- Interval* previousInterval;
-
- regNumber regNum;
- bool isCalleeSave;
- RegisterType registerType;
- // This register must be considered busy until the next time it is explicitly killed.
- // This is used so that putarg_reg can avoid killing its lclVar source, while avoiding
- // the problem with the reg becoming free if the last-use is encountered before the call.
- bool isBusyUntilNextKill;
-
- bool conflictingFixedRegReference(RefPosition* refPosition);
-};
-
-inline bool leafInRange(GenTree* leaf, int lower, int upper)
-{
- if (!leaf->IsIntCnsFitsInI32())
- {
- return false;
- }
- if (leaf->gtIntCon.gtIconVal < lower)
- {
- return false;
- }
- if (leaf->gtIntCon.gtIconVal > upper)
- {
- return false;
- }
-
- return true;
-}
-
-inline bool leafInRange(GenTree* leaf, int lower, int upper, int multiple)
-{
- if (!leafInRange(leaf, lower, upper))
- {
- return false;
- }
- if (leaf->gtIntCon.gtIconVal % multiple)
- {
- return false;
- }
-
- return true;
-}
-
-inline bool leafAddInRange(GenTree* leaf, int lower, int upper, int multiple = 1)
-{
- if (leaf->OperGet() != GT_ADD)
- {
- return false;
- }
- return leafInRange(leaf->gtOp.gtOp2, lower, upper, multiple);
-}
-
-inline bool isCandidateVar(LclVarDsc* varDsc)
-{
- return varDsc->lvLRACandidate;
-}
-
-/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
-XX XX
-XX LinearScan XX
-XX XX
-XX This is the container for the Linear Scan data structures and methods. XX
-XX XX
-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
-*/
-// OPTION 1: The algorithm as described in "Optimized Interval Splitting in a
-// Linear Scan Register Allocator". It is driven by iterating over the Interval
-// lists. In this case, we need multiple IntervalLists, and Intervals will be
-// moved between them so they must be easily updated.
-
-// OPTION 2: The algorithm is driven by iterating over the RefPositions. In this
-// case, we only need a single IntervalList, and it won't be updated.
-// The RefPosition must refer to its Interval, and we need to be able to traverse
-// to the next RefPosition in code order
-// THIS IS THE OPTION CURRENTLY BEING PURSUED
-
-class LocationInfoList;
-class LocationInfoListNodePool;
-
-class LinearScan : public LinearScanInterface
-{
- friend class RefPosition;
- friend class Interval;
- friend class Lowering;
- friend class TreeNodeInfo;
-
-public:
- // This could use further abstraction. From Compiler we need the tree,
- // the flowgraph and the allocator.
- LinearScan(Compiler* theCompiler);
-
- // This is the main driver
- virtual void doLinearScan();
-
- // TreeNodeInfo contains three register masks: src candidates, dst candidates, and internal condidates.
- // Instead of storing actual register masks, however, which are large, we store a small index into a table
- // of register masks, stored in this class. We create only as many distinct register masks as are needed.
- // All identical register masks get the same index. The register mask table contains:
- // 1. A mask containing all eligible integer registers.
- // 2. A mask containing all elibible floating-point registers.
- // 3. A mask for each of single register.
- // 4. A mask for each combination of registers, created dynamically as required.
- //
- // Currently, the maximum number of masks allowed is a constant defined by 'numMasks'. The register mask
- // table is never resized. It is also limited by the size of the index, currently an unsigned char.
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#if defined(_TARGET_ARM64_)
- static const int numMasks = 128;
-#else
- static const int numMasks = 64;
-#endif
-
- regMaskTP* regMaskTable;
- int nextFreeMask;
-
- typedef int RegMaskIndex;
-
- // allint is 0, allfloat is 1, all the single-bit masks start at 2
- enum KnownRegIndex
- {
- ALLINT_IDX = 0,
- ALLFLOAT_IDX = 1,
- FIRST_SINGLE_REG_IDX = 2
- };
-
- RegMaskIndex GetIndexForRegMask(regMaskTP mask);
- regMaskTP GetRegMaskForIndex(RegMaskIndex index);
- void RemoveRegisterFromMasks(regNumber reg);
-
-#ifdef DEBUG
- void dspRegisterMaskTable();
-#endif // DEBUG
-
- // Initialize the block traversal for LSRA.
- // This resets the bbVisitedSet, and on the first invocation sets the blockSequence array,
- // which determines the order in which blocks will be allocated (currently called during Lowering).
- BasicBlock* startBlockSequence();
- // Move to the next block in sequence, updating the current block information.
- BasicBlock* moveToNextBlock();
- // Get the next block to be scheduled without changing the current block,
- // but updating the blockSequence during the first iteration if it is not fully computed.
- BasicBlock* getNextBlock();
-
- // This is called during code generation to update the location of variables
- virtual void recordVarLocationsAtStartOfBB(BasicBlock* bb);
-
- // This does the dataflow analysis and builds the intervals
- void buildIntervals();
-
- // This is where the actual assignment is done
- void allocateRegisters();
-
- // This is the resolution phase, where cross-block mismatches are fixed up
- void resolveRegisters();
-
- void writeRegisters(RefPosition* currentRefPosition, GenTree* tree);
-
- // Insert a copy in the case where a tree node value must be moved to a different
- // register at the point of use, or it is reloaded to a different register
- // than the one it was spilled from
- void insertCopyOrReload(BasicBlock* block, GenTreePtr tree, unsigned multiRegIdx, RefPosition* refPosition);
-
-#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
- // Insert code to save and restore the upper half of a vector that lives
- // in a callee-save register at the point of a call (the upper half is
- // not preserved).
- void insertUpperVectorSaveAndReload(GenTreePtr tree, RefPosition* refPosition, BasicBlock* block);
-#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
-
- // resolve along one block-block edge
- enum ResolveType
- {
- ResolveSplit,
- ResolveJoin,
- ResolveCritical,
- ResolveSharedCritical,
- ResolveTypeCount
- };
-#ifdef DEBUG
- static const char* resolveTypeName[ResolveTypeCount];
-#endif
-
- enum WhereToInsert
- {
- InsertAtTop,
- InsertAtBottom
- };
-
-#ifdef _TARGET_ARM_
- void addResolutionForDouble(BasicBlock* block,
- GenTreePtr insertionPoint,
- Interval** sourceIntervals,
- regNumberSmall* location,
- regNumber toReg,
- regNumber fromReg,
- ResolveType resolveType);
-#endif
- void addResolution(
- BasicBlock* block, GenTreePtr insertionPoint, Interval* interval, regNumber outReg, regNumber inReg);
-
- void handleOutgoingCriticalEdges(BasicBlock* block);
-
- void resolveEdge(BasicBlock* fromBlock, BasicBlock* toBlock, ResolveType resolveType, VARSET_VALARG_TP liveSet);
-
- void resolveEdges();
-
- // Finally, the register assignments are written back to the tree nodes.
- void recordRegisterAssignments();
-
- // Keep track of how many temp locations we'll need for spill
- void initMaxSpill();
- void updateMaxSpill(RefPosition* refPosition);
- void recordMaxSpill();
-
- // max simultaneous spill locations used of every type
- unsigned int maxSpill[TYP_COUNT];
- unsigned int currentSpill[TYP_COUNT];
- bool needFloatTmpForFPCall;
- bool needDoubleTmpForFPCall;
-
-#ifdef DEBUG
-private:
- //------------------------------------------------------------------------
- // Should we stress lsra?
- // This uses the same COMPLUS variable as rsStressRegs (COMPlus_JitStressRegs)
- // However, the possible values and their interpretation are entirely different.
- //
- // The mask bits are currently divided into fields in which each non-zero value
- // is a distinct stress option (e.g. 0x3 is not a combination of 0x1 and 0x2).
- // However, subject to possible constraints (to be determined), the different
- // fields can be combined (e.g. 0x7 is a combination of 0x3 and 0x4).
- // Note that the field values are declared in a public enum, but the actual bits are
- // only accessed via accessors.
-
- unsigned lsraStressMask;
-
- // This controls the registers available for allocation
- enum LsraStressLimitRegs{LSRA_LIMIT_NONE = 0, LSRA_LIMIT_CALLEE = 0x1, LSRA_LIMIT_CALLER = 0x2,
- LSRA_LIMIT_SMALL_SET = 0x3, LSRA_LIMIT_MASK = 0x3};
-
- // When LSRA_LIMIT_SMALL_SET is specified, it is desirable to select a "mixed" set of caller- and callee-save
- // registers, so as to get different coverage than limiting to callee or caller.
- // At least for x86 and AMD64, and potentially other architecture that will support SIMD,
- // we need a minimum of 5 fp regs in order to support the InitN intrinsic for Vector4.
- // Hence the "SmallFPSet" has 5 elements.
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#if defined(_TARGET_AMD64_)
-#ifdef UNIX_AMD64_ABI
- // On System V the RDI and RSI are not callee saved. Use R12 ans R13 as callee saved registers.
- static const regMaskTP LsraLimitSmallIntSet =
- (RBM_EAX | RBM_ECX | RBM_EBX | RBM_ETW_FRAMED_EBP | RBM_R12 | RBM_R13);
-#else // !UNIX_AMD64_ABI
- // On Windows Amd64 use the RDI and RSI as callee saved registers.
- static const regMaskTP LsraLimitSmallIntSet =
- (RBM_EAX | RBM_ECX | RBM_EBX | RBM_ETW_FRAMED_EBP | RBM_ESI | RBM_EDI);
-#endif // !UNIX_AMD64_ABI
- static const regMaskTP LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7);
-#elif defined(_TARGET_ARM_)
- static const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3 | RBM_R4);
- static const regMaskTP LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F16 | RBM_F17);
-#elif defined(_TARGET_ARM64_)
- static const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R19 | RBM_R20);
- static const regMaskTP LsraLimitSmallFPSet = (RBM_V0 | RBM_V1 | RBM_V2 | RBM_V8 | RBM_V9);
-#elif defined(_TARGET_X86_)
- static const regMaskTP LsraLimitSmallIntSet = (RBM_EAX | RBM_ECX | RBM_EDI);
- static const regMaskTP LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7);
-#else
-#error Unsupported or unset target architecture
-#endif // target
-
- LsraStressLimitRegs getStressLimitRegs()
- {
- return (LsraStressLimitRegs)(lsraStressMask & LSRA_LIMIT_MASK);
- }
-
- regMaskTP getConstrainedRegMask(regMaskTP regMaskActual, regMaskTP regMaskConstrain, unsigned minRegCount);
- regMaskTP stressLimitRegs(RefPosition* refPosition, regMaskTP mask);
-
- // This controls the heuristics used to select registers
- // These can be combined.
- enum LsraSelect{LSRA_SELECT_DEFAULT = 0, LSRA_SELECT_REVERSE_HEURISTICS = 0x04,
- LSRA_SELECT_REVERSE_CALLER_CALLEE = 0x08, LSRA_SELECT_NEAREST = 0x10, LSRA_SELECT_MASK = 0x1c};
- LsraSelect getSelectionHeuristics()
- {
- return (LsraSelect)(lsraStressMask & LSRA_SELECT_MASK);
- }
- bool doReverseSelect()
- {
- return ((lsraStressMask & LSRA_SELECT_REVERSE_HEURISTICS) != 0);
- }
- bool doReverseCallerCallee()
- {
- return ((lsraStressMask & LSRA_SELECT_REVERSE_CALLER_CALLEE) != 0);
- }
- bool doSelectNearest()
- {
- return ((lsraStressMask & LSRA_SELECT_NEAREST) != 0);
- }
-
- // This controls the order in which basic blocks are visited during allocation
- enum LsraTraversalOrder{LSRA_TRAVERSE_LAYOUT = 0x20, LSRA_TRAVERSE_PRED_FIRST = 0x40,
- LSRA_TRAVERSE_RANDOM = 0x60, // NYI
- LSRA_TRAVERSE_DEFAULT = LSRA_TRAVERSE_PRED_FIRST, LSRA_TRAVERSE_MASK = 0x60};
- LsraTraversalOrder getLsraTraversalOrder()
- {
- if ((lsraStressMask & LSRA_TRAVERSE_MASK) == 0)
- {
- return LSRA_TRAVERSE_DEFAULT;
- }
- return (LsraTraversalOrder)(lsraStressMask & LSRA_TRAVERSE_MASK);
- }
- bool isTraversalLayoutOrder()
- {
- return getLsraTraversalOrder() == LSRA_TRAVERSE_LAYOUT;
- }
- bool isTraversalPredFirstOrder()
- {
- return getLsraTraversalOrder() == LSRA_TRAVERSE_PRED_FIRST;
- }
-
- // This controls whether lifetimes should be extended to the entire method.
- // Note that this has no effect under MinOpts
- enum LsraExtendLifetimes{LSRA_DONT_EXTEND = 0, LSRA_EXTEND_LIFETIMES = 0x80, LSRA_EXTEND_LIFETIMES_MASK = 0x80};
- LsraExtendLifetimes getLsraExtendLifeTimes()
- {
- return (LsraExtendLifetimes)(lsraStressMask & LSRA_EXTEND_LIFETIMES_MASK);
- }
- bool extendLifetimes()
- {
- return getLsraExtendLifeTimes() == LSRA_EXTEND_LIFETIMES;
- }
-
- // This controls whether variables locations should be set to the previous block in layout order
- // (LSRA_BLOCK_BOUNDARY_LAYOUT), or to that of the highest-weight predecessor (LSRA_BLOCK_BOUNDARY_PRED -
- // the default), or rotated (LSRA_BLOCK_BOUNDARY_ROTATE).
- enum LsraBlockBoundaryLocations{LSRA_BLOCK_BOUNDARY_PRED = 0, LSRA_BLOCK_BOUNDARY_LAYOUT = 0x100,
- LSRA_BLOCK_BOUNDARY_ROTATE = 0x200, LSRA_BLOCK_BOUNDARY_MASK = 0x300};
- LsraBlockBoundaryLocations getLsraBlockBoundaryLocations()
- {
- return (LsraBlockBoundaryLocations)(lsraStressMask & LSRA_BLOCK_BOUNDARY_MASK);
- }
- regNumber rotateBlockStartLocation(Interval* interval, regNumber targetReg, regMaskTP availableRegs);
-
- // This controls whether we always insert a GT_RELOAD instruction after a spill
- // Note that this can be combined with LSRA_SPILL_ALWAYS (or not)
- enum LsraReload{LSRA_NO_RELOAD_IF_SAME = 0, LSRA_ALWAYS_INSERT_RELOAD = 0x400, LSRA_RELOAD_MASK = 0x400};
- LsraReload getLsraReload()
- {
- return (LsraReload)(lsraStressMask & LSRA_RELOAD_MASK);
- }
- bool alwaysInsertReload()
- {
- return getLsraReload() == LSRA_ALWAYS_INSERT_RELOAD;
- }
-
- // This controls whether we spill everywhere
- enum LsraSpill{LSRA_DONT_SPILL_ALWAYS = 0, LSRA_SPILL_ALWAYS = 0x800, LSRA_SPILL_MASK = 0x800};
- LsraSpill getLsraSpill()
- {
- return (LsraSpill)(lsraStressMask & LSRA_SPILL_MASK);
- }
- bool spillAlways()
- {
- return getLsraSpill() == LSRA_SPILL_ALWAYS;
- }
-
- // This controls whether RefPositions that lower/codegen indicated as reg optional be
- // allocated a reg at all.
- enum LsraRegOptionalControl{LSRA_REG_OPTIONAL_DEFAULT = 0, LSRA_REG_OPTIONAL_NO_ALLOC = 0x1000,
- LSRA_REG_OPTIONAL_MASK = 0x1000};
-
- LsraRegOptionalControl getLsraRegOptionalControl()
- {
- return (LsraRegOptionalControl)(lsraStressMask & LSRA_REG_OPTIONAL_MASK);
- }
-
- bool regOptionalNoAlloc()
- {
- return getLsraRegOptionalControl() == LSRA_REG_OPTIONAL_NO_ALLOC;
- }
-
- bool candidatesAreStressLimited()
- {
- return ((lsraStressMask & (LSRA_LIMIT_MASK | LSRA_SELECT_MASK)) != 0);
- }
-
- // Dump support
- void lsraDumpIntervals(const char* msg);
- void dumpRefPositions(const char* msg);
- void dumpVarRefPositions(const char* msg);
-
- static bool IsResolutionMove(GenTree* node);
- static bool IsResolutionNode(LIR::Range& containingRange, GenTree* node);
-
- void verifyFinalAllocation();
- void verifyResolutionMove(GenTree* resolutionNode, LsraLocation currentLocation);
-#else // !DEBUG
- bool doSelectNearest()
- {
- return false;
- }
- bool extendLifetimes()
- {
- return false;
- }
- bool spillAlways()
- {
- return false;
- }
- // In a retail build we support only the default traversal order
- bool isTraversalLayoutOrder()
- {
- return false;
- }
- bool isTraversalPredFirstOrder()
- {
- return true;
- }
- bool getLsraExtendLifeTimes()
- {
- return false;
- }
- bool candidatesAreStressLimited()
- {
- return false;
- }
-#endif // !DEBUG
-
-public:
- // Used by Lowering when considering whether to split Longs, as well as by identifyCandidates().
- bool isRegCandidate(LclVarDsc* varDsc);
-
- bool isContainableMemoryOp(GenTree* node);
-
-private:
- // Determine which locals are candidates for allocation
- void identifyCandidates();
-
- // determine which locals are used in EH constructs we don't want to deal with
- void identifyCandidatesExceptionDataflow();
-
- void buildPhysRegRecords();
-
-#ifdef DEBUG
- void checkLastUses(BasicBlock* block);
-#endif // DEBUG
-
- void setFrameType();
-
- // Update allocations at start/end of block
- void unassignIntervalBlockStart(RegRecord* regRecord, VarToRegMap inVarToRegMap);
- void processBlockEndAllocation(BasicBlock* current);
-
- // Record variable locations at start/end of block
- void processBlockStartLocations(BasicBlock* current, bool allocationPass);
- void processBlockEndLocations(BasicBlock* current);
-
-#ifdef _TARGET_ARM_
- bool isSecondHalfReg(RegRecord* regRec, Interval* interval);
- RegRecord* getSecondHalfRegRec(RegRecord* regRec);
- RegRecord* findAnotherHalfRegRec(RegRecord* regRec);
- bool canSpillDoubleReg(RegRecord* physRegRecord, LsraLocation refLocation, unsigned* recentAssignedRefWeight);
- void unassignDoublePhysReg(RegRecord* doubleRegRecord);
-#endif
- void updateAssignedInterval(RegRecord* reg, Interval* interval, RegisterType regType);
- void updatePreviousInterval(RegRecord* reg, Interval* interval, RegisterType regType);
- bool canRestorePreviousInterval(RegRecord* regRec, Interval* assignedInterval);
- bool isAssignedToInterval(Interval* interval, RegRecord* regRec);
- bool isRefPositionActive(RefPosition* refPosition, LsraLocation refLocation);
- bool canSpillReg(RegRecord* physRegRecord, LsraLocation refLocation, unsigned* recentAssignedRefWeight);
- bool isRegInUse(RegRecord* regRec, RefPosition* refPosition);
-
- RefType CheckBlockType(BasicBlock* block, BasicBlock* prevBlock);
-
- // insert refpositions representing prolog zero-inits which will be added later
- void insertZeroInitRefPositions();
-
- void AddMapping(GenTree* node, LsraLocation loc);
-
- // add physreg refpositions for a tree node, based on calling convention and instruction selection predictions
- void addRefsForPhysRegMask(regMaskTP mask, LsraLocation currentLoc, RefType refType, bool isLastUse);
-
- void resolveConflictingDefAndUse(Interval* interval, RefPosition* defRefPosition);
-
- void buildRefPositionsForNode(GenTree* tree,
- BasicBlock* block,
- LocationInfoListNodePool& listNodePool,
- HashTableBase<GenTree*, LocationInfoList>& operandToLocationInfoMap,
- LsraLocation loc);
-
-#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
- VARSET_VALRET_TP buildUpperVectorSaveRefPositions(GenTree* tree, LsraLocation currentLoc);
- void buildUpperVectorRestoreRefPositions(GenTree* tree, LsraLocation currentLoc, VARSET_VALARG_TP liveLargeVectors);
-#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
-
-#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
- // For AMD64 on SystemV machines. This method
- // is called as replacement for raUpdateRegStateForArg
- // that is used on Windows. On System V systems a struct can be passed
- // partially using registers from the 2 register files.
- void unixAmd64UpdateRegStateForArg(LclVarDsc* argDsc);
-#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
-
- // Update reg state for an incoming register argument
- void updateRegStateForArg(LclVarDsc* argDsc);
-
- inline bool isLocalDefUse(GenTree* tree)
- {
- return tree->gtLsraInfo.isLocalDefUse;
- }
-
- inline bool isCandidateLocalRef(GenTree* tree)
- {
- if (tree->IsLocal())
- {
- unsigned int lclNum = tree->gtLclVarCommon.gtLclNum;
- assert(lclNum < compiler->lvaCount);
- LclVarDsc* varDsc = compiler->lvaTable + tree->gtLclVarCommon.gtLclNum;
-
- return isCandidateVar(varDsc);
- }
- return false;
- }
-
- static Compiler::fgWalkResult markAddrModeOperandsHelperMD(GenTreePtr tree, void* p);
-
- // Return the registers killed by the given tree node.
- regMaskTP getKillSetForNode(GenTree* tree);
-
- // Given some tree node add refpositions for all the registers this node kills
- bool buildKillPositionsForNode(GenTree* tree, LsraLocation currentLoc);
-
- regMaskTP allRegs(RegisterType rt);
- regMaskTP allRegs(GenTree* tree);
- regMaskTP allMultiRegCallNodeRegs(GenTreeCall* tree);
- regMaskTP allSIMDRegs();
- regMaskTP internalFloatRegCandidates();
-
- bool registerIsFree(regNumber regNum, RegisterType regType);
- bool registerIsAvailable(RegRecord* physRegRecord,
- LsraLocation currentLoc,
- LsraLocation* nextRefLocationPtr,
- RegisterType regType);
- void freeRegister(RegRecord* physRegRecord);
- void freeRegisters(regMaskTP regsToFree);
-
- regMaskTP getUseCandidates(GenTree* useNode);
- regMaskTP getDefCandidates(GenTree* tree);
- var_types getDefType(GenTree* tree);
-
- RefPosition* defineNewInternalTemp(GenTree* tree,
- RegisterType regType,
- LsraLocation currentLoc,
- regMaskTP regMask DEBUGARG(unsigned minRegCandidateCount));
-
- int buildInternalRegisterDefsForNode(GenTree* tree,
- LsraLocation currentLoc,
- RefPosition* defs[] DEBUGARG(unsigned minRegCandidateCount));
-
- void buildInternalRegisterUsesForNode(GenTree* tree,
- LsraLocation currentLoc,
- RefPosition* defs[],
- int total DEBUGARG(unsigned minRegCandidateCount));
-
- void resolveLocalRef(BasicBlock* block, GenTreePtr treeNode, RefPosition* currentRefPosition);
-
- void insertMove(BasicBlock* block, GenTreePtr insertionPoint, unsigned lclNum, regNumber inReg, regNumber outReg);
-
- void insertSwap(BasicBlock* block,
- GenTreePtr insertionPoint,
- unsigned lclNum1,
- regNumber reg1,
- unsigned lclNum2,
- regNumber reg2);
-
-public:
- // TODO-Cleanup: unused?
- class PhysRegIntervalIterator
- {
- public:
- PhysRegIntervalIterator(LinearScan* theLinearScan)
- {
- nextRegNumber = (regNumber)0;
- linearScan = theLinearScan;
- }
- RegRecord* GetNext()
- {
- return &linearScan->physRegs[nextRegNumber];
- }
-
- private:
- // This assumes that the physical registers are contiguous, starting
- // with a register number of 0
- regNumber nextRegNumber;
- LinearScan* linearScan;
- };
-
-private:
- Interval* newInterval(RegisterType regType);
-
- Interval* getIntervalForLocalVar(unsigned varIndex)
- {
- assert(varIndex < compiler->lvaTrackedCount);
- assert(localVarIntervals[varIndex] != nullptr);
- return localVarIntervals[varIndex];
- }
-
- Interval* getIntervalForLocalVarNode(GenTreeLclVarCommon* tree)
- {
- LclVarDsc* varDsc = &compiler->lvaTable[tree->gtLclNum];
- assert(varDsc->lvTracked);
- return getIntervalForLocalVar(varDsc->lvVarIndex);
- }
-
- RegRecord* getRegisterRecord(regNumber regNum);
-
- RefPosition* newRefPositionRaw(LsraLocation nodeLocation, GenTree* treeNode, RefType refType);
-
- RefPosition* newRefPosition(Interval* theInterval,
- LsraLocation theLocation,
- RefType theRefType,
- GenTree* theTreeNode,
- regMaskTP mask,
- unsigned multiRegIdx = 0 DEBUGARG(unsigned minRegCandidateCount = 1));
-
- RefPosition* newRefPosition(
- regNumber reg, LsraLocation theLocation, RefType theRefType, GenTree* theTreeNode, regMaskTP mask);
-
- void applyCalleeSaveHeuristics(RefPosition* rp);
-
- void associateRefPosWithInterval(RefPosition* rp);
-
- void associateRefPosWithRegister(RefPosition* rp);
-
- unsigned getWeight(RefPosition* refPos);
-
- /*****************************************************************************
- * Register management
- ****************************************************************************/
- RegisterType getRegisterType(Interval* currentInterval, RefPosition* refPosition);
- regNumber tryAllocateFreeReg(Interval* current, RefPosition* refPosition);
- regNumber allocateBusyReg(Interval* current, RefPosition* refPosition, bool allocateIfProfitable);
- regNumber assignCopyReg(RefPosition* refPosition);
-
- bool isMatchingConstant(RegRecord* physRegRecord, RefPosition* refPosition);
- bool isSpillCandidate(Interval* current,
- RefPosition* refPosition,
- RegRecord* physRegRecord,
- LsraLocation& nextLocation);
- void checkAndAssignInterval(RegRecord* regRec, Interval* interval);
- void assignPhysReg(RegRecord* regRec, Interval* interval);
- void assignPhysReg(regNumber reg, Interval* interval)
- {
- assignPhysReg(getRegisterRecord(reg), interval);
- }
-
- bool isAssigned(RegRecord* regRec ARM_ARG(RegisterType newRegType));
- bool isAssigned(RegRecord* regRec, LsraLocation lastLocation ARM_ARG(RegisterType newRegType));
- void checkAndClearInterval(RegRecord* regRec, RefPosition* spillRefPosition);
- void unassignPhysReg(RegRecord* regRec ARM_ARG(RegisterType newRegType));
- void unassignPhysReg(RegRecord* regRec, RefPosition* spillRefPosition);
- void unassignPhysRegNoSpill(RegRecord* reg);
- void unassignPhysReg(regNumber reg)
- {
- unassignPhysReg(getRegisterRecord(reg), nullptr);
- }
-
- void setIntervalAsSpilled(Interval* interval);
- void setIntervalAsSplit(Interval* interval);
- void spillInterval(Interval* interval, RefPosition* fromRefPosition, RefPosition* toRefPosition);
-
- void spillGCRefs(RefPosition* killRefPosition);
-
- /*****************************************************************************
- * For Resolution phase
- ****************************************************************************/
- // TODO-Throughput: Consider refactoring this so that we keep a map from regs to vars for better scaling
- unsigned int regMapCount;
-
- // When we split edges, we create new blocks, and instead of expanding the VarToRegMaps, we
- // rely on the property that the "in" map is the same as the "from" block of the edge, and the
- // "out" map is the same as the "to" block of the edge (by construction).
- // So, for any block whose bbNum is greater than bbNumMaxBeforeResolution, we use the
- // splitBBNumToTargetBBNumMap.
- // TODO-Throughput: We may want to look into the cost/benefit tradeoff of doing this vs. expanding
- // the arrays.
-
- unsigned bbNumMaxBeforeResolution;
- struct SplitEdgeInfo
- {
- unsigned fromBBNum;
- unsigned toBBNum;
- };
- typedef JitHashTable<unsigned, JitSmallPrimitiveKeyFuncs<unsigned>, SplitEdgeInfo> SplitBBNumToTargetBBNumMap;
- SplitBBNumToTargetBBNumMap* splitBBNumToTargetBBNumMap;
- SplitBBNumToTargetBBNumMap* getSplitBBNumToTargetBBNumMap()
- {
- if (splitBBNumToTargetBBNumMap == nullptr)
- {
- splitBBNumToTargetBBNumMap =
- new (getAllocator(compiler)) SplitBBNumToTargetBBNumMap(getAllocator(compiler));
- }
- return splitBBNumToTargetBBNumMap;
- }
- SplitEdgeInfo getSplitEdgeInfo(unsigned int bbNum);
-
- void initVarRegMaps();
- void setInVarRegForBB(unsigned int bbNum, unsigned int varNum, regNumber reg);
- void setOutVarRegForBB(unsigned int bbNum, unsigned int varNum, regNumber reg);
- VarToRegMap getInVarToRegMap(unsigned int bbNum);
- VarToRegMap getOutVarToRegMap(unsigned int bbNum);
- void setVarReg(VarToRegMap map, unsigned int trackedVarIndex, regNumber reg);
- regNumber getVarReg(VarToRegMap map, unsigned int trackedVarIndex);
- // Initialize the incoming VarToRegMap to the given map values (generally a predecessor of
- // the block)
- VarToRegMap setInVarToRegMap(unsigned int bbNum, VarToRegMap srcVarToRegMap);
-
- regNumber getTempRegForResolution(BasicBlock* fromBlock, BasicBlock* toBlock, var_types type);
-
-#ifdef DEBUG
- void dumpVarToRegMap(VarToRegMap map);
- void dumpInVarToRegMap(BasicBlock* block);
- void dumpOutVarToRegMap(BasicBlock* block);
-
- // There are three points at which a tuple-style dump is produced, and each
- // differs slightly:
- // - In LSRA_DUMP_PRE, it does a simple dump of each node, with indications of what
- // tree nodes are consumed.
- // - In LSRA_DUMP_REFPOS, which is after the intervals are built, but before
- // register allocation, each node is dumped, along with all of the RefPositions,
- // The Intervals are identifed as Lnnn for lclVar intervals, Innn for for other
- // intervals, and Tnnn for internal temps.
- // - In LSRA_DUMP_POST, which is after register allocation, the registers are
- // shown.
-
- enum LsraTupleDumpMode{LSRA_DUMP_PRE, LSRA_DUMP_REFPOS, LSRA_DUMP_POST};
- void lsraGetOperandString(GenTreePtr tree,
- LsraTupleDumpMode mode,
- char* operandString,
- unsigned operandStringLength);
- void lsraDispNode(GenTreePtr tree, LsraTupleDumpMode mode, bool hasDest);
- void DumpOperandDefs(
- GenTree* operand, bool& first, LsraTupleDumpMode mode, char* operandString, const unsigned operandStringLength);
- void TupleStyleDump(LsraTupleDumpMode mode);
-
- LsraLocation maxNodeLocation;
-
- // Width of various fields - used to create a streamlined dump during allocation that shows the
- // state of all the registers in columns.
- int regColumnWidth;
- int regTableIndent;
-
- const char* columnSeparator;
- const char* line;
- const char* leftBox;
- const char* middleBox;
- const char* rightBox;
-
- static const int MAX_FORMAT_CHARS = 12;
- char intervalNameFormat[MAX_FORMAT_CHARS];
- char regNameFormat[MAX_FORMAT_CHARS];
- char shortRefPositionFormat[MAX_FORMAT_CHARS];
- char emptyRefPositionFormat[MAX_FORMAT_CHARS];
- char indentFormat[MAX_FORMAT_CHARS];
- static const int MAX_LEGEND_FORMAT_CHARS = 25;
- char bbRefPosFormat[MAX_LEGEND_FORMAT_CHARS];
- char legendFormat[MAX_LEGEND_FORMAT_CHARS];
-
- // How many rows have we printed since last printing a "title row"?
- static const int MAX_ROWS_BETWEEN_TITLES = 50;
- int rowCountSinceLastTitle;
- // Current mask of registers being printed in the dump.
- regMaskTP lastDumpedRegisters;
- regMaskTP registersToDump;
- int lastUsedRegNumIndex;
- bool shouldDumpReg(regNumber regNum)
- {
- return (registersToDump & genRegMask(regNum)) != 0;
- }
-
- void dumpRegRecordHeader();
- void dumpRegRecordTitle();
- void dumpRegRecordTitleIfNeeded();
- void dumpRegRecordTitleLines();
- void dumpRegRecords();
- // An abbreviated RefPosition dump for printing with column-based register state
- void dumpRefPositionShort(RefPosition* refPosition, BasicBlock* currentBlock);
- // Print the number of spaces occupied by a dumpRefPositionShort()
- void dumpEmptyRefPosition();
- // A dump of Referent, in exactly regColumnWidth characters
- void dumpIntervalName(Interval* interval);
-
- // Events during the allocation phase that cause some dump output
- enum LsraDumpEvent{
- // Conflicting def/use
- LSRA_EVENT_DEFUSE_CONFLICT, LSRA_EVENT_DEFUSE_FIXED_DELAY_USE, LSRA_EVENT_DEFUSE_CASE1, LSRA_EVENT_DEFUSE_CASE2,
- LSRA_EVENT_DEFUSE_CASE3, LSRA_EVENT_DEFUSE_CASE4, LSRA_EVENT_DEFUSE_CASE5, LSRA_EVENT_DEFUSE_CASE6,
-
- // Spilling
- LSRA_EVENT_SPILL, LSRA_EVENT_SPILL_EXTENDED_LIFETIME, LSRA_EVENT_RESTORE_PREVIOUS_INTERVAL,
- LSRA_EVENT_RESTORE_PREVIOUS_INTERVAL_AFTER_SPILL, LSRA_EVENT_DONE_KILL_GC_REFS,
-
- // Block boundaries
- LSRA_EVENT_START_BB, LSRA_EVENT_END_BB,
-
- // Miscellaneous
- LSRA_EVENT_FREE_REGS,
-
- // Characteristics of the current RefPosition
- LSRA_EVENT_INCREMENT_RANGE_END, // ???
- LSRA_EVENT_LAST_USE, LSRA_EVENT_LAST_USE_DELAYED, LSRA_EVENT_NEEDS_NEW_REG,
-
- // Allocation decisions
- LSRA_EVENT_FIXED_REG, LSRA_EVENT_EXP_USE, LSRA_EVENT_ZERO_REF, LSRA_EVENT_NO_ENTRY_REG_ALLOCATED,
- LSRA_EVENT_KEPT_ALLOCATION, LSRA_EVENT_COPY_REG, LSRA_EVENT_MOVE_REG, LSRA_EVENT_ALLOC_REG,
- LSRA_EVENT_ALLOC_SPILLED_REG, LSRA_EVENT_NO_REG_ALLOCATED, LSRA_EVENT_RELOAD, LSRA_EVENT_SPECIAL_PUTARG,
- LSRA_EVENT_REUSE_REG,
- };
- void dumpLsraAllocationEvent(LsraDumpEvent event,
- Interval* interval = nullptr,
- regNumber reg = REG_NA,
- BasicBlock* currentBlock = nullptr);
-
- void dumpBlockHeader(BasicBlock* block);
-
- void validateIntervals();
-#endif // DEBUG
-
-#if TRACK_LSRA_STATS
- enum LsraStat{
- LSRA_STAT_SPILL, LSRA_STAT_COPY_REG, LSRA_STAT_RESOLUTION_MOV, LSRA_STAT_SPLIT_EDGE,
- };
-
- unsigned regCandidateVarCount;
- void updateLsraStat(LsraStat stat, unsigned currentBBNum);
-
- void dumpLsraStats(FILE* file);
-
-#define INTRACK_STATS(x) x
-#else // !TRACK_LSRA_STATS
-#define INTRACK_STATS(x)
-#endif // !TRACK_LSRA_STATS
-
- Compiler* compiler;
-
-private:
-#if MEASURE_MEM_ALLOC
- CompAllocator* lsraAllocator;
-#endif
-
- CompAllocator* getAllocator(Compiler* comp)
- {
-#if MEASURE_MEM_ALLOC
- if (lsraAllocator == nullptr)
- {
- lsraAllocator = new (comp, CMK_LSRA) CompAllocator(comp, CMK_LSRA);
- }
- return lsraAllocator;
-#else
- return comp->getAllocator();
-#endif
- }
-
-#ifdef DEBUG
- // This is used for dumping
- RefPosition* activeRefPosition;
-#endif // DEBUG
-
- IntervalList intervals;
-
- RegRecord physRegs[REG_COUNT];
-
- // Map from tracked variable index to Interval*.
- Interval** localVarIntervals;
-
- // Set of blocks that have been visited.
- BlockSet bbVisitedSet;
- void markBlockVisited(BasicBlock* block)
- {
- BlockSetOps::AddElemD(compiler, bbVisitedSet, block->bbNum);
- }
- void clearVisitedBlocks()
- {
- BlockSetOps::ClearD(compiler, bbVisitedSet);
- }
- bool isBlockVisited(BasicBlock* block)
- {
- return BlockSetOps::IsMember(compiler, bbVisitedSet, block->bbNum);
- }
-
-#if DOUBLE_ALIGN
- bool doDoubleAlign;
-#endif
-
- // A map from bbNum to the block information used during register allocation.
- LsraBlockInfo* blockInfo;
- BasicBlock* findPredBlockForLiveIn(BasicBlock* block, BasicBlock* prevBlock DEBUGARG(bool* pPredBlockIsAllocated));
-
- // The order in which the blocks will be allocated.
- // This is any array of BasicBlock*, in the order in which they should be traversed.
- BasicBlock** blockSequence;
- // The verifiedAllBBs flag indicates whether we have verified that all BBs have been
- // included in the blockSeuqence above, during setBlockSequence().
- bool verifiedAllBBs;
- void setBlockSequence();
- int compareBlocksForSequencing(BasicBlock* block1, BasicBlock* block2, bool useBlockWeights);
- BasicBlockList* blockSequenceWorkList;
- bool blockSequencingDone;
- void addToBlockSequenceWorkList(BlockSet sequencedBlockSet, BasicBlock* block, BlockSet& predSet);
- void removeFromBlockSequenceWorkList(BasicBlockList* listNode, BasicBlockList* prevNode);
- BasicBlock* getNextCandidateFromWorkList();
-
- // The bbNum of the block being currently allocated or resolved.
- unsigned int curBBNum;
- // The ordinal of the block we're on (i.e. this is the curBBSeqNum-th block we've allocated).
- unsigned int curBBSeqNum;
- // The number of blocks that we've sequenced.
- unsigned int bbSeqCount;
- // The Location of the start of the current block.
- LsraLocation curBBStartLocation;
- // True if the method contains any critical edges.
- bool hasCriticalEdges;
-
- // True if there are any register candidate lclVars available for allocation.
- bool enregisterLocalVars;
-
- virtual bool willEnregisterLocalVars() const
- {
- return enregisterLocalVars;
- }
-
- // Ordered list of RefPositions
- RefPositionList refPositions;
-
- // Per-block variable location mappings: an array indexed by block number that yields a
- // pointer to an array of regNumber, one per variable.
- VarToRegMap* inVarToRegMaps;
- VarToRegMap* outVarToRegMaps;
-
- // A temporary VarToRegMap used during the resolution of critical edges.
- VarToRegMap sharedCriticalVarToRegMap;
-
- PhasedVar<regMaskTP> availableIntRegs;
- PhasedVar<regMaskTP> availableFloatRegs;
- PhasedVar<regMaskTP> availableDoubleRegs;
-
- // The set of all register candidates. Note that this may be a subset of tracked vars.
- VARSET_TP registerCandidateVars;
- // Current set of live register candidate vars, used during building of RefPositions to determine
- // whether to preference to callee-save.
- VARSET_TP currentLiveVars;
- // Set of variables that may require resolution across an edge.
- // This is first constructed during interval building, to contain all the lclVars that are live at BB edges.
- // Then, any lclVar that is always in the same register is removed from the set.
- VARSET_TP resolutionCandidateVars;
- // This set contains all the lclVars that are ever spilled or split.
- VARSET_TP splitOrSpilledVars;
- // Set of floating point variables to consider for callee-save registers.
- VARSET_TP fpCalleeSaveCandidateVars;
-#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
-#if defined(_TARGET_AMD64_)
- static bool varTypeNeedsPartialCalleeSave(var_types type)
- {
- return (emitTypeSize(type) == 32);
- }
- static const var_types LargeVectorSaveType = TYP_SIMD16;
-#elif defined(_TARGET_ARM64_)
- static bool varTypeNeedsPartialCalleeSave(var_types type)
- {
- // ARM64 ABI FP Callee save registers only require Callee to save lower 8 Bytes
- // For SIMD types longer then 8 bytes Caller is responsible for saving and restoring Upper bytes.
- return (emitTypeSize(type) == 16);
- }
- static const var_types LargeVectorSaveType = TYP_DOUBLE;
-#else // !defined(_TARGET_AMD64_) && !defined(_TARGET_ARM64_)
-#error("Unknown target architecture for FEATURE_SIMD")
-#endif // !defined(_TARGET_AMD64_) && !defined(_TARGET_ARM64_)
-
- // Set of large vector (TYP_SIMD32 on AVX) variables.
- VARSET_TP largeVectorVars;
- // Set of large vector (TYP_SIMD32 on AVX) variables to consider for callee-save registers.
- VARSET_TP largeVectorCalleeSaveCandidateVars;
-#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
-
- //-----------------------------------------------------------------------
- // TreeNodeInfo methods
- //-----------------------------------------------------------------------
-
- void TreeNodeInfoInit(GenTree* stmt);
-
- void TreeNodeInfoInitCheckByteable(GenTree* tree);
-
- bool CheckAndSetDelayFree(GenTree* delayUseSrc);
-
- void TreeNodeInfoInitSimple(GenTree* tree);
- int GetOperandSourceCount(GenTree* node);
- int GetIndirSourceCount(GenTreeIndir* indirTree);
- void HandleFloatVarArgs(GenTreeCall* call, GenTree* argNode, bool* callHasFloatRegArgs);
-
- void TreeNodeInfoInitStoreLoc(GenTree* tree);
- void TreeNodeInfoInitReturn(GenTree* tree);
- void TreeNodeInfoInitShiftRotate(GenTree* tree);
- void TreeNodeInfoInitPutArgReg(GenTreeUnOp* node);
- void TreeNodeInfoInitCall(GenTreeCall* call);
- void TreeNodeInfoInitCmp(GenTreePtr tree);
- void TreeNodeInfoInitStructArg(GenTreePtr structArg);
- void TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode);
- void TreeNodeInfoInitModDiv(GenTree* tree);
- void TreeNodeInfoInitIntrinsic(GenTree* tree);
- void TreeNodeInfoInitStoreLoc(GenTreeLclVarCommon* tree);
- void TreeNodeInfoInitIndir(GenTreeIndir* indirTree);
- void TreeNodeInfoInitGCWriteBarrier(GenTree* tree);
- void TreeNodeInfoInitCast(GenTree* tree);
-
-#ifdef _TARGET_X86_
- bool ExcludeNonByteableRegisters(GenTree* tree);
-#endif
-
-#if defined(_TARGET_XARCH_)
- // returns true if the tree can use the read-modify-write memory instruction form
- bool isRMWRegOper(GenTreePtr tree);
- void TreeNodeInfoInitMul(GenTreePtr tree);
- void SetContainsAVXFlags(bool isFloatingPointType = true, unsigned sizeOfSIMDVector = 0);
-#endif // defined(_TARGET_XARCH_)
-
-#ifdef FEATURE_SIMD
- void TreeNodeInfoInitSIMD(GenTreeSIMD* tree);
-#endif // FEATURE_SIMD
-
-#if FEATURE_HW_INTRINSICS
- void TreeNodeInfoInitHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree);
-#endif // FEATURE_HW_INTRINSICS
-
- void TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode);
-#ifdef _TARGET_ARM_
- void TreeNodeInfoInitPutArgSplit(GenTreePutArgSplit* tree);
-#endif
- void TreeNodeInfoInitLclHeap(GenTree* tree);
-};
-
-/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
-XX XX
-XX Interval XX
-XX XX
-XX This is the fundamental data structure for linear scan register XX
-XX allocation. It represents the live range(s) for a variable or temp. XX
-XX XX
-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
-*/
-
-class Interval : public Referenceable
-{
-public:
- Interval(RegisterType registerType, regMaskTP registerPreferences)
- : registerPreferences(registerPreferences)
- , relatedInterval(nullptr)
- , assignedReg(nullptr)
- , registerType(registerType)
- , isLocalVar(false)
- , isSplit(false)
- , isSpilled(false)
- , isInternal(false)
- , isStructField(false)
- , isPromotedStruct(false)
- , hasConflictingDefUse(false)
- , hasNonCommutativeRMWDef(false)
- , isSpecialPutArg(false)
- , preferCalleeSave(false)
- , isConstant(false)
- , physReg(REG_COUNT)
-#ifdef DEBUG
- , intervalIndex(0)
-#endif
- , varNum(0)
- {
- }
-
-#ifdef DEBUG
- // print out representation
- void dump();
- // concise representation for embedding
- void tinyDump();
- // extremely concise representation
- void microDump();
-#endif // DEBUG
-
- void setLocalNumber(Compiler* compiler, unsigned lclNum, LinearScan* l);
-
- // Fixed registers for which this Interval has a preference
- regMaskTP registerPreferences;
-
- // The relatedInterval is:
- // - for any other interval, it is the interval to which this interval
- // is currently preferenced (e.g. because they are related by a copy)
- Interval* relatedInterval;
-
- // The assignedReg is the RecRecord for the register to which this interval
- // has been assigned at some point - if the interval is active, this is the
- // register it currently occupies.
- RegRecord* assignedReg;
-
- // DECIDE : put this in a union or do something w/ inheritance?
- // this is an interval for a physical register, not a allocatable entity
-
- RegisterType registerType;
- bool isLocalVar : 1;
- // Indicates whether this interval has been assigned to different registers
- bool isSplit : 1;
- // Indicates whether this interval is ever spilled
- bool isSpilled : 1;
- // indicates an interval representing the internal requirements for
- // generating code for a node (temp registers internal to the node)
- // Note that this interval may live beyond a node in the GT_ARR_LENREF/GT_IND
- // case (though never lives beyond a stmt)
- bool isInternal : 1;
- // true if this is a LocalVar for a struct field
- bool isStructField : 1;
- // true iff this is a GT_LDOBJ for a fully promoted (PROMOTION_TYPE_INDEPENDENT) struct
- bool isPromotedStruct : 1;
- // true if this is an SDSU interval for which the def and use have conflicting register
- // requirements
- bool hasConflictingDefUse : 1;
- // true if this interval is defined by a non-commutative 2-operand instruction
- bool hasNonCommutativeRMWDef : 1;
-
- // True if this interval is defined by a putArg, whose source is a non-last-use lclVar.
- // During allocation, this flag will be cleared if the source is not already in the required register.
- // Othewise, we will leave the register allocated to the lclVar, but mark the RegRecord as
- // isBusyUntilNextKill, so that it won't be reused if the lclVar goes dead before the call.
- bool isSpecialPutArg : 1;
-
- // True if this interval interferes with a call.
- bool preferCalleeSave : 1;
-
- // True if this interval is defined by a constant node that may be reused and/or may be
- // able to reuse a constant that's already in a register.
- bool isConstant : 1;
-
- // The register to which it is currently assigned.
- regNumber physReg;
-
-#ifdef DEBUG
- unsigned int intervalIndex;
-#endif // DEBUG
-
- unsigned int varNum; // This is the "variable number": the index into the lvaTable array
-
- LclVarDsc* getLocalVar(Compiler* comp)
- {
- assert(isLocalVar);
- return &(comp->lvaTable[this->varNum]);
- }
-
- // Get the local tracked variable "index" (lvVarIndex), used in bitmasks.
- unsigned getVarIndex(Compiler* comp)
- {
- LclVarDsc* varDsc = getLocalVar(comp);
- assert(varDsc->lvTracked); // If this isn't true, we shouldn't be calling this function!
- return varDsc->lvVarIndex;
- }
-
- bool isAssignedTo(regNumber regNum)
- {
- // This uses regMasks to handle the case where a double actually occupies two registers
- // TODO-Throughput: This could/should be done more cheaply.
- return (physReg != REG_NA && (genRegMask(physReg, registerType) & genRegMask(regNum)) != RBM_NONE);
- }
-
- // Assign the related interval.
- void assignRelatedInterval(Interval* newRelatedInterval)
- {
-#ifdef DEBUG
- if (VERBOSE)
- {
- printf("Assigning related ");
- newRelatedInterval->microDump();
- printf(" to ");
- this->microDump();
- printf("\n");
- }
-#endif // DEBUG
- relatedInterval = newRelatedInterval;
- }
-
- // Assign the related interval, but only if it isn't already assigned.
- void assignRelatedIntervalIfUnassigned(Interval* newRelatedInterval)
- {
- if (relatedInterval == nullptr)
- {
- assignRelatedInterval(newRelatedInterval);
- }
- else
- {
-#ifdef DEBUG
- if (VERBOSE)
- {
- printf("Interval ");
- this->microDump();
- printf(" already has a related interval\n");
- }
-#endif // DEBUG
- }
- }
-
- // Update the registerPreferences on the interval.
- // If there are conflicting requirements on this interval, set the preferences to
- // the union of them. That way maybe we'll get at least one of them.
- // An exception is made in the case where one of the existing or new
- // preferences are all callee-save, in which case we "prefer" the callee-save
-
- void updateRegisterPreferences(regMaskTP preferences)
- {
- // We require registerPreferences to have been initialized.
- assert(registerPreferences != RBM_NONE);
- // It is invalid to update with empty preferences
- assert(preferences != RBM_NONE);
-
- regMaskTP commonPreferences = (registerPreferences & preferences);
- if (commonPreferences != RBM_NONE)
- {
- registerPreferences = commonPreferences;
- return;
- }
-
- // There are no preferences in common.
- // Preferences need to reflect both cases where a var must occupy a specific register,
- // as well as cases where a var is live when a register is killed.
- // In the former case, we would like to record all such registers, however we don't
- // really want to use any registers that will interfere.
- // To approximate this, we never "or" together multi-reg sets, which are generally kill sets.
-
- if (!genMaxOneBit(preferences))
- {
- // The new preference value is a multi-reg set, so it's probably a kill.
- // Keep the new value.
- registerPreferences = preferences;
- return;
- }
-
- if (!genMaxOneBit(registerPreferences))
- {
- // The old preference value is a multi-reg set.
- // Keep the existing preference set, as it probably reflects one or more kills.
- // It may have been a union of multiple individual registers, but we can't
- // distinguish that case without extra cost.
- return;
- }
-
- // If we reach here, we have two disjoint single-reg sets.
- // Keep only the callee-save preferences, if not empty.
- // Otherwise, take the union of the preferences.
-
- regMaskTP newPreferences = registerPreferences | preferences;
-
- if (preferCalleeSave)
- {
- regMaskTP calleeSaveMask = (calleeSaveRegs(this->registerType) & (newPreferences));
- if (calleeSaveMask != RBM_NONE)
- {
- newPreferences = calleeSaveMask;
- }
- }
- registerPreferences = newPreferences;
- }
-};
-
-class RefPosition
-{
-public:
- // A RefPosition refers to either an Interval or a RegRecord. 'referent' points to one
- // of these types. If it refers to a RegRecord, then 'isPhysRegRef' is true. If it
- // refers to an Interval, then 'isPhysRegRef' is false.
- //
- // Q: can 'referent' be NULL?
-
- Referenceable* referent;
-
- // nextRefPosition is the next in code order.
- // Note that in either case there is no need for these to be doubly linked, as they
- // are only traversed in the forward direction, and are not moved.
- RefPosition* nextRefPosition;
-
- // The remaining fields are common to both options
- GenTree* treeNode;
- unsigned int bbNum;
-
- // Prior to the allocation pass, registerAssignment captures the valid registers
- // for this RefPosition. An empty set means that any register is valid. A non-empty
- // set means that it must be one of the given registers (may be the full set if the
- // only constraint is that it must reside in SOME register)
- // After the allocation pass, this contains the actual assignment
- LsraLocation nodeLocation;
- regMaskTP registerAssignment;
-
- RefType refType;
-
- // NOTE: C++ only packs bitfields if the base type is the same. So make all the base
- // NOTE: types of the logically "bool" types that follow 'unsigned char', so they match
- // NOTE: RefType that precedes this, and multiRegIdx can also match.
-
- // Indicates whether this ref position is to be allocated a reg only if profitable. Currently these are the
- // ref positions that lower/codegen has indicated as reg optional and is considered a contained memory operand if
- // no reg is allocated.
- unsigned char allocRegIfProfitable : 1;
-
- // Used by RefTypeDef/Use positions of a multi-reg call node.
- // Indicates the position of the register that this ref position refers to.
- // The max bits needed is based on max value of MAX_RET_REG_COUNT value
- // across all targets and that happens 4 on on Arm. Hence index value
- // would be 0..MAX_RET_REG_COUNT-1.
- unsigned char multiRegIdx : 2;
-
- // Last Use - this may be true for multiple RefPositions in the same Interval
- unsigned char lastUse : 1;
-
- // Spill and Copy info
- // reload indicates that the value was spilled, and must be reloaded here.
- // spillAfter indicates that the value is spilled here, so a spill must be added.
- // copyReg indicates that the value needs to be copied to a specific register,
- // but that it will also retain its current assigned register.
- // moveReg indicates that the value needs to be moved to a different register,
- // and that this will be its new assigned register.
- // A RefPosition may have any flag individually or the following combinations:
- // - reload and spillAfter (i.e. it remains in memory), but not in combination with copyReg or moveReg
- // (reload cannot exist with copyReg or moveReg; it should be reloaded into the appropriate reg)
- // - spillAfter and copyReg (i.e. it must be copied to a new reg for use, but is then spilled)
- // - spillAfter and moveReg (i.e. it most be both spilled and moved)
- // NOTE: a moveReg involves an explicit move, and would usually not be needed for a fixed Reg if it is going
- // to be spilled, because the code generator will do the move to the fixed register, and doesn't need to
- // record the new register location as the new "home" location of the lclVar. However, if there is a conflicting
- // use at the same location (e.g. lclVar V1 is in rdx and needs to be in rcx, but V2 needs to be in rdx), then
- // we need an explicit move.
- // - copyReg and moveReg must not exist with each other.
-
- unsigned char reload : 1;
- unsigned char spillAfter : 1;
- unsigned char copyReg : 1;
- unsigned char moveReg : 1; // true if this var is moved to a new register
-
- unsigned char isPhysRegRef : 1; // true if 'referent' points of a RegRecord, false if it points to an Interval
- unsigned char isFixedRegRef : 1;
- unsigned char isLocalDefUse : 1;
-
- // delayRegFree indicates that the register should not be freed right away, but instead wait
- // until the next Location after it would normally be freed. This is used for the case of
- // non-commutative binary operators, where op2 must not be assigned the same register as
- // the target. We do this by not freeing it until after the target has been defined.
- // Another option would be to actually change the Location of the op2 use until the same
- // Location as the def, but then it could potentially reuse a register that has been freed
- // from the other source(s), e.g. if it's a lastUse or spilled.
- unsigned char delayRegFree : 1;
-
- // outOfOrder is marked on a (non-def) RefPosition that doesn't follow a definition of the
- // register currently assigned to the Interval. This happens when we use the assigned
- // register from a predecessor that is not the most recently allocated BasicBlock.
- unsigned char outOfOrder : 1;
-
-#ifdef DEBUG
- // Minimum number registers that needs to be ensured while
- // constraining candidates for this ref position under
- // LSRA stress.
- unsigned minRegCandidateCount;
-
- // The unique RefPosition number, equal to its index in the
- // refPositions list. Only used for debugging dumps.
- unsigned rpNum;
-#endif // DEBUG
-
- RefPosition(unsigned int bbNum, LsraLocation nodeLocation, GenTree* treeNode, RefType refType)
- : referent(nullptr)
- , nextRefPosition(nullptr)
- , treeNode(treeNode)
- , bbNum(bbNum)
- , nodeLocation(nodeLocation)
- , registerAssignment(RBM_NONE)
- , refType(refType)
- , multiRegIdx(0)
- , lastUse(false)
- , reload(false)
- , spillAfter(false)
- , copyReg(false)
- , moveReg(false)
- , isPhysRegRef(false)
- , isFixedRegRef(false)
- , isLocalDefUse(false)
- , delayRegFree(false)
- , outOfOrder(false)
-#ifdef DEBUG
- , minRegCandidateCount(1)
- , rpNum(0)
-#endif
- {
- }
-
- Interval* getInterval()
- {
- assert(!isPhysRegRef);
- return (Interval*)referent;
- }
- void setInterval(Interval* i)
- {
- referent = i;
- isPhysRegRef = false;
- }
-
- RegRecord* getReg()
- {
- assert(isPhysRegRef);
- return (RegRecord*)referent;
- }
- void setReg(RegRecord* r)
- {
- referent = r;
- isPhysRegRef = true;
- registerAssignment = genRegMask(r->regNum);
- }
-
- regNumber assignedReg()
- {
- if (registerAssignment == RBM_NONE)
- {
- return REG_NA;
- }
-
- return genRegNumFromMask(registerAssignment);
- }
-
- // Returns true if it is a reference on a gentree node.
- bool IsActualRef()
- {
- return (refType == RefTypeDef || refType == RefTypeUse);
- }
-
- bool RequiresRegister()
- {
- return (IsActualRef()
-#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
- || refType == RefTypeUpperVectorSaveDef || refType == RefTypeUpperVectorSaveUse
-#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
- ) &&
- !AllocateIfProfitable();
- }
-
- void setAllocateIfProfitable(bool val)
- {
- allocRegIfProfitable = val;
- }
-
- // Returns true whether this ref position is to be allocated
- // a reg only if it is profitable.
- bool AllocateIfProfitable()
- {
- // TODO-CQ: Right now if a ref position is marked as
- // copyreg or movereg, then it is not treated as
- // 'allocate if profitable'. This is an implementation
- // limitation that needs to be addressed.
- return allocRegIfProfitable && !copyReg && !moveReg;
- }
-
- void setMultiRegIdx(unsigned idx)
- {
- multiRegIdx = idx;
- assert(multiRegIdx == idx);
- }
-
- unsigned getMultiRegIdx()
- {
- return multiRegIdx;
- }
-
- LsraLocation getRefEndLocation()
- {
- return delayRegFree ? nodeLocation + 1 : nodeLocation;
- }
-
- bool isIntervalRef()
- {
- return (!isPhysRegRef && (referent != nullptr));
- }
-
- // isTrueDef indicates that the RefPosition is a non-update def of a non-internal
- // interval
- bool isTrueDef()
- {
- return (refType == RefTypeDef && isIntervalRef() && !getInterval()->isInternal);
- }
-
- // isFixedRefOfRegMask indicates that the RefPosition has a fixed assignment to the register
- // specified by the given mask
- bool isFixedRefOfRegMask(regMaskTP regMask)
- {
- assert(genMaxOneBit(regMask));
- return (registerAssignment == regMask);
- }
-
- // isFixedRefOfReg indicates that the RefPosition has a fixed assignment to the given register
- bool isFixedRefOfReg(regNumber regNum)
- {
- return (isFixedRefOfRegMask(genRegMask(regNum)));
- }
-
-#ifdef DEBUG
- // operator= copies everything except 'rpNum', which must remain unique
- RefPosition& operator=(const RefPosition& rp)
- {
- unsigned rpNumSave = rpNum;
- memcpy(this, &rp, sizeof(rp));
- rpNum = rpNumSave;
- return *this;
- }
-
- void dump();
-#endif // DEBUG
-};
-
-#ifdef DEBUG
-void dumpRegMask(regMaskTP regs);
-#endif // DEBUG
-
-/*****************************************************************************/
-#endif //_LSRA_H_
-/*****************************************************************************/
+// Licensed to the .NET Foundation under one or more agreements.\r
+// The .NET Foundation licenses this file to you under the MIT license.\r
+// See the LICENSE file in the project root for more information.\r
+/*****************************************************************************/\r
+\r
+#ifndef _LSRA_H_\r
+#define _LSRA_H_\r
+\r
+#include "arraylist.h"\r
+#include "smallhash.h"\r
+#include "nodeinfo.h"\r
+\r
+// Minor and forward-reference types\r
+class Interval;\r
+class RefPosition;\r
+class LinearScan;\r
+class RegRecord;\r
+\r
+template <class T>\r
+class ArrayStack;\r
+\r
+// LsraLocation tracks the linearized order of the nodes.\r
+// Each node is assigned two LsraLocations - one for all the uses and all but the last\r
+// def, and a second location for the last def (if any)\r
+\r
+typedef unsigned int LsraLocation;\r
+const unsigned int MinLocation = 0;\r
+const unsigned int MaxLocation = UINT_MAX;\r
+// max number of registers an operation could require internally (in addition to uses and defs)\r
+const unsigned int MaxInternalRegisters = 8;\r
+const unsigned int RegisterTypeCount = 2;\r
+\r
+typedef var_types RegisterType;\r
+#define IntRegisterType TYP_INT\r
+#define FloatRegisterType TYP_FLOAT\r
+\r
+inline regMaskTP calleeSaveRegs(RegisterType rt)\r
+{\r
+ return varTypeIsIntegralOrI(rt) ? RBM_INT_CALLEE_SAVED : RBM_FLT_CALLEE_SAVED;\r
+}\r
+\r
+struct LocationInfo\r
+{\r
+ Interval* interval;\r
+ GenTree* treeNode;\r
+ LsraLocation loc;\r
+ TreeNodeInfo info;\r
+\r
+ LocationInfo(LsraLocation l, Interval* i, GenTree* t, unsigned regIdx = 0) : interval(i), treeNode(t), loc(l)\r
+ {\r
+ }\r
+\r
+ // default constructor for data structures\r
+ LocationInfo()\r
+ {\r
+ }\r
+};\r
+\r
+//------------------------------------------------------------------------\r
+// LocationInfoListNode: used to store a single `LocationInfo` value for a\r
+// node during `buildIntervals`.\r
+//\r
+// This is the node type for `LocationInfoList` below.\r
+//\r
+class LocationInfoListNode final : public LocationInfo\r
+{\r
+ friend class LocationInfoList;\r
+ friend class LocationInfoListNodePool;\r
+\r
+ LocationInfoListNode* m_next; // The next node in the list\r
+\r
+public:\r
+ LocationInfoListNode(LsraLocation l, Interval* i, GenTree* t, unsigned regIdx = 0) : LocationInfo(l, i, t, regIdx)\r
+ {\r
+ }\r
+\r
+ //------------------------------------------------------------------------\r
+ // LocationInfoListNode::Next: Returns the next node in the list.\r
+ LocationInfoListNode* Next() const\r
+ {\r
+ return m_next;\r
+ }\r
+};\r
+\r
+//------------------------------------------------------------------------\r
+// LocationInfoList: used to store a list of `LocationInfo` values for a\r
+// node during `buildIntervals`.\r
+//\r
+// This list of 'LocationInfoListNode's contains the source nodes consumed by\r
+// a node, and is created by 'TreeNodeInfoInit'.\r
+//\r
+class LocationInfoList final\r
+{\r
+ friend class LocationInfoListNodePool;\r
+\r
+ LocationInfoListNode* m_head; // The head of the list\r
+ LocationInfoListNode* m_tail; // The tail of the list\r
+\r
+public:\r
+ LocationInfoList() : m_head(nullptr), m_tail(nullptr)\r
+ {\r
+ }\r
+\r
+ LocationInfoList(LocationInfoListNode* node) : m_head(node), m_tail(node)\r
+ {\r
+ assert(m_head->m_next == nullptr);\r
+ }\r
+\r
+ //------------------------------------------------------------------------\r
+ // LocationInfoList::IsEmpty: Returns true if the list is empty.\r
+ //\r
+ bool IsEmpty() const\r
+ {\r
+ return m_head == nullptr;\r
+ }\r
+\r
+ //------------------------------------------------------------------------\r
+ // LocationInfoList::Begin: Returns the first node in the list.\r
+ //\r
+ LocationInfoListNode* Begin() const\r
+ {\r
+ return m_head;\r
+ }\r
+\r
+ //------------------------------------------------------------------------\r
+ // LocationInfoList::End: Returns the position after the last node in the\r
+ // list. The returned value is suitable for use as\r
+ // a sentinel for iteration.\r
+ //\r
+ LocationInfoListNode* End() const\r
+ {\r
+ return nullptr;\r
+ }\r
+\r
+    //------------------------------------------------------------------------\r
+    // LocationInfoList::Last: Returns the last node in the list, or nullptr\r
+    //                         if the list is empty.\r
+    //\r
+ LocationInfoListNode* Last() const\r
+ {\r
+ return m_tail;\r
+ }\r
+\r
+ //------------------------------------------------------------------------\r
+ // LocationInfoList::Append: Appends a node to the list.\r
+ //\r
+ // Arguments:\r
+ // node - The node to append. Must not be part of an existing list.\r
+ //\r
+ void Append(LocationInfoListNode* node)\r
+ {\r
+ assert(node->m_next == nullptr);\r
+\r
+ if (m_tail == nullptr)\r
+ {\r
+ assert(m_head == nullptr);\r
+ m_head = node;\r
+ }\r
+ else\r
+ {\r
+ m_tail->m_next = node;\r
+ }\r
+\r
+ m_tail = node;\r
+ }\r
+ //------------------------------------------------------------------------\r
+ // LocationInfoList::Append: Appends another list to this list.\r
+ //\r
+ // Arguments:\r
+ // other - The list to append.\r
+ //\r
+ void Append(LocationInfoList other)\r
+ {\r
+ if (m_tail == nullptr)\r
+ {\r
+ assert(m_head == nullptr);\r
+ m_head = other.m_head;\r
+ }\r
+ else\r
+ {\r
+ m_tail->m_next = other.m_head;\r
+ }\r
+\r
+ m_tail = other.m_tail;\r
+ }\r
+\r
+ //------------------------------------------------------------------------\r
+ // LocationInfoList::Prepend: Prepends a node to the list.\r
+ //\r
+ // Arguments:\r
+ // node - The node to prepend. Must not be part of an existing list.\r
+ //\r
+ void Prepend(LocationInfoListNode* node)\r
+ {\r
+ assert(node->m_next == nullptr);\r
+\r
+ if (m_head == nullptr)\r
+ {\r
+ assert(m_tail == nullptr);\r
+ m_tail = node;\r
+ }\r
+ else\r
+ {\r
+ node->m_next = m_head;\r
+ }\r
+\r
+ m_head = node;\r
+ }\r
+\r
+ //------------------------------------------------------------------------\r
+ // LocationInfoList::Add: Adds a node to the list.\r
+ //\r
+ // Arguments:\r
+ // node - The node to add. Must not be part of an existing list.\r
+ // prepend - True if it should be prepended (otherwise is appended)\r
+ //\r
+ void Add(LocationInfoListNode* node, bool prepend)\r
+ {\r
+ if (prepend)\r
+ {\r
+ Prepend(node);\r
+ }\r
+ else\r
+ {\r
+ Append(node);\r
+ }\r
+ }\r
+\r
+ //------------------------------------------------------------------------\r
+ // GetTreeNodeInfo - retrieve the TreeNodeInfo for the given node\r
+ //\r
+ // Notes:\r
+ // The TreeNodeInfoInit methods use this helper to retrieve the TreeNodeInfo for child nodes\r
+ // from the useList being constructed. Note that, if the user knows the order of the operands,\r
+ // it is expected that they should just retrieve them directly.\r
+\r
+ TreeNodeInfo& GetTreeNodeInfo(GenTree* node)\r
+ {\r
+ for (LocationInfoListNode *listNode = Begin(), *end = End(); listNode != end; listNode = listNode->Next())\r
+ {\r
+ if (listNode->treeNode == node)\r
+ {\r
+ return listNode->info;\r
+ }\r
+ }\r
+ assert(!"GetTreeNodeInfo didn't find the node");\r
+ unreached();\r
+ }\r
+\r
+ //------------------------------------------------------------------------\r
+ // LocationInfoList::GetSecond: Gets the second node in the list.\r
+ //\r
+ // Arguments:\r
+ // (DEBUG ONLY) treeNode - The GenTree* we expect to be in the second node.\r
+ //\r
+ LocationInfoListNode* GetSecond(INDEBUG(GenTree* treeNode))\r
+ {\r
+ noway_assert((Begin() != nullptr) && (Begin()->Next() != nullptr));\r
+ LocationInfoListNode* second = Begin()->Next();\r
+ assert(second->treeNode == treeNode);\r
+ return second;\r
+ }\r
+};\r
+\r
+struct LsraBlockInfo\r
+{\r
+ // bbNum of the predecessor to use for the register location of live-in variables.\r
+ // 0 for fgFirstBB.\r
+ unsigned int predBBNum;\r
+ BasicBlock::weight_t weight;\r
+ bool hasCriticalInEdge;\r
+ bool hasCriticalOutEdge;\r
+\r
+#if TRACK_LSRA_STATS\r
+ // Per block maintained LSRA statistics.\r
+\r
+ // Number of spills of local vars or tree temps in this basic block.\r
+ unsigned spillCount;\r
+\r
+ // Number of GT_COPY nodes inserted in this basic block while allocating regs.\r
+ // Note that GT_COPY nodes are also inserted as part of basic block boundary\r
+ // resolution, which are accounted against resolutionMovCount but not\r
+ // against copyRegCount.\r
+ unsigned copyRegCount;\r
+\r
+ // Number of resolution moves inserted in this basic block.\r
+ unsigned resolutionMovCount;\r
+\r
+ // Number of critical edges from this block that are split.\r
+ unsigned splitEdgeCount;\r
+#endif // TRACK_LSRA_STATS\r
+};\r
+\r
+// This is sort of a bit mask\r
+// The low order 2 bits will be 1 for defs, and 2 for uses\r
+enum RefType : unsigned char\r
+{\r
+#define DEF_REFTYPE(memberName, memberValue, shortName) memberName = memberValue,\r
+#include "lsra_reftypes.h"\r
+#undef DEF_REFTYPE\r
+};\r
+\r
+// position in a block (for resolution)\r
+enum BlockStartOrEnd\r
+{\r
+ BlockPositionStart = 0,\r
+ BlockPositionEnd = 1,\r
+ PositionCount = 2\r
+};\r
+\r
+inline bool RefTypeIsUse(RefType refType)\r
+{\r
+ return ((refType & RefTypeUse) == RefTypeUse);\r
+}\r
+\r
+inline bool RefTypeIsDef(RefType refType)\r
+{\r
+ return ((refType & RefTypeDef) == RefTypeDef);\r
+}\r
+\r
+typedef regNumberSmall* VarToRegMap;\r
+\r
+template <typename ElementType, CompMemKind MemKind>\r
+class ListElementAllocator\r
+{\r
+private:\r
+ template <typename U, CompMemKind CMK>\r
+ friend class ListElementAllocator;\r
+\r
+ Compiler* m_compiler;\r
+\r
+public:\r
+ ListElementAllocator(Compiler* compiler) : m_compiler(compiler)\r
+ {\r
+ }\r
+\r
+ template <typename U>\r
+ ListElementAllocator(const ListElementAllocator<U, MemKind>& other) : m_compiler(other.m_compiler)\r
+ {\r
+ }\r
+\r
+ ElementType* allocate(size_t count)\r
+ {\r
+ return reinterpret_cast<ElementType*>(m_compiler->compGetMem(sizeof(ElementType) * count, MemKind));\r
+ }\r
+\r
+ void deallocate(ElementType* pointer, size_t count)\r
+ {\r
+ }\r
+\r
+ template <typename U>\r
+ struct rebind\r
+ {\r
+ typedef ListElementAllocator<U, MemKind> allocator;\r
+ };\r
+};\r
+\r
+typedef ListElementAllocator<Interval, CMK_LSRA_Interval> LinearScanMemoryAllocatorInterval;\r
+typedef ListElementAllocator<RefPosition, CMK_LSRA_RefPosition> LinearScanMemoryAllocatorRefPosition;\r
+\r
+typedef jitstd::list<Interval, LinearScanMemoryAllocatorInterval> IntervalList;\r
+typedef jitstd::list<RefPosition, LinearScanMemoryAllocatorRefPosition> RefPositionList;\r
+\r
+class Referenceable\r
+{\r
+public:\r
+ Referenceable()\r
+ {\r
+ firstRefPosition = nullptr;\r
+ recentRefPosition = nullptr;\r
+ lastRefPosition = nullptr;\r
+ isActive = false;\r
+ }\r
+\r
+ // A linked list of RefPositions. These are only traversed in the forward\r
+ // direction, and are not moved, so they don't need to be doubly linked\r
+ // (see RefPosition).\r
+\r
+ RefPosition* firstRefPosition;\r
+ RefPosition* recentRefPosition;\r
+ RefPosition* lastRefPosition;\r
+\r
+ bool isActive;\r
+\r
+ // Get the position of the next reference which is at or greater than\r
+ // the current location (relies upon recentRefPosition being updated
+ // during traversal).\r
+ RefPosition* getNextRefPosition();\r
+ LsraLocation getNextRefLocation();\r
+};\r
+\r
+class RegRecord : public Referenceable\r
+{\r
+public:\r
+ RegRecord()\r
+ {\r
+ assignedInterval = nullptr;\r
+ previousInterval = nullptr;\r
+ regNum = REG_NA;\r
+ isCalleeSave = false;\r
+ registerType = IntRegisterType;\r
+ isBusyUntilNextKill = false;\r
+ }\r
+\r
+ void init(regNumber reg)\r
+ {\r
+#ifdef _TARGET_ARM64_\r
+ // The Zero register, or the SP\r
+ if ((reg == REG_ZR) || (reg == REG_SP))\r
+ {\r
+ // IsGeneralRegister returns false for REG_ZR and REG_SP\r
+ regNum = reg;\r
+ registerType = IntRegisterType;\r
+ }\r
+ else\r
+#endif\r
+ if (emitter::isFloatReg(reg))\r
+ {\r
+ registerType = FloatRegisterType;\r
+ }\r
+ else\r
+ {\r
+ // The constructor defaults to IntRegisterType\r
+ assert(emitter::isGeneralRegister(reg) && registerType == IntRegisterType);\r
+ }\r
+ regNum = reg;\r
+ isCalleeSave = ((RBM_CALLEE_SAVED & genRegMask(reg)) != 0);\r
+ }\r
+\r
+#ifdef DEBUG\r
+ // print out representation\r
+ void dump();\r
+ // concise representation for embedding\r
+ void tinyDump();\r
+#endif // DEBUG\r
+\r
+ bool isFree();\r
+\r
+ // RefPosition * getNextRefPosition();\r
+ // LsraLocation getNextRefLocation();\r
+\r
+ // DATA\r
+\r
+ // interval to which this register is currently allocated.\r
+ // If the interval is inactive (isActive == false) then it is not currently live,\r
+ // and the register can be unassigned (i.e. setting assignedInterval to nullptr)
+ // without spilling the register.\r
+ Interval* assignedInterval;\r
+ // Interval to which this register was previously allocated, and which was unassigned\r
+ // because it was inactive. This register will be reassigned to this Interval when\r
+ // assignedInterval becomes inactive.\r
+ Interval* previousInterval;\r
+\r
+ regNumber regNum;\r
+ bool isCalleeSave;\r
+ RegisterType registerType;\r
+ // This register must be considered busy until the next time it is explicitly killed.\r
+ // This is used so that putarg_reg can avoid killing its lclVar source, while avoiding\r
+ // the problem with the reg becoming free if the last-use is encountered before the call.\r
+ bool isBusyUntilNextKill;\r
+\r
+ bool conflictingFixedRegReference(RefPosition* refPosition);\r
+};\r
+\r
+inline bool leafInRange(GenTree* leaf, int lower, int upper)\r
+{\r
+ if (!leaf->IsIntCnsFitsInI32())\r
+ {\r
+ return false;\r
+ }\r
+ if (leaf->gtIntCon.gtIconVal < lower)\r
+ {\r
+ return false;\r
+ }\r
+ if (leaf->gtIntCon.gtIconVal > upper)\r
+ {\r
+ return false;\r
+ }\r
+\r
+ return true;\r
+}\r
+\r
+inline bool leafInRange(GenTree* leaf, int lower, int upper, int multiple)\r
+{\r
+ if (!leafInRange(leaf, lower, upper))\r
+ {\r
+ return false;\r
+ }\r
+ if (leaf->gtIntCon.gtIconVal % multiple)\r
+ {\r
+ return false;\r
+ }\r
+\r
+ return true;\r
+}\r
+\r
+inline bool leafAddInRange(GenTree* leaf, int lower, int upper, int multiple = 1)\r
+{\r
+ if (leaf->OperGet() != GT_ADD)\r
+ {\r
+ return false;\r
+ }\r
+ return leafInRange(leaf->gtOp.gtOp2, lower, upper, multiple);\r
+}\r
+\r
+inline bool isCandidateVar(LclVarDsc* varDsc)\r
+{\r
+ return varDsc->lvLRACandidate;\r
+}\r
+\r
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX\r
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX\r
+XX XX\r
+XX LinearScan XX\r
+XX XX\r
+XX This is the container for the Linear Scan data structures and methods. XX\r
+XX XX\r
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX\r
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX\r
+*/\r
+// OPTION 1: The algorithm as described in "Optimized Interval Splitting in a\r
+// Linear Scan Register Allocator". It is driven by iterating over the Interval\r
+// lists. In this case, we need multiple IntervalLists, and Intervals will be\r
+// moved between them so they must be easily updated.\r
+\r
+// OPTION 2: The algorithm is driven by iterating over the RefPositions. In this\r
+// case, we only need a single IntervalList, and it won't be updated.\r
+// The RefPosition must refer to its Interval, and we need to be able to traverse\r
+// to the next RefPosition in code order\r
+// THIS IS THE OPTION CURRENTLY BEING PURSUED\r
+\r
+class LocationInfoList;\r
+class LocationInfoListNodePool;\r
+\r
+class LinearScan : public LinearScanInterface\r
+{\r
+ friend class RefPosition;\r
+ friend class Interval;\r
+ friend class Lowering;\r
+ friend class TreeNodeInfo;\r
+\r
+public:\r
+ // This could use further abstraction. From Compiler we need the tree,\r
+ // the flowgraph and the allocator.\r
+ LinearScan(Compiler* theCompiler);\r
+\r
+ // This is the main driver\r
+ virtual void doLinearScan();\r
+\r
+ // TreeNodeInfo contains three register masks: src candidates, dst candidates, and internal candidates.
+ // Instead of storing actual register masks, however, which are large, we store a small index into a table\r
+ // of register masks, stored in this class. We create only as many distinct register masks as are needed.\r
+ // All identical register masks get the same index. The register mask table contains:\r
+ // 1. A mask containing all eligible integer registers.\r
+ // 2. A mask containing all eligible floating-point registers.
+ // 3. A mask for each single register.
+ // 4. A mask for each combination of registers, created dynamically as required.\r
+ //\r
+ // Currently, the maximum number of masks allowed is a constant defined by 'numMasks'. The register mask\r
+ // table is never resized. It is also limited by the size of the index, currently an unsigned char.\r
+ CLANG_FORMAT_COMMENT_ANCHOR;\r
+\r
+#if defined(_TARGET_ARM64_)\r
+ static const int numMasks = 128;\r
+#else\r
+ static const int numMasks = 64;\r
+#endif\r
+\r
+ regMaskTP* regMaskTable;\r
+ int nextFreeMask;\r
+\r
+ typedef int RegMaskIndex;\r
+\r
+ // allint is 0, allfloat is 1, all the single-bit masks start at 2\r
+ enum KnownRegIndex\r
+ {\r
+ ALLINT_IDX = 0,\r
+ ALLFLOAT_IDX = 1,\r
+ FIRST_SINGLE_REG_IDX = 2\r
+ };\r
+\r
+ RegMaskIndex GetIndexForRegMask(regMaskTP mask);\r
+ regMaskTP GetRegMaskForIndex(RegMaskIndex index);\r
+ void RemoveRegisterFromMasks(regNumber reg);\r
+\r
+#ifdef DEBUG\r
+ void dspRegisterMaskTable();\r
+#endif // DEBUG\r
+\r
+ // Initialize the block traversal for LSRA.\r
+ // This resets the bbVisitedSet, and on the first invocation sets the blockSequence array,\r
+ // which determines the order in which blocks will be allocated (currently called during Lowering).\r
+ BasicBlock* startBlockSequence();\r
+ // Move to the next block in sequence, updating the current block information.\r
+ BasicBlock* moveToNextBlock();\r
+ // Get the next block to be scheduled without changing the current block,\r
+ // but updating the blockSequence during the first iteration if it is not fully computed.\r
+ BasicBlock* getNextBlock();\r
+\r
+ // This is called during code generation to update the location of variables\r
+ virtual void recordVarLocationsAtStartOfBB(BasicBlock* bb);\r
+\r
+ // This does the dataflow analysis and builds the intervals\r
+ void buildIntervals();\r
+\r
+ // This is where the actual assignment is done\r
+ void allocateRegisters();\r
+\r
+ // This is the resolution phase, where cross-block mismatches are fixed up\r
+ void resolveRegisters();\r
+\r
+ void writeRegisters(RefPosition* currentRefPosition, GenTree* tree);\r
+\r
+ // Insert a copy in the case where a tree node value must be moved to a different\r
+ // register at the point of use, or it is reloaded to a different register\r
+ // than the one it was spilled from\r
+ void insertCopyOrReload(BasicBlock* block, GenTree* tree, unsigned multiRegIdx, RefPosition* refPosition);\r
+\r
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE\r
+ // Insert code to save and restore the upper half of a vector that lives\r
+ // in a callee-save register at the point of a call (the upper half is\r
+ // not preserved).\r
+ void insertUpperVectorSaveAndReload(GenTree* tree, RefPosition* refPosition, BasicBlock* block);\r
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE\r
+\r
+ // resolve along one block-block edge\r
+ enum ResolveType\r
+ {\r
+ ResolveSplit,\r
+ ResolveJoin,\r
+ ResolveCritical,\r
+ ResolveSharedCritical,\r
+ ResolveTypeCount\r
+ };\r
+#ifdef DEBUG\r
+ static const char* resolveTypeName[ResolveTypeCount];\r
+#endif\r
+\r
+ enum WhereToInsert\r
+ {\r
+ InsertAtTop,\r
+ InsertAtBottom\r
+ };\r
+\r
+#ifdef _TARGET_ARM_\r
+ void addResolutionForDouble(BasicBlock* block,\r
+ GenTreePtr insertionPoint,\r
+ Interval** sourceIntervals,\r
+ regNumberSmall* location,\r
+ regNumber toReg,\r
+ regNumber fromReg,\r
+ ResolveType resolveType);\r
+#endif\r
+ void addResolution(\r
+ BasicBlock* block, GenTree* insertionPoint, Interval* interval, regNumber outReg, regNumber inReg);\r
+\r
+ void handleOutgoingCriticalEdges(BasicBlock* block);\r
+\r
+ void resolveEdge(BasicBlock* fromBlock, BasicBlock* toBlock, ResolveType resolveType, VARSET_VALARG_TP liveSet);\r
+\r
+ void resolveEdges();\r
+\r
+ // Finally, the register assignments are written back to the tree nodes.\r
+ void recordRegisterAssignments();\r
+\r
+ // Keep track of how many temp locations we'll need for spill\r
+ void initMaxSpill();\r
+ void updateMaxSpill(RefPosition* refPosition);\r
+ void recordMaxSpill();\r
+\r
+ // max simultaneous spill locations used of every type\r
+ unsigned int maxSpill[TYP_COUNT];\r
+ unsigned int currentSpill[TYP_COUNT];\r
+ bool needFloatTmpForFPCall;\r
+ bool needDoubleTmpForFPCall;\r
+\r
+#ifdef DEBUG\r
+private:\r
+ //------------------------------------------------------------------------\r
+ // Should we stress lsra?\r
+ // This uses the same COMPLUS variable as rsStressRegs (COMPlus_JitStressRegs)\r
+ // However, the possible values and their interpretation are entirely different.\r
+ //\r
+ // The mask bits are currently divided into fields in which each non-zero value\r
+ // is a distinct stress option (e.g. 0x3 is not a combination of 0x1 and 0x2).\r
+ // However, subject to possible constraints (to be determined), the different\r
+ // fields can be combined (e.g. 0x7 is a combination of 0x3 and 0x4).\r
+ // Note that the field values are declared in a public enum, but the actual bits are\r
+ // only accessed via accessors.\r
+\r
+ unsigned lsraStressMask;\r
+\r
+ // This controls the registers available for allocation\r
+ enum LsraStressLimitRegs{LSRA_LIMIT_NONE = 0, LSRA_LIMIT_CALLEE = 0x1, LSRA_LIMIT_CALLER = 0x2,\r
+ LSRA_LIMIT_SMALL_SET = 0x3, LSRA_LIMIT_MASK = 0x3};\r
+\r
+ // When LSRA_LIMIT_SMALL_SET is specified, it is desirable to select a "mixed" set of caller- and callee-save\r
+ // registers, so as to get different coverage than limiting to callee or caller.\r
+ // At least for x86 and AMD64, and potentially other architecture that will support SIMD,\r
+ // we need a minimum of 5 fp regs in order to support the InitN intrinsic for Vector4.\r
+ // Hence the "SmallFPSet" has 5 elements.\r
+ CLANG_FORMAT_COMMENT_ANCHOR;\r
+\r
+#if defined(_TARGET_AMD64_)\r
+#ifdef UNIX_AMD64_ABI\r
+ // On System V the RDI and RSI are not callee saved. Use R12 and R13 as callee saved registers.
+ static const regMaskTP LsraLimitSmallIntSet =\r
+ (RBM_EAX | RBM_ECX | RBM_EBX | RBM_ETW_FRAMED_EBP | RBM_R12 | RBM_R13);\r
+#else // !UNIX_AMD64_ABI\r
+ // On Windows Amd64 use the RDI and RSI as callee saved registers.\r
+ static const regMaskTP LsraLimitSmallIntSet =\r
+ (RBM_EAX | RBM_ECX | RBM_EBX | RBM_ETW_FRAMED_EBP | RBM_ESI | RBM_EDI);\r
+#endif // !UNIX_AMD64_ABI\r
+ static const regMaskTP LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7);\r
+#elif defined(_TARGET_ARM_)\r
+ static const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3 | RBM_R4);\r
+ static const regMaskTP LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F16 | RBM_F17);\r
+#elif defined(_TARGET_ARM64_)\r
+ static const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R19 | RBM_R20);\r
+ static const regMaskTP LsraLimitSmallFPSet = (RBM_V0 | RBM_V1 | RBM_V2 | RBM_V8 | RBM_V9);\r
+#elif defined(_TARGET_X86_)\r
+ static const regMaskTP LsraLimitSmallIntSet = (RBM_EAX | RBM_ECX | RBM_EDI);\r
+ static const regMaskTP LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7);\r
+#else\r
+#error Unsupported or unset target architecture\r
+#endif // target\r
+\r
+ LsraStressLimitRegs getStressLimitRegs()\r
+ {\r
+ return (LsraStressLimitRegs)(lsraStressMask & LSRA_LIMIT_MASK);\r
+ }\r
+\r
+ regMaskTP getConstrainedRegMask(regMaskTP regMaskActual, regMaskTP regMaskConstrain, unsigned minRegCount);\r
+ regMaskTP stressLimitRegs(RefPosition* refPosition, regMaskTP mask);\r
+\r
+ // This controls the heuristics used to select registers\r
+ // These can be combined.\r
+ enum LsraSelect{LSRA_SELECT_DEFAULT = 0, LSRA_SELECT_REVERSE_HEURISTICS = 0x04,\r
+ LSRA_SELECT_REVERSE_CALLER_CALLEE = 0x08, LSRA_SELECT_NEAREST = 0x10, LSRA_SELECT_MASK = 0x1c};\r
+ LsraSelect getSelectionHeuristics()\r
+ {\r
+ return (LsraSelect)(lsraStressMask & LSRA_SELECT_MASK);\r
+ }\r
+ bool doReverseSelect()\r
+ {\r
+ return ((lsraStressMask & LSRA_SELECT_REVERSE_HEURISTICS) != 0);\r
+ }\r
+ bool doReverseCallerCallee()\r
+ {\r
+ return ((lsraStressMask & LSRA_SELECT_REVERSE_CALLER_CALLEE) != 0);\r
+ }\r
+ bool doSelectNearest()\r
+ {\r
+ return ((lsraStressMask & LSRA_SELECT_NEAREST) != 0);\r
+ }\r
+\r
+ // This controls the order in which basic blocks are visited during allocation\r
+ enum LsraTraversalOrder{LSRA_TRAVERSE_LAYOUT = 0x20, LSRA_TRAVERSE_PRED_FIRST = 0x40,\r
+ LSRA_TRAVERSE_RANDOM = 0x60, // NYI\r
+ LSRA_TRAVERSE_DEFAULT = LSRA_TRAVERSE_PRED_FIRST, LSRA_TRAVERSE_MASK = 0x60};\r
+ LsraTraversalOrder getLsraTraversalOrder()\r
+ {\r
+ if ((lsraStressMask & LSRA_TRAVERSE_MASK) == 0)\r
+ {\r
+ return LSRA_TRAVERSE_DEFAULT;\r
+ }\r
+ return (LsraTraversalOrder)(lsraStressMask & LSRA_TRAVERSE_MASK);\r
+ }\r
+ bool isTraversalLayoutOrder()\r
+ {\r
+ return getLsraTraversalOrder() == LSRA_TRAVERSE_LAYOUT;\r
+ }\r
+ bool isTraversalPredFirstOrder()\r
+ {\r
+ return getLsraTraversalOrder() == LSRA_TRAVERSE_PRED_FIRST;\r
+ }\r
+\r
+ // This controls whether lifetimes should be extended to the entire method.\r
+ // Note that this has no effect under MinOpts\r
+ enum LsraExtendLifetimes{LSRA_DONT_EXTEND = 0, LSRA_EXTEND_LIFETIMES = 0x80, LSRA_EXTEND_LIFETIMES_MASK = 0x80};\r
+ LsraExtendLifetimes getLsraExtendLifeTimes()\r
+ {\r
+ return (LsraExtendLifetimes)(lsraStressMask & LSRA_EXTEND_LIFETIMES_MASK);\r
+ }\r
+ bool extendLifetimes()\r
+ {\r
+ return getLsraExtendLifeTimes() == LSRA_EXTEND_LIFETIMES;\r
+ }\r
+\r
+ // This controls whether variables locations should be set to the previous block in layout order\r
+ // (LSRA_BLOCK_BOUNDARY_LAYOUT), or to that of the highest-weight predecessor (LSRA_BLOCK_BOUNDARY_PRED -\r
+ // the default), or rotated (LSRA_BLOCK_BOUNDARY_ROTATE).\r
+ enum LsraBlockBoundaryLocations{LSRA_BLOCK_BOUNDARY_PRED = 0, LSRA_BLOCK_BOUNDARY_LAYOUT = 0x100,\r
+ LSRA_BLOCK_BOUNDARY_ROTATE = 0x200, LSRA_BLOCK_BOUNDARY_MASK = 0x300};\r
+ LsraBlockBoundaryLocations getLsraBlockBoundaryLocations()\r
+ {\r
+ return (LsraBlockBoundaryLocations)(lsraStressMask & LSRA_BLOCK_BOUNDARY_MASK);\r
+ }\r
+ regNumber rotateBlockStartLocation(Interval* interval, regNumber targetReg, regMaskTP availableRegs);\r
+\r
+ // This controls whether we always insert a GT_RELOAD instruction after a spill\r
+ // Note that this can be combined with LSRA_SPILL_ALWAYS (or not)\r
+ enum LsraReload{LSRA_NO_RELOAD_IF_SAME = 0, LSRA_ALWAYS_INSERT_RELOAD = 0x400, LSRA_RELOAD_MASK = 0x400};\r
+ LsraReload getLsraReload()\r
+ {\r
+ return (LsraReload)(lsraStressMask & LSRA_RELOAD_MASK);\r
+ }\r
+ bool alwaysInsertReload()\r
+ {\r
+ return getLsraReload() == LSRA_ALWAYS_INSERT_RELOAD;\r
+ }\r
+\r
+ // This controls whether we spill everywhere\r
+ enum LsraSpill{LSRA_DONT_SPILL_ALWAYS = 0, LSRA_SPILL_ALWAYS = 0x800, LSRA_SPILL_MASK = 0x800};\r
+ LsraSpill getLsraSpill()\r
+ {\r
+ return (LsraSpill)(lsraStressMask & LSRA_SPILL_MASK);\r
+ }\r
+ bool spillAlways()\r
+ {\r
+ return getLsraSpill() == LSRA_SPILL_ALWAYS;\r
+ }\r
+\r
+ // This controls whether RefPositions that lower/codegen indicated as reg optional be\r
+ // allocated a reg at all.\r
+ enum LsraRegOptionalControl{LSRA_REG_OPTIONAL_DEFAULT = 0, LSRA_REG_OPTIONAL_NO_ALLOC = 0x1000,\r
+ LSRA_REG_OPTIONAL_MASK = 0x1000};\r
+\r
+ LsraRegOptionalControl getLsraRegOptionalControl()\r
+ {\r
+ return (LsraRegOptionalControl)(lsraStressMask & LSRA_REG_OPTIONAL_MASK);\r
+ }\r
+\r
+ bool regOptionalNoAlloc()\r
+ {\r
+ return getLsraRegOptionalControl() == LSRA_REG_OPTIONAL_NO_ALLOC;\r
+ }\r
+\r
+ bool candidatesAreStressLimited()\r
+ {\r
+ return ((lsraStressMask & (LSRA_LIMIT_MASK | LSRA_SELECT_MASK)) != 0);\r
+ }\r
+\r
+ // Dump support\r
+ void dumpOperandToLocationInfoMap();\r
+ void lsraDumpIntervals(const char* msg);\r
+ void dumpRefPositions(const char* msg);\r
+ void dumpVarRefPositions(const char* msg);\r
+\r
+ // Checking code\r
+ static bool IsLsraAdded(GenTree* node)\r
+ {\r
+ return ((node->gtDebugFlags & GTF_DEBUG_NODE_LSRA_ADDED) != 0);\r
+ }\r
+ static void SetLsraAdded(GenTree* node)\r
+ {\r
+ node->gtDebugFlags |= GTF_DEBUG_NODE_LSRA_ADDED;\r
+ }\r
+ static bool IsResolutionMove(GenTree* node);\r
+ static bool IsResolutionNode(LIR::Range& containingRange, GenTree* node);\r
+\r
+ void verifyFinalAllocation();\r
+ void verifyResolutionMove(GenTree* resolutionNode, LsraLocation currentLocation);\r
+#else // !DEBUG\r
+ bool doSelectNearest()\r
+ {\r
+ return false;\r
+ }\r
+ bool extendLifetimes()\r
+ {\r
+ return false;\r
+ }\r
+ bool spillAlways()\r
+ {\r
+ return false;\r
+ }\r
+ // In a retail build we support only the default traversal order\r
+ bool isTraversalLayoutOrder()\r
+ {\r
+ return false;\r
+ }\r
+ bool isTraversalPredFirstOrder()\r
+ {\r
+ return true;\r
+ }\r
+ bool getLsraExtendLifeTimes()\r
+ {\r
+ return false;\r
+ }\r
+ static void SetLsraAdded(GenTree* node)\r
+ {\r
+ // do nothing; checked only under DEBUG
+ }\r
+ bool candidatesAreStressLimited()\r
+ {\r
+ return false;\r
+ }\r
+#endif // !DEBUG\r
+\r
+public:\r
+ // Used by Lowering when considering whether to split Longs, as well as by identifyCandidates().\r
+ bool isRegCandidate(LclVarDsc* varDsc);\r
+\r
+ bool isContainableMemoryOp(GenTree* node);\r
+\r
+private:\r
+ // Determine which locals are candidates for allocation\r
+ void identifyCandidates();\r
+\r
+ // determine which locals are used in EH constructs we don't want to deal with\r
+ void identifyCandidatesExceptionDataflow();\r
+\r
+ void buildPhysRegRecords();\r
+\r
+#ifdef DEBUG\r
+ void checkLastUses(BasicBlock* block);\r
+#endif // DEBUG\r
+\r
+ void setFrameType();\r
+\r
+ // Update allocations at start/end of block\r
+ void unassignIntervalBlockStart(RegRecord* regRecord, VarToRegMap inVarToRegMap);\r
+ void processBlockEndAllocation(BasicBlock* current);\r
+\r
+ // Record variable locations at start/end of block\r
+ void processBlockStartLocations(BasicBlock* current, bool allocationPass);\r
+ void processBlockEndLocations(BasicBlock* current);\r
+\r
+#ifdef _TARGET_ARM_\r
+ bool isSecondHalfReg(RegRecord* regRec, Interval* interval);\r
+ RegRecord* getSecondHalfRegRec(RegRecord* regRec);\r
+ RegRecord* findAnotherHalfRegRec(RegRecord* regRec);\r
+ bool canSpillDoubleReg(RegRecord* physRegRecord, LsraLocation refLocation, unsigned* recentAssignedRefWeight);\r
+ void unassignDoublePhysReg(RegRecord* doubleRegRecord);\r
+#endif\r
+ void updateAssignedInterval(RegRecord* reg, Interval* interval, RegisterType regType);\r
+ void updatePreviousInterval(RegRecord* reg, Interval* interval, RegisterType regType);\r
+ bool canRestorePreviousInterval(RegRecord* regRec, Interval* assignedInterval);\r
+ bool isAssignedToInterval(Interval* interval, RegRecord* regRec);\r
+ bool isRefPositionActive(RefPosition* refPosition, LsraLocation refLocation);\r
+ bool canSpillReg(RegRecord* physRegRecord, LsraLocation refLocation, unsigned* recentAssignedRefWeight);\r
+ bool isRegInUse(RegRecord* regRec, RefPosition* refPosition);\r
+\r
+ RefType CheckBlockType(BasicBlock* block, BasicBlock* prevBlock);\r
+\r
+ // insert refpositions representing prolog zero-inits which will be added later\r
+ void insertZeroInitRefPositions();\r
+\r
+ void AddMapping(GenTree* node, LsraLocation loc);\r
+\r
+ // add physreg refpositions for a tree node, based on calling convention and instruction selection predictions\r
+ void addRefsForPhysRegMask(regMaskTP mask, LsraLocation currentLoc, RefType refType, bool isLastUse);\r
+\r
+ void resolveConflictingDefAndUse(Interval* interval, RefPosition* defRefPosition);\r
+\r
+ void buildRefPositionsForNode(GenTree* tree,\r
+ BasicBlock* block,\r
+ LocationInfoListNodePool& listNodePool,\r
+ LsraLocation loc);\r
+\r
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE\r
+ VARSET_VALRET_TP buildUpperVectorSaveRefPositions(GenTree* tree, LsraLocation currentLoc);\r
+ void buildUpperVectorRestoreRefPositions(GenTree* tree, LsraLocation currentLoc, VARSET_VALARG_TP liveLargeVectors);\r
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE\r
+\r
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)\r
+ // For AMD64 on SystemV machines. This method\r
+ // is called as replacement for raUpdateRegStateForArg\r
+ // that is used on Windows. On System V systems a struct can be passed\r
+ // partially using registers from the 2 register files.\r
+ void unixAmd64UpdateRegStateForArg(LclVarDsc* argDsc);\r
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)\r
+\r
+ // Update reg state for an incoming register argument\r
+ void updateRegStateForArg(LclVarDsc* argDsc);\r
+\r
+ inline bool isCandidateLocalRef(GenTree* tree)\r
+ {\r
+ if (tree->IsLocal())\r
+ {\r
+ unsigned int lclNum = tree->gtLclVarCommon.gtLclNum;\r
+ assert(lclNum < compiler->lvaCount);\r
+ LclVarDsc* varDsc = compiler->lvaTable + tree->gtLclVarCommon.gtLclNum;\r
+\r
+ return isCandidateVar(varDsc);\r
+ }\r
+ return false;\r
+ }\r
+\r
+ static Compiler::fgWalkResult markAddrModeOperandsHelperMD(GenTree* tree, void* p);\r
+\r
+ // Return the registers killed by the given tree node.\r
+ regMaskTP getKillSetForNode(GenTree* tree);\r
+\r
+ // Given some tree node add refpositions for all the registers this node kills\r
+ bool buildKillPositionsForNode(GenTree* tree, LsraLocation currentLoc);\r
+\r
+ regMaskTP allRegs(RegisterType rt);\r
+ regMaskTP allRegs(GenTree* tree);\r
+ regMaskTP allMultiRegCallNodeRegs(GenTreeCall* tree);\r
+ regMaskTP allSIMDRegs();\r
+ regMaskTP internalFloatRegCandidates();\r
+\r
+ bool isMultiRegRelated(RefPosition* refPosition, LsraLocation location);\r
+ bool registerIsFree(regNumber regNum, RegisterType regType);\r
+ bool registerIsAvailable(RegRecord* physRegRecord,\r
+ LsraLocation currentLoc,\r
+ LsraLocation* nextRefLocationPtr,\r
+ RegisterType regType);\r
+ void freeRegister(RegRecord* physRegRecord);\r
+ void freeRegisters(regMaskTP regsToFree);\r
+\r
+ var_types getDefType(GenTree* tree);\r
+\r
+ RefPosition* defineNewInternalTemp(GenTree* tree,\r
+ RegisterType regType,\r
+ regMaskTP regMask DEBUGARG(unsigned minRegCandidateCount));\r
+\r
+ int buildInternalRegisterDefsForNode(GenTree* tree,\r
+ TreeNodeInfo* info,\r
+ RefPosition* defs[] DEBUGARG(unsigned minRegCandidateCount));\r
+\r
+ void buildInternalRegisterUsesForNode(GenTree* tree,\r
+ TreeNodeInfo* info,\r
+ RefPosition* defs[],\r
+ int total DEBUGARG(unsigned minRegCandidateCount));\r
+\r
+ void resolveLocalRef(BasicBlock* block, GenTree* treeNode, RefPosition* currentRefPosition);\r
+\r
+ void insertMove(BasicBlock* block, GenTree* insertionPoint, unsigned lclNum, regNumber inReg, regNumber outReg);\r
+\r
+ void insertSwap(\r
+ BasicBlock* block, GenTree* insertionPoint, unsigned lclNum1, regNumber reg1, unsigned lclNum2, regNumber reg2);\r
+\r
+public:\r
+ // TODO-Cleanup: unused?\r
+ class PhysRegIntervalIterator\r
+ {\r
+ public:\r
+ PhysRegIntervalIterator(LinearScan* theLinearScan)\r
+ {\r
+ nextRegNumber = (regNumber)0;\r
+ linearScan = theLinearScan;\r
+ }\r
+ RegRecord* GetNext()\r
+ {\r
+ return &linearScan->physRegs[nextRegNumber];\r
+ }\r
+\r
+ private:\r
+ // This assumes that the physical registers are contiguous, starting\r
+ // with a register number of 0\r
+ regNumber nextRegNumber;\r
+ LinearScan* linearScan;\r
+ };\r
+\r
+private:\r
+ Interval* newInterval(RegisterType regType);\r
+\r
+ Interval* getIntervalForLocalVar(unsigned varIndex)\r
+ {\r
+ assert(varIndex < compiler->lvaTrackedCount);\r
+ assert(localVarIntervals[varIndex] != nullptr);\r
+ return localVarIntervals[varIndex];\r
+ }\r
+\r
+ Interval* getIntervalForLocalVarNode(GenTreeLclVarCommon* tree)\r
+ {\r
+ LclVarDsc* varDsc = &compiler->lvaTable[tree->gtLclNum];\r
+ assert(varDsc->lvTracked);\r
+ return getIntervalForLocalVar(varDsc->lvVarIndex);\r
+ }\r
+\r
+ RegRecord* getRegisterRecord(regNumber regNum);\r
+\r
+ RefPosition* newRefPositionRaw(LsraLocation nodeLocation, GenTree* treeNode, RefType refType);\r
+\r
+ RefPosition* newRefPosition(Interval* theInterval,\r
+ LsraLocation theLocation,\r
+ RefType theRefType,\r
+ GenTree* theTreeNode,\r
+ regMaskTP mask,\r
+ unsigned multiRegIdx = 0 DEBUGARG(unsigned minRegCandidateCount = 1));\r
+\r
+ RefPosition* newRefPosition(\r
+ regNumber reg, LsraLocation theLocation, RefType theRefType, GenTree* theTreeNode, regMaskTP mask);\r
+\r
+ void applyCalleeSaveHeuristics(RefPosition* rp);\r
+\r
+ void associateRefPosWithInterval(RefPosition* rp);\r
+\r
+ void associateRefPosWithRegister(RefPosition* rp);\r
+\r
+ unsigned getWeight(RefPosition* refPos);\r
+\r
+ /*****************************************************************************\r
+ * Register management\r
+ ****************************************************************************/\r
+ RegisterType getRegisterType(Interval* currentInterval, RefPosition* refPosition);\r
+ regNumber tryAllocateFreeReg(Interval* current, RefPosition* refPosition);\r
+ regNumber allocateBusyReg(Interval* current, RefPosition* refPosition, bool allocateIfProfitable);\r
+ regNumber assignCopyReg(RefPosition* refPosition);\r
+\r
+ bool isMatchingConstant(RegRecord* physRegRecord, RefPosition* refPosition);\r
+ bool isSpillCandidate(Interval* current,\r
+ RefPosition* refPosition,\r
+ RegRecord* physRegRecord,\r
+ LsraLocation& nextLocation);\r
+ void checkAndAssignInterval(RegRecord* regRec, Interval* interval);\r
+ void assignPhysReg(RegRecord* regRec, Interval* interval);\r
+ void assignPhysReg(regNumber reg, Interval* interval)\r
+ {\r
+ assignPhysReg(getRegisterRecord(reg), interval);\r
+ }\r
+\r
+ bool isAssigned(RegRecord* regRec ARM_ARG(RegisterType newRegType));\r
+ bool isAssigned(RegRecord* regRec, LsraLocation lastLocation ARM_ARG(RegisterType newRegType));\r
+ void checkAndClearInterval(RegRecord* regRec, RefPosition* spillRefPosition);\r
+ void unassignPhysReg(RegRecord* regRec ARM_ARG(RegisterType newRegType));\r
+ void unassignPhysReg(RegRecord* regRec, RefPosition* spillRefPosition);\r
+ void unassignPhysRegNoSpill(RegRecord* reg);\r
+ void unassignPhysReg(regNumber reg)\r
+ {\r
+ unassignPhysReg(getRegisterRecord(reg), nullptr);\r
+ }\r
+\r
+ void setIntervalAsSpilled(Interval* interval);\r
+ void setIntervalAsSplit(Interval* interval);\r
+ void spillInterval(Interval* interval, RefPosition* fromRefPosition, RefPosition* toRefPosition);\r
+\r
+ void spillGCRefs(RefPosition* killRefPosition);\r
+\r
+ /*****************************************************************************\r
+ * For Resolution phase\r
+ ****************************************************************************/\r
+ // TODO-Throughput: Consider refactoring this so that we keep a map from regs to vars for better scaling\r
+ unsigned int regMapCount;\r
+\r
+ // When we split edges, we create new blocks, and instead of expanding the VarToRegMaps, we\r
+ // rely on the property that the "in" map is the same as the "from" block of the edge, and the\r
+ // "out" map is the same as the "to" block of the edge (by construction).\r
+ // So, for any block whose bbNum is greater than bbNumMaxBeforeResolution, we use the\r
+ // splitBBNumToTargetBBNumMap.\r
+ // TODO-Throughput: We may want to look into the cost/benefit tradeoff of doing this vs. expanding\r
+ // the arrays.\r
+\r
+ unsigned bbNumMaxBeforeResolution;\r
+ struct SplitEdgeInfo\r
+ {\r
+ unsigned fromBBNum;\r
+ unsigned toBBNum;\r
+ };\r
+ typedef JitHashTable<unsigned, JitSmallPrimitiveKeyFuncs<unsigned>, SplitEdgeInfo> SplitBBNumToTargetBBNumMap;\r
+ SplitBBNumToTargetBBNumMap* splitBBNumToTargetBBNumMap;\r
+ SplitBBNumToTargetBBNumMap* getSplitBBNumToTargetBBNumMap()\r
+ {\r
+ if (splitBBNumToTargetBBNumMap == nullptr)\r
+ {\r
+ splitBBNumToTargetBBNumMap =\r
+ new (getAllocator(compiler)) SplitBBNumToTargetBBNumMap(getAllocator(compiler));\r
+ }\r
+ return splitBBNumToTargetBBNumMap;\r
+ }\r
+ SplitEdgeInfo getSplitEdgeInfo(unsigned int bbNum);\r
+\r
+ void initVarRegMaps();\r
+ void setInVarRegForBB(unsigned int bbNum, unsigned int varNum, regNumber reg);\r
+ void setOutVarRegForBB(unsigned int bbNum, unsigned int varNum, regNumber reg);\r
+ VarToRegMap getInVarToRegMap(unsigned int bbNum);\r
+ VarToRegMap getOutVarToRegMap(unsigned int bbNum);\r
+ void setVarReg(VarToRegMap map, unsigned int trackedVarIndex, regNumber reg);\r
+ regNumber getVarReg(VarToRegMap map, unsigned int trackedVarIndex);\r
+ // Initialize the incoming VarToRegMap to the given map values (generally a predecessor of\r
+ // the block)\r
+ VarToRegMap setInVarToRegMap(unsigned int bbNum, VarToRegMap srcVarToRegMap);\r
+\r
+ regNumber getTempRegForResolution(BasicBlock* fromBlock, BasicBlock* toBlock, var_types type);\r
+\r
+#ifdef DEBUG\r
+ void dumpVarToRegMap(VarToRegMap map);\r
+ void dumpInVarToRegMap(BasicBlock* block);\r
+ void dumpOutVarToRegMap(BasicBlock* block);\r
+\r
+ // There are three points at which a tuple-style dump is produced, and each\r
+ // differs slightly:\r
+ // - In LSRA_DUMP_PRE, it does a simple dump of each node, with indications of what\r
+ // tree nodes are consumed.\r
+ // - In LSRA_DUMP_REFPOS, which is after the intervals are built, but before\r
+ // register allocation, each node is dumped, along with all of the RefPositions,\r
+ // The Intervals are identified as Lnnn for lclVar intervals, Innn for other\r
+ // intervals, and Tnnn for internal temps.\r
+ // - In LSRA_DUMP_POST, which is after register allocation, the registers are\r
+ // shown.\r
+\r
+ enum LsraTupleDumpMode{LSRA_DUMP_PRE, LSRA_DUMP_REFPOS, LSRA_DUMP_POST};\r
+ void lsraGetOperandString(GenTree* tree, LsraTupleDumpMode mode, char* operandString, unsigned operandStringLength);\r
+ void lsraDispNode(GenTree* tree, LsraTupleDumpMode mode, bool hasDest);\r
+ void DumpOperandDefs(\r
+ GenTree* operand, bool& first, LsraTupleDumpMode mode, char* operandString, const unsigned operandStringLength);\r
+ void TupleStyleDump(LsraTupleDumpMode mode);\r
+\r
+ LsraLocation maxNodeLocation;\r
+\r
+ // Width of various fields - used to create a streamlined dump during allocation that shows the\r
+ // state of all the registers in columns.\r
+ int regColumnWidth;\r
+ int regTableIndent;\r
+\r
+ const char* columnSeparator;\r
+ const char* line;\r
+ const char* leftBox;\r
+ const char* middleBox;\r
+ const char* rightBox;\r
+\r
+ static const int MAX_FORMAT_CHARS = 12;\r
+ char intervalNameFormat[MAX_FORMAT_CHARS];\r
+ char regNameFormat[MAX_FORMAT_CHARS];\r
+ char shortRefPositionFormat[MAX_FORMAT_CHARS];\r
+ char emptyRefPositionFormat[MAX_FORMAT_CHARS];\r
+ char indentFormat[MAX_FORMAT_CHARS];\r
+ static const int MAX_LEGEND_FORMAT_CHARS = 25;\r
+ char bbRefPosFormat[MAX_LEGEND_FORMAT_CHARS];\r
+ char legendFormat[MAX_LEGEND_FORMAT_CHARS];\r
+\r
+ // How many rows have we printed since last printing a "title row"?\r
+ static const int MAX_ROWS_BETWEEN_TITLES = 50;\r
+ int rowCountSinceLastTitle;\r
+ // Current mask of registers being printed in the dump.\r
+ regMaskTP lastDumpedRegisters;\r
+ regMaskTP registersToDump;\r
+ int lastUsedRegNumIndex;\r
+ bool shouldDumpReg(regNumber regNum)\r
+ {\r
+ return (registersToDump & genRegMask(regNum)) != 0;\r
+ }\r
+\r
+ void dumpRegRecordHeader();\r
+ void dumpRegRecordTitle();\r
+ void dumpRegRecordTitleIfNeeded();\r
+ void dumpRegRecordTitleLines();\r
+ void dumpRegRecords();\r
+ // An abbreviated RefPosition dump for printing with column-based register state\r
+ void dumpRefPositionShort(RefPosition* refPosition, BasicBlock* currentBlock);\r
+ // Print the number of spaces occupied by a dumpRefPositionShort()\r
+ void dumpEmptyRefPosition();\r
+ // A dump of Referent, in exactly regColumnWidth characters\r
+ void dumpIntervalName(Interval* interval);\r
+\r
+ // Events during the allocation phase that cause some dump output\r
+ enum LsraDumpEvent{\r
+ // Conflicting def/use\r
+ LSRA_EVENT_DEFUSE_CONFLICT, LSRA_EVENT_DEFUSE_FIXED_DELAY_USE, LSRA_EVENT_DEFUSE_CASE1, LSRA_EVENT_DEFUSE_CASE2,\r
+ LSRA_EVENT_DEFUSE_CASE3, LSRA_EVENT_DEFUSE_CASE4, LSRA_EVENT_DEFUSE_CASE5, LSRA_EVENT_DEFUSE_CASE6,\r
+\r
+ // Spilling\r
+ LSRA_EVENT_SPILL, LSRA_EVENT_SPILL_EXTENDED_LIFETIME, LSRA_EVENT_RESTORE_PREVIOUS_INTERVAL,\r
+ LSRA_EVENT_RESTORE_PREVIOUS_INTERVAL_AFTER_SPILL, LSRA_EVENT_DONE_KILL_GC_REFS,\r
+\r
+ // Block boundaries\r
+ LSRA_EVENT_START_BB, LSRA_EVENT_END_BB,\r
+\r
+ // Miscellaneous\r
+ LSRA_EVENT_FREE_REGS,\r
+\r
+ // Characteristics of the current RefPosition\r
+ LSRA_EVENT_INCREMENT_RANGE_END, // ???\r
+ LSRA_EVENT_LAST_USE, LSRA_EVENT_LAST_USE_DELAYED, LSRA_EVENT_NEEDS_NEW_REG,\r
+\r
+ // Allocation decisions\r
+ LSRA_EVENT_FIXED_REG, LSRA_EVENT_EXP_USE, LSRA_EVENT_ZERO_REF, LSRA_EVENT_NO_ENTRY_REG_ALLOCATED,\r
+ LSRA_EVENT_KEPT_ALLOCATION, LSRA_EVENT_COPY_REG, LSRA_EVENT_MOVE_REG, LSRA_EVENT_ALLOC_REG,\r
+ LSRA_EVENT_ALLOC_SPILLED_REG, LSRA_EVENT_NO_REG_ALLOCATED, LSRA_EVENT_RELOAD, LSRA_EVENT_SPECIAL_PUTARG,\r
+ LSRA_EVENT_REUSE_REG,\r
+ };\r
+ void dumpLsraAllocationEvent(LsraDumpEvent event,\r
+ Interval* interval = nullptr,\r
+ regNumber reg = REG_NA,\r
+ BasicBlock* currentBlock = nullptr);\r
+\r
+ void dumpBlockHeader(BasicBlock* block);\r
+\r
+ void validateIntervals();\r
+#endif // DEBUG\r
+\r
+#if TRACK_LSRA_STATS\r
+ enum LsraStat{\r
+ LSRA_STAT_SPILL, LSRA_STAT_COPY_REG, LSRA_STAT_RESOLUTION_MOV, LSRA_STAT_SPLIT_EDGE,\r
+ };\r
+\r
+ unsigned regCandidateVarCount;\r
+ void updateLsraStat(LsraStat stat, unsigned currentBBNum);\r
+\r
+ void dumpLsraStats(FILE* file);\r
+\r
+#define INTRACK_STATS(x) x\r
+#else // !TRACK_LSRA_STATS\r
+#define INTRACK_STATS(x)\r
+#endif // !TRACK_LSRA_STATS\r
+\r
+ Compiler* compiler;\r
+\r
+private:\r
+#if MEASURE_MEM_ALLOC\r
+ CompAllocator* lsraAllocator;\r
+#endif\r
+\r
+ CompAllocator* getAllocator(Compiler* comp)\r
+ {\r
+#if MEASURE_MEM_ALLOC\r
+ if (lsraAllocator == nullptr)\r
+ {\r
+ lsraAllocator = new (comp, CMK_LSRA) CompAllocator(comp, CMK_LSRA);\r
+ }\r
+ return lsraAllocator;\r
+#else\r
+ return comp->getAllocator();\r
+#endif\r
+ }\r
+\r
+#ifdef DEBUG\r
+ // This is used for dumping\r
+ RefPosition* activeRefPosition;\r
+#endif // DEBUG\r
+\r
+ IntervalList intervals;\r
+\r
+ RegRecord physRegs[REG_COUNT];\r
+\r
+ // Map from tracked variable index to Interval*.\r
+ Interval** localVarIntervals;\r
+\r
+ // Set of blocks that have been visited.\r
+ BlockSet bbVisitedSet;\r
+ void markBlockVisited(BasicBlock* block)\r
+ {\r
+ BlockSetOps::AddElemD(compiler, bbVisitedSet, block->bbNum);\r
+ }\r
+ void clearVisitedBlocks()\r
+ {\r
+ BlockSetOps::ClearD(compiler, bbVisitedSet);\r
+ }\r
+ bool isBlockVisited(BasicBlock* block)\r
+ {\r
+ return BlockSetOps::IsMember(compiler, bbVisitedSet, block->bbNum);\r
+ }\r
+\r
+#if DOUBLE_ALIGN\r
+ bool doDoubleAlign;\r
+#endif\r
+\r
+ // A map from bbNum to the block information used during register allocation.\r
+ LsraBlockInfo* blockInfo;\r
+ BasicBlock* findPredBlockForLiveIn(BasicBlock* block, BasicBlock* prevBlock DEBUGARG(bool* pPredBlockIsAllocated));\r
+\r
+ // The order in which the blocks will be allocated.\r
+ // This is an array of BasicBlock*, in the order in which they should be traversed.\r
+ BasicBlock** blockSequence;\r
+ // The verifiedAllBBs flag indicates whether we have verified that all BBs have been\r
+ // included in the blockSequence above, during setBlockSequence().\r
+ bool verifiedAllBBs;\r
+ void setBlockSequence();\r
+ int compareBlocksForSequencing(BasicBlock* block1, BasicBlock* block2, bool useBlockWeights);\r
+ BasicBlockList* blockSequenceWorkList;\r
+ bool blockSequencingDone;\r
+ void addToBlockSequenceWorkList(BlockSet sequencedBlockSet, BasicBlock* block, BlockSet& predSet);\r
+ void removeFromBlockSequenceWorkList(BasicBlockList* listNode, BasicBlockList* prevNode);\r
+ BasicBlock* getNextCandidateFromWorkList();\r
+\r
+ // The bbNum of the block being currently allocated or resolved.\r
+ unsigned int curBBNum;\r
+ // The current location\r
+ LsraLocation currentLoc;\r
+ // The ordinal of the block we're on (i.e. this is the curBBSeqNum-th block we've allocated).\r
+ unsigned int curBBSeqNum;\r
+ // The number of blocks that we've sequenced.\r
+ unsigned int bbSeqCount;\r
+ // The Location of the start of the current block.\r
+ LsraLocation curBBStartLocation;\r
+ // True if the method contains any critical edges.\r
+ bool hasCriticalEdges;\r
+\r
+ // True if there are any register candidate lclVars available for allocation.\r
+ bool enregisterLocalVars;\r
+\r
+ virtual bool willEnregisterLocalVars() const\r
+ {\r
+ return enregisterLocalVars;\r
+ }\r
+\r
+ // Ordered list of RefPositions\r
+ RefPositionList refPositions;\r
+\r
+ // Per-block variable location mappings: an array indexed by block number that yields a\r
+ // pointer to an array of regNumber, one per variable.\r
+ VarToRegMap* inVarToRegMaps;\r
+ VarToRegMap* outVarToRegMaps;\r
+\r
+ // A temporary VarToRegMap used during the resolution of critical edges.\r
+ VarToRegMap sharedCriticalVarToRegMap;\r
+\r
+ PhasedVar<regMaskTP> availableIntRegs;\r
+ PhasedVar<regMaskTP> availableFloatRegs;\r
+ PhasedVar<regMaskTP> availableDoubleRegs;\r
+\r
+ // The set of all register candidates. Note that this may be a subset of tracked vars.\r
+ VARSET_TP registerCandidateVars;\r
+ // Current set of live register candidate vars, used during building of RefPositions to determine\r
+ // whether to preference to callee-save.\r
+ VARSET_TP currentLiveVars;\r
+ // Set of variables that may require resolution across an edge.\r
+ // This is first constructed during interval building, to contain all the lclVars that are live at BB edges.\r
+ // Then, any lclVar that is always in the same register is removed from the set.\r
+ VARSET_TP resolutionCandidateVars;\r
+ // This set contains all the lclVars that are ever spilled or split.\r
+ VARSET_TP splitOrSpilledVars;\r
+ // Set of floating point variables to consider for callee-save registers.\r
+ VARSET_TP fpCalleeSaveCandidateVars;\r
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE\r
+#if defined(_TARGET_AMD64_)\r
+ static bool varTypeNeedsPartialCalleeSave(var_types type)\r
+ {\r
+ return (emitTypeSize(type) == 32);\r
+ }\r
+ static const var_types LargeVectorSaveType = TYP_SIMD16;\r
+#elif defined(_TARGET_ARM64_)\r
+ static bool varTypeNeedsPartialCalleeSave(var_types type)\r
+ {\r
+ // ARM64 ABI FP Callee save registers only require Callee to save lower 8 Bytes\r
+ // For SIMD types longer than 8 bytes Caller is responsible for saving and restoring Upper bytes.\r
+ return (emitTypeSize(type) == 16);\r
+ }\r
+ static const var_types LargeVectorSaveType = TYP_DOUBLE;\r
+#else // !defined(_TARGET_AMD64_) && !defined(_TARGET_ARM64_)\r
+#error("Unknown target architecture for FEATURE_SIMD")\r
+#endif // !defined(_TARGET_AMD64_) && !defined(_TARGET_ARM64_)\r
+\r
+ // Set of large vector (TYP_SIMD32 on AVX) variables.\r
+ VARSET_TP largeVectorVars;\r
+ // Set of large vector (TYP_SIMD32 on AVX) variables to consider for callee-save registers.\r
+ VARSET_TP largeVectorCalleeSaveCandidateVars;\r
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE\r
+\r
+ //-----------------------------------------------------------------------\r
+ // TreeNodeInfo methods\r
+ //-----------------------------------------------------------------------\r
+\r
+ // The operandToLocationInfoMap is used for the transient TreeNodeInfo that is computed by\r
+ // the TreeNodeInfoInit methods, and used in building RefPositions.\r
+ typedef SmallHashTable<GenTree*, LocationInfoListNode*, 32> OperandToLocationInfoMap;\r
+ OperandToLocationInfoMap* operandToLocationInfoMap;\r
+ // The useList is constructed for each node by the TreeNodeInfoInit methods.\r
+ // It contains the TreeNodeInfo for its operands, in their order of use.\r
+ LocationInfoList useList;\r
+\r
+ // Get the LocationInfoListNode for the given node, and put it into the useList.\r
+ // The node must not be contained, and must have been processed by buildRefPositionsForNode().\r
+ void appendLocationInfoToList(GenTree* node)\r
+ {\r
+ LocationInfoListNode* locationInfo;\r
+ bool found = operandToLocationInfoMap->TryRemove(node, &locationInfo);\r
+ assert(found);\r
+ useList.Append(locationInfo);\r
+ }\r
+ // Get the LocationInfoListNodes for the given node, and return it, but don't put it into the useList.\r
+ // The node must not be contained, and must have been processed by buildRefPositionsForNode().\r
+ LocationInfoListNode* getLocationInfo(GenTree* node)\r
+ {\r
+ LocationInfoListNode* locationInfo;\r
+ bool found = operandToLocationInfoMap->TryRemove(node, &locationInfo);\r
+ assert(found);\r
+ return locationInfo;\r
+ }\r
+ //------------------------------------------------------------------------\r
+ // appendBinaryLocationInfoToList: Get the LocationInfoListNodes for the operands of the\r
+ // given node, and put them into the useList.\r
+ //\r
+ // Arguments:\r
+ // node - a GenTreeOp\r
+ //\r
+ // Return Value:\r
+ // The number of actual register operands.\r
+ //\r
+ // Notes:\r
+ // The operands must already have been processed by buildRefPositionsForNode, and their\r
+ // LocationInfoListNodes placed in the operandToLocationInfoMap.\r
+ //\r
+ int appendBinaryLocationInfoToList(GenTreeOp* node)\r
+ {\r
+ bool found;\r
+ LocationInfoListNode* op1LocationInfo = nullptr;\r
+ LocationInfoListNode* op2LocationInfo = nullptr;\r
+ int srcCount = 0;\r
+ GenTree* op1 = node->gtOp1;\r
+ GenTree* op2 = node->gtGetOp2IfPresent();\r
+ if (node->IsReverseOp() && op2 != nullptr)\r
+ {\r
+ srcCount += GetOperandInfo(op2);\r
+ op2 = nullptr;\r
+ }\r
+ if (op1 != nullptr)\r
+ {\r
+ srcCount += GetOperandInfo(op1);\r
+ }\r
+ if (op2 != nullptr)\r
+ {\r
+ srcCount += GetOperandInfo(op2);\r
+ }\r
+ return srcCount;\r
+ }\r
+\r
+ // This is the main entry point for computing the TreeNodeInfo for a node.\r
+ void TreeNodeInfoInit(GenTree* stmt, TreeNodeInfo* info);\r
+\r
+ void TreeNodeInfoInitCheckByteable(GenTree* tree, TreeNodeInfo* info);\r
+\r
+ bool CheckAndSetDelayFree(GenTree* delayUseSrc);\r
+\r
+ void TreeNodeInfoInitSimple(GenTree* tree, TreeNodeInfo* info);\r
+ int GetOperandInfo(GenTree* node);\r
+ int GetOperandInfo(GenTree* node, LocationInfoListNode** pFirstInfo);\r
+ int GetIndirInfo(GenTreeIndir* indirTree);\r
+ void HandleFloatVarArgs(GenTreeCall* call, TreeNodeInfo* info, GenTree* argNode, bool* callHasFloatRegArgs);\r
+\r
+ void TreeNodeInfoInitStoreLoc(GenTree* tree, TreeNodeInfo* info);\r
+ void TreeNodeInfoInitReturn(GenTree* tree, TreeNodeInfo* info);\r
+ // This method, unlike the others, returns the number of sources, since it may be called when\r
+ // 'tree' is contained.\r
+ int TreeNodeInfoInitShiftRotate(GenTree* tree, TreeNodeInfo* info);\r
+ void TreeNodeInfoInitPutArgReg(GenTreeUnOp* node, TreeNodeInfo* info);\r
+ void TreeNodeInfoInitCall(GenTreeCall* call, TreeNodeInfo* info);\r
+ void TreeNodeInfoInitCmp(GenTree* tree, TreeNodeInfo* info);\r
+ void TreeNodeInfoInitStructArg(GenTree* structArg, TreeNodeInfo* info);\r
+ void TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode, TreeNodeInfo* info);\r
+ void TreeNodeInfoInitModDiv(GenTree* tree, TreeNodeInfo* info);\r
+ void TreeNodeInfoInitIntrinsic(GenTree* tree, TreeNodeInfo* info);\r
+ void TreeNodeInfoInitStoreLoc(GenTreeLclVarCommon* tree, TreeNodeInfo* info);\r
+ void TreeNodeInfoInitIndir(GenTreeIndir* indirTree, TreeNodeInfo* info);\r
+ void TreeNodeInfoInitGCWriteBarrier(GenTree* tree, TreeNodeInfo* info);\r
+ void TreeNodeInfoInitCast(GenTree* tree, TreeNodeInfo* info);\r
+\r
+#ifdef _TARGET_X86_\r
+ bool ExcludeNonByteableRegisters(GenTree* tree);\r
+#endif\r
+\r
+#if defined(_TARGET_XARCH_)\r
+ // returns true if the tree can use the read-modify-write memory instruction form\r
+ bool isRMWRegOper(GenTree* tree);\r
+ void TreeNodeInfoInitMul(GenTree* tree, TreeNodeInfo* info);\r
+ void SetContainsAVXFlags(bool isFloatingPointType = true, unsigned sizeOfSIMDVector = 0);\r
+#endif // defined(_TARGET_XARCH_)\r
+\r
+#ifdef FEATURE_SIMD\r
+ void TreeNodeInfoInitSIMD(GenTreeSIMD* tree, TreeNodeInfo* info);\r
+#endif // FEATURE_SIMD\r
+\r
+#if FEATURE_HW_INTRINSICS\r
+ void TreeNodeInfoInitHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, TreeNodeInfo* info);\r
+#endif // FEATURE_HW_INTRINSICS\r
+\r
+ void TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode, TreeNodeInfo* info);\r
+#ifdef _TARGET_ARM_\r
+ void TreeNodeInfoInitPutArgSplit(GenTreePutArgSplit* tree, TreeNodeInfo* info);\r
+#endif\r
+ void TreeNodeInfoInitLclHeap(GenTree* tree, TreeNodeInfo* info);\r
+};\r
+\r
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX\r
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX\r
+XX XX\r
+XX Interval XX\r
+XX XX\r
+XX This is the fundamental data structure for linear scan register XX\r
+XX allocation. It represents the live range(s) for a variable or temp. XX\r
+XX XX\r
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX\r
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX\r
+*/\r
+\r
+class Interval : public Referenceable\r
+{\r
+public:\r
+ Interval(RegisterType registerType, regMaskTP registerPreferences)\r
+ : registerPreferences(registerPreferences)\r
+ , relatedInterval(nullptr)\r
+ , assignedReg(nullptr)\r
+ , registerType(registerType)\r
+ , isLocalVar(false)\r
+ , isSplit(false)\r
+ , isSpilled(false)\r
+ , isInternal(false)\r
+ , isStructField(false)\r
+ , isPromotedStruct(false)\r
+ , hasConflictingDefUse(false)\r
+ , hasInterferingUses(false)\r
+ , isSpecialPutArg(false)\r
+ , preferCalleeSave(false)\r
+ , isConstant(false)\r
+ , isMultiReg(false)\r
+ , physReg(REG_COUNT)\r
+#ifdef DEBUG\r
+ , intervalIndex(0)\r
+#endif\r
+ , varNum(0)\r
+ {\r
+ }\r
+\r
+#ifdef DEBUG\r
+ // print out representation\r
+ void dump();\r
+ // concise representation for embedding\r
+ void tinyDump();\r
+ // extremely concise representation\r
+ void microDump();\r
+#endif // DEBUG\r
+\r
+ void setLocalNumber(Compiler* compiler, unsigned lclNum, LinearScan* l);\r
+\r
+ // Fixed registers for which this Interval has a preference\r
+ regMaskTP registerPreferences;\r
+\r
+ // The relatedInterval is:\r
+ // - for any other interval, it is the interval to which this interval\r
+ // is currently preferenced (e.g. because they are related by a copy)\r
+ Interval* relatedInterval;\r
+\r
+ // The assignedReg is the RegRecord for the register to which this interval\r
+ // has been assigned at some point - if the interval is active, this is the\r
+ // register it currently occupies.\r
+ RegRecord* assignedReg;\r
+\r
+ // DECIDE : put this in a union or do something w/ inheritance?\r
+ // this is an interval for a physical register, not an allocatable entity\r
+\r
+ RegisterType registerType;\r
+ bool isLocalVar : 1;\r
+ // Indicates whether this interval has been assigned to different registers\r
+ bool isSplit : 1;\r
+ // Indicates whether this interval is ever spilled\r
+ bool isSpilled : 1;\r
+ // indicates an interval representing the internal requirements for\r
+ // generating code for a node (temp registers internal to the node)\r
+ // Note that this interval may live beyond a node in the GT_ARR_LENREF/GT_IND\r
+ // case (though never lives beyond a stmt)\r
+ bool isInternal : 1;\r
+ // true if this is a LocalVar for a struct field\r
+ bool isStructField : 1;\r
+ // true iff this is a GT_LDOBJ for a fully promoted (PROMOTION_TYPE_INDEPENDENT) struct\r
+ bool isPromotedStruct : 1;\r
+ // true if this is an SDSU interval for which the def and use have conflicting register\r
+ // requirements\r
+ bool hasConflictingDefUse : 1;\r
+ // true if this interval's defining node has "delayRegFree" uses, either due to it being an RMW instruction,\r
+ // OR because it requires an internal register that differs from the target.\r
+ bool hasInterferingUses : 1;\r
+\r
+ // True if this interval is defined by a putArg, whose source is a non-last-use lclVar.\r
+ // During allocation, this flag will be cleared if the source is not already in the required register.\r
+ // Otherwise, we will leave the register allocated to the lclVar, but mark the RegRecord as\r
+ // isBusyUntilNextKill, so that it won't be reused if the lclVar goes dead before the call.\r
+ bool isSpecialPutArg : 1;\r
+\r
+ // True if this interval interferes with a call.\r
+ bool preferCalleeSave : 1;\r
+\r
+ // True if this interval is defined by a constant node that may be reused and/or may be\r
+ // able to reuse a constant that's already in a register.\r
+ bool isConstant : 1;\r
+\r
+ // True if this Interval is defined by a node that produces multiple registers.\r
+ bool isMultiReg : 1;\r
+\r
+ // The register to which it is currently assigned.\r
+ regNumber physReg;\r
+\r
+#ifdef DEBUG\r
+ unsigned int intervalIndex;\r
+#endif // DEBUG\r
+\r
+ unsigned int varNum; // This is the "variable number": the index into the lvaTable array\r
+\r
+ LclVarDsc* getLocalVar(Compiler* comp)\r
+ {\r
+ assert(isLocalVar);\r
+ return &(comp->lvaTable[this->varNum]);\r
+ }\r
+\r
+ // Get the local tracked variable "index" (lvVarIndex), used in bitmasks.\r
+ unsigned getVarIndex(Compiler* comp)\r
+ {\r
+ LclVarDsc* varDsc = getLocalVar(comp);\r
+ assert(varDsc->lvTracked); // If this isn't true, we shouldn't be calling this function!\r
+ return varDsc->lvVarIndex;\r
+ }\r
+\r
+ bool isAssignedTo(regNumber regNum)\r
+ {\r
+ // This uses regMasks to handle the case where a double actually occupies two registers\r
+ // TODO-Throughput: This could/should be done more cheaply.\r
+ return (physReg != REG_NA && (genRegMask(physReg, registerType) & genRegMask(regNum)) != RBM_NONE);\r
+ }\r
+\r
+ // Assign the related interval.\r
+ void assignRelatedInterval(Interval* newRelatedInterval)\r
+ {\r
+#ifdef DEBUG\r
+ if (VERBOSE)\r
+ {\r
+ printf("Assigning related ");\r
+ newRelatedInterval->microDump();\r
+ printf(" to ");\r
+ this->microDump();\r
+ printf("\n");\r
+ }\r
+#endif // DEBUG\r
+ relatedInterval = newRelatedInterval;\r
+ }\r
+\r
+ // Assign the related interval, but only if it isn't already assigned.\r
+ void assignRelatedIntervalIfUnassigned(Interval* newRelatedInterval)\r
+ {\r
+ if (relatedInterval == nullptr)\r
+ {\r
+ assignRelatedInterval(newRelatedInterval);\r
+ }\r
+ else\r
+ {\r
+#ifdef DEBUG\r
+ if (VERBOSE)\r
+ {\r
+ printf("Interval ");\r
+ this->microDump();\r
+ printf(" already has a related interval\n");\r
+ }\r
+#endif // DEBUG\r
+ }\r
+ }\r
+\r
+ // Update the registerPreferences on the interval.\r
+ // If there are conflicting requirements on this interval, set the preferences to\r
+ // the union of them. That way maybe we'll get at least one of them.\r
+ // An exception is made in the case where one of the existing or new\r
+ // preferences are all callee-save, in which case we "prefer" the callee-save\r
+\r
+ void updateRegisterPreferences(regMaskTP preferences)\r
+ {\r
+ // We require registerPreferences to have been initialized.\r
+ assert(registerPreferences != RBM_NONE);\r
+ // It is invalid to update with empty preferences\r
+ assert(preferences != RBM_NONE);\r
+\r
+ regMaskTP commonPreferences = (registerPreferences & preferences);\r
+ if (commonPreferences != RBM_NONE)\r
+ {\r
+ registerPreferences = commonPreferences;\r
+ return;\r
+ }\r
+\r
+ // There are no preferences in common.\r
+ // Preferences need to reflect both cases where a var must occupy a specific register,\r
+ // as well as cases where a var is live when a register is killed.\r
+ // In the former case, we would like to record all such registers, however we don't\r
+ // really want to use any registers that will interfere.\r
+ // To approximate this, we never "or" together multi-reg sets, which are generally kill sets.\r
+\r
+ if (!genMaxOneBit(preferences))\r
+ {\r
+ // The new preference value is a multi-reg set, so it's probably a kill.\r
+ // Keep the new value.\r
+ registerPreferences = preferences;\r
+ return;\r
+ }\r
+\r
+ if (!genMaxOneBit(registerPreferences))\r
+ {\r
+ // The old preference value is a multi-reg set.\r
+ // Keep the existing preference set, as it probably reflects one or more kills.\r
+ // It may have been a union of multiple individual registers, but we can't\r
+ // distinguish that case without extra cost.\r
+ return;\r
+ }\r
+\r
+ // If we reach here, we have two disjoint single-reg sets.\r
+ // Keep only the callee-save preferences, if not empty.\r
+ // Otherwise, take the union of the preferences.\r
+\r
+ regMaskTP newPreferences = registerPreferences | preferences;\r
+\r
+ if (preferCalleeSave)\r
+ {\r
+ regMaskTP calleeSaveMask = (calleeSaveRegs(this->registerType) & (newPreferences));\r
+ if (calleeSaveMask != RBM_NONE)\r
+ {\r
+ newPreferences = calleeSaveMask;\r
+ }\r
+ }\r
+ registerPreferences = newPreferences;\r
+ }\r
+};\r
+\r
+class RefPosition\r
+{\r
+public:\r
+ // A RefPosition refers to either an Interval or a RegRecord. 'referent' points to one\r
+ // of these types. If it refers to a RegRecord, then 'isPhysRegRef' is true. If it\r
+ // refers to an Interval, then 'isPhysRegRef' is false.\r
+ //\r
+ // Q: can 'referent' be NULL?\r
+\r
+ Referenceable* referent;\r
+\r
+ // nextRefPosition is the next in code order.\r
+ // Note that in either case there is no need for these to be doubly linked, as they\r
+ // are only traversed in the forward direction, and are not moved.\r
+ RefPosition* nextRefPosition;\r
+\r
+ // The remaining fields are common to both options\r
+ GenTree* treeNode;\r
+ unsigned int bbNum;\r
+\r
+ // Prior to the allocation pass, registerAssignment captures the valid registers\r
+ // for this RefPosition. An empty set means that any register is valid. A non-empty\r
+ // set means that it must be one of the given registers (may be the full set if the\r
+ // only constraint is that it must reside in SOME register)\r
+ // After the allocation pass, this contains the actual assignment\r
+ LsraLocation nodeLocation;\r
+ regMaskTP registerAssignment;\r
+\r
+ RefType refType;\r
+\r
+ // NOTE: C++ only packs bitfields if the base type is the same. So make all the base\r
+ // NOTE: types of the logically "bool" types that follow 'unsigned char', so they match\r
+ // NOTE: RefType that precedes this, and multiRegIdx can also match.\r
+\r
+ // Indicates whether this ref position is to be allocated a reg only if profitable. Currently these are the\r
+ // ref positions that lower/codegen has indicated as reg optional and is considered a contained memory operand if\r
+ // no reg is allocated.\r
+ unsigned char allocRegIfProfitable : 1;\r
+\r
+ // Used by RefTypeDef/Use positions of a multi-reg call node.\r
+ // Indicates the position of the register that this ref position refers to.\r
+ // The max bits needed is based on max value of MAX_RET_REG_COUNT value\r
+ // across all targets and that happens to be 4 on Arm. Hence index value\r
+ // would be 0..MAX_RET_REG_COUNT-1.\r
+ unsigned char multiRegIdx : 2;\r
+\r
+ // Last Use - this may be true for multiple RefPositions in the same Interval\r
+ unsigned char lastUse : 1;\r
+\r
+ // Spill and Copy info\r
+ // reload indicates that the value was spilled, and must be reloaded here.\r
+ // spillAfter indicates that the value is spilled here, so a spill must be added.\r
+ // copyReg indicates that the value needs to be copied to a specific register,\r
+ // but that it will also retain its current assigned register.\r
+ // moveReg indicates that the value needs to be moved to a different register,\r
+ // and that this will be its new assigned register.\r
+ // A RefPosition may have any flag individually or the following combinations:\r
+ // - reload and spillAfter (i.e. it remains in memory), but not in combination with copyReg or moveReg\r
+ // (reload cannot exist with copyReg or moveReg; it should be reloaded into the appropriate reg)\r
+ // - spillAfter and copyReg (i.e. it must be copied to a new reg for use, but is then spilled)\r
+ // - spillAfter and moveReg (i.e. it must be both spilled and moved)\r
+ // NOTE: a moveReg involves an explicit move, and would usually not be needed for a fixed Reg if it is going\r
+ // to be spilled, because the code generator will do the move to the fixed register, and doesn't need to\r
+ // record the new register location as the new "home" location of the lclVar. However, if there is a conflicting\r
+ // use at the same location (e.g. lclVar V1 is in rdx and needs to be in rcx, but V2 needs to be in rdx), then\r
+ // we need an explicit move.\r
+ // - copyReg and moveReg must not exist with each other.\r
+\r
+ unsigned char reload : 1;\r
+ unsigned char spillAfter : 1;\r
+ unsigned char copyReg : 1;\r
+ unsigned char moveReg : 1; // true if this var is moved to a new register\r
+\r
+ unsigned char isPhysRegRef : 1; // true if 'referent' points to a RegRecord, false if it points to an Interval\r
+ unsigned char isFixedRegRef : 1;\r
+ unsigned char isLocalDefUse : 1;\r
+\r
+ // delayRegFree indicates that the register should not be freed right away, but instead wait\r
+ // until the next Location after it would normally be freed. This is used for the case of\r
+ // non-commutative binary operators, where op2 must not be assigned the same register as\r
+ // the target. We do this by not freeing it until after the target has been defined.\r
+ // Another option would be to actually change the Location of the op2 use until the same\r
+ // Location as the def, but then it could potentially reuse a register that has been freed\r
+ // from the other source(s), e.g. if it's a lastUse or spilled.\r
+ unsigned char delayRegFree : 1;\r
+\r
+ // outOfOrder is marked on a (non-def) RefPosition that doesn't follow a definition of the\r
+ // register currently assigned to the Interval. This happens when we use the assigned\r
+ // register from a predecessor that is not the most recently allocated BasicBlock.\r
+ unsigned char outOfOrder : 1;\r
+\r
+#ifdef DEBUG\r
+ // Minimum number of registers that need to be ensured while\r
+ // constraining candidates for this ref position under\r
+ // LSRA stress.\r
+ unsigned minRegCandidateCount;\r
+\r
+ // The unique RefPosition number, equal to its index in the\r
+ // refPositions list. Only used for debugging dumps.\r
+ unsigned rpNum;\r
+#endif // DEBUG\r
+\r
+ RefPosition(unsigned int bbNum, LsraLocation nodeLocation, GenTree* treeNode, RefType refType)\r
+ : referent(nullptr)\r
+ , nextRefPosition(nullptr)\r
+ , treeNode(treeNode)\r
+ , bbNum(bbNum)\r
+ , nodeLocation(nodeLocation)\r
+ , registerAssignment(RBM_NONE)\r
+ , refType(refType)\r
+ , multiRegIdx(0)\r
+ , lastUse(false)\r
+ , reload(false)\r
+ , spillAfter(false)\r
+ , copyReg(false)\r
+ , moveReg(false)\r
+ , isPhysRegRef(false)\r
+ , isFixedRegRef(false)\r
+ , isLocalDefUse(false)\r
+ , delayRegFree(false)\r
+ , outOfOrder(false)\r
+#ifdef DEBUG\r
+ , minRegCandidateCount(1)\r
+ , rpNum(0)\r
+#endif\r
+ {\r
+ }\r
+\r
+ Interval* getInterval()\r
+ {\r
+ assert(!isPhysRegRef);\r
+ return (Interval*)referent;\r
+ }\r
+ void setInterval(Interval* i)\r
+ {\r
+ referent = i;\r
+ isPhysRegRef = false;\r
+ }\r
+\r
+ RegRecord* getReg()\r
+ {\r
+ assert(isPhysRegRef);\r
+ return (RegRecord*)referent;\r
+ }\r
+ void setReg(RegRecord* r)\r
+ {\r
+ referent = r;\r
+ isPhysRegRef = true;\r
+ registerAssignment = genRegMask(r->regNum);\r
+ }\r
+\r
+ regNumber assignedReg()\r
+ {\r
+ if (registerAssignment == RBM_NONE)\r
+ {\r
+ return REG_NA;\r
+ }\r
+\r
+ return genRegNumFromMask(registerAssignment);\r
+ }\r
+\r
+ // Returns true if it is a reference on a gentree node.\r
+ bool IsActualRef()\r
+ {\r
+ return (refType == RefTypeDef || refType == RefTypeUse);\r
+ }\r
+\r
+ bool RequiresRegister()\r
+ {\r
+ return (IsActualRef()\r
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE\r
+ || refType == RefTypeUpperVectorSaveDef || refType == RefTypeUpperVectorSaveUse\r
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE\r
+ ) &&\r
+ !AllocateIfProfitable();\r
+ }\r
+\r
+ void setAllocateIfProfitable(bool val)\r
+ {\r
+ allocRegIfProfitable = val;\r
+ }\r
+\r
+ // Returns true whether this ref position is to be allocated\r
+ // a reg only if it is profitable.\r
+ bool AllocateIfProfitable()\r
+ {\r
+ // TODO-CQ: Right now if a ref position is marked as\r
+ // copyreg or movereg, then it is not treated as\r
+ // 'allocate if profitable'. This is an implementation\r
+ // limitation that needs to be addressed.\r
+ return allocRegIfProfitable && !copyReg && !moveReg;\r
+ }\r
+\r
+ void setMultiRegIdx(unsigned idx)\r
+ {\r
+ multiRegIdx = idx;\r
+ assert(multiRegIdx == idx);\r
+ }\r
+\r
+ unsigned getMultiRegIdx()\r
+ {\r
+ return multiRegIdx;\r
+ }\r
+\r
+ LsraLocation getRefEndLocation()\r
+ {\r
+ return delayRegFree ? nodeLocation + 1 : nodeLocation;\r
+ }\r
+\r
+ bool isIntervalRef()\r
+ {\r
+ return (!isPhysRegRef && (referent != nullptr));\r
+ }\r
+\r
+ // isTrueDef indicates that the RefPosition is a non-update def of a non-internal\r
+ // interval\r
+ bool isTrueDef()\r
+ {\r
+ return (refType == RefTypeDef && isIntervalRef() && !getInterval()->isInternal);\r
+ }\r
+\r
+ // isFixedRefOfRegMask indicates that the RefPosition has a fixed assignment to the register\r
+ // specified by the given mask\r
+ bool isFixedRefOfRegMask(regMaskTP regMask)\r
+ {\r
+ assert(genMaxOneBit(regMask));\r
+ return (registerAssignment == regMask);\r
+ }\r
+\r
+ // isFixedRefOfReg indicates that the RefPosition has a fixed assignment to the given register\r
+ bool isFixedRefOfReg(regNumber regNum)\r
+ {\r
+ return (isFixedRefOfRegMask(genRegMask(regNum)));\r
+ }\r
+\r
+#ifdef DEBUG\r
+ // operator= copies everything except 'rpNum', which must remain unique\r
+ RefPosition& operator=(const RefPosition& rp)\r
+ {\r
+ unsigned rpNumSave = rpNum;\r
+ memcpy(this, &rp, sizeof(rp));\r
+ rpNum = rpNumSave;\r
+ return *this;\r
+ }\r
+\r
+ void dump();\r
+#endif // DEBUG\r
+};\r
+\r
+#ifdef DEBUG\r
+void dumpRegMask(regMaskTP regs);\r
+#endif // DEBUG\r
+\r
+/*****************************************************************************/\r
+#endif //_LSRA_H_\r
+/*****************************************************************************/\r
// Return Value:
// None.
//
-void LinearScan::TreeNodeInfoInitReturn(GenTree* tree)
+void LinearScan::TreeNodeInfoInitReturn(GenTree* tree, TreeNodeInfo* info)
{
- TreeNodeInfo* info = &(tree->gtLsraInfo);
- GenTree* op1 = tree->gtGetOp1();
+ GenTree* op1 = tree->gtGetOp1();
assert(info->dstCount == 0);
if (tree->TypeGet() == TYP_LONG)
{
assert((op1->OperGet() == GT_LONG) && op1->isContained());
- GenTree* loVal = op1->gtGetOp1();
- GenTree* hiVal = op1->gtGetOp2();
- info->srcCount = 2;
- loVal->gtLsraInfo.setSrcCandidates(this, RBM_LNGRET_LO);
- hiVal->gtLsraInfo.setSrcCandidates(this, RBM_LNGRET_HI);
+ GenTree* loVal = op1->gtGetOp1();
+ GenTree* hiVal = op1->gtGetOp2();
+ info->srcCount = 2;
+ LocationInfoListNode* loValInfo = getLocationInfo(loVal);
+ LocationInfoListNode* hiValInfo = getLocationInfo(hiVal);
+ loValInfo->info.setSrcCandidates(this, RBM_LNGRET_LO);
+ hiValInfo->info.setSrcCandidates(this, RBM_LNGRET_HI);
+ useList.Append(loValInfo);
+ useList.Append(hiValInfo);
}
- else
+ else if ((tree->TypeGet() != TYP_VOID) && !op1->isContained())
{
regMaskTP useCandidates = RBM_NONE;
}
}
+ LocationInfoListNode* locationInfo = getLocationInfo(op1);
if (useCandidates != RBM_NONE)
{
- tree->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(this, useCandidates);
+ locationInfo->info.setSrcCandidates(this, useCandidates);
}
+ useList.Append(locationInfo);
}
}
-void LinearScan::TreeNodeInfoInitLclHeap(GenTree* tree)
+void LinearScan::TreeNodeInfoInitLclHeap(GenTree* tree, TreeNodeInfo* info)
{
- TreeNodeInfo* info = &(tree->gtLsraInfo);
-
assert(info->dstCount == 1);
// Need a variable number of temp regs (see genLclHeap() in codegenarm.cpp):
// target (regCnt) + tmp + [psp]
info->srcCount = 1;
info->internalIntCount = hasPspSym ? 2 : 1;
+ appendLocationInfoToList(size);
}
// If we are needed in temporary registers we should be sure that
// requirements needed by LSRA to build the Interval Table (source,
// destination and internal [temp] register counts).
//
-void LinearScan::TreeNodeInfoInit(GenTree* tree)
+void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info)
{
- unsigned kind = tree->OperKind();
- TreeNodeInfo* info = &(tree->gtLsraInfo);
- RegisterType registerType = TypeGet(tree);
+ unsigned kind = tree->OperKind();
+ RegisterType registerType = TypeGet(tree);
if (tree->isContained())
{
case GT_STORE_LCL_FLD:
case GT_STORE_LCL_VAR:
- TreeNodeInfoInitStoreLoc(tree->AsLclVarCommon());
+ TreeNodeInfoInitStoreLoc(tree->AsLclVarCommon(), info);
break;
case GT_NOP:
op1 = tree->gtOp.gtOp1;
assert(varTypeIsFloating(op1));
assert(op1->TypeGet() == tree->TypeGet());
+ appendLocationInfoToList(op1);
switch (tree->gtIntrinsic.gtIntrinsicId)
{
case GT_CAST:
{
- info->srcCount = 1;
assert(info->dstCount == 1);
// Non-overflow casts to/from float/double are done using SSE2 instructions
var_types castToType = tree->CastToType();
GenTreePtr castOp = tree->gtCast.CastOp();
var_types castOpType = castOp->TypeGet();
+ info->srcCount = GetOperandInfo(castOp);
if (tree->gtFlags & GTF_UNSIGNED)
{
castOpType = genUnsignedType(castOpType);
break;
case GT_SWITCH_TABLE:
- info->srcCount = 2;
assert(info->dstCount == 0);
+ info->srcCount = appendBinaryLocationInfoToList(tree->AsOp());
+ assert(info->srcCount == 2);
break;
case GT_ASG:
// everything is made explicit by adding casts.
assert(tree->gtOp.gtOp1->TypeGet() == tree->gtOp.gtOp2->TypeGet());
- info->srcCount = 2;
assert(info->dstCount == 1);
-
+ info->srcCount = appendBinaryLocationInfoToList(tree->AsOp());
+ assert(info->srcCount == 2);
break;
}
case GT_AND:
case GT_OR:
case GT_XOR:
- info->srcCount = tree->gtOp.gtOp2->isContained() ? 1 : 2;
assert(info->dstCount == 1);
+ info->srcCount = appendBinaryLocationInfoToList(tree->AsOp());
+ assert(info->srcCount == (tree->gtOp.gtOp2->isContained() ? 1 : 2));
break;
case GT_RETURNTRAP:
// + a conditional call
info->srcCount = 1;
assert(info->dstCount == 0);
+ appendLocationInfoToList(tree->gtOp.gtOp1);
break;
case GT_MUL:
case GT_MULHI:
case GT_UDIV:
{
- info->srcCount = 2;
assert(info->dstCount == 1);
+ info->srcCount = appendBinaryLocationInfoToList(tree->AsOp());
+ assert(info->srcCount == 2);
}
break;
case GT_MUL_LONG:
- info->srcCount = 2;
info->dstCount = 2;
+ info->srcCount = appendBinaryLocationInfoToList(tree->AsOp());
+ assert(info->srcCount == 2);
break;
case GT_LIST:
tree->ClearUnusedValue();
info->isLocalDefUse = false;
- // An unused GT_LONG node needs to consume its sources.
+ // An unused GT_LONG node needs to consume its sources, but need not produce a register.
info->srcCount = 2;
info->dstCount = 0;
+ appendLocationInfoToList(tree->gtGetOp1());
+ appendLocationInfoToList(tree->gtGetOp2());
break;
case GT_CNS_DBL:
break;
case GT_RETURN:
- TreeNodeInfoInitReturn(tree);
+ TreeNodeInfoInitReturn(tree, info);
break;
case GT_RETFILT:
info->srcCount = 1;
info->setSrcCandidates(this, RBM_INTRET);
- tree->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(this, RBM_INTRET);
+ LocationInfoListNode* locationInfo = getLocationInfo(tree->gtOp.gtOp1);
+ locationInfo->info.setSrcCandidates(this, RBM_INTRET);
+ useList.Append(locationInfo);
}
break;
// Consumes arrLen & index - has no result
info->srcCount = 2;
assert(info->dstCount == 0);
+ appendLocationInfoToList(tree->AsBoundsChk()->gtIndex);
+ appendLocationInfoToList(tree->AsBoundsChk()->gtArrLen);
}
break;
break;
case GT_ARR_INDEX:
+ {
info->srcCount = 2;
assert(info->dstCount == 1);
info->internalIntCount = 1;
// For GT_ARR_INDEX, the lifetime of the arrObj must be extended because it is actually used multiple
// times while the result is being computed.
- tree->AsArrIndex()->ArrObj()->gtLsraInfo.isDelayFree = true;
- info->hasDelayFreeSrc = true;
- break;
+ LocationInfoListNode* arrObjInfo = getLocationInfo(tree->AsArrIndex()->ArrObj());
+ arrObjInfo->info.isDelayFree = true;
+ useList.Append(arrObjInfo);
+ useList.Append(getLocationInfo(tree->AsArrIndex()->IndexExpr()));
+ info->hasDelayFreeSrc = true;
+ }
+ break;
case GT_ARR_OFFSET:
+
// This consumes the offset, if any, the arrObj and the effective index,
// and produces the flattened offset for this dimension.
assert(info->dstCount == 1);
// from any of the operand's registers, but may be the same as targetReg.
info->internalIntCount = 1;
info->srcCount = 3;
+ appendLocationInfoToList(tree->AsArrOffs()->gtOffset);
}
+ appendLocationInfoToList(tree->AsArrOffs()->gtIndex);
+ appendLocationInfoToList(tree->AsArrOffs()->gtArrObj);
break;
case GT_LEA:
// This LEA is instantiating an address, so we set up the srcCount and dstCount here.
info->srcCount = 0;
+ assert(info->dstCount == 1);
if (lea->HasBase())
{
info->srcCount++;
+ appendLocationInfoToList(tree->AsAddrMode()->Base());
}
if (lea->HasIndex())
{
info->srcCount++;
+ appendLocationInfoToList(tree->AsAddrMode()->Index());
}
- assert(info->dstCount == 1);
// An internal register may be needed too; the logic here should be in sync with the
// genLeaInstruction()'s requirements for a such register.
case GT_NEG:
info->srcCount = 1;
assert(info->dstCount == 1);
+ appendLocationInfoToList(tree->gtOp.gtOp1);
break;
case GT_NOT:
info->srcCount = 1;
assert(info->dstCount == 1);
+ appendLocationInfoToList(tree->gtOp.gtOp1);
break;
case GT_LSH:
case GT_ROR:
case GT_LSH_HI:
case GT_RSH_LO:
- TreeNodeInfoInitShiftRotate(tree);
+ TreeNodeInfoInitShiftRotate(tree, info);
break;
case GT_EQ:
case GT_GE:
case GT_GT:
case GT_CMP:
- TreeNodeInfoInitCmp(tree);
+ TreeNodeInfoInitCmp(tree, info);
break;
case GT_CKFINITE:
info->srcCount = 1;
assert(info->dstCount == 1);
info->internalIntCount = 1;
+ appendLocationInfoToList(tree->gtOp.gtOp1);
break;
case GT_CALL:
- TreeNodeInfoInitCall(tree->AsCall());
+ TreeNodeInfoInitCall(tree->AsCall(), info);
break;
case GT_ADDR:
case GT_STORE_BLK:
case GT_STORE_OBJ:
case GT_STORE_DYN_BLK:
- TreeNodeInfoInitBlockStore(tree->AsBlk());
+ TreeNodeInfoInitBlockStore(tree->AsBlk(), info);
break;
case GT_INIT_VAL:
break;
case GT_LCLHEAP:
- TreeNodeInfoInitLclHeap(tree);
+ TreeNodeInfoInitLclHeap(tree, info);
break;
case GT_STOREIND:
if (compiler->codeGen->gcInfo.gcIsWriteBarrierAsgNode(tree))
{
info->srcCount = 2;
- TreeNodeInfoInitGCWriteBarrier(tree);
+ TreeNodeInfoInitGCWriteBarrier(tree, info);
break;
}
- TreeNodeInfoInitIndir(tree->AsIndir());
+ TreeNodeInfoInitIndir(tree->AsIndir(), info);
// No contained source on ARM.
assert(!src->isContained());
info->srcCount++;
+ appendLocationInfoToList(src);
}
break;
assert(!tree->gtGetOp1()->isContained());
info->srcCount = 1;
info->internalIntCount = 1;
+ appendLocationInfoToList(tree->gtOp.gtOp1);
break;
case GT_IND:
assert(info->dstCount == 1);
info->srcCount = 1;
- TreeNodeInfoInitIndir(tree->AsIndir());
+ TreeNodeInfoInitIndir(tree->AsIndir(), info);
break;
case GT_CATCH_ARG:
{
assert(info->dstCount == 1);
}
+ appendLocationInfoToList(tree->gtOp.gtOp1);
break;
case GT_PUTARG_SPLIT:
- TreeNodeInfoInitPutArgSplit(tree->AsPutArgSplit());
+ TreeNodeInfoInitPutArgSplit(tree->AsPutArgSplit(), info);
break;
case GT_PUTARG_STK:
- TreeNodeInfoInitPutArgStk(tree->AsPutArgStk());
+ TreeNodeInfoInitPutArgStk(tree->AsPutArgStk(), info);
break;
case GT_PUTARG_REG:
- TreeNodeInfoInitPutArgReg(tree->AsUnOp());
+ TreeNodeInfoInitPutArgReg(tree->AsUnOp(), info);
break;
case GT_BITCAST:
{
info->srcCount = 1;
assert(info->dstCount == 1);
+ LocationInfoListNode* locationInfo = getLocationInfo(tree->gtOp.gtOp1);
+ locationInfo->info.isTgtPref = true;
+ useList.Append(locationInfo);
regNumber argReg = tree->gtRegNum;
regMaskTP argMask = genRegMask(argReg);
info->setDstCandidates(this, argMask);
info->setSrcCandidates(this, argMask);
- tree->AsUnOp()->gtOp1->gtLsraInfo.isTgtPref = true;
}
break;
}
else if (kind & (GTK_SMPOP))
{
- if (tree->gtGetOp2IfPresent() != nullptr)
- {
- info->srcCount = 2;
- }
- else
- {
- info->srcCount = 1;
- }
+ info->srcCount = appendBinaryLocationInfoToList(tree->AsOp());
}
else
{
break;
case GT_INDEX_ADDR:
- info->srcCount = 2;
info->dstCount = 1;
info->internalIntCount = 1;
+ info->srcCount = appendBinaryLocationInfoToList(tree->AsOp());
+ assert(info->srcCount == 2);
break;
} // end switch (tree->OperGet())
assert((info->dstCount < 2) || tree->IsMultiRegNode());
assert(info->isLocalDefUse == (tree->IsValue() && tree->IsUnusedValue()));
assert(!tree->IsUnusedValue() || (info->dstCount != 0));
+ assert(info->dstCount == tree->GetRegisterDstCount());
}
#endif // _TARGET_ARM_
// requirements needed by LSRA to build the Interval Table (source,
// destination and internal [temp] register counts).
//
-void LinearScan::TreeNodeInfoInit(GenTree* tree)
+void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info)
{
- unsigned kind = tree->OperKind();
- TreeNodeInfo* info = &(tree->gtLsraInfo);
- RegisterType registerType = TypeGet(tree);
+ unsigned kind = tree->OperKind();
+ RegisterType registerType = TypeGet(tree);
if (tree->isContained())
{
}
else if (kind & (GTK_SMPOP))
{
- if (tree->gtGetOp2IfPresent() != nullptr)
- {
- info->srcCount = 2;
- }
- else
- {
- info->srcCount = 1;
- }
+ info->srcCount = appendBinaryLocationInfoToList(tree->AsOp());
}
else
{
case GT_STORE_LCL_VAR:
info->srcCount = 1;
assert(info->dstCount == 0);
- TreeNodeInfoInitStoreLoc(tree->AsLclVarCommon());
+ TreeNodeInfoInitStoreLoc(tree->AsLclVarCommon(), info);
break;
case GT_LIST:
break;
case GT_RETURN:
- TreeNodeInfoInitReturn(tree);
+ TreeNodeInfoInitReturn(tree, info);
break;
case GT_RETFILT:
assert(info->dstCount == 0);
info->setSrcCandidates(this, RBM_INTRET);
- tree->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(this, RBM_INTRET);
+ LocationInfoListNode* locationInfo = getLocationInfo(tree->gtOp.gtOp1);
+ locationInfo->info.setSrcCandidates(this, RBM_INTRET);
+ useList.Append(locationInfo);
}
break;
break;
case GT_SWITCH_TABLE:
- info->srcCount = 2;
+ info->srcCount = appendBinaryLocationInfoToList(tree->AsOp());
info->internalIntCount = 1;
assert(info->dstCount == 0);
break;
// No implicit conversions at this stage as the expectation is that
// everything is made explicit by adding casts.
assert(tree->gtOp.gtOp1->TypeGet() == tree->gtOp.gtOp2->TypeGet());
-
- info->srcCount = 2;
}
__fallthrough;
case GT_AND:
case GT_OR:
case GT_XOR:
- info->srcCount = tree->gtOp.gtOp2->isContained() ? 1 : 2;
+ info->srcCount = appendBinaryLocationInfoToList(tree->AsOp());
assert(info->dstCount == 1);
break;
case GT_RETURNTRAP:
// this just turns into a compare of its child with an int
// + a conditional call
+ appendLocationInfoToList(tree->gtGetOp1());
info->srcCount = 1;
assert(info->dstCount == 0);
break;
case GT_MULHI:
case GT_UDIV:
{
- info->srcCount = 2;
+ info->srcCount = appendBinaryLocationInfoToList(tree->AsOp());
assert(info->dstCount == 1);
}
break;
assert(varTypeIsFloating(op1));
assert(op1->TypeGet() == tree->TypeGet());
+ appendLocationInfoToList(op1);
info->srcCount = 1;
assert(info->dstCount == 1);
}
#ifdef FEATURE_SIMD
case GT_SIMD:
- TreeNodeInfoInitSIMD(tree->AsSIMD());
+ TreeNodeInfoInitSIMD(tree->AsSIMD(), info);
break;
#endif // FEATURE_SIMD
// register.
// see CodeGen::genIntToIntCast()
+ appendLocationInfoToList(tree->gtGetOp1());
info->srcCount = 1;
assert(info->dstCount == 1);
break;
case GT_NEG:
- info->srcCount = 1;
- assert(info->dstCount == 1);
- break;
-
case GT_NOT:
+ appendLocationInfoToList(tree->gtGetOp1());
info->srcCount = 1;
assert(info->dstCount == 1);
break;
case GT_RSH:
case GT_RSZ:
case GT_ROR:
- TreeNodeInfoInitShiftRotate(tree);
+ TreeNodeInfoInitShiftRotate(tree, info);
break;
case GT_EQ:
case GT_TEST_EQ:
case GT_TEST_NE:
case GT_JCMP:
- TreeNodeInfoInitCmp(tree);
+ TreeNodeInfoInitCmp(tree, info);
break;
case GT_CKFINITE:
+ appendLocationInfoToList(tree->gtOp.gtOp1);
info->srcCount = 1;
assert(info->dstCount == 1);
info->internalIntCount = 1;
// For ARMv8 exclusives the lifetime of the addr and data must be extended because
            // it may be used multiple times during retries
- cmpXchgNode->gtOpLocation->gtLsraInfo.isDelayFree = true;
- cmpXchgNode->gtOpValue->gtLsraInfo.isDelayFree = true;
+ LocationInfoListNode* locationInfo = getLocationInfo(tree->gtCmpXchg.gtOpLocation);
+ locationInfo->info.isDelayFree = true;
+ useList.Append(locationInfo);
+ LocationInfoListNode* valueInfo = getLocationInfo(tree->gtCmpXchg.gtOpValue);
+ valueInfo->info.isDelayFree = true;
+ useList.Append(valueInfo);
if (!cmpXchgNode->gtOpComparand->isContained())
{
- cmpXchgNode->gtOpComparand->gtLsraInfo.isDelayFree = true;
+ LocationInfoListNode* comparandInfo = getLocationInfo(tree->gtCmpXchg.gtOpComparand);
+ comparandInfo->info.isDelayFree = true;
+ useList.Append(comparandInfo);
}
info->hasDelayFreeSrc = true;
case GT_LOCKADD:
case GT_XADD:
case GT_XCHG:
+ {
assert(info->dstCount == (tree->TypeGet() == TYP_VOID) ? 0 : 1);
info->srcCount = tree->gtOp.gtOp2->isContained() ? 1 : 2;
info->internalIntCount = (tree->OperGet() == GT_XCHG) ? 1 : 2;
// For ARMv8 exclusives the lifetime of the addr and data must be extended because
            // it may be used multiple times during retries
- tree->gtOp.gtOp1->gtLsraInfo.isDelayFree = true;
+ assert(!tree->gtOp.gtOp1->isContained());
+ LocationInfoListNode* op1Info = getLocationInfo(tree->gtOp.gtOp1);
+ op1Info->info.isDelayFree = true;
+ useList.Append(op1Info);
if (!tree->gtOp.gtOp2->isContained())
{
- tree->gtOp.gtOp2->gtLsraInfo.isDelayFree = true;
+ LocationInfoListNode* op2Info = getLocationInfo(tree->gtOp.gtOp2);
+ op2Info->info.isDelayFree = true;
+ useList.Append(op2Info);
}
info->hasDelayFreeSrc = true;
// Internals may not collide with target
info->isInternalRegDelayFree = true;
- break;
+ }
+ break;
case GT_PUTARG_STK:
- TreeNodeInfoInitPutArgStk(tree->AsPutArgStk());
+ TreeNodeInfoInitPutArgStk(tree->AsPutArgStk(), info);
break;
case GT_PUTARG_REG:
- TreeNodeInfoInitPutArgReg(tree->AsUnOp());
+ TreeNodeInfoInitPutArgReg(tree->AsUnOp(), info);
break;
case GT_CALL:
- TreeNodeInfoInitCall(tree->AsCall());
+ TreeNodeInfoInitCall(tree->AsCall(), info);
break;
case GT_ADDR:
case GT_STORE_BLK:
case GT_STORE_OBJ:
case GT_STORE_DYN_BLK:
- TreeNodeInfoInitBlockStore(tree->AsBlk());
+ TreeNodeInfoInitBlockStore(tree->AsBlk(), info);
break;
case GT_INIT_VAL:
}
else
{
+ appendLocationInfoToList(size);
info->srcCount = 1;
if (!compiler->info.compInitMem)
{
{
GenTreeBoundsChk* node = tree->AsBoundsChk();
// Consumes arrLen & index - has no result
- info->srcCount = 2;
assert(info->dstCount == 0);
GenTree* intCns = nullptr;
GenTree* other = nullptr;
- if (node->gtIndex->isContained() || node->gtArrLen->isContained())
- {
- info->srcCount = 1;
- }
- else
- {
- info->srcCount = 2;
- }
+ info->srcCount = GetOperandInfo(tree->AsBoundsChk()->gtIndex);
+ info->srcCount += GetOperandInfo(tree->AsBoundsChk()->gtArrLen);
}
break;
break;
case GT_ARR_INDEX:
+ {
info->srcCount = 2;
assert(info->dstCount == 1);
info->internalIntCount = 1;
// For GT_ARR_INDEX, the lifetime of the arrObj must be extended because it is actually used multiple
// times while the result is being computed.
- tree->AsArrIndex()->ArrObj()->gtLsraInfo.isDelayFree = true;
- info->hasDelayFreeSrc = true;
- break;
+ LocationInfoListNode* arrObjInfo = getLocationInfo(tree->AsArrIndex()->ArrObj());
+ arrObjInfo->info.isDelayFree = true;
+ useList.Append(arrObjInfo);
+ useList.Append(getLocationInfo(tree->AsArrIndex()->IndexExpr()));
+ info->hasDelayFreeSrc = true;
+ }
+ break;
case GT_ARR_OFFSET:
// This consumes the offset, if any, the arrObj and the effective index,
// and produces the flattened offset for this dimension.
- info->srcCount = tree->gtArrOffs.gtOffset->isContained() ? 2 : 3;
+ info->srcCount = 2;
+ if (!tree->gtArrOffs.gtOffset->isContained())
+ {
+ appendLocationInfoToList(tree->AsArrOffs()->gtOffset);
+ info->srcCount++;
+ }
+ appendLocationInfoToList(tree->AsArrOffs()->gtIndex);
+ appendLocationInfoToList(tree->AsArrOffs()->gtArrObj);
assert(info->dstCount == 1);
info->internalIntCount = 1;
break;
if (base != nullptr)
{
info->srcCount++;
+ appendLocationInfoToList(base);
}
if (index != nullptr)
{
info->srcCount++;
+ appendLocationInfoToList(index);
}
assert(info->dstCount == 1);
if (compiler->codeGen->gcInfo.gcIsWriteBarrierAsgNode(tree))
{
info->srcCount = 2;
- TreeNodeInfoInitGCWriteBarrier(tree);
+ TreeNodeInfoInitGCWriteBarrier(tree, info);
break;
}
- TreeNodeInfoInitIndir(tree->AsIndir());
+ TreeNodeInfoInitIndir(tree->AsIndir(), info);
if (!tree->gtGetOp2()->isContained())
{
+ appendLocationInfoToList(tree->gtGetOp2());
info->srcCount++;
}
}
// is required, and it is not a localDefUse.
assert(info->dstCount == 0);
assert(!tree->gtGetOp1()->isContained());
+ appendLocationInfoToList(tree->gtOp.gtOp1);
info->srcCount = 1;
break;
case GT_IND:
assert(info->dstCount == 1);
- info->srcCount = 1;
- TreeNodeInfoInitIndir(tree->AsIndir());
+ TreeNodeInfoInitIndir(tree->AsIndir(), info);
break;
case GT_CATCH_ARG:
break;
case GT_INDEX_ADDR:
- info->srcCount = 2;
- info->dstCount = 1;
+ assert(info->dstCount == 1);
+ info->srcCount = appendBinaryLocationInfoToList(tree->AsOp());
info->internalIntCount = 1;
break;
} // end switch (tree->OperGet())
assert((info->dstCount < 2) || tree->IsMultiRegCall());
assert(info->isLocalDefUse == (tree->IsValue() && tree->IsUnusedValue()));
assert(!tree->IsUnusedValue() || (info->dstCount != 0));
+ assert(info->dstCount == tree->GetRegisterDstCount());
}
//------------------------------------------------------------------------
// Return Value:
// None.
//
-void LinearScan::TreeNodeInfoInitReturn(GenTree* tree)
+void LinearScan::TreeNodeInfoInitReturn(GenTree* tree, TreeNodeInfo* info)
{
- TreeNodeInfo* info = &(tree->gtLsraInfo);
-
GenTree* op1 = tree->gtGetOp1();
regMaskTP useCandidates = RBM_NONE;
info->srcCount = ((tree->TypeGet() == TYP_VOID) || op1->isContained()) ? 0 : 1;
assert(info->dstCount == 0);
- if (varTypeIsStruct(tree))
+ if ((tree->TypeGet() != TYP_VOID) && !op1->isContained())
{
- // op1 has to be either an lclvar or a multi-reg returning call
- if (op1->OperGet() != GT_LCL_VAR)
+ if (varTypeIsStruct(tree))
{
- noway_assert(op1->IsMultiRegCall());
+ // op1 has to be either an lclvar or a multi-reg returning call
+ if (op1->OperGet() != GT_LCL_VAR)
+ {
+ noway_assert(op1->IsMultiRegCall());
- ReturnTypeDesc* retTypeDesc = op1->AsCall()->GetReturnTypeDesc();
- info->srcCount = retTypeDesc->GetReturnRegCount();
- useCandidates = retTypeDesc->GetABIReturnRegs();
+ ReturnTypeDesc* retTypeDesc = op1->AsCall()->GetReturnTypeDesc();
+ info->srcCount = retTypeDesc->GetReturnRegCount();
+ useCandidates = retTypeDesc->GetABIReturnRegs();
+ }
}
- }
- else
- {
- // Non-struct type return - determine useCandidates
- switch (tree->TypeGet())
+ else
{
- case TYP_VOID:
- useCandidates = RBM_NONE;
- break;
- case TYP_FLOAT:
- useCandidates = RBM_FLOATRET;
- break;
- case TYP_DOUBLE:
- useCandidates = RBM_DOUBLERET;
- break;
- case TYP_LONG:
- useCandidates = RBM_LNGRET;
- break;
- default:
- useCandidates = RBM_INTRET;
- break;
+ // Non-struct type return - determine useCandidates
+ switch (tree->TypeGet())
+ {
+ case TYP_VOID:
+ useCandidates = RBM_NONE;
+ break;
+ case TYP_FLOAT:
+ useCandidates = RBM_FLOATRET;
+ break;
+ case TYP_DOUBLE:
+ useCandidates = RBM_DOUBLERET;
+ break;
+ case TYP_LONG:
+ useCandidates = RBM_LNGRET;
+ break;
+ default:
+ useCandidates = RBM_INTRET;
+ break;
+ }
}
- }
- if (useCandidates != RBM_NONE)
- {
- tree->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(this, useCandidates);
+ LocationInfoListNode* locationInfo = getLocationInfo(op1);
+ if (useCandidates != RBM_NONE)
+ {
+ locationInfo->info.setSrcCandidates(this, useCandidates);
+ }
+ useList.Append(locationInfo);
}
}
// Return Value:
// None.
-void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree)
+void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree, TreeNodeInfo* info)
{
- TreeNodeInfo* info = &(simdTree->gtLsraInfo);
-
// Only SIMDIntrinsicInit can be contained
if (simdTree->isContained())
{
}
assert(info->dstCount == 1);
- switch (simdTree->gtSIMDIntrinsicID)
+ GenTree* op1 = simdTree->gtOp.gtOp1;
+ GenTree* op2 = simdTree->gtOp.gtOp2;
+ if (!op1->OperIs(GT_LIST))
{
- GenTree* op1;
- GenTree* op2;
+ info->srcCount += GetOperandInfo(op1);
+ }
+ if ((op2 != nullptr) && !op2->isContained())
+ {
+ info->srcCount += GetOperandInfo(op2);
+ }
+ switch (simdTree->gtSIMDIntrinsicID)
+ {
case SIMDIntrinsicInit:
- info->srcCount = simdTree->gtGetOp1()->isContained() ? 0 : 1;
+ assert(info->srcCount == (simdTree->gtGetOp1()->isContained() ? 0 : 1));
break;
case SIMDIntrinsicCast:
case SIMDIntrinsicConvertToUInt64:
case SIMDIntrinsicWidenLo:
case SIMDIntrinsicWidenHi:
- info->srcCount = 1;
+ assert(info->srcCount == 1);
break;
case SIMDIntrinsicGetItem:
+ {
op1 = simdTree->gtGetOp1();
op2 = simdTree->gtGetOp2();
- // We have an object and an item, which may be contained.
- info->srcCount = (op2->isContained() ? 1 : 2);
-
- if (op1->isContained())
- {
- // Although GT_IND of TYP_SIMD12 reserves an internal register for reading 4 and 8 bytes from memory
- // and assembling them into target reg, it is not required in this case.
- op1->gtLsraInfo.internalIntCount = 0;
- op1->gtLsraInfo.internalFloatCount = 0;
- info->srcCount -= 1;
- info->srcCount += GetOperandSourceCount(op1);
- }
-
+ // We have an object and an index, either of which may be contained.
if (!op2->IsCnsIntOrI() && (!op1->isContained() || op1->OperIsLocal()))
{
// If the index is not a constant and not contained or is a local
info->internalIntCount = 1;
// internal register must not clobber input index
- op2->gtLsraInfo.isDelayFree = true;
- info->hasDelayFreeSrc = true;
+ LocationInfoListNode* op2Info =
+ (op1->isContained()) ? useList.Begin() : useList.GetSecond(INDEBUG(op2));
+ op2Info->info.isDelayFree = true;
+ info->hasDelayFreeSrc = true;
}
if (!op2->IsCnsIntOrI() && (!op1->isContained()))
// we will use the SIMD temp location to store the vector.
compiler->getSIMDInitTempVarNum();
}
- break;
+ }
+ break;
case SIMDIntrinsicAdd:
case SIMDIntrinsicSub:
case SIMDIntrinsicGreaterThan:
case SIMDIntrinsicLessThanOrEqual:
case SIMDIntrinsicGreaterThanOrEqual:
- info->srcCount = 2;
+ assert(info->srcCount == 2);
break;
case SIMDIntrinsicSetX:
case SIMDIntrinsicSetZ:
case SIMDIntrinsicSetW:
case SIMDIntrinsicNarrow:
- info->srcCount = 2;
+ assert(info->srcCount == 2);
// Op1 will write to dst before Op2 is free
- simdTree->gtOp.gtOp2->gtLsraInfo.isDelayFree = true;
- info->hasDelayFreeSrc = true;
+ useList.GetSecond(INDEBUG(simdTree->gtGetOp2()))->info.isDelayFree = true;
+ info->hasDelayFreeSrc = true;
break;
case SIMDIntrinsicInitN:
{
- info->srcCount = (short)(simdTree->gtSIMDSize / genTypeSize(simdTree->gtSIMDBaseType));
+ var_types baseType = simdTree->gtSIMDBaseType;
+ info->srcCount = (short)(simdTree->gtSIMDSize / genTypeSize(baseType));
+ int initCount = 0;
+ for (GenTree* list = op1; list != nullptr; list = list->gtGetOp2())
+ {
+ assert(list->OperGet() == GT_LIST);
+ GenTree* listItem = list->gtGetOp1();
+ assert(listItem->TypeGet() == baseType);
+ assert(!listItem->isContained());
+ appendLocationInfoToList(listItem);
+ initCount++;
+ }
+ assert(initCount == info->srcCount);
if (varTypeIsFloating(simdTree->gtSIMDBaseType))
{
case SIMDIntrinsicInitArray:
// We have an array and an index, which may be contained.
- info->srcCount = simdTree->gtGetOp2()->isContained() ? 1 : 2;
+ assert(info->srcCount == (simdTree->gtGetOp2()->isContained() ? 1 : 2));
break;
case SIMDIntrinsicOpEquality:
case SIMDIntrinsicOpInEquality:
- info->srcCount = simdTree->gtGetOp2()->isContained() ? 1 : 2;
+ assert(info->srcCount == (simdTree->gtGetOp2()->isContained() ? 1 : 2));
info->setInternalCandidates(this, RBM_ALLFLOAT);
info->internalFloatCount = 1;
break;
case SIMDIntrinsicDotProduct:
- info->srcCount = 2;
+ assert(info->srcCount == 2);
info->setInternalCandidates(this, RBM_ALLFLOAT);
info->internalFloatCount = 1;
break;
case SIMDIntrinsicSelect:
// TODO-ARM64-CQ Allow lowering to see SIMDIntrinsicSelect so we can generate BSL VC, VA, VB
- // bsl target register must be VC. Reserve a temp in case we need to shuffle things
+ // bsl target register must be VC. Reserve a temp in case we need to shuffle things.
+ // This will require a different approach, as GenTreeSIMD has only two operands.
+ assert(!"SIMDIntrinsicSelect not yet supported");
+ assert(info->srcCount == 3);
info->setInternalCandidates(this, RBM_ALLFLOAT);
info->internalFloatCount = 1;
- info->srcCount = 3;
break;
case SIMDIntrinsicInitArrayX:
// - Setting the appropriate candidates for a store of a multi-reg call return value.
// - Handling of contained immediates.
//
-void LinearScan::TreeNodeInfoInitStoreLoc(GenTreeLclVarCommon* storeLoc)
+void LinearScan::TreeNodeInfoInitStoreLoc(GenTreeLclVarCommon* storeLoc, TreeNodeInfo* info)
{
- TreeNodeInfo* info = &(storeLoc->gtLsraInfo);
- GenTree* op1 = storeLoc->gtGetOp1();
+ GenTree* op1 = storeLoc->gtGetOp1();
assert(info->dstCount == 0);
-#ifdef _TARGET_ARM_
- if (varTypeIsLong(op1))
- {
- info->srcCount = 2;
- assert(!op1->OperIs(GT_LONG) || op1->isContained());
- }
- else
-#endif // _TARGET_ARM_
- if (op1->isContained())
- {
- info->srcCount = 0;
- }
- else if (op1->IsMultiRegCall())
+ if (op1->IsMultiRegCall())
{
// This is the case of var = call where call is returning
// a value in multiple return registers.
// srcCount = number of registers in which the value is returned by call
GenTreeCall* call = op1->AsCall();
ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
- info->srcCount = retTypeDesc->GetReturnRegCount();
+ unsigned regCount = retTypeDesc->GetReturnRegCount();
+ info->srcCount = regCount;
// Call node srcCandidates = Bitwise-OR(allregs(GetReturnRegType(i))) for all i=0..RetRegCount-1
- regMaskTP srcCandidates = allMultiRegCallNodeRegs(call);
- op1->gtLsraInfo.setSrcCandidates(this, srcCandidates);
+ regMaskTP srcCandidates = allMultiRegCallNodeRegs(call);
+ LocationInfoListNode* locInfo = getLocationInfo(op1);
+ locInfo->info.setSrcCandidates(this, srcCandidates);
+ useList.Append(locInfo);
+ }
+#ifdef _TARGET_ARM_
+ else if (varTypeIsLong(op1))
+ {
+ // The only possible operands for a GT_STORE_LCL_VAR are a multireg call node, which we have
+ // handled above, or a GT_LONG node.
+ assert(!op1->OperIs(GT_LONG) || op1->isContained());
+ info->srcCount = 2;
+ // TODO: Currently, GetOperandInfo always returns 1 for any non-contained node.
+ // Consider enhancing it to handle multi-reg nodes.
+ (void)GetOperandInfo(op1);
+ }
+#endif // _TARGET_ARM_
+ else if (op1->isContained())
+ {
+ info->srcCount = 0;
}
else
{
info->srcCount = 1;
+ appendLocationInfoToList(op1);
}
#ifdef FEATURE_SIMD
// Return Value:
// None.
//
-void LinearScan::TreeNodeInfoInitCmp(GenTreePtr tree)
+void LinearScan::TreeNodeInfoInitCmp(GenTreePtr tree, TreeNodeInfo* info)
{
- TreeNodeInfo* info = &(tree->gtLsraInfo);
+ info->srcCount = appendBinaryLocationInfoToList(tree->AsOp());
assert((info->dstCount == 1) || (tree->TypeGet() == TYP_VOID));
info->srcCount = tree->gtOp.gtOp2->isContained() ? 1 : 2;
}
-void LinearScan::TreeNodeInfoInitGCWriteBarrier(GenTree* tree)
+void LinearScan::TreeNodeInfoInitGCWriteBarrier(GenTree* tree, TreeNodeInfo* info)
{
- GenTreePtr dst = tree;
- GenTreePtr addr = tree->gtOp.gtOp1;
- GenTreePtr src = tree->gtOp.gtOp2;
-
- if (addr->OperGet() == GT_LEA)
- {
- // In the case where we are doing a helper assignment, if the dst
- // is an indir through an lea, we need to actually instantiate the
- // lea in a register
- GenTreeAddrMode* lea = addr->AsAddrMode();
-
- short leaSrcCount = 0;
- if (lea->Base() != nullptr)
- {
- leaSrcCount++;
- }
- if (lea->Index() != nullptr)
- {
- leaSrcCount++;
- }
- lea->gtLsraInfo.srcCount = leaSrcCount;
- lea->gtLsraInfo.dstCount = 1;
- }
+ GenTreePtr dst = tree;
+ GenTreePtr addr = tree->gtOp.gtOp1;
+ GenTreePtr src = tree->gtOp.gtOp2;
+ LocationInfoListNode* addrInfo = getLocationInfo(addr);
+ LocationInfoListNode* srcInfo = getLocationInfo(src);
+
+ // In the case where we are doing a helper assignment, even if the dst
+ // is an indir through an lea, we need to actually instantiate the
+ // lea in a register
+ assert(!addr->isContained() && !src->isContained());
+ useList.Append(addrInfo);
+ useList.Append(srcInfo);
+ info->srcCount = 2;
+ assert(info->dstCount == 0);
#if NOGC_WRITE_BARRIERS
NYI_ARM("NOGC_WRITE_BARRIERS");
// the 'addr' goes into x14 (REG_WRITE_BARRIER_DST_BYREF)
// the 'src' goes into x15 (REG_WRITE_BARRIER)
//
- addr->gtLsraInfo.setSrcCandidates(this, RBM_WRITE_BARRIER_DST_BYREF);
- src->gtLsraInfo.setSrcCandidates(this, RBM_WRITE_BARRIER);
+ addrInfo->info.setSrcCandidates(this, RBM_WRITE_BARRIER_DST_BYREF);
+ srcInfo->info.setSrcCandidates(this, RBM_WRITE_BARRIER);
#else
// For the standard JIT Helper calls
// op1 goes into REG_ARG_0 and
// op2 goes into REG_ARG_1
//
- addr->gtLsraInfo.setSrcCandidates(this, RBM_ARG_0);
- src->gtLsraInfo.setSrcCandidates(this, RBM_ARG_1);
+ addrInfo->info.setSrcCandidates(this, RBM_ARG_0);
+ srcInfo->info.setSrcCandidates(this, RBM_ARG_1);
#endif // NOGC_WRITE_BARRIERS
// Both src and dst must reside in a register, which they should since we haven't set
// either of them as contained.
- assert(addr->gtLsraInfo.dstCount == 1);
- assert(src->gtLsraInfo.dstCount == 1);
+ assert(addrInfo->info.dstCount == 1);
+ assert(srcInfo->info.dstCount == 1);
}
//------------------------------------------------------------------------
// Arguments:
// indirTree - GT_IND, GT_STOREIND or block gentree node
//
-void LinearScan::TreeNodeInfoInitIndir(GenTreeIndir* indirTree)
+void LinearScan::TreeNodeInfoInitIndir(GenTreeIndir* indirTree, TreeNodeInfo* info)
{
// If this is the rhs of a block copy (i.e. non-enregisterable struct),
// it has no register requirements.
return;
}
- TreeNodeInfo* info = &(indirTree->gtLsraInfo);
- bool isStore = (indirTree->gtOper == GT_STOREIND);
- info->srcCount = GetIndirSourceCount(indirTree);
+ bool isStore = (indirTree->gtOper == GT_STOREIND);
+ info->srcCount = GetIndirInfo(indirTree);
GenTree* addr = indirTree->Addr();
GenTree* index = nullptr;
// Return Value:
// The number of sources consumed by this node (srcCount).
//
-void LinearScan::TreeNodeInfoInitShiftRotate(GenTree* tree)
+int LinearScan::TreeNodeInfoInitShiftRotate(GenTree* tree, TreeNodeInfo* info)
{
- TreeNodeInfo* info = &(tree->gtLsraInfo);
-
+ GenTreePtr source = tree->gtOp.gtOp1;
GenTreePtr shiftBy = tree->gtOp.gtOp2;
- info->srcCount = shiftBy->isContained() ? 1 : 2;
- info->dstCount = 1;
+ assert(info->dstCount == 1);
+ if (!shiftBy->isContained())
+ {
+ appendLocationInfoToList(shiftBy);
+ info->srcCount = 1;
+ }
#ifdef _TARGET_ARM_
// The first operand of a GT_LSH_HI and GT_RSH_LO oper is a GT_LONG so that
// we can have a three operand form. Increment the srcCount.
- GenTreePtr source = tree->gtOp.gtOp1;
if (tree->OperGet() == GT_LSH_HI || tree->OperGet() == GT_RSH_LO)
{
assert((source->OperGet() == GT_LONG) && source->isContained());
- info->srcCount++;
+ info->srcCount += 2;
+ LocationInfoListNode* sourceLoInfo = getLocationInfo(source->gtOp.gtOp1);
+ useList.Append(sourceLoInfo);
+ LocationInfoListNode* sourceHiInfo = getLocationInfo(source->gtOp.gtOp2);
+ useList.Append(sourceHiInfo);
if (tree->OperGet() == GT_LSH_HI)
{
- GenTreePtr sourceLo = source->gtOp.gtOp1;
- sourceLo->gtLsraInfo.isDelayFree = true;
+ sourceLoInfo->info.isDelayFree = true;
}
else
{
- GenTreePtr sourceHi = source->gtOp.gtOp2;
- sourceHi->gtLsraInfo.isDelayFree = true;
+ sourceHiInfo->info.isDelayFree = true;
}
-
- source->gtLsraInfo.hasDelayFreeSrc = true;
- info->hasDelayFreeSrc = true;
+ info->hasDelayFreeSrc = true;
}
-
+ else
#endif // _TARGET_ARM_
+ {
+ appendLocationInfoToList(source);
+ info->srcCount++;
+ }
+ return info->srcCount;
}
//------------------------------------------------------------------------
// Return Value:
// None.
//
-void LinearScan::TreeNodeInfoInitPutArgReg(GenTreeUnOp* node)
+void LinearScan::TreeNodeInfoInitPutArgReg(GenTreeUnOp* node, TreeNodeInfo* info)
{
assert(node != nullptr);
assert(node->OperIsPutArgReg());
- node->gtLsraInfo.srcCount = 1;
- regNumber argReg = node->gtRegNum;
+ info->srcCount = 1;
+ regNumber argReg = node->gtRegNum;
assert(argReg != REG_NA);
// Set the register requirements for the node.
// The actual `long` types must have been transformed as a field list with two fields.
if (node->TypeGet() == TYP_LONG)
{
- node->gtLsraInfo.srcCount++;
- node->gtLsraInfo.dstCount = node->gtLsraInfo.srcCount;
+ info->srcCount++;
+ info->dstCount = info->srcCount;
assert(genRegArgNext(argReg) == REG_NEXT(argReg));
argMask |= genRegMask(REG_NEXT(argReg));
}
#endif // _TARGET_ARM_
-
- node->gtLsraInfo.setDstCandidates(this, argMask);
- node->gtLsraInfo.setSrcCandidates(this, argMask);
+ info->setDstCandidates(this, argMask);
+ info->setSrcCandidates(this, argMask);
// To avoid redundant moves, have the argument operand computed in the
// register in which the argument is passed to the call.
- node->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(this, getUseCandidates(node));
+ LocationInfoListNode* op1Info = getLocationInfo(node->gtOp.gtOp1);
+ op1Info->info.setSrcCandidates(this, info->getSrcCandidates(this));
+ op1Info->info.isDelayFree = true;
+ useList.Append(op1Info);
}
//------------------------------------------------------------------------
// Since the integer register is not associated with the arg node, we will reserve it as
// an internal register on the call so that it is not used during the evaluation of the call node
// (e.g. for the target).
-void LinearScan::HandleFloatVarArgs(GenTreeCall* call, GenTree* argNode, bool* callHasFloatRegArgs)
+void LinearScan::HandleFloatVarArgs(GenTreeCall* call, TreeNodeInfo* info, GenTree* argNode, bool* callHasFloatRegArgs)
{
#if FEATURE_VARARG
if (call->IsVarargs() && varTypeIsFloating(argNode))
regNumber argReg = argNode->gtRegNum;
regNumber targetReg = compiler->getCallArgIntRegister(argReg);
- call->gtLsraInfo.setInternalIntCount(call->gtLsraInfo.internalIntCount + 1);
- call->gtLsraInfo.addInternalCandidates(this, genRegMask(targetReg));
+ info->setInternalIntCount(info->internalIntCount + 1);
+ info->addInternalCandidates(this, genRegMask(targetReg));
}
#endif // FEATURE_VARARG
}
// Return Value:
// None.
//
-void LinearScan::TreeNodeInfoInitCall(GenTreeCall* call)
+void LinearScan::TreeNodeInfoInitCall(GenTreeCall* call, TreeNodeInfo* info)
{
- TreeNodeInfo* info = &(call->gtLsraInfo);
bool hasMultiRegRetVal = false;
ReturnTypeDesc* retTypeDesc = nullptr;
info->dstCount = 0;
}
- GenTree* ctrlExpr = call->gtControlExpr;
+ GenTree* ctrlExpr = call->gtControlExpr;
+ LocationInfoListNode* ctrlExprInfo = nullptr;
if (call->gtCallType == CT_INDIRECT)
{
// either gtControlExpr != null or gtCallAddr != null.
// set reg requirements on call target represented as control sequence.
if (ctrlExpr != nullptr)
{
+ ctrlExprInfo = getLocationInfo(ctrlExpr);
+
// we should never see a gtControlExpr whose type is void.
assert(ctrlExpr->TypeGet() != TYP_VOID);
- info->srcCount++;
-
// In case of fast tail implemented as jmp, make sure that gtControlExpr is
// computed into a register.
if (call->IsFastTailCall())
{
// Fast tail call - make sure that call target is always computed in R12(ARM32)/IP0(ARM64)
// so that epilog sequence can generate "br xip0/r12" to achieve fast tail call.
- ctrlExpr->gtLsraInfo.setSrcCandidates(this, RBM_FASTTAILCALL_TARGET);
+ ctrlExprInfo->info.setSrcCandidates(this, RBM_FASTTAILCALL_TARGET);
}
}
#ifdef _TARGET_ARM_
// have been decomposed.
if (putArgChild->TypeGet() == TYP_LONG)
{
- argNode->gtLsraInfo.srcCount = 2;
- expectedSlots = 2;
+ useList.GetTreeNodeInfo(argNode).srcCount = 2;
+ expectedSlots = 2;
}
else if (putArgChild->TypeGet() == TYP_DOUBLE)
{
for (GenTreeFieldList* entry = argNode->AsFieldList(); entry != nullptr; entry = entry->Rest())
{
info->srcCount++;
+ appendLocationInfoToList(entry->Current());
#ifdef DEBUG
assert(entry->Current()->OperIs(GT_PUTARG_REG));
assert(entry->Current()->gtRegNum == argReg);
#ifdef _TARGET_ARM_
else if (argNode->OperGet() == GT_PUTARG_SPLIT)
{
- fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode);
- info->srcCount += argNode->AsPutArgSplit()->gtNumRegs;
+ unsigned regCount = argNode->AsPutArgSplit()->gtNumRegs;
+ assert(regCount == curArgTabEntry->numRegs);
+ info->srcCount += regCount;
+ appendLocationInfoToList(argNode);
}
#endif
else
{
assert(argNode->OperIs(GT_PUTARG_REG));
assert(argNode->gtRegNum == argReg);
- HandleFloatVarArgs(call, argNode, &callHasFloatRegArgs);
- info->srcCount++;
-
+ HandleFloatVarArgs(call, info, argNode, &callHasFloatRegArgs);
#ifdef _TARGET_ARM_
- // The `double` types have been transformed to `long` on arm,
+ // The `double` types have been transformed to `long` on armel,
// while the actual long types have been decomposed.
if (argNode->TypeGet() == TYP_LONG)
{
- info->srcCount++;
+ info->srcCount += appendBinaryLocationInfoToList(argNode->AsOp());
}
+ else
#endif // _TARGET_ARM_
+ {
+ appendLocationInfoToList(argNode);
+ info->srcCount++;
+ }
}
}
fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, arg);
assert(curArgTabEntry);
#endif
+#ifdef _TARGET_ARM_
+ // PUTARG_SPLIT nodes must be in the gtCallLateArgs list, since they
+ // define registers used by the call.
+ assert(arg->OperGet() != GT_PUTARG_SPLIT);
+#endif
if (arg->gtOper == GT_PUTARG_STK)
{
assert(curArgTabEntry->regNum == REG_STK);
}
-#ifdef _TARGET_ARM_
- else if (arg->OperGet() == GT_PUTARG_SPLIT)
- {
- assert(arg->AsPutArgSplit()->gtNumRegs == curArgTabEntry->numRegs);
- info->srcCount += arg->gtLsraInfo.dstCount;
- }
-#endif
else
{
- TreeNodeInfo* argInfo = &(arg->gtLsraInfo);
- assert((argInfo->dstCount == 0) || (argInfo->isLocalDefUse));
+ assert(!arg->IsValue() || arg->IsUnusedValue());
}
}
args = args->gtOp.gtOp2;
// If it is a fast tail call, it is already preferenced to use IP0.
// Therefore, no need set src candidates on call tgt again.
- if (call->IsVarargs() && callHasFloatRegArgs && !call->IsFastTailCall() && (ctrlExpr != nullptr))
+ if (call->IsVarargs() && callHasFloatRegArgs && !call->IsFastTailCall() && (ctrlExprInfo != nullptr))
{
NYI_ARM("float reg varargs");
// Don't assign the call target to any of the argument registers because
// we will use them to also pass floating point arguments as required
// by Arm64 ABI.
- ctrlExpr->gtLsraInfo.setSrcCandidates(this, allRegs(TYP_INT) & ~(RBM_ARG_REGS));
+ ctrlExprInfo->info.setSrcCandidates(this, allRegs(TYP_INT) & ~(RBM_ARG_REGS));
+ }
+
+ if (ctrlExprInfo != nullptr)
+ {
+ useList.Append(ctrlExprInfo);
+ info->srcCount++;
}
#ifdef _TARGET_ARM_
// Notes:
// Set the child node(s) to be contained when we have a multireg arg
//
-void LinearScan::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode)
+void LinearScan::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode, TreeNodeInfo* info)
{
assert(argNode->gtOper == GT_PUTARG_STK);
GenTreePtr putArgChild = argNode->gtOp.gtOp1;
- argNode->gtLsraInfo.srcCount = 0;
- argNode->gtLsraInfo.dstCount = 0;
+ info->srcCount = 0;
+ info->dstCount = 0;
// Do we have a TYP_STRUCT argument (or a GT_FIELD_LIST), if so it must be a multireg pass-by-value struct
if ((putArgChild->TypeGet() == TYP_STRUCT) || (putArgChild->OperGet() == GT_FIELD_LIST))
// We consume all of the items in the GT_FIELD_LIST
for (GenTreeFieldList* current = putArgChild->AsFieldList(); current != nullptr; current = current->Rest())
{
- argNode->gtLsraInfo.srcCount++;
+ appendLocationInfoToList(current->Current());
+ info->srcCount++;
}
}
else
{
#ifdef _TARGET_ARM64_
// We could use a ldp/stp sequence so we need two internal registers
- argNode->gtLsraInfo.internalIntCount = 2;
+ info->internalIntCount = 2;
#else // _TARGET_ARM_
// We could use a ldr/str sequence so we need a internal register
- argNode->gtLsraInfo.internalIntCount = 1;
+ info->internalIntCount = 1;
#endif // _TARGET_ARM_
if (putArgChild->OperGet() == GT_OBJ)
{
+ assert(putArgChild->isContained());
GenTreePtr objChild = putArgChild->gtOp.gtOp1;
if (objChild->OperGet() == GT_LCL_VAR_ADDR)
{
// We will generate all of the code for the GT_PUTARG_STK, the GT_OBJ and the GT_LCL_VAR_ADDR
- // as one contained operation
+ // as one contained operation, and there are no source registers.
//
assert(objChild->isContained());
}
+ else
+ {
+ // We will generate all of the code for the GT_PUTARG_STK and its child node
+ // as one contained operation
+ //
+ appendLocationInfoToList(objChild);
+ info->srcCount = 1;
+ }
+ }
+ else
+ {
+ // No source registers.
+ assert(putArgChild->OperIs(GT_LCL_VAR));
}
-
- // We will generate all of the code for the GT_PUTARG_STK and its child node
- // as one contained operation
- //
- argNode->gtLsraInfo.srcCount = putArgChild->gtLsraInfo.srcCount;
- assert(putArgChild->isContained());
}
}
else
{
assert(!putArgChild->isContained());
-#if defined(_TARGET_ARM_)
- // The `double` types have been transformed to `long` on armel,
- // while the actual long types have been decomposed.
- const bool isDouble = (putArgChild->TypeGet() == TYP_LONG);
- if (isDouble)
- {
- argNode->gtLsraInfo.srcCount = 2;
- }
- else
-#endif // defined(_TARGET_ARM_)
- {
- argNode->gtLsraInfo.srcCount = 1;
- }
+ info->srcCount = GetOperandInfo(putArgChild);
}
}
// Notes:
// Set the child node(s) to be contained
//
-void LinearScan::TreeNodeInfoInitPutArgSplit(GenTreePutArgSplit* argNode)
+void LinearScan::TreeNodeInfoInitPutArgSplit(GenTreePutArgSplit* argNode, TreeNodeInfo* info)
{
assert(argNode->gtOper == GT_PUTARG_SPLIT);
GenTreePtr putArgChild = argNode->gtOp.gtOp1;
// Registers for split argument corresponds to source
- argNode->gtLsraInfo.dstCount = argNode->gtNumRegs;
+ info->dstCount = argNode->gtNumRegs;
regNumber argReg = argNode->gtRegNum;
regMaskTP argMask = RBM_NONE;
{
argMask |= genRegMask((regNumber)((unsigned)argReg + i));
}
- argNode->gtLsraInfo.setDstCandidates(this, argMask);
- argNode->gtLsraInfo.setSrcCandidates(this, argMask);
+ info->setDstCandidates(this, argMask);
+ info->setSrcCandidates(this, argMask);
if (putArgChild->OperGet() == GT_FIELD_LIST)
{
{
GenTreePtr node = fieldListPtr->gtGetOp1();
assert(!node->isContained());
- unsigned currentRegCount = node->gtLsraInfo.dstCount;
- regMaskTP sourceMask = RBM_NONE;
+ LocationInfoListNode* nodeInfo = getLocationInfo(node);
+ unsigned currentRegCount = nodeInfo->info.dstCount;
+ regMaskTP sourceMask = RBM_NONE;
if (sourceRegCount < argNode->gtNumRegs)
{
for (unsigned regIndex = 0; regIndex < currentRegCount; regIndex++)
{
sourceMask |= genRegMask((regNumber)((unsigned)argReg + sourceRegCount + regIndex));
}
- node->gtLsraInfo.setSrcCandidates(this, sourceMask);
+ nodeInfo->info.setSrcCandidates(this, sourceMask);
}
sourceRegCount += currentRegCount;
+ useList.Append(nodeInfo);
}
- argNode->gtLsraInfo.srcCount = sourceRegCount;
+ info->srcCount += sourceRegCount;
assert(putArgChild->isContained());
}
else
assert(putArgChild->OperGet() == GT_OBJ);
// We can use a ldr/str sequence so we need an internal register
- argNode->gtLsraInfo.internalIntCount = 1;
- regMaskTP internalMask = RBM_ALLINT & ~argMask;
- argNode->gtLsraInfo.setInternalCandidates(this, internalMask);
+ info->internalIntCount = 1;
+ regMaskTP internalMask = RBM_ALLINT & ~argMask;
+ info->setInternalCandidates(this, internalMask);
GenTreePtr objChild = putArgChild->gtOp.gtOp1;
if (objChild->OperGet() == GT_LCL_VAR_ADDR)
}
else
{
- argNode->gtLsraInfo.srcCount = GetIndirSourceCount(putArgChild->AsIndir());
+ info->srcCount = GetIndirInfo(putArgChild->AsIndir());
}
assert(putArgChild->isContained());
}
// Return Value:
// None.
//
-void LinearScan::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
+void LinearScan::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode, TreeNodeInfo* info)
{
GenTree* dstAddr = blkNode->Addr();
unsigned size = blkNode->gtBlkSize;
GenTree* source = blkNode->Data();
+ LocationInfoListNode* dstAddrInfo = nullptr;
+ LocationInfoListNode* sourceInfo = nullptr;
+ LocationInfoListNode* sizeInfo = nullptr;
+
// Sources are dest address and initVal or source.
// We may require an additional source or temp register for the size.
- blkNode->gtLsraInfo.srcCount = GetOperandSourceCount(dstAddr);
- assert(blkNode->gtLsraInfo.dstCount == 0);
+ if (!dstAddr->isContained())
+ {
+ info->srcCount++;
+ dstAddrInfo = getLocationInfo(dstAddr);
+ }
+ assert(info->dstCount == 0);
GenTreePtr srcAddrOrFill = nullptr;
bool isInitBlk = blkNode->OperIsInitBlkOp();
+ regMaskTP dstAddrRegMask = RBM_NONE;
+ regMaskTP sourceRegMask = RBM_NONE;
+ regMaskTP blkSizeRegMask = RBM_NONE;
+
+ short internalIntCount = 0;
+ regMaskTP internalIntCandidates = RBM_NONE;
+
if (isInitBlk)
{
GenTreePtr initVal = source;
initVal = initVal->gtGetOp1();
}
srcAddrOrFill = initVal;
+ if (!initVal->isContained())
+ {
+ info->srcCount++;
+ sourceInfo = getLocationInfo(initVal);
+ }
if (blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindUnroll)
{
// code sequences to improve CQ.
// For reference see the code in lsraxarch.cpp.
NYI_ARM("initblk loop unrolling is currently not implemented.");
- if (!initVal->isContained())
- {
- blkNode->gtLsraInfo.srcCount++;
- }
}
else
{
assert(blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindHelper);
- // The helper follows the regular ABI.
- dstAddr->gtLsraInfo.setSrcCandidates(this, RBM_ARG_0);
assert(!initVal->isContained());
- blkNode->gtLsraInfo.srcCount++;
- initVal->gtLsraInfo.setSrcCandidates(this, RBM_ARG_1);
- if (size != 0)
- {
- // Reserve a temp register for the block size argument.
- blkNode->gtLsraInfo.setInternalCandidates(this, RBM_ARG_2);
- blkNode->gtLsraInfo.internalIntCount = 1;
- }
- else
- {
- // The block size argument is a third argument to GT_STORE_DYN_BLK
- noway_assert(blkNode->gtOper == GT_STORE_DYN_BLK);
- blkNode->gtLsraInfo.setSrcCount(3);
- GenTree* sizeNode = blkNode->AsDynBlk()->gtDynamicSize;
- sizeNode->gtLsraInfo.setSrcCandidates(this, RBM_ARG_2);
- }
+ // The helper follows the regular ABI.
+ dstAddrRegMask = RBM_ARG_0;
+ sourceRegMask = RBM_ARG_1;
+ blkSizeRegMask = RBM_ARG_2;
}
}
else
// Sources are src and dest and size if not constant.
if (source->gtOper == GT_IND)
{
- srcAddrOrFill = blkNode->Data()->gtGetOp1();
+ assert(source->isContained());
+ srcAddrOrFill = source->gtGetOp1();
+ sourceInfo = getLocationInfo(srcAddrOrFill);
+ info->srcCount++;
}
if (blkNode->OperGet() == GT_STORE_OBJ)
{
// CopyObj
// We don't need to materialize the struct size but we still need
// a temporary register to perform the sequence of loads and stores.
- blkNode->gtLsraInfo.internalIntCount = 1;
+ internalIntCount = 1;
if (size >= 2 * REGSIZE_BYTES)
{
// We will use ldp/stp to reduce code size and improve performance
// so we need to reserve an extra internal register
- blkNode->gtLsraInfo.internalIntCount++;
+ internalIntCount++;
}
// We can't use the special Write Barrier registers, so exclude them from the mask
- regMaskTP internalIntCandidates = RBM_ALLINT & ~(RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF);
- blkNode->gtLsraInfo.setInternalCandidates(this, internalIntCandidates);
+ internalIntCandidates = RBM_ALLINT & ~(RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF);
// If we have a dest address we want it in RBM_WRITE_BARRIER_DST_BYREF.
- dstAddr->gtLsraInfo.setSrcCandidates(this, RBM_WRITE_BARRIER_DST_BYREF);
+ dstAddrRegMask = RBM_WRITE_BARRIER_DST_BYREF;
// If we have a source address we want it in REG_WRITE_BARRIER_SRC_BYREF.
// Otherwise, if it is a local, codegen will put its address in REG_WRITE_BARRIER_SRC_BYREF,
// which is killed by a StoreObj (and thus needn't be reserved).
if (srcAddrOrFill != nullptr)
{
- srcAddrOrFill->gtLsraInfo.setSrcCandidates(this, RBM_WRITE_BARRIER_SRC_BYREF);
+ sourceRegMask = RBM_WRITE_BARRIER_SRC_BYREF;
}
}
else
{
// CopyBlk
- short internalIntCount = 0;
- regMaskTP internalIntCandidates = RBM_NONE;
-
if (blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindUnroll)
{
// In case of a CpBlk with a constant size and less than CPBLK_UNROLL_LIMIT size
else
{
assert(blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindHelper);
- dstAddr->gtLsraInfo.setSrcCandidates(this, RBM_ARG_0);
+ dstAddrRegMask = RBM_ARG_0;
// The srcAddr goes in arg1.
if (srcAddrOrFill != nullptr)
{
- srcAddrOrFill->gtLsraInfo.setSrcCandidates(this, RBM_ARG_1);
- }
- if (size != 0)
- {
- // Reserve a temp register for the block size argument.
- internalIntCandidates |= RBM_ARG_2;
- internalIntCount++;
- }
- else
- {
- // The block size argument is a third argument to GT_STORE_DYN_BLK
- assert(blkNode->gtOper == GT_STORE_DYN_BLK);
- blkNode->gtLsraInfo.srcCount++;
- GenTree* blockSize = blkNode->AsDynBlk()->gtDynamicSize;
- blockSize->gtLsraInfo.setSrcCandidates(this, RBM_ARG_2);
+ sourceRegMask = RBM_ARG_1;
}
- }
- if (internalIntCount != 0)
- {
- blkNode->gtLsraInfo.internalIntCount = internalIntCount;
- blkNode->gtLsraInfo.setInternalCandidates(this, internalIntCandidates);
+ blkSizeRegMask = RBM_ARG_2;
}
}
- blkNode->gtLsraInfo.srcCount += GetOperandSourceCount(source);
}
-}
-
-//------------------------------------------------------------------------
-// GetOperandSourceCount: Get the source registers for an operand that might be contained.
-//
-// Arguments:
-// node - The node of interest
-//
-// Return Value:
-// The number of source registers used by the *parent* of this node.
-//
-int LinearScan::GetOperandSourceCount(GenTree* node)
-{
- if (!node->isContained())
+ if (dstAddrInfo != nullptr)
{
- return 1;
+ if (dstAddrRegMask != RBM_NONE)
+ {
+ dstAddrInfo->info.setSrcCandidates(this, dstAddrRegMask);
+ }
+ useList.Append(dstAddrInfo);
}
-
-#if !defined(_TARGET_64BIT_)
- if (node->OperIs(GT_LONG))
+ if (sourceRegMask != RBM_NONE)
{
- return 2;
+ if (sourceInfo != nullptr)
+ {
+ sourceInfo->info.setSrcCandidates(this, sourceRegMask);
+ }
+ else
+ {
+ // This is a local source; we'll use a temp register for its address.
+ internalIntCandidates |= sourceRegMask;
+ internalIntCount++;
+ }
+ }
+ if (sourceInfo != nullptr)
+ {
+ useList.Add(sourceInfo, blkNode->IsReverseOp());
}
-#endif // !defined(_TARGET_64BIT_)
- if (node->OperIsIndir())
+ if (blkNode->OperIs(GT_STORE_DYN_BLK))
{
- const unsigned srcCount = GetIndirSourceCount(node->AsIndir());
- return srcCount;
+ // The block size argument is a third argument to GT_STORE_DYN_BLK
+ info->srcCount++;
+
+ GenTree* blockSize = blkNode->AsDynBlk()->gtDynamicSize;
+ sizeInfo = getLocationInfo(blockSize);
+ useList.Add(sizeInfo, blkNode->AsDynBlk()->gtEvalSizeFirst);
}
- return 0;
+ if (blkSizeRegMask != RBM_NONE)
+ {
+ if (size != 0)
+ {
+ // Reserve a temp register for the block size argument.
+ internalIntCandidates |= blkSizeRegMask;
+ internalIntCount++;
+ }
+ else
+ {
+ // The block size argument is a third argument to GT_STORE_DYN_BLK
+ assert((blkNode->gtOper == GT_STORE_DYN_BLK) && (sizeInfo != nullptr));
+ info->setSrcCount(3);
+ sizeInfo->info.setSrcCandidates(this, blkSizeRegMask);
+ }
+ }
+ if (internalIntCount != 0)
+ {
+ info->internalIntCount = internalIntCount;
+ info->setInternalCandidates(this, internalIntCandidates);
+ }
}
#endif // _TARGET_ARMARCH_
// - Setting the appropriate candidates for a store of a multi-reg call return value.
// - Requesting an internal register for SIMD12 stores.
//
-void LinearScan::TreeNodeInfoInitStoreLoc(GenTreeLclVarCommon* storeLoc)
+void LinearScan::TreeNodeInfoInitStoreLoc(GenTreeLclVarCommon* storeLoc, TreeNodeInfo* info)
{
- TreeNodeInfo* info = &(storeLoc->gtLsraInfo);
assert(info->dstCount == 0);
GenTree* op1 = storeLoc->gtGetOp1();
#ifdef _TARGET_X86_
if (op1->OperGet() == GT_LONG)
{
- assert(op1->isContained());
- info->srcCount = 2;
+ assert(op1->isContained() && !op1->gtOp.gtOp1->isContained() && !op1->gtOp.gtOp2->isContained());
+ info->srcCount = appendBinaryLocationInfoToList(op1->AsOp());
+ assert(info->srcCount == 2);
}
else
#endif // _TARGET_X86_
// srcCount = number of registers in which the value is returned by call
GenTreeCall* call = op1->AsCall();
ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
- info->srcCount = retTypeDesc->GetReturnRegCount();
+ unsigned regCount = retTypeDesc->GetReturnRegCount();
+ info->srcCount = regCount;
// Call node srcCandidates = Bitwise-OR(allregs(GetReturnRegType(i))) for all i=0..RetRegCount-1
- regMaskTP srcCandidates = allMultiRegCallNodeRegs(call);
- op1->gtLsraInfo.setSrcCandidates(this, srcCandidates);
- return;
+ regMaskTP srcCandidates = allMultiRegCallNodeRegs(call);
+ LocationInfoListNode* locInfo = getLocationInfo(op1);
+ locInfo->info.setSrcCandidates(this, srcCandidates);
+ useList.Append(locInfo);
}
else
{
info->srcCount = 1;
+ appendLocationInfoToList(op1);
}
#ifdef FEATURE_SIMD
// requirements needed by LSRA to build the Interval Table (source,
// destination and internal [temp] register counts).
//
-void LinearScan::TreeNodeInfoInit(GenTree* tree)
+void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info)
{
- TreeNodeInfo* info = &(tree->gtLsraInfo);
-
if (tree->isContained())
{
info->dstCount = 0;
switch (tree->OperGet())
{
default:
- TreeNodeInfoInitSimple(tree);
+ TreeNodeInfoInitSimple(tree, info);
break;
case GT_LCL_VAR:
// is not, if they were marked regOptional they should now be marked contained instead.
// TODO-XArch-CQ: When this is being called while RefPositions are being created,
// use lvLRACandidate here instead.
- if (info->regOptional)
+ if (tree->IsRegOptional())
{
if (!compiler->lvaTable[tree->AsLclVarCommon()->gtLclNum].lvTracked ||
compiler->lvaTable[tree->AsLclVarCommon()->gtLclNum].lvDoNotEnregister)
{
- info->regOptional = false;
+ tree->ClearRegOptional();
tree->SetContained();
info->dstCount = 0;
return;
case GT_STORE_LCL_FLD:
case GT_STORE_LCL_VAR:
- TreeNodeInfoInitStoreLoc(tree->AsLclVarCommon());
+ TreeNodeInfoInitStoreLoc(tree->AsLclVarCommon(), info);
break;
case GT_LIST:
case GT_LONG:
assert(tree->IsUnusedValue()); // Contained nodes are already processed, only unused GT_LONG can reach here.
- // An unused GT_LONG doesn't produce any registers.
+ // An unused GT_LONG node needs to consume its sources, but need not produce a register.
tree->gtType = TYP_VOID;
tree->ClearUnusedValue();
info->isLocalDefUse = false;
-
- // An unused GT_LONG node needs to consume its sources.
- info->srcCount = 2;
- info->dstCount = 0;
+ info->srcCount = 2;
+ info->dstCount = 0;
+ appendLocationInfoToList(tree->gtGetOp1());
+ appendLocationInfoToList(tree->gtGetOp2());
break;
#endif // !defined(_TARGET_64BIT_)
break;
case GT_RETURN:
- TreeNodeInfoInitReturn(tree);
+ TreeNodeInfoInitReturn(tree, info);
break;
case GT_RETFILT:
info->srcCount = 1;
info->setSrcCandidates(this, RBM_INTRET);
- tree->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(this, RBM_INTRET);
+ LocationInfoListNode* locationInfo = getLocationInfo(tree->gtOp.gtOp1);
+ locationInfo->info.setSrcCandidates(this, RBM_INTRET);
+ useList.Append(locationInfo);
}
break;
// has a type but no child
case GT_NOP:
info->srcCount = 0;
+ assert((tree->gtOp.gtOp1 == nullptr) || tree->isContained());
if (tree->TypeGet() != TYP_VOID && tree->gtOp.gtOp1 == nullptr)
{
assert(info->dstCount == 1);
{
info->srcCount = 0;
assert(info->dstCount == 0);
-
GenTree* cmp = tree->gtGetOp1();
- assert(cmp->gtLsraInfo.dstCount == 0);
+ assert(!cmp->IsValue());
}
break;
break;
case GT_SWITCH_TABLE:
- info->srcCount = 2;
info->internalIntCount = 1;
assert(info->dstCount == 0);
+ info->srcCount = appendBinaryLocationInfoToList(tree->AsOp());
+ assert(info->srcCount == 2);
break;
case GT_ASG:
// Rather they only support "op xmm, mem/xmm" form.
if (varTypeIsFloating(tree->TypeGet()))
{
- info->srcCount = GetOperandSourceCount(tree->gtOp.gtOp1);
- info->srcCount += GetOperandSourceCount(tree->gtOp.gtOp2);
+ info->srcCount = appendBinaryLocationInfoToList(tree->AsOp());
break;
}
case GT_OR:
case GT_XOR:
case GT_BT:
- info->srcCount = GetOperandSourceCount(tree->gtOp.gtOp1);
- info->srcCount += GetOperandSourceCount(tree->gtOp.gtOp2);
+ info->srcCount = appendBinaryLocationInfoToList(tree->AsOp());
break;
case GT_RETURNTRAP:
- // This just turns into a compare of its child with an int + a conditional call
- info->srcCount = tree->gtOp.gtOp1->isContained() ? 0 : 1;
+ // This just turns into a compare of its child with an int + a conditional call.
+ info->srcCount = GetOperandInfo(tree->gtOp.gtOp1);
assert(info->dstCount == 0);
info->internalIntCount = 1;
info->setInternalCandidates(this, allRegs(TYP_INT));
case GT_DIV:
case GT_UMOD:
case GT_UDIV:
- TreeNodeInfoInitModDiv(tree->AsOp());
+ TreeNodeInfoInitModDiv(tree->AsOp(), info);
break;
case GT_MUL:
#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
case GT_MUL_LONG:
#endif
- TreeNodeInfoInitMul(tree->AsOp());
+ TreeNodeInfoInitMul(tree->AsOp(), info);
break;
case GT_INTRINSIC:
- TreeNodeInfoInitIntrinsic(tree->AsOp());
+ TreeNodeInfoInitIntrinsic(tree->AsOp(), info);
break;
#ifdef FEATURE_SIMD
case GT_SIMD:
- TreeNodeInfoInitSIMD(tree->AsSIMD());
+ TreeNodeInfoInitSIMD(tree->AsSIMD(), info);
break;
#endif // FEATURE_SIMD
#if FEATURE_HW_INTRINSICS
case GT_HWIntrinsic:
- TreeNodeInfoInitHWIntrinsic(tree->AsHWIntrinsic());
+ TreeNodeInfoInitHWIntrinsic(tree->AsHWIntrinsic(), info);
break;
#endif // FEATURE_HW_INTRINSICS
case GT_CAST:
- TreeNodeInfoInitCast(tree);
+ TreeNodeInfoInitCast(tree, info);
break;
case GT_BITCAST:
- info->srcCount = 1;
- info->dstCount = 1;
- tree->AsUnOp()->gtOp1->gtLsraInfo.isTgtPref = true;
- break;
+ {
+ LocationInfoListNode* locationInfo = getLocationInfo(tree->gtOp.gtOp1);
+ locationInfo->info.isTgtPref = true;
+ useList.Append(locationInfo);
+ info->srcCount = 1;
+ info->dstCount = 1;
+ }
+ break;
case GT_NEG:
- info->srcCount = GetOperandSourceCount(tree->gtOp.gtOp1);
+ info->srcCount = GetOperandInfo(tree->gtOp.gtOp1);
// TODO-XArch-CQ:
// SSE instruction set doesn't have an instruction to negate a number.
break;
case GT_NOT:
- info->srcCount = GetOperandSourceCount(tree->gtOp.gtOp1);
+ info->srcCount = GetOperandInfo(tree->gtOp.gtOp1);
break;
case GT_LSH:
case GT_LSH_HI:
case GT_RSH_LO:
#endif
- TreeNodeInfoInitShiftRotate(tree);
+ (void)TreeNodeInfoInitShiftRotate(tree, info);
break;
case GT_EQ:
case GT_TEST_EQ:
case GT_TEST_NE:
case GT_CMP:
- TreeNodeInfoInitCmp(tree);
+ TreeNodeInfoInitCmp(tree, info);
break;
case GT_CKFINITE:
+ appendLocationInfoToList(tree->gtOp.gtOp1);
info->srcCount = 1;
assert(info->dstCount == 1);
info->internalIntCount = 1;
break;
case GT_CMPXCHG:
+ {
info->srcCount = 3;
assert(info->dstCount == 1);
// comparand is preferenced to RAX.
// Remaining two operands can be in any reg other than RAX.
- tree->gtCmpXchg.gtOpComparand->gtLsraInfo.setSrcCandidates(this, RBM_RAX);
- tree->gtCmpXchg.gtOpLocation->gtLsraInfo.setSrcCandidates(this, allRegs(TYP_INT) & ~RBM_RAX);
- tree->gtCmpXchg.gtOpValue->gtLsraInfo.setSrcCandidates(this, allRegs(TYP_INT) & ~RBM_RAX);
+ LocationInfoListNode* locationInfo = getLocationInfo(tree->gtCmpXchg.gtOpLocation);
+ locationInfo->info.setSrcCandidates(this, allRegs(TYP_INT) & ~RBM_RAX);
+ useList.Append(locationInfo);
+ LocationInfoListNode* valueInfo = getLocationInfo(tree->gtCmpXchg.gtOpValue);
+ valueInfo->info.setSrcCandidates(this, allRegs(TYP_INT) & ~RBM_RAX);
+ useList.Append(valueInfo);
info->setDstCandidates(this, RBM_RAX);
- break;
+ LocationInfoListNode* comparandInfo = getLocationInfo(tree->gtCmpXchg.gtOpComparand);
+ comparandInfo->info.setSrcCandidates(this, RBM_RAX);
+ useList.Append(comparandInfo);
+ }
+ break;
case GT_LOCKADD:
- {
- GenTreePtr op2 = tree->gtOp.gtOp2;
- info->srcCount = op2->isContained() ? 1 : 2;
+ info->srcCount = appendBinaryLocationInfoToList(tree->AsOp());
assert(info->dstCount == (tree->TypeGet() == TYP_VOID) ? 0 : 1);
- }
- break;
+ break;
case GT_PUTARG_REG:
- TreeNodeInfoInitPutArgReg(tree->AsUnOp());
+ TreeNodeInfoInitPutArgReg(tree->AsUnOp(), info);
break;
case GT_CALL:
- TreeNodeInfoInitCall(tree->AsCall());
+ TreeNodeInfoInitCall(tree->AsCall(), info);
break;
case GT_ADDR:
#ifdef FEATURE_PUT_STRUCT_ARG_STK
case GT_PUTARG_STK:
- TreeNodeInfoInitPutArgStk(tree->AsPutArgStk());
+ TreeNodeInfoInitPutArgStk(tree->AsPutArgStk(), info);
break;
#endif // FEATURE_PUT_STRUCT_ARG_STK
case GT_STORE_BLK:
case GT_STORE_OBJ:
case GT_STORE_DYN_BLK:
- TreeNodeInfoInitBlockStore(tree->AsBlk());
+ TreeNodeInfoInitBlockStore(tree->AsBlk(), info);
break;
case GT_INIT_VAL:
break;
case GT_LCLHEAP:
- TreeNodeInfoInitLclHeap(tree);
+ TreeNodeInfoInitLclHeap(tree, info);
break;
case GT_ARR_BOUNDS_CHECK:
case GT_SIMD_CHK:
#endif // FEATURE_SIMD
// Consumes arrLen & index - has no result
- info->srcCount = GetOperandSourceCount(tree->AsBoundsChk()->gtIndex);
- info->srcCount += GetOperandSourceCount(tree->AsBoundsChk()->gtArrLen);
+ info->srcCount = 2;
assert(info->dstCount == 0);
+ info->srcCount = GetOperandInfo(tree->AsBoundsChk()->gtIndex);
+ info->srcCount += GetOperandInfo(tree->AsBoundsChk()->gtArrLen);
break;
case GT_ARR_ELEM:
break;
case GT_ARR_INDEX:
+ {
info->srcCount = 2;
assert(info->dstCount == 1);
+ assert(!tree->AsArrIndex()->ArrObj()->isContained());
+ assert(!tree->AsArrIndex()->IndexExpr()->isContained());
// For GT_ARR_INDEX, the lifetime of the arrObj must be extended because it is actually used multiple
// times while the result is being computed.
- tree->AsArrIndex()->ArrObj()->gtLsraInfo.isDelayFree = true;
- info->hasDelayFreeSrc = true;
- break;
+ LocationInfoListNode* arrObjInfo = getLocationInfo(tree->AsArrIndex()->ArrObj());
+ arrObjInfo->info.isDelayFree = true;
+ useList.Append(arrObjInfo);
+ useList.Append(getLocationInfo(tree->AsArrIndex()->IndexExpr()));
+ info->hasDelayFreeSrc = true;
+ }
+ break;
case GT_ARR_OFFSET:
// This consumes the offset, if any, the arrObj and the effective index,
}
else
{
- info->srcCount++;
// Here we simply need an internal register, which must be different
// from any of the operand's registers, but may be the same as targetReg.
info->srcCount = 3;
info->internalIntCount = 1;
+ appendLocationInfoToList(tree->AsArrOffs()->gtOffset);
}
+ appendLocationInfoToList(tree->AsArrOffs()->gtIndex);
+ appendLocationInfoToList(tree->AsArrOffs()->gtArrObj);
break;
case GT_LEA:
if (tree->AsAddrMode()->HasBase())
{
info->srcCount++;
+ appendLocationInfoToList(tree->AsAddrMode()->Base());
}
if (tree->AsAddrMode()->HasIndex())
{
info->srcCount++;
+ appendLocationInfoToList(tree->AsAddrMode()->Index());
}
break;
case GT_STOREIND:
if (compiler->codeGen->gcInfo.gcIsWriteBarrierAsgNode(tree))
{
- TreeNodeInfoInitGCWriteBarrier(tree);
+ TreeNodeInfoInitGCWriteBarrier(tree, info);
break;
}
- TreeNodeInfoInitIndir(tree->AsIndir());
+ TreeNodeInfoInitIndir(tree->AsIndir(), info);
break;
case GT_NULLCHECK:
assert(info->dstCount == 0);
+ appendLocationInfoToList(tree->gtOp.gtOp1);
info->srcCount = 1;
break;
case GT_IND:
- TreeNodeInfoInitIndir(tree->AsIndir());
+ TreeNodeInfoInitIndir(tree->AsIndir(), info);
assert(info->dstCount == 1);
break;
break;
case GT_INDEX_ADDR:
- info->srcCount = 2;
- info->dstCount = 1;
+ assert(info->dstCount == 1);
+ info->srcCount = appendBinaryLocationInfoToList(tree->AsOp());
if (tree->AsIndexAddr()->Index()->TypeGet() == TYP_I_IMPL)
{
// If we have a read-modify-write operation, we want to preference op1 to the target,
// if it is not contained.
- if (!op1->isContained())
+ if (!op1->isContained() && !op1->OperIs(GT_LIST))
{
- op1->gtLsraInfo.isTgtPref = true;
+ useList.GetTreeNodeInfo(op1).isTgtPref = true;
}
// Is this a non-commutative operator, or is op2 a contained memory op?
}
}
- TreeNodeInfoInitCheckByteable(tree);
+ TreeNodeInfoInitCheckByteable(tree, info);
// We need to be sure that we've set info->srcCount and info->dstCount appropriately
assert((info->dstCount < 2) || (tree->IsMultiRegCall() && info->dstCount == MAX_RET_REG_COUNT));
assert(info->isLocalDefUse == (tree->IsValue() && tree->IsUnusedValue()));
assert(!tree->IsUnusedValue() || (info->dstCount != 0));
+ assert(info->dstCount == tree->GetRegisterDstCount());
}
//---------------------------------------------------------------------
{
GenTree* base = delayUseSrc->AsIndir()->Base();
GenTree* index = delayUseSrc->AsIndir()->Index();
- if (base != nullptr)
+ if ((base != nullptr) && !base->isContained())
{
- base->gtLsraInfo.isDelayFree = true;
- returnValue = true;
+ useList.GetTreeNodeInfo(base).isDelayFree = true;
+ returnValue = true;
}
if (index != nullptr)
{
- index->gtLsraInfo.isDelayFree = true;
- returnValue = true;
+ assert(!index->isContained());
+ useList.GetTreeNodeInfo(index).isDelayFree = true;
+ returnValue = true;
}
}
}
else
{
- delayUseSrc->gtLsraInfo.isDelayFree = true;
- returnValue = true;
+ useList.GetTreeNodeInfo(delayUseSrc).isDelayFree = true;
+ returnValue = true;
}
return returnValue;
}
// Return Value:
// None.
//
-void LinearScan::TreeNodeInfoInitCheckByteable(GenTree* tree)
+void LinearScan::TreeNodeInfoInitCheckByteable(GenTree* tree, TreeNodeInfo* info)
{
#ifdef _TARGET_X86_
- TreeNodeInfo* info = &(tree->gtLsraInfo);
-
// Exclude RBM_NON_BYTE_REGS from dst candidates of tree node and src candidates of operands
// if the tree node is a byte type.
//
// No need to set src candidates on a contained child operand.
if (!op->isContained())
{
- regMask = op->gtLsraInfo.getSrcCandidates(this);
+ TreeNodeInfo& op1Info = useList.GetTreeNodeInfo(op);
+ regMask = op1Info.getSrcCandidates(this);
assert(regMask != RBM_NONE);
- op->gtLsraInfo.setSrcCandidates(this, regMask & ~RBM_NON_BYTE_REGS);
+ op1Info.setSrcCandidates(this, regMask & ~RBM_NON_BYTE_REGS);
}
}
op = tree->gtOp.gtOp2;
if (!op->isContained())
{
- regMask = op->gtLsraInfo.getSrcCandidates(this);
+ TreeNodeInfo& op2Info = useList.GetTreeNodeInfo(op);
+ regMask = op2Info.getSrcCandidates(this);
assert(regMask != RBM_NONE);
- op->gtLsraInfo.setSrcCandidates(this, regMask & ~RBM_NON_BYTE_REGS);
+ op2Info.setSrcCandidates(this, regMask & ~RBM_NON_BYTE_REGS);
}
}
}
// Return Value:
// None.
//
-void LinearScan::TreeNodeInfoInitSimple(GenTree* tree)
+void LinearScan::TreeNodeInfoInitSimple(GenTree* tree, TreeNodeInfo* info)
{
- TreeNodeInfo* info = &(tree->gtLsraInfo);
if (tree->isContained())
{
info->srcCount = 0;
}
else if (kind & (GTK_SMPOP))
{
- if (tree->gtGetOp2IfPresent() != nullptr)
- {
- info->srcCount += GetOperandSourceCount(tree->gtOp.gtOp2);
- }
- info->srcCount += GetOperandSourceCount(tree->gtOp.gtOp1);
+ info->srcCount = appendBinaryLocationInfoToList(tree->AsOp());
}
else
{
// Return Value:
// None.
//
-void LinearScan::TreeNodeInfoInitReturn(GenTree* tree)
+void LinearScan::TreeNodeInfoInitReturn(GenTree* tree, TreeNodeInfo* info)
{
- TreeNodeInfo* info = &(tree->gtLsraInfo);
assert(info->dstCount == 0);
GenTree* op1 = tree->gtGetOp1();
if (tree->TypeGet() == TYP_LONG)
{
assert((op1->OperGet() == GT_LONG) && op1->isContained());
- GenTree* loVal = op1->gtGetOp1();
- GenTree* hiVal = op1->gtGetOp2();
- info->srcCount = 2;
- loVal->gtLsraInfo.setSrcCandidates(this, RBM_LNGRET_LO);
- hiVal->gtLsraInfo.setSrcCandidates(this, RBM_LNGRET_HI);
+ GenTree* loVal = op1->gtGetOp1();
+ GenTree* hiVal = op1->gtGetOp2();
+ info->srcCount = 2;
+ LocationInfoListNode* loValInfo = getLocationInfo(loVal);
+ LocationInfoListNode* hiValInfo = getLocationInfo(hiVal);
+ loValInfo->info.setSrcCandidates(this, RBM_LNGRET_LO);
+ hiValInfo->info.setSrcCandidates(this, RBM_LNGRET_HI);
+ useList.Append(loValInfo);
+ useList.Append(hiValInfo);
}
else
#endif // !defined(_TARGET_64BIT_)
}
}
+ LocationInfoListNode* locationInfo = getLocationInfo(op1);
if (useCandidates != RBM_NONE)
{
- op1->gtLsraInfo.setSrcCandidates(this, useCandidates);
+ locationInfo->info.setSrcCandidates(this, useCandidates);
}
+ useList.Append(locationInfo);
}
}
// Return Value:
// None.
//
-void LinearScan::TreeNodeInfoInitShiftRotate(GenTree* tree)
+int LinearScan::TreeNodeInfoInitShiftRotate(GenTree* tree, TreeNodeInfo* info)
{
- TreeNodeInfo* info = &(tree->gtLsraInfo);
-
// For shift operations, we need that the number
// of bits moved gets stored in CL in case
// the number of bits to shift is not a constant.
- GenTreePtr shiftBy = tree->gtOp.gtOp2;
- GenTreePtr source = tree->gtOp.gtOp1;
-
+ int srcCount = 0;
+ GenTreePtr shiftBy = tree->gtOp.gtOp2;
+ GenTreePtr source = tree->gtOp.gtOp1;
+ LocationInfoListNode* shiftByInfo = nullptr;
// x64 can encode 8 bits of shift and it will use 5 or 6. (the others are masked off)
// We will allow whatever can be encoded - hope you know what you are doing.
- if (!shiftBy->isContained())
+ if (shiftBy->isContained())
{
- shiftBy->gtLsraInfo.setSrcCandidates(this, RBM_RCX);
- if (source->isContained())
- {
- if (source->OperIs(GT_IND))
- {
- if (source->AsIndir()->Base() != nullptr)
- {
- source->AsIndir()->Base()->gtLsraInfo.setSrcCandidates(this, allRegs(TYP_INT) & ~RBM_RCX);
- }
- if (source->AsIndir()->Index() != nullptr)
- {
- source->AsIndir()->Index()->gtLsraInfo.setSrcCandidates(this, allRegs(TYP_INT) & ~RBM_RCX);
- }
- }
- }
- else
+ srcCount += GetOperandInfo(source);
+ }
+ else
+ {
+ srcCount++;
+ shiftByInfo = getLocationInfo(shiftBy);
+ shiftByInfo->info.setSrcCandidates(this, RBM_RCX);
+ info->setDstCandidates(this, allRegs(TYP_INT) & ~RBM_RCX);
+ LocationInfoListNode* sourceInfo;
+ srcCount += GetOperandInfo(source, &sourceInfo);
+ for (; sourceInfo != nullptr; sourceInfo = sourceInfo->Next())
{
- source->gtLsraInfo.setSrcCandidates(this, allRegs(TYP_INT) & ~RBM_RCX);
+ sourceInfo->info.setSrcCandidates(this, allRegs(TYP_INT) & ~RBM_RCX);
}
- info->setDstCandidates(this, allRegs(TYP_INT) & ~RBM_RCX);
}
- // Note that Rotate Left/Right instructions don't set ZF and SF flags.
- //
- // If the operand being shifted is 32-bits then upper three bits are masked
- // by hardware to get actual shift count. Similarly for 64-bit operands
- // shift count is narrowed to [0..63]. If the resulting shift count is zero,
- // then shift operation won't modify flags.
- //
- // TODO-CQ-XARCH: We can optimize generating 'test' instruction for GT_EQ/NE(shift, 0)
- // if the shift count is known to be non-zero and in the range depending on the
- // operand size.
+ // Note that Rotate Left/Right instructions don't set ZF and SF flags.
+ //
+ // If the operand being shifted is 32-bits then upper three bits are masked
+ // by hardware to get actual shift count. Similarly for 64-bit operands
+ // shift count is narrowed to [0..63]. If the resulting shift count is zero,
+ // then shift operation won't modify flags.
+ //
+ // TODO-CQ-XARCH: We can optimize generating 'test' instruction for GT_EQ/NE(shift, 0)
+ // if the shift count is known to be non-zero and in the range depending on the
+ // operand size.
- if (!tree->isContained())
- {
#ifdef _TARGET_X86_
- // The first operand of a GT_LSH_HI and GT_RSH_LO oper is a GT_LONG so that
- // we can have a three operand form. Increment the srcCount.
- if (tree->OperGet() == GT_LSH_HI || tree->OperGet() == GT_RSH_LO)
- {
- assert((source->OperGet() == GT_LONG) && source->isContained());
+ // The first operand of a GT_LSH_HI and GT_RSH_LO oper is a GT_LONG so that
+ // we can have a three operand form. Increment the srcCount.
+ if (tree->OperGet() == GT_LSH_HI || tree->OperGet() == GT_RSH_LO)
+ {
+ assert((source->OperGet() == GT_LONG) && source->isContained());
- if (tree->OperGet() == GT_LSH_HI)
- {
- GenTreePtr sourceLo = source->gtOp.gtOp1;
- sourceLo->gtLsraInfo.isDelayFree = true;
- }
- else
- {
- GenTreePtr sourceHi = source->gtOp.gtOp2;
- sourceHi->gtLsraInfo.isDelayFree = true;
- }
+ GenTreePtr sourceLo = source->gtOp.gtOp1;
+ LocationInfoListNode* sourceLoInfo = useList.Begin();
+ LocationInfoListNode* sourceHiInfo = useList.GetSecond(INDEBUG(source->gtGetOp2()));
- source->gtLsraInfo.hasDelayFreeSrc = true;
- info->hasDelayFreeSrc = true;
- info->srcCount += 2;
+ info->hasDelayFreeSrc = true;
+ if (tree->OperGet() == GT_LSH_HI)
+ {
+ sourceLoInfo->info.isDelayFree = true;
}
else
+ {
+ sourceHiInfo->info.isDelayFree = true;
+ }
+ }
#endif
- if (!source->isContained())
+ if (shiftByInfo != nullptr)
+ {
+ if (tree->IsReverseOp())
{
- info->srcCount++;
+ useList.Prepend(shiftByInfo);
}
- if (!shiftBy->isContained())
+ else
{
- info->srcCount++;
+ useList.Append(shiftByInfo);
}
}
+ if (!tree->isContained())
+ {
+ info->srcCount = srcCount;
+ }
+ return srcCount;
}
//------------------------------------------------------------------------
// Return Value:
// None.
//
-void LinearScan::TreeNodeInfoInitPutArgReg(GenTreeUnOp* node)
+void LinearScan::TreeNodeInfoInitPutArgReg(GenTreeUnOp* node, TreeNodeInfo* info)
{
assert(node != nullptr);
assert(node->OperIsPutArgReg());
- node->gtLsraInfo.srcCount = 1;
- regNumber argReg = node->gtRegNum;
+ info->srcCount = 1;
+ regNumber argReg = node->gtRegNum;
assert(argReg != REG_NA);
// Set the register requirements for the node.
const regMaskTP argMask = genRegMask(argReg);
- node->gtLsraInfo.setDstCandidates(this, argMask);
- node->gtLsraInfo.setSrcCandidates(this, argMask);
+ info->setDstCandidates(this, argMask);
+ info->setSrcCandidates(this, argMask);
// To avoid redundant moves, have the argument operand computed in the
// register in which the argument is passed to the call.
- node->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(this, getUseCandidates(node));
+ LocationInfoListNode* op1Info = getLocationInfo(node->gtOp.gtOp1);
+ op1Info->info.setSrcCandidates(this, info->getSrcCandidates(this));
+ op1Info->info.isDelayFree = true;
+ useList.Append(op1Info);
}
//------------------------------------------------------------------------
// Since the integer register is not associated with the arg node, we will reserve it as
// an internal register on the call so that it is not used during the evaluation of the call node
// (e.g. for the target).
-void LinearScan::HandleFloatVarArgs(GenTreeCall* call, GenTree* argNode, bool* callHasFloatRegArgs)
+void LinearScan::HandleFloatVarArgs(GenTreeCall* call, TreeNodeInfo* info, GenTree* argNode, bool* callHasFloatRegArgs)
{
#if FEATURE_VARARG
if (call->IsVarargs() && varTypeIsFloating(argNode))
regNumber argReg = argNode->gtRegNum;
regNumber targetReg = compiler->getCallArgIntRegister(argReg);
- call->gtLsraInfo.setInternalIntCount(call->gtLsraInfo.internalIntCount + 1);
- call->gtLsraInfo.addInternalCandidates(this, genRegMask(targetReg));
+ info->setInternalIntCount(info->internalIntCount + 1);
+ info->addInternalCandidates(this, genRegMask(targetReg));
}
#endif // FEATURE_VARARG
}
// Return Value:
// None.
//
-void LinearScan::TreeNodeInfoInitCall(GenTreeCall* call)
+void LinearScan::TreeNodeInfoInitCall(GenTreeCall* call, TreeNodeInfo* info)
{
- TreeNodeInfo* info = &(call->gtLsraInfo);
bool hasMultiRegRetVal = false;
ReturnTypeDesc* retTypeDesc = nullptr;
assert(info->dstCount == 0);
}
- GenTree* ctrlExpr = call->gtControlExpr;
+ GenTree* ctrlExpr = call->gtControlExpr;
+ LocationInfoListNode* ctrlExprInfo = nullptr;
if (call->gtCallType == CT_INDIRECT)
{
ctrlExpr = call->gtCallAddr;
}
- // set reg requirements on call target represented as control sequence.
- if (ctrlExpr != nullptr)
- {
- // In case of fast tail implemented as jmp, make sure that gtControlExpr is
- // computed into a register.
- if (call->IsFastTailCall())
- {
- {
- // Fast tail call - make sure that call target is always computed in RAX
- // so that epilog sequence can generate "jmp rax" to achieve fast tail call.
- ctrlExpr->gtLsraInfo.setSrcCandidates(this, RBM_RAX);
- }
- }
-#ifdef _TARGET_X86_
- else
- {
- // On x86, we need to generate a very specific pattern for indirect VSD calls:
- //
- // 3-byte nop
- // call dword ptr [eax]
- //
- // Where EAX is also used as an argument to the stub dispatch helper. Make
- // sure that the call target address is computed into EAX in this case.
- if (call->IsVirtualStub() && (call->gtCallType == CT_INDIRECT))
- {
- assert(ctrlExpr->isIndir() && ctrlExpr->isContained());
- ctrlExpr->gtGetOp1()->gtLsraInfo.setSrcCandidates(this, RBM_VIRTUAL_STUB_TARGET);
- }
- }
-#endif // _TARGET_X86_
- info->srcCount += GetOperandSourceCount(ctrlExpr);
- }
-
// If this is a varargs call, we will clear the internal candidates in case we need
// to reserve some integer registers for copying float args.
// We have to do this because otherwise the default candidates are allRegs, and adding
if (argNode->OperIsPutArgReg())
{
info->srcCount++;
- HandleFloatVarArgs(call, argNode, &callHasFloatRegArgs);
+ HandleFloatVarArgs(call, info, argNode, &callHasFloatRegArgs);
+ appendLocationInfoToList(argNode);
}
#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
else if (argNode->OperGet() == GT_FIELD_LIST)
{
assert(entry->Current()->OperIsPutArgReg());
info->srcCount++;
- HandleFloatVarArgs(call, argNode, &callHasFloatRegArgs);
+ HandleFloatVarArgs(call, info, argNode, &callHasFloatRegArgs);
+ appendLocationInfoToList(entry->Current());
}
}
#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
if (argNode->TypeGet() == TYP_STRUCT)
{
assert(argNode->gtOp.gtOp1 != nullptr && argNode->gtOp.gtOp1->OperGet() == GT_OBJ);
- assert(argNode->gtLsraInfo.srcCount == 0);
+ assert(argNode->gtOp.gtOp1->isContained());
}
#endif // FEATURE_PUT_STRUCT_ARG_STK
continue;
while (args)
{
GenTreePtr arg = args->gtOp.gtOp1;
- if (!(args->gtFlags & GTF_LATE_ARG))
+ if (!(arg->gtFlags & GTF_LATE_ARG))
{
- TreeNodeInfo* argInfo = &(arg->gtLsraInfo);
- if ((argInfo->dstCount != 0) && !arg->IsArgPlaceHolderNode() && !arg->isContained())
+ if (arg->IsValue() && !arg->isContained())
{
- argInfo->isLocalDefUse = true;
+ // argInfo->isLocalDefUse = true;
+ assert(arg->IsUnusedValue());
}
- assert(argInfo->dstCount == 0);
+ // assert(argInfo->dstCount == 0);
}
args = args->gtOp.gtOp2;
}
-#if FEATURE_VARARG
- // If it is a fast tail call, it is already preferenced to use RAX.
- // Therefore, no need set src candidates on call tgt again.
- if (call->IsVarargs() && callHasFloatRegArgs && !call->IsFastTailCall() && (ctrlExpr != nullptr))
+ // set reg requirements on call target represented as control sequence.
+ if (ctrlExpr != nullptr)
{
- // Don't assign the call target to any of the argument registers because
- // we will use them to also pass floating point arguments as required
- // by Amd64 ABI.
- ctrlExpr->gtLsraInfo.setSrcCandidates(this, allRegs(TYP_INT) & ~(RBM_ARG_REGS));
- }
+ int ctrlExprCount = GetOperandInfo(ctrlExpr);
+ if (ctrlExprCount != 0)
+ {
+ assert(ctrlExprCount == 1);
+ ctrlExprInfo = useList.Last();
+ info->srcCount++;
+ }
+
+ // In case of fast tail implemented as jmp, make sure that gtControlExpr is
+ // computed into a register.
+ if (call->IsFastTailCall())
+ {
+ assert(!ctrlExpr->isContained() && ctrlExprInfo != nullptr);
+ // Fast tail call - make sure that call target is always computed in RAX
+ // so that epilog sequence can generate "jmp rax" to achieve fast tail call.
+ ctrlExprInfo->info.setSrcCandidates(this, RBM_RAX);
+ }
+#ifdef _TARGET_X86_
+ else if (call->IsVirtualStub() && (call->gtCallType == CT_INDIRECT))
+ {
+ // On x86, we need to generate a very specific pattern for indirect VSD calls:
+ //
+ // 3-byte nop
+ // call dword ptr [eax]
+ //
+ // Where EAX is also used as an argument to the stub dispatch helper. Make
+ // sure that the call target address is computed into EAX in this case.
+ assert(ctrlExprInfo != nullptr);
+ assert(ctrlExpr->isIndir() && ctrlExpr->isContained());
+ ctrlExprInfo->info.setSrcCandidates(this, RBM_VIRTUAL_STUB_TARGET);
+ }
+#endif // _TARGET_X86_
+
+#if FEATURE_VARARG
+ // If it is a fast tail call, it is already preferenced to use RAX.
+ // Therefore, no need set src candidates on call tgt again.
+ if (call->IsVarargs() && callHasFloatRegArgs && !call->IsFastTailCall() && (ctrlExprInfo != nullptr))
+ {
+ // Don't assign the call target to any of the argument registers because
+ // we will use them to also pass floating point arguments as required
+ // by Amd64 ABI.
+ ctrlExprInfo->info.setSrcCandidates(this, allRegs(TYP_INT) & ~(RBM_ARG_REGS));
+ }
#endif // !FEATURE_VARARG
+ }
}
//------------------------------------------------------------------------
// Return Value:
// None.
//
-void LinearScan::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
+void LinearScan::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode, TreeNodeInfo* info)
{
GenTree* dstAddr = blkNode->Addr();
unsigned size = blkNode->gtBlkSize;
GenTree* source = blkNode->Data();
+ LocationInfoListNode* dstAddrInfo = nullptr;
+ LocationInfoListNode* sourceInfo = nullptr;
+ LocationInfoListNode* sizeInfo = nullptr;
+
// Sources are dest address, initVal or source.
// We may require an additional source or temp register for the size.
- blkNode->gtLsraInfo.srcCount = GetOperandSourceCount(dstAddr);
- assert(blkNode->gtLsraInfo.dstCount == 0);
- blkNode->gtLsraInfo.setInternalCandidates(this, RBM_NONE);
+ if (!dstAddr->isContained())
+ {
+ info->srcCount++;
+ dstAddrInfo = getLocationInfo(dstAddr);
+ }
+ assert(info->dstCount == 0);
+ info->setInternalCandidates(this, RBM_NONE);
GenTreePtr srcAddrOrFill = nullptr;
bool isInitBlk = blkNode->OperIsInitBlkOp();
srcAddrOrFill = initVal;
if (!initVal->isContained())
{
- blkNode->gtLsraInfo.srcCount++;
+ info->srcCount++;
+ sourceInfo = getLocationInfo(initVal);
}
switch (blkNode->gtBlkOpKind)
if (size >= XMM_REGSIZE_BYTES)
{
// Reserve an XMM register to fill it with a pack of 16 init value constants.
- blkNode->gtLsraInfo.internalFloatCount = 1;
- blkNode->gtLsraInfo.setInternalCandidates(this, internalFloatRegCandidates());
+ info->internalFloatCount = 1;
+ info->setInternalCandidates(this, internalFloatRegCandidates());
// use XMM register to fill with constants, it's AVX instruction and set the flag
SetContainsAVXFlags();
}
// b) The fill value has to be in RAX.
// c) The buffer size will go in RCX.
dstAddrRegMask = RBM_RDI;
- srcAddrOrFill = initVal;
sourceRegMask = RBM_RAX;
blkSizeRegMask = RBM_RCX;
break;
// CopyObj or CopyBlk
if (source->gtOper == GT_IND)
{
+ assert(source->isContained());
srcAddrOrFill = source->gtGetOp1();
+ if (!srcAddrOrFill->isContained())
+ {
+ sourceInfo = getLocationInfo(srcAddrOrFill);
+ info->srcCount++;
+ }
}
if (blkNode->OperGet() == GT_STORE_OBJ)
{
// RBM_NON_BYTE_REGS from internal candidates.
if ((size & (XMM_REGSIZE_BYTES - 1)) != 0)
{
- blkNode->gtLsraInfo.internalIntCount++;
+ info->internalIntCount++;
regMaskTP regMask = allRegs(TYP_INT);
#ifdef _TARGET_X86_
regMask &= ~RBM_NON_BYTE_REGS;
}
#endif
- blkNode->gtLsraInfo.setInternalCandidates(this, regMask);
+ info->setInternalCandidates(this, regMask);
}
if (size >= XMM_REGSIZE_BYTES)
// If we have a buffer larger than XMM_REGSIZE_BYTES,
// reserve an XMM register to use it for a
// series of 16-byte loads and stores.
- blkNode->gtLsraInfo.internalFloatCount = 1;
- blkNode->gtLsraInfo.addInternalCandidates(this, internalFloatRegCandidates());
+ info->internalFloatCount = 1;
+ info->addInternalCandidates(this, internalFloatRegCandidates());
// Uses XMM reg for load and store and hence check to see whether AVX instructions
// are used for codegen, set ContainsAVX flag
SetContainsAVXFlags();
unreached();
}
}
- blkNode->gtLsraInfo.srcCount += GetOperandSourceCount(source);
}
- if (dstAddrRegMask != RBM_NONE)
+ if (dstAddrInfo != nullptr)
{
- dstAddr->gtLsraInfo.setSrcCandidates(this, dstAddrRegMask);
+ if (dstAddrRegMask != RBM_NONE)
+ {
+ dstAddrInfo->info.setSrcCandidates(this, dstAddrRegMask);
+ }
+ useList.Append(dstAddrInfo);
}
if (sourceRegMask != RBM_NONE)
{
- if (srcAddrOrFill != nullptr)
+ if (sourceInfo != nullptr)
{
- srcAddrOrFill->gtLsraInfo.setSrcCandidates(this, sourceRegMask);
+ sourceInfo->info.setSrcCandidates(this, sourceRegMask);
}
else
{
// This is a local source; we'll use a temp register for its address.
- blkNode->gtLsraInfo.addInternalCandidates(this, sourceRegMask);
- blkNode->gtLsraInfo.internalIntCount++;
+ info->addInternalCandidates(this, sourceRegMask);
+ info->internalIntCount++;
}
}
+ if (sourceInfo != nullptr)
+ {
+ useList.Add(sourceInfo, blkNode->IsReverseOp());
+ }
+
+ if (blkNode->OperIs(GT_STORE_DYN_BLK))
+ {
+ // The block size argument is a third argument to GT_STORE_DYN_BLK
+ info->srcCount++;
+
+ GenTree* blockSize = blkNode->AsDynBlk()->gtDynamicSize;
+ sizeInfo = getLocationInfo(blockSize);
+ useList.Add(sizeInfo, blkNode->AsDynBlk()->gtEvalSizeFirst);
+ }
+
if (blkSizeRegMask != RBM_NONE)
{
if (size != 0)
{
// Reserve a temp register for the block size argument.
- blkNode->gtLsraInfo.addInternalCandidates(this, blkSizeRegMask);
- blkNode->gtLsraInfo.internalIntCount++;
+ info->addInternalCandidates(this, blkSizeRegMask);
+ info->internalIntCount++;
}
else
{
// The block size argument is a third argument to GT_STORE_DYN_BLK
- assert(blkNode->gtOper == GT_STORE_DYN_BLK);
- blkNode->gtLsraInfo.setSrcCount(3);
- GenTree* blockSize = blkNode->AsDynBlk()->gtDynamicSize;
- blockSize->gtLsraInfo.setSrcCandidates(this, blkSizeRegMask);
+ assert((blkNode->gtOper == GT_STORE_DYN_BLK) && (sizeInfo != nullptr));
+ info->setSrcCount(3);
+ sizeInfo->info.setSrcCandidates(this, blkSizeRegMask);
}
}
}
// Return Value:
// None.
//
-void LinearScan::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
+void LinearScan::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk, TreeNodeInfo* info)
{
- TreeNodeInfo* info = &(putArgStk->gtLsraInfo);
- info->srcCount = 0;
+ info->srcCount = 0;
assert(info->dstCount == 0);
if (putArgStk->gtOp1->gtOper == GT_FIELD_LIST)
fieldCount++;
if (!fieldNode->isContained())
{
+ appendLocationInfoToList(fieldNode);
info->srcCount++;
}
}
// For PutArgStk of a TYP_SIMD12, we need a SIMD temp register.
if (needsSimdTemp)
{
- info->srcCount = putArgStk->gtOp1->gtLsraInfo.dstCount;
assert(info->dstCount == 0);
info->internalFloatCount += 1;
info->addInternalCandidates(this, allSIMDRegs());
// For PutArgStk of a TYP_SIMD12, we need an extra register.
if (putArgStk->isSIMD12())
{
- info->srcCount = putArgStk->gtOp1->gtLsraInfo.dstCount;
+ appendLocationInfoToList(putArgStk->gtOp1);
+ info->srcCount = 1;
info->internalFloatCount = 1;
info->setInternalCandidates(this, allSIMDRegs());
return;
if (type != TYP_STRUCT)
{
- TreeNodeInfoInitSimple(putArgStk);
+ TreeNodeInfoInitSimple(putArgStk, info);
return;
}
GenTreePtr dst = putArgStk;
GenTreePtr srcAddr = nullptr;
- info->srcCount = GetOperandSourceCount(src);
+ info->srcCount = GetOperandInfo(src);
// If we have a buffer between XMM_REGSIZE_BYTES and CPBLK_UNROLL_LIMIT bytes, we'll use SSE2.
// Structs and buffer with sizes <= CPBLK_UNROLL_LIMIT bytes are occurring in more than 95% of
// Return Value:
// None.
//
-void LinearScan::TreeNodeInfoInitLclHeap(GenTree* tree)
+void LinearScan::TreeNodeInfoInitLclHeap(GenTree* tree, TreeNodeInfo* info)
{
- TreeNodeInfo* info = &(tree->gtLsraInfo);
-
info->srcCount = 1;
assert(info->dstCount == 1);
}
else
{
+ appendLocationInfoToList(size);
if (!compiler->info.compInitMem)
{
info->internalIntCount = 2;
// Return Value:
// None.
//
-void LinearScan::TreeNodeInfoInitModDiv(GenTree* tree)
+void LinearScan::TreeNodeInfoInitModDiv(GenTree* tree, TreeNodeInfo* info)
{
- TreeNodeInfo* info = &(tree->gtLsraInfo);
-
GenTree* op1 = tree->gtGetOp1();
GenTree* op2 = tree->gtGetOp2();
- info->srcCount = GetOperandSourceCount(op1);
- info->srcCount += GetOperandSourceCount(op2);
assert(info->dstCount == 1);
if (varTypeIsFloating(tree->TypeGet()))
{
+ info->srcCount = appendBinaryLocationInfoToList(tree->AsOp());
return;
}
info->internalIntCount = 1;
info->setInternalCandidates(this, allRegs(TYP_INT));
- loVal->gtLsraInfo.setSrcCandidates(this, RBM_EAX);
- hiVal->gtLsraInfo.setSrcCandidates(this, RBM_EDX);
+ LocationInfoListNode* loValInfo = getLocationInfo(loVal);
+ LocationInfoListNode* hiValInfo = getLocationInfo(hiVal);
+ loValInfo->info.setSrcCandidates(this, RBM_EAX);
+ hiValInfo->info.setSrcCandidates(this, RBM_EDX);
+ useList.Append(loValInfo);
+ useList.Append(hiValInfo);
+ info->srcCount = 2;
}
else
#endif
{
// If possible would like to have op1 in RAX to avoid a register move
- op1->gtLsraInfo.setSrcCandidates(this, RBM_RAX);
+ LocationInfoListNode* op1Info = getLocationInfo(op1);
+ op1Info->info.setSrcCandidates(this, RBM_RAX);
+ useList.Append(op1Info);
+ info->srcCount = 1;
}
- if (op2->isContained())
- {
- if (op2->gtOper == GT_IND)
- {
- if (op2->AsIndir()->Base() != nullptr)
- {
- op2->AsIndir()->Base()->gtLsraInfo.setSrcCandidates(this, allRegs(TYP_INT) & ~(RBM_RAX | RBM_RDX));
- }
- if (op2->AsIndir()->Index() != nullptr)
- {
- op2->AsIndir()->Index()->gtLsraInfo.setSrcCandidates(this, allRegs(TYP_INT) & ~(RBM_RAX | RBM_RDX));
- }
- }
- }
- else
+ LocationInfoListNode* op2Info;
+ info->srcCount += GetOperandInfo(op2, &op2Info);
+ for (; op2Info != nullptr; op2Info = op2Info->Next())
{
- op2->gtLsraInfo.setSrcCandidates(this, allRegs(TYP_INT) & ~(RBM_RAX | RBM_RDX));
+ op2Info->info.setSrcCandidates(this, allRegs(TYP_INT) & ~(RBM_RAX | RBM_RDX));
}
}
// Return Value:
// None.
//
-void LinearScan::TreeNodeInfoInitIntrinsic(GenTree* tree)
+void LinearScan::TreeNodeInfoInitIntrinsic(GenTree* tree, TreeNodeInfo* info)
{
- TreeNodeInfo* info = &(tree->gtLsraInfo);
-
// Both operand and its result must be of floating point type.
GenTree* op1 = tree->gtGetOp1();
assert(varTypeIsFloating(op1));
assert(op1->TypeGet() == tree->TypeGet());
- info->srcCount = GetOperandSourceCount(op1);
+ info->srcCount = GetOperandInfo(op1);
assert(info->dstCount == 1);
switch (tree->gtIntrinsic.gtIntrinsicId)
// Return Value:
// None.
-void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree)
+void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree, TreeNodeInfo* info)
{
- TreeNodeInfo* info = &(simdTree->gtLsraInfo);
-
// Only SIMDIntrinsicInit can be contained. Other than that,
// only SIMDIntrinsicOpEquality and SIMDIntrinsicOpInEquality can have 0 dstCount.
if (simdTree->isContained())
(simdTree->gtSIMDIntrinsicID == SIMDIntrinsicOpInEquality));
}
SetContainsAVXFlags(true, simdTree->gtSIMDSize);
- switch (simdTree->gtSIMDIntrinsicID)
+ GenTree* op1 = simdTree->gtOp.gtOp1;
+ GenTree* op2 = simdTree->gtOp.gtOp2;
+ info->srcCount = 0;
+ if (!op1->OperIs(GT_LIST))
+ {
+ info->srcCount += GetOperandInfo(op1);
+ }
+ if ((op2 != nullptr) && !op2->isContained())
{
- GenTree* op1;
- GenTree* op2;
+ info->srcCount += GetOperandInfo(op2);
+ }
+ switch (simdTree->gtSIMDIntrinsicID)
+ {
case SIMDIntrinsicInit:
{
- op1 = simdTree->gtOp.gtOp1;
-
-#if !defined(_TARGET_64BIT_)
- if (op1->OperGet() == GT_LONG)
- {
- info->srcCount = 2;
- assert(op1->isContained());
- }
- else
-#endif // !defined(_TARGET_64BIT_)
- {
- info->srcCount = 1;
- }
-
// This sets all fields of a SIMD struct to the given value.
// Mark op1 as contained if it is either zero or int constant of all 1's,
// or a float constant with 16 or 32 byte simdType (AVX case)
#if !defined(_TARGET_64BIT_)
if (op1->OperGet() == GT_LONG)
{
- op1->SetContained();
+ assert(op1->isContained());
GenTree* op1lo = op1->gtGetOp1();
GenTree* op1hi = op1->gtGetOp2();
assert(op1hi->isContained());
assert((op1lo->IsIntegralConst(0) && op1hi->IsIntegralConst(0)) ||
(op1lo->IsIntegralConst(-1) && op1hi->IsIntegralConst(-1)));
- info->srcCount = 0;
+ assert(info->srcCount == 0);
}
else
{
- // need a temp
+ assert(info->srcCount == 2);
info->internalFloatCount = 1;
info->setInternalCandidates(this, allSIMDRegs());
info->isInternalRegDelayFree = true;
- info->srcCount = 2;
}
}
- else
#endif // !defined(_TARGET_64BIT_)
- {
- info->srcCount = op1->isContained() ? 0 : 1;
- }
}
break;
case SIMDIntrinsicInitN:
{
- info->srcCount = (short)(simdTree->gtSIMDSize / genTypeSize(simdTree->gtSIMDBaseType));
+ var_types baseType = simdTree->gtSIMDBaseType;
+ info->srcCount = (short)(simdTree->gtSIMDSize / genTypeSize(baseType));
+ int initCount = 0;
+ for (GenTree* list = op1; list != nullptr; list = list->gtGetOp2())
+ {
+ assert(list->OperGet() == GT_LIST);
+ GenTree* listItem = list->gtGetOp1();
+ assert(listItem->TypeGet() == baseType);
+ assert(!listItem->isContained());
+ appendLocationInfoToList(listItem);
+ initCount++;
+ }
+ assert(initCount == info->srcCount);
// Need an internal register to stitch together all the values into a single vector in a SIMD reg.
info->internalFloatCount = 1;
case SIMDIntrinsicInitArray:
// We have an array and an index, which may be contained.
- info->srcCount = simdTree->gtGetOp2()->isContained() ? 1 : 2;
+ assert(info->srcCount == (simdTree->gtGetOp2()->isContained() ? 1 : 2));
break;
case SIMDIntrinsicDiv:
// SSE2 has no instruction support for division on integer vectors
noway_assert(varTypeIsFloating(simdTree->gtSIMDBaseType));
- info->srcCount = 2;
+ assert(info->srcCount == 2);
break;
case SIMDIntrinsicAbs:
assert(simdTree->gtSIMDBaseType == TYP_INT || simdTree->gtSIMDBaseType == TYP_SHORT ||
simdTree->gtSIMDBaseType == TYP_BYTE);
assert(compiler->getSIMDSupportLevel() >= SIMD_SSE4_Supported);
- info->srcCount = 1;
+ assert(info->srcCount == 1);
break;
case SIMDIntrinsicSqrt:
// SSE2 has no instruction support for sqrt on integer vectors.
noway_assert(varTypeIsFloating(simdTree->gtSIMDBaseType));
- info->srcCount = 1;
+ assert(info->srcCount == 1);
break;
case SIMDIntrinsicAdd:
case SIMDIntrinsicBitwiseXor:
case SIMDIntrinsicMin:
case SIMDIntrinsicMax:
- info->srcCount = 2;
+ assert(info->srcCount == 2);
// SSE2 32-bit integer multiplication requires two temp regs
if (simdTree->gtSIMDIntrinsicID == SIMDIntrinsicMul && simdTree->gtSIMDBaseType == TYP_INT &&
break;
case SIMDIntrinsicEqual:
- info->srcCount = 2;
+ assert(info->srcCount == 2);
break;
// SSE2 doesn't support < and <= directly on int vectors.
// Instead we need to use > and >= with swapped operands.
case SIMDIntrinsicLessThan:
case SIMDIntrinsicLessThanOrEqual:
- info->srcCount = 2;
+ assert(info->srcCount == 2);
noway_assert(!varTypeIsIntegral(simdTree->gtSIMDBaseType));
break;
// Instead we need to use < and <= with swapped operands.
case SIMDIntrinsicGreaterThan:
noway_assert(!varTypeIsFloating(simdTree->gtSIMDBaseType));
- info->srcCount = 2;
+ assert(info->srcCount == 2);
break;
case SIMDIntrinsicOpEquality:
// If the second operand is contained then ContainCheckSIMD has determined
// that PTEST can be used. We only need a single source register and no
// internal registers.
- info->srcCount = 1;
+ assert(info->srcCount == 1);
}
else
{
// Can't use PTEST so we need 2 source registers, 1 internal SIMD register
// (to hold the result of PCMPEQD or other similar SIMD compare instruction)
// and one internal INT register (to hold the result of PMOVMSKB).
- info->srcCount = 2;
+ assert(info->srcCount == 2);
info->internalFloatCount = 1;
info->setInternalCandidates(this, allSIMDRegs());
info->internalIntCount = 1;
info->internalFloatCount = (compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported) ? 2 : 1;
info->setInternalCandidates(this, allSIMDRegs());
}
- info->srcCount = 2;
+ assert(info->srcCount == 2);
break;
case SIMDIntrinsicGetItem:
// - index (which element to get)
// The result is baseType of SIMD struct.
// op1 may be a contained memory op, but if so we will consume its address.
- info->srcCount = 0;
- op1 = simdTree->gtOp.gtOp1;
- op2 = simdTree->gtOp.gtOp2;
-
// op2 may be a contained constant.
- if (!op2->isContained())
- {
- info->srcCount++;
- }
+ op1 = simdTree->gtOp.gtOp1;
+ op2 = simdTree->gtOp.gtOp2;
- if (op1->isContained())
- {
- // Although GT_IND of TYP_SIMD12 reserves an internal float
- // register for reading 4 and 8 bytes from memory and
- // assembling them into target XMM reg, it is not required
- // in this case.
- op1->gtLsraInfo.internalIntCount = 0;
- op1->gtLsraInfo.internalFloatCount = 0;
- info->srcCount += GetOperandSourceCount(op1);
- }
- else
+ if (!op1->isContained())
{
// If the index is not a constant, we will use the SIMD temp location to store the vector.
// Otherwise, if the baseType is floating point, the targetReg will be a xmm reg and we
// In all other cases with constant index, we need a temp xmm register to extract the
// element if index is other than zero.
- info->srcCount++;
if (!op2->IsCnsIntOrI())
{
(void)compiler->getSIMDInitTempVarNum();
case SIMDIntrinsicSetY:
case SIMDIntrinsicSetZ:
case SIMDIntrinsicSetW:
- info->srcCount = 2;
+ assert(info->srcCount == 2);
// We need an internal integer register for SSE2 codegen
if (compiler->getSIMDSupportLevel() == SIMD_SSE2_Supported)
break;
case SIMDIntrinsicCast:
- info->srcCount = 1;
+ assert(info->srcCount == 1);
break;
case SIMDIntrinsicConvertToSingle:
- info->srcCount = 1;
+ assert(info->srcCount == 1);
if (simdTree->gtSIMDBaseType == TYP_UINT)
{
// We need an internal register different from targetReg.
case SIMDIntrinsicConvertToUInt32:
case SIMDIntrinsicConvertToInt32:
- info->srcCount = 1;
+ assert(info->srcCount == 1);
break;
case SIMDIntrinsicWidenLo:
case SIMDIntrinsicWidenHi:
- info->srcCount = 1;
+ assert(info->srcCount == 1);
if (varTypeIsIntegral(simdTree->gtSIMDBaseType))
{
// We need an internal register different from targetReg.
case SIMDIntrinsicConvertToInt64:
case SIMDIntrinsicConvertToUInt64:
+ assert(info->srcCount == 1);
// We need an internal register different from targetReg.
info->isInternalRegDelayFree = true;
- info->srcCount = 1;
info->internalIntCount = 1;
if (compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported)
{
break;
case SIMDIntrinsicConvertToDouble:
+ assert(info->srcCount == 1);
// We need an internal register different from targetReg.
info->isInternalRegDelayFree = true;
- info->srcCount = 1;
info->internalIntCount = 1;
#ifdef _TARGET_X86_
if (simdTree->gtSIMDBaseType == TYP_LONG)
break;
case SIMDIntrinsicNarrow:
+ assert(info->srcCount == 2);
// We need an internal register different from targetReg.
info->isInternalRegDelayFree = true;
- info->srcCount = 2;
if ((compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported) && (simdTree->gtSIMDBaseType != TYP_DOUBLE))
{
info->internalFloatCount = 2;
break;
case SIMDIntrinsicShuffleSSE2:
+ assert(info->srcCount == 1);
// Second operand is an integer constant and marked as contained.
assert(simdTree->gtOp.gtOp2->isContainedIntOrIImmed());
- info->srcCount = 1;
break;
case SIMDIntrinsicGetX:
// Return Value:
// None.
-void LinearScan::TreeNodeInfoInitHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
+void LinearScan::TreeNodeInfoInitHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, TreeNodeInfo* info)
{
NamedIntrinsic intrinsicID = intrinsicTree->gtHWIntrinsicId;
InstructionSet isa = compiler->isaOfHWIntrinsic(intrinsicID);
{
SetContainsAVXFlags(true, 32);
}
- TreeNodeInfo* info = &(intrinsicTree->gtLsraInfo);
+ info->srcCount += GetOperandInfo(intrinsicTree->gtOp.gtOp1);
if (intrinsicTree->gtGetOp2IfPresent() != nullptr)
{
- info->srcCount += GetOperandSourceCount(intrinsicTree->gtOp.gtOp2);
+ info->srcCount += GetOperandInfo(intrinsicTree->gtOp.gtOp2);
}
- info->srcCount += GetOperandSourceCount(intrinsicTree->gtOp.gtOp1);
#ifdef _TARGET_X86_
if (intrinsicTree->gtHWIntrinsicId == NI_SSE42_Crc32)
var_types srcType = intrinsicTree->gtSIMDBaseType;
if (varTypeIsByte(srcType))
{
- intrinsicTree->gtOp.gtOp2->gtLsraInfo.setSrcCandidates(this, RBM_BYTE_REGS);
+ LocationInfoListNode* op2Info = useList.GetSecond(INDEBUG(intrinsicTree->gtGetOp2()));
+ op2Info->info.setSrcCandidates(this, RBM_BYTE_REGS);
}
}
#endif
// Return Value:
// None.
//
-void LinearScan::TreeNodeInfoInitCast(GenTree* tree)
+void LinearScan::TreeNodeInfoInitCast(GenTree* tree, TreeNodeInfo* info)
{
- TreeNodeInfo* info = &(tree->gtLsraInfo);
-
// TODO-XArch-CQ: Int-To-Int conversions - castOp cannot be a memory op and must have an assigned register.
// see CodeGen::genIntToIntCast()
GenTreePtr castOp = tree->gtCast.CastOp();
var_types castOpType = castOp->TypeGet();
- info->srcCount = GetOperandSourceCount(castOp);
+ info->srcCount = GetOperandInfo(castOp);
assert(info->dstCount == 1);
if (tree->gtFlags & GTF_UNSIGNED)
{
// Return Value:
// None.
//
-void LinearScan::TreeNodeInfoInitGCWriteBarrier(GenTree* tree)
+void LinearScan::TreeNodeInfoInitGCWriteBarrier(GenTree* tree, TreeNodeInfo* info)
{
assert(tree->OperGet() == GT_STOREIND);
- GenTreeStoreInd* dst = tree->AsStoreInd();
- GenTreePtr addr = dst->Addr();
- GenTreePtr src = dst->Data();
+ GenTreeStoreInd* dst = tree->AsStoreInd();
+ GenTreePtr addr = dst->Addr();
+ GenTreePtr src = dst->Data();
+ LocationInfoListNode* addrInfo = getLocationInfo(addr);
+ LocationInfoListNode* srcInfo = getLocationInfo(src);
// In the case where we are doing a helper assignment, we need to actually instantiate the
// address in a register.
- assert(!addr->isContained());
- tree->gtLsraInfo.srcCount = 1 + GetIndirSourceCount(dst);
- assert(tree->gtLsraInfo.dstCount == 0);
+ assert(!addr->isContained() && !src->isContained());
+ useList.Append(addrInfo);
+ useList.Append(srcInfo);
+ info->srcCount = 2;
+ assert(info->dstCount == 0);
bool useOptimizedWriteBarrierHelper = false; // By default, assume no optimized write barriers.
// Special write barrier:
// op1 (addr) goes into REG_WRITE_BARRIER (rdx) and
// op2 (src) goes into any int register.
- addr->gtLsraInfo.setSrcCandidates(this, RBM_WRITE_BARRIER);
- src->gtLsraInfo.setSrcCandidates(this, RBM_WRITE_BARRIER_SRC);
+ addrInfo->info.setSrcCandidates(this, RBM_WRITE_BARRIER);
+ srcInfo->info.setSrcCandidates(this, RBM_WRITE_BARRIER_SRC);
}
#else // !defined(_TARGET_X86_)
// For the standard JIT Helper calls:
// op1 (addr) goes into REG_ARG_0 and
// op2 (src) goes into REG_ARG_1
- addr->gtLsraInfo.setSrcCandidates(this, RBM_ARG_0);
- src->gtLsraInfo.setSrcCandidates(this, RBM_ARG_1);
+ addrInfo->info.setSrcCandidates(this, RBM_ARG_0);
+ srcInfo->info.setSrcCandidates(this, RBM_ARG_1);
}
// Both src and dst must reside in a register, which they should since we haven't set
// either of them as contained.
- assert(addr->gtLsraInfo.dstCount == 1);
- assert(src->gtLsraInfo.dstCount == 1);
+ assert(addrInfo->info.dstCount == 1);
+ assert(srcInfo->info.dstCount == 1);
}
//-----------------------------------------------------------------------------------------
// Arguments:
// indirTree - GT_IND or GT_STOREIND gentree node
//
-void LinearScan::TreeNodeInfoInitIndir(GenTreeIndir* indirTree)
+void LinearScan::TreeNodeInfoInitIndir(GenTreeIndir* indirTree, TreeNodeInfo* info)
{
// If this is the rhs of a block copy (i.e. non-enregisterable struct),
// it has no register requirements.
return;
}
- TreeNodeInfo* info = &(indirTree->gtLsraInfo);
-
- info->srcCount = GetIndirSourceCount(indirTree);
+ int indirSrcCount = GetIndirInfo(indirTree);
if (indirTree->gtOper == GT_STOREIND)
{
GenTree* source = indirTree->gtOp.gtOp2;
if (source->OperIsShiftOrRotate())
{
- TreeNodeInfoInitShiftRotate(source);
+ info->srcCount += TreeNodeInfoInitShiftRotate(source, info);
+ }
+ else
+ {
+ info->srcCount += appendBinaryLocationInfoToList(source->AsOp());
}
if (indirTree->AsStoreInd()->IsRMWDstOp1())
{
}
if (nonMemSource != nullptr)
{
- info->srcCount += GetOperandSourceCount(nonMemSource);
assert(!nonMemSource->isContained() || (!nonMemSource->isMemoryOp() && !nonMemSource->IsLocal()));
#ifdef _TARGET_X86_
if (varTypeIsByte(indirTree) && !nonMemSource->isContained())
{
// If storeInd is of TYP_BYTE, set source to byteable registers.
- regMaskTP regMask = nonMemSource->gtLsraInfo.getSrcCandidates(this);
+ TreeNodeInfo& nonMemSourceInfo = useList.GetTreeNodeInfo(nonMemSource);
+ regMaskTP regMask = nonMemSourceInfo.getSrcCandidates(this);
regMask &= ~RBM_NON_BYTE_REGS;
assert(regMask != RBM_NONE);
- nonMemSource->gtLsraInfo.setSrcCandidates(this, regMask);
+ nonMemSourceInfo.setSrcCandidates(this, regMask);
}
#endif
}
}
else
{
- info->srcCount += GetOperandSourceCount(source);
- }
#ifdef _TARGET_X86_
- if (varTypeIsByte(indirTree) && !source->isContained())
- {
- // If storeInd is of TYP_BYTE, set source to byteable registers.
- regMaskTP regMask = source->gtLsraInfo.getSrcCandidates(this);
- regMask &= ~RBM_NON_BYTE_REGS;
- assert(regMask != RBM_NONE);
- source->gtLsraInfo.setSrcCandidates(this, regMask);
- }
+ if (varTypeIsByte(indirTree) && !source->isContained())
+ {
+ // If storeInd is of TYP_BYTE, set source to byteable registers.
+ LocationInfoListNode* sourceInfo = getLocationInfo(source);
+ regMaskTP regMask = sourceInfo->info.getSrcCandidates(this);
+ regMask &= ~RBM_NON_BYTE_REGS;
+ assert(regMask != RBM_NONE);
+ sourceInfo->info.setSrcCandidates(this, regMask);
+ useList.Append(sourceInfo);
+ info->srcCount++;
+ }
+ else
#endif
+ {
+ info->srcCount += GetOperandInfo(source);
+ }
+ }
}
+ info->srcCount += indirSrcCount;
#ifdef FEATURE_SIMD
if (indirTree->TypeGet() == TYP_SIMD12)
// Return Value:
// None.
//
-void LinearScan::TreeNodeInfoInitCmp(GenTreePtr tree)
+void LinearScan::TreeNodeInfoInitCmp(GenTreePtr tree, TreeNodeInfo* info)
{
assert(tree->OperIsCompare() || tree->OperIs(GT_CMP));
- TreeNodeInfo* info = &(tree->gtLsraInfo);
- info->srcCount = 0;
+ info->srcCount = 0;
assert((info->dstCount == 1) || (tree->TypeGet() == TYP_VOID));
#ifdef _TARGET_X86_
var_types op1Type = op1->TypeGet();
var_types op2Type = op2->TypeGet();
- if (op1->TypeGet() != TYP_VOID)
- {
- info->srcCount += GetOperandSourceCount(op1);
- }
- info->srcCount += GetOperandSourceCount(op2);
+ info->srcCount = appendBinaryLocationInfoToList(tree->AsOp());
}
//------------------------------------------------------------------------
// Return Value:
// None.
//
-void LinearScan::TreeNodeInfoInitMul(GenTreePtr tree)
+void LinearScan::TreeNodeInfoInitMul(GenTreePtr tree, TreeNodeInfo* info)
{
#if defined(_TARGET_X86_)
assert(tree->OperIs(GT_MUL, GT_MULHI, GT_MUL_LONG));
#else
assert(tree->OperIs(GT_MUL, GT_MULHI));
#endif
- TreeNodeInfo* info = &(tree->gtLsraInfo);
- GenTree* op1 = tree->gtOp.gtOp1;
- GenTree* op2 = tree->gtOp.gtOp2;
- info->srcCount = GetOperandSourceCount(op1);
- info->srcCount += GetOperandSourceCount(op2);
+ GenTree* op1 = tree->gtOp.gtOp1;
+ GenTree* op2 = tree->gtOp.gtOp2;
+ info->srcCount = appendBinaryLocationInfoToList(tree->AsOp());
assert(info->dstCount == 1);
// Case of float/double mul.
}
#endif // _TARGET_X86_
-//------------------------------------------------------------------------
-// GetOperandSourceCount: Get the source registers for an operand that might be contained.
-//
-// Arguments:
-// node - The node of interest
-//
-// Return Value:
-// The number of source registers used by the *parent* of this node.
-//
-int LinearScan::GetOperandSourceCount(GenTree* node)
-{
- if (!node->isContained())
- {
- return 1;
- }
-
-#if !defined(_TARGET_64BIT_)
- if (node->OperIs(GT_LONG))
- {
- return 2;
- }
-#endif // !defined(_TARGET_64BIT_)
- if (node->OperIsIndir())
- {
- const unsigned srcCount = GetIndirSourceCount(node->AsIndir());
- return srcCount;
- }
-
- return 0;
-}
-
#endif // _TARGET_XARCH_
#endif // !LEGACY_BACKEND
public:
TreeNodeInfo()
{
- loc = 0;
_dstCount = 0;
_srcCount = 0;
_internalIntCount = 0;
dstCandsIndex = 0;
internalCandsIndex = 0;
isLocalDefUse = false;
- isLsraAdded = false;
isDelayFree = false;
hasDelayFreeSrc = false;
isTgtPref = false;
- regOptional = false;
- definesAnyRegisters = false;
isInternalRegDelayFree = false;
#ifdef DEBUG
isInitialized = false;
void setInternalCandidates(LinearScan* lsra, regMaskTP mask);
void addInternalCandidates(LinearScan* lsra, regMaskTP mask);
- LsraLocation loc;
-
public:
unsigned char srcCandsIndex;
unsigned char dstCandsIndex;
// nodes, or top-level nodes that are non-void.
unsigned char isLocalDefUse : 1;
- // Is this node added by LSRA, e.g. as a resolution or copy/reload move.
- unsigned char isLsraAdded : 1;
-
// isDelayFree is set when the register defined by this node will interfere with the destination
// of the consuming node, and therefore it must not be freed immediately after use.
unsigned char isDelayFree : 1;
// in the same register as op1.
unsigned char isTgtPref : 1;
- // Whether a spilled second src can be treated as a contained operand
- unsigned char regOptional : 1;
-
- // Whether or not a node defines any registers, whether directly (for nodes where dstCout is non-zero)
- // or indirectly (for contained nodes, which propagate the transitive closure of the registers
- // defined by their inputs). Used during buildRefPositionsForNode in order to avoid unnecessary work.
- unsigned char definesAnyRegisters : 1;
-
// Whether internal register needs to be different from targetReg
// in which result is produced.
unsigned char isInternalRegDelayFree : 1;
public:
// Initializes the TreeNodeInfo value with the given values.
- void Initialize(LinearScan* lsra, GenTree* node, LsraLocation location);
+ void Initialize(LinearScan* lsra, GenTree* node);
#ifdef DEBUG
void dump(LinearScan* lsra);