1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
4 /*****************************************************************************/
10 #include "smallhash.h"
12 // Minor and forward-reference types
21 // LsraLocation tracks the linearized order of the nodes.
22 // Each node is assigned two LsraLocations - one for all the uses and all but the last
23 // def, and a second location for the last def (if any)
25 typedef unsigned int LsraLocation;
26 const unsigned int MinLocation = 0;
27 const unsigned int MaxLocation = UINT_MAX;
28 // max number of registers an operation could require internally (in addition to uses and defs)
29 const unsigned int MaxInternalRegisters = 8;
30 const unsigned int RegisterTypeCount = 2;
32 /*****************************************************************************
34 *****************************************************************************/
35 typedef var_types RegisterType;
36 #define IntRegisterType TYP_INT
37 #define FloatRegisterType TYP_FLOAT
39 //------------------------------------------------------------------------
40 // regType: Return the RegisterType to use for a given type
43 // type - the type of interest
46 RegisterType regType(T type)
49 if (varTypeIsSIMD(type))
51 return FloatRegisterType;
53 #endif // FEATURE_SIMD
54 return varTypeIsFloating(TypeGet(type)) ? FloatRegisterType : IntRegisterType;
57 //------------------------------------------------------------------------
58 // useFloatReg: Check if the given var_type should be allocated to a FloatRegisterType
60 inline bool useFloatReg(var_types type)
62 return (regType(type) == FloatRegisterType);
65 //------------------------------------------------------------------------
66 // registerTypesEquivalent: Check to see if two RegisterTypes are equivalent
68 inline bool registerTypesEquivalent(RegisterType a, RegisterType b)
70 return varTypeIsIntegralOrI(a) == varTypeIsIntegralOrI(b);
73 //------------------------------------------------------------------------
74 // registerTypesEquivalent: Get the set of callee-save registers of the given RegisterType
76 inline regMaskTP calleeSaveRegs(RegisterType rt)
78 return varTypeIsIntegralOrI(rt) ? RBM_INT_CALLEE_SAVED : RBM_FLT_CALLEE_SAVED;
81 //------------------------------------------------------------------------
82 // RefInfo: Captures the necessary information for a definition that is "in-flight"
83 // during `buildIntervals` (i.e. a tree-node definition has been encountered,
84 // but not its use). This includes the RefPosition and its associated
92 RefInfo(RefPosition* r, GenTree* t) : ref(r), treeNode(t)
96 // default constructor for data structures
102 //------------------------------------------------------------------------
103 // RefInfoListNode: used to store a single `RefInfo` value for a
104 // node during `buildIntervals`.
106 // This is the node type for `RefInfoList` below.
108 class RefInfoListNode final : public RefInfo
110 friend class RefInfoList;
111 friend class RefInfoListNodePool;
113 RefInfoListNode* m_next; // The next node in the list
116 RefInfoListNode(RefPosition* r, GenTree* t) : RefInfo(r, t)
120 //------------------------------------------------------------------------
121 // RefInfoListNode::Next: Returns the next node in the list.
122 RefInfoListNode* Next() const
128 //------------------------------------------------------------------------
129 // RefInfoList: used to store a list of `RefInfo` values for a
130 // node during `buildIntervals`.
132 // This list of 'RefInfoListNode's contains the source nodes consumed by
133 // a node, and is created by 'BuildNode'.
135 class RefInfoList final
137 friend class RefInfoListNodePool;
139 RefInfoListNode* m_head; // The head of the list
140 RefInfoListNode* m_tail; // The tail of the list
143 RefInfoList() : m_head(nullptr), m_tail(nullptr)
147 RefInfoList(RefInfoListNode* node) : m_head(node), m_tail(node)
149 assert(m_head->m_next == nullptr);
152 //------------------------------------------------------------------------
153 // RefInfoList::IsEmpty: Returns true if the list is empty.
157 return m_head == nullptr;
160 //------------------------------------------------------------------------
161 // RefInfoList::Begin: Returns the first node in the list.
163 RefInfoListNode* Begin() const
168 //------------------------------------------------------------------------
169 // RefInfoList::End: Returns the position after the last node in the
170 // list. The returned value is suitable for use as
171 // a sentinel for iteration.
173 RefInfoListNode* End() const
178 //------------------------------------------------------------------------
179 // RefInfoList::End: Returns the position after the last node in the
180 // list. The returned value is suitable for use as
181 // a sentinel for iteration.
183 RefInfoListNode* Last() const
188 //------------------------------------------------------------------------
189 // RefInfoList::Append: Appends a node to the list.
192 // node - The node to append. Must not be part of an existing list.
194 void Append(RefInfoListNode* node)
196 assert(node->m_next == nullptr);
198 if (m_tail == nullptr)
200 assert(m_head == nullptr);
205 m_tail->m_next = node;
210 //------------------------------------------------------------------------
211 // RefInfoList::Append: Appends another list to this list.
214 // other - The list to append.
216 void Append(RefInfoList other)
218 if (m_tail == nullptr)
220 assert(m_head == nullptr);
221 m_head = other.m_head;
225 m_tail->m_next = other.m_head;
228 m_tail = other.m_tail;
231 //------------------------------------------------------------------------
232 // RefInfoList::Prepend: Prepends a node to the list.
235 // node - The node to prepend. Must not be part of an existing list.
237 void Prepend(RefInfoListNode* node)
239 assert(node->m_next == nullptr);
241 if (m_head == nullptr)
243 assert(m_tail == nullptr);
248 node->m_next = m_head;
254 //------------------------------------------------------------------------
255 // RefInfoList::Add: Adds a node to the list.
258 // node - The node to add. Must not be part of an existing list.
259 // prepend - True if it should be prepended (otherwise is appended)
261 void Add(RefInfoListNode* node, bool prepend)
273 //------------------------------------------------------------------------
274 // removeListNode - retrieve the RefInfo for the given node
277 // The BuildNode methods use this helper to retrieve the RefInfo for child nodes
278 // from the useList being constructed.
280 RefInfoListNode* removeListNode(RefInfoListNode* listNode, RefInfoListNode* prevListNode)
282 RefInfoListNode* nextNode = listNode->Next();
283 if (prevListNode == nullptr)
289 prevListNode->m_next = nextNode;
291 if (nextNode == nullptr)
293 m_tail = prevListNode;
295 listNode->m_next = nullptr;
299 // removeListNode - remove the RefInfoListNode for the given GenTree node from the defList
300 RefInfoListNode* removeListNode(GenTree* node);
301 // Same as above but takes a multiRegIdx to support multi-reg nodes.
302 RefInfoListNode* removeListNode(GenTree* node, unsigned multiRegIdx);
304 //------------------------------------------------------------------------
305 // GetRefPosition - retrieve the RefPosition for the given node
308 // The Build methods use this helper to retrieve the RefPosition for child nodes
309 // from the useList being constructed. Note that, if the user knows the order of the operands,
310 // it is expected that they should just retrieve them directly.
312 RefPosition* GetRefPosition(GenTree* node)
314 for (RefInfoListNode *listNode = Begin(), *end = End(); listNode != end; listNode = listNode->Next())
316 if (listNode->treeNode == node)
318 return listNode->ref;
321 assert(!"GetRefPosition didn't find the node");
325 //------------------------------------------------------------------------
326 // RefInfoList::GetSecond: Gets the second node in the list.
329 // (DEBUG ONLY) treeNode - The GenTree* we expect to be in the second node.
331 RefInfoListNode* GetSecond(INDEBUG(GenTree* treeNode))
333 noway_assert((Begin() != nullptr) && (Begin()->Next() != nullptr));
334 RefInfoListNode* second = Begin()->Next();
335 assert(second->treeNode == treeNode);
340 // Count - return the number of nodes in the list (DEBUG only)
344 for (RefInfoListNode *listNode = Begin(), *end = End(); listNode != end; listNode = listNode->Next())
353 //------------------------------------------------------------------------
354 // RefInfoListNodePool: manages a pool of `RefInfoListNode`
355 // values to decrease overall memory usage
356 // during `buildIntervals`.
358 // `buildIntervals` involves creating a list of RefInfo items per
359 // node that either directly produces a set of registers or that is a
360 // contained node with register-producing sources. However, these lists
361 // are short-lived: they are destroyed once the use of the corresponding
362 // node is processed. As such, there is typically only a small number of
363 // `RefInfoListNode` values in use at any given time. Pooling these
364 // values avoids otherwise frequent allocations.
365 class RefInfoListNodePool final
367 RefInfoListNode* m_freeList;
368 Compiler* m_compiler;
369 static const unsigned defaultPreallocation = 8;
372 RefInfoListNodePool(Compiler* compiler, unsigned preallocate = defaultPreallocation);
373 RefInfoListNode* GetNode(RefPosition* r, GenTree* t, unsigned regIdx = 0);
374 void ReturnNode(RefInfoListNode* listNode);
379 // bbNum of the predecessor to use for the register location of live-in variables.
381 unsigned int predBBNum;
382 BasicBlock::weight_t weight;
383 bool hasCriticalInEdge;
384 bool hasCriticalOutEdge;
387 // Per block maintained LSRA statistics.
389 // Number of spills of local vars or tree temps in this basic block.
392 // Number of GT_COPY nodes inserted in this basic block while allocating regs.
393 // Note that GT_COPY nodes are also inserted as part of basic block boundary
394 // resolution, which are accounted against resolutionMovCount but not
395 // against copyRegCount.
396 unsigned copyRegCount;
398 // Number of resolution moves inserted in this basic block.
399 unsigned resolutionMovCount;
401 // Number of critical edges from this block that are split.
402 unsigned splitEdgeCount;
403 #endif // TRACK_LSRA_STATS
406 // This is sort of a bit mask
407 // The low order 2 bits will be 1 for defs, and 2 for uses
408 enum RefType : unsigned char
410 #define DEF_REFTYPE(memberName, memberValue, shortName) memberName = memberValue,
411 #include "lsra_reftypes.h"
// position in a block (for resolution)
enum BlockStartOrEnd
{
    BlockPositionStart = 0,
    BlockPositionEnd   = 1,
    PositionCount      = 2
};
423 inline bool RefTypeIsUse(RefType refType)
425 return ((refType & RefTypeUse) == RefTypeUse);
428 inline bool RefTypeIsDef(RefType refType)
430 return ((refType & RefTypeDef) == RefTypeDef);
433 typedef regNumberSmall* VarToRegMap;
435 typedef jitstd::list<Interval> IntervalList;
436 typedef jitstd::list<RefPosition> RefPositionList;
437 typedef jitstd::list<RefPosition>::iterator RefPositionIterator;
438 typedef jitstd::list<RefPosition>::reverse_iterator RefPositionReverseIterator;
445 firstRefPosition = nullptr;
446 recentRefPosition = nullptr;
447 lastRefPosition = nullptr;
451 // A linked list of RefPositions. These are only traversed in the forward
452 // direction, and are not moved, so they don't need to be doubly linked
453 // (see RefPosition).
455 RefPosition* firstRefPosition;
456 RefPosition* recentRefPosition;
457 RefPosition* lastRefPosition;
461 // Get the position of the next reference which is at or greater than
462 // the current location (relies upon recentRefPosition being udpated
463 // during traversal).
464 RefPosition* getNextRefPosition();
465 LsraLocation getNextRefLocation();
468 class RegRecord : public Referenceable
473 assignedInterval = nullptr;
474 previousInterval = nullptr;
476 isCalleeSave = false;
477 registerType = IntRegisterType;
478 isBusyUntilNextKill = false;
481 void init(regNumber reg)
483 #ifdef _TARGET_ARM64_
484 // The Zero register, or the SP
485 if ((reg == REG_ZR) || (reg == REG_SP))
487 // IsGeneralRegister returns false for REG_ZR and REG_SP
489 registerType = IntRegisterType;
493 if (emitter::isFloatReg(reg))
495 registerType = FloatRegisterType;
499 // The constructor defaults to IntRegisterType
500 assert(emitter::isGeneralRegister(reg) && registerType == IntRegisterType);
503 isCalleeSave = ((RBM_CALLEE_SAVED & genRegMask(reg)) != 0);
507 // print out representation
509 // concise representation for embedding
515 // RefPosition * getNextRefPosition();
516 // LsraLocation getNextRefLocation();
520 // interval to which this register is currently allocated.
521 // If the interval is inactive (isActive == false) then it is not currently live,
522 // and the register call be unassigned (i.e. setting assignedInterval to nullptr)
523 // without spilling the register.
524 Interval* assignedInterval;
525 // Interval to which this register was previously allocated, and which was unassigned
526 // because it was inactive. This register will be reassigned to this Interval when
527 // assignedInterval becomes inactive.
528 Interval* previousInterval;
532 RegisterType registerType;
533 // This register must be considered busy until the next time it is explicitly killed.
534 // This is used so that putarg_reg can avoid killing its lclVar source, while avoiding
535 // the problem with the reg becoming free if the last-use is encountered before the call.
536 bool isBusyUntilNextKill;
538 bool conflictingFixedRegReference(RefPosition* refPosition);
541 inline bool leafInRange(GenTree* leaf, int lower, int upper)
543 if (!leaf->IsIntCnsFitsInI32())
547 if (leaf->gtIntCon.gtIconVal < lower)
551 if (leaf->gtIntCon.gtIconVal > upper)
559 inline bool leafInRange(GenTree* leaf, int lower, int upper, int multiple)
561 if (!leafInRange(leaf, lower, upper))
565 if (leaf->gtIntCon.gtIconVal % multiple)
573 inline bool leafAddInRange(GenTree* leaf, int lower, int upper, int multiple = 1)
575 if (leaf->OperGet() != GT_ADD)
579 return leafInRange(leaf->gtGetOp2(), lower, upper, multiple);
582 inline bool isCandidateVar(LclVarDsc* varDsc)
584 return varDsc->lvLRACandidate;
587 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
588 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
592 XX This is the container for the Linear Scan data structures and methods. XX
594 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
595 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
597 // OPTION 1: The algorithm as described in "Optimized Interval Splitting in a
598 // Linear Scan Register Allocator". It is driven by iterating over the Interval
599 // lists. In this case, we need multiple IntervalLists, and Intervals will be
600 // moved between them so they must be easily updated.
602 // OPTION 2: The algorithm is driven by iterating over the RefPositions. In this
603 // case, we only need a single IntervalList, and it won't be updated.
604 // The RefPosition must refer to its Interval, and we need to be able to traverse
605 // to the next RefPosition in code order
606 // THIS IS THE OPTION CURRENTLY BEING PURSUED
608 class LinearScan : public LinearScanInterface
610 friend class RefPosition;
611 friend class Interval;
612 friend class Lowering;
615 // This could use further abstraction. From Compiler we need the tree,
616 // the flowgraph and the allocator.
617 LinearScan(Compiler* theCompiler);
619 // This is the main driver
620 virtual void doLinearScan();
622 static bool isSingleRegister(regMaskTP regMask)
624 return (genExactlyOneBit(regMask));
627 // Initialize the block traversal for LSRA.
628 // This resets the bbVisitedSet, and on the first invocation sets the blockSequence array,
629 // which determines the order in which blocks will be allocated (currently called during Lowering).
630 BasicBlock* startBlockSequence();
631 // Move to the next block in sequence, updating the current block information.
632 BasicBlock* moveToNextBlock();
633 // Get the next block to be scheduled without changing the current block,
634 // but updating the blockSequence during the first iteration if it is not fully computed.
635 BasicBlock* getNextBlock();
637 // This is called during code generation to update the location of variables
638 virtual void recordVarLocationsAtStartOfBB(BasicBlock* bb);
640 // This does the dataflow analysis and builds the intervals
641 void buildIntervals();
643 // This is where the actual assignment is done
644 void allocateRegisters();
646 // This is the resolution phase, where cross-block mismatches are fixed up
647 void resolveRegisters();
649 void writeRegisters(RefPosition* currentRefPosition, GenTree* tree);
651 // Insert a copy in the case where a tree node value must be moved to a different
652 // register at the point of use, or it is reloaded to a different register
653 // than the one it was spilled from
654 void insertCopyOrReload(BasicBlock* block, GenTree* tree, unsigned multiRegIdx, RefPosition* refPosition);
656 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
657 // Insert code to save and restore the upper half of a vector that lives
658 // in a callee-save register at the point of a call (the upper half is
660 void insertUpperVectorSaveAndReload(GenTree* tree, RefPosition* refPosition, BasicBlock* block);
661 #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
663 // resolve along one block-block edge
669 ResolveSharedCritical,
673 static const char* resolveTypeName[ResolveTypeCount];
683 void addResolutionForDouble(BasicBlock* block,
684 GenTree* insertionPoint,
685 Interval** sourceIntervals,
686 regNumberSmall* location,
689 ResolveType resolveType);
692 BasicBlock* block, GenTree* insertionPoint, Interval* interval, regNumber outReg, regNumber inReg);
694 void handleOutgoingCriticalEdges(BasicBlock* block);
696 void resolveEdge(BasicBlock* fromBlock, BasicBlock* toBlock, ResolveType resolveType, VARSET_VALARG_TP liveSet);
700 // Keep track of how many temp locations we'll need for spill
702 void updateMaxSpill(RefPosition* refPosition);
703 void recordMaxSpill();
705 // max simultaneous spill locations used of every type
706 unsigned int maxSpill[TYP_COUNT];
707 unsigned int currentSpill[TYP_COUNT];
708 bool needFloatTmpForFPCall;
709 bool needDoubleTmpForFPCall;
713 //------------------------------------------------------------------------
714 // Should we stress lsra? This uses the COMPlus_JitStressRegs variable.
716 // The mask bits are currently divided into fields in which each non-zero value
717 // is a distinct stress option (e.g. 0x3 is not a combination of 0x1 and 0x2).
718 // However, subject to possible constraints (to be determined), the different
719 // fields can be combined (e.g. 0x7 is a combination of 0x3 and 0x4).
720 // Note that the field values are declared in a public enum, but the actual bits are
721 // only accessed via accessors.
723 unsigned lsraStressMask;
725 // This controls the registers available for allocation
726 enum LsraStressLimitRegs{LSRA_LIMIT_NONE = 0, LSRA_LIMIT_CALLEE = 0x1, LSRA_LIMIT_CALLER = 0x2,
727 LSRA_LIMIT_SMALL_SET = 0x3, LSRA_LIMIT_MASK = 0x3};
729 // When LSRA_LIMIT_SMALL_SET is specified, it is desirable to select a "mixed" set of caller- and callee-save
730 // registers, so as to get different coverage than limiting to callee or caller.
731 // At least for x86 and AMD64, and potentially other architecture that will support SIMD,
732 // we need a minimum of 5 fp regs in order to support the InitN intrinsic for Vector4.
733 // Hence the "SmallFPSet" has 5 elements.
734 CLANG_FORMAT_COMMENT_ANCHOR;
736 #if defined(_TARGET_AMD64_)
737 #ifdef UNIX_AMD64_ABI
738 // On System V the RDI and RSI are not callee saved. Use R12 ans R13 as callee saved registers.
739 static const regMaskTP LsraLimitSmallIntSet =
740 (RBM_EAX | RBM_ECX | RBM_EBX | RBM_ETW_FRAMED_EBP | RBM_R12 | RBM_R13);
741 #else // !UNIX_AMD64_ABI
742 // On Windows Amd64 use the RDI and RSI as callee saved registers.
743 static const regMaskTP LsraLimitSmallIntSet =
744 (RBM_EAX | RBM_ECX | RBM_EBX | RBM_ETW_FRAMED_EBP | RBM_ESI | RBM_EDI);
745 #endif // !UNIX_AMD64_ABI
746 static const regMaskTP LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7);
747 #elif defined(_TARGET_ARM_)
748 // On ARM, we may need two registers to set up the target register for a virtual call, so we need
749 // to have at least the maximum number of arg registers, plus 2.
750 static const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3 | RBM_R4 | RBM_R5);
751 static const regMaskTP LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F16 | RBM_F17);
752 #elif defined(_TARGET_ARM64_)
753 static const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R19 | RBM_R20);
754 static const regMaskTP LsraLimitSmallFPSet = (RBM_V0 | RBM_V1 | RBM_V2 | RBM_V8 | RBM_V9);
755 #elif defined(_TARGET_X86_)
756 static const regMaskTP LsraLimitSmallIntSet = (RBM_EAX | RBM_ECX | RBM_EDI);
757 static const regMaskTP LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7);
759 #error Unsupported or unset target architecture
762 LsraStressLimitRegs getStressLimitRegs()
764 return (LsraStressLimitRegs)(lsraStressMask & LSRA_LIMIT_MASK);
767 regMaskTP getConstrainedRegMask(regMaskTP regMaskActual, regMaskTP regMaskConstrain, unsigned minRegCount);
768 regMaskTP stressLimitRegs(RefPosition* refPosition, regMaskTP mask);
770 // This controls the heuristics used to select registers
771 // These can be combined.
772 enum LsraSelect{LSRA_SELECT_DEFAULT = 0, LSRA_SELECT_REVERSE_HEURISTICS = 0x04,
773 LSRA_SELECT_REVERSE_CALLER_CALLEE = 0x08, LSRA_SELECT_NEAREST = 0x10, LSRA_SELECT_MASK = 0x1c};
774 LsraSelect getSelectionHeuristics()
776 return (LsraSelect)(lsraStressMask & LSRA_SELECT_MASK);
778 bool doReverseSelect()
780 return ((lsraStressMask & LSRA_SELECT_REVERSE_HEURISTICS) != 0);
782 bool doReverseCallerCallee()
784 return ((lsraStressMask & LSRA_SELECT_REVERSE_CALLER_CALLEE) != 0);
786 bool doSelectNearest()
788 return ((lsraStressMask & LSRA_SELECT_NEAREST) != 0);
791 // This controls the order in which basic blocks are visited during allocation
792 enum LsraTraversalOrder{LSRA_TRAVERSE_LAYOUT = 0x20, LSRA_TRAVERSE_PRED_FIRST = 0x40,
793 LSRA_TRAVERSE_RANDOM = 0x60, // NYI
794 LSRA_TRAVERSE_DEFAULT = LSRA_TRAVERSE_PRED_FIRST, LSRA_TRAVERSE_MASK = 0x60};
795 LsraTraversalOrder getLsraTraversalOrder()
797 if ((lsraStressMask & LSRA_TRAVERSE_MASK) == 0)
799 return LSRA_TRAVERSE_DEFAULT;
801 return (LsraTraversalOrder)(lsraStressMask & LSRA_TRAVERSE_MASK);
803 bool isTraversalLayoutOrder()
805 return getLsraTraversalOrder() == LSRA_TRAVERSE_LAYOUT;
807 bool isTraversalPredFirstOrder()
809 return getLsraTraversalOrder() == LSRA_TRAVERSE_PRED_FIRST;
812 // This controls whether lifetimes should be extended to the entire method.
813 // Note that this has no effect under MinOpts
814 enum LsraExtendLifetimes{LSRA_DONT_EXTEND = 0, LSRA_EXTEND_LIFETIMES = 0x80, LSRA_EXTEND_LIFETIMES_MASK = 0x80};
815 LsraExtendLifetimes getLsraExtendLifeTimes()
817 return (LsraExtendLifetimes)(lsraStressMask & LSRA_EXTEND_LIFETIMES_MASK);
819 bool extendLifetimes()
821 return getLsraExtendLifeTimes() == LSRA_EXTEND_LIFETIMES;
824 // This controls whether variables locations should be set to the previous block in layout order
825 // (LSRA_BLOCK_BOUNDARY_LAYOUT), or to that of the highest-weight predecessor (LSRA_BLOCK_BOUNDARY_PRED -
826 // the default), or rotated (LSRA_BLOCK_BOUNDARY_ROTATE).
827 enum LsraBlockBoundaryLocations{LSRA_BLOCK_BOUNDARY_PRED = 0, LSRA_BLOCK_BOUNDARY_LAYOUT = 0x100,
828 LSRA_BLOCK_BOUNDARY_ROTATE = 0x200, LSRA_BLOCK_BOUNDARY_MASK = 0x300};
829 LsraBlockBoundaryLocations getLsraBlockBoundaryLocations()
831 return (LsraBlockBoundaryLocations)(lsraStressMask & LSRA_BLOCK_BOUNDARY_MASK);
833 regNumber rotateBlockStartLocation(Interval* interval, regNumber targetReg, regMaskTP availableRegs);
835 // This controls whether we always insert a GT_RELOAD instruction after a spill
836 // Note that this can be combined with LSRA_SPILL_ALWAYS (or not)
837 enum LsraReload{LSRA_NO_RELOAD_IF_SAME = 0, LSRA_ALWAYS_INSERT_RELOAD = 0x400, LSRA_RELOAD_MASK = 0x400};
838 LsraReload getLsraReload()
840 return (LsraReload)(lsraStressMask & LSRA_RELOAD_MASK);
842 bool alwaysInsertReload()
844 return getLsraReload() == LSRA_ALWAYS_INSERT_RELOAD;
847 // This controls whether we spill everywhere
848 enum LsraSpill{LSRA_DONT_SPILL_ALWAYS = 0, LSRA_SPILL_ALWAYS = 0x800, LSRA_SPILL_MASK = 0x800};
849 LsraSpill getLsraSpill()
851 return (LsraSpill)(lsraStressMask & LSRA_SPILL_MASK);
855 return getLsraSpill() == LSRA_SPILL_ALWAYS;
858 // This controls whether RefPositions that lower/codegen indicated as reg optional be
859 // allocated a reg at all.
860 enum LsraRegOptionalControl{LSRA_REG_OPTIONAL_DEFAULT = 0, LSRA_REG_OPTIONAL_NO_ALLOC = 0x1000,
861 LSRA_REG_OPTIONAL_MASK = 0x1000};
863 LsraRegOptionalControl getLsraRegOptionalControl()
865 return (LsraRegOptionalControl)(lsraStressMask & LSRA_REG_OPTIONAL_MASK);
868 bool regOptionalNoAlloc()
870 return getLsraRegOptionalControl() == LSRA_REG_OPTIONAL_NO_ALLOC;
873 bool candidatesAreStressLimited()
875 return ((lsraStressMask & (LSRA_LIMIT_MASK | LSRA_SELECT_MASK)) != 0);
879 void dumpNodeInfo(GenTree* node, regMaskTP dstCandidates, int srcCount, int dstCount);
881 void lsraDumpIntervals(const char* msg);
882 void dumpRefPositions(const char* msg);
883 void dumpVarRefPositions(const char* msg);
886 static bool IsLsraAdded(GenTree* node)
888 return ((node->gtDebugFlags & GTF_DEBUG_NODE_LSRA_ADDED) != 0);
890 static void SetLsraAdded(GenTree* node)
892 node->gtDebugFlags |= GTF_DEBUG_NODE_LSRA_ADDED;
894 static bool IsResolutionMove(GenTree* node);
895 static bool IsResolutionNode(LIR::Range& containingRange, GenTree* node);
897 void verifyFinalAllocation();
898 void verifyResolutionMove(GenTree* resolutionNode, LsraLocation currentLocation);
900 bool doSelectNearest()
904 bool extendLifetimes()
912 // In a retail build we support only the default traversal order
913 bool isTraversalLayoutOrder()
917 bool isTraversalPredFirstOrder()
921 bool getLsraExtendLifeTimes()
925 static void SetLsraAdded(GenTree* node)
927 // do nothing; checked only under #DEBUG
929 bool candidatesAreStressLimited()
936 // Used by Lowering when considering whether to split Longs, as well as by identifyCandidates().
937 bool isRegCandidate(LclVarDsc* varDsc);
939 bool isContainableMemoryOp(GenTree* node);
942 // Determine which locals are candidates for allocation
943 void identifyCandidates();
945 // determine which locals are used in EH constructs we don't want to deal with
946 void identifyCandidatesExceptionDataflow();
948 void buildPhysRegRecords();
951 void checkLastUses(BasicBlock* block);
952 static int ComputeOperandDstCount(GenTree* operand);
953 static int ComputeAvailableSrcCount(GenTree* node);
958 // Update allocations at start/end of block
959 void unassignIntervalBlockStart(RegRecord* regRecord, VarToRegMap inVarToRegMap);
960 void processBlockEndAllocation(BasicBlock* current);
962 // Record variable locations at start/end of block
963 void processBlockStartLocations(BasicBlock* current, bool allocationPass);
964 void processBlockEndLocations(BasicBlock* current);
967 bool isSecondHalfReg(RegRecord* regRec, Interval* interval);
968 RegRecord* getSecondHalfRegRec(RegRecord* regRec);
969 RegRecord* findAnotherHalfRegRec(RegRecord* regRec);
970 bool canSpillDoubleReg(RegRecord* physRegRecord, LsraLocation refLocation, unsigned* recentAssignedRefWeight);
971 void unassignDoublePhysReg(RegRecord* doubleRegRecord);
973 void updateAssignedInterval(RegRecord* reg, Interval* interval, RegisterType regType);
974 void updatePreviousInterval(RegRecord* reg, Interval* interval, RegisterType regType);
975 bool canRestorePreviousInterval(RegRecord* regRec, Interval* assignedInterval);
976 bool isAssignedToInterval(Interval* interval, RegRecord* regRec);
977 bool isRefPositionActive(RefPosition* refPosition, LsraLocation refLocation);
978 bool canSpillReg(RegRecord* physRegRecord, LsraLocation refLocation, unsigned* recentAssignedRefWeight);
979 bool isRegInUse(RegRecord* regRec, RefPosition* refPosition);
981 // insert refpositions representing prolog zero-inits which will be added later
982 void insertZeroInitRefPositions();
984 // add physreg refpositions for a tree node, based on calling convention and instruction selection predictions
985 void addRefsForPhysRegMask(regMaskTP mask, LsraLocation currentLoc, RefType refType, bool isLastUse);
987 void resolveConflictingDefAndUse(Interval* interval, RefPosition* defRefPosition);
989 void buildRefPositionsForNode(GenTree* tree, BasicBlock* block, LsraLocation loc);
991 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
992 void buildUpperVectorSaveRefPositions(GenTree* tree, LsraLocation currentLoc, VARSET_VALARG_TP liveLargeVectors);
993 void buildUpperVectorRestoreRefPositions(GenTree* tree, LsraLocation currentLoc, VARSET_VALARG_TP liveLargeVectors);
994 #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
996 #if defined(UNIX_AMD64_ABI)
997 // For AMD64 on SystemV machines. This method
998 // is called as replacement for raUpdateRegStateForArg
999 // that is used on Windows. On System V systems a struct can be passed
1000 // partially using registers from the 2 register files.
1001 void unixAmd64UpdateRegStateForArg(LclVarDsc* argDsc);
1002 #endif // defined(UNIX_AMD64_ABI)
1004 // Update reg state for an incoming register argument
1005 void updateRegStateForArg(LclVarDsc* argDsc);
1007 inline bool isCandidateLocalRef(GenTree* tree)
1009 if (tree->IsLocal())
1011 unsigned int lclNum = tree->gtLclVarCommon.gtLclNum;
1012 assert(lclNum < compiler->lvaCount);
1013 LclVarDsc* varDsc = compiler->lvaTable + tree->gtLclVarCommon.gtLclNum;
1015 return isCandidateVar(varDsc);
1020 // Helpers for getKillSetForNode().
1021 regMaskTP getKillSetForStoreInd(GenTreeStoreInd* tree);
1022 regMaskTP getKillSetForShiftRotate(GenTreeOp* tree);
1023 regMaskTP getKillSetForMul(GenTreeOp* tree);
1024 regMaskTP getKillSetForCall(GenTreeCall* call);
1025 regMaskTP getKillSetForModDiv(GenTreeOp* tree);
1026 regMaskTP getKillSetForBlockStore(GenTreeBlk* blkNode);
1027 regMaskTP getKillSetForReturn();
1028 regMaskTP getKillSetForProfilerHook();
1029 #ifdef FEATURE_HW_INTRINSICS
1030 regMaskTP getKillSetForHWIntrinsic(GenTreeHWIntrinsic* node);
1031 #endif // FEATURE_HW_INTRINSICS
1033 // Return the registers killed by the given tree node.
1034 // This is used only for an assert, and for stress, so it is only defined under DEBUG.
1035 // Otherwise, the Build methods should obtain the killMask from the appropriate method above.
1037 regMaskTP getKillSetForNode(GenTree* tree);
1040 // Given some tree node add refpositions for all the registers this node kills
1041 bool buildKillPositionsForNode(GenTree* tree, LsraLocation currentLoc, regMaskTP killMask);
1043 regMaskTP allRegs(RegisterType rt);
1044 regMaskTP allByteRegs();
1045 regMaskTP allSIMDRegs();
1046 regMaskTP internalFloatRegCandidates();
1048 bool registerIsFree(regNumber regNum, RegisterType regType);
1049 bool registerIsAvailable(RegRecord* physRegRecord,
1050 LsraLocation currentLoc,
1051 LsraLocation* nextRefLocationPtr,
1052 RegisterType regType);
1053 void freeRegister(RegRecord* physRegRecord);
1054 void freeRegisters(regMaskTP regsToFree);
1056 // Get the type that this tree defines.
1057 var_types getDefType(GenTree* tree)
1059 return tree->TypeGet();
1062 // Managing internal registers during the BuildNode process.
1063 RefPosition* defineNewInternalTemp(GenTree* tree, RegisterType regType, regMaskTP candidates);
1064 RefPosition* buildInternalIntRegisterDefForNode(GenTree* tree, regMaskTP internalCands = RBM_NONE);
1065 RefPosition* buildInternalFloatRegisterDefForNode(GenTree* tree, regMaskTP internalCands = RBM_NONE);
1066 void buildInternalRegisterUses();
1068 void resolveLocalRef(BasicBlock* block, GenTree* treeNode, RefPosition* currentRefPosition);
1070 void insertMove(BasicBlock* block, GenTree* insertionPoint, unsigned lclNum, regNumber inReg, regNumber outReg);
1073 BasicBlock* block, GenTree* insertionPoint, unsigned lclNum1, regNumber reg1, unsigned lclNum2, regNumber reg2);
1076 // TODO-Cleanup: unused?
1077 class PhysRegIntervalIterator
1080 PhysRegIntervalIterator(LinearScan* theLinearScan)
1082 nextRegNumber = (regNumber)0;
1083 linearScan = theLinearScan;
1085 RegRecord* GetNext()
1087 return &linearScan->physRegs[nextRegNumber];
1091 // This assumes that the physical registers are contiguous, starting
1092 // with a register number of 0
1093 regNumber nextRegNumber;
1094 LinearScan* linearScan;
1098 Interval* newInterval(RegisterType regType);
1100 Interval* getIntervalForLocalVar(unsigned varIndex)
1102 assert(varIndex < compiler->lvaTrackedCount);
1103 assert(localVarIntervals[varIndex] != nullptr);
1104 return localVarIntervals[varIndex];
1107 Interval* getIntervalForLocalVarNode(GenTreeLclVarCommon* tree)
1109 LclVarDsc* varDsc = &compiler->lvaTable[tree->gtLclNum];
1110 assert(varDsc->lvTracked);
1111 return getIntervalForLocalVar(varDsc->lvVarIndex);
1114 RegRecord* getRegisterRecord(regNumber regNum);
1116 RefPosition* newRefPositionRaw(LsraLocation nodeLocation, GenTree* treeNode, RefType refType);
1118 RefPosition* newRefPosition(Interval* theInterval,
1119 LsraLocation theLocation,
1121 GenTree* theTreeNode,
1123 unsigned multiRegIdx = 0);
1125 // This creates a RefTypeUse at currentLoc. It sets the treeNode to nullptr if it is not a
1127 RefPosition* newUseRefPosition(Interval* theInterval,
1128 GenTree* theTreeNode,
1130 unsigned multiRegIdx = 0);
1132 RefPosition* newRefPosition(
1133 regNumber reg, LsraLocation theLocation, RefType theRefType, GenTree* theTreeNode, regMaskTP mask);
1135 void applyCalleeSaveHeuristics(RefPosition* rp);
1137 void checkConflictingDefUse(RefPosition* rp);
1139 void associateRefPosWithInterval(RefPosition* rp);
1141 unsigned getWeight(RefPosition* refPos);
1143 /*****************************************************************************
1144 * Register management
1145 ****************************************************************************/
1146 RegisterType getRegisterType(Interval* currentInterval, RefPosition* refPosition);
1147 regNumber tryAllocateFreeReg(Interval* current, RefPosition* refPosition);
1148 regNumber allocateBusyReg(Interval* current, RefPosition* refPosition, bool allocateIfProfitable);
1149 regNumber assignCopyReg(RefPosition* refPosition);
1151 bool isMatchingConstant(RegRecord* physRegRecord, RefPosition* refPosition);
1152 bool isSpillCandidate(Interval* current,
1153 RefPosition* refPosition,
1154 RegRecord* physRegRecord,
1155 LsraLocation& nextLocation);
1156 void checkAndAssignInterval(RegRecord* regRec, Interval* interval);
1157 void assignPhysReg(RegRecord* regRec, Interval* interval);
1158 void assignPhysReg(regNumber reg, Interval* interval)
1160 assignPhysReg(getRegisterRecord(reg), interval);
1163 bool isAssigned(RegRecord* regRec ARM_ARG(RegisterType newRegType));
1164 bool isAssigned(RegRecord* regRec, LsraLocation lastLocation ARM_ARG(RegisterType newRegType));
1165 void checkAndClearInterval(RegRecord* regRec, RefPosition* spillRefPosition);
1166 void unassignPhysReg(RegRecord* regRec ARM_ARG(RegisterType newRegType));
1167 void unassignPhysReg(RegRecord* regRec, RefPosition* spillRefPosition);
1168 void unassignPhysRegNoSpill(RegRecord* reg);
1169 void unassignPhysReg(regNumber reg)
1171 unassignPhysReg(getRegisterRecord(reg), nullptr);
1174 void setIntervalAsSpilled(Interval* interval);
1175 void setIntervalAsSplit(Interval* interval);
1176 void spillInterval(Interval* interval, RefPosition* fromRefPosition, RefPosition* toRefPosition);
1178 void spillGCRefs(RefPosition* killRefPosition);
1180 /*****************************************************************************
1181 * For Resolution phase
1182 ****************************************************************************/
1183 // TODO-Throughput: Consider refactoring this so that we keep a map from regs to vars for better scaling
1184 unsigned int regMapCount;
1186 // When we split edges, we create new blocks, and instead of expanding the VarToRegMaps, we
1187 // rely on the property that the "in" map is the same as the "from" block of the edge, and the
1188 // "out" map is the same as the "to" block of the edge (by construction).
1189 // So, for any block whose bbNum is greater than bbNumMaxBeforeResolution, we use the
1190 // splitBBNumToTargetBBNumMap.
1191 // TODO-Throughput: We may want to look into the cost/benefit tradeoff of doing this vs. expanding
1194 unsigned bbNumMaxBeforeResolution;
1195 struct SplitEdgeInfo
1200 typedef JitHashTable<unsigned, JitSmallPrimitiveKeyFuncs<unsigned>, SplitEdgeInfo> SplitBBNumToTargetBBNumMap;
1201 SplitBBNumToTargetBBNumMap* splitBBNumToTargetBBNumMap;
1202 SplitBBNumToTargetBBNumMap* getSplitBBNumToTargetBBNumMap()
1204 if (splitBBNumToTargetBBNumMap == nullptr)
1206 splitBBNumToTargetBBNumMap =
1207 new (getAllocator(compiler)) SplitBBNumToTargetBBNumMap(getAllocator(compiler));
1209 return splitBBNumToTargetBBNumMap;
1211 SplitEdgeInfo getSplitEdgeInfo(unsigned int bbNum);
1213 void initVarRegMaps();
1214 void setInVarRegForBB(unsigned int bbNum, unsigned int varNum, regNumber reg);
1215 void setOutVarRegForBB(unsigned int bbNum, unsigned int varNum, regNumber reg);
1216 VarToRegMap getInVarToRegMap(unsigned int bbNum);
1217 VarToRegMap getOutVarToRegMap(unsigned int bbNum);
1218 void setVarReg(VarToRegMap map, unsigned int trackedVarIndex, regNumber reg);
1219 regNumber getVarReg(VarToRegMap map, unsigned int trackedVarIndex);
1220 // Initialize the incoming VarToRegMap to the given map values (generally a predecessor of
1222 VarToRegMap setInVarToRegMap(unsigned int bbNum, VarToRegMap srcVarToRegMap);
1224 regNumber getTempRegForResolution(BasicBlock* fromBlock, BasicBlock* toBlock, var_types type);
1227 void dumpVarToRegMap(VarToRegMap map);
1228 void dumpInVarToRegMap(BasicBlock* block);
1229 void dumpOutVarToRegMap(BasicBlock* block);
1231 // There are three points at which a tuple-style dump is produced, and each
1232 // differs slightly:
1233 // - In LSRA_DUMP_PRE, it does a simple dump of each node, with indications of what
1234 // tree nodes are consumed.
1235 // - In LSRA_DUMP_REFPOS, which is after the intervals are built, but before
1236 // register allocation, each node is dumped, along with all of the RefPositions,
// The Intervals are identified as Lnnn for lclVar intervals, Innn for other
1238 // intervals, and Tnnn for internal temps.
1239 // - In LSRA_DUMP_POST, which is after register allocation, the registers are
1242 enum LsraTupleDumpMode{LSRA_DUMP_PRE, LSRA_DUMP_REFPOS, LSRA_DUMP_POST};
1243 void lsraGetOperandString(GenTree* tree, LsraTupleDumpMode mode, char* operandString, unsigned operandStringLength);
1244 void lsraDispNode(GenTree* tree, LsraTupleDumpMode mode, bool hasDest);
1245 void DumpOperandDefs(
1246 GenTree* operand, bool& first, LsraTupleDumpMode mode, char* operandString, const unsigned operandStringLength);
1247 void TupleStyleDump(LsraTupleDumpMode mode);
1249 LsraLocation maxNodeLocation;
1251 // Width of various fields - used to create a streamlined dump during allocation that shows the
1252 // state of all the registers in columns.
1256 const char* columnSeparator;
1258 const char* leftBox;
1259 const char* middleBox;
1260 const char* rightBox;
1262 static const int MAX_FORMAT_CHARS = 12;
1263 char intervalNameFormat[MAX_FORMAT_CHARS];
1264 char regNameFormat[MAX_FORMAT_CHARS];
1265 char shortRefPositionFormat[MAX_FORMAT_CHARS];
1266 char emptyRefPositionFormat[MAX_FORMAT_CHARS];
1267 char indentFormat[MAX_FORMAT_CHARS];
1268 static const int MAX_LEGEND_FORMAT_CHARS = 25;
1269 char bbRefPosFormat[MAX_LEGEND_FORMAT_CHARS];
1270 char legendFormat[MAX_LEGEND_FORMAT_CHARS];
1272 // How many rows have we printed since last printing a "title row"?
1273 static const int MAX_ROWS_BETWEEN_TITLES = 50;
1274 int rowCountSinceLastTitle;
1275 // Current mask of registers being printed in the dump.
1276 regMaskTP lastDumpedRegisters;
1277 regMaskTP registersToDump;
1278 int lastUsedRegNumIndex;
1279 bool shouldDumpReg(regNumber regNum)
1281 return (registersToDump & genRegMask(regNum)) != 0;
1284 void dumpRegRecordHeader();
1285 void dumpRegRecordTitle();
1286 void dumpRegRecordTitleIfNeeded();
1287 void dumpRegRecordTitleLines();
1288 void dumpRegRecords();
1289 // An abbreviated RefPosition dump for printing with column-based register state
1290 void dumpRefPositionShort(RefPosition* refPosition, BasicBlock* currentBlock);
1291 // Print the number of spaces occupied by a dumpRefPositionShort()
1292 void dumpEmptyRefPosition();
1293 // A dump of Referent, in exactly regColumnWidth characters
1294 void dumpIntervalName(Interval* interval);
1296 // Events during the allocation phase that cause some dump output
1298 // Conflicting def/use
1299 LSRA_EVENT_DEFUSE_CONFLICT, LSRA_EVENT_DEFUSE_FIXED_DELAY_USE, LSRA_EVENT_DEFUSE_CASE1, LSRA_EVENT_DEFUSE_CASE2,
1300 LSRA_EVENT_DEFUSE_CASE3, LSRA_EVENT_DEFUSE_CASE4, LSRA_EVENT_DEFUSE_CASE5, LSRA_EVENT_DEFUSE_CASE6,
1303 LSRA_EVENT_SPILL, LSRA_EVENT_SPILL_EXTENDED_LIFETIME, LSRA_EVENT_RESTORE_PREVIOUS_INTERVAL,
1304 LSRA_EVENT_RESTORE_PREVIOUS_INTERVAL_AFTER_SPILL, LSRA_EVENT_DONE_KILL_GC_REFS,
1307 LSRA_EVENT_START_BB, LSRA_EVENT_END_BB,
1310 LSRA_EVENT_FREE_REGS,
1312 // Characteristics of the current RefPosition
1313 LSRA_EVENT_INCREMENT_RANGE_END, // ???
1314 LSRA_EVENT_LAST_USE, LSRA_EVENT_LAST_USE_DELAYED, LSRA_EVENT_NEEDS_NEW_REG,
1316 // Allocation decisions
1317 LSRA_EVENT_FIXED_REG, LSRA_EVENT_EXP_USE, LSRA_EVENT_ZERO_REF, LSRA_EVENT_NO_ENTRY_REG_ALLOCATED,
1318 LSRA_EVENT_KEPT_ALLOCATION, LSRA_EVENT_COPY_REG, LSRA_EVENT_MOVE_REG, LSRA_EVENT_ALLOC_REG,
1319 LSRA_EVENT_ALLOC_SPILLED_REG, LSRA_EVENT_NO_REG_ALLOCATED, LSRA_EVENT_RELOAD, LSRA_EVENT_SPECIAL_PUTARG,
1320 LSRA_EVENT_REUSE_REG,
1322 void dumpLsraAllocationEvent(LsraDumpEvent event,
1323 Interval* interval = nullptr,
1324 regNumber reg = REG_NA,
1325 BasicBlock* currentBlock = nullptr);
1327 void validateIntervals();
1330 #if TRACK_LSRA_STATS
1332 LSRA_STAT_SPILL, LSRA_STAT_COPY_REG, LSRA_STAT_RESOLUTION_MOV, LSRA_STAT_SPLIT_EDGE,
1335 unsigned regCandidateVarCount;
1336 void updateLsraStat(LsraStat stat, unsigned currentBBNum);
1338 void dumpLsraStats(FILE* file);
1340 #define INTRACK_STATS(x) x
1341 #else // !TRACK_LSRA_STATS
1342 #define INTRACK_STATS(x)
1343 #endif // !TRACK_LSRA_STATS
1348 CompAllocator getAllocator(Compiler* comp)
1350 return comp->getAllocator(CMK_LSRA);
1354 // This is used for dumping
1355 RefPosition* activeRefPosition;
1358 IntervalList intervals;
1360 RegRecord physRegs[REG_COUNT];
1362 // Map from tracked variable index to Interval*.
1363 Interval** localVarIntervals;
1365 // Set of blocks that have been visited.
1366 BlockSet bbVisitedSet;
1367 void markBlockVisited(BasicBlock* block)
1369 BlockSetOps::AddElemD(compiler, bbVisitedSet, block->bbNum);
1371 void clearVisitedBlocks()
1373 BlockSetOps::ClearD(compiler, bbVisitedSet);
1375 bool isBlockVisited(BasicBlock* block)
1377 return BlockSetOps::IsMember(compiler, bbVisitedSet, block->bbNum);
1384 // A map from bbNum to the block information used during register allocation.
1385 LsraBlockInfo* blockInfo;
1386 BasicBlock* findPredBlockForLiveIn(BasicBlock* block, BasicBlock* prevBlock DEBUGARG(bool* pPredBlockIsAllocated));
1388 // The order in which the blocks will be allocated.
1389 // This is any array of BasicBlock*, in the order in which they should be traversed.
1390 BasicBlock** blockSequence;
1391 // The verifiedAllBBs flag indicates whether we have verified that all BBs have been
// included in the blockSequence above, during setBlockSequence().
1393 bool verifiedAllBBs;
1394 void setBlockSequence();
1395 int compareBlocksForSequencing(BasicBlock* block1, BasicBlock* block2, bool useBlockWeights);
1396 BasicBlockList* blockSequenceWorkList;
1397 bool blockSequencingDone;
1398 void addToBlockSequenceWorkList(BlockSet sequencedBlockSet, BasicBlock* block, BlockSet& predSet);
1399 void removeFromBlockSequenceWorkList(BasicBlockList* listNode, BasicBlockList* prevNode);
1400 BasicBlock* getNextCandidateFromWorkList();
1402 // The bbNum of the block being currently allocated or resolved.
1403 unsigned int curBBNum;
1404 // The current location
1405 LsraLocation currentLoc;
1406 // The ordinal of the block we're on (i.e. this is the curBBSeqNum-th block we've allocated).
1407 unsigned int curBBSeqNum;
1408 // The number of blocks that we've sequenced.
1409 unsigned int bbSeqCount;
1410 // The Location of the start of the current block.
1411 LsraLocation curBBStartLocation;
1412 // True if the method contains any critical edges.
1413 bool hasCriticalEdges;
1415 // True if there are any register candidate lclVars available for allocation.
1416 bool enregisterLocalVars;
1418 virtual bool willEnregisterLocalVars() const
1420 return enregisterLocalVars;
1423 // Ordered list of RefPositions
1424 RefPositionList refPositions;
1426 // Per-block variable location mappings: an array indexed by block number that yields a
1427 // pointer to an array of regNumber, one per variable.
1428 VarToRegMap* inVarToRegMaps;
1429 VarToRegMap* outVarToRegMaps;
1431 // A temporary VarToRegMap used during the resolution of critical edges.
1432 VarToRegMap sharedCriticalVarToRegMap;
1434 PhasedVar<regMaskTP> availableIntRegs;
1435 PhasedVar<regMaskTP> availableFloatRegs;
1436 PhasedVar<regMaskTP> availableDoubleRegs;
1438 // The set of all register candidates. Note that this may be a subset of tracked vars.
1439 VARSET_TP registerCandidateVars;
1440 // Current set of live register candidate vars, used during building of RefPositions to determine
1441 // whether to preference to callee-save.
1442 VARSET_TP currentLiveVars;
1443 // Set of variables that may require resolution across an edge.
1444 // This is first constructed during interval building, to contain all the lclVars that are live at BB edges.
1445 // Then, any lclVar that is always in the same register is removed from the set.
1446 VARSET_TP resolutionCandidateVars;
1447 // This set contains all the lclVars that are ever spilled or split.
1448 VARSET_TP splitOrSpilledVars;
1449 // Set of floating point variables to consider for callee-save registers.
1450 VARSET_TP fpCalleeSaveCandidateVars;
1451 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
1452 #if defined(_TARGET_AMD64_)
1453 static bool varTypeNeedsPartialCalleeSave(var_types type)
1455 return (emitTypeSize(type) == 32);
1457 static const var_types LargeVectorSaveType = TYP_SIMD16;
1458 #elif defined(_TARGET_ARM64_)
1459 static bool varTypeNeedsPartialCalleeSave(var_types type)
// The ARM64 ABI only requires the callee to save the lower 8 bytes of the FP callee-save registers.
// For SIMD types longer than 8 bytes, the caller is responsible for saving and restoring the upper bytes.
1463 return (emitTypeSize(type) == 16);
1465 static const var_types LargeVectorSaveType = TYP_DOUBLE;
1466 #else // !defined(_TARGET_AMD64_) && !defined(_TARGET_ARM64_)
1467 #error("Unknown target architecture for FEATURE_SIMD")
1468 #endif // !defined(_TARGET_AMD64_) && !defined(_TARGET_ARM64_)
1470 // Set of large vector (TYP_SIMD32 on AVX) variables.
1471 VARSET_TP largeVectorVars;
1472 // Set of large vector (TYP_SIMD32 on AVX) variables to consider for callee-save registers.
1473 VARSET_TP largeVectorCalleeSaveCandidateVars;
1474 #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
1476 //-----------------------------------------------------------------------
1478 //-----------------------------------------------------------------------
1480 // The listNodePool is used to maintain the RefInfo for nodes that are "in flight"
1481 // i.e. whose consuming node has not yet been handled.
1482 RefInfoListNodePool listNodePool;
1484 // The defList is used for the transient RefInfo that is computed by
1485 // the Build methods, and used in building RefPositions.
1486 // When Def RefPositions are built for a node, their NodeInfo is placed
1487 // in the defList. As the consuming node is handled, it moves the NodeInfo
1488 // into an ordered useList corresponding to the uses for that node.
1489 RefInfoList defList;
1491 // As we build uses, we may want to preference the next definition (i.e. the register produced
1492 // by the current node) to the same register as one of its uses. This is done by setting
1493 // 'tgtPrefUse' to that RefPosition.
1494 RefPosition* tgtPrefUse = nullptr;
1495 RefPosition* tgtPrefUse2 = nullptr;
1497 // The following keep track of information about internal (temporary register) intervals
1498 // during the building of a single node.
1499 static const int MaxInternalCount = 4;
1500 RefPosition* internalDefs[MaxInternalCount];
1501 int internalCount = 0;
1502 bool setInternalRegsDelayFree;
1504 // When a RefTypeUse is marked as 'delayRegFree', we also want to mark the RefTypeDef
1505 // in the next Location as 'hasInterferingUses'. This is accomplished by setting this
1506 // 'pendingDelayFree' to true as they are created, and clearing it as a new node is
1507 // handled in 'BuildNode'.
1508 bool pendingDelayFree;
1510 // This method clears the "build state" before starting to handle a new node.
1511 void clearBuildState()
1513 tgtPrefUse = nullptr;
1514 tgtPrefUse2 = nullptr;
1516 setInternalRegsDelayFree = false;
1517 pendingDelayFree = false;
1520 RefPosition* BuildUse(GenTree* operand, regMaskTP candidates = RBM_NONE, int multiRegIdx = 0);
1522 void setDelayFree(RefPosition* use);
1523 int BuildBinaryUses(GenTreeOp* node, regMaskTP candidates = RBM_NONE);
1524 #ifdef _TARGET_XARCH_
1525 int BuildRMWUses(GenTreeOp* node, regMaskTP candidates = RBM_NONE);
#endif // _TARGET_XARCH_
1527 // This is the main entry point for building the RefPositions for a node.
1528 // These methods return the number of sources.
1529 int BuildNode(GenTree* stmt);
1531 void getTgtPrefOperands(GenTreeOp* tree, bool& prefOp1, bool& prefOp2);
1532 bool supportsSpecialPutArg();
1534 int BuildSimple(GenTree* tree);
1535 int BuildOperandUses(GenTree* node, regMaskTP candidates = RBM_NONE);
1536 int BuildDelayFreeUses(GenTree* node, regMaskTP candidates = RBM_NONE);
1537 int BuildIndirUses(GenTreeIndir* indirTree, regMaskTP candidates = RBM_NONE);
1538 int BuildAddrUses(GenTree* addr, regMaskTP candidates = RBM_NONE);
1539 void HandleFloatVarArgs(GenTreeCall* call, GenTree* argNode, bool* callHasFloatRegArgs);
1540 RefPosition* BuildDef(GenTree* tree, regMaskTP dstCandidates = RBM_NONE, int multiRegIdx = 0);
1541 void BuildDefs(GenTree* tree, int dstCount, regMaskTP dstCandidates = RBM_NONE);
1542 void BuildDefsWithKills(GenTree* tree, int dstCount, regMaskTP dstCandidates, regMaskTP killMask);
1544 int BuildStoreLoc(GenTree* tree);
1545 int BuildReturn(GenTree* tree);
1546 #ifdef _TARGET_XARCH_
1547 // This method, unlike the others, returns the number of sources, since it may be called when
1548 // 'tree' is contained.
1549 int BuildShiftRotate(GenTree* tree);
1550 #endif // _TARGET_XARCH_
1552 int BuildShiftLongCarry(GenTree* tree);
1554 int BuildPutArgReg(GenTreeUnOp* node);
1555 int BuildCall(GenTreeCall* call);
1556 int BuildCmp(GenTree* tree);
1557 int BuildBlockStore(GenTreeBlk* blkNode);
1558 int BuildModDiv(GenTree* tree);
1559 int BuildIntrinsic(GenTree* tree);
1560 int BuildStoreLoc(GenTreeLclVarCommon* tree);
1561 int BuildIndir(GenTreeIndir* indirTree);
1562 int BuildGCWriteBarrier(GenTree* tree);
1563 int BuildCast(GenTreeCast* cast);
1565 #if defined(_TARGET_XARCH_)
1566 // returns true if the tree can use the read-modify-write memory instruction form
1567 bool isRMWRegOper(GenTree* tree);
1568 int BuildMul(GenTree* tree);
1569 void SetContainsAVXFlags(unsigned sizeOfSIMDVector = 0);
1570 // Move the last use bit, if any, from 'fromTree' to 'toTree'; 'fromTree' must be contained.
1571 void CheckAndMoveRMWLastUse(GenTree* fromTree, GenTree* toTree)
1573 // If 'fromTree' is not a last-use lclVar, there's nothing to do.
1574 if ((fromTree == nullptr) || !fromTree->OperIs(GT_LCL_VAR) || ((fromTree->gtFlags & GTF_VAR_DEATH) == 0))
1578 // If 'fromTree' was a lclVar, it must be contained and 'toTree' must match.
1579 if (!fromTree->isContained() || (toTree == nullptr) || !toTree->OperIs(GT_LCL_VAR) ||
1580 (toTree->AsLclVarCommon()->gtLclNum != toTree->AsLclVarCommon()->gtLclNum))
1582 assert(!"Unmatched RMW indirections");
1585 // This is probably not necessary, but keeps things consistent.
1586 fromTree->gtFlags &= ~GTF_VAR_DEATH;
1587 if (toTree != nullptr) // Just to be conservative
1589 toTree->gtFlags |= GTF_VAR_DEATH;
1592 #endif // defined(_TARGET_XARCH_)
1595 int BuildSIMD(GenTreeSIMD* tree);
1596 #endif // FEATURE_SIMD
1598 #ifdef FEATURE_HW_INTRINSICS
1599 int BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree);
1600 #endif // FEATURE_HW_INTRINSICS
1602 int BuildPutArgStk(GenTreePutArgStk* argNode);
1603 #if FEATURE_ARG_SPLIT
1604 int BuildPutArgSplit(GenTreePutArgSplit* tree);
1605 #endif // FEATURE_ARG_SPLIT
1606 int BuildLclHeap(GenTree* tree);
1609 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
1610 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
1614 XX This is the fundamental data structure for linear scan register XX
1615 XX allocation. It represents the live range(s) for a variable or temp. XX
1617 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
1618 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
1621 class Interval : public Referenceable
1624 Interval(RegisterType registerType, regMaskTP registerPreferences)
1625 : registerPreferences(registerPreferences)
1626 , relatedInterval(nullptr)
1627 , assignedReg(nullptr)
1628 , registerType(registerType)
1633 , isStructField(false)
1634 , isPromotedStruct(false)
1635 , hasConflictingDefUse(false)
1636 , hasInterferingUses(false)
1637 , isSpecialPutArg(false)
1638 , preferCalleeSave(false)
1640 , physReg(REG_COUNT)
1649 // print out representation
1651 // concise representation for embedding
1653 // extremely concise representation
1657 void setLocalNumber(Compiler* compiler, unsigned lclNum, LinearScan* l);
1659 // Fixed registers for which this Interval has a preference
1660 regMaskTP registerPreferences;
1662 // The relatedInterval is:
1663 // - for any other interval, it is the interval to which this interval
1664 // is currently preferenced (e.g. because they are related by a copy)
1665 Interval* relatedInterval;
1667 // The assignedReg is the RecRecord for the register to which this interval
1668 // has been assigned at some point - if the interval is active, this is the
1669 // register it currently occupies.
1670 RegRecord* assignedReg;
1672 // DECIDE : put this in a union or do something w/ inheritance?
1673 // this is an interval for a physical register, not a allocatable entity
1675 RegisterType registerType;
1676 bool isLocalVar : 1;
1677 // Indicates whether this interval has been assigned to different registers
1679 // Indicates whether this interval is ever spilled
1681 // indicates an interval representing the internal requirements for
1682 // generating code for a node (temp registers internal to the node)
1683 // Note that this interval may live beyond a node in the GT_ARR_LENREF/GT_IND
1684 // case (though never lives beyond a stmt)
1685 bool isInternal : 1;
1686 // true if this is a LocalVar for a struct field
1687 bool isStructField : 1;
1688 // true iff this is a GT_LDOBJ for a fully promoted (PROMOTION_TYPE_INDEPENDENT) struct
1689 bool isPromotedStruct : 1;
1690 // true if this is an SDSU interval for which the def and use have conflicting register
1692 bool hasConflictingDefUse : 1;
1693 // true if this interval's defining node has "delayRegFree" uses, either due to it being an RMW instruction,
1694 // OR because it requires an internal register that differs from the target.
1695 bool hasInterferingUses : 1;
1697 // True if this interval is defined by a putArg, whose source is a non-last-use lclVar.
1698 // During allocation, this flag will be cleared if the source is not already in the required register.
// Otherwise, we will leave the register allocated to the lclVar, but mark the RegRecord as
1700 // isBusyUntilNextKill, so that it won't be reused if the lclVar goes dead before the call.
1701 bool isSpecialPutArg : 1;
1703 // True if this interval interferes with a call.
1704 bool preferCalleeSave : 1;
1706 // True if this interval is defined by a constant node that may be reused and/or may be
1707 // able to reuse a constant that's already in a register.
1708 bool isConstant : 1;
1710 // The register to which it is currently assigned.
1714 unsigned int intervalIndex;
1717 unsigned int varNum; // This is the "variable number": the index into the lvaTable array
1719 LclVarDsc* getLocalVar(Compiler* comp)
1722 return &(comp->lvaTable[this->varNum]);
1725 // Get the local tracked variable "index" (lvVarIndex), used in bitmasks.
1726 unsigned getVarIndex(Compiler* comp)
1728 LclVarDsc* varDsc = getLocalVar(comp);
1729 assert(varDsc->lvTracked); // If this isn't true, we shouldn't be calling this function!
1730 return varDsc->lvVarIndex;
1733 bool isAssignedTo(regNumber regNum)
1735 // This uses regMasks to handle the case where a double actually occupies two registers
1736 // TODO-Throughput: This could/should be done more cheaply.
1737 return (physReg != REG_NA && (genRegMask(physReg, registerType) & genRegMask(regNum)) != RBM_NONE);
1740 // Assign the related interval.
1741 void assignRelatedInterval(Interval* newRelatedInterval)
1746 printf("Assigning related ");
1747 newRelatedInterval->microDump();
1753 relatedInterval = newRelatedInterval;
1756 // Assign the related interval, but only if it isn't already assigned.
1757 bool assignRelatedIntervalIfUnassigned(Interval* newRelatedInterval)
1759 if (relatedInterval == nullptr)
1761 assignRelatedInterval(newRelatedInterval);
1769 printf("Interval ");
1771 printf(" already has a related interval\n");
1778 // Get the current preferences for this Interval.
1779 // Note that when we have an assigned register we don't necessarily update the
1780 // registerPreferences to that register, as there may be multiple, possibly disjoint,
1781 // definitions. This method will return the current assigned register if any, or
1782 // the 'registerPreferences' otherwise.
1784 regMaskTP getCurrentPreferences()
1786 return (assignedReg == nullptr) ? registerPreferences : genRegMask(assignedReg->regNum);
1789 void mergeRegisterPreferences(regMaskTP preferences)
1791 // We require registerPreferences to have been initialized.
1792 assert(registerPreferences != RBM_NONE);
1793 // It is invalid to update with empty preferences
1794 assert(preferences != RBM_NONE);
1796 regMaskTP commonPreferences = (registerPreferences & preferences);
1797 if (commonPreferences != RBM_NONE)
1799 registerPreferences = commonPreferences;
1803 // There are no preferences in common.
1804 // Preferences need to reflect both cases where a var must occupy a specific register,
1805 // as well as cases where a var is live when a register is killed.
1806 // In the former case, we would like to record all such registers, however we don't
1807 // really want to use any registers that will interfere.
1808 // To approximate this, we never "or" together multi-reg sets, which are generally kill sets.
1810 if (!genMaxOneBit(preferences))
1812 // The new preference value is a multi-reg set, so it's probably a kill.
1813 // Keep the new value.
1814 registerPreferences = preferences;
1818 if (!genMaxOneBit(registerPreferences))
1820 // The old preference value is a multi-reg set.
1821 // Keep the existing preference set, as it probably reflects one or more kills.
1822 // It may have been a union of multiple individual registers, but we can't
1823 // distinguish that case without extra cost.
1827 // If we reach here, we have two disjoint single-reg sets.
1828 // Keep only the callee-save preferences, if not empty.
1829 // Otherwise, take the union of the preferences.
1831 regMaskTP newPreferences = registerPreferences | preferences;
1833 if (preferCalleeSave)
1835 regMaskTP calleeSaveMask = (calleeSaveRegs(this->registerType) & (newPreferences));
1836 if (calleeSaveMask != RBM_NONE)
1838 newPreferences = calleeSaveMask;
1841 registerPreferences = newPreferences;
1844 // Update the registerPreferences on the interval.
1845 // If there are conflicting requirements on this interval, set the preferences to
1846 // the union of them. That way maybe we'll get at least one of them.
1847 // An exception is made in the case where one of the existing or new
1848 // preferences are all callee-save, in which case we "prefer" the callee-save
1850 void updateRegisterPreferences(regMaskTP preferences)
1852 // If this interval is preferenced, that interval may have already been assigned a
1853 // register, and we want to include that in the preferences.
1854 if ((relatedInterval != nullptr) && !relatedInterval->isActive)
1856 mergeRegisterPreferences(relatedInterval->getCurrentPreferences());
1859 // Now merge the new preferences.
1860 mergeRegisterPreferences(preferences);
1867 // A RefPosition refers to either an Interval or a RegRecord. 'referent' points to one
1868 // of these types. If it refers to a RegRecord, then 'isPhysRegRef' is true. If it
1869 // refers to an Interval, then 'isPhysRegRef' is false.
1870 // referent can never be null.
1872 Referenceable* referent;
1874 // nextRefPosition is the next in code order.
1875 // Note that in either case there is no need for these to be doubly linked, as they
1876 // are only traversed in the forward direction, and are not moved.
1877 RefPosition* nextRefPosition;
1879 // The remaining fields are common to both options
1883 // Prior to the allocation pass, registerAssignment captures the valid registers
1884 // for this RefPosition. An empty set means that any register is valid. A non-empty
1885 // set means that it must be one of the given registers (may be the full set if the
1886 // only constraint is that it must reside in SOME register)
1887 // After the allocation pass, this contains the actual assignment
1888 LsraLocation nodeLocation;
1889 regMaskTP registerAssignment;
1893 // NOTE: C++ only packs bitfields if the base type is the same. So make all the base
1894 // NOTE: types of the logically "bool" types that follow 'unsigned char', so they match
1895 // NOTE: RefType that precedes this, and multiRegIdx can also match.
1897 // Indicates whether this ref position is to be allocated a reg only if profitable. Currently these are the
1898 // ref positions that lower/codegen has indicated as reg optional and is considered a contained memory operand if
1899 // no reg is allocated.
1900 unsigned char allocRegIfProfitable : 1;
1902 // Used by RefTypeDef/Use positions of a multi-reg call node.
1903 // Indicates the position of the register that this ref position refers to.
1904 // The max bits needed is based on max value of MAX_RET_REG_COUNT value
// across all targets, and that happens to be 4 on ARM. Hence the index value
1906 // would be 0..MAX_RET_REG_COUNT-1.
1907 unsigned char multiRegIdx : 2;
1909 // Last Use - this may be true for multiple RefPositions in the same Interval
1910 unsigned char lastUse : 1;
1912 // Spill and Copy info
1913 // reload indicates that the value was spilled, and must be reloaded here.
1914 // spillAfter indicates that the value is spilled here, so a spill must be added.
1915 // copyReg indicates that the value needs to be copied to a specific register,
1916 // but that it will also retain its current assigned register.
1917 // moveReg indicates that the value needs to be moved to a different register,
1918 // and that this will be its new assigned register.
1919 // A RefPosition may have any flag individually or the following combinations:
1920 // - reload and spillAfter (i.e. it remains in memory), but not in combination with copyReg or moveReg
1921 // (reload cannot exist with copyReg or moveReg; it should be reloaded into the appropriate reg)
1922 // - spillAfter and copyReg (i.e. it must be copied to a new reg for use, but is then spilled)
1923 // - spillAfter and moveReg (i.e. it most be both spilled and moved)
1924 // NOTE: a moveReg involves an explicit move, and would usually not be needed for a fixed Reg if it is going
1925 // to be spilled, because the code generator will do the move to the fixed register, and doesn't need to
1926 // record the new register location as the new "home" location of the lclVar. However, if there is a conflicting
1927 // use at the same location (e.g. lclVar V1 is in rdx and needs to be in rcx, but V2 needs to be in rdx), then
1928 // we need an explicit move.
1929 // - copyReg and moveReg must not exist with each other.
1931 unsigned char reload : 1;
1932 unsigned char spillAfter : 1;
1933 unsigned char copyReg : 1;
1934 unsigned char moveReg : 1; // true if this var is moved to a new register
1936 unsigned char isPhysRegRef : 1; // true if 'referent' points of a RegRecord, false if it points to an Interval
1937 unsigned char isFixedRegRef : 1;
1938 unsigned char isLocalDefUse : 1;
1940 // delayRegFree indicates that the register should not be freed right away, but instead wait
1941 // until the next Location after it would normally be freed. This is used for the case of
1942 // non-commutative binary operators, where op2 must not be assigned the same register as
1943 // the target. We do this by not freeing it until after the target has been defined.
1944 // Another option would be to actually change the Location of the op2 use until the same
1945 // Location as the def, but then it could potentially reuse a register that has been freed
1946 // from the other source(s), e.g. if it's a lastUse or spilled.
1947 unsigned char delayRegFree : 1;
1949 // outOfOrder is marked on a (non-def) RefPosition that doesn't follow a definition of the
1950 // register currently assigned to the Interval. This happens when we use the assigned
1951 // register from a predecessor that is not the most recently allocated BasicBlock.
1952 unsigned char outOfOrder : 1;
1955 // Minimum number registers that needs to be ensured while
1956 // constraining candidates for this ref position under
1958 unsigned minRegCandidateCount;
1960 // The unique RefPosition number, equal to its index in the
1961 // refPositions list. Only used for debugging dumps.
// Construct a RefPosition for the given block, linearized location, tree node and
// reference kind. Register state starts unconstrained: registerAssignment is RBM_NONE
// (meaning "any register is valid"), and the spill/copy/phys-reg flags default to false.
1965 RefPosition(unsigned int bbNum, LsraLocation nodeLocation, GenTree* treeNode, RefType refType)
1967 , nextRefPosition(nullptr)
1968 , treeNode(treeNode)
1970 , nodeLocation(nodeLocation)
1971 , registerAssignment(RBM_NONE)
1979 , isPhysRegRef(false)
1980 , isFixedRegRef(false)
1981 , isLocalDefUse(false)
1982 , delayRegFree(false)
1985 , minRegCandidateCount(1)
// getInterval: return 'referent' as an Interval. Only valid when this RefPosition
// refers to an Interval rather than a physical register record (see the assert).
1991 Interval* getInterval()
1993 assert(!isPhysRegRef);
1994 return (Interval*)referent;
// setInterval: make this RefPosition refer to the given Interval (clears isPhysRegRef).
1996 void setInterval(Interval* i)
1999 isPhysRegRef = false;
2004 assert(isPhysRegRef);
2005 return (RegRecord*)referent;
// setReg: make this RefPosition refer to the given physical register record, and
// constrain 'registerAssignment' to exactly that register.
2007 void setReg(RegRecord* r)
2010 isPhysRegRef = true;
2011 registerAssignment = genRegMask(r->regNum);
// assignedReg: the register assigned to this RefPosition. Presumably, after the
// allocation pass, 'registerAssignment' holds at most a single bit (required by
// genRegNumFromMask) — TODO confirm against the allocation pass.
2014 regNumber assignedReg()
2016 if (registerAssignment == RBM_NONE)
2021 return genRegNumFromMask(registerAssignment);
2024 // Returns true if it is a reference on a gentree node.
2027 return (refType == RefTypeDef || refType == RefTypeUse);
// RequiresRegister: an actual (def/use) reference — and, when partial SIMD callee-save
// is enabled, an upper-vector save def/use — requires a register unless it has been
// marked "allocate only if profitable" (reg-optional).
2030 bool RequiresRegister()
2032 return (IsActualRef()
2033 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
2034 || refType == RefTypeUpperVectorSaveDef || refType == RefTypeUpperVectorSaveUse
2035 #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
2037 !AllocateIfProfitable();
// setAllocateIfProfitable: mark (or clear) this RefPosition as reg-optional.
2040 void setAllocateIfProfitable(bool val)
2042 allocRegIfProfitable = val;
2045 // Returns true when this ref position is to be allocated
2046 // a reg only if it is profitable.
2047 bool AllocateIfProfitable()
2049 // TODO-CQ: Right now if a ref position is marked as
2050 // copyreg or movereg, then it is not treated as
2051 // 'allocate if profitable'. This is an implementation
2052 // limitation that needs to be addressed.
2053 return allocRegIfProfitable && !copyReg && !moveReg;
// setMultiRegIdx: record which result register of a multi-reg node this RefPosition
// refers to. The assert guards against truncation in the 2-bit 'multiRegIdx' field.
2056 void setMultiRegIdx(unsigned idx)
2059 assert(multiRegIdx == idx);
// getMultiRegIdx: the multi-reg result index recorded by setMultiRegIdx.
2062 unsigned getMultiRegIdx()
// getRefEndLocation: the Location at which the register for this reference can be
// freed — one Location later than nodeLocation when delayRegFree is set.
2067 LsraLocation getRefEndLocation()
2069 return delayRegFree ? nodeLocation + 1 : nodeLocation;
// getRangeEndRef: the RefPosition that (heuristically) ends this reference's range.
2072 RefPosition* getRangeEndRef()
2074 if (lastUse || nextRefPosition == nullptr)
2078 // It would seem to make sense to only return 'nextRefPosition' if it is a lastUse,
2079 // and otherwise return 'lastRefPosition', but that tends to excessively lengthen
2080 // the range for heuristic purposes.
2081 // TODO-CQ: Look into how this might be improved.
2082 return nextRefPosition;
// getRangeEndLocation: the end Location of the range computed by getRangeEndRef.
2085 LsraLocation getRangeEndLocation()
2087 return getRangeEndRef()->getRefEndLocation();
// isIntervalRef: true when this RefPosition refers to an Interval (i.e. it is not a
// physical register reference and has a non-null referent).
2090 bool isIntervalRef()
2092 return (!isPhysRegRef && (referent != nullptr));
2095 // isTrueDef indicates that the RefPosition is a non-update def of a non-internal
2099 return (refType == RefTypeDef && isIntervalRef() && !getInterval()->isInternal);
2102 // isFixedRefOfRegMask indicates that the RefPosition has a fixed assignment to the register
2103 // specified by the given mask
2104 bool isFixedRefOfRegMask(regMaskTP regMask)
2106 assert(genMaxOneBit(regMask));
// The mask must contain at most one register (enforced by the assert above).
2107 return (registerAssignment == regMask);
2110 // isFixedRefOfReg indicates that the RefPosition has a fixed assignment to the given register
2111 bool isFixedRefOfReg(regNumber regNum)
2113 return (isFixedRefOfRegMask(genRegMask(regNum)));
2117 // operator= copies everything except 'rpNum', which must remain unique
2118 RefPosition& operator=(const RefPosition& rp)
// 'rpNum' is saved here so it can be restored after the wholesale copy, keeping it
// unique. NOTE(review): the memcpy assumes RefPosition is trivially copyable — confirm
// this invariant holds if non-trivial members are ever added.
2120 unsigned rpNumSave = rpNum;
2121 memcpy(this, &rp, sizeof(rp));
// Debug helper: dumps the given register mask.
2131 void dumpRegMask(regMaskTP regs);
2134 /*****************************************************************************/
2136 /*****************************************************************************/