1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
4 /*****************************************************************************/
10 #include "smallhash.h"
13 // Minor and forward-reference types
22 // LsraLocation tracks the linearized order of the nodes.
23 // Each node is assigned two LsraLocations - one for all the uses and all but the last
24 // def, and a second location for the last def (if any)
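// For example (illustrative): a node assigned locations 10 and 11 models its uses (and any
// non-final defs) at location 10, and its last def at location 11.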
26 typedef unsigned int LsraLocation;
27 const unsigned int MinLocation = 0;
28 const unsigned int MaxLocation = UINT_MAX;
29 // max number of registers an operation could require internally (in addition to uses and defs)
30 const unsigned int MaxInternalRegisters = 8;
31 const unsigned int RegisterTypeCount = 2;
33 /*****************************************************************************
35 *****************************************************************************/
36 typedef var_types RegisterType;
37 #define IntRegisterType TYP_INT
38 #define FloatRegisterType TYP_FLOAT
40 //------------------------------------------------------------------------
41 // regType: Return the RegisterType to use for a given type
44 // type - the type of interest
47 RegisterType regType(T type)
50 if (varTypeIsSIMD(type))
52 return FloatRegisterType;
54 #endif // FEATURE_SIMD
55 return varTypeIsFloating(TypeGet(type)) ? FloatRegisterType : IntRegisterType;
58 //------------------------------------------------------------------------
59 // useFloatReg: Check if the given var_type should be allocated to a FloatRegisterType
61 inline bool useFloatReg(var_types type)
63 return (regType(type) == FloatRegisterType);
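// For example (illustrative): regType(TYP_DOUBLE) is FloatRegisterType and regType(TYP_INT) is
// IntRegisterType, so useFloatReg(TYP_DOUBLE) is true while useFloatReg(TYP_INT) is false.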
66 //------------------------------------------------------------------------
67 // registerTypesEquivalent: Check to see if two RegisterTypes are equivalent
69 inline bool registerTypesEquivalent(RegisterType a, RegisterType b)
71 return varTypeIsIntegralOrI(a) == varTypeIsIntegralOrI(b);
74 //------------------------------------------------------------------------
75 // calleeSaveRegs: Get the set of callee-save registers of the given RegisterType
77 inline regMaskTP calleeSaveRegs(RegisterType rt)
79 return varTypeIsIntegralOrI(rt) ? RBM_INT_CALLEE_SAVED : RBM_FLT_CALLEE_SAVED;
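// For example (illustrative): registerTypesEquivalent(TYP_INT, TYP_LONG) is true (both are
// integral register types), registerTypesEquivalent(TYP_INT, TYP_FLOAT) is false, and
// calleeSaveRegs(TYP_FLOAT) returns RBM_FLT_CALLEE_SAVED.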
82 //------------------------------------------------------------------------
83 // LocationInfo: Captures the necessary information for a node that is "in-flight"
84 // during `buildIntervals` (i.e. its definition has been encountered,
94 LocationInfo(LsraLocation l, Interval* i, GenTree* t, unsigned regIdx = 0) : interval(i), treeNode(t), loc(l)
98 // default constructor for data structures
104 //------------------------------------------------------------------------
105 // LocationInfoListNode: used to store a single `LocationInfo` value for a
106 // node during `buildIntervals`.
108 // This is the node type for `LocationInfoList` below.
110 class LocationInfoListNode final : public LocationInfo
112 friend class LocationInfoList;
113 friend class LocationInfoListNodePool;
115 LocationInfoListNode* m_next; // The next node in the list
118 LocationInfoListNode(LsraLocation l, Interval* i, GenTree* t, unsigned regIdx = 0) : LocationInfo(l, i, t, regIdx)
122 //------------------------------------------------------------------------
123 // LocationInfoListNode::Next: Returns the next node in the list.
124 LocationInfoListNode* Next() const
130 //------------------------------------------------------------------------
131 // LocationInfoList: used to store a list of `LocationInfo` values for a
132 // node during `buildIntervals`.
134 // This list of 'LocationInfoListNode's contains the source nodes consumed by
135 // a node, and is created by 'BuildNode'.
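//
// A minimal usage sketch (illustrative only; 'pool', 'op1' and 'op2' are hypothetical):
//
//   LocationInfoList list;
//   list.Append(pool.GetNode(currentLoc, op1Interval, op1));
//   list.Append(pool.GetNode(currentLoc, op2Interval, op2));
//   assert(!list.IsEmpty() && (list.Begin()->treeNode == op1) && (list.Last()->treeNode == op2));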
137 class LocationInfoList final
139 friend class LocationInfoListNodePool;
141 LocationInfoListNode* m_head; // The head of the list
142 LocationInfoListNode* m_tail; // The tail of the list
145 LocationInfoList() : m_head(nullptr), m_tail(nullptr)
149 LocationInfoList(LocationInfoListNode* node) : m_head(node), m_tail(node)
151 assert(m_head->m_next == nullptr);
154 //------------------------------------------------------------------------
155 // LocationInfoList::IsEmpty: Returns true if the list is empty.
159 return m_head == nullptr;
162 //------------------------------------------------------------------------
163 // LocationInfoList::Begin: Returns the first node in the list.
165 LocationInfoListNode* Begin() const
170 //------------------------------------------------------------------------
171 // LocationInfoList::End: Returns the position after the last node in the
172 // list. The returned value is suitable for use as
173 // a sentinel for iteration.
175 LocationInfoListNode* End() const
180 //------------------------------------------------------------------------
181 // LocationInfoList::Last: Returns the last node in the list.
185 LocationInfoListNode* Last() const
190 //------------------------------------------------------------------------
191 // LocationInfoList::Append: Appends a node to the list.
194 // node - The node to append. Must not be part of an existing list.
196 void Append(LocationInfoListNode* node)
198 assert(node->m_next == nullptr);
200 if (m_tail == nullptr)
202 assert(m_head == nullptr);
207 m_tail->m_next = node;
212 //------------------------------------------------------------------------
213 // LocationInfoList::Append: Appends another list to this list.
216 // other - The list to append.
218 void Append(LocationInfoList other)
220 if (m_tail == nullptr)
222 assert(m_head == nullptr);
223 m_head = other.m_head;
227 m_tail->m_next = other.m_head;
230 m_tail = other.m_tail;
233 //------------------------------------------------------------------------
234 // LocationInfoList::Prepend: Prepends a node to the list.
237 // node - The node to prepend. Must not be part of an existing list.
239 void Prepend(LocationInfoListNode* node)
241 assert(node->m_next == nullptr);
243 if (m_head == nullptr)
245 assert(m_tail == nullptr);
250 node->m_next = m_head;
256 //------------------------------------------------------------------------
257 // LocationInfoList::Add: Adds a node to the list.
260 // node - The node to add. Must not be part of an existing list.
261 // prepend - True if it should be prepended (otherwise is appended)
263 void Add(LocationInfoListNode* node, bool prepend)
275 //------------------------------------------------------------------------
276 // removeListNode - remove (and return) the LocationInfoListNode for the given node
279 // The Build methods use this helper to remove the LocationInfoListNode for a child node
280 // from the list being consumed (typically the defList). Note that, if the user knows the order of the operands,
281 // it is expected that they should just retrieve them directly.
283 LocationInfoListNode* removeListNode(GenTree* node)
285 LocationInfoListNode* prevListNode = nullptr;
286 for (LocationInfoListNode *listNode = Begin(), *end = End(); listNode != end; listNode = listNode->Next())
288 if (listNode->treeNode == node)
290 LocationInfoListNode* nextNode = listNode->Next();
291 if (prevListNode == nullptr)
297 prevListNode->m_next = nextNode;
299 if (nextNode == nullptr)
301 m_tail = prevListNode;
303 listNode->m_next = nullptr;
306 prevListNode = listNode;
308 assert(!"removeListNode didn't find the node");
312 //------------------------------------------------------------------------
313 // GetTreeNodeInfo - retrieve the TreeNodeInfo for the given node
316 // The Build methods use this helper to retrieve the TreeNodeInfo for child nodes
317 // from the useList being constructed. Note that, if the user knows the order of the operands,
318 // it is expected that they should just retrieve them directly.
320 TreeNodeInfo& GetTreeNodeInfo(GenTree* node)
322 for (LocationInfoListNode *listNode = Begin(), *end = End(); listNode != end; listNode = listNode->Next())
324 if (listNode->treeNode == node)
326 return listNode->info;
329 assert(!"GetTreeNodeInfo didn't find the node");
333 //------------------------------------------------------------------------
334 // LocationInfoList::GetSecond: Gets the second node in the list.
337 // (DEBUG ONLY) treeNode - The GenTree* we expect to be in the second node.
339 LocationInfoListNode* GetSecond(INDEBUG(GenTree* treeNode))
341 noway_assert((Begin() != nullptr) && (Begin()->Next() != nullptr));
342 LocationInfoListNode* second = Begin()->Next();
343 assert(second->treeNode == treeNode);
348 //------------------------------------------------------------------------
349 // LocationInfoListNodePool: manages a pool of `LocationInfoListNode`
350 // values to decrease overall memory usage
351 // during `buildIntervals`.
353 // `buildIntervals` involves creating a list of location info values per
354 // node that either directly produces a set of registers or that is a
355 // contained node with register-producing sources. However, these lists
356 // are short-lived: they are destroyed once the use of the corresponding
357 // node is processed. As such, there is typically only a small number of
358 // `LocationInfoListNode` values in use at any given time. Pooling these
359 // values avoids otherwise frequent allocations.
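//
// A minimal lifecycle sketch (illustrative only; names other than the pool's methods are hypothetical):
//
//   LocationInfoListNodePool pool(compiler);                   // preallocates a few nodes
//   LocationInfoList list(pool.GetNode(loc, interval, tree));  // fetch a node when a def is built
//   ...                                                        // the consuming node uses the list
//   pool.ReturnNodes(list);                                    // recycle the nodes for reuse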
360 class LocationInfoListNodePool final
362 LocationInfoListNode* m_freeList;
363 Compiler* m_compiler;
364 static const unsigned defaultPreallocation = 8;
367 // Creates a pool of `LocationInfoListNode` values.
368 LocationInfoListNodePool(Compiler* compiler, unsigned preallocate = defaultPreallocation);
370 // Fetches an unused node from the pool.
371 LocationInfoListNode* GetNode(LsraLocation l, Interval* i, GenTree* t, unsigned regIdx = 0);
373 // Returns a list of nodes to the pool.
374 void ReturnNodes(LocationInfoList& list);
379 // bbNum of the predecessor to use for the register location of live-in variables.
381 unsigned int predBBNum;
382 BasicBlock::weight_t weight;
383 bool hasCriticalInEdge;
384 bool hasCriticalOutEdge;
387 // Per-block LSRA statistics maintained during allocation.
389 // Number of spills of local vars or tree temps in this basic block.
392 // Number of GT_COPY nodes inserted in this basic block while allocating regs.
393 // Note that GT_COPY nodes are also inserted as part of basic block boundary
394 // resolution, which are accounted against resolutionMovCount but not
395 // against copyRegCount.
396 unsigned copyRegCount;
398 // Number of resolution moves inserted in this basic block.
399 unsigned resolutionMovCount;
401 // Number of critical edges from this block that are split.
402 unsigned splitEdgeCount;
403 #endif // TRACK_LSRA_STATS
406 // RefType is used somewhat like a bit mask:
407 // the low-order 2 bits will be 1 for defs, and 2 for uses.
408 enum RefType : unsigned char
410 #define DEF_REFTYPE(memberName, memberValue, shortName) memberName = memberValue,
411 #include "lsra_reftypes.h"
415 // position in a block (for resolution)
418 BlockPositionStart = 0,
419 BlockPositionEnd = 1,
423 inline bool RefTypeIsUse(RefType refType)
425 return ((refType & RefTypeUse) == RefTypeUse);
428 inline bool RefTypeIsDef(RefType refType)
430 return ((refType & RefTypeDef) == RefTypeDef);
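// For example (illustrative): RefTypeIsDef(RefTypeDef) is true, RefTypeIsUse(RefTypeDef) is
// false, and any composite RefType value that includes the def bit also reports as a def.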
433 typedef regNumberSmall* VarToRegMap;
435 template <typename ElementType, CompMemKind MemKind>
436 class ListElementAllocator
439 template <typename U, CompMemKind CMK>
440 friend class ListElementAllocator;
442 Compiler* m_compiler;
445 ListElementAllocator(Compiler* compiler) : m_compiler(compiler)
449 template <typename U>
450 ListElementAllocator(const ListElementAllocator<U, MemKind>& other) : m_compiler(other.m_compiler)
454 ElementType* allocate(size_t count)
456 return reinterpret_cast<ElementType*>(m_compiler->compGetMem(sizeof(ElementType) * count, MemKind));
459 void deallocate(ElementType* pointer, size_t count)
463 template <typename U>
466 typedef ListElementAllocator<U, MemKind> allocator;
470 typedef ListElementAllocator<Interval, CMK_LSRA_Interval> LinearScanMemoryAllocatorInterval;
471 typedef ListElementAllocator<RefPosition, CMK_LSRA_RefPosition> LinearScanMemoryAllocatorRefPosition;
473 typedef jitstd::list<Interval, LinearScanMemoryAllocatorInterval> IntervalList;
474 typedef jitstd::list<RefPosition, LinearScanMemoryAllocatorRefPosition> RefPositionList;
475 typedef jitstd::list<RefPosition, LinearScanMemoryAllocatorRefPosition>::iterator RefPositionIterator;
476 typedef jitstd::list<RefPosition, LinearScanMemoryAllocatorRefPosition>::reverse_iterator RefPositionReverseIterator;
483 firstRefPosition = nullptr;
484 recentRefPosition = nullptr;
485 lastRefPosition = nullptr;
489 // A linked list of RefPositions. These are only traversed in the forward
490 // direction, and are not moved, so they don't need to be doubly linked
491 // (see RefPosition).
493 RefPosition* firstRefPosition;
494 RefPosition* recentRefPosition;
495 RefPosition* lastRefPosition;
499 // Get the position of the next reference which is at or greater than
500 // the current location (relies upon recentRefPosition being updated
501 // during traversal).
502 RefPosition* getNextRefPosition();
503 LsraLocation getNextRefLocation();
506 class RegRecord : public Referenceable
511 assignedInterval = nullptr;
512 previousInterval = nullptr;
514 isCalleeSave = false;
515 registerType = IntRegisterType;
516 isBusyUntilNextKill = false;
519 void init(regNumber reg)
521 #ifdef _TARGET_ARM64_
522 // The Zero register, or the SP
523 if ((reg == REG_ZR) || (reg == REG_SP))
525 // IsGeneralRegister returns false for REG_ZR and REG_SP
527 registerType = IntRegisterType;
531 if (emitter::isFloatReg(reg))
533 registerType = FloatRegisterType;
537 // The constructor defaults to IntRegisterType
538 assert(emitter::isGeneralRegister(reg) && registerType == IntRegisterType);
541 isCalleeSave = ((RBM_CALLEE_SAVED & genRegMask(reg)) != 0);
545 // print out representation
547 // concise representation for embedding
553 // RefPosition * getNextRefPosition();
554 // LsraLocation getNextRefLocation();
558 // interval to which this register is currently allocated.
559 // If the interval is inactive (isActive == false) then it is not currently live,
560 // and the register can be unassigned (i.e. setting assignedInterval to nullptr)
561 // without spilling the register.
562 Interval* assignedInterval;
563 // Interval to which this register was previously allocated, and which was unassigned
564 // because it was inactive. This register will be reassigned to this Interval when
565 // assignedInterval becomes inactive.
566 Interval* previousInterval;
570 RegisterType registerType;
571 // This register must be considered busy until the next time it is explicitly killed.
572 // This is used so that putarg_reg can avoid killing its lclVar source, while avoiding
573 // the problem with the reg becoming free if the last-use is encountered before the call.
574 bool isBusyUntilNextKill;
576 bool conflictingFixedRegReference(RefPosition* refPosition);
579 inline bool leafInRange(GenTree* leaf, int lower, int upper)
581 if (!leaf->IsIntCnsFitsInI32())
585 if (leaf->gtIntCon.gtIconVal < lower)
589 if (leaf->gtIntCon.gtIconVal > upper)
597 inline bool leafInRange(GenTree* leaf, int lower, int upper, int multiple)
599 if (!leafInRange(leaf, lower, upper))
603 if (leaf->gtIntCon.gtIconVal % multiple)
611 inline bool leafAddInRange(GenTree* leaf, int lower, int upper, int multiple = 1)
613 if (leaf->OperGet() != GT_ADD)
617 return leafInRange(leaf->gtOp.gtOp2, lower, upper, multiple);
620 inline bool isCandidateVar(LclVarDsc* varDsc)
622 return varDsc->lvLRACandidate;
625 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
626 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
630 XX This is the container for the Linear Scan data structures and methods. XX
632 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
633 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
635 // OPTION 1: The algorithm as described in "Optimized Interval Splitting in a
636 // Linear Scan Register Allocator". It is driven by iterating over the Interval
637 // lists. In this case, we need multiple IntervalLists, and Intervals will be
638 // moved between them so they must be easily updated.
640 // OPTION 2: The algorithm is driven by iterating over the RefPositions. In this
641 // case, we only need a single IntervalList, and it won't be updated.
642 // The RefPosition must refer to its Interval, and we need to be able to traverse
643 // to the next RefPosition in code order
644 // THIS IS THE OPTION CURRENTLY BEING PURSUED
646 class LocationInfoList;
647 class LocationInfoListNodePool;
649 class LinearScan : public LinearScanInterface
651 friend class RefPosition;
652 friend class Interval;
653 friend class Lowering;
654 friend class TreeNodeInfo;
657 // This could use further abstraction. From Compiler we need the tree,
658 // the flowgraph and the allocator.
659 LinearScan(Compiler* theCompiler);
661 // This is the main driver
662 virtual void doLinearScan();
664 // TreeNodeInfo contains three register masks: src candidates, dst candidates, and internal candidates.
665 // Instead of storing actual register masks, however, which are large, we store a small index into a table
666 // of register masks, stored in this class. We create only as many distinct register masks as are needed.
667 // All identical register masks get the same index. The register mask table contains:
668 // 1. A mask containing all eligible integer registers.
669 // 2. A mask containing all eligible floating-point registers.
670 // 3. A mask for each single register.
671 // 4. A mask for each combination of registers, created dynamically as required.
673 // Currently, the maximum number of masks allowed is a constant defined by 'numMasks'. The register mask
674 // table is never resized. It is also limited by the size of the index, currently an unsigned char.
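// For example (an illustrative round trip, not code from this file):
//   RegMaskIndex idx  = GetIndexForRegMask(RBM_EAX | RBM_ECX); // interned; an identical mask reuses the index
//   regMaskTP    mask = GetRegMaskForIndex(idx);               // yields RBM_EAX | RBM_ECX again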
675 CLANG_FORMAT_COMMENT_ANCHOR;
677 #if defined(_TARGET_ARM64_)
678 static const int numMasks = 128;
680 static const int numMasks = 64;
683 regMaskTP* regMaskTable;
686 typedef int RegMaskIndex;
688 // allint is 0, allfloat is 1, all the single-bit masks start at 2
693 FIRST_SINGLE_REG_IDX = 2
696 RegMaskIndex GetIndexForRegMask(regMaskTP mask);
697 regMaskTP GetRegMaskForIndex(RegMaskIndex index);
698 void RemoveRegistersFromMasks(regMaskTP removeMask);
700 static bool isSingleRegister(regMaskTP regMask)
702 return (genExactlyOneBit(regMask));
706 void dspRegisterMaskTable();
709 // Initialize the block traversal for LSRA.
710 // This resets the bbVisitedSet, and on the first invocation sets the blockSequence array,
711 // which determines the order in which blocks will be allocated (currently called during Lowering).
712 BasicBlock* startBlockSequence();
713 // Move to the next block in sequence, updating the current block information.
714 BasicBlock* moveToNextBlock();
715 // Get the next block to be scheduled without changing the current block,
716 // but updating the blockSequence during the first iteration if it is not fully computed.
717 BasicBlock* getNextBlock();
719 // This is called during code generation to update the location of variables
720 virtual void recordVarLocationsAtStartOfBB(BasicBlock* bb);
722 // This does the dataflow analysis and builds the intervals
723 void buildIntervals();
725 // This is where the actual assignment is done
726 void allocateRegisters();
728 // This is the resolution phase, where cross-block mismatches are fixed up
729 void resolveRegisters();
731 void writeRegisters(RefPosition* currentRefPosition, GenTree* tree);
733 // Insert a copy in the case where a tree node value must be moved to a different
734 // register at the point of use, or it is reloaded to a different register
735 // than the one it was spilled from
736 void insertCopyOrReload(BasicBlock* block, GenTree* tree, unsigned multiRegIdx, RefPosition* refPosition);
738 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
739 // Insert code to save and restore the upper half of a vector that lives
740 // in a callee-save register at the point of a call (the upper half is
742 void insertUpperVectorSaveAndReload(GenTree* tree, RefPosition* refPosition, BasicBlock* block);
743 #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
745 // resolve along one block-block edge
751 ResolveSharedCritical,
755 static const char* resolveTypeName[ResolveTypeCount];
765 void addResolutionForDouble(BasicBlock* block,
766 GenTree* insertionPoint,
767 Interval** sourceIntervals,
768 regNumberSmall* location,
771 ResolveType resolveType);
774 BasicBlock* block, GenTree* insertionPoint, Interval* interval, regNumber outReg, regNumber inReg);
776 void handleOutgoingCriticalEdges(BasicBlock* block);
778 void resolveEdge(BasicBlock* fromBlock, BasicBlock* toBlock, ResolveType resolveType, VARSET_VALARG_TP liveSet);
782 // Finally, the register assignments are written back to the tree nodes.
783 void recordRegisterAssignments();
785 // Keep track of how many temp locations we'll need for spill
787 void updateMaxSpill(RefPosition* refPosition);
788 void recordMaxSpill();
790 // max simultaneous spill locations used of every type
791 unsigned int maxSpill[TYP_COUNT];
792 unsigned int currentSpill[TYP_COUNT];
793 bool needFloatTmpForFPCall;
794 bool needDoubleTmpForFPCall;
798 //------------------------------------------------------------------------
799 // Should we stress lsra?
800 // This uses the same COMPLUS variable as rsStressRegs (COMPlus_JitStressRegs)
801 // However, the possible values and their interpretation are entirely different.
803 // The mask bits are currently divided into fields in which each non-zero value
804 // is a distinct stress option (e.g. 0x3 is not a combination of 0x1 and 0x2).
805 // However, subject to possible constraints (to be determined), the different
806 // fields can be combined (e.g. 0x7 is a combination of 0x3 and 0x4).
807 // Note that the field values are declared in a public enum, but the actual bits are
808 // only accessed via accessors.
810 unsigned lsraStressMask;
812 // This controls the registers available for allocation
813 enum LsraStressLimitRegs{LSRA_LIMIT_NONE = 0, LSRA_LIMIT_CALLEE = 0x1, LSRA_LIMIT_CALLER = 0x2,
814 LSRA_LIMIT_SMALL_SET = 0x3, LSRA_LIMIT_MASK = 0x3};
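// For example (illustrative): with COMPlus_JitStressRegs=0x3, getStressLimitRegs() returns
// LSRA_LIMIT_SMALL_SET; values of 0x1 or 0x2 limit allocation to the callee-save or
// caller-save sets respectively, and 0 leaves the candidate registers unrestricted.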
816 // When we limit the number of candidate registers, we have to take into account any
817 // "specialPutArg" references that are in flight, as that increases the number of live
818 // registers between it and the next call.
819 int specialPutArgCount;
821 // When LSRA_LIMIT_SMALL_SET is specified, it is desirable to select a "mixed" set of caller- and callee-save
822 // registers, so as to get different coverage than limiting to callee or caller.
823 // At least for x86 and AMD64, and potentially other architectures that will support SIMD,
824 // we need a minimum of 5 fp regs in order to support the InitN intrinsic for Vector4.
825 // Hence the "SmallFPSet" has 5 elements.
826 CLANG_FORMAT_COMMENT_ANCHOR;
828 #if defined(_TARGET_AMD64_)
829 #ifdef UNIX_AMD64_ABI
830 // On System V, RDI and RSI are not callee-saved. Use R12 and R13 as callee-saved registers.
831 static const regMaskTP LsraLimitSmallIntSet =
832 (RBM_EAX | RBM_ECX | RBM_EBX | RBM_ETW_FRAMED_EBP | RBM_R12 | RBM_R13);
833 #else // !UNIX_AMD64_ABI
834 // On Windows AMD64, use RDI and RSI as callee-saved registers.
835 static const regMaskTP LsraLimitSmallIntSet =
836 (RBM_EAX | RBM_ECX | RBM_EBX | RBM_ETW_FRAMED_EBP | RBM_ESI | RBM_EDI);
837 #endif // !UNIX_AMD64_ABI
838 static const regMaskTP LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7);
839 #elif defined(_TARGET_ARM_)
840 static const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3 | RBM_R4);
841 static const regMaskTP LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F16 | RBM_F17);
842 #elif defined(_TARGET_ARM64_)
843 static const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R19 | RBM_R20);
844 static const regMaskTP LsraLimitSmallFPSet = (RBM_V0 | RBM_V1 | RBM_V2 | RBM_V8 | RBM_V9);
845 #elif defined(_TARGET_X86_)
846 static const regMaskTP LsraLimitSmallIntSet = (RBM_EAX | RBM_ECX | RBM_EDI);
847 static const regMaskTP LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7);
849 #error Unsupported or unset target architecture
852 LsraStressLimitRegs getStressLimitRegs()
854 return (LsraStressLimitRegs)(lsraStressMask & LSRA_LIMIT_MASK);
857 regMaskTP getConstrainedRegMask(regMaskTP regMaskActual, regMaskTP regMaskConstrain, unsigned minRegCount);
858 regMaskTP stressLimitRegs(RefPosition* refPosition, regMaskTP mask);
860 // This controls the heuristics used to select registers
861 // These can be combined.
862 enum LsraSelect{LSRA_SELECT_DEFAULT = 0, LSRA_SELECT_REVERSE_HEURISTICS = 0x04,
863 LSRA_SELECT_REVERSE_CALLER_CALLEE = 0x08, LSRA_SELECT_NEAREST = 0x10, LSRA_SELECT_MASK = 0x1c};
864 LsraSelect getSelectionHeuristics()
866 return (LsraSelect)(lsraStressMask & LSRA_SELECT_MASK);
868 bool doReverseSelect()
870 return ((lsraStressMask & LSRA_SELECT_REVERSE_HEURISTICS) != 0);
872 bool doReverseCallerCallee()
874 return ((lsraStressMask & LSRA_SELECT_REVERSE_CALLER_CALLEE) != 0);
876 bool doSelectNearest()
878 return ((lsraStressMask & LSRA_SELECT_NEAREST) != 0);
881 // This controls the order in which basic blocks are visited during allocation
882 enum LsraTraversalOrder{LSRA_TRAVERSE_LAYOUT = 0x20, LSRA_TRAVERSE_PRED_FIRST = 0x40,
883 LSRA_TRAVERSE_RANDOM = 0x60, // NYI
884 LSRA_TRAVERSE_DEFAULT = LSRA_TRAVERSE_PRED_FIRST, LSRA_TRAVERSE_MASK = 0x60};
885 LsraTraversalOrder getLsraTraversalOrder()
887 if ((lsraStressMask & LSRA_TRAVERSE_MASK) == 0)
889 return LSRA_TRAVERSE_DEFAULT;
891 return (LsraTraversalOrder)(lsraStressMask & LSRA_TRAVERSE_MASK);
893 bool isTraversalLayoutOrder()
895 return getLsraTraversalOrder() == LSRA_TRAVERSE_LAYOUT;
897 bool isTraversalPredFirstOrder()
899 return getLsraTraversalOrder() == LSRA_TRAVERSE_PRED_FIRST;
902 // This controls whether lifetimes should be extended to the entire method.
903 // Note that this has no effect under MinOpts
904 enum LsraExtendLifetimes{LSRA_DONT_EXTEND = 0, LSRA_EXTEND_LIFETIMES = 0x80, LSRA_EXTEND_LIFETIMES_MASK = 0x80};
905 LsraExtendLifetimes getLsraExtendLifeTimes()
907 return (LsraExtendLifetimes)(lsraStressMask & LSRA_EXTEND_LIFETIMES_MASK);
909 bool extendLifetimes()
911 return getLsraExtendLifeTimes() == LSRA_EXTEND_LIFETIMES;
914 // This controls whether variables locations should be set to the previous block in layout order
915 // (LSRA_BLOCK_BOUNDARY_LAYOUT), or to that of the highest-weight predecessor (LSRA_BLOCK_BOUNDARY_PRED -
916 // the default), or rotated (LSRA_BLOCK_BOUNDARY_ROTATE).
917 enum LsraBlockBoundaryLocations{LSRA_BLOCK_BOUNDARY_PRED = 0, LSRA_BLOCK_BOUNDARY_LAYOUT = 0x100,
918 LSRA_BLOCK_BOUNDARY_ROTATE = 0x200, LSRA_BLOCK_BOUNDARY_MASK = 0x300};
919 LsraBlockBoundaryLocations getLsraBlockBoundaryLocations()
921 return (LsraBlockBoundaryLocations)(lsraStressMask & LSRA_BLOCK_BOUNDARY_MASK);
923 regNumber rotateBlockStartLocation(Interval* interval, regNumber targetReg, regMaskTP availableRegs);
925 // This controls whether we always insert a GT_RELOAD instruction after a spill
926 // Note that this can be combined with LSRA_SPILL_ALWAYS (or not)
927 enum LsraReload{LSRA_NO_RELOAD_IF_SAME = 0, LSRA_ALWAYS_INSERT_RELOAD = 0x400, LSRA_RELOAD_MASK = 0x400};
928 LsraReload getLsraReload()
930 return (LsraReload)(lsraStressMask & LSRA_RELOAD_MASK);
932 bool alwaysInsertReload()
934 return getLsraReload() == LSRA_ALWAYS_INSERT_RELOAD;
937 // This controls whether we spill everywhere
938 enum LsraSpill{LSRA_DONT_SPILL_ALWAYS = 0, LSRA_SPILL_ALWAYS = 0x800, LSRA_SPILL_MASK = 0x800};
939 LsraSpill getLsraSpill()
941 return (LsraSpill)(lsraStressMask & LSRA_SPILL_MASK);
945 return getLsraSpill() == LSRA_SPILL_ALWAYS;
948 // This controls whether RefPositions that lower/codegen indicated as reg-optional are
949 // allocated a register at all.
950 enum LsraRegOptionalControl{LSRA_REG_OPTIONAL_DEFAULT = 0, LSRA_REG_OPTIONAL_NO_ALLOC = 0x1000,
951 LSRA_REG_OPTIONAL_MASK = 0x1000};
953 LsraRegOptionalControl getLsraRegOptionalControl()
955 return (LsraRegOptionalControl)(lsraStressMask & LSRA_REG_OPTIONAL_MASK);
958 bool regOptionalNoAlloc()
960 return getLsraRegOptionalControl() == LSRA_REG_OPTIONAL_NO_ALLOC;
963 bool candidatesAreStressLimited()
965 return ((lsraStressMask & (LSRA_LIMIT_MASK | LSRA_SELECT_MASK)) != 0);
970 void lsraDumpIntervals(const char* msg);
971 void dumpRefPositions(const char* msg);
972 void dumpVarRefPositions(const char* msg);
975 static bool IsLsraAdded(GenTree* node)
977 return ((node->gtDebugFlags & GTF_DEBUG_NODE_LSRA_ADDED) != 0);
979 static void SetLsraAdded(GenTree* node)
981 node->gtDebugFlags |= GTF_DEBUG_NODE_LSRA_ADDED;
983 static bool IsResolutionMove(GenTree* node);
984 static bool IsResolutionNode(LIR::Range& containingRange, GenTree* node);
986 void verifyFinalAllocation();
987 void verifyResolutionMove(GenTree* resolutionNode, LsraLocation currentLocation);
989 bool doSelectNearest()
993 bool extendLifetimes()
1001 // In a retail build we support only the default traversal order
1002 bool isTraversalLayoutOrder()
1006 bool isTraversalPredFirstOrder()
1010 bool getLsraExtendLifeTimes()
1014 static void SetLsraAdded(GenTree* node)
1016 // do nothing; checked only under #DEBUG
1018 bool candidatesAreStressLimited()
1025 // Used by Lowering when considering whether to split Longs, as well as by identifyCandidates().
1026 bool isRegCandidate(LclVarDsc* varDsc);
1028 bool isContainableMemoryOp(GenTree* node);
1031 // Determine which locals are candidates for allocation
1032 void identifyCandidates();
1034 // determine which locals are used in EH constructs we don't want to deal with
1035 void identifyCandidatesExceptionDataflow();
1037 void buildPhysRegRecords();
1040 void checkLastUses(BasicBlock* block);
1041 static int ComputeOperandDstCount(GenTree* operand);
1042 static int ComputeAvailableSrcCount(GenTree* node);
1045 void setFrameType();
1047 // Update allocations at start/end of block
1048 void unassignIntervalBlockStart(RegRecord* regRecord, VarToRegMap inVarToRegMap);
1049 void processBlockEndAllocation(BasicBlock* current);
1051 // Record variable locations at start/end of block
1052 void processBlockStartLocations(BasicBlock* current, bool allocationPass);
1053 void processBlockEndLocations(BasicBlock* current);
1056 bool isSecondHalfReg(RegRecord* regRec, Interval* interval);
1057 RegRecord* getSecondHalfRegRec(RegRecord* regRec);
1058 RegRecord* findAnotherHalfRegRec(RegRecord* regRec);
1059 bool canSpillDoubleReg(RegRecord* physRegRecord, LsraLocation refLocation, unsigned* recentAssignedRefWeight);
1060 void unassignDoublePhysReg(RegRecord* doubleRegRecord);
1062 void updateAssignedInterval(RegRecord* reg, Interval* interval, RegisterType regType);
1063 void updatePreviousInterval(RegRecord* reg, Interval* interval, RegisterType regType);
1064 bool canRestorePreviousInterval(RegRecord* regRec, Interval* assignedInterval);
1065 bool isAssignedToInterval(Interval* interval, RegRecord* regRec);
1066 bool isRefPositionActive(RefPosition* refPosition, LsraLocation refLocation);
1067 bool canSpillReg(RegRecord* physRegRecord, LsraLocation refLocation, unsigned* recentAssignedRefWeight);
1068 bool isRegInUse(RegRecord* regRec, RefPosition* refPosition);
1070 RefType CheckBlockType(BasicBlock* block, BasicBlock* prevBlock);
1072 // insert refpositions representing prolog zero-inits which will be added later
1073 void insertZeroInitRefPositions();
1075 void AddMapping(GenTree* node, LsraLocation loc);
1077 // add physreg refpositions for a tree node, based on calling convention and instruction selection predictions
1078 void addRefsForPhysRegMask(regMaskTP mask, LsraLocation currentLoc, RefType refType, bool isLastUse);
1080 void resolveConflictingDefAndUse(Interval* interval, RefPosition* defRefPosition);
1082 void buildRefPositionsForNode(GenTree* tree, BasicBlock* block, LsraLocation loc);
1084 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
1085 VARSET_VALRET_TP buildUpperVectorSaveRefPositions(GenTree* tree, LsraLocation currentLoc);
1086 void buildUpperVectorRestoreRefPositions(GenTree* tree, LsraLocation currentLoc, VARSET_VALARG_TP liveLargeVectors);
1087 #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
1089 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
1090 // For AMD64 on System V machines. This method
1091 // is called as a replacement for raUpdateRegStateForArg,
1092 // which is used on Windows. On System V systems a struct can be passed
1093 // partially using registers from the two register files.
1094 void unixAmd64UpdateRegStateForArg(LclVarDsc* argDsc);
1095 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
1097 // Update reg state for an incoming register argument
1098 void updateRegStateForArg(LclVarDsc* argDsc);
1100 inline bool isCandidateLocalRef(GenTree* tree)
1102 if (tree->IsLocal())
1104 unsigned int lclNum = tree->gtLclVarCommon.gtLclNum;
1105 assert(lclNum < compiler->lvaCount);
1106 LclVarDsc* varDsc = compiler->lvaTable + lclNum;
1108 return isCandidateVar(varDsc);
1113 static Compiler::fgWalkResult markAddrModeOperandsHelperMD(GenTree* tree, void* p);
1115 // Helper for getKillSetForNode().
1116 regMaskTP getKillSetForStoreInd(GenTreeStoreInd* tree);
1118 // Return the registers killed by the given tree node.
1119 regMaskTP getKillSetForNode(GenTree* tree);
1121 // Given some tree node add refpositions for all the registers this node kills
1122 bool buildKillPositionsForNode(GenTree* tree, LsraLocation currentLoc);
1124 regMaskTP allRegs(RegisterType rt);
1125 regMaskTP allRegs(GenTree* tree);
1126 regMaskTP allMultiRegCallNodeRegs(GenTreeCall* tree);
1127 regMaskTP allSIMDRegs();
1128 regMaskTP internalFloatRegCandidates();
1130 bool isMultiRegRelated(RefPosition* refPosition, LsraLocation location);
1131 bool registerIsFree(regNumber regNum, RegisterType regType);
1132 bool registerIsAvailable(RegRecord* physRegRecord,
1133 LsraLocation currentLoc,
1134 LsraLocation* nextRefLocationPtr,
1135 RegisterType regType);
1136 void freeRegister(RegRecord* physRegRecord);
1137 void freeRegisters(regMaskTP regsToFree);
1139 // Get the type that this tree defines.
1140 var_types getDefType(GenTree* tree)
1142 return tree->TypeGet();
1145 RefPosition* defineNewInternalTemp(GenTree* tree, RegisterType regType, regMaskTP regMask);
1147 int buildInternalRegisterDefsForNode(GenTree* tree, TreeNodeInfo* info, RefPosition* defs[]);
1149 void buildInternalRegisterUsesForNode(GenTree* tree, TreeNodeInfo* info, RefPosition* defs[], int total);
1151 void resolveLocalRef(BasicBlock* block, GenTree* treeNode, RefPosition* currentRefPosition);
1153 void insertMove(BasicBlock* block, GenTree* insertionPoint, unsigned lclNum, regNumber inReg, regNumber outReg);
1156 BasicBlock* block, GenTree* insertionPoint, unsigned lclNum1, regNumber reg1, unsigned lclNum2, regNumber reg2);
1159 // TODO-Cleanup: unused?
1160 class PhysRegIntervalIterator
1163 PhysRegIntervalIterator(LinearScan* theLinearScan)
1165 nextRegNumber = (regNumber)0;
1166 linearScan = theLinearScan;
1168 RegRecord* GetNext()
1170 return &linearScan->physRegs[nextRegNumber];
1174 // This assumes that the physical registers are contiguous, starting
1175 // with a register number of 0
1176 regNumber nextRegNumber;
1177 LinearScan* linearScan;
1181 Interval* newInterval(RegisterType regType);
1183 Interval* getIntervalForLocalVar(unsigned varIndex)
1185 assert(varIndex < compiler->lvaTrackedCount);
1186 assert(localVarIntervals[varIndex] != nullptr);
1187 return localVarIntervals[varIndex];
1190 Interval* getIntervalForLocalVarNode(GenTreeLclVarCommon* tree)
1192 LclVarDsc* varDsc = &compiler->lvaTable[tree->gtLclNum];
1193 assert(varDsc->lvTracked);
1194 return getIntervalForLocalVar(varDsc->lvVarIndex);
1197 RegRecord* getRegisterRecord(regNumber regNum);
1199 RefPosition* newRefPositionRaw(LsraLocation nodeLocation, GenTree* treeNode, RefType refType);
1201 RefPosition* newRefPosition(Interval* theInterval,
1202 LsraLocation theLocation,
1204 GenTree* theTreeNode,
1206 unsigned multiRegIdx = 0);
1208 RefPosition* newRefPosition(
1209 regNumber reg, LsraLocation theLocation, RefType theRefType, GenTree* theTreeNode, regMaskTP mask);
1211 void applyCalleeSaveHeuristics(RefPosition* rp);
1213 void checkConflictingDefUse(RefPosition* rp);
1215 void associateRefPosWithInterval(RefPosition* rp);
1217 void associateRefPosWithRegister(RefPosition* rp);
1219 unsigned getWeight(RefPosition* refPos);
1221 /*****************************************************************************
1222 * Register management
1223 ****************************************************************************/
1224 RegisterType getRegisterType(Interval* currentInterval, RefPosition* refPosition);
1225 regNumber tryAllocateFreeReg(Interval* current, RefPosition* refPosition);
1226 regNumber allocateBusyReg(Interval* current, RefPosition* refPosition, bool allocateIfProfitable);
1227 regNumber assignCopyReg(RefPosition* refPosition);
1229 bool isMatchingConstant(RegRecord* physRegRecord, RefPosition* refPosition);
1230 bool isSpillCandidate(Interval* current,
1231 RefPosition* refPosition,
1232 RegRecord* physRegRecord,
1233 LsraLocation& nextLocation);
1234 void checkAndAssignInterval(RegRecord* regRec, Interval* interval);
1235 void assignPhysReg(RegRecord* regRec, Interval* interval);
1236 void assignPhysReg(regNumber reg, Interval* interval)
1238 assignPhysReg(getRegisterRecord(reg), interval);
1241 bool isAssigned(RegRecord* regRec ARM_ARG(RegisterType newRegType));
1242 bool isAssigned(RegRecord* regRec, LsraLocation lastLocation ARM_ARG(RegisterType newRegType));
1243 void checkAndClearInterval(RegRecord* regRec, RefPosition* spillRefPosition);
1244 void unassignPhysReg(RegRecord* regRec ARM_ARG(RegisterType newRegType));
1245 void unassignPhysReg(RegRecord* regRec, RefPosition* spillRefPosition);
1246 void unassignPhysRegNoSpill(RegRecord* reg);
1247 void unassignPhysReg(regNumber reg)
1249 unassignPhysReg(getRegisterRecord(reg), nullptr);
1252 void setIntervalAsSpilled(Interval* interval);
1253 void setIntervalAsSplit(Interval* interval);
1254 void spillInterval(Interval* interval, RefPosition* fromRefPosition, RefPosition* toRefPosition);
1256 void spillGCRefs(RefPosition* killRefPosition);
1258 /*****************************************************************************
1259 * For Resolution phase
1260 ****************************************************************************/
1261 // TODO-Throughput: Consider refactoring this so that we keep a map from regs to vars for better scaling
1262 unsigned int regMapCount;
1264 // When we split edges, we create new blocks, and instead of expanding the VarToRegMaps, we
1265 // rely on the property that the "in" map is the same as the "from" block of the edge, and the
1266 // "out" map is the same as the "to" block of the edge (by construction).
1267 // So, for any block whose bbNum is greater than bbNumMaxBeforeResolution, we use the
1268 // splitBBNumToTargetBBNumMap.
1269 // TODO-Throughput: We may want to look into the cost/benefit tradeoff of doing this vs. expanding
1272 unsigned bbNumMaxBeforeResolution;
1273 struct SplitEdgeInfo
1278 typedef JitHashTable<unsigned, JitSmallPrimitiveKeyFuncs<unsigned>, SplitEdgeInfo> SplitBBNumToTargetBBNumMap;
1279 SplitBBNumToTargetBBNumMap* splitBBNumToTargetBBNumMap;
1280 SplitBBNumToTargetBBNumMap* getSplitBBNumToTargetBBNumMap()
1282 if (splitBBNumToTargetBBNumMap == nullptr)
1284 splitBBNumToTargetBBNumMap =
1285 new (getAllocator(compiler)) SplitBBNumToTargetBBNumMap(getAllocator(compiler));
1287 return splitBBNumToTargetBBNumMap;
1289 SplitEdgeInfo getSplitEdgeInfo(unsigned int bbNum);
1291 void initVarRegMaps();
1292 void setInVarRegForBB(unsigned int bbNum, unsigned int varNum, regNumber reg);
1293 void setOutVarRegForBB(unsigned int bbNum, unsigned int varNum, regNumber reg);
1294 VarToRegMap getInVarToRegMap(unsigned int bbNum);
1295 VarToRegMap getOutVarToRegMap(unsigned int bbNum);
1296 void setVarReg(VarToRegMap map, unsigned int trackedVarIndex, regNumber reg);
1297 regNumber getVarReg(VarToRegMap map, unsigned int trackedVarIndex);
1298 // Initialize the incoming VarToRegMap to the given map values (generally a predecessor of
1300 VarToRegMap setInVarToRegMap(unsigned int bbNum, VarToRegMap srcVarToRegMap);
1302 regNumber getTempRegForResolution(BasicBlock* fromBlock, BasicBlock* toBlock, var_types type);
1305 void dumpVarToRegMap(VarToRegMap map);
1306 void dumpInVarToRegMap(BasicBlock* block);
1307 void dumpOutVarToRegMap(BasicBlock* block);
1309 // There are three points at which a tuple-style dump is produced, and each
1310 // differs slightly:
1311 // - In LSRA_DUMP_PRE, it does a simple dump of each node, with indications of what
1312 // tree nodes are consumed.
1313 // - In LSRA_DUMP_REFPOS, which is after the intervals are built, but before
1314 // register allocation, each node is dumped, along with all of the RefPositions,
1315 // The Intervals are identified as Lnnn for lclVar intervals, Innn for other
1316 // intervals, and Tnnn for internal temps.
1317 // - In LSRA_DUMP_POST, which is after register allocation, the registers are
1320 enum LsraTupleDumpMode{LSRA_DUMP_PRE, LSRA_DUMP_REFPOS, LSRA_DUMP_POST};
1321 void lsraGetOperandString(GenTree* tree, LsraTupleDumpMode mode, char* operandString, unsigned operandStringLength);
1322 void lsraDispNode(GenTree* tree, LsraTupleDumpMode mode, bool hasDest);
1323 void DumpOperandDefs(
1324 GenTree* operand, bool& first, LsraTupleDumpMode mode, char* operandString, const unsigned operandStringLength);
1325 void TupleStyleDump(LsraTupleDumpMode mode);
1327 LsraLocation maxNodeLocation;
1329 // Width of various fields - used to create a streamlined dump during allocation that shows the
1330 // state of all the registers in columns.
1334 const char* columnSeparator;
1336 const char* leftBox;
1337 const char* middleBox;
1338 const char* rightBox;
1340 static const int MAX_FORMAT_CHARS = 12;
1341 char intervalNameFormat[MAX_FORMAT_CHARS];
1342 char regNameFormat[MAX_FORMAT_CHARS];
1343 char shortRefPositionFormat[MAX_FORMAT_CHARS];
1344 char emptyRefPositionFormat[MAX_FORMAT_CHARS];
1345 char indentFormat[MAX_FORMAT_CHARS];
1346 static const int MAX_LEGEND_FORMAT_CHARS = 25;
1347 char bbRefPosFormat[MAX_LEGEND_FORMAT_CHARS];
1348 char legendFormat[MAX_LEGEND_FORMAT_CHARS];
1350 // How many rows have we printed since last printing a "title row"?
1351 static const int MAX_ROWS_BETWEEN_TITLES = 50;
1352 int rowCountSinceLastTitle;
1353 // Current mask of registers being printed in the dump.
1354 regMaskTP lastDumpedRegisters;
1355 regMaskTP registersToDump;
1356 int lastUsedRegNumIndex;
1357 bool shouldDumpReg(regNumber regNum)
1359 return (registersToDump & genRegMask(regNum)) != 0;
1362 void dumpRegRecordHeader();
1363 void dumpRegRecordTitle();
1364 void dumpRegRecordTitleIfNeeded();
1365 void dumpRegRecordTitleLines();
1366 void dumpRegRecords();
1367 // An abbreviated RefPosition dump for printing with column-based register state
1368 void dumpRefPositionShort(RefPosition* refPosition, BasicBlock* currentBlock);
1369 // Print the number of spaces occupied by a dumpRefPositionShort()
1370 void dumpEmptyRefPosition();
1371 // A dump of Referent, in exactly regColumnWidth characters
1372 void dumpIntervalName(Interval* interval);
1374 // Events during the allocation phase that cause some dump output
1376 // Conflicting def/use
1377 LSRA_EVENT_DEFUSE_CONFLICT, LSRA_EVENT_DEFUSE_FIXED_DELAY_USE, LSRA_EVENT_DEFUSE_CASE1, LSRA_EVENT_DEFUSE_CASE2,
1378 LSRA_EVENT_DEFUSE_CASE3, LSRA_EVENT_DEFUSE_CASE4, LSRA_EVENT_DEFUSE_CASE5, LSRA_EVENT_DEFUSE_CASE6,
1381 LSRA_EVENT_SPILL, LSRA_EVENT_SPILL_EXTENDED_LIFETIME, LSRA_EVENT_RESTORE_PREVIOUS_INTERVAL,
1382 LSRA_EVENT_RESTORE_PREVIOUS_INTERVAL_AFTER_SPILL, LSRA_EVENT_DONE_KILL_GC_REFS,
1385 LSRA_EVENT_START_BB, LSRA_EVENT_END_BB,
1388 LSRA_EVENT_FREE_REGS,
1390 // Characteristics of the current RefPosition
1391 LSRA_EVENT_INCREMENT_RANGE_END, // ???
1392 LSRA_EVENT_LAST_USE, LSRA_EVENT_LAST_USE_DELAYED, LSRA_EVENT_NEEDS_NEW_REG,
1394 // Allocation decisions
1395 LSRA_EVENT_FIXED_REG, LSRA_EVENT_EXP_USE, LSRA_EVENT_ZERO_REF, LSRA_EVENT_NO_ENTRY_REG_ALLOCATED,
1396 LSRA_EVENT_KEPT_ALLOCATION, LSRA_EVENT_COPY_REG, LSRA_EVENT_MOVE_REG, LSRA_EVENT_ALLOC_REG,
1397 LSRA_EVENT_ALLOC_SPILLED_REG, LSRA_EVENT_NO_REG_ALLOCATED, LSRA_EVENT_RELOAD, LSRA_EVENT_SPECIAL_PUTARG,
1398 LSRA_EVENT_REUSE_REG,
1400 void dumpLsraAllocationEvent(LsraDumpEvent event,
1401 Interval* interval = nullptr,
1402 regNumber reg = REG_NA,
1403 BasicBlock* currentBlock = nullptr);
1405 void dumpBlockHeader(BasicBlock* block);
1407 void validateIntervals();
1410 #if TRACK_LSRA_STATS
1412 LSRA_STAT_SPILL, LSRA_STAT_COPY_REG, LSRA_STAT_RESOLUTION_MOV, LSRA_STAT_SPLIT_EDGE,
1415 unsigned regCandidateVarCount;
1416 void updateLsraStat(LsraStat stat, unsigned currentBBNum);
1418 void dumpLsraStats(FILE* file);
1420 #define INTRACK_STATS(x) x
1421 #else // !TRACK_LSRA_STATS
1422 #define INTRACK_STATS(x)
1423 #endif // !TRACK_LSRA_STATS
1428 #if MEASURE_MEM_ALLOC
1429 CompAllocator* lsraAllocator;
1432 CompAllocator* getAllocator(Compiler* comp)
1434 #if MEASURE_MEM_ALLOC
1435 if (lsraAllocator == nullptr)
1437 lsraAllocator = new (comp, CMK_LSRA) CompAllocator(comp, CMK_LSRA);
1439 return lsraAllocator;
1441 return comp->getAllocator();
1446 // This is used for dumping
1447 RefPosition* activeRefPosition;
1450 IntervalList intervals;
1452 RegRecord physRegs[REG_COUNT];
1454 // Map from tracked variable index to Interval*.
1455 Interval** localVarIntervals;
1457 // Set of blocks that have been visited.
1458 BlockSet bbVisitedSet;
1459 void markBlockVisited(BasicBlock* block)
1461 BlockSetOps::AddElemD(compiler, bbVisitedSet, block->bbNum);
1463 void clearVisitedBlocks()
1465 BlockSetOps::ClearD(compiler, bbVisitedSet);
1467 bool isBlockVisited(BasicBlock* block)
1469 return BlockSetOps::IsMember(compiler, bbVisitedSet, block->bbNum);
1476 // A map from bbNum to the block information used during register allocation.
1477 LsraBlockInfo* blockInfo;
1478 BasicBlock* findPredBlockForLiveIn(BasicBlock* block, BasicBlock* prevBlock DEBUGARG(bool* pPredBlockIsAllocated));
1480 // The order in which the blocks will be allocated.
1481 // This is an array of BasicBlock*, in the order in which they should be traversed.
1482 BasicBlock** blockSequence;
1483 // The verifiedAllBBs flag indicates whether we have verified that all BBs have been
1484 // included in the blockSequence above, during setBlockSequence().
1485 bool verifiedAllBBs;
1486 void setBlockSequence();
1487 int compareBlocksForSequencing(BasicBlock* block1, BasicBlock* block2, bool useBlockWeights);
1488 BasicBlockList* blockSequenceWorkList;
1489 bool blockSequencingDone;
1490 void addToBlockSequenceWorkList(BlockSet sequencedBlockSet, BasicBlock* block, BlockSet& predSet);
1491 void removeFromBlockSequenceWorkList(BasicBlockList* listNode, BasicBlockList* prevNode);
1492 BasicBlock* getNextCandidateFromWorkList();
1494 // The bbNum of the block being currently allocated or resolved.
1495 unsigned int curBBNum;
1496 // The current location
1497 LsraLocation currentLoc;
1498 // The ordinal of the block we're on (i.e. this is the curBBSeqNum-th block we've allocated).
1499 unsigned int curBBSeqNum;
1500 // The number of blocks that we've sequenced.
1501 unsigned int bbSeqCount;
1502 // The Location of the start of the current block.
1503 LsraLocation curBBStartLocation;
1504 // True if the method contains any critical edges.
1505 bool hasCriticalEdges;
1507 // True if there are any register candidate lclVars available for allocation.
1508 bool enregisterLocalVars;
1510 virtual bool willEnregisterLocalVars() const
1512 return enregisterLocalVars;
1515 // Ordered list of RefPositions
1516 RefPositionList refPositions;
1518 // Per-block variable location mappings: an array indexed by block number that yields a
1519 // pointer to an array of regNumber, one per variable.
1520 VarToRegMap* inVarToRegMaps;
1521 VarToRegMap* outVarToRegMaps;
1523 // A temporary VarToRegMap used during the resolution of critical edges.
1524 VarToRegMap sharedCriticalVarToRegMap;
1526 PhasedVar<regMaskTP> availableIntRegs;
1527 PhasedVar<regMaskTP> availableFloatRegs;
1528 PhasedVar<regMaskTP> availableDoubleRegs;
1530 // The set of all register candidates. Note that this may be a subset of tracked vars.
1531 VARSET_TP registerCandidateVars;
1532 // Current set of live register candidate vars, used during building of RefPositions to determine
1533 // whether to preference them to callee-save registers.
1534 VARSET_TP currentLiveVars;
1535 // Set of variables that may require resolution across an edge.
1536 // This is first constructed during interval building, to contain all the lclVars that are live at BB edges.
1537 // Then, any lclVar that is always in the same register is removed from the set.
1538 VARSET_TP resolutionCandidateVars;
1539 // This set contains all the lclVars that are ever spilled or split.
1540 VARSET_TP splitOrSpilledVars;
1541 // Set of floating point variables to consider for callee-save registers.
1542 VARSET_TP fpCalleeSaveCandidateVars;
1543 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
1544 #if defined(_TARGET_AMD64_)
1545 static bool varTypeNeedsPartialCalleeSave(var_types type)
1547 return (emitTypeSize(type) == 32);
1549 static const var_types LargeVectorSaveType = TYP_SIMD16;
1550 #elif defined(_TARGET_ARM64_)
1551 static bool varTypeNeedsPartialCalleeSave(var_types type)
1553 // The ARM64 ABI requires the callee to save only the lower 8 bytes of FP callee-save registers.
1554 // For SIMD types longer than 8 bytes, the caller is responsible for saving and restoring the upper bytes.
1555 return (emitTypeSize(type) == 16);
1557 static const var_types LargeVectorSaveType = TYP_DOUBLE;
1558 #else // !defined(_TARGET_AMD64_) && !defined(_TARGET_ARM64_)
1559 #error("Unknown target architecture for FEATURE_SIMD")
1560 #endif // !defined(_TARGET_AMD64_) && !defined(_TARGET_ARM64_)
1562 // Set of large vector (TYP_SIMD32 on AVX) variables.
1563 VARSET_TP largeVectorVars;
1564 // Set of large vector (TYP_SIMD32 on AVX) variables to consider for callee-save registers.
1565 VARSET_TP largeVectorCalleeSaveCandidateVars;
1566 #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
1568 //-----------------------------------------------------------------------
1570 //-----------------------------------------------------------------------
1572 // The listNodePool is used to maintain the TreeNodeInfo for nodes that are "in flight"
1573 // i.e. whose consuming node has not yet been handled.
1574 LocationInfoListNodePool listNodePool;
1576 // The defList is used for the transient TreeNodeInfo that is computed by
1577 // the Build methods, and used in building RefPositions.
1578 // When Def RefPositions are built for a node, their NodeInfo is placed
1579 // in the defList. As the consuming node is handled, it moves the NodeInfo
1580 // into an ordered useList corresponding to the uses for that node.
1581 LocationInfoList defList;
1583 // The useList is constructed for each node by the Build methods.
1584 // It contains the TreeNodeInfo for its operands, in their order of use.
1585 LocationInfoList useList;
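// A minimal sketch of the defList/useList flow (illustrative; 'op' is a hypothetical operand):
//
//   // buildRefPositionsForNode placed op's LocationInfoListNode in defList when op was defined;
//   // the consuming node's Build method then moves it into that node's ordered useList:
//   appendLocationInfoToList(op);                       // defList -> useList
//   TreeNodeInfo& opInfo = useList.GetTreeNodeInfo(op); // look up op's info among the uses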
1587 // During the build phase, this is the NodeInfo for the current node.
1588 TreeNodeInfo* currentNodeInfo;
1590 // Remove the LocationInfoListNode for the given node from the defList, and put it into the useList.
1591 // The node must not be contained, and must have been processed by buildRefPositionsForNode().
1592 void appendLocationInfoToList(GenTree* node)
1594 LocationInfoListNode* locationInfo = defList.removeListNode(node);
1595 useList.Append(locationInfo);
1597 // Get the LocationInfoListNode for the given node and return it, but don't put it into the useList.
1598 // The node must not be contained, and must have been processed by buildRefPositionsForNode().
1599 LocationInfoListNode* getLocationInfo(GenTree* node)
1601 LocationInfoListNode* locationInfo = defList.removeListNode(node);
1602 return locationInfo;
1604 //------------------------------------------------------------------------
1605 // appendBinaryLocationInfoToList: Get the LocationInfoListNodes for the operands of the
1606 // given node, and put them into the useList.
1609 // node - a GenTreeOp
1612 // The number of actual register operands.
1615 // The operands must already have been processed by buildRefPositionsForNode, and their
1616 // LocationInfoListNodes placed in the defList.
1618 int appendBinaryLocationInfoToList(GenTreeOp* node)
1621 LocationInfoListNode* op1LocationInfo = nullptr;
1622 LocationInfoListNode* op2LocationInfo = nullptr;
1624 GenTree* op1 = node->gtOp1;
1625 GenTree* op2 = node->gtGetOp2IfPresent();
1626 if (node->IsReverseOp() && op2 != nullptr)
1628 srcCount += GetOperandInfo(op2);
1633 srcCount += GetOperandInfo(op1);
1637 srcCount += GetOperandInfo(op2);
1642 // This is the main entry point for computing the TreeNodeInfo for a node.
1643 void BuildNode(GenTree* stmt);
1645 void BuildCheckByteable(GenTree* tree);
1647 bool CheckAndSetDelayFree(GenTree* delayUseSrc);
1649 void BuildSimple(GenTree* tree);
1650 int GetOperandInfo(GenTree* node);
1651 int GetOperandInfo(GenTree* node, LocationInfoListNode** pFirstInfo);
1652 int GetIndirInfo(GenTreeIndir* indirTree);
1653 void HandleFloatVarArgs(GenTreeCall* call, GenTree* argNode, bool* callHasFloatRegArgs);
1655 void BuildStoreLoc(GenTree* tree);
1656 void BuildReturn(GenTree* tree);
1657 #ifdef _TARGET_XARCH_
1658 // This method, unlike the others, returns the number of sources, since it may be called when
1659 // 'tree' is contained.
1660 int BuildShiftRotate(GenTree* tree);
1661 #endif // _TARGET_XARCH_
1663 void BuildShiftLongCarry(GenTree* tree);
1665 void BuildPutArgReg(GenTreeUnOp* node);
1666 void BuildCall(GenTreeCall* call);
1667 void BuildCmp(GenTree* tree);
1668 void BuildStructArg(GenTree* structArg);
1669 void BuildBlockStore(GenTreeBlk* blkNode);
1670 void BuildModDiv(GenTree* tree);
1671 void BuildIntrinsic(GenTree* tree);
1672 void BuildStoreLoc(GenTreeLclVarCommon* tree);
1673 void BuildIndir(GenTreeIndir* indirTree);
1674 void BuildGCWriteBarrier(GenTree* tree);
1675 void BuildCast(GenTree* tree);
1678 bool ExcludeNonByteableRegisters(GenTree* tree);
1681 #if defined(_TARGET_XARCH_)
1682 // returns true if the tree can use the read-modify-write memory instruction form
1683 bool isRMWRegOper(GenTree* tree);
1684 void BuildMul(GenTree* tree);
1685 void SetContainsAVXFlags(bool isFloatingPointType = true, unsigned sizeOfSIMDVector = 0);
1686 // Move the last use bit, if any, from 'fromTree' to 'toTree'; 'fromTree' must be contained.
1687 void CheckAndMoveRMWLastUse(GenTree* fromTree, GenTree* toTree)
1689 // If 'fromTree' is not a last-use lclVar, there's nothing to do.
1690 if ((fromTree == nullptr) || !fromTree->OperIs(GT_LCL_VAR) || ((fromTree->gtFlags & GTF_VAR_DEATH) == 0))
1694 // If 'fromTree' was a lclVar, it must be contained and 'toTree' must match.
1695 if (!fromTree->isContained() || (toTree == nullptr) || !toTree->OperIs(GT_LCL_VAR) ||
1696 (fromTree->AsLclVarCommon()->gtLclNum != toTree->AsLclVarCommon()->gtLclNum))
1698 assert(!"Unmatched RMW indirections");
1701 // This is probably not necessary, but keeps things consistent.
1702 fromTree->gtFlags &= ~GTF_VAR_DEATH;
1703 if (toTree != nullptr) // Just to be conservative
1705 toTree->gtFlags |= GTF_VAR_DEATH;
#endif // defined(_TARGET_XARCH_)
void BuildSIMD(GenTreeSIMD* tree);
#endif // FEATURE_SIMD
#ifdef FEATURE_HW_INTRINSICS
void BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree);
#endif // FEATURE_HW_INTRINSICS
void BuildPutArgStk(GenTreePutArgStk* argNode);
void BuildPutArgSplit(GenTreePutArgSplit* tree);
void BuildLclHeap(GenTree* tree);
/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XX This is the fundamental data structure for linear scan register          XX
XX allocation. It represents the live range(s) for a variable or temp.      XX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
class Interval : public Referenceable
Interval(RegisterType registerType, regMaskTP registerPreferences)
: registerPreferences(registerPreferences)
, relatedInterval(nullptr)
, assignedReg(nullptr)
, registerType(registerType)
, isStructField(false)
, isPromotedStruct(false)
, hasConflictingDefUse(false)
, hasInterferingUses(false)
, isSpecialPutArg(false)
, preferCalleeSave(false)
, physReg(REG_COUNT)
// print out representation
// concise representation for embedding
// extremely concise representation
void setLocalNumber(Compiler* compiler, unsigned lclNum, LinearScan* l);
// Fixed registers for which this Interval has a preference
regMaskTP registerPreferences;
// The relatedInterval is:
// - for any other interval, it is the interval to which this interval
// is currently preferenced (e.g. because they are related by a copy)
Interval* relatedInterval;
// The assignedReg is the RegRecord for the register to which this interval
// has been assigned at some point - if the interval is active, this is the
// register it currently occupies.
RegRecord* assignedReg;
// DECIDE : put this in a union or do something w/ inheritance?
// this is an interval for a physical register, not an allocatable entity
RegisterType registerType;
bool isLocalVar : 1;
// Indicates whether this interval has been assigned to different registers
// Indicates whether this interval is ever spilled
// indicates an interval representing the internal requirements for
// generating code for a node (temp registers internal to the node)
// Note that this interval may live beyond a node in the GT_ARR_LENREF/GT_IND
// case (though never lives beyond a stmt)
bool isInternal : 1;
// true if this is a LocalVar for a struct field
bool isStructField : 1;
// true iff this is a GT_LDOBJ for a fully promoted (PROMOTION_TYPE_INDEPENDENT) struct
bool isPromotedStruct : 1;
// true if this is an SDSU interval for which the def and use have conflicting register
// requirements
bool hasConflictingDefUse : 1;
// true if this interval's defining node has "delayRegFree" uses, either due to it being an RMW instruction,
// OR because it requires an internal register that differs from the target.
bool hasInterferingUses : 1;
// True if this interval is defined by a putArg, whose source is a non-last-use lclVar.
// During allocation, this flag will be cleared if the source is not already in the required register.
// Otherwise, we will leave the register allocated to the lclVar, but mark the RegRecord as
// isBusyUntilNextKill, so that it won't be reused if the lclVar goes dead before the call.
bool isSpecialPutArg : 1;
// True if this interval interferes with a call.
bool preferCalleeSave : 1;
// True if this interval is defined by a constant node that may be reused and/or may be
// able to reuse a constant that's already in a register.
bool isConstant : 1;
// True if this Interval is defined by a node that produces multiple registers.
bool isMultiReg : 1;
// The register to which it is currently assigned.
unsigned int intervalIndex;
unsigned int varNum; // This is the "variable number": the index into the lvaTable array
LclVarDsc* getLocalVar(Compiler* comp)
return &(comp->lvaTable[this->varNum]);
// Get the local tracked variable "index" (lvVarIndex), used in bitmasks.
unsigned getVarIndex(Compiler* comp)
LclVarDsc* varDsc = getLocalVar(comp);
assert(varDsc->lvTracked); // If this isn't true, we shouldn't be calling this function!
return varDsc->lvVarIndex;
bool isAssignedTo(regNumber regNum)
// This uses regMasks to handle the case where a double actually occupies two registers
// TODO-Throughput: This could/should be done more cheaply.
return (physReg != REG_NA && (genRegMask(physReg, registerType) & genRegMask(regNum)) != RBM_NONE);
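// Illustrative sketch (not part of the JIT sources): isAssignedTo intersects register masks
// rather than comparing register numbers because a single value may occupy more than one
// register (e.g. a double occupying a float register pair on ARM). The helper name and the
// use of plain 64-bit masks in place of regMaskTP are assumptions for illustration only.
static bool OverlapsAssignedRegistersSketch(uint64_t occupancyMask, uint64_t queryRegMask)
{
    // The interval is "assigned to" any register whose mask bit intersects its occupancy mask.
    return (occupancyMask & queryRegMask) != 0;
}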
// Assign the related interval.
void assignRelatedInterval(Interval* newRelatedInterval)
printf("Assigning related ");
newRelatedInterval->microDump();
relatedInterval = newRelatedInterval;
// Assign the related interval, but only if it isn't already assigned.
void assignRelatedIntervalIfUnassigned(Interval* newRelatedInterval)
if (relatedInterval == nullptr)
assignRelatedInterval(newRelatedInterval);
printf("Interval ");
printf(" already has a related interval\n");
// Update the registerPreferences on the interval.
// If there are conflicting requirements on this interval, set the preferences to
// the union of them. That way maybe we'll get at least one of them.
// An exception is made in the case where either the existing or the new
// preferences is all callee-save, in which case we "prefer" the callee-save registers.
void updateRegisterPreferences(regMaskTP preferences)
// We require registerPreferences to have been initialized.
assert(registerPreferences != RBM_NONE);
// It is invalid to update with empty preferences
assert(preferences != RBM_NONE);
regMaskTP commonPreferences = (registerPreferences & preferences);
if (commonPreferences != RBM_NONE)
registerPreferences = commonPreferences;
// There are no preferences in common.
// Preferences need to reflect both cases where a var must occupy a specific register,
// as well as cases where a var is live when a register is killed.
// In the former case, we would like to record all such registers, however we don't
// really want to use any registers that will interfere.
// To approximate this, we never "or" together multi-reg sets, which are generally kill sets.
if (!genMaxOneBit(preferences))
// The new preference value is a multi-reg set, so it's probably a kill.
// Keep the new value.
registerPreferences = preferences;
if (!genMaxOneBit(registerPreferences))
// The old preference value is a multi-reg set.
// Keep the existing preference set, as it probably reflects one or more kills.
// It may have been a union of multiple individual registers, but we can't
// distinguish that case without extra cost.
// If we reach here, we have two disjoint single-reg sets.
// Keep only the callee-save preferences, if not empty.
// Otherwise, take the union of the preferences.
regMaskTP newPreferences = registerPreferences | preferences;
if (preferCalleeSave)
regMaskTP calleeSaveMask = (calleeSaveRegs(this->registerType) & (newPreferences));
if (calleeSaveMask != RBM_NONE)
newPreferences = calleeSaveMask;
registerPreferences = newPreferences;
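// Illustrative sketch (not part of the JIT sources): the decision tree above, restated over
// plain 64-bit masks so it can be read in isolation. The helper name, its parameters, and
// the substitution of uint64_t for regMaskTP are assumptions; the multi-bit test stands in
// for genMaxOneBit.
static uint64_t MergePreferencesSketch(uint64_t existing, uint64_t incoming, uint64_t calleeSaveRegsMask, bool preferCalleeSaveRegs)
{
    // 1. Some register satisfies both constraints: narrow to the intersection.
    uint64_t common = existing & incoming;
    if (common != 0)
    {
        return common;
    }
    bool incomingIsMultiReg = (incoming & (incoming - 1)) != 0;
    bool existingIsMultiReg = (existing & (existing - 1)) != 0;
    // 2. The incoming set is a multi-reg set (probably a kill): take it as-is.
    if (incomingIsMultiReg)
    {
        return incoming;
    }
    // 3. The existing set is a multi-reg set (probably reflects kills): keep it.
    if (existingIsMultiReg)
    {
        return existing;
    }
    // 4. Two disjoint single-reg sets: union them, then narrow to the callee-save
    //    subset if that is preferred and non-empty.
    uint64_t merged = existing | incoming;
    if (preferCalleeSaveRegs && ((merged & calleeSaveRegsMask) != 0))
    {
        merged &= calleeSaveRegsMask;
    }
    return merged;
}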
// A RefPosition refers to either an Interval or a RegRecord. 'referent' points to one
// of these types. If it refers to a RegRecord, then 'isPhysRegRef' is true. If it
// refers to an Interval, then 'isPhysRegRef' is false.
// Q: can 'referent' be NULL?
Referenceable* referent;
// nextRefPosition is the next in code order.
// Note that in either case there is no need for these to be doubly linked, as they
// are only traversed in the forward direction, and are not moved.
RefPosition* nextRefPosition;
// The remaining fields are common to both options
// Prior to the allocation pass, registerAssignment captures the valid registers
// for this RefPosition. An empty set means that any register is valid. A non-empty
// set means that it must be one of the given registers (may be the full set if the
// only constraint is that it must reside in SOME register)
// After the allocation pass, this contains the actual assignment
LsraLocation nodeLocation;
regMaskTP registerAssignment;
// NOTE: C++ only packs bitfields if the base type is the same. So make all the base
// NOTE: types of the logically "bool" types that follow 'unsigned char', so they match
// NOTE: RefType that precedes this, and multiRegIdx can also match.
// Indicates whether this ref position is to be allocated a reg only if profitable. Currently these are the
// ref positions that lower/codegen has indicated as reg optional; such a ref position is treated as a
// contained memory operand if no reg is allocated.
unsigned char allocRegIfProfitable : 1;
// Used by RefTypeDef/Use positions of a multi-reg call node.
// Indicates the position of the register that this ref position refers to.
// The max bits needed is based on the max value of MAX_RET_REG_COUNT across all targets,
// which happens to be 4 on ARM. Hence the index value would be 0..MAX_RET_REG_COUNT-1.
unsigned char multiRegIdx : 2;
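// Illustrative check (an assumption based on the comment above, not taken from the JIT
// sources; it presumes MAX_RET_REG_COUNT is visible here): a 2-bit field encodes indices
// 0..3, so the multi-reg index fits only while MAX_RET_REG_COUNT does not exceed 4.
static_assert(MAX_RET_REG_COUNT <= 4, "multiRegIdx is a 2-bit field and can only encode indices 0..3");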
// Last Use - this may be true for multiple RefPositions in the same Interval
unsigned char lastUse : 1;
// Spill and Copy info
// reload indicates that the value was spilled, and must be reloaded here.
// spillAfter indicates that the value is spilled here, so a spill must be added.
// copyReg indicates that the value needs to be copied to a specific register,
// but that it will also retain its current assigned register.
// moveReg indicates that the value needs to be moved to a different register,
// and that this will be its new assigned register.
// A RefPosition may have any flag individually or the following combinations:
// - reload and spillAfter (i.e. it remains in memory), but not in combination with copyReg or moveReg
// (reload cannot exist with copyReg or moveReg; it should be reloaded into the appropriate reg)
// - spillAfter and copyReg (i.e. it must be copied to a new reg for use, but is then spilled)
// - spillAfter and moveReg (i.e. it must be both spilled and moved)
// NOTE: a moveReg involves an explicit move, and would usually not be needed for a fixed Reg if it is going
// to be spilled, because the code generator will do the move to the fixed register, and doesn't need to
// record the new register location as the new "home" location of the lclVar. However, if there is a conflicting
// use at the same location (e.g. lclVar V1 is in rdx and needs to be in rcx, but V2 needs to be in rdx), then
// we need an explicit move.
// - copyReg and moveReg must not exist with each other.
unsigned char reload : 1;
unsigned char spillAfter : 1;
unsigned char copyReg : 1;
unsigned char moveReg : 1; // true if this var is moved to a new register
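// Illustrative sketch (not part of the JIT sources): the legal flag combinations described
// in the comment above, expressed as a hypothetical validation helper. The function name
// and parameters are assumptions for illustration only.
static bool IsValidSpillCopyCombinationSketch(bool reloadFlag, bool spillAfterFlag, bool copyRegFlag, bool moveRegFlag)
{
    (void)spillAfterFlag; // spillAfter may combine with any of the other flags.
    // reload may only combine with spillAfter, never with copyReg or moveReg.
    if (reloadFlag && (copyRegFlag || moveRegFlag))
    {
        return false;
    }
    // copyReg and moveReg are mutually exclusive.
    if (copyRegFlag && moveRegFlag)
    {
        return false;
    }
    // Any remaining combination (a single flag, reload+spillAfter, spillAfter+copyReg,
    // or spillAfter+moveReg) is allowed.
    return true;
}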
unsigned char isPhysRegRef : 1; // true if 'referent' points to a RegRecord, false if it points to an Interval
unsigned char isFixedRegRef : 1;
unsigned char isLocalDefUse : 1;
// delayRegFree indicates that the register should not be freed right away, but instead wait
// until the next Location after it would normally be freed. This is used for the case of
// non-commutative binary operators, where op2 must not be assigned the same register as
// the target. We do this by not freeing it until after the target has been defined.
// Another option would be to actually change the Location of the op2 use to be the same
// Location as the def, but then it could potentially reuse a register that has been freed
// from the other source(s), e.g. if it's a lastUse or spilled.
unsigned char delayRegFree : 1;
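// Illustrative sketch (not part of the JIT sources): delayRegFree keeps a use's register
// busy through the consuming node's def location (use location + 1), so the def cannot be
// given the same register. The helper name and parameters are assumptions for illustration.
static bool RegisterIsFreeAtSketch(unsigned lastUseLocation, bool delayFree, unsigned queryLocation)
{
    // A delay-free use remains busy through lastUseLocation + 1, which is where the
    // consuming node defines its result.
    unsigned endLocation = delayFree ? lastUseLocation + 1 : lastUseLocation;
    return queryLocation > endLocation;
}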
// outOfOrder is marked on a (non-def) RefPosition that doesn't follow a definition of the
// register currently assigned to the Interval. This happens when we use the assigned
// register from a predecessor that is not the most recently allocated BasicBlock.
unsigned char outOfOrder : 1;
// Minimum number of registers that need to be ensured while
// constraining candidates for this ref position under LSRA stress.
unsigned minRegCandidateCount;
// The unique RefPosition number, equal to its index in the
// refPositions list. Only used for debugging dumps.
RefPosition(unsigned int bbNum, LsraLocation nodeLocation, GenTree* treeNode, RefType refType)
, nextRefPosition(nullptr)
, treeNode(treeNode)
, nodeLocation(nodeLocation)
, registerAssignment(RBM_NONE)
, isPhysRegRef(false)
, isFixedRegRef(false)
, isLocalDefUse(false)
, delayRegFree(false)
, minRegCandidateCount(1)
Interval* getInterval()
assert(!isPhysRegRef);
return (Interval*)referent;
void setInterval(Interval* i)
isPhysRegRef = false;
assert(isPhysRegRef);
return (RegRecord*)referent;
void setReg(RegRecord* r)
isPhysRegRef = true;
registerAssignment = genRegMask(r->regNum);
regNumber assignedReg()
if (registerAssignment == RBM_NONE)
return genRegNumFromMask(registerAssignment);
// Returns true if it is a reference on a gentree node.
return (refType == RefTypeDef || refType == RefTypeUse);
bool RequiresRegister()
return (IsActualRef()
#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
|| refType == RefTypeUpperVectorSaveDef || refType == RefTypeUpperVectorSaveUse
#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
!AllocateIfProfitable();
void setAllocateIfProfitable(bool val)
allocRegIfProfitable = val;
// Returns true if this ref position is to be allocated
// a reg only when it is profitable to do so.
bool AllocateIfProfitable()
// TODO-CQ: Right now if a ref position is marked as
// copyreg or movereg, then it is not treated as
// 'allocate if profitable'. This is an implementation
// limitation that needs to be addressed.
return allocRegIfProfitable && !copyReg && !moveReg;
void setMultiRegIdx(unsigned idx)
assert(multiRegIdx == idx);
unsigned getMultiRegIdx()
LsraLocation getRefEndLocation()
return delayRegFree ? nodeLocation + 1 : nodeLocation;
bool isIntervalRef()
return (!isPhysRegRef && (referent != nullptr));
// isTrueDef indicates that the RefPosition is a non-update def of a non-internal interval.
return (refType == RefTypeDef && isIntervalRef() && !getInterval()->isInternal);
// isFixedRefOfRegMask indicates that the RefPosition has a fixed assignment to the register
// specified by the given mask
bool isFixedRefOfRegMask(regMaskTP regMask)
assert(genMaxOneBit(regMask));
return (registerAssignment == regMask);
// isFixedRefOfReg indicates that the RefPosition has a fixed assignment to the given register
bool isFixedRefOfReg(regNumber regNum)
return (isFixedRefOfRegMask(genRegMask(regNum)));
// operator= copies everything except 'rpNum', which must remain unique
RefPosition& operator=(const RefPosition& rp)
unsigned rpNumSave = rpNum;
memcpy(this, &rp, sizeof(rp));
void dumpRegMask(regMaskTP regs);
/*****************************************************************************/
/*****************************************************************************/