1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
4 /*****************************************************************************/
10 #include "smallhash.h"
13 // Minor and forward-reference types
22 // LsraLocation tracks the linearized order of the nodes.
23 // Each node is assigned two LsraLocations - one for all the uses and all but the last
24 // def, and a second location for the last def (if any)
26 typedef unsigned int LsraLocation;
27 const unsigned int MinLocation = 0;
28 const unsigned int MaxLocation = UINT_MAX;
29 // max number of registers an operation could require internally (in addition to uses and defs)
30 const unsigned int MaxInternalRegisters = 8;
31 const unsigned int RegisterTypeCount = 2;
33 typedef var_types RegisterType;
34 #define IntRegisterType TYP_INT
35 #define FloatRegisterType TYP_FLOAT
// Return the callee-saved register mask appropriate for the given register
// type: the integer callee-saved set for integral (or GC-pointer) types,
// and the float/SIMD callee-saved set otherwise.
37 inline regMaskTP calleeSaveRegs(RegisterType rt)
39 return varTypeIsIntegralOrI(rt) ? RBM_INT_CALLEE_SAVED : RBM_FLT_CALLEE_SAVED;
46 // Reg Index in case of multi-reg result producing call node.
47 // Indicates the position of the register that this location refers to.
48 // The max bits needed is based on max value of MAX_RET_REG_COUNT value
49 // across all targets, and that happens to be 4 on Arm. Hence the index value
50 // would be 0..MAX_RET_REG_COUNT-1.
51 unsigned multiRegIdx : 2;
56 LocationInfo(LsraLocation l, Interval* i, GenTree* t, unsigned regIdx = 0)
57 : loc(l), multiRegIdx(regIdx), interval(i), treeNode(t)
59 assert(multiRegIdx == regIdx);
62 // default constructor for data structures
70 // bbNum of the predecessor to use for the register location of live-in variables.
72 BasicBlock::weight_t weight;
73 unsigned int predBBNum;
74 bool hasCriticalInEdge;
75 bool hasCriticalOutEdge;
78 // Per block maintained LSRA statistics.
80 // Number of spills of local vars or tree temps in this basic block.
83 // Number of GT_COPY nodes inserted in this basic block while allocating regs.
84 // Note that GT_COPY nodes are also inserted as part of basic block boundary
85 // resolution, which are accounted against resolutionMovCount but not
86 // against copyRegCount.
87 unsigned copyRegCount;
89 // Number of resolution moves inserted in this basic block.
90 unsigned resolutionMovCount;
92 // Number of critical edges from this block that are split.
93 unsigned splitEdgeCount;
94 #endif // TRACK_LSRA_STATS
97 // This is sort of a bit mask
98 // The low order 2 bits will be 1 for defs, and 2 for uses
99 enum RefType : unsigned char
101 #define DEF_REFTYPE(memberName, memberValue, shortName) memberName = memberValue,
102 #include "lsra_reftypes.h"
106 // position in a block (for resolution)
109 BlockPositionStart = 0,
110 BlockPositionEnd = 1,
// True iff 'refType' has the RefTypeUse bit set (per the RefType comment
// above, the low-order bits encode def=1 / use=2), i.e. this reference
// consumes a value.
114 inline bool RefTypeIsUse(RefType refType)
116 return ((refType & RefTypeUse) == RefTypeUse);
// True iff 'refType' has the RefTypeDef bit set, i.e. this reference
// produces (defines) a value.
119 inline bool RefTypeIsDef(RefType refType)
121 return ((refType & RefTypeDef) == RefTypeDef);
124 typedef regNumberSmall* VarToRegMap;
// ListElementAllocator: an STL-style allocator used by the jitstd::list
// instantiations below. It satisfies the parts of the Allocator contract
// that jitstd::list needs (allocate/deallocate/rebind), forwarding all
// allocation to the JIT's arena via Compiler::compGetMem, tagged with the
// given CompMemKind for memory accounting.
126 template <typename ElementType, CompMemKind MemKind>
127 class ListElementAllocator
// Befriend all other instantiations so the converting constructor below
// can read 'other.m_compiler' across element types.
130 template <typename U, CompMemKind CMK>
131 friend class ListElementAllocator;
// The compiler whose arena allocator backs every allocation.
133 Compiler* m_compiler;
136 ListElementAllocator(Compiler* compiler) : m_compiler(compiler)
// Converting constructor required by the Allocator concept (rebinding to
// a different element type shares the same compiler/arena).
140 template <typename U>
141 ListElementAllocator(const ListElementAllocator<U, MemKind>& other) : m_compiler(other.m_compiler)
// Allocate raw storage for 'count' elements from the JIT arena.
145 ElementType* allocate(size_t count)
147 return reinterpret_cast<ElementType*>(m_compiler->compGetMem(sizeof(ElementType) * count, MemKind));
// Arena memory is released en masse at the end of compilation, so
// per-element deallocation is a no-op.
150 void deallocate(ElementType* pointer, size_t count)
// Standard 'rebind' member: maps this allocator to one for type U.
154 template <typename U>
157 typedef ListElementAllocator<U, MemKind> allocator;
161 typedef ListElementAllocator<Interval, CMK_LSRA_Interval> LinearScanMemoryAllocatorInterval;
162 typedef ListElementAllocator<RefPosition, CMK_LSRA_RefPosition> LinearScanMemoryAllocatorRefPosition;
164 typedef jitstd::list<Interval, LinearScanMemoryAllocatorInterval> IntervalList;
165 typedef jitstd::list<RefPosition, LinearScanMemoryAllocatorRefPosition> RefPositionList;
172 firstRefPosition = nullptr;
173 recentRefPosition = nullptr;
174 lastRefPosition = nullptr;
178 // A linked list of RefPositions. These are only traversed in the forward
179 // direction, and are not moved, so they don't need to be doubly linked
180 // (see RefPosition).
182 RefPosition* firstRefPosition;
183 RefPosition* recentRefPosition;
184 RefPosition* lastRefPosition;
188 // Get the position of the next reference which is at or greater than
189 // the current location (relies upon recentRefPosition being updated
190 // during traversal).
191 RefPosition* getNextRefPosition();
192 LsraLocation getNextRefLocation();
// RegRecord: per-physical-register state tracked during allocation.
// Derives from Referenceable so each register carries its own chain of
// (fixed) RefPositions.
195 class RegRecord : public Referenceable
200 assignedInterval = nullptr;
201 previousInterval = nullptr;
203 isCalleeSave = false;
204 registerType = IntRegisterType;
205 isBusyUntilNextKill = false;
// Classify the given physical register: set its registerType (int vs.
// float) and record whether it is callee-saved.
208 void init(regNumber reg)
210 #ifdef _TARGET_ARM64_
211 // The Zero register, or the SP
212 if ((reg == REG_ZR) || (reg == REG_SP))
214 // IsGeneralRegister returns false for REG_ZR and REG_SP
216 registerType = IntRegisterType;
220 if (emitter::isFloatReg(reg))
222 registerType = FloatRegisterType;
226 // The constructor defaults to IntRegisterType
227 assert(emitter::isGeneralRegister(reg) && registerType == IntRegisterType);
// A register is callee-save iff its bit appears in RBM_CALLEE_SAVED.
230 isCalleeSave = ((RBM_CALLEE_SAVED & genRegMask(reg)) != 0);
234 // print out representation
236 // concise representation for embedding
242 // RefPosition * getNextRefPosition();
243 // LsraLocation getNextRefLocation();
247 // interval to which this register is currently allocated.
248 // If the interval is inactive (isActive == false) then it is not currently live,
249 // and the register can be unassigned (i.e. setting assignedInterval to nullptr)
250 // without spilling the register.
251 Interval* assignedInterval;
252 // Interval to which this register was previously allocated, and which was unassigned
253 // because it was inactive. This register will be reassigned to this Interval when
254 // assignedInterval becomes inactive.
255 Interval* previousInterval;
259 RegisterType registerType;
260 // This register must be considered busy until the next time it is explicitly killed.
261 // This is used so that putarg_reg can avoid killing its lclVar source, while avoiding
262 // the problem with the reg becoming free if the last-use is encountered before the call.
263 bool isBusyUntilNextKill;
// True if a fixed reference to this register at the given RefPosition's
// location would conflict with an existing assignment (declaration only;
// defined elsewhere).
265 bool conflictingFixedRegReference(RefPosition* refPosition);
// Return true only when 'leaf' is an integer constant that fits in 32 bits
// and whose value lies within [lower, upper]; each check below rejects one
// way the contract can fail.
268 inline bool leafInRange(GenTree* leaf, int lower, int upper)
// Not a 32-bit-representable integer constant: not in range.
270 if (!leaf->IsIntCnsFitsInI32())
// Constant is below the inclusive lower bound.
274 if (leaf->gtIntCon.gtIconVal < lower)
// Constant is above the inclusive upper bound.
278 if (leaf->gtIntCon.gtIconVal > upper)
// Overload of leafInRange that additionally requires the constant to be an
// exact multiple of 'multiple'.
286 inline bool leafInRange(GenTree* leaf, int lower, int upper, int multiple)
// First apply the basic [lower, upper] constant check above.
288 if (!leafInRange(leaf, lower, upper))
// Nonzero remainder means the value is not a multiple of 'multiple'.
292 if (leaf->gtIntCon.gtIconVal % multiple)
// Return true only when 'leaf' is a GT_ADD whose second operand is an
// integer constant in [lower, upper] (and a multiple of 'multiple',
// which defaults to 1, i.e. no multiplicity constraint).
300 inline bool leafAddInRange(GenTree* leaf, int lower, int upper, int multiple = 1)
302 if (leaf->OperGet() != GT_ADD)
// Only the addend (second operand) is range-checked here.
306 return leafInRange(leaf->gtOp.gtOp2, lower, upper, multiple);
// True iff this local variable was marked as a register-allocation
// candidate (lvLRACandidate) by identifyCandidates.
309 inline bool isCandidateVar(LclVarDsc* varDsc)
311 return varDsc->lvLRACandidate;
314 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
315 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
319 XX This is the container for the Linear Scan data structures and methods. XX
321 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
322 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
324 // OPTION 1: The algorithm as described in "Optimized Interval Splitting in a
325 // Linear Scan Register Allocator". It is driven by iterating over the Interval
326 // lists. In this case, we need multiple IntervalLists, and Intervals will be
327 // moved between them so they must be easily updated.
329 // OPTION 2: The algorithm is driven by iterating over the RefPositions. In this
330 // case, we only need a single IntervalList, and it won't be updated.
331 // The RefPosition must refer to its Interval, and we need to be able to traverse
332 // to the next RefPosition in code order
333 // THIS IS THE OPTION CURRENTLY BEING PURSUED
335 class LocationInfoList;
336 class LocationInfoListNodePool;
338 class LinearScan : public LinearScanInterface
340 friend class RefPosition;
341 friend class Interval;
342 friend class Lowering;
343 friend class TreeNodeInfo;
346 // This could use further abstraction. From Compiler we need the tree,
347 // the flowgraph and the allocator.
348 LinearScan(Compiler* theCompiler);
350 // This is the main driver
351 virtual void doLinearScan();
353 // TreeNodeInfo contains three register masks: src candidates, dst candidates, and internal candidates.
354 // Instead of storing actual register masks, however, which are large, we store a small index into a table
355 // of register masks, stored in this class. We create only as many distinct register masks as are needed.
356 // All identical register masks get the same index. The register mask table contains:
357 // 1. A mask containing all eligible integer registers.
358 // 2. A mask containing all eligible floating-point registers.
359 // 3. A mask for each single register.
360 // 4. A mask for each combination of registers, created dynamically as required.
362 // Currently, the maximum number of masks allowed is a constant defined by 'numMasks'. The register mask
363 // table is never resized. It is also limited by the size of the index, currently an unsigned char.
364 CLANG_FORMAT_COMMENT_ANCHOR;
366 #if defined(_TARGET_ARM64_)
367 static const int numMasks = 128;
369 static const int numMasks = 64;
372 regMaskTP* regMaskTable;
375 typedef int RegMaskIndex;
377 // allint is 0, allfloat is 1, all the single-bit masks start at 2
382 FIRST_SINGLE_REG_IDX = 2
385 RegMaskIndex GetIndexForRegMask(regMaskTP mask);
386 regMaskTP GetRegMaskForIndex(RegMaskIndex index);
387 void RemoveRegisterFromMasks(regNumber reg);
390 void dspRegisterMaskTable();
393 // Initialize the block traversal for LSRA.
394 // This resets the bbVisitedSet, and on the first invocation sets the blockSequence array,
395 // which determines the order in which blocks will be allocated (currently called during Lowering).
396 BasicBlock* startBlockSequence();
397 // Move to the next block in sequence, updating the current block information.
398 BasicBlock* moveToNextBlock();
399 // Get the next block to be scheduled without changing the current block,
400 // but updating the blockSequence during the first iteration if it is not fully computed.
401 BasicBlock* getNextBlock();
403 // This is called during code generation to update the location of variables
404 virtual void recordVarLocationsAtStartOfBB(BasicBlock* bb);
406 // This does the dataflow analysis and builds the intervals
407 void buildIntervals();
409 // This is where the actual assignment is done
410 void allocateRegisters();
412 // This is the resolution phase, where cross-block mismatches are fixed up
413 void resolveRegisters();
415 void writeRegisters(RefPosition* currentRefPosition, GenTree* tree);
417 // Insert a copy in the case where a tree node value must be moved to a different
418 // register at the point of use, or it is reloaded to a different register
419 // than the one it was spilled from
420 void insertCopyOrReload(BasicBlock* block, GenTreePtr tree, unsigned multiRegIdx, RefPosition* refPosition);
422 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
423 // Insert code to save and restore the upper half of a vector that lives
424 // in a callee-save register at the point of a call (the upper half is
426 void insertUpperVectorSaveAndReload(GenTreePtr tree, RefPosition* refPosition, BasicBlock* block);
427 #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
429 // resolve along one block-block edge
435 ResolveSharedCritical,
439 static const char* resolveTypeName[ResolveTypeCount];
449 BasicBlock* block, GenTreePtr insertionPoint, Interval* interval, regNumber outReg, regNumber inReg);
451 void handleOutgoingCriticalEdges(BasicBlock* block);
453 void resolveEdge(BasicBlock* fromBlock, BasicBlock* toBlock, ResolveType resolveType, VARSET_VALARG_TP liveSet);
457 // Finally, the register assignments are written back to the tree nodes.
458 void recordRegisterAssignments();
460 // Keep track of how many temp locations we'll need for spill
462 void updateMaxSpill(RefPosition* refPosition);
463 void recordMaxSpill();
465 // max simultaneous spill locations used of every type
466 unsigned int maxSpill[TYP_COUNT];
467 unsigned int currentSpill[TYP_COUNT];
468 bool needFloatTmpForFPCall;
469 bool needDoubleTmpForFPCall;
473 //------------------------------------------------------------------------
474 // Should we stress lsra?
475 // This uses the same COMPLUS variable as rsStressRegs (COMPlus_JitStressRegs)
476 // However, the possible values and their interpretation are entirely different.
478 // The mask bits are currently divided into fields in which each non-zero value
479 // is a distinct stress option (e.g. 0x3 is not a combination of 0x1 and 0x2).
480 // However, subject to possible constraints (to be determined), the different
481 // fields can be combined (e.g. 0x7 is a combination of 0x3 and 0x4).
482 // Note that the field values are declared in a public enum, but the actual bits are
483 // only accessed via accessors.
485 unsigned lsraStressMask;
487 // This controls the registers available for allocation
488 enum LsraStressLimitRegs{LSRA_LIMIT_NONE = 0, LSRA_LIMIT_CALLEE = 0x1, LSRA_LIMIT_CALLER = 0x2,
489 LSRA_LIMIT_SMALL_SET = 0x3, LSRA_LIMIT_MASK = 0x3};
491 // When LSRA_LIMIT_SMALL_SET is specified, it is desirable to select a "mixed" set of caller- and callee-save
492 // registers, so as to get different coverage than limiting to callee or caller.
493 // At least for x86 and AMD64, and potentially other architecture that will support SIMD,
494 // we need a minimum of 5 fp regs in order to support the InitN intrinsic for Vector4.
495 // Hence the "SmallFPSet" has 5 elements.
496 CLANG_FORMAT_COMMENT_ANCHOR;
498 #if defined(_TARGET_AMD64_)
499 #ifdef UNIX_AMD64_ABI
500 // On System V the RDI and RSI are not callee saved. Use R12 and R13 as callee saved registers.
501 static const regMaskTP LsraLimitSmallIntSet =
502 (RBM_EAX | RBM_ECX | RBM_EBX | RBM_ETW_FRAMED_EBP | RBM_R12 | RBM_R13);
503 #else // !UNIX_AMD64_ABI
504 // On Windows Amd64 use the RDI and RSI as callee saved registers.
505 static const regMaskTP LsraLimitSmallIntSet =
506 (RBM_EAX | RBM_ECX | RBM_EBX | RBM_ETW_FRAMED_EBP | RBM_ESI | RBM_EDI);
507 #endif // !UNIX_AMD64_ABI
508 static const regMaskTP LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7);
509 #elif defined(_TARGET_ARM_)
510 static const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3 | RBM_R4);
511 static const regMaskTP LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F16 | RBM_F17);
512 #elif defined(_TARGET_ARM64_)
513 static const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R19 | RBM_R20);
514 static const regMaskTP LsraLimitSmallFPSet = (RBM_V0 | RBM_V1 | RBM_V2 | RBM_V8 | RBM_V9);
515 #elif defined(_TARGET_X86_)
516 static const regMaskTP LsraLimitSmallIntSet = (RBM_EAX | RBM_ECX | RBM_EDI);
517 static const regMaskTP LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7);
519 #error Unsupported or unset target architecture
522 LsraStressLimitRegs getStressLimitRegs()
524 return (LsraStressLimitRegs)(lsraStressMask & LSRA_LIMIT_MASK);
527 regMaskTP getConstrainedRegMask(regMaskTP regMaskActual, regMaskTP regMaskConstrain, unsigned minRegCount);
528 regMaskTP stressLimitRegs(RefPosition* refPosition, regMaskTP mask);
530 // This controls the heuristics used to select registers
531 // These can be combined.
532 enum LsraSelect{LSRA_SELECT_DEFAULT = 0, LSRA_SELECT_REVERSE_HEURISTICS = 0x04,
533 LSRA_SELECT_REVERSE_CALLER_CALLEE = 0x08, LSRA_SELECT_NEAREST = 0x10, LSRA_SELECT_MASK = 0x1c};
534 LsraSelect getSelectionHeuristics()
536 return (LsraSelect)(lsraStressMask & LSRA_SELECT_MASK);
538 bool doReverseSelect()
540 return ((lsraStressMask & LSRA_SELECT_REVERSE_HEURISTICS) != 0);
542 bool doReverseCallerCallee()
544 return ((lsraStressMask & LSRA_SELECT_REVERSE_CALLER_CALLEE) != 0);
546 bool doSelectNearest()
548 return ((lsraStressMask & LSRA_SELECT_NEAREST) != 0);
551 // This controls the order in which basic blocks are visited during allocation
552 enum LsraTraversalOrder{LSRA_TRAVERSE_LAYOUT = 0x20, LSRA_TRAVERSE_PRED_FIRST = 0x40,
553 LSRA_TRAVERSE_RANDOM = 0x60, // NYI
554 LSRA_TRAVERSE_DEFAULT = LSRA_TRAVERSE_PRED_FIRST, LSRA_TRAVERSE_MASK = 0x60};
555 LsraTraversalOrder getLsraTraversalOrder()
557 if ((lsraStressMask & LSRA_TRAVERSE_MASK) == 0)
559 return LSRA_TRAVERSE_DEFAULT;
561 return (LsraTraversalOrder)(lsraStressMask & LSRA_TRAVERSE_MASK);
563 bool isTraversalLayoutOrder()
565 return getLsraTraversalOrder() == LSRA_TRAVERSE_LAYOUT;
567 bool isTraversalPredFirstOrder()
569 return getLsraTraversalOrder() == LSRA_TRAVERSE_PRED_FIRST;
572 // This controls whether lifetimes should be extended to the entire method.
573 // Note that this has no effect under MinOpts
574 enum LsraExtendLifetimes{LSRA_DONT_EXTEND = 0, LSRA_EXTEND_LIFETIMES = 0x80, LSRA_EXTEND_LIFETIMES_MASK = 0x80};
575 LsraExtendLifetimes getLsraExtendLifeTimes()
577 return (LsraExtendLifetimes)(lsraStressMask & LSRA_EXTEND_LIFETIMES_MASK);
579 bool extendLifetimes()
581 return getLsraExtendLifeTimes() == LSRA_EXTEND_LIFETIMES;
584 // This controls whether variables locations should be set to the previous block in layout order
585 // (LSRA_BLOCK_BOUNDARY_LAYOUT), or to that of the highest-weight predecessor (LSRA_BLOCK_BOUNDARY_PRED -
586 // the default), or rotated (LSRA_BLOCK_BOUNDARY_ROTATE).
587 enum LsraBlockBoundaryLocations{LSRA_BLOCK_BOUNDARY_PRED = 0, LSRA_BLOCK_BOUNDARY_LAYOUT = 0x100,
588 LSRA_BLOCK_BOUNDARY_ROTATE = 0x200, LSRA_BLOCK_BOUNDARY_MASK = 0x300};
589 LsraBlockBoundaryLocations getLsraBlockBoundaryLocations()
591 return (LsraBlockBoundaryLocations)(lsraStressMask & LSRA_BLOCK_BOUNDARY_MASK);
593 regNumber rotateBlockStartLocation(Interval* interval, regNumber targetReg, regMaskTP availableRegs);
595 // This controls whether we always insert a GT_RELOAD instruction after a spill
596 // Note that this can be combined with LSRA_SPILL_ALWAYS (or not)
597 enum LsraReload{LSRA_NO_RELOAD_IF_SAME = 0, LSRA_ALWAYS_INSERT_RELOAD = 0x400, LSRA_RELOAD_MASK = 0x400};
598 LsraReload getLsraReload()
600 return (LsraReload)(lsraStressMask & LSRA_RELOAD_MASK);
602 bool alwaysInsertReload()
604 return getLsraReload() == LSRA_ALWAYS_INSERT_RELOAD;
607 // This controls whether we spill everywhere
608 enum LsraSpill{LSRA_DONT_SPILL_ALWAYS = 0, LSRA_SPILL_ALWAYS = 0x800, LSRA_SPILL_MASK = 0x800};
609 LsraSpill getLsraSpill()
611 return (LsraSpill)(lsraStressMask & LSRA_SPILL_MASK);
615 return getLsraSpill() == LSRA_SPILL_ALWAYS;
618 // This controls whether RefPositions that lower/codegen indicated as reg optional be
619 // allocated a reg at all.
620 enum LsraRegOptionalControl{LSRA_REG_OPTIONAL_DEFAULT = 0, LSRA_REG_OPTIONAL_NO_ALLOC = 0x1000,
621 LSRA_REG_OPTIONAL_MASK = 0x1000};
623 LsraRegOptionalControl getLsraRegOptionalControl()
625 return (LsraRegOptionalControl)(lsraStressMask & LSRA_REG_OPTIONAL_MASK);
628 bool regOptionalNoAlloc()
630 return getLsraRegOptionalControl() == LSRA_REG_OPTIONAL_NO_ALLOC;
634 void lsraDumpIntervals(const char* msg);
635 void dumpRefPositions(const char* msg);
636 void dumpVarRefPositions(const char* msg);
638 static bool IsResolutionMove(GenTree* node);
639 static bool IsResolutionNode(LIR::Range& containingRange, GenTree* node);
641 void verifyFinalAllocation();
642 void verifyResolutionMove(GenTree* resolutionNode, LsraLocation currentLocation);
644 bool doSelectNearest()
648 bool extendLifetimes()
656 // In a retail build we support only the default traversal order
657 bool isTraversalLayoutOrder()
661 bool isTraversalPredFirstOrder()
665 bool getLsraExtendLifeTimes()
672 // Used by Lowering when considering whether to split Longs, as well as by identifyCandidates().
673 bool isRegCandidate(LclVarDsc* varDsc);
675 bool isContainableMemoryOp(GenTree* node);
678 // Determine which locals are candidates for allocation
679 void identifyCandidates();
681 // determine which locals are used in EH constructs we don't want to deal with
682 void identifyCandidatesExceptionDataflow();
684 void buildPhysRegRecords();
687 void checkLastUses(BasicBlock* block);
692 // Update allocations at start/end of block
693 void processBlockEndAllocation(BasicBlock* current);
695 // Record variable locations at start/end of block
696 void processBlockStartLocations(BasicBlock* current, bool allocationPass);
697 void processBlockEndLocations(BasicBlock* current);
700 bool isSecondHalfReg(RegRecord* regRec, Interval* interval);
701 RegRecord* findAnotherHalfRegRec(RegRecord* regRec);
702 bool canSpillDoubleReg(RegRecord* physRegRecord,
703 LsraLocation refLocation,
704 unsigned* recentAssignedRefWeight,
705 unsigned farthestRefPosWeight);
706 void unassignDoublePhysReg(RegRecord* doubleRegRecord);
708 void updateAssignedInterval(RegRecord* reg, Interval* interval, RegisterType regType);
709 void updatePreviousInterval(RegRecord* reg, Interval* interval, RegisterType regType);
710 bool canRestorePreviousInterval(RegRecord* regRec, Interval* assignedInterval);
711 bool isAssignedToInterval(Interval* interval, RegRecord* regRec);
712 bool checkActiveInterval(Interval* interval, LsraLocation refLocation);
713 bool checkActiveIntervals(RegRecord* physRegRecord, LsraLocation refLocation, RegisterType registerType);
714 bool canSpillReg(RegRecord* physRegRecord,
715 LsraLocation refLocation,
716 unsigned* recentAssignedRefWeight,
717 unsigned farthestRefPosWeight);
718 bool isRegInUse(RegRecord* regRec, RefPosition* refPosition, LsraLocation* nextLocation);
720 RefType CheckBlockType(BasicBlock* block, BasicBlock* prevBlock);
722 // insert refpositions representing prolog zero-inits which will be added later
723 void insertZeroInitRefPositions();
725 void AddMapping(GenTree* node, LsraLocation loc);
727 // add physreg refpositions for a tree node, based on calling convention and instruction selection predictions
728 void addRefsForPhysRegMask(regMaskTP mask, LsraLocation currentLoc, RefType refType, bool isLastUse);
730 void resolveConflictingDefAndUse(Interval* interval, RefPosition* defRefPosition);
732 void buildRefPositionsForNode(GenTree* tree,
734 LocationInfoListNodePool& listNodePool,
735 HashTableBase<GenTree*, LocationInfoList>& operandToLocationInfoMap,
738 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
739 VARSET_VALRET_TP buildUpperVectorSaveRefPositions(GenTree* tree, LsraLocation currentLoc);
740 void buildUpperVectorRestoreRefPositions(GenTree* tree, LsraLocation currentLoc, VARSET_VALARG_TP liveLargeVectors);
741 #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
743 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
744 // For AMD64 on SystemV machines. This method
745 // is called as replacement for raUpdateRegStateForArg
746 // that is used on Windows. On System V systems a struct can be passed
747 // partially using registers from the 2 register files.
748 void unixAmd64UpdateRegStateForArg(LclVarDsc* argDsc);
749 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
751 // Update reg state for an incoming register argument
752 void updateRegStateForArg(LclVarDsc* argDsc);
754 inline bool isLocalDefUse(GenTree* tree)
756 return tree->gtLsraInfo.isLocalDefUse;
759 inline bool isCandidateLocalRef(GenTree* tree)
763 unsigned int lclNum = tree->gtLclVarCommon.gtLclNum;
764 assert(lclNum < compiler->lvaCount);
765 LclVarDsc* varDsc = compiler->lvaTable + tree->gtLclVarCommon.gtLclNum;
767 return isCandidateVar(varDsc);
772 static Compiler::fgWalkResult markAddrModeOperandsHelperMD(GenTreePtr tree, void* p);
774 // Return the registers killed by the given tree node.
775 regMaskTP getKillSetForNode(GenTree* tree);
777 // Given some tree node add refpositions for all the registers this node kills
778 bool buildKillPositionsForNode(GenTree* tree, LsraLocation currentLoc);
780 regMaskTP allRegs(RegisterType rt);
781 regMaskTP allRegs(GenTree* tree);
782 regMaskTP allMultiRegCallNodeRegs(GenTreeCall* tree);
783 regMaskTP allSIMDRegs();
784 regMaskTP internalFloatRegCandidates();
786 bool registerIsFree(regNumber regNum, RegisterType regType);
787 bool registerIsAvailable(RegRecord* physRegRecord,
788 LsraLocation currentLoc,
789 LsraLocation* nextRefLocationPtr,
790 RegisterType regType);
791 void freeRegister(RegRecord* physRegRecord);
792 void freeRegisters(regMaskTP regsToFree);
794 regMaskTP getUseCandidates(GenTree* useNode);
795 regMaskTP getDefCandidates(GenTree* tree);
796 var_types getDefType(GenTree* tree);
798 RefPosition* defineNewInternalTemp(GenTree* tree,
799 RegisterType regType,
800 LsraLocation currentLoc,
801 regMaskTP regMask DEBUGARG(unsigned minRegCandidateCount));
803 int buildInternalRegisterDefsForNode(GenTree* tree,
804 LsraLocation currentLoc,
805 RefPosition* defs[] DEBUGARG(unsigned minRegCandidateCount));
807 void buildInternalRegisterUsesForNode(GenTree* tree,
808 LsraLocation currentLoc,
810 int total DEBUGARG(unsigned minRegCandidateCount));
812 void resolveLocalRef(BasicBlock* block, GenTreePtr treeNode, RefPosition* currentRefPosition);
814 void insertMove(BasicBlock* block, GenTreePtr insertionPoint, unsigned lclNum, regNumber inReg, regNumber outReg);
816 void insertSwap(BasicBlock* block,
817 GenTreePtr insertionPoint,
824 // TODO-Cleanup: unused?
825 class PhysRegIntervalIterator
828 PhysRegIntervalIterator(LinearScan* theLinearScan)
830 nextRegNumber = (regNumber)0;
831 linearScan = theLinearScan;
835 return &linearScan->physRegs[nextRegNumber];
839 // This assumes that the physical registers are contiguous, starting
840 // with a register number of 0
841 regNumber nextRegNumber;
842 LinearScan* linearScan;
846 Interval* newInterval(RegisterType regType);
848 Interval* getIntervalForLocalVar(unsigned varIndex)
850 assert(varIndex < compiler->lvaTrackedCount);
851 assert(localVarIntervals[varIndex] != nullptr);
852 return localVarIntervals[varIndex];
855 Interval* getIntervalForLocalVarNode(GenTreeLclVarCommon* tree)
857 LclVarDsc* varDsc = &compiler->lvaTable[tree->gtLclNum];
858 assert(varDsc->lvTracked);
859 return getIntervalForLocalVar(varDsc->lvVarIndex);
862 RegRecord* getRegisterRecord(regNumber regNum);
864 RefPosition* newRefPositionRaw(LsraLocation nodeLocation, GenTree* treeNode, RefType refType);
866 RefPosition* newRefPosition(Interval* theInterval,
867 LsraLocation theLocation,
869 GenTree* theTreeNode,
871 unsigned multiRegIdx = 0 DEBUGARG(unsigned minRegCandidateCount = 1));
873 RefPosition* newRefPosition(
874 regNumber reg, LsraLocation theLocation, RefType theRefType, GenTree* theTreeNode, regMaskTP mask);
876 void applyCalleeSaveHeuristics(RefPosition* rp);
878 void associateRefPosWithInterval(RefPosition* rp);
880 void associateRefPosWithRegister(RefPosition* rp);
882 unsigned getWeight(RefPosition* refPos);
884 /*****************************************************************************
885 * Register management
886 ****************************************************************************/
887 RegisterType getRegisterType(Interval* currentInterval, RefPosition* refPosition);
888 regNumber tryAllocateFreeReg(Interval* current, RefPosition* refPosition);
889 RegRecord* findBestPhysicalReg(RegisterType regType,
890 LsraLocation endLocation,
891 regMaskTP candidates,
892 regMaskTP preferences);
893 regNumber allocateBusyReg(Interval* current, RefPosition* refPosition, bool allocateIfProfitable);
894 regNumber assignCopyReg(RefPosition* refPosition);
896 void checkAndAssignInterval(RegRecord* regRec, Interval* interval);
897 void assignPhysReg(RegRecord* regRec, Interval* interval);
898 void assignPhysReg(regNumber reg, Interval* interval)
900 assignPhysReg(getRegisterRecord(reg), interval);
903 bool isAssigned(RegRecord* regRec ARM_ARG(RegisterType newRegType));
904 bool isAssigned(RegRecord* regRec, LsraLocation lastLocation ARM_ARG(RegisterType newRegType));
905 void checkAndClearInterval(RegRecord* regRec, RefPosition* spillRefPosition);
906 void unassignPhysReg(RegRecord* regRec ARM_ARG(RegisterType newRegType));
907 void unassignPhysReg(RegRecord* regRec, RefPosition* spillRefPosition);
908 void unassignPhysRegNoSpill(RegRecord* reg);
909 void unassignPhysReg(regNumber reg)
911 unassignPhysReg(getRegisterRecord(reg), nullptr);
914 void setIntervalAsSpilled(Interval* interval);
915 void setIntervalAsSplit(Interval* interval);
916 void spillInterval(Interval* interval, RefPosition* fromRefPosition, RefPosition* toRefPosition);
918 void spillGCRefs(RefPosition* killRefPosition);
920 /*****************************************************************************
921 * For Resolution phase
922 ****************************************************************************/
923 // TODO-Throughput: Consider refactoring this so that we keep a map from regs to vars for better scaling
924 unsigned int regMapCount;
926 // When we split edges, we create new blocks, and instead of expanding the VarToRegMaps, we
927 // rely on the property that the "in" map is the same as the "from" block of the edge, and the
928 // "out" map is the same as the "to" block of the edge (by construction).
929 // So, for any block whose bbNum is greater than bbNumMaxBeforeResolution, we use the
930 // splitBBNumToTargetBBNumMap.
931 // TODO-Throughput: We may want to look into the cost/benefit tradeoff of doing this vs. expanding
934 unsigned bbNumMaxBeforeResolution;
940 typedef SimplerHashTable<unsigned, SmallPrimitiveKeyFuncs<unsigned>, SplitEdgeInfo, JitSimplerHashBehavior>
941 SplitBBNumToTargetBBNumMap;
942 SplitBBNumToTargetBBNumMap* splitBBNumToTargetBBNumMap;
943 SplitBBNumToTargetBBNumMap* getSplitBBNumToTargetBBNumMap()
945 if (splitBBNumToTargetBBNumMap == nullptr)
947 splitBBNumToTargetBBNumMap =
948 new (getAllocator(compiler)) SplitBBNumToTargetBBNumMap(getAllocator(compiler));
950 return splitBBNumToTargetBBNumMap;
952 SplitEdgeInfo getSplitEdgeInfo(unsigned int bbNum);
954 void initVarRegMaps();
955 void setInVarRegForBB(unsigned int bbNum, unsigned int varNum, regNumber reg);
956 void setOutVarRegForBB(unsigned int bbNum, unsigned int varNum, regNumber reg);
957 VarToRegMap getInVarToRegMap(unsigned int bbNum);
958 VarToRegMap getOutVarToRegMap(unsigned int bbNum);
959 void setVarReg(VarToRegMap map, unsigned int trackedVarIndex, regNumber reg);
960 regNumber getVarReg(VarToRegMap map, unsigned int trackedVarIndex);
961 // Initialize the incoming VarToRegMap to the given map values (generally a predecessor of
963 VarToRegMap setInVarToRegMap(unsigned int bbNum, VarToRegMap srcVarToRegMap);
965 regNumber getTempRegForResolution(BasicBlock* fromBlock, BasicBlock* toBlock, var_types type);
968 void dumpVarToRegMap(VarToRegMap map);
969 void dumpInVarToRegMap(BasicBlock* block);
970 void dumpOutVarToRegMap(BasicBlock* block);
972 // There are three points at which a tuple-style dump is produced, and each
974 // - In LSRA_DUMP_PRE, it does a simple dump of each node, with indications of what
975 // tree nodes are consumed.
976 // - In LSRA_DUMP_REFPOS, which is after the intervals are built, but before
977 // register allocation, each node is dumped, along with all of the RefPositions,
978 // The Intervals are identified as Lnnn for lclVar intervals, Innn for other
979 // intervals, and Tnnn for internal temps.
980 // - In LSRA_DUMP_POST, which is after register allocation, the registers are
983 enum LsraTupleDumpMode{LSRA_DUMP_PRE, LSRA_DUMP_REFPOS, LSRA_DUMP_POST};
984 void lsraGetOperandString(GenTreePtr tree,
985 LsraTupleDumpMode mode,
987 unsigned operandStringLength);
988 void lsraDispNode(GenTreePtr tree, LsraTupleDumpMode mode, bool hasDest);
989 void DumpOperandDefs(
990 GenTree* operand, bool& first, LsraTupleDumpMode mode, char* operandString, const unsigned operandStringLength);
991 void TupleStyleDump(LsraTupleDumpMode mode);
// Highest LsraLocation assigned to any node; used to size dump columns.
994 LsraLocation maxNodeLocation;
996 // Width of various fields - used to create a streamlined dump during allocation that shows the
997 // state of all the registers in columns.
// Box-drawing strings used to render the columnar register-state dump.
1001 const char* columnSeparator;
1003 const char* leftBox;
1004 const char* middleBox;
1005 const char* rightBox;
// printf-style format strings, built once to match the computed column widths.
1007 static const int MAX_FORMAT_CHARS = 12;
1008 char intervalNameFormat[MAX_FORMAT_CHARS];
1009 char regNameFormat[MAX_FORMAT_CHARS];
1010 char shortRefPositionFormat[MAX_FORMAT_CHARS];
1011 char emptyRefPositionFormat[MAX_FORMAT_CHARS];
1012 char indentFormat[MAX_FORMAT_CHARS];
1013 static const int MAX_LEGEND_FORMAT_CHARS = 25;
1014 char bbRefPosFormat[MAX_LEGEND_FORMAT_CHARS];
1015 char legendFormat[MAX_LEGEND_FORMAT_CHARS];
1017 // How many rows have we printed since last printing a "title row"?
1018 static const int MAX_ROWS_BETWEEN_TITLES = 50;
1019 int rowCountSinceLastTitle;
// Helpers that emit the header/title/body rows of the columnar register dump.
1021 void dumpRegRecordHeader();
1022 void dumpRegRecordTitle();
1023 void dumpRegRecordTitleLines();
1024 int getLastUsedRegNumIndex();
1025 void dumpRegRecords();
1026 // An abbreviated RefPosition dump for printing with column-based register state
1027 void dumpRefPositionShort(RefPosition* refPosition, BasicBlock* currentBlock);
1028 // Print the number of spaces occupied by a dumpRefPositionShort()
1029 void dumpEmptyRefPosition();
1030 // A dump of Referent, in exactly regColumnWidth characters
1031 void dumpIntervalName(Interval* interval);
1033 // Events during the allocation phase that cause some dump output, which differs depending
1034 // upon whether dumpTerse is set:
1036 // Conflicting def/use
1037 LSRA_EVENT_DEFUSE_CONFLICT, LSRA_EVENT_DEFUSE_FIXED_DELAY_USE, LSRA_EVENT_DEFUSE_CASE1, LSRA_EVENT_DEFUSE_CASE2,
1038 LSRA_EVENT_DEFUSE_CASE3, LSRA_EVENT_DEFUSE_CASE4, LSRA_EVENT_DEFUSE_CASE5, LSRA_EVENT_DEFUSE_CASE6,
// Spilling / restoring intervals
1041 LSRA_EVENT_SPILL, LSRA_EVENT_SPILL_EXTENDED_LIFETIME, LSRA_EVENT_RESTORE_PREVIOUS_INTERVAL,
1042 LSRA_EVENT_RESTORE_PREVIOUS_INTERVAL_AFTER_SPILL, LSRA_EVENT_DONE_KILL_GC_REFS,
// Block boundaries
1045 LSRA_EVENT_START_BB, LSRA_EVENT_END_BB,
1048 LSRA_EVENT_FREE_REGS,
1050 // Characteristics of the current RefPosition
1051 LSRA_EVENT_INCREMENT_RANGE_END, // ???
1052 LSRA_EVENT_LAST_USE, LSRA_EVENT_LAST_USE_DELAYED, LSRA_EVENT_NEEDS_NEW_REG,
1054 // Allocation decisions
1055 LSRA_EVENT_FIXED_REG, LSRA_EVENT_EXP_USE, LSRA_EVENT_ZERO_REF, LSRA_EVENT_NO_ENTRY_REG_ALLOCATED,
1056 LSRA_EVENT_KEPT_ALLOCATION, LSRA_EVENT_COPY_REG, LSRA_EVENT_MOVE_REG, LSRA_EVENT_ALLOC_REG,
1057 LSRA_EVENT_ALLOC_SPILLED_REG, LSRA_EVENT_NO_REG_ALLOCATED, LSRA_EVENT_RELOAD, LSRA_EVENT_SPECIAL_PUTARG,
1058 LSRA_EVENT_REUSE_REG,
// Emit dump output for one allocation event; optional args add context to the line.
1060 void dumpLsraAllocationEvent(LsraDumpEvent event,
1061 Interval* interval = nullptr,
1062 regNumber reg = REG_NA,
1063 BasicBlock* currentBlock = nullptr);
1065 void dumpBlockHeader(BasicBlock* block);
// Debug check of interval invariants after allocation.
1067 void validateIntervals();
1070 #if TRACK_LSRA_STATS
// Per-method counters of spills, copies, resolution moves and edge splits.
1072 LSRA_STAT_SPILL, LSRA_STAT_COPY_REG, LSRA_STAT_RESOLUTION_MOV, LSRA_STAT_SPLIT_EDGE,
1075 unsigned regCandidateVarCount;
1076 void updateLsraStat(LsraStat stat, unsigned currentBBNum);
1078 void dumpLsraStats(FILE* file);
// INTRACK_STATS(x) expands to x only when stats tracking is compiled in.
1080 #define INTRACK_STATS(x) x
1081 #else // !TRACK_LSRA_STATS
1082 #define INTRACK_STATS(x)
1083 #endif // !TRACK_LSRA_STATS
1088 #if MEASURE_MEM_ALLOC
1089 IAllocator* lsraIAllocator;
1092 IAllocator* getAllocator(Compiler* comp)
1094 #if MEASURE_MEM_ALLOC
1095 if (lsraIAllocator == nullptr)
1097 lsraIAllocator = new (comp, CMK_LSRA) CompAllocator(comp, CMK_LSRA);
1099 return lsraIAllocator;
1101 return comp->getAllocator();
1106 // This is used for dumping
// The RefPosition currently being processed; debug/dump context only.
1107 RefPosition* activeRefPosition;
// All Intervals created for this method.
1110 IntervalList intervals;
// One RegRecord per physical register, tracking its fixed RefPositions and assignment.
1112 RegRecord physRegs[REG_COUNT];
1114 // Map from tracked variable index to Interval*.
1115 Interval** localVarIntervals;
1117 // Set of blocks that have been visited.
1118 BlockSet bbVisitedSet;
1119 void markBlockVisited(BasicBlock* block)
1121 BlockSetOps::AddElemD(compiler, bbVisitedSet, block->bbNum);
1123 void clearVisitedBlocks()
1125 BlockSetOps::ClearD(compiler, bbVisitedSet);
1127 bool isBlockVisited(BasicBlock* block)
1129 return BlockSetOps::IsMember(compiler, bbVisitedSet, block->bbNum);
1136 // A map from bbNum to the block information used during register allocation.
1137 LsraBlockInfo* blockInfo;
// Choose the predecessor whose out-map seeds this block's in-map for live-in vars.
1138 BasicBlock* findPredBlockForLiveIn(BasicBlock* block, BasicBlock* prevBlock DEBUGARG(bool* pPredBlockIsAllocated));
1140 // The order in which the blocks will be allocated.
1141 // This is an array of BasicBlock*, in the order in which they should be traversed.
1142 BasicBlock** blockSequence;
1143 // The verifiedAllBBs flag indicates whether we have verified that all BBs have been
1144 // included in the blockSequence above, during setBlockSequence().
1145 bool verifiedAllBBs;
// Compute the allocation order of blocks (fills blockSequence).
1146 void setBlockSequence();
// Ordering predicate for sequencing; optionally considers block weights.
1147 int compareBlocksForSequencing(BasicBlock* block1, BasicBlock* block2, bool useBlockWeights);
// Worklist state used while building the block sequence.
1148 BasicBlockList* blockSequenceWorkList;
1149 bool blockSequencingDone;
1150 void addToBlockSequenceWorkList(BlockSet sequencedBlockSet, BasicBlock* block, BlockSet& predSet);
1151 void removeFromBlockSequenceWorkList(BasicBlockList* listNode, BasicBlockList* prevNode);
1152 BasicBlock* getNextCandidateFromWorkList();
1154 // The bbNum of the block being currently allocated or resolved.
1155 unsigned int curBBNum;
1156 // The ordinal of the block we're on (i.e. this is the curBBSeqNum-th block we've allocated).
1157 unsigned int curBBSeqNum;
1158 // The number of blocks that we've sequenced.
1159 unsigned int bbSeqCount;
1160 // The Location of the start of the current block.
1161 LsraLocation curBBStartLocation;
1162 // True if the method contains any critical edges.
1163 bool hasCriticalEdges;
1165 // True if there are any register candidate lclVars available for allocation.
1166 bool enregisterLocalVars;
1168 virtual bool willEnregisterLocalVars() const
1170 return enregisterLocalVars;
1173 // Ordered list of RefPositions
1174 RefPositionList refPositions;
1176 // Per-block variable location mappings: an array indexed by block number that yields a
1177 // pointer to an array of regNumber, one per variable.
1178 VarToRegMap* inVarToRegMaps;
1179 VarToRegMap* outVarToRegMaps;
1181 // A temporary VarToRegMap used during the resolution of critical edges.
1182 VarToRegMap sharedCriticalVarToRegMap;
// Register masks available for allocation; PhasedVar enforces set-then-read phases.
1184 PhasedVar<regMaskTP> availableIntRegs;
1185 PhasedVar<regMaskTP> availableFloatRegs;
1186 PhasedVar<regMaskTP> availableDoubleRegs;
1188 // The set of all register candidates. Note that this may be a subset of tracked vars.
1189 VARSET_TP registerCandidateVars;
1190 // Current set of live register candidate vars, used during building of RefPositions to determine
1191 // whether to preference to callee-save.
1192 VARSET_TP currentLiveVars;
1193 // Set of variables that may require resolution across an edge.
1194 // This is first constructed during interval building, to contain all the lclVars that are live at BB edges.
1195 // Then, any lclVar that is always in the same register is removed from the set.
1196 VARSET_TP resolutionCandidateVars;
1197 // This set contains all the lclVars that are ever spilled or split.
1198 VARSET_TP splitOrSpilledVars;
1199 // Set of floating point variables to consider for callee-save registers.
1200 VARSET_TP fpCalleeSaveCandidateVars;
1201 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
// Per-target choice of the "large" vector type and the smaller type used to
// save/restore its callee-saved portion around calls.
1202 #if defined(_TARGET_AMD64_)
1203 static const var_types LargeVectorType = TYP_SIMD32;
1204 static const var_types LargeVectorSaveType = TYP_SIMD16;
1205 #elif defined(_TARGET_ARM64_)
1206 static const var_types LargeVectorType = TYP_SIMD16;
1207 static const var_types LargeVectorSaveType = TYP_DOUBLE;
1208 #else // !defined(_TARGET_AMD64_) && !defined(_TARGET_ARM64_)
1209 #error("Unknown target architecture for FEATURE_SIMD")
1210 #endif // !defined(_TARGET_AMD64_) && !defined(_TARGET_ARM64_)
1212 // Set of large vector (TYP_SIMD32 on AVX) variables.
1213 VARSET_TP largeVectorVars;
1214 // Set of large vector (TYP_SIMD32 on AVX) variables to consider for callee-save registers.
1215 VARSET_TP largeVectorCalleeSaveCandidateVars;
1216 #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
1218 //-----------------------------------------------------------------------
1219 // TreeNodeInfo methods
1220 //-----------------------------------------------------------------------
// Top-level entry: set register requirements (TreeNodeInfo) for a node.
1222 void TreeNodeInfoInit(GenTree* stmt);
// xarch-specific: restrict candidates when byte-addressable registers are required.
1224 void TreeNodeInfoInitCheckByteable(GenTree* tree);
// Mark 'delayUseSrc' so its register is not freed until after the node's def.
1226 void SetDelayFree(GenTree* delayUseSrc);
1228 void TreeNodeInfoInitSimple(GenTree* tree);
// Count the register sources contributed by an operand / an indirection.
1229 int GetOperandSourceCount(GenTree* node);
1230 int GetIndirSourceCount(GenTreeIndir* indirTree);
1231 void HandleFloatVarArgs(GenTreeCall* call, GenTree* argNode, bool* callHasFloatRegArgs);
// Per-node-kind TreeNodeInfo initializers.
1233 void TreeNodeInfoInitStoreLoc(GenTree* tree);
1234 void TreeNodeInfoInitReturn(GenTree* tree);
1235 void TreeNodeInfoInitShiftRotate(GenTree* tree);
1236 void TreeNodeInfoInitPutArgReg(GenTreeUnOp* node);
1237 void TreeNodeInfoInitCall(GenTreeCall* call);
1238 void TreeNodeInfoInitCmp(GenTreePtr tree);
1239 void TreeNodeInfoInitStructArg(GenTreePtr structArg);
1240 void TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode);
1241 void TreeNodeInfoInitModDiv(GenTree* tree);
1242 void TreeNodeInfoInitIntrinsic(GenTree* tree);
1243 void TreeNodeInfoInitStoreLoc(GenTreeLclVarCommon* tree);
1244 void TreeNodeInfoInitIndir(GenTreeIndir* indirTree);
1245 void TreeNodeInfoInitGCWriteBarrier(GenTree* tree);
1246 void TreeNodeInfoInitCast(GenTree* tree);
1249 bool ExcludeNonByteableRegisters(GenTree* tree);
1252 #if defined(_TARGET_XARCH_)
1253 // returns true if the tree can use the read-modify-write memory instruction form
1254 bool isRMWRegOper(GenTreePtr tree);
1255 void TreeNodeInfoInitMul(GenTreePtr tree);
// Record that the method uses AVX (affects vzeroupper insertion / SIMD sizes).
1256 void SetContainsAVXFlags(bool isFloatingPointType = true, unsigned sizeOfSIMDVector = 0);
1257 #endif // defined(_TARGET_XARCH_)
1260 void TreeNodeInfoInitSIMD(GenTreeSIMD* tree);
1261 #endif // FEATURE_SIMD
1263 void TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode);
1265 void TreeNodeInfoInitPutArgSplit(GenTreePutArgSplit* tree);
1267 void TreeNodeInfoInitLclHeap(GenTree* tree);
1270 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
1271 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
1275 XX This is the fundamental data structure for linear scan register XX
1276 XX allocation. It represents the live range(s) for a variable or temp. XX
1278 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
1279 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
// An Interval is the fundamental allocation unit: the live range(s) of a lclVar,
// tree temp, or internal temp. It derives from Referenceable so RefPositions can
// point at it.
1282 class Interval : public Referenceable
// Constructor: record the register type and initial preference set; most flags
// start out false. (Some initializer lines are not visible in this listing.)
1285 Interval(RegisterType registerType, regMaskTP registerPreferences)
1286 : registerPreferences(registerPreferences)
1287 , relatedInterval(nullptr)
1288 , assignedReg(nullptr)
1289 , registerType(registerType)
1294 , isStructField(false)
1295 , isPromotedStruct(false)
1296 , hasConflictingDefUse(false)
1297 , hasNonCommutativeRMWDef(false)
1298 , isSpecialPutArg(false)
1299 , preferCalleeSave(false)
1301 , physReg(REG_COUNT)
1310 // print out representation
1312 // concise representation for embedding
1314 // extremely concise representation
// Associate this interval with lclVar 'lclNum'.
1318 void setLocalNumber(Compiler* compiler, unsigned lclNum, LinearScan* l);
1320 // Fixed registers for which this Interval has a preference
1321 regMaskTP registerPreferences;
1323 // The relatedInterval is:
1324 // - for any other interval, it is the interval to which this interval
1325 // is currently preferenced (e.g. because they are related by a copy)
1326 Interval* relatedInterval;
1328 // The assignedReg is the RegRecord for the register to which this interval
1329 // has been assigned at some point - if the interval is active, this is the
1330 // register it currently occupies.
1331 RegRecord* assignedReg;
1333 // DECIDE : put this in a union or do something w/ inheritance?
1334 // this is an interval for a physical register, not an allocatable entity
1336 RegisterType registerType;
1337 bool isLocalVar : 1;
1338 // Indicates whether this interval has been assigned to different registers
1340 // Indicates whether this interval is ever spilled
1342 // indicates an interval representing the internal requirements for
1343 // generating code for a node (temp registers internal to the node)
1344 // Note that this interval may live beyond a node in the GT_ARR_LENREF/GT_IND
1345 // case (though never lives beyond a stmt)
1346 bool isInternal : 1;
1347 // true if this is a LocalVar for a struct field
1348 bool isStructField : 1;
1349 // true iff this is a GT_LDOBJ for a fully promoted (PROMOTION_TYPE_INDEPENDENT) struct
1350 bool isPromotedStruct : 1;
1351 // true if this is an SDSU interval for which the def and use have conflicting register
1353 bool hasConflictingDefUse : 1;
1354 // true if this interval is defined by a non-commutative 2-operand instruction
1355 bool hasNonCommutativeRMWDef : 1;
1357 // True if this interval is defined by a putArg, whose source is a non-last-use lclVar.
1358 // During allocation, this flag will be cleared if the source is not already in the required register.
1359 // Otherwise, we will leave the register allocated to the lclVar, but mark the RegRecord as
1360 // isBusyUntilNextKill, so that it won't be reused if the lclVar goes dead before the call.
1361 bool isSpecialPutArg : 1;
1363 // True if this interval interferes with a call.
1364 bool preferCalleeSave : 1;
1366 // True if this interval is defined by a constant node that may be reused and/or may be
1367 // able to reuse a constant that's already in a register.
1368 bool isConstant : 1;
1370 // The register to which it is currently assigned.
// Index of this interval in the allocator's interval list; used in dumps.
1374 unsigned int intervalIndex;
1377 unsigned int varNum; // This is the "variable number": the index into the lvaTable array
1379 LclVarDsc* getLocalVar(Compiler* comp)
1382 return &(comp->lvaTable[this->varNum]);
1385 // Get the local tracked variable "index" (lvVarIndex), used in bitmasks.
1386 unsigned getVarIndex(Compiler* comp)
1388 LclVarDsc* varDsc = getLocalVar(comp);
1389 assert(varDsc->lvTracked); // If this isn't true, we shouldn't be calling this function!
1390 return varDsc->lvVarIndex;
1393 bool isAssignedTo(regNumber regNum)
1395 // This uses regMasks to handle the case where a double actually occupies two registers
1396 // TODO-Throughput: This could/should be done more cheaply.
1397 return (physReg != REG_NA && (genRegMask(physReg, registerType) & genRegMask(regNum)) != RBM_NONE);
1400 // Assign the related interval.
// NOTE(review): the debug-verbose dump statements in these two methods are only
// partially visible in this listing; the surrounding braces/#ifdef DEBUG lines
// are not shown here.
1401 void assignRelatedInterval(Interval* newRelatedInterval)
1406 printf("Assigning related ");
1407 newRelatedInterval->microDump();
// Record the preferencing relationship.
1413 relatedInterval = newRelatedInterval;
1416 // Assign the related interval, but only if it isn't already assigned.
1417 void assignRelatedIntervalIfUnassigned(Interval* newRelatedInterval)
1419 if (relatedInterval == nullptr)
1421 assignRelatedInterval(newRelatedInterval);
// Debug output for the already-assigned case.
1428 printf("Interval ");
1430 printf(" already has a related interval\n");
1436 // Update the registerPreferences on the interval.
1437 // If there are conflicting requirements on this interval, set the preferences to
1438 // the union of them. That way maybe we'll get at least one of them.
1439 // An exception is made in the case where one of the existing or new
1440 // preferences are all callee-save, in which case we "prefer" the callee-save
1442 void updateRegisterPreferences(regMaskTP preferences)
1444 // We require registerPreferences to have been initialized.
1445 assert(registerPreferences != RBM_NONE);
1446 // It is invalid to update with empty preferences
1447 assert(preferences != RBM_NONE);
1449 regMaskTP commonPreferences = (registerPreferences & preferences);
1450 if (commonPreferences != RBM_NONE)
1452 registerPreferences = commonPreferences;
1456 // There are no preferences in common.
1457 // Preferences need to reflect both cases where a var must occupy a specific register,
1458 // as well as cases where a var is live when a register is killed.
1459 // In the former case, we would like to record all such registers, however we don't
1460 // really want to use any registers that will interfere.
1461 // To approximate this, we never "or" together multi-reg sets, which are generally kill sets.
1463 if (!genMaxOneBit(preferences))
1465 // The new preference value is a multi-reg set, so it's probably a kill.
1466 // Keep the new value.
1467 registerPreferences = preferences;
1471 if (!genMaxOneBit(registerPreferences))
1473 // The old preference value is a multi-reg set.
1474 // Keep the existing preference set, as it probably reflects one or more kills.
1475 // It may have been a union of multiple individual registers, but we can't
1476 // distinguish that case without extra cost.
1480 // If we reach here, we have two disjoint single-reg sets.
1481 // Keep only the callee-save preferences, if not empty.
1482 // Otherwise, take the union of the preferences.
1484 regMaskTP newPreferences = registerPreferences | preferences;
1486 if (preferCalleeSave)
1488 regMaskTP calleeSaveMask = (calleeSaveRegs(this->registerType) & (newPreferences));
1489 if (calleeSaveMask != RBM_NONE)
1491 newPreferences = calleeSaveMask;
1494 registerPreferences = newPreferences;
1501 // A RefPosition refers to either an Interval or a RegRecord. 'referent' points to one
1502 // of these types. If it refers to a RegRecord, then 'isPhysRegRef' is true. If it
1503 // refers to an Interval, then 'isPhysRegRef' is false.
1505 // Q: can 'referent' be NULL?
1507 Referenceable* referent;
1509 // nextRefPosition is the next in code order.
1510 // Note that in either case there is no need for these to be doubly linked, as they
1511 // are only traversed in the forward direction, and are not moved.
1512 RefPosition* nextRefPosition;
1514 // The remaining fields are common to both options
1518 // Prior to the allocation pass, registerAssignment captures the valid registers
1519 // for this RefPosition. An empty set means that any register is valid. A non-empty
1520 // set means that it must be one of the given registers (may be the full set if the
1521 // only constraint is that it must reside in SOME register)
1522 // After the allocation pass, this contains the actual assignment
1523 LsraLocation nodeLocation;
1524 regMaskTP registerAssignment;
1528 // NOTE: C++ only packs bitfields if the base type is the same. So make all the base
1529 // NOTE: types of the logically "bool" types that follow 'unsigned char', so they match
1530 // NOTE: RefType that precedes this, and multiRegIdx can also match.
1532 // Indicates whether this ref position is to be allocated a reg only if profitable. Currently these are the
1533 // ref positions that lower/codegen has indicated as reg optional and is considered a contained memory operand if
1534 // no reg is allocated.
1535 unsigned char allocRegIfProfitable : 1;
1537 // Used by RefTypeDef/Use positions of a multi-reg call node.
1538 // Indicates the position of the register that this ref position refers to.
1539 // The max bits needed is based on max value of MAX_RET_REG_COUNT value
1540 // across all targets and that happens 4 on Arm. Hence index value
1541 // would be 0..MAX_RET_REG_COUNT-1.
1542 unsigned char multiRegIdx : 2;
1544 // Last Use - this may be true for multiple RefPositions in the same Interval
1545 unsigned char lastUse : 1;
1547 // Spill and Copy info
1548 // reload indicates that the value was spilled, and must be reloaded here.
1549 // spillAfter indicates that the value is spilled here, so a spill must be added.
1550 // copyReg indicates that the value needs to be copied to a specific register,
1551 // but that it will also retain its current assigned register.
1552 // moveReg indicates that the value needs to be moved to a different register,
1553 // and that this will be its new assigned register.
1554 // A RefPosition may have any flag individually or the following combinations:
1555 // - reload and spillAfter (i.e. it remains in memory), but not in combination with copyReg or moveReg
1556 // (reload cannot exist with copyReg or moveReg; it should be reloaded into the appropriate reg)
1557 // - spillAfter and copyReg (i.e. it must be copied to a new reg for use, but is then spilled)
1558 // - spillAfter and moveReg (i.e. it most be both spilled and moved)
1559 // NOTE: a moveReg involves an explicit move, and would usually not be needed for a fixed Reg if it is going
1560 // to be spilled, because the code generator will do the move to the fixed register, and doesn't need to
1561 // record the new register location as the new "home" location of the lclVar. However, if there is a conflicting
1562 // use at the same location (e.g. lclVar V1 is in rdx and needs to be in rcx, but V2 needs to be in rdx), then
1563 // we need an explicit move.
1564 // - copyReg and moveReg must not exist with each other.
1566 unsigned char reload : 1;
1567 unsigned char spillAfter : 1;
1568 unsigned char copyReg : 1;
1569 unsigned char moveReg : 1; // true if this var is moved to a new register
1571 unsigned char isPhysRegRef : 1; // true if 'referent' points of a RegRecord, false if it points to an Interval
1572 unsigned char isFixedRegRef : 1;
1573 unsigned char isLocalDefUse : 1;
1575 // delayRegFree indicates that the register should not be freed right away, but instead wait
1576 // until the next Location after it would normally be freed. This is used for the case of
1577 // non-commutative binary operators, where op2 must not be assigned the same register as
1578 // the target. We do this by not freeing it until after the target has been defined.
1579 // Another option would be to actually change the Location of the op2 use until the same
1580 // Location as the def, but then it could potentially reuse a register that has been freed
1581 // from the other source(s), e.g. if it's a lastUse or spilled.
1582 unsigned char delayRegFree : 1;
1584 // outOfOrder is marked on a (non-def) RefPosition that doesn't follow a definition of the
1585 // register currently assigned to the Interval. This happens when we use the assigned
1586 // register from a predecessor that is not the most recently allocated BasicBlock.
1587 unsigned char outOfOrder : 1;
1590 // Minimum number registers that needs to be ensured while
1591 // constraining candidates for this ref position under
1593 unsigned minRegCandidateCount;
1595 // The unique RefPosition number, equal to its index in the
1596 // refPositions list. Only used for debugging dumps.
// Constructor: records the block, location, node, and kind of reference; most
// flags default to false. (Some initializer lines are not visible in this listing.)
1600 RefPosition(unsigned int bbNum, LsraLocation nodeLocation, GenTree* treeNode, RefType refType)
1602 , nextRefPosition(nullptr)
1603 , treeNode(treeNode)
1605 , nodeLocation(nodeLocation)
1606 , registerAssignment(RBM_NONE)
1614 , isPhysRegRef(false)
1615 , isFixedRegRef(false)
1616 , isLocalDefUse(false)
1617 , delayRegFree(false)
1620 , minRegCandidateCount(1)
1626 Interval* getInterval()
1628 assert(!isPhysRegRef);
1629 return (Interval*)referent;
1631 void setInterval(Interval* i)
1634 isPhysRegRef = false;
1639 assert(isPhysRegRef);
1640 return (RegRecord*)referent;
1642 void setReg(RegRecord* r)
1645 isPhysRegRef = true;
1646 registerAssignment = genRegMask(r->regNum);
1649 regNumber assignedReg()
1651 if (registerAssignment == RBM_NONE)
1656 return genRegNumFromMask(registerAssignment);
1659 // Returns true if it is a reference on a gentree node.
1662 return (refType == RefTypeDef || refType == RefTypeUse);
1665 bool RequiresRegister()
1667 return (IsActualRef()
1668 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
1669 || refType == RefTypeUpperVectorSaveDef || refType == RefTypeUpperVectorSaveUse
1670 #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
1672 !AllocateIfProfitable();
1675 void setAllocateIfProfitable(bool val)
1677 allocRegIfProfitable = val;
1680 // Returns true whether this ref position is to be allocated
1681 // a reg only if it is profitable.
1682 bool AllocateIfProfitable()
1684 // TODO-CQ: Right now if a ref position is marked as
1685 // copyreg or movereg, then it is not treated as
1686 // 'allocate if profitable'. This is an implementation
1687 // limitation that needs to be addressed.
1688 return allocRegIfProfitable && !copyReg && !moveReg;
1691 void setMultiRegIdx(unsigned idx)
1694 assert(multiRegIdx == idx);
1697 unsigned getMultiRegIdx()
1702 LsraLocation getRefEndLocation()
1704 return delayRegFree ? nodeLocation + 1 : nodeLocation;
1707 bool isIntervalRef()
1709 return (!isPhysRegRef && (referent != nullptr));
1712 // isTrueDef indicates that the RefPosition is a non-update def of a non-internal
1716 return (refType == RefTypeDef && isIntervalRef() && !getInterval()->isInternal);
1719 // isFixedRefOfRegMask indicates that the RefPosition has a fixed assignment to the register
1720 // specified by the given mask
1721 bool isFixedRefOfRegMask(regMaskTP regMask)
1723 assert(genMaxOneBit(regMask));
1724 return (registerAssignment == regMask);
1727 // isFixedRefOfReg indicates that the RefPosition has a fixed assignment to the given register
1728 bool isFixedRefOfReg(regNumber regNum)
1730 return (isFixedRefOfRegMask(genRegMask(regNum)));
1734 // operator= copies everything except 'rpNum', which must remain unique
1735 RefPosition& operator=(const RefPosition& rp)
1737 unsigned rpNumSave = rpNum;
1738 memcpy(this, &rp, sizeof(rp));
// Debug helper: print the set of registers contained in 'regs'.
1748 void dumpRegMask(regMaskTP regs);
1751 /*****************************************************************************/
1753 /*****************************************************************************/