1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
5 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
8 XX Interval and RefPosition Building XX
10 XX This contains the logic for constructing Intervals and RefPositions that XX
11 XX is common across architectures. See lsra{arch}.cpp for the architecture- XX
12 XX specific methods for building. XX
14 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
15 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
23 #ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator
27 //------------------------------------------------------------------------
28 // LocationInfoListNodePool::LocationInfoListNodePool:
29 // Creates a pool of `LocationInfoListNode` values.
32 // compiler - The compiler context.
33 // preallocate - The number of nodes to preallocate.
// All pool nodes are carved out of one compGetMem(CMK_LSRA) allocation and
// threaded into a singly-linked free list via m_next.
35 LocationInfoListNodePool::LocationInfoListNodePool(Compiler* compiler, unsigned preallocate) : m_compiler(compiler)
39 size_t preallocateSize = sizeof(LocationInfoListNode) * preallocate;
40 LocationInfoListNode* preallocatedNodes =
41 reinterpret_cast<LocationInfoListNode*>(compiler->compGetMem(preallocateSize, CMK_LSRA));
// The first preallocated node becomes the list head; terminate it here.
// NOTE(review): 'head' is dereferenced unconditionally — appears to assume
// preallocate >= 1; confirm against callers.
43 LocationInfoListNode* head = preallocatedNodes;
44 head->m_next = nullptr;
// Link the remaining preallocated nodes onto the free list.
46 for (unsigned i = 1; i < preallocate; i++)
48 LocationInfoListNode* node = &preallocatedNodes[i];
57 //------------------------------------------------------------------------
58 // LocationInfoListNodePool::GetNode: Fetches an unused node from the
62 // l - The `LsraLocation` for the `LocationInfo` value.
63 // i - The interval for the `LocationInfo` value.
64 // t - The IR node for the `LocationInfo` value
65 // regIdx - The register index for the `LocationInfo` value.
68 // A pooled or newly-allocated `LocationInfoListNode`, depending on the
69 // contents of the pool.
70 LocationInfoListNode* LocationInfoListNodePool::GetNode(LsraLocation l, Interval* i, GenTree* t, unsigned regIdx)
72 LocationInfoListNode* head = m_freeList;
// Pool is empty: allocate a fresh node from the compiler allocator.
75 head = reinterpret_cast<LocationInfoListNode*>(m_compiler->compGetMem(sizeof(LocationInfoListNode)));
// Pool is non-empty: pop the head of the free list.
79 m_freeList = head->m_next;
// Detach the returned node from the list before handing it to the caller.
85 head->m_next = nullptr;
90 //------------------------------------------------------------------------
91 // LocationInfoListNodePool::ReturnNodes: Returns a list of nodes to the node
92 // pool and clears the given list.
95 // list - The list to return.
97 void LocationInfoListNodePool::ReturnNodes(LocationInfoList& list)
99 assert(list.m_head != nullptr);
100 assert(list.m_tail != nullptr);
// Splice the entire list onto the front of the free list in O(1).
102 LocationInfoListNode* head = m_freeList;
103 list.m_tail->m_next = head;
104 m_freeList = list.m_head;
// Clear the caller's list so its nodes cannot be used after being returned.
106 list.m_head = nullptr;
107 list.m_tail = nullptr;
110 //------------------------------------------------------------------------
111 // newInterval: Create a new Interval of the given RegisterType.
114 // theRegisterType - The type of Interval to create.
116 // TODO-Cleanup: Consider adding an overload that takes a varDsc, and can appropriately
117 // set such fields as isStructField
119 Interval* LinearScan::newInterval(RegisterType theRegisterType)
// Construct the Interval in place at the end of 'intervals', starting with all
// registers of its type as the candidate set.
121 intervals.emplace_back(theRegisterType, allRegs(theRegisterType));
122 Interval* newInt = &intervals.back();
// Record the Interval's position within the 'intervals' list.
125 newInt->intervalIndex = static_cast<unsigned>(intervals.size() - 1);
128 DBEXEC(VERBOSE, newInt->dump());
132 //------------------------------------------------------------------------
133 // newRefPositionRaw: Create a new RefPosition
136 // nodeLocation - The location of the reference.
137 // treeNode - The GenTree of the reference.
138 // refType - The type of reference
141 // This is used to create RefPositions for both RegRecords and Intervals,
142 // so it does only the common initialization.
144 RefPosition* LinearScan::newRefPositionRaw(LsraLocation nodeLocation, GenTree* treeNode, RefType refType)
// Construct the RefPosition in place at the end of 'refPositions'.
146 refPositions.emplace_back(curBBNum, nodeLocation, treeNode, refType);
147 RefPosition* newRP = &refPositions.back();
// rpNum records this RefPosition's ordinal within 'refPositions'.
149 newRP->rpNum = static_cast<unsigned>(refPositions.size() - 1);
154 //------------------------------------------------------------------------
155 // resolveConflictingDefAndUse: Resolve the situation where we have conflicting def and use
156 // register requirements on a single-def, single-use interval.
159 // defRefPosition - The interval definition
160 // useRefPosition - The (sole) interval use
166 // The two RefPositions are for the same interval, which is a tree-temp.
169 // We require some special handling for the case where the use is a "delayRegFree" case of a fixedReg.
170 // In that case, if we change the registerAssignment on the useRefPosition, we will lose the fact that,
171 // even if we assign a different register (and rely on codegen to do the copy), that fixedReg also needs
172 // to remain busy until the Def register has been allocated. In that case, we don't allow Case 1 or Case 4
174 // Here are the cases we consider (in this order):
175 // 1. If the defRefPosition specifies a single register, and there are no conflicting
176 // FixedReg uses of it between the def and use, we use that register, and the code generator
177 // will insert the copy. Note that it cannot be in use because there is a FixedRegRef for the def.
178 // 2. If the useRefPosition specifies a single register, and it is not in use, and there are no
179 // conflicting FixedReg uses of it between the def and use, we use that register, and the code generator
180 // will insert the copy.
181 // 3. If the defRefPosition specifies a single register (but there are conflicts, as determined
182 // in 1.), and there are no conflicts with the useRefPosition register (if it's a single register),
183 // we set the register requirements on the defRefPosition to the use registers, and the
184 // code generator will insert a copy on the def. We can't rely on the code generator to put a copy
185 // on the use if it has multiple possible candidates, as it won't know which one has been allocated.
186 // 4. If the useRefPosition specifies a single register, and there are no conflicts with the register
187 // on the defRefPosition, we leave the register requirements on the defRefPosition as-is, and set
188 // the useRefPosition to the def registers, for similar reasons to case #3.
189 // 5. If both the defRefPosition and the useRefPosition specify single registers, but both have conflicts,
190 // We set the candidates on defRefPosition to be all regs of the appropriate type, and since they are
191 // single registers, codegen can insert the copy.
192 // 6. Finally, if the RefPositions specify disjoint subsets of the registers (or the use is fixed but
193 // has a conflict), we must insert a copy. The copy will be inserted before the use if the
194 // use is not fixed (in the fixed case, the code generator will insert the use).
196 // TODO-CQ: We get bad register allocation in case #3 in the situation where no register is
197 // available for the lifetime. We end up allocating a register that must be spilled, and it probably
198 // won't be the register that is actually defined by the target instruction. So, we have to copy it
199 // and THEN spill it. In this case, we should be using the def requirement. But we need to change
200 // the interface to this method a bit to make that work (e.g. returning a candidate set to use, but
201 // leaving the registerAssignment as-is on the def, so that if we find that we need to spill anyway
202 // we can use the fixed-reg on the def.
205 void LinearScan::resolveConflictingDefAndUse(Interval* interval, RefPosition* defRefPosition)
207 assert(!interval->isLocalVar);
// The use is the def's sole successor reference on this single-def, single-use interval.
209 RefPosition* useRefPosition = defRefPosition->nextRefPosition;
210 regMaskTP defRegAssignment = defRefPosition->registerAssignment;
211 regMaskTP useRegAssignment = useRefPosition->registerAssignment;
212 RegRecord* defRegRecord = nullptr;
213 RegRecord* useRegRecord = nullptr;
214 regNumber defReg = REG_NA;
215 regNumber useReg = REG_NA;
216 bool defRegConflict = false;
217 bool useRegConflict = false;
219 // If the useRefPosition is a "delayRegFree", we can't change the registerAssignment
220 // on it, or we will fail to ensure that the fixedReg is busy at the time the target
221 // (of the node that uses this interval) is allocated.
222 bool canChangeUseAssignment = !useRefPosition->isFixedRegRef || !useRefPosition->delayRegFree;
224 INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CONFLICT));
225 if (!canChangeUseAssignment)
227 INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_FIXED_DELAY_USE));
// Examine the def side: if it is a fixed reg, look for conflicting FixedReg refs
// between the def and the use (case #1).
229 if (defRefPosition->isFixedRegRef)
231 defReg = defRefPosition->assignedReg();
232 defRegRecord = getRegisterRecord(defReg);
233 if (canChangeUseAssignment)
235 RefPosition* currFixedRegRefPosition = defRegRecord->recentRefPosition;
236 assert(currFixedRegRefPosition != nullptr &&
237 currFixedRegRefPosition->nodeLocation == defRefPosition->nodeLocation);
239 if (currFixedRegRefPosition->nextRefPosition == nullptr ||
240 currFixedRegRefPosition->nextRefPosition->nodeLocation > useRefPosition->getRefEndLocation())
242 // This is case #1. Use the defRegAssignment
243 INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE1));
244 useRefPosition->registerAssignment = defRegAssignment;
249 defRegConflict = true;
// Examine the use side: if it is a fixed reg, check for conflicting FixedReg refs
// and for the register currently being in use (case #2).
253 if (useRefPosition->isFixedRegRef)
255 useReg = useRefPosition->assignedReg();
256 useRegRecord = getRegisterRecord(useReg);
257 RefPosition* currFixedRegRefPosition = useRegRecord->recentRefPosition;
259 // We know that useRefPosition is a fixed use, so the nextRefPosition must not be null.
260 RefPosition* nextFixedRegRefPosition = useRegRecord->getNextRefPosition();
261 assert(nextFixedRegRefPosition != nullptr &&
262 nextFixedRegRefPosition->nodeLocation <= useRefPosition->nodeLocation);
264 // First, check to see if there are any conflicting FixedReg references between the def and use.
265 if (nextFixedRegRefPosition->nodeLocation == useRefPosition->nodeLocation)
267 // OK, no conflicting FixedReg references.
268 // Now, check to see whether it is currently in use.
269 if (useRegRecord->assignedInterval != nullptr)
271 RefPosition* possiblyConflictingRef = useRegRecord->assignedInterval->recentRefPosition;
272 LsraLocation possiblyConflictingRefLocation = possiblyConflictingRef->getRefEndLocation();
273 if (possiblyConflictingRefLocation >= defRefPosition->nodeLocation)
275 useRegConflict = true;
280 // This is case #2. Use the useRegAssignment
281 INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE2));
282 defRefPosition->registerAssignment = useRegAssignment;
288 useRegConflict = true;
// This is case #3: propagate the use's registers onto the def.
291 if (defRegRecord != nullptr && !useRegConflict)
294 INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE3));
295 defRefPosition->registerAssignment = useRegAssignment;
// This is case #4: propagate the def's registers onto the use.
298 if (useRegRecord != nullptr && !defRegConflict && canChangeUseAssignment)
301 INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE4));
302 useRefPosition->registerAssignment = defRegAssignment;
// This is case #5: both sides fixed and both conflicted; open up the def to all
// registers of the interval's type.
305 if (defRegRecord != nullptr && useRegRecord != nullptr)
308 INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE5));
309 RegisterType regType = interval->registerType;
310 assert((getRegisterType(interval, defRefPosition) == regType) &&
311 (getRegisterType(interval, useRefPosition) == regType));
312 regMaskTP candidates = allRegs(regType);
313 defRefPosition->registerAssignment = candidates;
// This is case #6: disjoint candidate sets; a copy will be required.
316 INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE6));
320 //------------------------------------------------------------------------
321 // applyCalleeSaveHeuristics: Set register preferences for an interval based on the given RefPosition
324 // rp - The RefPosition of interest
327 // This is slightly more general than its name implies, and updates preferences not just
328 // for callee-save registers.
330 void LinearScan::applyCalleeSaveHeuristics(RefPosition* rp)
332 #ifdef _TARGET_AMD64_
333 if (compiler->opts.compDbgEnC)
335 // We only use RSI and RDI for EnC code, so we don't want to favor callee-save regs.
338 #endif // _TARGET_AMD64_
340 Interval* theInterval = rp->getInterval();
343 if (!doReverseCallerCallee())
346 // Set preferences so that this register set will be preferred for earlier refs
347 theInterval->updateRegisterPreferences(rp->registerAssignment);
351 //------------------------------------------------------------------------
352 // checkConflictingDefUse: Ensure that we have consistent def/use on SDSU temps.
355 // useRP - The use RefPosition of a tree temp (SDSU Interval)
358 // There are a couple of cases where this may over-constrain allocation:
359 // 1. In the case of a non-commutative rmw def (in which the rmw source must be delay-free), or
360 // 2. In the case where the defining node requires a temp distinct from the target (also a
362 // In those cases, if we propagate a single-register restriction from the consumer to the producer
363 // the delayed uses will not see a fixed reference in the PhysReg at that position, and may
364 // incorrectly allocate that register.
365 // TODO-CQ: This means that we may often require a copy at the use of this node's result.
366 // This case could be moved to BuildRefPositionsForNode, at the point where the def RefPosition is
367 // created, causing a RefTypeFixedRef to be added at that location. This, however, results in
368 // more PhysReg RefPositions (a throughput impact), and a large number of diffs that require
369 // further analysis to determine benefit.
372 void LinearScan::checkConflictingDefUse(RefPosition* useRP)
374 assert(useRP->refType == RefTypeUse);
375 Interval* theInterval = useRP->getInterval();
376 assert(!theInterval->isLocalVar);
// For an SDSU temp, the interval's first RefPosition is its (sole) def.
378 RefPosition* defRP = theInterval->firstRefPosition;
380 // All defs must have a valid treeNode, but we check it below to be conservative.
381 assert(defRP->treeNode != nullptr);
// Intersect the def's and use's candidate sets; if non-empty (and not a
// single register with interfering uses), narrow the def to the intersection.
382 regMaskTP prevAssignment = defRP->registerAssignment;
383 regMaskTP newAssignment = (prevAssignment & useRP->registerAssignment);
384 if (newAssignment != RBM_NONE)
386 if (!isSingleRegister(newAssignment) || !theInterval->hasInterferingUses)
388 defRP->registerAssignment = newAssignment;
// Otherwise record the conflict; it will be resolved later.
393 theInterval->hasConflictingDefUse = true;
397 //------------------------------------------------------------------------
398 // associateRefPosWithInterval: Update the Interval based on the given RefPosition.
401 // rp - The RefPosition of interest
404 // This is called at the time when 'rp' has just been created, so it becomes
405 // the nextRefPosition of the recentRefPosition, and both the recentRefPosition
406 // and lastRefPosition of its referent.
408 void LinearScan::associateRefPosWithInterval(RefPosition* rp)
410 Referenceable* theReferent = rp->referent;
412 if (theReferent != nullptr)
414 // All RefPositions except the dummy ones at the beginning of blocks
416 if (rp->isIntervalRef())
418 Interval* theInterval = rp->getInterval();
420 applyCalleeSaveHeuristics(rp);
422 if (theInterval->isLocalVar)
424 if (RefTypeIsUse(rp->refType))
426 RefPosition* const prevRP = theInterval->recentRefPosition;
// A later use in the same block supersedes the previous one as "last use".
427 if ((prevRP != nullptr) && (prevRP->bbNum == rp->bbNum))
429 prevRP->lastUse = false;
// Provisionally mark this ref as a last use; ExpUse/ParamDef/ZeroInit refs
// (and lifetime extension under stress) are never last uses.
433 rp->lastUse = (rp->refType != RefTypeExpUse) && (rp->refType != RefTypeParamDef) &&
434 (rp->refType != RefTypeZeroInit) && !extendLifetimes();
436 else if (rp->refType == RefTypeUse)
438 checkConflictingDefUse(rp);
// Thread 'rp' onto the referent's RefPosition chain.
443 RefPosition* prevRP = theReferent->recentRefPosition;
444 if (prevRP != nullptr)
446 prevRP->nextRefPosition = rp;
450 theReferent->firstRefPosition = rp;
452 theReferent->recentRefPosition = rp;
453 theReferent->lastRefPosition = rp;
// Only block-boundary and KillGCRefs RefPositions may have no referent.
457 assert((rp->refType == RefTypeBB) || (rp->refType == RefTypeKillGCRefs));
461 //---------------------------------------------------------------------------
462 // newRefPosition: allocate and initialize a new RefPosition.
465 // reg - reg number that identifies RegRecord to be associated
466 // with this RefPosition
467 // theLocation - LSRA location of RefPosition
468 // theRefType - RefPosition type
469 // theTreeNode - GenTree node for which this RefPosition is created
470 // mask - Set of valid registers for this RefPosition
471 // multiRegIdx - register position if this RefPosition corresponds to a
472 // multi-reg call node.
477 RefPosition* LinearScan::newRefPosition(
478 regNumber reg, LsraLocation theLocation, RefType theRefType, GenTree* theTreeNode, regMaskTP mask)
// This overload associates the new RefPosition with a physical RegRecord.
480 RefPosition* newRP = newRefPositionRaw(theLocation, theTreeNode, theRefType);
482 newRP->setReg(getRegisterRecord(reg));
483 newRP->registerAssignment = mask;
// RegRecord refs are never multi-reg and are never "allocate if profitable".
485 newRP->setMultiRegIdx(0);
486 newRP->setAllocateIfProfitable(false);
488 associateRefPosWithInterval(newRP);
490 DBEXEC(VERBOSE, newRP->dump());
494 //---------------------------------------------------------------------------
495 // newRefPosition: allocate and initialize a new RefPosition.
498 // theInterval - interval to which RefPosition is associated with.
499 // theLocation - LSRA location of RefPosition
500 // theRefType - RefPosition type
501 // theTreeNode - GenTree node for which this RefPosition is created
502 // mask - Set of valid registers for this RefPosition
503 // multiRegIdx - register position if this RefPosition corresponds to a
504 // multi-reg call node.
509 RefPosition* LinearScan::newRefPosition(Interval* theInterval,
510 LsraLocation theLocation,
512 GenTree* theTreeNode,
514 unsigned multiRegIdx /* = 0 */)
517 if (theInterval != nullptr && regType(theInterval->registerType) == FloatRegisterType)
519 // In the case we're using floating point registers we must make sure
520 // this flag was set previously in the compiler since this will mandate
521 // whether LSRA will take into consideration FP reg killsets.
522 assert(compiler->compFloatingPointUsed || ((mask & RBM_FLT_CALLEE_SAVED) == 0));
526 // If this reference is constrained to a single register (and it's not a dummy
527 // or Kill reftype already), add a RefTypeFixedReg at this location so that its
528 // availability can be more accurately determined
530 bool isFixedRegister = isSingleRegister(mask);
531 bool insertFixedRef = false;
534 // Insert a RefTypeFixedReg for any normal def or use (not ParamDef or BB)
535 if (theRefType == RefTypeUse || theRefType == RefTypeDef)
537 insertFixedRef = true;
// Create the companion RefTypeFixedReg at the same location for the single
// candidate register.
543 regNumber physicalReg = genRegNumFromMask(mask);
544 RefPosition* pos = newRefPosition(physicalReg, theLocation, RefTypeFixedReg, nullptr, mask);
545 assert(theInterval != nullptr);
546 assert((allRegs(theInterval->registerType) & mask) != 0);
549 RefPosition* newRP = newRefPositionRaw(theLocation, theTreeNode, theRefType);
551 newRP->setInterval(theInterval);
554 newRP->isFixedRegRef = isFixedRegister;
556 #ifndef _TARGET_AMD64_
557 // We don't need this for AMD because the PInvoke method epilog code is explicit
558 // at register allocation time.
// Keep the PInvoke TCB/frame registers out of the candidate set for the return
// local of a method with unmanaged calls, since they are live into the epilog.
559 if (theInterval != nullptr && theInterval->isLocalVar && compiler->info.compCallUnmanaged &&
560 theInterval->varNum == compiler->genReturnLocal)
562 mask &= ~(RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME);
563 noway_assert(mask != RBM_NONE);
565 #endif // !_TARGET_AMD64_
566 newRP->registerAssignment = mask;
568 newRP->setMultiRegIdx(multiRegIdx);
569 newRP->setAllocateIfProfitable(false);
571 associateRefPosWithInterval(newRP);
573 DBEXEC(VERBOSE, newRP->dump());
577 //------------------------------------------------------------------------
578 // IsContainableMemoryOp: Checks whether this is a memory op that can be contained.
581 // node - the node of interest.
584 // True if this will definitely be a memory reference that could be contained.
587 // This differs from the isMemoryOp() method on GenTree because it checks for
588 // the case of doNotEnregister local. This won't include locals that
589 // for some other reason do not become register candidates, nor those that get
591 // Also, because we usually call this before we redo dataflow, any new lclVars
592 // introduced after the last dataflow analysis will not yet be marked lvTracked,
593 // so we don't use that.
595 bool LinearScan::isContainableMemoryOp(GenTree* node)
// An actual memory operand is trivially containable.
597 if (node->isMemoryOp())
// When locals aren't enregistered at all, every local lives in memory.
603 if (!enregisterLocalVars)
// A doNotEnregister local will certainly be a memory reference.
607 LclVarDsc* varDsc = &compiler->lvaTable[node->AsLclVar()->gtLclNum];
608 return varDsc->lvDoNotEnregister;
613 //------------------------------------------------------------------------
614 // addRefsForPhysRegMask: Adds RefPositions of the given type for all the registers in 'mask'.
617 // mask - the mask (set) of registers.
618 // currentLoc - the location at which they should be added
619 // refType - the type of refposition
620 // isLastUse - true IFF this is a last use of the register
622 void LinearScan::addRefsForPhysRegMask(regMaskTP mask, LsraLocation currentLoc, RefType refType, bool isLastUse)
// Walk the registers in ascending order, shifting 'mask' down as we go so the
// loop terminates once no bits remain.
624 for (regNumber reg = REG_FIRST; mask; reg = REG_NEXT(reg), mask >>= 1)
628 // This assumes that these are all "special" RefTypes that
629 // don't need to be recorded on the tree (hence treeNode is nullptr)
630 RefPosition* pos = newRefPosition(reg, currentLoc, refType, nullptr,
631 genRegMask(reg)); // This MUST occupy the physical register (obviously)
641 //------------------------------------------------------------------------
642 // getKillSetForStoreInd: Determine the liveness kill set for a GT_STOREIND node.
643 // If the GT_STOREIND will generate a write barrier, determine the specific kill
644 // set required by the case-specific, platform-specific write barrier. If no
645 // write barrier is required, the kill set will be RBM_NONE.
648 // tree - the GT_STOREIND node
650 // Return Value: a register mask of the registers killed
652 regMaskTP LinearScan::getKillSetForStoreInd(GenTreeStoreInd* tree)
654 assert(tree->OperIs(GT_STOREIND));
656 regMaskTP killMask = RBM_NONE;
658 GenTree* data = tree->Data();
660 GCInfo::WriteBarrierForm writeBarrierForm = compiler->codeGen->gcInfo.gcIsWriteBarrierCandidate(tree, data);
661 if (writeBarrierForm != GCInfo::WBF_NoBarrier)
663 if (compiler->codeGen->genUseOptimizedWriteBarriers(writeBarrierForm))
665 // We can't determine the exact helper to be used at this point, because it depends on
666 // the allocated register for the `data` operand. However, all the (x86) optimized
667 // helpers have the same kill set: EDX. And note that currently, only x86 can return
668 // `true` for genUseOptimizedWriteBarriers().
669 killMask = RBM_CALLEE_TRASH_NOGC;
673 // Figure out which helper we're going to use, and then get the kill set for that helper.
674 CorInfoHelpFunc helper =
675 compiler->codeGen->genWriteBarrierHelperForWriteBarrierForm(tree, writeBarrierForm);
676 killMask = compiler->compHelperCallKillSet(helper);
683 //------------------------------------------------------------------------
684 // getKillSetForNode: Return the registers killed by the given tree node.
687 // compiler - the compiler context to use
688 // tree - the tree for which the kill set is needed.
690 // Return Value: a register mask of the registers killed
692 regMaskTP LinearScan::getKillSetForNode(GenTree* tree)
694 regMaskTP killMask = RBM_NONE;
695 switch (tree->OperGet())
697 #ifdef _TARGET_XARCH_
699 // We use the 128-bit multiply when performing an overflow checking unsigned multiply
701 if (((tree->gtFlags & GTF_UNSIGNED) != 0) && tree->gtOverflowEx())
703 // Both RAX and RDX are killed by the operation
704 killMask = RBM_RAX | RBM_RDX;
709 #if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
712 killMask = RBM_RAX | RBM_RDX;
// Integer div/mod on xarch uses RAX:RDX; floating-point does not.
719 if (!varTypeIsFloating(tree->TypeGet()))
721 // Both RAX and RDX are killed by the operation
722 killMask = RBM_RAX | RBM_RDX;
725 #endif // _TARGET_XARCH_
728 if (tree->OperIsCopyBlkOp())
730 assert(tree->AsObj()->gtGcPtrCount != 0);
731 killMask = compiler->compHelperCallKillSet(CORINFO_HELP_ASSIGN_BYREF);
737 case GT_STORE_DYN_BLK:
739 GenTreeBlk* blkNode = tree->AsBlk();
740 bool isCopyBlk = varTypeIsStruct(blkNode->Data());
741 switch (blkNode->gtBlkOpKind)
743 case GenTreeBlk::BlkOpKindHelper:
746 killMask = compiler->compHelperCallKillSet(CORINFO_HELP_MEMCPY);
750 killMask = compiler->compHelperCallKillSet(CORINFO_HELP_MEMSET);
754 #ifdef _TARGET_XARCH_
755 case GenTreeBlk::BlkOpKindRepInstr:
758 // rep movs kills RCX, RDI and RSI
759 killMask = RBM_RCX | RBM_RDI | RBM_RSI;
763 // rep stos kills RCX and RDI.
764 // (Note that the Data() node, if not constant, will be assigned to
765 // RCX, but it's fine that this kills it, as the value is not available
766 // after this node in any case.)
767 killMask = RBM_RDI | RBM_RCX;
771 case GenTreeBlk::BlkOpKindRepInstr:
773 case GenTreeBlk::BlkOpKindUnroll:
774 case GenTreeBlk::BlkOpKindInvalid:
775 // for these 'gtBlkOpKind' kinds, we leave 'killMask' = RBM_NONE
782 killMask = compiler->compHelperCallKillSet(CORINFO_HELP_STOP_FOR_GC);
// On x86, FP-typed call results need a temp to transfer the value off the x87 stack.
786 if (compiler->compFloatingPointUsed)
788 if (tree->TypeGet() == TYP_DOUBLE)
790 needDoubleTmpForFPCall = true;
792 else if (tree->TypeGet() == TYP_FLOAT)
794 needFloatTmpForFPCall = true;
797 #endif // _TARGET_X86_
798 #if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
799 if (tree->IsHelperCall())
801 GenTreeCall* call = tree->AsCall();
802 CorInfoHelpFunc helpFunc = compiler->eeGetHelperNum(call->gtCallMethHnd);
803 killMask = compiler->compHelperCallKillSet(helpFunc);
806 #endif // defined(_TARGET_X86_) || defined(_TARGET_ARM_)
808 // if there is no FP used, we can ignore the FP kills
809 if (compiler->compFloatingPointUsed)
811 killMask = RBM_CALLEE_TRASH;
815 killMask = RBM_INT_CALLEE_TRASH;
818 if (tree->AsCall()->IsVirtualStub())
820 killMask |= compiler->virtualStubParamInfo->GetRegMask();
822 #else // !_TARGET_ARM_
823 // Verify that the special virtual stub call registers are in the kill mask.
824 // We don't just add them unconditionally to the killMask because for most architectures
825 // they are already in the RBM_CALLEE_TRASH set,
826 // and we don't want to introduce extra checks and calls in this hot function.
827 assert(!tree->AsCall()->IsVirtualStub() || ((killMask & compiler->virtualStubParamInfo->GetRegMask()) ==
828 compiler->virtualStubParamInfo->GetRegMask()));
833 killMask = getKillSetForStoreInd(tree->AsStoreInd());
836 #if defined(PROFILING_SUPPORTED)
837 // If this method requires profiler ELT hook then mark these nodes as killing
838 // callee trash registers (excluding RAX and XMM0). The reason for this is that
839 // profiler callback would trash these registers. See vm\amd64\asmhelpers.asm for
842 if (compiler->compIsProfilerHookNeeded())
844 killMask = compiler->compHelperCallKillSet(CORINFO_HELP_PROF_FCN_LEAVE);
849 if (compiler->compIsProfilerHookNeeded())
851 killMask = compiler->compHelperCallKillSet(CORINFO_HELP_PROF_FCN_TAILCALL);
854 #endif // PROFILING_SUPPORTED
857 // for all other 'tree->OperGet()' kinds, leave 'killMask' = RBM_NONE
863 //------------------------------------------------------------------------
864 // buildKillPositionsForNode:
865 // Given some tree node add refpositions for all the registers this node kills
868 // tree - the tree for which kill positions should be generated
869 // currentLoc - the location at which the kills should be added
872 // true - kills were inserted
873 // false - no kills were inserted
876 // The return value is needed because if we have any kills, we need to make sure that
877 // all defs are located AFTER the kills. On the other hand, if there aren't kills,
878 // the multiple defs for a regPair are in different locations.
879 // If we generate any kills, we will mark all currentLiveVars as being preferenced
880 // to avoid the killed registers. This is somewhat conservative.
882 bool LinearScan::buildKillPositionsForNode(GenTree* tree, LsraLocation currentLoc)
884 regMaskTP killMask = getKillSetForNode(tree);
// A kill mask equal to one of the callee-trash sets indicates an ordinary call kill.
885 bool isCallKill = ((killMask == RBM_INT_CALLEE_TRASH) || (killMask == RBM_CALLEE_TRASH));
886 if (killMask != RBM_NONE)
888 // The killMask identifies a set of registers that will be used during codegen.
889 // Mark these as modified here, so when we do final frame layout, we'll know about
890 // all these registers. This is especially important if killMask contains
891 // callee-saved registers, which affect the frame size since we need to save/restore them.
892 // In the case where we have a copyBlk with GC pointers, we may need to call the
893 // CORINFO_HELP_ASSIGN_BYREF helper, which kills callee-saved RSI and RDI, if
894 // LSRA doesn't assign RSI/RDI, they wouldn't get marked as modified until codegen,
895 // which is too late.
896 compiler->codeGen->regSet.rsSetRegsModified(killMask DEBUGARG(true));
898 addRefsForPhysRegMask(killMask, currentLoc, RefTypeKill, true);
900 // TODO-CQ: It appears to be valuable for both fp and int registers to avoid killing the callee
901 // save regs on infrequently executed paths. However, it results in a large number of asmDiffs,
902 // many of which appear to be regressions (because there is more spill on the infrequently path),
903 // but are not really because the frequent path becomes smaller. Validating these diffs will need
904 // to be done before making this change.
905 // if (!blockSequence[curBBSeqNum]->isRunRarely())
906 if (enregisterLocalVars)
// Walk all currently-live tracked variables and steer their preferences away
// from the killed registers.
908 VarSetOps::Iter iter(compiler, currentLiveVars);
909 unsigned varIndex = 0;
910 while (iter.NextElem(&varIndex))
912 unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
913 LclVarDsc* varDsc = compiler->lvaTable + varNum;
914 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
915 if (varTypeNeedsPartialCalleeSave(varDsc->lvType))
917 if (!VarSetOps::IsMember(compiler, largeVectorCalleeSaveCandidateVars, varIndex))
923 #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
924 if (varTypeIsFloating(varDsc) &&
925 !VarSetOps::IsMember(compiler, fpCalleeSaveCandidateVars, varIndex))
929 Interval* interval = getIntervalForLocalVar(varIndex);
932 interval->preferCalleeSave = true;
// Prefer any register of the interval's type that is not killed here.
934 regMaskTP newPreferences = allRegs(interval->registerType) & (~killMask);
936 if (newPreferences != RBM_NONE)
938 interval->updateRegisterPreferences(newPreferences);
942 // If there are no callee-saved registers, the call could kill all the registers.
943 // This is a valid state, so in that case assert should not trigger. The RA will spill in order to
944 // free a register later.
945 assert(compiler->opts.compDbgEnC || (calleeSaveRegs(varDsc->lvType)) == RBM_NONE);
// If this node kills GC refs, add a KillGCRefs RefPosition excluding the arg registers.
950 if (compiler->killGCRefs(tree))
952 RefPosition* pos = newRefPosition((Interval*)nullptr, currentLoc, RefTypeKillGCRefs, tree,
953 (allRegs(TYP_REF) & ~RBM_ARG_REGS));
961 //----------------------------------------------------------------------------
962 // defineNewInternalTemp: Defines a ref position for an internal temp.
965 // tree - Gentree node requiring an internal register
966 // regType - Register type
967 // currentLoc - Location of the temp Def position
968 // regMask - register mask of candidates for temp
970 RefPosition* LinearScan::defineNewInternalTemp(GenTree* tree, RegisterType regType, regMaskTP regMask)
972 Interval* current = newInterval(regType);
973 current->isInternal = true;
// NOTE(review): 'currentLoc' is documented as a parameter above but is not in
// the signature here — presumably a LinearScan member; confirm.
974 return newRefPosition(current, currentLoc, RefTypeDef, tree, regMask, 0);
977 //------------------------------------------------------------------------
978 // buildInternalRegisterDefsForNode - build Def positions for internal
979 // registers required for tree node.
982 // tree - Gentree node that needs internal registers
983 // temps - in-out array which is populated with ref positions
984 // created for Def of internal registers
987 // The total number of Def positions created for internal registers of tree no.
988 int LinearScan::buildInternalRegisterDefsForNode(GenTree* tree, TreeNodeInfo* info, RefPosition* temps[])
991 int internalIntCount = info->internalIntCount;
992 regMaskTP internalCands = info->getInternalCandidates(this);
994 // If the number of internal integer registers required is the same as the number of candidate integer registers in
995 // the candidate set, then they must be handled as fixed registers.
996 // (E.g. for the integer registers that floating point arguments must be copied into for a varargs call.)
997 bool fixedRegs = false;
998 regMaskTP internalIntCandidates = (internalCands & allRegs(TYP_INT));
999 if (((int)genCountBits(internalIntCandidates)) == internalIntCount)
// Create a Def position for each required internal integer register.
1004 for (count = 0; count < internalIntCount; count++)
1006 regMaskTP internalIntCands = (internalCands & allRegs(TYP_INT));
// In the fixed-register case, peel off the lowest candidate bit and remove it
// from the remaining candidate set so each temp gets a distinct register.
1009 internalIntCands = genFindLowestBit(internalIntCands);
1010 internalCands &= ~internalIntCands;
1012 temps[count] = defineNewInternalTemp(tree, IntRegisterType, internalIntCands);
// Create a Def position for each required internal floating-point register.
1015 int internalFloatCount = info->internalFloatCount;
1016 for (int i = 0; i < internalFloatCount; i++)
1018 regMaskTP internalFPCands = (internalCands & internalFloatRegCandidates());
1019 temps[count++] = defineNewInternalTemp(tree, FloatRegisterType, internalFPCands);
1022 assert(count < MaxInternalRegisters);
1023 assert(count == (internalIntCount + internalFloatCount));
1027 //------------------------------------------------------------------------
1028 // buildInternalRegisterUsesForNode - adds Use positions for internal
1029 // registers required for tree node.
1032 // tree - Gentree node that needs internal registers
1033 // defs - int array containing Def positions of internal
1035 // total - Total number of Def positions in 'defs' array.
1039 void LinearScan::buildInternalRegisterUsesForNode(GenTree* tree, TreeNodeInfo* info, RefPosition* defs[], int total)
1041 assert(total < MaxInternalRegisters);
1043 // defs[] has been populated by buildInternalRegisterDefsForNode
1044 // now just add uses to the defs previously added.
1045 for (int i = 0; i < total; i++)
1047 RefPosition* prevRefPosition = defs[i];
1048 assert(prevRefPosition != nullptr);
// Each use is constrained to the same register set its def was assigned.
1049 regMaskTP mask = prevRefPosition->registerAssignment;
1050 if (prevRefPosition->isPhysRegRef)
// A def tied to a physical register gets a use of that physical register.
1052 newRefPosition(defs[i]->getReg()->regNum, currentLoc, RefTypeUse, tree, mask);
// Otherwise (the 'else' structure is omitted from this listing) build a use of
// the temp's interval; if requested, mark it delay-free so the internal temp
// cannot be assigned the same register as the node's destination.
1056 RefPosition* newest = newRefPosition(defs[i]->getInterval(), currentLoc, RefTypeUse, tree, mask, 0);
1058 if (info->isInternalRegDelayFree)
1060 newest->delayRegFree = true;
1066 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
1067 //------------------------------------------------------------------------
1068 // buildUpperVectorSaveRefPositions - Create special RefPositions for saving
1069 // the upper half of a set of large vector.
1072 // tree - The current node being handled
1073 // currentLoc - The location of the current node
1075 // Return Value: Returns the set of lclVars that are killed by this node, and therefore
1076 // required RefTypeUpperVectorSaveDef RefPositions.
1078 // Notes: The returned set is used by buildUpperVectorRestoreRefPositions.
// NOTE(review): the return-type line preceding the qualified name below is
// omitted from this listing -- confirm against the full source.
1081 LinearScan::buildUpperVectorSaveRefPositions(GenTree* tree, LsraLocation currentLoc)
1083 assert(enregisterLocalVars);
1084 VARSET_TP liveLargeVectors(VarSetOps::MakeEmpty(compiler));
1085 regMaskTP fpCalleeKillSet = RBM_NONE;
1086 if (!VarSetOps::IsEmpty(compiler, largeVectorVars))
1088 // We actually need to find any calls that kill the upper-half of the callee-save vector registers.
1089 // But we will use as a proxy any node that kills floating point registers.
1090 // (Note that some calls are masquerading as other nodes at this point so we can't just check for calls.)
1091 fpCalleeKillSet = getKillSetForNode(tree);
1092 if ((fpCalleeKillSet & RBM_FLT_CALLEE_TRASH) != RBM_NONE)
// Only large vectors that are live across this node need an upper-half save.
1094 VarSetOps::AssignNoCopy(compiler, liveLargeVectors,
1095 VarSetOps::Intersection(compiler, currentLiveVars, largeVectorVars));
1096 VarSetOps::Iter iter(compiler, liveLargeVectors);
1097 unsigned varIndex = 0;
1098 while (iter.NextElem(&varIndex))
1100 Interval* varInterval = getIntervalForLocalVar(varIndex);
// A fresh internal interval holds the saved upper half; its SaveDef is
// constrained to the callee-saved float registers.
1101 Interval* tempInterval = newInterval(varInterval->registerType);
1102 tempInterval->isInternal = true;
1104 newRefPosition(tempInterval, currentLoc, RefTypeUpperVectorSaveDef, tree, RBM_FLT_CALLEE_SAVED);
1105 // We are going to save the existing relatedInterval of varInterval on tempInterval, so that we can set
1106 // the tempInterval as the relatedInterval of varInterval, so that we can build the corresponding
1107 // RefTypeUpperVectorSaveUse RefPosition. We will then restore the relatedInterval onto varInterval,
1108 // and set varInterval as the relatedInterval of tempInterval.
1109 tempInterval->relatedInterval = varInterval->relatedInterval;
1110 varInterval->relatedInterval = tempInterval;
1114 return liveLargeVectors;
1117 // buildUpperVectorRestoreRefPositions - Create special RefPositions for restoring
1118 // the upper half of a set of large vectors.
1121 // tree - The current node being handled
1122 // currentLoc - The location of the current node
1123 // liveLargeVectors - The set of lclVars needing restores (returned by buildUpperVectorSaveRefPositions)
1125 void LinearScan::buildUpperVectorRestoreRefPositions(GenTree* tree,
1126 LsraLocation currentLoc,
1127 VARSET_VALARG_TP liveLargeVectors)
1129 assert(enregisterLocalVars);
1130 if (!VarSetOps::IsEmpty(compiler, liveLargeVectors))
1132 VarSetOps::Iter iter(compiler, liveLargeVectors);
1133 unsigned varIndex = 0;
1134 while (iter.NextElem(&varIndex))
1136 Interval* varInterval = getIntervalForLocalVar(varIndex);
// The save pass installed the temp (holding the saved upper half) as
// varInterval's relatedInterval; build the matching restore use of it,
// again constrained to callee-saved float registers.
1137 Interval* tempInterval = varInterval->relatedInterval;
1138 assert(tempInterval->isInternal == true);
1140 newRefPosition(tempInterval, currentLoc, RefTypeUpperVectorSaveUse, tree, RBM_FLT_CALLEE_SAVED);
1141 // Restore the relatedInterval onto varInterval, and set varInterval as the relatedInterval
1143 varInterval->relatedInterval = tempInterval->relatedInterval;
1144 tempInterval->relatedInterval = varInterval;
1148 #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
1151 //------------------------------------------------------------------------
1152 // ComputeOperandDstCount: computes the number of registers defined by a
1155 // For most nodes, this is simple:
1156 // - Nodes that do not produce values (e.g. stores and other void-typed
1157 // nodes) and nodes that immediately use the registers they define
1158 // produce no registers
1159 // - Nodes that are marked as defining N registers define N registers.
1161 // For contained nodes, however, things are more complicated: for purposes
1162 // of bookkeeping, a contained node is treated as producing the transitive
1163 // closure of the registers produced by its sources.
1166 // operand - The operand for which to compute a register count.
1169 // The number of registers defined by `operand`.
1172 int LinearScan::ComputeOperandDstCount(GenTree* operand)
1174 // GT_ARGPLACE is the only non-LIR node that is currently in the trees at this stage, though
1175 // note that it is not in the linear order. It seems best to check for !IsLIR() rather than
1176 // GT_ARGPLACE directly, since it's that characteristic that makes it irrelevant for this method.
1177 if (!operand->IsLIR())
// (the 'return 0;' for the non-LIR case is omitted from this listing)
1181 if (operand->isContained())
// Contained nodes: recursively sum the register counts of the operands.
1184 for (GenTree* op : operand->Operands())
1186 dstCount += ComputeOperandDstCount(op);
// (the 'return dstCount;' for the contained case is omitted from this listing)
1191 if (operand->IsUnusedValue())
1193 // Operands that define an unused value do not produce any registers.
// (the 'return 0;' for the unused-value case is omitted from this listing)
1196 if (operand->IsValue())
1198 // Operands that are values and are not contained consume all of their operands
1199 // and produce one or more registers.
1200 return operand->GetRegisterDstCount();
1204 // This must be one of the operand types that are neither contained nor produce a value.
1205 // Stores and void-typed operands may be encountered when processing call nodes, which contain
1206 // pointers to argument setup stores.
1207 assert(operand->OperIsStore() || operand->OperIsBlkOp() || operand->OperIsPutArgStk() ||
1208 operand->OperIsCompare() || operand->OperIs(GT_CMP) || operand->IsSIMDEqualityOrInequality() ||
1209 operand->TypeGet() == TYP_VOID);
1214 //------------------------------------------------------------------------
1215 // ComputeAvailableSrcCount: computes the number of registers available as
1216 // sources for a node.
1218 // This is simply the sum of the number of registers produced by each
1219 // operand to the node.
1222 // node - The node for which to compute a source count.
1225 // The number of registers available as sources for `node`.
1228 int LinearScan::ComputeAvailableSrcCount(GenTree* node)
1231 for (GenTree* operand : node->Operands())
1233 numSources += ComputeOperandDstCount(operand);
1240 //------------------------------------------------------------------------
1241 // buildRefPositionsForNode: The main entry point for building the RefPositions
1242 // and "tree temp" Intervals for a given node.
1245 // tree - The node for which we are building RefPositions
1246 // block - The BasicBlock in which the node resides
1247 // currentLoc - The LsraLocation of the given node
1249 void LinearScan::buildRefPositionsForNode(GenTree* tree, BasicBlock* block, LsraLocation currentLoc)
1252 assert(!isRegPairType(tree->TypeGet()));
1253 #endif // _TARGET_ARM_
1255 // The LIR traversal doesn't visit GT_LIST or GT_ARGPLACE nodes.
1256 // GT_CLS_VAR nodes should have been eliminated by rationalizer.
1257 assert(tree->OperGet() != GT_ARGPLACE);
1258 assert(tree->OperGet() != GT_LIST);
1259 assert(tree->OperGet() != GT_CLS_VAR);
1261 // The LIR traversal visits only the first node in a GT_FIELD_LIST.
1262 assert((tree->OperGet() != GT_FIELD_LIST) || tree->AsFieldList()->IsFieldListHead());
1264 // The set of internal temporary registers used by this node are stored in the
1265 // gtRsvdRegs register mask. Clear it out.
1266 tree->gtRsvdRegs = RBM_NONE;
1272 compiler->gtDispTree(tree, nullptr, nullptr, true);
1276 // If the node produces a value that will be consumed by a parent node, its TreeNodeInfo will
1277 // be allocated in the LocationInfoListNode. Otherwise, we'll just use a local value that will
1278 // be thrown away when we're done.
1279 LocationInfoListNode* locationInfo = nullptr;
1280 TreeNodeInfo tempInfo;
1281 TreeNodeInfo* info = nullptr;
1284 if (!tree->isContained())
1286 if (tree->IsValue())
1288 locationInfo = listNodePool.GetNode(currentLoc, nullptr, tree);
1289 currentNodeInfo = &locationInfo->info;
1293 currentNodeInfo = &tempInfo;
1295 info = currentNodeInfo;
1296 info->Initialize(this, tree);
1298 assert(info->IsValid(this));
// 'consume'/'produce' are the register use/def counts computed by TreeNodeInfo.
1299 consume = info->srcCount;
1300 produce = info->dstCount;
1306 tree->dumpLIRFlags();
1315 if (tree->isContained())
1317 JITDUMP("Contained\n");
1319 else if (tree->OperIs(GT_LCL_VAR, GT_LCL_FLD) && tree->IsUnusedValue())
1321 JITDUMP("Unused\n");
1325 JITDUMP(" consume=%d produce=%d\n", consume, produce);
// Sanity check: the registers consumed must match what the operands produce.
1330 assert(((consume == 0) && (produce == 0)) || (ComputeAvailableSrcCount(tree) == consume));
// Tracked (register-candidate) lclVar reads are handled by simply recording the
// lclVar interval in the def list; the actual Use is built by the consuming node.
1332 if (tree->OperIs(GT_LCL_VAR, GT_LCL_FLD))
1334 LclVarDsc* const varDsc = &compiler->lvaTable[tree->AsLclVarCommon()->gtLclNum];
1335 if (isCandidateVar(varDsc))
1337 assert(consume == 0);
1339 // We handle tracked variables differently from non-tracked ones. If it is tracked,
1340 // we simply add a use or def of the tracked variable. Otherwise, for a use we need
1341 // to actually add the appropriate references for loading or storing the variable.
1343 // It won't actually get used or defined until the appropriate ancestor tree node
1344 // is processed, unless this is marked "isLocalDefUse" because it is a stack-based argument
1347 assert(varDsc->lvTracked);
1348 unsigned varIndex = varDsc->lvVarIndex;
1350 if (!tree->IsUnusedValue() && !tree->isContained())
1352 assert(produce != 0);
1354 locationInfo->interval = getIntervalForLocalVar(varIndex);
1355 defList.Append(locationInfo);
1360 if (tree->isContained())
1365 // Handle the case of local variable assignment
1366 Interval* varDefInterval = nullptr;
1368 GenTree* defNode = tree;
1370 // noAdd means the node creates a def but for purposes of map
1371 // management do not add it because data is not flowing up the
1374 bool noAdd = info->isLocalDefUse;
1375 RefPosition* prevPos = nullptr;
1377 bool isSpecialPutArg = false;
1379 assert(!tree->OperIsAssignment());
// Local stores: def the lclVar's interval and set up source->dest preferencing.
1380 if (tree->OperIsLocalStore())
1382 GenTreeLclVarCommon* const store = tree->AsLclVarCommon();
1383 assert((consume > 1) || (regType(store->gtOp1->TypeGet()) == regType(store->TypeGet())));
1385 LclVarDsc* varDsc = &compiler->lvaTable[store->gtLclNum];
1386 if (isCandidateVar(varDsc))
1388 // We always push the tracked lclVar intervals
1389 assert(varDsc->lvTracked);
1390 unsigned varIndex = varDsc->lvVarIndex;
1391 varDefInterval = getIntervalForLocalVar(varIndex);
1392 assert((store->gtFlags & GTF_VAR_DEF) != 0);
1400 assert(consume <= MAX_RET_REG_COUNT);
1403 // Get the location info for the register defined by the first operand.
1404 LocationInfoListNode& operandInfo = *(useList.Begin());
1405 assert(operandInfo.treeNode == tree->gtGetOp1());
1407 Interval* srcInterval = operandInfo.interval;
1408 if (srcInterval->relatedInterval == nullptr)
1410 // Preference the source to the dest, unless this is a non-last-use localVar.
1411 // Note that the last-use info is not correct, but it is a better approximation than preferencing
1412 // the source to the dest, if the source's lifetime extends beyond the dest.
1413 if (!srcInterval->isLocalVar || (operandInfo.treeNode->gtFlags & GTF_VAR_DEATH) != 0)
1415 srcInterval->assignRelatedInterval(varDefInterval);
1418 else if (!srcInterval->isLocalVar)
1420 // Preference the source to dest, if src is not a local var.
1421 srcInterval->assignRelatedInterval(varDefInterval);
// A store of a BITCAST is retyped so the source def can live in a register of
// the destination's register file, avoiding a cross-register-file copy.
1425 else if (store->gtOp1->OperIs(GT_BITCAST))
1427 store->gtType = store->gtOp1->gtType = store->gtOp1->AsUnOp()->gtOp1->TypeGet();
1429 // Get the location info for the register defined by the first operand.
1430 LocationInfoListNode& operandInfo = *(useList.Begin());
1431 assert(operandInfo.treeNode == tree->gtGetOp1());
1433 Interval* srcInterval = operandInfo.interval;
1434 srcInterval->registerType = regType(store->TypeGet());
1436 RefPosition* srcDefPosition = srcInterval->firstRefPosition;
1437 assert(srcDefPosition != nullptr);
1438 assert(srcDefPosition->refType == RefTypeDef);
1439 assert(srcDefPosition->treeNode == store->gtOp1);
1441 srcDefPosition->registerAssignment = allRegs(store->TypeGet());
1442 operandInfo.info.setSrcCandidates(this, allRegs(store->TypeGet()));
1445 else if (noAdd && produce == 0)
1447 // Dead nodes may remain after tree rationalization, decomposition or lowering.
1448 // They should be marked as UnusedValue.
1449 // TODO-Cleanup: Identify and remove these dead nodes prior to register allocation.
1450 assert(!noAdd || (produce != 0));
1453 Interval* prefSrcInterval = nullptr;
1455 // If this is a binary operator that will be encoded with 2 operand fields
1456 // (i.e. the target is read-modify-write), preference the dst to op1.
1458 bool hasDelayFreeSrc = info->hasDelayFreeSrc;
1460 #if defined(DEBUG) && defined(_TARGET_X86_)
1461 // On x86, `LSRA_LIMIT_CALLER` is too restrictive to allow the use of special put args: this stress mode
1462 // leaves only three registers allocatable--eax, ecx, and edx--of which the latter two are also used for the
1463 // first two integral arguments to a call. This can leave us with too few registers to succesfully allocate in
1464 // situations like the following:
1466 // t1026 = lclVar ref V52 tmp35 u:3 REG NA <l:$3a1, c:$98d>
1469 // t1352 = * putarg_reg ref REG NA
1471 // t342 = lclVar int V14 loc6 u:4 REG NA $50c
1473 // t343 = const int 1 REG NA $41
1477 // t344 = * + int REG NA $495
1479 // t345 = lclVar int V04 arg4 u:2 REG NA $100
1483 // t346 = * % int REG NA $496
1486 // t1353 = * putarg_reg int REG NA
1488 // t1354 = lclVar ref V52 tmp35 (last use) REG NA
1491 // t1355 = * lea(b+0) byref REG NA
1493 // Here, the first `putarg_reg` would normally be considered a special put arg, which would remove `ecx` from the
1494 // set of allocatable registers, leaving only `eax` and `edx`. The allocator will then fail to allocate a register
1495 // for the def of `t345` if arg4 is not a register candidate: the corresponding ref position will be constrained to
1496 // { `ecx`, `ebx`, `esi`, `edi` }, which `LSRA_LIMIT_CALLER` will further constrain to `ecx`, which will not be
1497 // available due to the special put arg.
1498 const bool supportsSpecialPutArg = getStressLimitRegs() != LSRA_LIMIT_CALLER;
1500 const bool supportsSpecialPutArg = true;
1503 if (supportsSpecialPutArg)
1505 if ((tree->OperGet() == GT_PUTARG_REG) && isCandidateLocalRef(tree->gtGetOp1()) &&
1506 (tree->gtGetOp1()->gtFlags & GTF_VAR_DEATH) == 0)
1508 // This is the case for a "pass-through" copy of a lclVar. In the case where it is a non-last-use,
1509 // we don't want the def of the copy to kill the lclVar register, if it is assigned the same register
1510 // (which is actually what we hope will happen).
1511 JITDUMP("Setting putarg_reg as a pass-through of a non-last use lclVar\n");
1513 // Get the register information for the first operand of the node.
1514 LocationInfoListNode* operandDef = useList.Begin();
1515 assert(operandDef->treeNode == tree->gtGetOp1());
1517 // Preference the destination to the interval of the first register defined by the first operand.
1518 Interval* srcInterval = operandDef->interval;
1519 assert(srcInterval->isLocalVar);
1520 prefSrcInterval = srcInterval;
1521 isSpecialPutArg = true;
1522 INDEBUG(specialPutArgCount++);
1524 else if (tree->IsCall())
1526 INDEBUG(specialPutArgCount = 0);
1530 RefPosition* internalRefs[MaxInternalRegisters];
1533 // If we are constraining the registers for allocation, we will modify all the RefPositions
1534 // we've built for this node after we've created them. In order to do that, we'll remember
1535 // the last RefPosition prior to those created for this node.
1536 RefPositionIterator refPositionMark = refPositions.backPosition();
1539 // Make intervals for all the 'internal' register requirements for this node,
1540 // where internal means additional registers required temporarily.
1541 // Create a RefTypeDef RefPosition for each such interval.
1542 int internalCount = buildInternalRegisterDefsForNode(tree, info, internalRefs);
1544 // Make use RefPositions for all used values.
1546 for (LocationInfoListNode *listNode = useList.Begin(), *end = useList.End(); listNode != end;
1547 listNode = listNode->Next())
1549 LocationInfo& locInfo = *static_cast<LocationInfo*>(listNode);
1551 // For tree temps, a use is always a last use and the end of the range;
1552 // this is set by default in newRefPosition
1553 GenTree* const useNode = locInfo.treeNode;
1554 assert(useNode != nullptr);
1556 Interval* srcInterval = locInfo.interval;
1557 TreeNodeInfo& useNodeInfo = locInfo.info;
1558 if (useNodeInfo.isTgtPref)
1560 prefSrcInterval = srcInterval;
1563 const bool delayRegFree = (hasDelayFreeSrc && useNodeInfo.isDelayFree);
1565 regMaskTP candidates = useNodeInfo.getSrcCandidates(this);
1567 regMaskTP allCandidates = candidates;
1569 if (useNode->OperIsPutArgSplit() || useNode->OperIsMultiRegOp())
1571 // get i-th candidate, set bits in useCandidates must be in sequential order.
1572 candidates = genFindLowestReg(allCandidates);
1573 allCandidates &= ~candidates;
1575 #endif // _TARGET_ARM_
1577 assert((candidates & allRegs(srcInterval->registerType)) != 0);
1579 GenTree* refPosNode;
1580 if (srcInterval->isLocalVar)
1582 // We have only approximate last-use information at this point. This is because the
1583 // execution order doesn't actually reflect the true order in which the localVars
1584 // are referenced - but the order of the RefPositions will, so we recompute it after
1585 // RefPositions are built.
1586 // Use the old value for setting currentLiveVars - note that we do this with the
1587 // not-quite-correct setting of lastUse. However, this is OK because
1588 // 1) this is only for preferencing, which doesn't require strict correctness, and
1589 // for determing which largeVectors require having their upper-half saved & restored.
1590 // (Issue #17481 tracks the issue that this system results in excessive spills and
1591 // should be changed.)
1592 // 2) the cases where these out-of-order uses occur should not overlap a kill (they are
1593 // only known to occur within a single expression).
1594 if ((useNode->gtFlags & GTF_VAR_DEATH) != 0)
1596 VarSetOps::RemoveElemD(compiler, currentLiveVars, srcInterval->getVarIndex(compiler));
1598 refPosNode = useNode;
1602 // For non-localVar uses we record nothing, as nothing needs to be written back to the tree.
1603 refPosNode = nullptr;
1606 RefPosition* pos = newRefPosition(srcInterval, currentLoc, RefTypeUse, refPosNode, candidates, 0);
1609 pos->delayRegFree = true;
1612 if (useNode->IsRegOptional())
1614 pos->setAllocateIfProfitable(true);
1618 // Create additional use RefPositions for multi-reg nodes.
1619 for (int idx = 1; idx < locInfo.info.dstCount; idx++)
1621 noway_assert(srcInterval->relatedInterval != nullptr);
1622 srcInterval = srcInterval->relatedInterval;
1624 if (useNode->OperIsPutArgSplit() ||
1625 (compiler->opts.compUseSoftFP && (useNode->OperIsPutArgReg() || useNode->OperGet() == GT_BITCAST)))
1627 // get first candidate, set bits in useCandidates must be in sequential order.
1628 candidates = genFindLowestReg(allCandidates);
1629 allCandidates &= ~candidates;
1631 #endif // _TARGET_ARM_
1632 RefPosition* pos = newRefPosition(srcInterval, currentLoc, RefTypeUse, refPosNode, candidates, idx);
1637 assert(consumed == consume);
// The use list nodes are done with; return them to the pool for reuse.
1640 listNodePool.ReturnNodes(useList);
1643 buildInternalRegisterUsesForNode(tree, info, internalRefs, internalCount);
1645 RegisterType registerType = getDefType(tree);
1646 regMaskTP candidates = info->getDstCandidates(this);
1647 regMaskTP useCandidates = info->getSrcCandidates(this);
1650 if (VERBOSE && produce)
1652 printf("Def candidates ");
1653 dumpRegMask(candidates);
1654 printf(", Use candidates ");
1655 dumpRegMask(useCandidates);
1660 #if defined(_TARGET_AMD64_)
1661 // Multi-reg call node is the only node that could produce multi-reg value
1662 assert(produce <= 1 || (tree->IsMultiRegCall() && produce == MAX_RET_REG_COUNT));
1663 #endif // _TARGET_xxx_
1665 // Add kill positions before adding def positions
1666 buildKillPositionsForNode(tree, currentLoc + 1);
1668 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
1669 VARSET_TP liveLargeVectors(VarSetOps::UninitVal());
1670 if (enregisterLocalVars && (RBM_FLT_CALLEE_SAVED != RBM_NONE))
1672 // Build RefPositions for saving any live large vectors.
1673 // This must be done after the kills, so that we know which large vectors are still live.
1674 VarSetOps::AssignNoCopy(compiler, liveLargeVectors, buildUpperVectorSaveRefPositions(tree, currentLoc + 1));
1676 #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
1678 ReturnTypeDesc* retTypeDesc = nullptr;
1679 bool isMultiRegCall = tree->IsMultiRegCall();
1682 retTypeDesc = tree->AsCall()->GetReturnTypeDesc();
1683 assert((int)genCountBits(candidates) == produce);
1684 assert(candidates == retTypeDesc->GetABIReturnRegs());
// Now create the Def RefPositions for each register this node produces.
// Defs are placed one location after the node so they don't conflict with uses.
1688 LocationInfoList locationInfoList;
1689 LsraLocation defLocation = currentLoc + 1;
1690 Interval* interval = varDefInterval;
1691 // For nodes that define multiple registers, subsequent intervals will be linked using the 'relatedInterval' field.
1692 // Keep track of the previous interval allocated, for that purpose.
1693 Interval* prevInterval = nullptr;
1694 for (int i = 0; i < produce; i++)
1696 regMaskTP currCandidates = candidates;
1698 // In case of multi-reg call node, registerType is given by
1699 // the type of ith position return register.
1702 registerType = retTypeDesc->GetReturnRegType((unsigned)i);
1703 currCandidates = genRegMask(retTypeDesc->GetABIReturnReg(i));
1704 useCandidates = allRegs(registerType);
1708 // If oper is GT_PUTARG_REG, set bits in useCandidates must be in sequential order.
1709 if (tree->OperIsPutArgSplit() || tree->OperIsMultiRegOp())
1711 // get i-th candidate
1712 currCandidates = genFindLowestReg(candidates);
1713 candidates &= ~currCandidates;
1715 #endif // _TARGET_ARM_
1717 if (interval == nullptr)
1719 // Make a new interval
1720 interval = newInterval(registerType);
1721 if (hasDelayFreeSrc || info->isInternalRegDelayFree)
1723 interval->hasInterferingUses = true;
1725 else if (tree->OperIsConst())
1727 assert(!tree->IsReuseRegVal());
1728 interval->isConstant = true;
1731 if ((currCandidates & useCandidates) != RBM_NONE)
1733 interval->updateRegisterPreferences(currCandidates & useCandidates);
1736 if (isSpecialPutArg)
1738 interval->isSpecialPutArg = true;
// Otherwise we are defining an existing lclVar interval (varDefInterval);
// keep the live-variable set up to date for non-last-use defs.
1743 assert(registerTypesEquivalent(interval->registerType, registerType));
1744 assert(interval->isLocalVar);
1745 if ((tree->gtFlags & GTF_VAR_DEATH) == 0)
1747 VarSetOps::AddElemD(compiler, currentLiveVars, interval->getVarIndex(compiler));
1751 if (prefSrcInterval != nullptr)
1753 interval->assignRelatedIntervalIfUnassigned(prefSrcInterval);
1756 // for assignments, we want to create a refposition for the def
1762 locationInfo->interval = interval;
1763 prevInterval = interval;
1764 defList.Append(locationInfo);
1768 // This is the 2nd or subsequent register defined by a multi-reg node.
1769 // Connect them using 'relatedInterval'.
1770 noway_assert(prevInterval != nullptr);
1771 prevInterval->relatedInterval = interval;
1772 prevInterval = interval;
1773 prevInterval->isMultiReg = true;
1774 interval->isMultiReg = true;
1778 RefPosition* pos = newRefPosition(interval, defLocation, RefTypeDef, defNode, currCandidates, (unsigned)i);
1779 if (info->isLocalDefUse)
1781 // This must be an unused value, OR it is a special node for which we allocate
1782 // a target register even though it produces no value.
1783 assert(defNode->IsUnusedValue() || (defNode->gtOper == GT_LOCKADD));
1784 pos->isLocalDefUse = true;
1785 pos->lastUse = true;
1787 interval->updateRegisterPreferences(currCandidates);
1788 interval->updateRegisterPreferences(useCandidates);
1792 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
1793 // SaveDef position must be at the same location as Def position of call node.
1794 if (enregisterLocalVars)
1796 buildUpperVectorRestoreRefPositions(tree, defLocation, liveLargeVectors);
1798 #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
1801 // If we are constraining registers, modify all the RefPositions we've just built to specify the
1802 // minimum reg count required.
1803 if ((getStressLimitRegs() != LSRA_LIMIT_NONE) || (getSelectionHeuristics() != LSRA_SELECT_DEFAULT))
1805 // The number of registers required for a tree node is the sum of
1806 // consume + produce + internalCount + specialPutArgCount.
1807 // This is the minimum set of registers that needs to be ensured in the candidate set of ref positions created.
1809 unsigned minRegCount =
1810 consume + produce + info->internalIntCount + info->internalFloatCount + specialPutArgCount;
// Walk only the RefPositions created for this node (those after refPositionMark).
1812 for (refPositionMark++; refPositionMark != refPositions.end(); refPositionMark++)
1814 RefPosition* newRefPosition = &(*refPositionMark);
1815 unsigned minRegCountForRef = minRegCount;
1816 if (RefTypeIsUse(newRefPosition->refType) && newRefPosition->delayRegFree)
1818 // If delayRegFree, then Use will interfere with the destination of the consuming node.
1819 // Therefore, we also need add the kill set of the consuming node to minRegCount.
1821 // For example consider the following IR on x86, where v01 and v02
1822 // are method args coming in ecx and edx respectively.
1825 // For GT_DIV, the minRegCount will be 3 without adding kill set of GT_DIV node.
1827 // Assume further JitStressRegs=2, which would constrain candidates to callee trashable
1828 // regs { eax, ecx, edx } on use positions of v01 and v02. LSRA allocates ecx for v01.
1829 // The use position of v02 cannot be allocated a reg since it is marked delay-reg free and
1830 // {eax,edx} are getting killed before the def of GT_DIV. For this reason, minRegCount for
1831 // the use position of v02 also needs to take into account the kill set of its consuming node.
1832 regMaskTP killMask = getKillSetForNode(tree);
1833 if (killMask != RBM_NONE)
1835 minRegCountForRef += genCountBits(killMask);
1838 newRefPosition->minRegCandidateCount = minRegCountForRef;
1839 if (newRefPosition->IsActualRef() && doReverseCallerCallee())
1841 Interval* interval = newRefPosition->getInterval();
1842 regMaskTP oldAssignment = newRefPosition->registerAssignment;
1843 regMaskTP calleeSaveMask = calleeSaveRegs(interval->registerType);
1844 newRefPosition->registerAssignment =
1845 getConstrainedRegMask(oldAssignment, calleeSaveMask, minRegCountForRef);
1846 if ((newRefPosition->registerAssignment != oldAssignment) && (newRefPosition->refType == RefTypeUse) &&
1847 !interval->isLocalVar)
1849 checkConflictingDefUse(newRefPosition);
1858 //------------------------------------------------------------------------
1859 // buildPhysRegRecords: Make an interval for each physical register
1861 void LinearScan::buildPhysRegRecords()
// Walk every physical register and initialize its RegRecord in 'physRegs'.
1863 RegisterType regType = IntRegisterType;
1864 for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg))
1866 RegRecord* curr = &physRegs[reg];
// NOTE(review): the loop-body line(s) that initialize 'curr' (presumably a
// call like curr->init(reg)) are omitted from this listing -- confirm
// against the full source.
1871 //------------------------------------------------------------------------
1872 // getNonEmptyBlock: Return the first non-empty block starting with 'block'
1875 // block - the BasicBlock from which we start looking
1878 // The first non-empty BasicBlock we find.
1880 BasicBlock* getNonEmptyBlock(BasicBlock* block)
1882 while (block != nullptr && block->bbTreeList == nullptr)
1884 BasicBlock* nextBlock = block->bbNext;
1885 // Note that here we use the version of NumSucc that does not take a compiler.
1886 // That way this doesn't have to take a compiler, or be an instance method, e.g. of LinearScan.
1887 // If we have an empty block, it must have jump type BBJ_NONE or BBJ_ALWAYS, in which
1888 // case we don't need the version that takes a compiler.
1889 assert(block->NumSucc() == 1 && ((block->bbJumpKind == BBJ_ALWAYS) || (block->bbJumpKind == BBJ_NONE)));
1890 // sometimes the first block is empty and ends with an uncond branch
1891 // assert( block->GetSucc(0) == nextBlock);
1894 assert(block != nullptr && block->bbTreeList != nullptr);
1898 //------------------------------------------------------------------------
1899 // insertZeroInitRefPositions: Handle lclVars that are live-in to the first block
1902 // Prior to calling this method, 'currentLiveVars' must be set to the set of register
1903 // candidate variables that are liveIn to the first block.
1904 // For each register candidate that is live-in to the first block:
1905 // - If it is a GC ref, or if compInitMem is set, a ZeroInit RefPosition will be created.
1906 // - Otherwise, it will be marked as spilled, since it will not be assigned a register
1907 // on entry and will be loaded from memory on the undefined path.
1908 // Note that, when the compInitMem option is not set, we may encounter these on
1909 // paths that are protected by the same condition as an earlier def. However, since
1910 // we don't do the analysis to determine this - and couldn't rely on always identifying
1911 // such cases even if we tried - we must conservatively treat the undefined path as
1912 // being possible. This is a relatively rare case, so the introduced conservatism is
1913 // not expected to warrant the analysis required to determine the best placement of
1914 // an initialization.
1916 void LinearScan::insertZeroInitRefPositions()
1918 assert(enregisterLocalVars);
// Debug-check that the caller set currentLiveVars to the register candidates
// that are live-in to the first block.
1920 VARSET_TP expectedLiveVars(VarSetOps::Intersection(compiler, registerCandidateVars, compiler->fgFirstBB->bbLiveIn));
1921 assert(VarSetOps::Equal(compiler, currentLiveVars, expectedLiveVars));
1924 // insert defs for this, then a block boundary
1926 VarSetOps::Iter iter(compiler, currentLiveVars);
1927 unsigned varIndex = 0;
1928 while (iter.NextElem(&varIndex))
1930 unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
1931 LclVarDsc* varDsc = compiler->lvaTable + varNum;
// Parameters are defined by the prolog, so only non-param candidates need
// zero-init or spill treatment here.
1932 if (!varDsc->lvIsParam && isCandidateVar(varDsc))
1934 JITDUMP("V%02u was live in to first block:", varNum);
1935 Interval* interval = getIntervalForLocalVar(varIndex);
1936 if (compiler->info.compInitMem || varTypeIsGC(varDsc->TypeGet()))
1938 JITDUMP(" creating ZeroInit\n");
1939 GenTree* firstNode = getNonEmptyBlock(compiler->fgFirstBB)->firstNode();
1941 newRefPosition(interval, MinLocation, RefTypeZeroInit, firstNode, allRegs(interval->registerType));
1942 varDsc->lvMustInit = true;
// Otherwise (else branch omitted from this listing) the var starts on the
// stack: mark the interval as spilled.
1946 setIntervalAsSpilled(interval);
1947 JITDUMP(" marking as spilled\n");
#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
//------------------------------------------------------------------------
// unixAmd64UpdateRegStateForArg: Sets the register state for an argument of type STRUCT for System V systems.
//
// Arguments:
//    argDsc - the LclVarDsc for the argument of interest
//
// Notes:
//    See Compiler::raUpdateRegStateForArg(RegState *regState, LclVarDsc *argDsc) in regalloc.cpp
//    for how state for argument is updated for unix non-structs and Windows AMD64 structs.
//
void LinearScan::unixAmd64UpdateRegStateForArg(LclVarDsc* argDsc)
    assert(varTypeIsStruct(argDsc));
    RegState* intRegState = &compiler->codeGen->intRegState;
    RegState* floatRegState = &compiler->codeGen->floatRegState;
    // A SysV struct may be passed in up to two registers (one per eightbyte).
    // Record the first register, if the argument has one.
    if ((argDsc->lvArgReg != REG_STK) && (argDsc->lvArgReg != REG_NA))
        // Each eightbyte goes in either a float (XMM) or an integer register;
        // mark it live-in on the matching RegState.
        if (genRegMask(argDsc->lvArgReg) & (RBM_ALLFLOAT))
            assert(genRegMask(argDsc->lvArgReg) & (RBM_FLTARG_REGS));
            floatRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvArgReg);
            assert(genRegMask(argDsc->lvArgReg) & (RBM_ARG_REGS));
            intRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvArgReg);
    // Record the second register (lvOtherArgReg), if present, the same way.
    if ((argDsc->lvOtherArgReg != REG_STK) && (argDsc->lvOtherArgReg != REG_NA))
        if (genRegMask(argDsc->lvOtherArgReg) & (RBM_ALLFLOAT))
            assert(genRegMask(argDsc->lvOtherArgReg) & (RBM_FLTARG_REGS));
            floatRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvOtherArgReg);
            assert(genRegMask(argDsc->lvOtherArgReg) & (RBM_ARG_REGS));
            intRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvOtherArgReg);
#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
2001 //------------------------------------------------------------------------
2002 // updateRegStateForArg: Updates rsCalleeRegArgMaskLiveIn for the appropriate
2003 // regState (either compiler->intRegState or compiler->floatRegState),
2004 // with the lvArgReg on "argDsc"
2007 // argDsc - the argument for which the state is to be updated.
2009 // Return Value: None
2012 // The argument is live on entry to the function
2013 // (or is untracked and therefore assumed live)
2016 // This relies on a method in regAlloc.cpp that is shared between LSRA
2017 // and regAlloc. It is further abstracted here because regState is updated
2018 // separately for tracked and untracked variables in LSRA.
void LinearScan::updateRegStateForArg(LclVarDsc* argDsc)
#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
    // For System V AMD64 calls the argDsc can have 2 registers (for structs.)
    // Handle them here.
    if (varTypeIsStruct(argDsc))
        unixAmd64UpdateRegStateForArg(argDsc);
#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
        RegState* intRegState = &compiler->codeGen->intRegState;
        RegState* floatRegState = &compiler->codeGen->floatRegState;
        // In the case of AMD64 we'll still use the floating point registers
        // to model the register usage for argument on vararg calls, so
        // we will ignore the varargs condition to determine whether we use
        // XMM registers or not for setting up the call.
        bool isFloat = (isFloatRegType(argDsc->lvType)
#ifndef _TARGET_AMD64_
                        && !compiler->info.compIsVarArgs
                        && !compiler->opts.compUseSoftFP);
        // HFA arguments are passed in floating-point registers regardless of
        // the declared element type (ARM/ARM64 ABI).
        if (argDsc->lvIsHfaRegArg())
            // Floating-point argument: record it on the float RegState.
            JITDUMP("Float arg V%02u in reg %s\n", (argDsc - compiler->lvaTable), getRegName(argDsc->lvArgReg));
            compiler->raUpdateRegStateForArg(floatRegState, argDsc);
            // Integer argument: record it on the int RegState.
            JITDUMP("Int arg V%02u in reg %s\n", (argDsc - compiler->lvaTable), getRegName(argDsc->lvArgReg));
#if FEATURE_MULTIREG_ARGS
        // Multi-reg args may occupy a second register as well.
        if (argDsc->lvOtherArgReg != REG_NA)
            JITDUMP("(second half) in reg %s\n", getRegName(argDsc->lvOtherArgReg));
#endif // FEATURE_MULTIREG_ARGS
        compiler->raUpdateRegStateForArg(intRegState, argDsc);
2068 //------------------------------------------------------------------------
2069 // buildIntervals: The main entry point for building the data structures over
2070 // which we will do register allocation.
void LinearScan::buildIntervals()
    JITDUMP("\nbuildIntervals ========\n");
    // Build (empty) records for all of the physical registers
    buildPhysRegRecords();
    // DEBUG: dump per-block liveness (use/def/in/out) before building.
    printf("\n-----------------\n");
    printf("LIVENESS:\n");
    printf("-----------------\n");
    foreach_block(compiler, block)
        printf("BB%02u use def in out\n", block->bbNum);
        dumpConvertedVarSet(compiler, block->bbVarUse);
        dumpConvertedVarSet(compiler, block->bbVarDef);
        dumpConvertedVarSet(compiler, block->bbLiveIn);
        dumpConvertedVarSet(compiler, block->bbLiveOut);
    // We will determine whether we should double align the frame during
    // identifyCandidates(), but we initially assume that we will not.
    doDoubleAlign = false;
    identifyCandidates();
    // Figure out if we're going to use a frame pointer. We need to do this before building
    // the ref positions, because those objects will embed the frame register in various register masks
    // if the frame pointer is not reserved. If we decide to have a frame pointer, setFrameType() will
    // remove the frame pointer from the masks.
    DBEXEC(VERBOSE, TupleStyleDump(LSRA_DUMP_PRE));
    JITDUMP("\nbuildIntervals second part ========\n");
    // TODO-Cleanup: This duplicates prior behavior where entry (ParamDef) RefPositions were
    // being assigned the bbNum of the last block traversed in the 2nd phase of Lowering.
    // Previously, the block sequencing was done for the (formerly separate) Build pass,
    // and the curBBNum was left as the last block sequenced. This block was then used to set the
    // weight for the entry (ParamDef) RefPositions. It would be logical to set this to the
    // normalized entry weight (compiler->fgCalledCount), but that results in a net regression.
    if (!blockSequencingDone)
    curBBNum = blockSequence[bbSeqCount - 1]->bbNum;
    // Next, create ParamDef RefPositions for all the tracked parameters, in order of their varIndex.
    // Assign these RefPositions to the (nonexistent) BB0.
    unsigned int lclNum;
    // Reset the callee register-argument live-in state before recomputing it.
    RegState* intRegState = &compiler->codeGen->intRegState;
    RegState* floatRegState = &compiler->codeGen->floatRegState;
    intRegState->rsCalleeRegArgMaskLiveIn = RBM_NONE;
    floatRegState->rsCalleeRegArgMaskLiveIn = RBM_NONE;
    for (unsigned int varIndex = 0; varIndex < compiler->lvaTrackedCount; varIndex++)
        lclNum = compiler->lvaTrackedToVarNum[varIndex];
        argDsc = &(compiler->lvaTable[lclNum]);
        if (!argDsc->lvIsParam)
        // Only reserve a register if the argument is actually used.
        // Is it dead on entry? If compJmpOpUsed is true, then the arguments
        // have to be kept alive, so we have to consider it as live on entry.
        // Use lvRefCnt instead of checking bbLiveIn because if it's volatile we
        // won't have done dataflow on it, but it needs to be marked as live-in so
        // it will get saved in the prolog.
        if (!compiler->compJmpOpUsed && argDsc->lvRefCnt == 0 && !compiler->opts.compDbgCode)
        if (argDsc->lvIsRegArg)
            updateRegStateForArg(argDsc);
        if (isCandidateVar(argDsc))
            // Enregisterable parameter: create its ParamDef RefPosition.
            Interval* interval = getIntervalForLocalVar(varIndex);
            regMaskTP mask = allRegs(TypeGet(argDsc));
            if (argDsc->lvIsRegArg)
                // Set this interval as currently assigned to that register
                regNumber inArgReg = argDsc->lvArgReg;
                assert(inArgReg < REG_COUNT);
                mask = genRegMask(inArgReg);
                assignPhysReg(inArgReg, interval);
            RefPosition* pos = newRefPosition(interval, MinLocation, RefTypeParamDef, nullptr, mask);
        else if (varTypeIsStruct(argDsc->lvType))
            // Promoted struct parameter: create ParamDefs for its candidate fields.
            for (unsigned fieldVarNum = argDsc->lvFieldLclStart;
                 fieldVarNum < argDsc->lvFieldLclStart + argDsc->lvFieldCnt; ++fieldVarNum)
                LclVarDsc* fieldVarDsc = &(compiler->lvaTable[fieldVarNum]);
                if (fieldVarDsc->lvLRACandidate)
                    assert(fieldVarDsc->lvTracked);
                    Interval* interval = getIntervalForLocalVar(fieldVarDsc->lvVarIndex);
                    newRefPosition(interval, MinLocation, RefTypeParamDef, nullptr, allRegs(TypeGet(fieldVarDsc)));
            // We can overwrite the register (i.e. codegen saves it on entry)
            assert(argDsc->lvRefCnt == 0 || !argDsc->lvIsRegArg || argDsc->lvDoNotEnregister ||
                   !argDsc->lvLRACandidate || (varTypeIsFloating(argDsc->TypeGet()) && compiler->opts.compDbgCode));
    // Now set up the reg state for the non-tracked args
    // (We do this here because we want to generate the ParamDef RefPositions in tracked
    // order, so that loop doesn't hit the non-tracked args)
    for (unsigned argNum = 0; argNum < compiler->info.compArgsCount; argNum++, argDsc++)
        argDsc = &(compiler->lvaTable[argNum]);
        if (argDsc->lvPromotedStruct())
            noway_assert(argDsc->lvFieldCnt == 1); // We only handle one field here
            // For a promoted struct, the reg state is tracked on the (single) field.
            unsigned fieldVarNum = argDsc->lvFieldLclStart;
            argDsc = &(compiler->lvaTable[fieldVarNum]);
        noway_assert(argDsc->lvIsParam);
        if (!argDsc->lvTracked && argDsc->lvIsRegArg)
            updateRegStateForArg(argDsc);
    // If there is a secret stub param, it is also live in
    if (compiler->info.compPublishStubParam)
        intRegState->rsCalleeRegArgMaskLiveIn |= RBM_SECRET_STUB_PARAM;
    BasicBlock* predBlock = nullptr;
    BasicBlock* prevBlock = nullptr;
    // Initialize currentLiveVars to the empty set. We will set it to the current
    // live-in at the entry to each block (this will include the incoming args on
    // the first block).
    VarSetOps::AssignNoCopy(compiler, currentLiveVars, VarSetOps::MakeEmpty(compiler));
    // Main per-block walk: build RefPositions for every node in allocation order.
    for (block = startBlockSequence(); block != nullptr; block = moveToNextBlock())
        JITDUMP("\nNEW BLOCK BB%02u\n", block->bbNum);
        bool predBlockIsAllocated = false;
        predBlock = findPredBlockForLiveIn(block, prevBlock DEBUGARG(&predBlockIsAllocated));
            JITDUMP("\n\nSetting BB%02u as the predecessor for determining incoming variable registers of BB%02u\n",
                    block->bbNum, predBlock->bbNum);
            assert(predBlock->bbNum <= bbNumMaxBeforeResolution);
            blockInfo[block->bbNum].predBBNum = predBlock->bbNum;
        if (enregisterLocalVars)
            VarSetOps::AssignNoCopy(compiler, currentLiveVars,
                                    VarSetOps::Intersection(compiler, registerCandidateVars, block->bbLiveIn));
            if (block == compiler->fgFirstBB)
                insertZeroInitRefPositions();
                // The first real location is at 1; 0 is for the entry.
            // Any lclVars live-in to a block are resolution candidates.
            VarSetOps::UnionD(compiler, resolutionCandidateVars, currentLiveVars);
            // Determine if we need any DummyDefs.
            // We need DummyDefs for cases where "predBlock" isn't really a predecessor.
            // Note that it's possible to have uses of uninitialized variables, in which case even the first
            // block may require DummyDefs, which we are not currently adding - this means that these variables
            // will always be considered to be in memory on entry (and reloaded when the use is encountered).
            // TODO-CQ: Consider how best to tune this. Currently, if we create DummyDefs for uninitialized
            // variables (which may actually be initialized along the dynamically executed paths, but not
            // on all static paths), we wind up with excessive liveranges for some of these variables.
            VARSET_TP newLiveIn(VarSetOps::MakeCopy(compiler, currentLiveVars));
                // Compute set difference: newLiveIn = currentLiveVars - predBlock->bbLiveOut
                VarSetOps::DiffD(compiler, newLiveIn, predBlock->bbLiveOut);
            bool needsDummyDefs = (!VarSetOps::IsEmpty(compiler, newLiveIn) && block != compiler->fgFirstBB);
            // Create dummy def RefPositions
                // If we are using locations from a predecessor, we should never require DummyDefs.
                assert(!predBlockIsAllocated);
                JITDUMP("Creating dummy definitions\n");
                VarSetOps::Iter iter(compiler, newLiveIn);
                unsigned varIndex = 0;
                while (iter.NextElem(&varIndex))
                    unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
                    LclVarDsc* varDsc = compiler->lvaTable + varNum;
                    // Add a dummyDef for any candidate vars that are in the "newLiveIn" set.
                    // If this is the entry block, don't add any incoming parameters (they're handled with ParamDefs).
                    if (isCandidateVar(varDsc) && (predBlock != nullptr || !varDsc->lvIsParam))
                        Interval* interval = getIntervalForLocalVar(varIndex);
                        RefPosition* pos = newRefPosition(interval, currentLoc, RefTypeDummyDef, nullptr,
                                                          allRegs(interval->registerType));
                JITDUMP("Finished creating dummy definitions\n\n");
        // Add a dummy RefPosition to mark the block boundary.
        // Note that we do this AFTER adding the exposed uses above, because the
        // register positions for those exposed uses need to be recorded at
        RefPosition* pos = newRefPosition((Interval*)nullptr, currentLoc, RefTypeBB, nullptr, RBM_NONE);
        LIR::Range& blockRange = LIR::AsRange(block);
        for (GenTree* node : blockRange.NonPhiNodes())
            // We increment the number position of each tree node by 2 to simplify the logic when there's the case of
            // a tree that implicitly does a dual-definition of temps (the long case). In this case it is easier to
            // already have an idle spot to handle a dual-def instead of making some messy adjustments if we only
            // increment the number position by one.
            CLANG_FORMAT_COMMENT_ANCHOR;
            node->gtSeqNum = currentLoc;
            // In DEBUG, we want to set the gtRegTag to GT_REGTAG_REG, so that subsequent dumps will show the register
            // Although this looks like a no-op it sets the tag.
            node->gtRegNum = node->gtRegNum;
            buildRefPositionsForNode(node, block, currentLoc);
            // Track the highest node location seen, for later bounds checks.
            if (currentLoc > maxNodeLocation)
                maxNodeLocation = currentLoc;
        // Note: the visited set is cleared in LinearScan::doLinearScan()
        markBlockVisited(block);
        assert(defList.IsEmpty());
        if (enregisterLocalVars)
            // Insert exposed uses for a lclVar that is live-out of 'block' but not live-in to the
            // next block, or any unvisited successors.
            // This will address lclVars that are live on a backedge, as well as those that are kept
            // live at a GT_JMP.
            // Blocks ending with "jmp method" are marked as BBJ_HAS_JMP,
            // and jmp call is represented using GT_JMP node which is a leaf node.
            // Liveness phase keeps all the arguments of the method live till the end of
            // block by adding them to liveout set of the block containing GT_JMP.
            // The target of a GT_JMP implicitly uses all the current method arguments, however
            // there are no actual references to them. This can cause LSRA to assert, because
            // the variables are live but it sees no references. In order to correctly model the
            // liveness of these arguments, we add dummy exposed uses, in the same manner as for
            // backward branches. This will happen automatically via expUseSet.
            // Note that a block ending with GT_JMP has no successors and hence the variables
            // for which dummy use ref positions are added are arguments of the method.
            VARSET_TP expUseSet(VarSetOps::MakeCopy(compiler, block->bbLiveOut));
            VarSetOps::IntersectionD(compiler, expUseSet, registerCandidateVars);
            BasicBlock* nextBlock = getNextBlock();
            if (nextBlock != nullptr)
                VarSetOps::DiffD(compiler, expUseSet, nextBlock->bbLiveIn);
            for (BasicBlock* succ : block->GetAllSuccs(compiler))
                if (VarSetOps::IsEmpty(compiler, expUseSet))
                if (isBlockVisited(succ))
                VarSetOps::DiffD(compiler, expUseSet, succ->bbLiveIn);
            if (!VarSetOps::IsEmpty(compiler, expUseSet))
                JITDUMP("Exposed uses:");
                VarSetOps::Iter iter(compiler, expUseSet);
                unsigned varIndex = 0;
                while (iter.NextElem(&varIndex))
                    unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
                    LclVarDsc* varDsc = compiler->lvaTable + varNum;
                    assert(isCandidateVar(varDsc));
                    Interval* interval = getIntervalForLocalVar(varIndex);
                    newRefPosition(interval, currentLoc, RefTypeExpUse, nullptr, allRegs(interval->registerType));
                    JITDUMP(" V%02u", varNum);
            // Clear the "last use" flag on any vars that are live-out from this block.
                VarSetOps::Iter iter(compiler, block->bbLiveOut);
                unsigned varIndex = 0;
                while (iter.NextElem(&varIndex))
                    unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
                    LclVarDsc* const varDsc = &compiler->lvaTable[varNum];
                    if (isCandidateVar(varDsc))
                        RefPosition* const lastRP = getIntervalForLocalVar(varIndex)->lastRefPosition;
                        if ((lastRP != nullptr) && (lastRP->bbNum == block->bbNum))
                            lastRP->lastUse = false;
            // DEBUG: verify last-use flags and dump use/def sets for the block.
            checkLastUses(block);
            dumpConvertedVarSet(compiler, block->bbVarUse);
            dumpConvertedVarSet(compiler, block->bbVarDef);
    if (enregisterLocalVars)
        if (compiler->lvaKeepAliveAndReportThis())
            // If we need to KeepAliveAndReportThis, add a dummy exposed use of it at the end
            unsigned keepAliveVarNum = compiler->info.compThisArg;
            assert(compiler->info.compIsStatic == false);
            LclVarDsc* varDsc = compiler->lvaTable + keepAliveVarNum;
            if (isCandidateVar(varDsc))
                JITDUMP("Adding exposed use of this, for lvaKeepAliveAndReportThis\n");
                Interval* interval = getIntervalForLocalVar(varDsc->lvVarIndex);
                newRefPosition(interval, currentLoc, RefTypeExpUse, nullptr, allRegs(interval->registerType));
        // Stress mode: keep every candidate alive to the end of the method.
        if (getLsraExtendLifeTimes())
            for (lclNum = 0, varDsc = compiler->lvaTable; lclNum < compiler->lvaCount; lclNum++, varDsc++)
                if (varDsc->lvLRACandidate)
                    JITDUMP("Adding exposed use of V%02u for LsraExtendLifetimes\n", lclNum);
                    Interval* interval = getIntervalForLocalVar(varDsc->lvVarIndex);
                    newRefPosition(interval, currentLoc, RefTypeExpUse, nullptr, allRegs(interval->registerType));
    // If the last block has successors, create a RefTypeBB to record
    if (prevBlock->NumSucc(compiler) > 0)
        RefPosition* pos = newRefPosition((Interval*)nullptr, currentLoc, RefTypeBB, nullptr, RBM_NONE);
    // Make sure we don't have any blocks that were not visited
    foreach_block(compiler, block)
        assert(isBlockVisited(block));
    // DEBUG: dump and validate the built intervals/RefPositions.
    lsraDumpIntervals("BEFORE VALIDATING INTERVALS");
    dumpRefPositions("BEFORE VALIDATING INTERVALS");
    validateIntervals();
2513 //------------------------------------------------------------------------
2514 // validateIntervals: A DEBUG-only method that checks that the lclVar RefPositions
2515 // do not reflect uses of undefined values
2517 // Notes: If an undefined use is encountered, it merely prints a message.
2519 // TODO-Cleanup: This should probably assert, or at least print the message only
2520 // when doing a JITDUMP.
void LinearScan::validateIntervals()
    if (enregisterLocalVars)
        // Walk every tracked local's interval and flag any use that occurs
        // before the interval has seen a def.
        for (unsigned i = 0; i < compiler->lvaTrackedCount; i++)
            // Skip locals that are not LSRA candidates (no interval to check).
            if (!compiler->lvaTable[compiler->lvaTrackedToVarNum[i]].lvLRACandidate)
            Interval* interval = getIntervalForLocalVar(i);
            bool defined = false;
            printf("-----------------\n");
            for (RefPosition* ref = interval->firstRefPosition; ref != nullptr; ref = ref->nextRefPosition)
                RefType refType = ref->refType;
                // A use before any def indicates an (apparently) undefined value;
                // per the header comment this only prints, it does not assert.
                if (!defined && RefTypeIsUse(refType))
                    if (compiler->info.compMethodName != nullptr)
                        printf("%s: ", compiler->info.compMethodName);
                    printf("LocalVar V%02u: undefined use at %u\n", interval->varNum, ref->nodeLocation);
                // Note that there can be multiple last uses if they are on disjoint paths,
                // so we can't really check the lastUse flag
                if (RefTypeIsDef(refType))
2564 //------------------------------------------------------------------------
2565 // GetIndirInfo: Get the source registers for an indirection that might be contained.
2568 // node - The node of interest
2571 // The number of source registers used by the *parent* of this node.
2574 // Adds the defining node for each register to the useList.
int LinearScan::GetIndirInfo(GenTreeIndir* indirTree)
    GenTree* const addr = indirTree->gtOp1;
    // A non-contained address is produced into a single register.
    if (!addr->isContained())
        appendLocationInfoToList(addr);
    // A contained non-LEA address contributes no source registers.
    if (!addr->OperIs(GT_LEA))
    // Contained LEA: its base and/or index (when present and not contained)
    // are the source registers consumed by the parent indirection.
    GenTreeAddrMode* const addrMode = addr->AsAddrMode();
    unsigned srcCount = 0;
    if ((addrMode->Base() != nullptr) && !addrMode->Base()->isContained())
        appendLocationInfoToList(addrMode->Base());
    if ((addrMode->Index() != nullptr) && !addrMode->Index()->isContained())
        appendLocationInfoToList(addrMode->Index());
2605 //------------------------------------------------------------------------
2606 // GetOperandInfo: Get the source registers for an operand that might be contained.
2609 // node - The node of interest
2610 // useList - The list of uses for the node that we're currently processing
2613 // The number of source registers used by the *parent* of this node.
2616 // Adds the defining node for each register to the given useList.
int LinearScan::GetOperandInfo(GenTree* node)
    // A non-contained operand is a single register source.
    if (!node->isContained())
        appendLocationInfoToList(node);
#if !defined(_TARGET_64BIT_)
    // A contained GT_LONG contributes both of its halves as sources.
    if (node->OperIs(GT_LONG))
        return appendBinaryLocationInfoToList(node->AsOp());
#endif // !defined(_TARGET_64BIT_)
    // A contained indirection contributes the registers of its address mode.
    if (node->OperIsIndir())
        const unsigned srcCount = GetIndirInfo(node->AsIndir());
    // A contained HW intrinsic contributes its (single) operand.
    if (node->OperIsHWIntrinsic())
        appendLocationInfoToList(node->gtGetOp1());
2646 //------------------------------------------------------------------------
2647 // GetOperandInfo: Get the source registers for an operand that might be contained.
2650 // node - The node of interest
2651 // useList - The list of uses for the node that we're currently processing
2654 // The number of source registers used by the *parent* of this node.
2657 // Adds the defining node for each register to the useList.
2659 int LinearScan::GetOperandInfo(GenTree* node, LocationInfoListNode** pFirstInfo)
2661 LocationInfoListNode* prevLast = useList.Last();
2662 int srcCount = GetOperandInfo(node);
2663 if (prevLast == nullptr)
2665 *pFirstInfo = useList.Begin();
2669 *pFirstInfo = prevLast->Next();
void TreeNodeInfo::Initialize(LinearScan* lsra, GenTree* node)
    // Reset all per-node allocation state to its defaults.
    _internalIntCount = 0;
    _internalFloatCount = 0;
    isLocalDefUse = false;
    isDelayFree = false;
    hasDelayFreeSrc = false;
    isInternalRegDelayFree = false;
    regMaskTP dstCandidates;
    // if there is a reg indicated on the tree node, use that for dstCandidates
    // the exception is the NOP, which sometimes show up around late args.
    // TODO-Cleanup: get rid of those NOPs.
    if (node->gtRegNum == REG_STK)
        // REG_STK means no register destination at all.
        dstCandidates = RBM_NONE;
    else if (node->gtRegNum == REG_NA || node->gtOper == GT_NOP)
        // No fixed register: any register of the appropriate class will do.
        if (node->OperGet() == GT_PUTARG_REG)
            dstCandidates = lsra->allRegs(TYP_INT);
            dstCandidates = lsra->allRegs(node->TypeGet());
        // A specific register was pre-assigned; restrict candidates to it.
        dstCandidates = genRegMask(node->gtRegNum);
    setDstCandidates(lsra, dstCandidates);
    // Sources default to the same candidate set as the destination.
    srcCandsIndex = dstCandsIndex;
    setInternalCandidates(lsra, lsra->allRegs(TYP_INT));
    isInitialized = true;
    assert(IsValid(lsra));
2726 //------------------------------------------------------------------------
2727 // getSrcCandidates: Get the source candidates (candidates for the consumer
2728 // of the node) from the TreeNodeInfo
2731 // lsra - the LinearScan object
2734 // The set of registers (as a register mask) that are candidates for the
2735 // consumer of the node
2738 // The LinearScan object maintains the mapping from the indices kept in the
2739 // TreeNodeInfo to the actual register masks.
2741 regMaskTP TreeNodeInfo::getSrcCandidates(LinearScan* lsra)
2743 return lsra->GetRegMaskForIndex(srcCandsIndex);
2746 //------------------------------------------------------------------------
2747 // setSrcCandidates: Set the source candidates (candidates for the consumer
2748 // of the node) on the TreeNodeInfo
2751 // lsra - the LinearScan object
2753 // Notes: see getSrcCandidates
2755 void TreeNodeInfo::setSrcCandidates(LinearScan* lsra, regMaskTP mask)
2757 LinearScan::RegMaskIndex i = lsra->GetIndexForRegMask(mask);
2758 assert(FitsIn<unsigned char>(i));
2759 srcCandsIndex = (unsigned char)i;
2762 //------------------------------------------------------------------------
2763 // getDstCandidates: Get the dest candidates (candidates for the definition
2764 // of the node) from the TreeNodeInfo
2767 // lsra - the LinearScan object
2770 // The set of registers (as a register mask) that are candidates for the
2773 // Notes: see getSrcCandidates
2775 regMaskTP TreeNodeInfo::getDstCandidates(LinearScan* lsra)
2777 return lsra->GetRegMaskForIndex(dstCandsIndex);
2780 //------------------------------------------------------------------------
2781 // setDstCandidates: Set the dest candidates (candidates for the definition
2782 // of the node) on the TreeNodeInfo
2785 // lsra - the LinearScan object
2787 // Notes: see getSrcCandidates
2789 void TreeNodeInfo::setDstCandidates(LinearScan* lsra, regMaskTP mask)
2791 LinearScan::RegMaskIndex i = lsra->GetIndexForRegMask(mask);
2792 assert(FitsIn<unsigned char>(i));
2793 dstCandsIndex = (unsigned char)i;
2796 //------------------------------------------------------------------------
2797 // getInternalCandidates: Get the internal candidates (candidates for the internal
2798 // temporary registers used by a node) from the TreeNodeInfo
2801 // lsra - the LinearScan object
2804 // The set of registers (as a register mask) that are candidates for the
2805 // internal temporary registers.
2807 // Notes: see getSrcCandidates
2809 regMaskTP TreeNodeInfo::getInternalCandidates(LinearScan* lsra)
2811 return lsra->GetRegMaskForIndex(internalCandsIndex);
2814 //------------------------------------------------------------------------
2815 // getInternalCandidates: Set the internal candidates (candidates for the internal
2816 // temporary registers used by a node) on the TreeNodeInfo
2819 // lsra - the LinearScan object
2821 // Notes: see getSrcCandidates
2823 void TreeNodeInfo::setInternalCandidates(LinearScan* lsra, regMaskTP mask)
2825 LinearScan::RegMaskIndex i = lsra->GetIndexForRegMask(mask);
2826 assert(FitsIn<unsigned char>(i));
2827 internalCandsIndex = (unsigned char)i;
2830 //------------------------------------------------------------------------
2831 // addInternalCandidates: Add internal candidates (candidates for the internal
2832 // temporary registers used by a node) on the TreeNodeInfo
2835 // lsra - the LinearScan object
2837 // Notes: see getSrcCandidates
2839 void TreeNodeInfo::addInternalCandidates(LinearScan* lsra, regMaskTP mask)
2841 LinearScan::RegMaskIndex i = lsra->GetIndexForRegMask(lsra->GetRegMaskForIndex(internalCandsIndex) | mask);
2842 assert(FitsIn<unsigned char>(i));
2843 internalCandsIndex = (unsigned char)i;
2846 //------------------------------------------------------------------------
2847 // BuildStoreLoc: Set register requirements for a store of a lclVar
2850 // storeLoc - the local store (GT_STORE_LCL_FLD or GT_STORE_LCL_VAR)
2854 // - Setting the appropriate candidates for a store of a multi-reg call return value.
2855 // - Handling of contained immediates.
2856 // - Requesting an internal register for SIMD12 stores.
void LinearScan::BuildStoreLoc(GenTreeLclVarCommon* storeLoc)
    TreeNodeInfo* info = currentNodeInfo;
    GenTree* op1 = storeLoc->gtGetOp1();
    // A local store produces no register result.
    assert(info->dstCount == 0);
    if (op1->IsMultiRegCall())
        // This is the case of var = call where call is returning
        // a value in multiple return registers.
        // Must be a store lclvar.
        assert(storeLoc->OperGet() == GT_STORE_LCL_VAR);
        // srcCount = number of registers in which the value is returned by call
        GenTreeCall* call = op1->AsCall();
        ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
        unsigned regCount = retTypeDesc->GetReturnRegCount();
        info->srcCount = regCount;
        // Call node srcCandidates = Bitwise-OR(allregs(GetReturnRegType(i))) for all i=0..RetRegCount-1
        regMaskTP srcCandidates = allMultiRegCallNodeRegs(call);
        LocationInfoListNode* locInfo = getLocationInfo(op1);
        locInfo->info.setSrcCandidates(this, srcCandidates);
        useList.Append(locInfo);
#ifndef _TARGET_64BIT_
    else if (varTypeIsLong(op1))
        if (op1->OperIs(GT_MUL_LONG))
            // This is actually a bug. A GT_MUL_LONG produces two registers, but is modeled as only producing
            // eax (and killing edx). This only works because it always occurs as var = GT_MUL_LONG (ensured by
            // DecomposeMul), and therefore edx won't be reused before the store.
            // TODO-X86-Cleanup: GT_MUL_LONG should be a multireg node on x86, just as on ARM.
            appendLocationInfoToList(op1);
            // The general long case: a contained GT_LONG whose two halves are
            // each produced in a register and consumed as two sources.
            assert(op1->OperIs(GT_LONG));
            assert(op1->isContained() && !op1->gtOp.gtOp1->isContained() && !op1->gtOp.gtOp2->isContained());
            info->srcCount = appendBinaryLocationInfoToList(op1->AsOp());
            assert(info->srcCount == 2);
#endif // !_TARGET_64BIT_
    else if (op1->isContained())
        // Default case: a single register source for the stored value.
        appendLocationInfoToList(op1);
    if (varTypeIsSIMD(storeLoc))
        if (!op1->isContained() && (storeLoc->TypeGet() == TYP_SIMD12))
            // Need an additional register to extract upper 4 bytes of Vector3.
#ifdef _TARGET_XARCH_
            info->internalFloatCount = 1;
            info->setInternalCandidates(this, allSIMDRegs());
#elif defined(_TARGET_ARM64_)
            info->internalIntCount = 1;
#error "Unknown target architecture for STORE_LCL_VAR of SIMD12"
#endif // FEATURE_SIMD
2938 //------------------------------------------------------------------------
2939 // BuildSimple: Sets the srcCount for all the trees
2940 // without special handling based on the tree node type.
2943 // tree - The node of interest
void LinearScan::BuildSimple(GenTree* tree)
    TreeNodeInfo* info = currentNodeInfo;
    unsigned kind = tree->OperKind();
    // dstCount must already reflect whether this node produces a value.
    assert(info->dstCount == (tree->IsValue() ? 1 : 0));
    // Constants and leaves consume no source registers.
    if (kind & (GTK_CONST | GTK_LEAF))
    // Simple unary/binary operators: sources come from their operand(s).
    else if (kind & (GTK_SMPOP))
        info->srcCount = appendBinaryLocationInfoToList(tree->AsOp());
2967 //------------------------------------------------------------------------
2968 // BuildReturn: Set the NodeInfo for a GT_RETURN.
2971 // tree - The node of interest
void LinearScan::BuildReturn(GenTree* tree)
    TreeNodeInfo* info = currentNodeInfo;
    // A return produces no register result.
    assert(info->dstCount == 0);
    GenTree* op1 = tree->gtGetOp1();
#if !defined(_TARGET_64BIT_)
    if (tree->TypeGet() == TYP_LONG)
        // 32-bit targets return a long in a register pair; the operand is a
        // contained GT_LONG whose halves go to the lo/hi return registers.
        assert((op1->OperGet() == GT_LONG) && op1->isContained());
        GenTree* loVal = op1->gtGetOp1();
        GenTree* hiVal = op1->gtGetOp2();
        LocationInfoListNode* loValInfo = getLocationInfo(loVal);
        LocationInfoListNode* hiValInfo = getLocationInfo(hiVal);
        loValInfo->info.setSrcCandidates(this, RBM_LNGRET_LO);
        hiValInfo->info.setSrcCandidates(this, RBM_LNGRET_HI);
        useList.Append(loValInfo);
        useList.Append(hiValInfo);
#endif // !defined(_TARGET_64BIT_)
    if ((tree->TypeGet() != TYP_VOID) && !op1->isContained())
        regMaskTP useCandidates = RBM_NONE;
#if FEATURE_MULTIREG_RET
        if (varTypeIsStruct(tree))
            // op1 has to be either an lclvar or a multi-reg returning call
            if (op1->OperGet() != GT_LCL_VAR)
                noway_assert(op1->IsMultiRegCall());
                // Multi-reg call: sources are the call's ABI return registers.
                ReturnTypeDesc* retTypeDesc = op1->AsCall()->GetReturnTypeDesc();
                info->srcCount = retTypeDesc->GetReturnRegCount();
                useCandidates = retTypeDesc->GetABIReturnRegs();
#endif // FEATURE_MULTIREG_RET
            // Non-struct type return - determine useCandidates
            switch (tree->TypeGet())
                // void: no source register required.
                useCandidates = RBM_NONE;
                // float: the FP return register.
                useCandidates = RBM_FLOATRET;
                // We ONLY want the valid double register in the RBM_DOUBLERET mask.
                useCandidates = (RBM_DOUBLERET & RBM_ALLDOUBLE);
                // long: the long return register (pair).
                useCandidates = RBM_LNGRET;
                // default: the integer return register.
                useCandidates = RBM_INTRET;
        LocationInfoListNode* locationInfo = getLocationInfo(op1);
        if (useCandidates != RBM_NONE)
            locationInfo->info.setSrcCandidates(this, useCandidates);
        useList.Append(locationInfo);
3051 //------------------------------------------------------------------------
3052 // BuildPutArgReg: Set the NodeInfo for a PUTARG_REG.
3055 // node - The PUTARG_REG node.
3056 // Notes:
3057 //    The current signature takes only `node`; the argument register is read
3058 //    from node->gtRegNum. (The argReg/info/isVarArgs/callHasFloatRegArgs
3059 //    parameters this header once described are stale and no longer exist —
3060 //    see HandleFloatVarArgs for the varargs FP handling.)
// NOTE(review): lines are elided in this view — the #if _TARGET_ARM_ opener that
// pairs with the #endif below is not visible.
3064 void LinearScan::BuildPutArgReg(GenTreeUnOp* node)
3066 TreeNodeInfo* info = currentNodeInfo;
3067 assert(node != nullptr);
3068 assert(node->OperIsPutArgReg());
// The target argument register was chosen earlier and recorded on the node.
3070 regNumber argReg = node->gtRegNum;
3071 assert(argReg != REG_NA);
3073 // Set the register requirements for the node.
3074 regMaskTP argMask = genRegMask(argReg);
3077 // If type of node is `long` then it is actually `double`.
3078 // The actual `long` types must have been transformed as a field list with two fields.
3079 if (node->TypeGet() == TYP_LONG)
// A long occupies a consecutive register pair on ARM, so widen the mask to
// include the next register (asserted to be the next arg register).
3082 info->dstCount = info->srcCount;
3083 assert(genRegArgNext(argReg) == REG_NEXT(argReg));
3084 argMask |= genRegMask(REG_NEXT(argReg));
3086 #endif // _TARGET_ARM_
3087 info->setDstCandidates(this, argMask);
3088 info->setSrcCandidates(this, argMask);
3090 // To avoid redundant moves, have the argument operand computed in the
3091 // register in which the argument is passed to the call.
3092 LocationInfoListNode* op1Info = getLocationInfo(node->gtOp.gtOp1);
3093 op1Info->info.setSrcCandidates(this, info->getSrcCandidates(this));
// delay-free: keep the source live across this node's def so the allocator
// cannot reuse its register before the PUTARG_REG completes.
3094 op1Info->info.isDelayFree = true;
3095 useList.Append(op1Info);
3098 //------------------------------------------------------------------------
3099 // HandleFloatVarArgs: Handle additional register requirements for a varargs call
3102 // call - The call node of interest
3103 // argNode - The current argument
//      callHasFloatRegArgs - [out] set to true when a floating-point register arg is seen
3109 // In the case of a varargs call, the ABI dictates that if we have floating point args,
3110 // we must pass the enregistered arguments in both the integer and floating point registers.
3111 // Since the integer register is not associated with the arg node, we will reserve it as
3112 // an internal register on the call so that it is not used during the evaluation of the call node
3113 // (e.g. for the target).
// NOTE(review): the #if FEATURE_VARARG opener pairing with the #endif below is
// elided from this view.
3114 void LinearScan::HandleFloatVarArgs(GenTreeCall* call, GenTree* argNode, bool* callHasFloatRegArgs)
3117 TreeNodeInfo* info = currentNodeInfo;
// Only varargs calls with floating-point args need the shadow integer register.
3118 if (call->IsVarargs() && varTypeIsFloating(argNode))
3120 *callHasFloatRegArgs = true;
// Reserve the integer register that corresponds to this FP argument register
// as an internal register on the call, so the call's own evaluation (e.g. the
// call target) cannot occupy it.
3122 regNumber argReg = argNode->gtRegNum;
3123 regNumber targetReg = compiler->getCallArgIntRegister(argReg);
3124 info->setInternalIntCount(info->internalIntCount + 1);
3125 info->addInternalCandidates(this, genRegMask(targetReg));
3127 #endif // FEATURE_VARARG
3130 //------------------------------------------------------------------------
3131 // BuildGCWriteBarrier: Handle additional register requirements for a GC write barrier
3134 // tree - The STORE_IND for which a write barrier is required
// NOTE(review): lines are elided in this view (non-contiguous numbering);
// braces around the conditional arms below are not visible.
3136 void LinearScan::BuildGCWriteBarrier(GenTree* tree)
3138 TreeNodeInfo* info = currentNodeInfo;
// NOTE(review): `dst` is not referenced in any visible line — possibly used in
// an elided line, or dead; confirm against the full file.
3139 GenTree* dst = tree;
3140 GenTree* addr = tree->gtOp.gtOp1;
3141 GenTree* src = tree->gtOp.gtOp2;
3142 LocationInfoListNode* addrInfo = getLocationInfo(addr);
3143 LocationInfoListNode* srcInfo = getLocationInfo(src);
3145 // In the case where we are doing a helper assignment, even if the dst
3146 // is an indir through an lea, we need to actually instantiate the
3147 // lea in a register
3148 assert(!addr->isContained() && !src->isContained());
3149 useList.Append(addrInfo);
3150 useList.Append(srcInfo);
3152 assert(info->dstCount == 0);
// True once a target-specific register assignment has been made; otherwise we
// fall back to the standard helper-call argument registers below.
3153 bool customSourceRegs = false;
3155 #if defined(_TARGET_ARM64_)
3157 // the 'addr' goes into x14 (REG_WRITE_BARRIER_DST)
3158 // the 'src' goes into x15 (REG_WRITE_BARRIER_SRC)
3160 addrInfo->info.setSrcCandidates(this, RBM_WRITE_BARRIER_DST);
3161 srcInfo->info.setSrcCandidates(this, RBM_WRITE_BARRIER_SRC);
3162 customSourceRegs = true;
3164 #elif defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
3166 bool useOptimizedWriteBarrierHelper = compiler->codeGen->genUseOptimizedWriteBarriers(tree, src);
3167 if (useOptimizedWriteBarrierHelper)
3169 // Special write barrier:
3170 // op1 (addr) goes into REG_WRITE_BARRIER (rdx) and
3171 // op2 (src) goes into any int register.
3172 addrInfo->info.setSrcCandidates(this, RBM_WRITE_BARRIER);
3173 srcInfo->info.setSrcCandidates(this, RBM_WRITE_BARRIER_SRC);
3174 customSourceRegs = true;
3177 #endif // defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
3179 if (!customSourceRegs)
3181 // For the standard JIT Helper calls:
3182 // op1 (addr) goes into REG_ARG_0 and
3183 // op2 (src) goes into REG_ARG_1
3184 addrInfo->info.setSrcCandidates(this, RBM_ARG_0);
3185 srcInfo->info.setSrcCandidates(this, RBM_ARG_1);
3188 // Both src and dst must reside in a register, which they should since we haven't set
3189 // either of them as contained.
3190 assert(addrInfo->info.dstCount == 1);
3191 assert(srcInfo->info.dstCount == 1);
3194 //------------------------------------------------------------------------
3195 // BuildCmp: Set the register requirements for a compare.
3198 // tree - The node of interest
// NOTE(review): the #ifdef _TARGET_X86_ opener pairing with the #endif below is
// elided from this view.
3203 void LinearScan::BuildCmp(GenTree* tree)
3205 TreeNodeInfo* info = currentNodeInfo;
3206 assert(tree->OperIsCompare() || tree->OperIs(GT_CMP) || tree->OperIs(GT_JCMP));
// Jump-fused compares (TYP_VOID) define no value; others define exactly one.
3209 assert((info->dstCount == 1) || (tree->TypeGet() == TYP_VOID));
3212 // If the compare is used by a jump, we just need to set the condition codes. If not, then we need
3213 // to store the result into the low byte of a register, which requires the dst be a byteable register.
3214 // We always set the dst candidates, though, because if this compare is consumed by a jump, they
3215 // won't be used. We might be able to use GTF_RELOP_JMP_USED to determine this case, but it's not clear
3216 // that flag is maintained until this location (especially for decomposed long compares).
3217 info->setDstCandidates(this, RBM_BYTE_REGS)
3218 #endif // _TARGET_X86_
3220 info->srcCount = appendBinaryLocationInfoToList(tree->AsOp());
3223 #endif // !LEGACY_BACKEND