ead71c970b2c2ad520363347b566b827003cbeb7
[platform/upstream/coreclr.git] / src / jit / lsra.cpp
1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
4
5 /*
6 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
7 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
8
9                  Linear Scan Register Allocation
10
11                          a.k.a. LSRA
12
13   Preconditions
14     - All register requirements are expressed in the code stream, either as destination
15       registers of tree nodes, or as internal registers.  These requirements are
16       expressed in the RefPositions built for each node by BuildNode(), which includes:
17       - The register uses and definitions.
18       - The register restrictions (candidates) of the target register, both from itself,
19         as producer of the value (dstCandidates), and from its consuming node (srcCandidates).
20         Note that when we talk about srcCandidates we are referring to the destination register
21         (not any of its sources).
22       - The number (internalCount) of registers required, and their register restrictions (internalCandidates).
23         These are neither inputs nor outputs of the node, but used in the sequence of code generated for the tree.
24     "Internal registers" are registers used during the code sequence generated for the node.
25     The register lifetimes must obey the following lifetime model:
26     - First, any internal registers are defined.
27     - Next, any source registers are used (and are then freed if they are last use and are not identified as
28       "delayRegFree").
29     - Next, the internal registers are used (and are then freed).
30     - Next, any registers in the kill set for the instruction are killed.
31     - Next, the destination register(s) are defined (multiple destination registers are only supported on ARM)
32     - Finally, any "delayRegFree" source registers are freed.
33   There are several things to note about this order:
34     - The internal registers will never overlap any use, but they may overlap a destination register.
35     - Internal registers are never live beyond the node.
36     - The "delayRegFree" annotation is used for instructions that are only available in a Read-Modify-Write form.
37       That is, the destination register is one of the sources.  In this case, we must not use the same register for
38       the non-RMW operand as for the destination.
39
40   Overview (doLinearScan):
41     - Walk all blocks, building intervals and RefPositions (buildIntervals)
42     - Allocate registers (allocateRegisters)
43     - Annotate nodes with register assignments (resolveRegisters)
44     - Add move nodes as needed to resolve conflicting register
45       assignments across non-adjacent edges. (resolveEdges, called from resolveRegisters)
46
47   Postconditions:
48
49     Tree nodes (GenTree):
50     - GenTree::gtRegNum (and gtRegPair for ARM) is annotated with the register
51       assignment for a node. If the node does not require a register, it is
52       annotated as such (gtRegNum = REG_NA). For a variable definition or interior
53       tree node (an "implicit" definition), this is the register to put the result.
54       For an expression use, this is the place to find the value that has previously
55       been computed.
56       - In most cases, this register must satisfy the constraints specified for the RefPosition.
57       - In some cases, this is difficult:
58         - If a lclVar node currently lives in some register, it may not be desirable to move it
59           (i.e. its current location may be desirable for future uses, e.g. if it's a callee save register,
60           but needs to be in a specific arg register for a call).
61         - In other cases there may be conflicts on the restrictions placed by the defining node and the node which
62           consumes it
63       - If such a node is constrained to a single fixed register (e.g. an arg register, or a return from a call),
64         then LSRA is free to annotate the node with a different register.  The code generator must issue the appropriate
65         move.
66       - However, if such a node is constrained to a set of registers, and its current location does not satisfy that
67         requirement, LSRA must insert a GT_COPY node between the node and its parent.  The gtRegNum on the GT_COPY node
68         must satisfy the register requirement of the parent.
69     - GenTree::gtRsvdRegs has a set of registers used for internal temps.
70     - A tree node is marked GTF_SPILL if the tree node must be spilled by the code generator after it has been
71       evaluated.
72       - LSRA currently does not set GTF_SPILLED on such nodes, because it caused problems in the old code generator.
73         In the new backend perhaps this should change (see also the note below under CodeGen).
74     - A tree node is marked GTF_SPILLED if it is a lclVar that must be reloaded prior to use.
75       - The register (gtRegNum) on the node indicates the register to which it must be reloaded.
76       - For lclVar nodes, since the uses and defs are distinct tree nodes, it is always possible to annotate the node
77         with the register to which the variable must be reloaded.
78       - For other nodes, since they represent both the def and use, if the value must be reloaded to a different
79         register, LSRA must insert a GT_RELOAD node in order to specify the register to which it should be reloaded.
80
81     Local variable table (LclVarDsc):
82     - LclVarDsc::lvRegister is set to true if a local variable has the
83       same register assignment for its entire lifetime.
84     - LclVarDsc::lvRegNum / lvOtherReg: these are initialized to their
85       first value at the end of LSRA (it looks like lvOtherReg isn't?
86       This is probably a bug (ARM)). Codegen will set them to their current value
87       as it processes the trees, since a variable can (now) be assigned different
88       registers over its lifetimes.
89
90 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
91 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
92 */
93
94 #include "jitpch.h"
95 #ifdef _MSC_VER
96 #pragma hdrstop
97 #endif
98
99 #include "lsra.h"
100
101 #ifdef DEBUG
102 const char* LinearScan::resolveTypeName[] = {"Split", "Join", "Critical", "SharedCritical"};
103 #endif // DEBUG
104
105 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
106 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
107 XX                                                                           XX
108 XX                    Small Helper functions                                 XX
109 XX                                                                           XX
110 XX                                                                           XX
111 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
112 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
113 */
114
115 //--------------------------------------------------------------
116 // lsraAssignRegToTree: Assign the given reg to tree node.
117 //
118 // Arguments:
119 //    tree    -    Gentree node
120 //    reg     -    register to be assigned
121 //    regIdx  -    register idx, if tree is a multi-reg call node.
122 //                 regIdx will be zero for single-reg result producing tree nodes.
123 //
124 // Return Value:
125 //    None
126 //
void lsraAssignRegToTree(GenTree* tree, regNumber reg, unsigned regIdx)
{
    if (regIdx == 0)
    {
        // Single-reg case, or the first register of a multi-reg node:
        // every node stores this directly.
        tree->gtRegNum = reg;
    }
#if !defined(_TARGET_64BIT_)
    else if (tree->OperIsMultiRegOp())
    {
        // 32-bit targets only: a multi-reg op carries exactly one additional
        // register, stored in gtOtherReg.
        assert(regIdx == 1);
        GenTreeMultiRegOp* mul = tree->AsMultiRegOp();
        mul->gtOtherReg        = reg;
    }
#endif // _TARGET_64BIT_
#if FEATURE_MULTIREG_RET
    else if (tree->OperGet() == GT_COPY)
    {
        // A GT_COPY of a multi-reg value: the second register goes into the
        // first slot of the gtOtherRegs array (only index 1 is expected here).
        assert(regIdx == 1);
        GenTreeCopyOrReload* copy = tree->AsCopyOrReload();
        copy->gtOtherRegs[0]      = (regNumberSmall)reg;
    }
#endif // FEATURE_MULTIREG_RET
#if FEATURE_ARG_SPLIT
    else if (tree->OperIsPutArgSplit())
    {
        // A put-arg-split node tracks all of its registers by index.
        GenTreePutArgSplit* putArg = tree->AsPutArgSplit();
        putArg->SetRegNumByIdx(reg, regIdx);
    }
#endif // FEATURE_ARG_SPLIT
    else
    {
        // The only remaining multi-reg producer is a multi-reg call node.
        assert(tree->IsMultiRegCall());
        GenTreeCall* call = tree->AsCall();
        call->SetRegNumByIdx(reg, regIdx);
    }
}
163
164 //-------------------------------------------------------------
165 // getWeight: Returns the weight of the RefPosition.
166 //
167 // Arguments:
168 //    refPos   -   ref position
169 //
170 // Returns:
171 //    Weight of ref position.
172 unsigned LinearScan::getWeight(RefPosition* refPos)
173 {
174     unsigned weight;
175     GenTree* treeNode = refPos->treeNode;
176
177     if (treeNode != nullptr)
178     {
179         if (isCandidateLocalRef(treeNode))
180         {
181             // Tracked locals: use weighted ref cnt as the weight of the
182             // ref position.
183             GenTreeLclVarCommon* lclCommon = treeNode->AsLclVarCommon();
184             LclVarDsc*           varDsc    = &(compiler->lvaTable[lclCommon->gtLclNum]);
185             weight                         = varDsc->lvRefCntWtd();
186             if (refPos->getInterval()->isSpilled)
187             {
188                 // Decrease the weight if the interval has already been spilled.
189                 weight -= BB_UNITY_WEIGHT;
190             }
191         }
192         else
193         {
194             // Non-candidate local ref or non-lcl tree node.
195             // These are considered to have two references in the basic block:
196             // a def and a use and hence weighted ref count would be 2 times
197             // the basic block weight in which they appear.
198             // However, it is generally more harmful to spill tree temps, so we
199             // double that.
200             const unsigned TREE_TEMP_REF_COUNT    = 2;
201             const unsigned TREE_TEMP_BOOST_FACTOR = 2;
202             weight = TREE_TEMP_REF_COUNT * TREE_TEMP_BOOST_FACTOR * blockInfo[refPos->bbNum].weight;
203         }
204     }
205     else
206     {
207         // Non-tree node ref positions.  These will have a single
208         // reference in the basic block and hence their weighted
209         // refcount is equal to the block weight in which they
210         // appear.
211         weight = blockInfo[refPos->bbNum].weight;
212     }
213
214     return weight;
215 }
216
217 // allRegs represents a set of registers that can
218 // be used to allocate the specified type in any point
219 // in time (more of a 'bank' of registers).
220 regMaskTP LinearScan::allRegs(RegisterType rt)
221 {
222     if (rt == TYP_FLOAT)
223     {
224         return availableFloatRegs;
225     }
226     else if (rt == TYP_DOUBLE)
227     {
228         return availableDoubleRegs;
229     }
230 #ifdef FEATURE_SIMD
231     // TODO-Cleanup: Add an RBM_ALLSIMD
232     else if (varTypeIsSIMD(rt))
233     {
234         return availableDoubleRegs;
235     }
236 #endif // FEATURE_SIMD
237     else
238     {
239         return availableIntRegs;
240     }
241 }
242
243 regMaskTP LinearScan::allByteRegs()
244 {
245 #ifdef _TARGET_X86_
246     return availableIntRegs & RBM_BYTE_REGS;
247 #else
248     return availableIntRegs;
249 #endif
250 }
251
252 regMaskTP LinearScan::allSIMDRegs()
253 {
254     return availableFloatRegs;
255 }
256
257 //------------------------------------------------------------------------
258 // internalFloatRegCandidates: Return the set of registers that are appropriate
259 //                             for use as internal float registers.
260 //
261 // Return Value:
262 //    The set of registers (as a regMaskTP).
263 //
264 // Notes:
265 //    compFloatingPointUsed is only required to be set if it is possible that we
266 //    will use floating point callee-save registers.
267 //    It is unlikely, if an internal register is the only use of floating point,
268 //    that it will select a callee-save register.  But to be safe, we restrict
269 //    the set of candidates if compFloatingPointUsed is not already set.
270
271 regMaskTP LinearScan::internalFloatRegCandidates()
272 {
273     if (compiler->compFloatingPointUsed)
274     {
275         return allRegs(TYP_FLOAT);
276     }
277     else
278     {
279         return RBM_FLT_CALLEE_TRASH;
280     }
281 }
282
283 /*****************************************************************************
284  * Inline functions for RegRecord
285  *****************************************************************************/
286
287 bool RegRecord::isFree()
288 {
289     return ((assignedInterval == nullptr || !assignedInterval->isActive) && !isBusyUntilNextKill);
290 }
291
292 /*****************************************************************************
293  * Inline functions for LinearScan
294  *****************************************************************************/
295 RegRecord* LinearScan::getRegisterRecord(regNumber regNum)
296 {
297     assert((unsigned)regNum < ArrLen(physRegs));
298     return &physRegs[regNum];
299 }
300
301 #ifdef DEBUG
302
303 //----------------------------------------------------------------------------
304 // getConstrainedRegMask: Returns new regMask which is the intersection of
305 // regMaskActual and regMaskConstraint if the new regMask has at least
306 // minRegCount registers, otherwise returns regMaskActual.
307 //
308 // Arguments:
309 //     regMaskActual      -  regMask that needs to be constrained
310 //     regMaskConstraint  -  regMask constraint that needs to be
311 //                           applied to regMaskActual
312 //     minRegCount        -  Minimum number of regs that should
313 //                           be present in new regMask.
314 //
315 // Return Value:
316 //     New regMask that has minRegCount registers after intersection.
317 //     Otherwise returns regMaskActual.
318 regMaskTP LinearScan::getConstrainedRegMask(regMaskTP regMaskActual, regMaskTP regMaskConstraint, unsigned minRegCount)
319 {
320     regMaskTP newMask = regMaskActual & regMaskConstraint;
321     if (genCountBits(newMask) >= minRegCount)
322     {
323         return newMask;
324     }
325
326     return regMaskActual;
327 }
328
329 //------------------------------------------------------------------------
330 // stressLimitRegs: Given a set of registers, expressed as a register mask, reduce
331 //            them based on the current stress options.
332 //
333 // Arguments:
334 //    mask      - The current mask of register candidates for a node
335 //
336 // Return Value:
337 //    A possibly-modified mask, based on the value of COMPlus_JitStressRegs.
338 //
339 // Notes:
340 //    This is the method used to implement the stress options that limit
341 //    the set of registers considered for allocation.
342
343 regMaskTP LinearScan::stressLimitRegs(RefPosition* refPosition, regMaskTP mask)
344 {
345     if (getStressLimitRegs() != LSRA_LIMIT_NONE)
346     {
347         // The refPosition could be null, for example when called
348         // by getTempRegForResolution().
349         int minRegCount = (refPosition != nullptr) ? refPosition->minRegCandidateCount : 1;
350
351         switch (getStressLimitRegs())
352         {
353             case LSRA_LIMIT_CALLEE:
354                 if (!compiler->opts.compDbgEnC)
355                 {
356                     mask = getConstrainedRegMask(mask, RBM_CALLEE_SAVED, minRegCount);
357                 }
358                 break;
359
360             case LSRA_LIMIT_CALLER:
361             {
362                 mask = getConstrainedRegMask(mask, RBM_CALLEE_TRASH, minRegCount);
363             }
364             break;
365
366             case LSRA_LIMIT_SMALL_SET:
367                 if ((mask & LsraLimitSmallIntSet) != RBM_NONE)
368                 {
369                     mask = getConstrainedRegMask(mask, LsraLimitSmallIntSet, minRegCount);
370                 }
371                 else if ((mask & LsraLimitSmallFPSet) != RBM_NONE)
372                 {
373                     mask = getConstrainedRegMask(mask, LsraLimitSmallFPSet, minRegCount);
374                 }
375                 break;
376
377             default:
378                 unreached();
379         }
380
381         if (refPosition != nullptr && refPosition->isFixedRegRef)
382         {
383             mask |= refPosition->registerAssignment;
384         }
385     }
386
387     return mask;
388 }
389 #endif // DEBUG
390
391 //------------------------------------------------------------------------
392 // conflictingFixedRegReference: Determine whether the current RegRecord has a
393 //                               fixed register use that conflicts with 'refPosition'
394 //
395 // Arguments:
396 //    refPosition - The RefPosition of interest
397 //
398 // Return Value:
399 //    Returns true iff the given RefPosition is NOT a fixed use of this register,
400 //    AND either:
401 //    - there is a RefPosition on this RegRecord at the nodeLocation of the given RefPosition, or
402 //    - the given RefPosition has a delayRegFree, and there is a RefPosition on this RegRecord at
403 //      the nodeLocation just past the given RefPosition.
404 //
405 // Assumptions:
406 //    'refPosition' is non-null.
407
408 bool RegRecord::conflictingFixedRegReference(RefPosition* refPosition)
409 {
410     // Is this a fixed reference of this register?  If so, there is no conflict.
411     if (refPosition->isFixedRefOfRegMask(genRegMask(regNum)))
412     {
413         return false;
414     }
415     // Otherwise, check for conflicts.
416     // There is a conflict if:
417     // 1. There is a recent RefPosition on this RegRecord that is at this location,
418     //    except in the case where it is a special "putarg" that is associated with this interval, OR
419     // 2. There is an upcoming RefPosition at this location, or at the next location
420     //    if refPosition is a delayed use (i.e. must be kept live through the next/def location).
421
422     LsraLocation refLocation = refPosition->nodeLocation;
423     if (recentRefPosition != nullptr && recentRefPosition->refType != RefTypeKill &&
424         recentRefPosition->nodeLocation == refLocation &&
425         (!isBusyUntilNextKill || assignedInterval != refPosition->getInterval()))
426     {
427         return true;
428     }
429     LsraLocation nextPhysRefLocation = getNextRefLocation();
430     if (nextPhysRefLocation == refLocation || (refPosition->delayRegFree && nextPhysRefLocation == (refLocation + 1)))
431     {
432         return true;
433     }
434     return false;
435 }
436
437 /*****************************************************************************
438  * Inline functions for Interval
439  *****************************************************************************/
440 RefPosition* Referenceable::getNextRefPosition()
441 {
442     if (recentRefPosition == nullptr)
443     {
444         return firstRefPosition;
445     }
446     else
447     {
448         return recentRefPosition->nextRefPosition;
449     }
450 }
451
452 LsraLocation Referenceable::getNextRefLocation()
453 {
454     RefPosition* nextRefPosition = getNextRefPosition();
455     if (nextRefPosition == nullptr)
456     {
457         return MaxLocation;
458     }
459     else
460     {
461         return nextRefPosition->nodeLocation;
462     }
463 }
464
465 // Iterate through all the registers of the given type
466 class RegisterIterator
467 {
468     friend class Registers;
469
470 public:
471     RegisterIterator(RegisterType type) : regType(type)
472     {
473         if (useFloatReg(regType))
474         {
475             currentRegNum = REG_FP_FIRST;
476         }
477         else
478         {
479             currentRegNum = REG_INT_FIRST;
480         }
481     }
482
483 protected:
484     static RegisterIterator Begin(RegisterType regType)
485     {
486         return RegisterIterator(regType);
487     }
488     static RegisterIterator End(RegisterType regType)
489     {
490         RegisterIterator endIter = RegisterIterator(regType);
491         // This assumes only integer and floating point register types
492         // if we target a processor with additional register types,
493         // this would have to change
494         if (useFloatReg(regType))
495         {
496             // This just happens to work for both double & float
497             endIter.currentRegNum = REG_NEXT(REG_FP_LAST);
498         }
499         else
500         {
501             endIter.currentRegNum = REG_NEXT(REG_INT_LAST);
502         }
503         return endIter;
504     }
505
506 public:
507     void operator++(int dummy) // int dummy is c++ for "this is postfix ++"
508     {
509         currentRegNum = REG_NEXT(currentRegNum);
510 #ifdef _TARGET_ARM_
511         if (regType == TYP_DOUBLE)
512             currentRegNum = REG_NEXT(currentRegNum);
513 #endif
514     }
515     void operator++() // prefix operator++
516     {
517         currentRegNum = REG_NEXT(currentRegNum);
518 #ifdef _TARGET_ARM_
519         if (regType == TYP_DOUBLE)
520             currentRegNum = REG_NEXT(currentRegNum);
521 #endif
522     }
523     regNumber operator*()
524     {
525         return currentRegNum;
526     }
527     bool operator!=(const RegisterIterator& other)
528     {
529         return other.currentRegNum != currentRegNum;
530     }
531
532 private:
533     regNumber    currentRegNum;
534     RegisterType regType;
535 };
536
537 class Registers
538 {
539 public:
540     friend class RegisterIterator;
541     RegisterType type;
542     Registers(RegisterType t)
543     {
544         type = t;
545     }
546     RegisterIterator begin()
547     {
548         return RegisterIterator::Begin(type);
549     }
550     RegisterIterator end()
551     {
552         return RegisterIterator::End(type);
553     }
554 };
555
556 #ifdef DEBUG
557 void LinearScan::dumpVarToRegMap(VarToRegMap map)
558 {
559     bool anyPrinted = false;
560     for (unsigned varIndex = 0; varIndex < compiler->lvaTrackedCount; varIndex++)
561     {
562         unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
563         if (map[varIndex] != REG_STK)
564         {
565             printf("V%02u=%s ", varNum, getRegName(map[varIndex]));
566             anyPrinted = true;
567         }
568     }
569     if (!anyPrinted)
570     {
571         printf("none");
572     }
573     printf("\n");
574 }
575
576 void LinearScan::dumpInVarToRegMap(BasicBlock* block)
577 {
578     printf("Var=Reg beg of " FMT_BB ": ", block->bbNum);
579     VarToRegMap map = getInVarToRegMap(block->bbNum);
580     dumpVarToRegMap(map);
581 }
582
583 void LinearScan::dumpOutVarToRegMap(BasicBlock* block)
584 {
585     printf("Var=Reg end of " FMT_BB ": ", block->bbNum);
586     VarToRegMap map = getOutVarToRegMap(block->bbNum);
587     dumpVarToRegMap(map);
588 }
589
590 #endif // DEBUG
591
// getLinearScanAllocator: factory returning the LinearScan register allocator
// instance for 'comp', allocated from the compiler's LSRA memory pool.
LinearScanInterface* getLinearScanAllocator(Compiler* comp)
{
    return new (comp, CMK_LSRA) LinearScan(comp);
}
596
597 //------------------------------------------------------------------------
598 // LSRA constructor
599 //
600 // Arguments:
601 //    theCompiler
602 //
603 // Notes:
604 //    The constructor takes care of initializing the data structures that are used
605 //    during Lowering, including (in DEBUG) getting the stress environment variables,
606 //    as they may affect the block ordering.
607
LinearScan::LinearScan(Compiler* theCompiler)
    : compiler(theCompiler)
    , intervals(theCompiler->getAllocator(CMK_LSRA_Interval))
    , allocationPassComplete(false)
    , refPositions(theCompiler->getAllocator(CMK_LSRA_RefPosition))
    , listNodePool(theCompiler)
{
#ifdef DEBUG
    maxNodeLocation   = 0;
    activeRefPosition = nullptr;

    // Get the value of the environment variable that controls stress for register allocation
    lsraStressMask = JitConfig.JitStressRegs();
#if 0
    if (lsraStressMask != 0)
    {
        // The code in this #if can be used to debug JitStressRegs issues according to
        // method hash.  To use, simply set environment variables JitStressRegsHashLo and JitStressRegsHashHi
        unsigned methHash = compiler->info.compMethodHash();
        char* lostr = getenv("JitStressRegsHashLo");
        unsigned methHashLo = 0;
        bool dump = false;
        if (lostr != nullptr)
        {
            sscanf_s(lostr, "%x", &methHashLo);
            dump = true;
        }
        char* histr = getenv("JitStressRegsHashHi");
        unsigned methHashHi = UINT32_MAX;
        if (histr != nullptr)
        {
            sscanf_s(histr, "%x", &methHashHi);
            dump = true;
        }
        if (methHash < methHashLo || methHash > methHashHi)
        {
            lsraStressMask = 0;
        }
        else if (dump == true)
        {
            printf("JitStressRegs = %x for method %s, hash = 0x%x.\n",
                lsraStressMask, compiler->info.compFullName, compiler->info.compMethodHash());
            printf("");         // in our logic this causes a flush
        }
    }
#endif // 0
#endif // DEBUG

    // Assume that we will enregister local variables if it's not disabled. We'll reset it if we
    // have no tracked locals when we start allocating. Note that new tracked lclVars may be added
    // after the first liveness analysis - either by optimizations or by Lowering, and the tracked
    // set won't be recomputed until after Lowering (and this constructor is called prior to Lowering),
    // so we don't want to check that yet.
    enregisterLocalVars = ((compiler->opts.compFlags & CLFLG_REGVAR) != 0);
#ifdef _TARGET_ARM64_
    // NOTE(review): on ARM64 this additionally removes RBM_PR, RBM_FP and RBM_LR from the
    // allocatable integer registers (presumably reserved registers — confirm against target.h).
    availableIntRegs = (RBM_ALLINT & ~(RBM_PR | RBM_FP | RBM_LR) & ~compiler->codeGen->regSet.rsMaskResvd);
#else
    availableIntRegs = (RBM_ALLINT & ~compiler->codeGen->regSet.rsMaskResvd);
#endif

#if ETW_EBP_FRAMED
    // With ETW frame chaining, the frame-pointer base register is never allocatable.
    availableIntRegs &= ~RBM_FPBASE;
#endif // ETW_EBP_FRAMED

    availableFloatRegs  = RBM_ALLFLOAT;
    availableDoubleRegs = RBM_ALLDOUBLE;

#ifdef _TARGET_AMD64_
    if (compiler->opts.compDbgEnC)
    {
        // On x64 when the EnC option is set, we always save exactly RBP, RSI and RDI.
        // RBP is not available to the register allocator, so RSI and RDI are the only
        // callee-save registers available.
        availableIntRegs &= ~RBM_CALLEE_SAVED | RBM_RSI | RBM_RDI;
        availableFloatRegs &= ~RBM_CALLEE_SAVED;
        availableDoubleRegs &= ~RBM_CALLEE_SAVED;
    }
#endif // _TARGET_AMD64_
    // Frame-type decisions are made later, once register usage is known.
    compiler->rpFrameType           = FT_NOT_SET;
    compiler->rpMustCreateEBPCalled = false;

    compiler->codeGen->intRegState.rsIsFloat   = false;
    compiler->codeGen->floatRegState.rsIsFloat = true;

    // Block sequencing (the order in which we schedule).
    // Note that we don't initialize the bbVisitedSet until we do the first traversal
    // This is so that any blocks that are added during the first traversal
    // are accounted for (and we don't have BasicBlockEpoch issues).
    blockSequencingDone   = false;
    blockSequence         = nullptr;
    blockSequenceWorkList = nullptr;
    curBBSeqNum           = 0;
    bbSeqCount            = 0;

    // Information about each block, including predecessor blocks used for variable locations at block entry.
    blockInfo = nullptr;

    pendingDelayFree = false;
    tgtPrefUse       = nullptr;
}
708
709 //------------------------------------------------------------------------
710 // getNextCandidateFromWorkList: Get the next candidate for block sequencing
711 //
712 // Arguments:
713 //    None.
714 //
715 // Return Value:
716 //    The next block to be placed in the sequence.
717 //
718 // Notes:
719 //    This method currently always returns the next block in the list, and relies on having
720 //    blocks added to the list only when they are "ready", and on the
721 //    addToBlockSequenceWorkList() method to insert them in the proper order.
722 //    However, a block may be in the list and already selected, if it was subsequently
723 //    encountered as both a flow and layout successor of the most recently selected
724 //    block.
725
726 BasicBlock* LinearScan::getNextCandidateFromWorkList()
727 {
728     BasicBlockList* nextWorkList = nullptr;
729     for (BasicBlockList* workList = blockSequenceWorkList; workList != nullptr; workList = nextWorkList)
730     {
731         nextWorkList          = workList->next;
732         BasicBlock* candBlock = workList->block;
733         removeFromBlockSequenceWorkList(workList, nullptr);
734         if (!isBlockVisited(candBlock))
735         {
736             return candBlock;
737         }
738     }
739     return nullptr;
740 }
741
742 //------------------------------------------------------------------------
743 // setBlockSequence: Determine the block order for register allocation.
744 //
745 // Arguments:
746 //    None
747 //
748 // Return Value:
749 //    None
750 //
751 // Notes:
752 //    On return, the blockSequence array contains the blocks, in the order in which they
753 //    will be allocated.
754 //    This method clears the bbVisitedSet on LinearScan, and when it returns the set
755 //    contains all the bbNums for the block.
756
757 void LinearScan::setBlockSequence()
758 {
759     assert(!blockSequencingDone); // The method should be called only once.
760
761     compiler->EnsureBasicBlockEpoch();
762 #ifdef DEBUG
763     blockEpoch = compiler->GetCurBasicBlockEpoch();
764 #endif // DEBUG
765
766     // Initialize the "visited" blocks set.
767     bbVisitedSet = BlockSetOps::MakeEmpty(compiler);
768
769     BlockSet readySet(BlockSetOps::MakeEmpty(compiler));
770     BlockSet predSet(BlockSetOps::MakeEmpty(compiler));
771
772     assert(blockSequence == nullptr && bbSeqCount == 0);
773     blockSequence            = new (compiler, CMK_LSRA) BasicBlock*[compiler->fgBBcount];
774     bbNumMaxBeforeResolution = compiler->fgBBNumMax;
775     blockInfo                = new (compiler, CMK_LSRA) LsraBlockInfo[bbNumMaxBeforeResolution + 1];
776
777     assert(blockSequenceWorkList == nullptr);
778
779     bool addedInternalBlocks = false;
780     verifiedAllBBs           = false;
781     hasCriticalEdges         = false;
782     BasicBlock* nextBlock;
783     // We use a bbNum of 0 for entry RefPositions.
784     // The other information in blockInfo[0] will never be used.
785     blockInfo[0].weight = BB_UNITY_WEIGHT;
786     for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = nextBlock)
787     {
788         blockSequence[bbSeqCount] = block;
789         markBlockVisited(block);
790         bbSeqCount++;
791         nextBlock = nullptr;
792
793         // Initialize the blockInfo.
794         // predBBNum will be set later.  0 is never used as a bbNum.
795         assert(block->bbNum != 0);
796         blockInfo[block->bbNum].predBBNum = 0;
797         // We check for critical edges below, but initialize to false.
798         blockInfo[block->bbNum].hasCriticalInEdge  = false;
799         blockInfo[block->bbNum].hasCriticalOutEdge = false;
800         blockInfo[block->bbNum].weight             = block->getBBWeight(compiler);
801
802 #if TRACK_LSRA_STATS
803         blockInfo[block->bbNum].spillCount         = 0;
804         blockInfo[block->bbNum].copyRegCount       = 0;
805         blockInfo[block->bbNum].resolutionMovCount = 0;
806         blockInfo[block->bbNum].splitEdgeCount     = 0;
807 #endif // TRACK_LSRA_STATS
808
809         if (block->GetUniquePred(compiler) == nullptr)
810         {
811             for (flowList* pred = block->bbPreds; pred != nullptr; pred = pred->flNext)
812             {
813                 BasicBlock* predBlock = pred->flBlock;
814                 if (predBlock->NumSucc(compiler) > 1)
815                 {
816                     blockInfo[block->bbNum].hasCriticalInEdge = true;
817                     hasCriticalEdges                          = true;
818                     break;
819                 }
820                 else if (predBlock->bbJumpKind == BBJ_SWITCH)
821                 {
822                     assert(!"Switch with single successor");
823                 }
824             }
825         }
826
827         // Determine which block to schedule next.
828
829         // First, update the NORMAL successors of the current block, adding them to the worklist
830         // according to the desired order.  We will handle the EH successors below.
831         bool checkForCriticalOutEdge = (block->NumSucc(compiler) > 1);
832         if (!checkForCriticalOutEdge && block->bbJumpKind == BBJ_SWITCH)
833         {
834             assert(!"Switch with single successor");
835         }
836
837         const unsigned numSuccs = block->NumSucc(compiler);
838         for (unsigned succIndex = 0; succIndex < numSuccs; succIndex++)
839         {
840             BasicBlock* succ = block->GetSucc(succIndex, compiler);
841             if (checkForCriticalOutEdge && succ->GetUniquePred(compiler) == nullptr)
842             {
843                 blockInfo[block->bbNum].hasCriticalOutEdge = true;
844                 hasCriticalEdges                           = true;
845                 // We can stop checking now.
846                 checkForCriticalOutEdge = false;
847             }
848
849             if (isTraversalLayoutOrder() || isBlockVisited(succ))
850             {
851                 continue;
852             }
853
854             // We've now seen a predecessor, so add it to the work list and the "readySet".
855             // It will be inserted in the worklist according to the specified traversal order
856             // (i.e. pred-first or random, since layout order is handled above).
857             if (!BlockSetOps::IsMember(compiler, readySet, succ->bbNum))
858             {
859                 addToBlockSequenceWorkList(readySet, succ, predSet);
860                 BlockSetOps::AddElemD(compiler, readySet, succ->bbNum);
861             }
862         }
863
864         // For layout order, simply use bbNext
865         if (isTraversalLayoutOrder())
866         {
867             nextBlock = block->bbNext;
868             continue;
869         }
870
871         while (nextBlock == nullptr)
872         {
873             nextBlock = getNextCandidateFromWorkList();
874
875             // TODO-Throughput: We would like to bypass this traversal if we know we've handled all
876             // the blocks - but fgBBcount does not appear to be updated when blocks are removed.
877             if (nextBlock == nullptr /* && bbSeqCount != compiler->fgBBcount*/ && !verifiedAllBBs)
878             {
879                 // If we don't encounter all blocks by traversing the regular successor links, do a full
880                 // traversal of all the blocks, and add them in layout order.
881                 // This may include:
882                 //   - internal-only blocks (in the fgAddCodeList) which may not be in the flow graph
883                 //     (these are not even in the bbNext links).
884                 //   - blocks that have become unreachable due to optimizations, but that are strongly
885                 //     connected (these are not removed)
886                 //   - EH blocks
887
888                 for (Compiler::AddCodeDsc* desc = compiler->fgAddCodeList; desc != nullptr; desc = desc->acdNext)
889                 {
890                     if (!isBlockVisited(block))
891                     {
892                         addToBlockSequenceWorkList(readySet, block, predSet);
893                         BlockSetOps::AddElemD(compiler, readySet, block->bbNum);
894                     }
895                 }
896
897                 for (BasicBlock* block = compiler->fgFirstBB; block; block = block->bbNext)
898                 {
899                     if (!isBlockVisited(block))
900                     {
901                         addToBlockSequenceWorkList(readySet, block, predSet);
902                         BlockSetOps::AddElemD(compiler, readySet, block->bbNum);
903                     }
904                 }
905                 verifiedAllBBs = true;
906             }
907             else
908             {
909                 break;
910             }
911         }
912     }
913     blockSequencingDone = true;
914
915 #ifdef DEBUG
916     // Make sure that we've visited all the blocks.
917     for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = block->bbNext)
918     {
919         assert(isBlockVisited(block));
920     }
921
922     JITDUMP("LSRA Block Sequence: ");
923     int i = 1;
924     for (BasicBlock *block = startBlockSequence(); block != nullptr; ++i, block = moveToNextBlock())
925     {
926         JITDUMP(FMT_BB, block->bbNum);
927
928         if (block->isMaxBBWeight())
929         {
930             JITDUMP("(MAX) ");
931         }
932         else
933         {
934             JITDUMP("(%6s) ", refCntWtd2str(block->getBBWeight(compiler)));
935         }
936
937         if (i % 10 == 0)
938         {
939             JITDUMP("\n                     ");
940         }
941     }
942     JITDUMP("\n\n");
943 #endif
944 }
945
946 //------------------------------------------------------------------------
947 // compareBlocksForSequencing: Compare two basic blocks for sequencing order.
948 //
949 // Arguments:
950 //    block1            - the first block for comparison
951 //    block2            - the second block for comparison
952 //    useBlockWeights   - whether to use block weights for comparison
953 //
954 // Return Value:
955 //    -1 if block1 is preferred.
956 //     0 if the blocks are equivalent.
957 //     1 if block2 is preferred.
958 //
959 // Notes:
960 //    See addToBlockSequenceWorkList.
961 int LinearScan::compareBlocksForSequencing(BasicBlock* block1, BasicBlock* block2, bool useBlockWeights)
962 {
963     if (useBlockWeights)
964     {
965         unsigned weight1 = block1->getBBWeight(compiler);
966         unsigned weight2 = block2->getBBWeight(compiler);
967
968         if (weight1 > weight2)
969         {
970             return -1;
971         }
972         else if (weight1 < weight2)
973         {
974             return 1;
975         }
976     }
977
978     // If weights are the same prefer LOWER bbnum
979     if (block1->bbNum < block2->bbNum)
980     {
981         return -1;
982     }
983     else if (block1->bbNum == block2->bbNum)
984     {
985         return 0;
986     }
987     else
988     {
989         return 1;
990     }
991 }
992
993 //------------------------------------------------------------------------
994 // addToBlockSequenceWorkList: Add a BasicBlock to the work list for sequencing.
995 //
996 // Arguments:
997 //    sequencedBlockSet - the set of blocks that are already sequenced
998 //    block             - the new block to be added
999 //    predSet           - the buffer to save predecessors set. A block set allocated by the caller used here as a
1000 //    temporary block set for constructing a predecessor set. Allocated by the caller to avoid reallocating a new block
1001 //    set with every call to this function
1002 //
1003 // Return Value:
1004 //    None.
1005 //
1006 // Notes:
1007 //    The first block in the list will be the next one to be sequenced, as soon
1008 //    as we encounter a block whose successors have all been sequenced, in pred-first
1009 //    order, or the very next block if we are traversing in random order (once implemented).
1010 //    This method uses a comparison method to determine the order in which to place
1011 //    the blocks in the list.  This method queries whether all predecessors of the
1012 //    block are sequenced at the time it is added to the list and if so uses block weights
1013 //    for inserting the block.  A block is never inserted ahead of its predecessors.
1014 //    A block at the time of insertion may not have all its predecessors sequenced, in
1015 //    which case it will be sequenced based on its block number. Once a block is inserted,
1016 //    its priority\order will not be changed later once its remaining predecessors are
1017 //    sequenced.  This would mean that work list may not be sorted entirely based on
1018 //    block weights alone.
1019 //
1020 //    Note also that, when random traversal order is implemented, this method
1021 //    should insert the blocks into the list in random order, so that we can always
1022 //    simply select the first block in the list.
void LinearScan::addToBlockSequenceWorkList(BlockSet sequencedBlockSet, BasicBlock* block, BlockSet& predSet)
{
    // The block that is being added is not already sequenced
    assert(!BlockSetOps::IsMember(compiler, sequencedBlockSet, block->bbNum));

    // Get predSet of block: collect the bbNums of all of 'block's predecessors
    // into the caller-provided scratch set.
    BlockSetOps::ClearD(compiler, predSet);
    flowList* pred;
    for (pred = block->bbPreds; pred != nullptr; pred = pred->flNext)
    {
        BlockSetOps::AddElemD(compiler, predSet, pred->flBlock->bbNum);
    }

    // If either a rarely run block or all its preds are already sequenced, use block's weight to sequence
    bool useBlockWeight = block->isRunRarely() || BlockSetOps::IsSubset(compiler, sequencedBlockSet, predSet);

    // Walk the (singly-linked, priority-ordered) work list to find the insertion point:
    // the first node that should be sequenced after 'block' (seqResult > 0).
    BasicBlockList* prevNode = nullptr;
    BasicBlockList* nextNode = blockSequenceWorkList;

    while (nextNode != nullptr)
    {
        int seqResult;

        if (nextNode->block->isRunRarely())
        {
            // If the block that is yet to be sequenced is a rarely run block, always use block weights for sequencing
            seqResult = compareBlocksForSequencing(nextNode->block, block, true);
        }
        else if (BlockSetOps::IsMember(compiler, predSet, nextNode->block->bbNum))
        {
            // always prefer unsequenced pred blocks
            seqResult = -1;
        }
        else
        {
            seqResult = compareBlocksForSequencing(nextNode->block, block, useBlockWeight);
        }

        if (seqResult > 0)
        {
            break;
        }

        prevNode = nextNode;
        nextNode = nextNode->next;
    }

    // Splice the new node in between prevNode and nextNode (at the head if prevNode is null).
    BasicBlockList* newListNode = new (compiler, CMK_LSRA) BasicBlockList(block, nextNode);
    if (prevNode == nullptr)
    {
        blockSequenceWorkList = newListNode;
    }
    else
    {
        prevNode->next = newListNode;
    }
}
1080
1081 void LinearScan::removeFromBlockSequenceWorkList(BasicBlockList* listNode, BasicBlockList* prevNode)
1082 {
1083     if (listNode == blockSequenceWorkList)
1084     {
1085         assert(prevNode == nullptr);
1086         blockSequenceWorkList = listNode->next;
1087     }
1088     else
1089     {
1090         assert(prevNode != nullptr && prevNode->next == listNode);
1091         prevNode->next = listNode->next;
1092     }
1093     // TODO-Cleanup: consider merging Compiler::BlockListNode and BasicBlockList
1094     // compiler->FreeBlockListNode(listNode);
1095 }
1096
1097 // Initialize the block order for allocation (called each time a new traversal begins).
1098 BasicBlock* LinearScan::startBlockSequence()
1099 {
1100     if (!blockSequencingDone)
1101     {
1102         setBlockSequence();
1103     }
1104     else
1105     {
1106         clearVisitedBlocks();
1107     }
1108
1109     BasicBlock* curBB = compiler->fgFirstBB;
1110     curBBSeqNum       = 0;
1111     curBBNum          = curBB->bbNum;
1112     assert(blockSequence[0] == compiler->fgFirstBB);
1113     markBlockVisited(curBB);
1114     return curBB;
1115 }
1116
1117 //------------------------------------------------------------------------
1118 // moveToNextBlock: Move to the next block in order for allocation or resolution.
1119 //
1120 // Arguments:
1121 //    None
1122 //
1123 // Return Value:
1124 //    The next block.
1125 //
1126 // Notes:
1127 //    This method is used when the next block is actually going to be handled.
1128 //    It changes curBBNum.
1129
1130 BasicBlock* LinearScan::moveToNextBlock()
1131 {
1132     BasicBlock* nextBlock = getNextBlock();
1133     curBBSeqNum++;
1134     if (nextBlock != nullptr)
1135     {
1136         curBBNum = nextBlock->bbNum;
1137     }
1138     return nextBlock;
1139 }
1140
1141 //------------------------------------------------------------------------
1142 // getNextBlock: Get the next block in order for allocation or resolution.
1143 //
1144 // Arguments:
1145 //    None
1146 //
1147 // Return Value:
1148 //    The next block.
1149 //
1150 // Notes:
1151 //    This method does not actually change the current block - it is used simply
1152 //    to determine which block will be next.
1153
1154 BasicBlock* LinearScan::getNextBlock()
1155 {
1156     assert(blockSequencingDone);
1157     unsigned int nextBBSeqNum = curBBSeqNum + 1;
1158     if (nextBBSeqNum < bbSeqCount)
1159     {
1160         return blockSequence[nextBBSeqNum];
1161     }
1162     return nullptr;
1163 }
1164
1165 //------------------------------------------------------------------------
1166 // doLinearScan: The main method for register allocation.
1167 //
1168 // Arguments:
1169 //    None
1170 //
1171 // Return Value:
1172 //    None.
1173 //
1174
void LinearScan::doLinearScan()
{
    // Check to see whether we have any local variables to enregister.
    // We initialize this in the constructor based on opt settings,
    // but we don't want to spend time on the lclVar parts of LinearScan
    // if we have no tracked locals.
    if (enregisterLocalVars && (compiler->lvaTrackedCount == 0))
    {
        enregisterLocalVars = false;
    }

    splitBBNumToTargetBBNumMap = nullptr;

    // This is complicated by the fact that physical registers have refs associated
    // with locations where they are killed (e.g. calls), but we don't want to
    // count these as being touched.

    compiler->codeGen->regSet.rsClearRegsModified();

    // The three phases run in a fixed order: build, then allocate, then resolve.
    initMaxSpill();
    buildIntervals();
    DBEXEC(VERBOSE, TupleStyleDump(LSRA_DUMP_REFPOS));
    compiler->EndPhase(PHASE_LINEAR_SCAN_BUILD);

    DBEXEC(VERBOSE, lsraDumpIntervals("after buildIntervals"));

    initVarRegMaps();
    allocateRegisters();
    allocationPassComplete = true;
    compiler->EndPhase(PHASE_LINEAR_SCAN_ALLOC);
    resolveRegisters();
    compiler->EndPhase(PHASE_LINEAR_SCAN_RESOLVE);

    assert(blockSequencingDone); // Should do at least one traversal.
    assert(blockEpoch == compiler->GetCurBasicBlockEpoch());

#if TRACK_LSRA_STATS
    // Dump the per-block LSRA statistics if requested (or always when verbose, in DEBUG).
    if ((JitConfig.DisplayLsraStats() != 0)
#ifdef DEBUG
        || VERBOSE
#endif
        )
    {
        dumpLsraStats(jitstdout);
    }
#endif // TRACK_LSRA_STATS

    DBEXEC(VERBOSE, TupleStyleDump(LSRA_DUMP_POST));

    compiler->compLSRADone = true;
}
1226
1227 //------------------------------------------------------------------------
1228 // recordVarLocationsAtStartOfBB: Update live-in LclVarDscs with the appropriate
1229 //    register location at the start of a block, during codegen.
1230 //
1231 // Arguments:
1232 //    bb - the block for which code is about to be generated.
1233 //
1234 // Return Value:
1235 //    None.
1236 //
1237 // Assumptions:
1238 //    CodeGen will take care of updating the reg masks and the current var liveness,
1239 //    after calling this method.
1240 //    This is because we need to kill off the dead registers before setting the newly live ones.
1241
void LinearScan::recordVarLocationsAtStartOfBB(BasicBlock* bb)
{
    // Nothing to record when locals are not being enregistered.
    if (!enregisterLocalVars)
    {
        return;
    }
    JITDUMP("Recording Var Locations at start of " FMT_BB "\n", bb->bbNum);
    VarToRegMap map   = getInVarToRegMap(bb->bbNum);
    unsigned    count = 0; // number of vars whose location was dumped/changed

    // Iterate only over the register-candidate vars that are live into this block.
    VarSetOps::AssignNoCopy(compiler, currentLiveVars,
                            VarSetOps::Intersection(compiler, registerCandidateVars, bb->bbLiveIn));
    VarSetOps::Iter iter(compiler, currentLiveVars);
    unsigned        varIndex = 0;
    while (iter.NextElem(&varIndex))
    {
        unsigned   varNum = compiler->lvaTrackedToVarNum[varIndex];
        LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
        regNumber  regNum = getVarReg(map, varIndex);

        regNumber oldRegNum = varDsc->lvRegNum;
        regNumber newRegNum = regNum;

        if (oldRegNum != newRegNum)
        {
            // The var's location changes at this block boundary; update lvRegNum.
            JITDUMP("  V%02u(%s->%s)", varNum, compiler->compRegVarName(oldRegNum),
                    compiler->compRegVarName(newRegNum));
            varDsc->lvRegNum = newRegNum;
            count++;

#ifdef USING_VARIABLE_LIVE_RANGE
            if (bb->bbPrev != nullptr && VarSetOps::IsMember(compiler, bb->bbPrev->bbLiveOut, varIndex))
            {
                // varDsc was alive on previous block end ("bb->bbPrev->bbLiveOut"), so it has an open
                // "VariableLiveRange" which should change to be according "getInVarToRegMap"
                compiler->codeGen->getVariableLiveKeeper()->siUpdateVariableLiveRange(varDsc, varNum);
            }
#endif // USING_VARIABLE_LIVE_RANGE
        }
        else if (newRegNum != REG_STK)
        {
            // Location unchanged and in a register; just dump it.
            JITDUMP("  V%02u(%s)", varNum, compiler->compRegVarName(newRegNum));
            count++;
        }
    }

    if (count == 0)
    {
        JITDUMP("  <none>\n");
    }

    JITDUMP("\n");
}
1295
1296 void Interval::setLocalNumber(Compiler* compiler, unsigned lclNum, LinearScan* linScan)
1297 {
1298     LclVarDsc* varDsc = &compiler->lvaTable[lclNum];
1299     assert(varDsc->lvTracked);
1300     assert(varDsc->lvVarIndex < compiler->lvaTrackedCount);
1301
1302     linScan->localVarIntervals[varDsc->lvVarIndex] = this;
1303
1304     assert(linScan->getIntervalForLocalVar(varDsc->lvVarIndex) == this);
1305     this->isLocalVar = true;
1306     this->varNum     = lclNum;
1307 }
1308
1309 // identify the candidates which we are not going to enregister due to
1310 // being used in EH in a way we don't want to deal with
1311 // this logic cloned from fgInterBlockLocalVarLiveness
void LinearScan::identifyCandidatesExceptionDataflow()
{
    // Sets of tracked-var indices that are live across EH boundaries.
    VARSET_TP   exceptVars(VarSetOps::MakeEmpty(compiler));
    VARSET_TP   filterVars(VarSetOps::MakeEmpty(compiler));
    VARSET_TP   finallyVars(VarSetOps::MakeEmpty(compiler));
    BasicBlock* block;

    foreach_block(compiler, block)
    {
        if (block->bbCatchTyp != BBCT_NONE)
        {
            // live on entry to handler
            VarSetOps::UnionD(compiler, exceptVars, block->bbLiveIn);
        }

        if (block->bbJumpKind == BBJ_EHFILTERRET)
        {
            // live on exit from filter
            VarSetOps::UnionD(compiler, filterVars, block->bbLiveOut);
        }
        else if (block->bbJumpKind == BBJ_EHFINALLYRET)
        {
            // live on exit from finally
            VarSetOps::UnionD(compiler, finallyVars, block->bbLiveOut);
        }
#if FEATURE_EH_FUNCLETS
        // Funclets are called and returned from, as such we can only count on the frame
        // pointer being restored, and thus everything live in or live out must be on the
        // stack
        if (block->bbFlags & BBF_FUNCLET_BEG)
        {
            VarSetOps::UnionD(compiler, exceptVars, block->bbLiveIn);
        }
        if ((block->bbJumpKind == BBJ_EHFINALLYRET) || (block->bbJumpKind == BBJ_EHFILTERRET) ||
            (block->bbJumpKind == BBJ_EHCATCHRET))
        {
            VarSetOps::UnionD(compiler, exceptVars, block->bbLiveOut);
        }
#endif // FEATURE_EH_FUNCLETS
    }

    // slam them all together (there was really no need to use more than 2 bitvectors here)
    VarSetOps::UnionD(compiler, exceptVars, filterVars);
    VarSetOps::UnionD(compiler, exceptVars, finallyVars);

    /* Mark all pointer variables live on exit from a 'finally'
        block as either volatile for non-GC ref types or as
        'explicitly initialized' (volatile and must-init) for GC-ref types */

    // For every var in the combined set: mark it do-not-enregister, and, for GC-typed
    // non-parameters live out of a finally, force must-init.
    VarSetOps::Iter iter(compiler, exceptVars);
    unsigned        varIndex = 0;
    while (iter.NextElem(&varIndex))
    {
        unsigned   varNum = compiler->lvaTrackedToVarNum[varIndex];
        LclVarDsc* varDsc = compiler->lvaTable + varNum;

        compiler->lvaSetVarDoNotEnregister(varNum DEBUGARG(Compiler::DNER_LiveInOutOfHandler));

        if (varTypeIsGC(varDsc))
        {
            if (VarSetOps::IsMember(compiler, finallyVars, varIndex) && !varDsc->lvIsParam)
            {
                varDsc->lvMustInit = true;
            }
        }
    }
}
1379
//------------------------------------------------------------------------
// isRegCandidate: Determine whether the given lclVar is a candidate for
//    register allocation.
//
// Arguments:
//    varDsc - the lclVar descriptor to check
//
// Return Value:
//    True if the lclVar may be enregistered; false otherwise.
//
// Notes:
//    May update state on the lclVar as a side effect: lvaSetVarDoNotEnregister,
//    clearing lvTracked (pinned vars), zeroing the weighted ref count
//    (unreferenced vars), and repairing an unset lvType.
bool LinearScan::isRegCandidate(LclVarDsc* varDsc)
{
    if (!enregisterLocalVars)
    {
        return false;
    }
    assert((compiler->opts.compFlags & CLFLG_REGVAR) != 0);

    // Untracked vars are never candidates.
    if (!varDsc->lvTracked)
    {
        return false;
    }

#if !defined(_TARGET_64BIT_)
    if (varDsc->lvType == TYP_LONG)
    {
        // Long variables should not be register candidates.
        // Lowering will have split any candidate lclVars into lo/hi vars.
        return false;
    }
#endif // !defined(_TARGET_64BIT)

    // If we have JMP, reg args must be put on the stack

    if (compiler->compJmpOpUsed && varDsc->lvIsRegArg)
    {
        return false;
    }

    // Don't allocate registers for dependently promoted struct fields
    if (compiler->lvaIsFieldOfDependentlyPromotedStruct(varDsc))
    {
        return false;
    }

    // Don't enregister if the ref count is zero.
    if (varDsc->lvRefCnt() == 0)
    {
        varDsc->setLvRefCntWtd(0);
        return false;
    }

    // Variables that are address-exposed are never enregistered, or tracked.
    // A struct may be promoted, and a struct that fits in a register may be fully enregistered.
    // Pinned variables may not be tracked (a condition of the GCInfo representation)
    // or enregistered, on x86 -- it is believed that we can enregister pinned (more properly, "pinning")
    // references when using the general GC encoding.
    unsigned lclNum = (unsigned)(varDsc - compiler->lvaTable);
    if (varDsc->lvAddrExposed || !varTypeIsEnregisterable(varDsc))
    {
#ifdef DEBUG
        // Distinguish the reason for diagnostics: address-exposed vs. non-enregisterable struct.
        Compiler::DoNotEnregisterReason dner = Compiler::DNER_AddrExposed;
        if (!varDsc->lvAddrExposed)
        {
            dner = Compiler::DNER_IsStruct;
        }
#endif // DEBUG
        compiler->lvaSetVarDoNotEnregister(lclNum DEBUGARG(dner));
        return false;
    }
    else if (varDsc->lvPinned)
    {
        varDsc->lvTracked = 0;
#ifdef JIT32_GCENCODER
        compiler->lvaSetVarDoNotEnregister(lclNum DEBUGARG(Compiler::DNER_PinningRef));
#endif // JIT32_GCENCODER
        return false;
    }

    //  Are we not optimizing and we have exception handlers?
    //   if so mark all args and locals as volatile, so that they
    //   won't ever get enregistered.
    //
    if (compiler->opts.MinOpts() && compiler->compHndBBtabCount > 0)
    {
        compiler->lvaSetVarDoNotEnregister(lclNum DEBUGARG(Compiler::DNER_LiveInOutOfHandler));
    }

    if (varDsc->lvDoNotEnregister)
    {
        return false;
    }

    // Finally, filter by (actual) type.
    switch (genActualType(varDsc->TypeGet()))
    {
#if CPU_HAS_FP_SUPPORT
        case TYP_FLOAT:
        case TYP_DOUBLE:
            return !compiler->opts.compDbgCode;

#endif // CPU_HAS_FP_SUPPORT

        case TYP_INT:
        case TYP_LONG:
        case TYP_REF:
        case TYP_BYREF:
            break;

#ifdef FEATURE_SIMD
        case TYP_SIMD12:
        case TYP_SIMD16:
        case TYP_SIMD32:
            return !varDsc->lvPromoted;

        // TODO-1stClassStructs: Move TYP_SIMD8 up with the other SIMD types, after handling the param issue
        // (passing & returning as TYP_LONG).
        case TYP_SIMD8:
            return false;
#endif // FEATURE_SIMD

        case TYP_STRUCT:
            return false;

        case TYP_UNDEF:
        case TYP_UNKNOWN:
            noway_assert(!"lvType not set correctly");
            varDsc->lvType = TYP_INT;
            return false;

        default:
            return false;
    }

    return true;
}
1505
1506 // Identify locals & compiler temps that are register candidates
1507 // TODO-Cleanup: This was cloned from Compiler::lvaSortByRefCount() in lclvars.cpp in order
1508 // to avoid perturbation, but should be merged.
1509
1510 void LinearScan::identifyCandidates()
1511 {
1512     if (enregisterLocalVars)
1513     {
1514         // Initialize the set of lclVars that are candidates for register allocation.
1515         VarSetOps::AssignNoCopy(compiler, registerCandidateVars, VarSetOps::MakeEmpty(compiler));
1516
1517         // Initialize the sets of lclVars that are used to determine whether, and for which lclVars,
1518         // we need to perform resolution across basic blocks.
1519         // Note that we can't do this in the constructor because the number of tracked lclVars may
1520         // change between the constructor and the actual allocation.
1521         VarSetOps::AssignNoCopy(compiler, resolutionCandidateVars, VarSetOps::MakeEmpty(compiler));
1522         VarSetOps::AssignNoCopy(compiler, splitOrSpilledVars, VarSetOps::MakeEmpty(compiler));
1523
1524         // We set enregisterLocalVars to true only if there are tracked lclVars
1525         assert(compiler->lvaCount != 0);
1526     }
1527     else if (compiler->lvaCount == 0)
1528     {
1529         // Nothing to do. Note that even if enregisterLocalVars is false, we still need to set the
1530         // lvLRACandidate field on all the lclVars to false if we have any.
1531         return;
1532     }
1533
1534     if (compiler->compHndBBtabCount > 0)
1535     {
1536         identifyCandidatesExceptionDataflow();
1537     }
1538
1539     unsigned   lclNum;
1540     LclVarDsc* varDsc;
1541
1542     // While we build intervals for the candidate lclVars, we will determine the floating point
1543     // lclVars, if any, to consider for callee-save register preferencing.
1544     // We maintain two sets of FP vars - those that meet the first threshold of weighted ref Count,
1545     // and those that meet the second.
1546     // The first threshold is used for methods that are heuristically deemed either to have light
1547     // fp usage, or other factors that encourage conservative use of callee-save registers, such
1548     // as multiple exits (where there might be an early exit that woudl be excessively penalized by
1549     // lots of prolog/epilog saves & restores).
1550     // The second threshold is used where there are factors deemed to make it more likely that fp
1551     // fp callee save registers will be needed, such as loops or many fp vars.
1552     // We keep two sets of vars, since we collect some of the information to determine which set to
1553     // use as we iterate over the vars.
1554     // When we are generating AVX code on non-Unix (FEATURE_PARTIAL_SIMD_CALLEE_SAVE), we maintain an
1555     // additional set of LargeVectorType vars, and there is a separate threshold defined for those.
1556     // It is assumed that if we encounter these, that we should consider this a "high use" scenario,
1557     // so we don't maintain two sets of these vars.
1558     // This is defined as thresholdLargeVectorRefCntWtd, as we are likely to use the same mechanism
1559     // for vectors on Arm64, though the actual value may differ.
1560
1561     unsigned int floatVarCount        = 0;
1562     unsigned int thresholdFPRefCntWtd = 4 * BB_UNITY_WEIGHT;
1563     unsigned int maybeFPRefCntWtd     = 2 * BB_UNITY_WEIGHT;
1564     VARSET_TP    fpMaybeCandidateVars(VarSetOps::UninitVal());
1565 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
1566     unsigned int largeVectorVarCount           = 0;
1567     unsigned int thresholdLargeVectorRefCntWtd = 4 * BB_UNITY_WEIGHT;
1568 #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
1569     if (enregisterLocalVars)
1570     {
1571         VarSetOps::AssignNoCopy(compiler, fpCalleeSaveCandidateVars, VarSetOps::MakeEmpty(compiler));
1572         VarSetOps::AssignNoCopy(compiler, fpMaybeCandidateVars, VarSetOps::MakeEmpty(compiler));
1573 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
1574         VarSetOps::AssignNoCopy(compiler, largeVectorVars, VarSetOps::MakeEmpty(compiler));
1575         VarSetOps::AssignNoCopy(compiler, largeVectorCalleeSaveCandidateVars, VarSetOps::MakeEmpty(compiler));
1576 #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
1577     }
1578 #if DOUBLE_ALIGN
1579     unsigned refCntStk       = 0;
1580     unsigned refCntReg       = 0;
1581     unsigned refCntWtdReg    = 0;
1582     unsigned refCntStkParam  = 0; // sum of     ref counts for all stack based parameters
1583     unsigned refCntWtdStkDbl = 0; // sum of wtd ref counts for stack based doubles
1584     doDoubleAlign            = false;
1585     bool checkDoubleAlign    = true;
1586     if (compiler->codeGen->isFramePointerRequired() || compiler->opts.MinOpts())
1587     {
1588         checkDoubleAlign = false;
1589     }
1590     else
1591     {
1592         switch (compiler->getCanDoubleAlign())
1593         {
1594             case MUST_DOUBLE_ALIGN:
1595                 doDoubleAlign    = true;
1596                 checkDoubleAlign = false;
1597                 break;
1598             case CAN_DOUBLE_ALIGN:
1599                 break;
1600             case CANT_DOUBLE_ALIGN:
1601                 doDoubleAlign    = false;
1602                 checkDoubleAlign = false;
1603                 break;
1604             default:
1605                 unreached();
1606         }
1607     }
1608 #endif // DOUBLE_ALIGN
1609
1610     // Check whether register variables are permitted.
1611     if (!enregisterLocalVars)
1612     {
1613         localVarIntervals = nullptr;
1614     }
1615     else if (compiler->lvaTrackedCount > 0)
1616     {
1617         // initialize mapping from tracked local to interval
1618         localVarIntervals = new (compiler, CMK_LSRA) Interval*[compiler->lvaTrackedCount];
1619     }
1620
1621     INTRACK_STATS(regCandidateVarCount = 0);
1622     for (lclNum = 0, varDsc = compiler->lvaTable; lclNum < compiler->lvaCount; lclNum++, varDsc++)
1623     {
1624         // Initialize all variables to REG_STK
1625         varDsc->lvRegNum = REG_STK;
1626 #ifndef _TARGET_64BIT_
1627         varDsc->lvOtherReg = REG_STK;
1628 #endif // _TARGET_64BIT_
1629
1630         if (!enregisterLocalVars)
1631         {
1632             varDsc->lvLRACandidate = false;
1633             continue;
1634         }
1635
1636 #if DOUBLE_ALIGN
1637         if (checkDoubleAlign)
1638         {
1639             if (varDsc->lvIsParam && !varDsc->lvIsRegArg)
1640             {
1641                 refCntStkParam += varDsc->lvRefCnt();
1642             }
1643             else if (!isRegCandidate(varDsc) || varDsc->lvDoNotEnregister)
1644             {
1645                 refCntStk += varDsc->lvRefCnt();
1646                 if ((varDsc->lvType == TYP_DOUBLE) ||
1647                     ((varTypeIsStruct(varDsc) && varDsc->lvStructDoubleAlign &&
1648                       (compiler->lvaGetPromotionType(varDsc) != Compiler::PROMOTION_TYPE_INDEPENDENT))))
1649                 {
1650                     refCntWtdStkDbl += varDsc->lvRefCntWtd();
1651                 }
1652             }
1653             else
1654             {
1655                 refCntReg += varDsc->lvRefCnt();
1656                 refCntWtdReg += varDsc->lvRefCntWtd();
1657             }
1658         }
1659 #endif // DOUBLE_ALIGN
1660
1661         // Start with the assumption that it's a candidate.
1662
1663         varDsc->lvLRACandidate = 1;
1664
1665         // Start with lvRegister as false - set it true only if the variable gets
1666         // the same register assignment throughout
1667         varDsc->lvRegister = false;
1668
1669         if (!isRegCandidate(varDsc))
1670         {
1671             varDsc->lvLRACandidate = 0;
1672             if (varDsc->lvTracked)
1673             {
1674                 localVarIntervals[varDsc->lvVarIndex] = nullptr;
1675             }
1676             continue;
1677         }
1678
1679         if (varDsc->lvLRACandidate)
1680         {
1681             var_types type   = genActualType(varDsc->TypeGet());
1682             Interval* newInt = newInterval(type);
1683             newInt->setLocalNumber(compiler, lclNum, this);
1684             VarSetOps::AddElemD(compiler, registerCandidateVars, varDsc->lvVarIndex);
1685
1686             // we will set this later when we have determined liveness
1687             varDsc->lvMustInit = false;
1688
1689             if (varDsc->lvIsStructField)
1690             {
1691                 newInt->isStructField = true;
1692             }
1693
1694             INTRACK_STATS(regCandidateVarCount++);
1695
1696             // We maintain two sets of FP vars - those that meet the first threshold of weighted ref Count,
1697             // and those that meet the second (see the definitions of thresholdFPRefCntWtd and maybeFPRefCntWtd
1698             // above).
1699             CLANG_FORMAT_COMMENT_ANCHOR;
1700
1701 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
1702             // Additionally, when we are generating code for a target with partial SIMD callee-save
1703             // (AVX on non-UNIX amd64 and 16-byte vectors on arm64), we keep a separate set of the
1704             // LargeVectorType vars.
1705             if (varTypeNeedsPartialCalleeSave(varDsc->lvType))
1706             {
1707                 largeVectorVarCount++;
1708                 VarSetOps::AddElemD(compiler, largeVectorVars, varDsc->lvVarIndex);
1709                 unsigned refCntWtd = varDsc->lvRefCntWtd();
1710                 if (refCntWtd >= thresholdLargeVectorRefCntWtd)
1711                 {
1712                     VarSetOps::AddElemD(compiler, largeVectorCalleeSaveCandidateVars, varDsc->lvVarIndex);
1713                 }
1714             }
1715             else
1716 #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
1717                 if (regType(type) == FloatRegisterType)
1718             {
1719                 floatVarCount++;
1720                 unsigned refCntWtd = varDsc->lvRefCntWtd();
1721                 if (varDsc->lvIsRegArg)
1722                 {
1723                     // Don't count the initial reference for register params.  In those cases,
1724                     // using a callee-save causes an extra copy.
1725                     refCntWtd -= BB_UNITY_WEIGHT;
1726                 }
1727                 if (refCntWtd >= thresholdFPRefCntWtd)
1728                 {
1729                     VarSetOps::AddElemD(compiler, fpCalleeSaveCandidateVars, varDsc->lvVarIndex);
1730                 }
1731                 else if (refCntWtd >= maybeFPRefCntWtd)
1732                 {
1733                     VarSetOps::AddElemD(compiler, fpMaybeCandidateVars, varDsc->lvVarIndex);
1734                 }
1735             }
1736         }
1737         else
1738         {
1739             localVarIntervals[varDsc->lvVarIndex] = nullptr;
1740         }
1741     }
1742
1743 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
1744     // Create Intervals to use for the save & restore of the upper halves of large vector lclVars.
1745     if (enregisterLocalVars)
1746     {
1747         VarSetOps::Iter largeVectorVarsIter(compiler, largeVectorVars);
1748         unsigned        largeVectorVarIndex = 0;
1749         while (largeVectorVarsIter.NextElem(&largeVectorVarIndex))
1750         {
1751             makeUpperVectorInterval(largeVectorVarIndex);
1752         }
1753     }
1754 #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
1755
1756 #if DOUBLE_ALIGN
1757     if (checkDoubleAlign)
1758     {
1759         // TODO-CQ: Fine-tune this:
1760         // In the legacy reg predictor, this runs after allocation, and then demotes any lclVars
1761         // allocated to the frame pointer, which is probably the wrong order.
1762         // However, because it runs after allocation, it can determine the impact of demoting
1763         // the lclVars allocated to the frame pointer.
1764         // => Here, estimate of the EBP refCnt and weighted refCnt is a wild guess.
1765         //
1766         unsigned refCntEBP    = refCntReg / 8;
1767         unsigned refCntWtdEBP = refCntWtdReg / 8;
1768
1769         doDoubleAlign =
1770             compiler->shouldDoubleAlign(refCntStk, refCntEBP, refCntWtdEBP, refCntStkParam, refCntWtdStkDbl);
1771     }
1772 #endif // DOUBLE_ALIGN
1773
1774     // The factors we consider to determine which set of fp vars to use as candidates for callee save
1775     // registers current include the number of fp vars, whether there are loops, and whether there are
1776     // multiple exits.  These have been selected somewhat empirically, but there is probably room for
1777     // more tuning.
1778     CLANG_FORMAT_COMMENT_ANCHOR;
1779
1780 #ifdef DEBUG
1781     if (VERBOSE)
1782     {
1783         printf("\nFP callee save candidate vars: ");
1784         if (enregisterLocalVars && !VarSetOps::IsEmpty(compiler, fpCalleeSaveCandidateVars))
1785         {
1786             dumpConvertedVarSet(compiler, fpCalleeSaveCandidateVars);
1787             printf("\n");
1788         }
1789         else
1790         {
1791             printf("None\n\n");
1792         }
1793     }
1794 #endif
1795
1796     JITDUMP("floatVarCount = %d; hasLoops = %d, singleExit = %d\n", floatVarCount, compiler->fgHasLoops,
1797             (compiler->fgReturnBlocks == nullptr || compiler->fgReturnBlocks->next == nullptr));
1798
1799     // Determine whether to use the 2nd, more aggressive, threshold for fp callee saves.
1800     if (floatVarCount > 6 && compiler->fgHasLoops &&
1801         (compiler->fgReturnBlocks == nullptr || compiler->fgReturnBlocks->next == nullptr))
1802     {
1803         assert(enregisterLocalVars);
1804 #ifdef DEBUG
1805         if (VERBOSE)
1806         {
1807             printf("Adding additional fp callee save candidates: \n");
1808             if (!VarSetOps::IsEmpty(compiler, fpMaybeCandidateVars))
1809             {
1810                 dumpConvertedVarSet(compiler, fpMaybeCandidateVars);
1811                 printf("\n");
1812             }
1813             else
1814             {
1815                 printf("None\n\n");
1816             }
1817         }
1818 #endif
1819         VarSetOps::UnionD(compiler, fpCalleeSaveCandidateVars, fpMaybeCandidateVars);
1820     }
1821
1822 #ifdef _TARGET_ARM_
1823 #ifdef DEBUG
1824     if (VERBOSE)
1825     {
1826         // Frame layout is only pre-computed for ARM
1827         printf("\nlvaTable after IdentifyCandidates\n");
1828         compiler->lvaTableDump(Compiler::FrameLayoutState::PRE_REGALLOC_FRAME_LAYOUT);
1829     }
1830 #endif // DEBUG
1831 #endif // _TARGET_ARM_
1832 }
1833
1834 // TODO-Throughput: This mapping can surely be more efficiently done
1835 void LinearScan::initVarRegMaps()
1836 {
1837     if (!enregisterLocalVars)
1838     {
1839         inVarToRegMaps  = nullptr;
1840         outVarToRegMaps = nullptr;
1841         return;
1842     }
1843     assert(compiler->lvaTrackedFixed); // We should have already set this to prevent us from adding any new tracked
1844                                        // variables.
1845
1846     // The compiler memory allocator requires that the allocation be an
1847     // even multiple of int-sized objects
1848     unsigned int varCount = compiler->lvaTrackedCount;
1849     regMapCount           = roundUp(varCount, (unsigned)sizeof(int));
1850
1851     // Not sure why blocks aren't numbered from zero, but they don't appear to be.
1852     // So, if we want to index by bbNum we have to know the maximum value.
1853     unsigned int bbCount = compiler->fgBBNumMax + 1;
1854
1855     inVarToRegMaps  = new (compiler, CMK_LSRA) regNumberSmall*[bbCount];
1856     outVarToRegMaps = new (compiler, CMK_LSRA) regNumberSmall*[bbCount];
1857
1858     if (varCount > 0)
1859     {
1860         // This VarToRegMap is used during the resolution of critical edges.
1861         sharedCriticalVarToRegMap = new (compiler, CMK_LSRA) regNumberSmall[regMapCount];
1862
1863         for (unsigned int i = 0; i < bbCount; i++)
1864         {
1865             VarToRegMap inVarToRegMap  = new (compiler, CMK_LSRA) regNumberSmall[regMapCount];
1866             VarToRegMap outVarToRegMap = new (compiler, CMK_LSRA) regNumberSmall[regMapCount];
1867
1868             for (unsigned int j = 0; j < regMapCount; j++)
1869             {
1870                 inVarToRegMap[j]  = REG_STK;
1871                 outVarToRegMap[j] = REG_STK;
1872             }
1873             inVarToRegMaps[i]  = inVarToRegMap;
1874             outVarToRegMaps[i] = outVarToRegMap;
1875         }
1876     }
1877     else
1878     {
1879         sharedCriticalVarToRegMap = nullptr;
1880         for (unsigned int i = 0; i < bbCount; i++)
1881         {
1882             inVarToRegMaps[i]  = nullptr;
1883             outVarToRegMaps[i] = nullptr;
1884         }
1885     }
1886 }
1887
1888 void LinearScan::setInVarRegForBB(unsigned int bbNum, unsigned int varNum, regNumber reg)
1889 {
1890     assert(enregisterLocalVars);
1891     assert(reg < UCHAR_MAX && varNum < compiler->lvaCount);
1892     inVarToRegMaps[bbNum][compiler->lvaTable[varNum].lvVarIndex] = (regNumberSmall)reg;
1893 }
1894
1895 void LinearScan::setOutVarRegForBB(unsigned int bbNum, unsigned int varNum, regNumber reg)
1896 {
1897     assert(enregisterLocalVars);
1898     assert(reg < UCHAR_MAX && varNum < compiler->lvaCount);
1899     outVarToRegMaps[bbNum][compiler->lvaTable[varNum].lvVarIndex] = (regNumberSmall)reg;
1900 }
1901
1902 LinearScan::SplitEdgeInfo LinearScan::getSplitEdgeInfo(unsigned int bbNum)
1903 {
1904     assert(enregisterLocalVars);
1905     SplitEdgeInfo splitEdgeInfo;
1906     assert(bbNum <= compiler->fgBBNumMax);
1907     assert(bbNum > bbNumMaxBeforeResolution);
1908     assert(splitBBNumToTargetBBNumMap != nullptr);
1909     splitBBNumToTargetBBNumMap->Lookup(bbNum, &splitEdgeInfo);
1910     assert(splitEdgeInfo.toBBNum <= bbNumMaxBeforeResolution);
1911     assert(splitEdgeInfo.fromBBNum <= bbNumMaxBeforeResolution);
1912     return splitEdgeInfo;
1913 }
1914
1915 VarToRegMap LinearScan::getInVarToRegMap(unsigned int bbNum)
1916 {
1917     assert(enregisterLocalVars);
1918     assert(bbNum <= compiler->fgBBNumMax);
1919     // For the blocks inserted to split critical edges, the inVarToRegMap is
1920     // equal to the outVarToRegMap at the "from" block.
1921     if (bbNum > bbNumMaxBeforeResolution)
1922     {
1923         SplitEdgeInfo splitEdgeInfo = getSplitEdgeInfo(bbNum);
1924         unsigned      fromBBNum     = splitEdgeInfo.fromBBNum;
1925         if (fromBBNum == 0)
1926         {
1927             assert(splitEdgeInfo.toBBNum != 0);
1928             return inVarToRegMaps[splitEdgeInfo.toBBNum];
1929         }
1930         else
1931         {
1932             return outVarToRegMaps[fromBBNum];
1933         }
1934     }
1935
1936     return inVarToRegMaps[bbNum];
1937 }
1938
1939 VarToRegMap LinearScan::getOutVarToRegMap(unsigned int bbNum)
1940 {
1941     assert(enregisterLocalVars);
1942     assert(bbNum <= compiler->fgBBNumMax);
1943     // For the blocks inserted to split critical edges, the outVarToRegMap is
1944     // equal to the inVarToRegMap at the target.
1945     if (bbNum > bbNumMaxBeforeResolution)
1946     {
1947         // If this is an empty block, its in and out maps are both the same.
1948         // We identify this case by setting fromBBNum or toBBNum to 0, and using only the other.
1949         SplitEdgeInfo splitEdgeInfo = getSplitEdgeInfo(bbNum);
1950         unsigned      toBBNum       = splitEdgeInfo.toBBNum;
1951         if (toBBNum == 0)
1952         {
1953             assert(splitEdgeInfo.fromBBNum != 0);
1954             return outVarToRegMaps[splitEdgeInfo.fromBBNum];
1955         }
1956         else
1957         {
1958             return inVarToRegMaps[toBBNum];
1959         }
1960     }
1961     return outVarToRegMaps[bbNum];
1962 }
1963
1964 //------------------------------------------------------------------------
1965 // setVarReg: Set the register associated with a variable in the given 'bbVarToRegMap'.
1966 //
1967 // Arguments:
1968 //    bbVarToRegMap   - the map of interest
1969 //    trackedVarIndex - the lvVarIndex for the variable
1970 //    reg             - the register to which it is being mapped
1971 //
1972 // Return Value:
1973 //    None
1974 //
1975 void LinearScan::setVarReg(VarToRegMap bbVarToRegMap, unsigned int trackedVarIndex, regNumber reg)
1976 {
1977     assert(trackedVarIndex < compiler->lvaTrackedCount);
1978     regNumberSmall regSmall = (regNumberSmall)reg;
1979     assert((regNumber)regSmall == reg);
1980     bbVarToRegMap[trackedVarIndex] = regSmall;
1981 }
1982
1983 //------------------------------------------------------------------------
1984 // getVarReg: Get the register associated with a variable in the given 'bbVarToRegMap'.
1985 //
1986 // Arguments:
1987 //    bbVarToRegMap   - the map of interest
1988 //    trackedVarIndex - the lvVarIndex for the variable
1989 //
1990 // Return Value:
1991 //    The register to which 'trackedVarIndex' is mapped
1992 //
1993 regNumber LinearScan::getVarReg(VarToRegMap bbVarToRegMap, unsigned int trackedVarIndex)
1994 {
1995     assert(enregisterLocalVars);
1996     assert(trackedVarIndex < compiler->lvaTrackedCount);
1997     return (regNumber)bbVarToRegMap[trackedVarIndex];
1998 }
1999
2000 // Initialize the incoming VarToRegMap to the given map values (generally a predecessor of
2001 // the block)
2002 VarToRegMap LinearScan::setInVarToRegMap(unsigned int bbNum, VarToRegMap srcVarToRegMap)
2003 {
2004     assert(enregisterLocalVars);
2005     VarToRegMap inVarToRegMap = inVarToRegMaps[bbNum];
2006     memcpy(inVarToRegMap, srcVarToRegMap, (regMapCount * sizeof(regNumber)));
2007     return inVarToRegMap;
2008 }
2009
2010 //------------------------------------------------------------------------
2011 // checkLastUses: Check correctness of last use flags
2012 //
2013 // Arguments:
2014 //    The block for which we are checking last uses.
2015 //
2016 // Notes:
2017 //    This does a backward walk of the RefPositions, starting from the liveOut set.
//    This method was previously used to set the last uses, which were computed by
//    liveness, but were not created in some cases of multiple lclVar references in the
//    same tree. However, now that last uses are computed as RefPositions are created,
//    that is no longer necessary, and this method is simply retained as a check.
//    The exception to the check-only behavior is when LSRA_EXTEND_LIFETIMES is set via
//    COMPlus_JitStressRegs. In that case, this method is required, because even though
//    the RefPositions will not be marked lastUse in that case, we still need to correctly
//    mark the last uses on the tree nodes, which is done by this method.
2026 //
2027 #ifdef DEBUG
void LinearScan::checkLastUses(BasicBlock* block)
{
    if (VERBOSE)
    {
        JITDUMP("\n\nCHECKING LAST USES for " FMT_BB ", liveout=", block->bbNum);
        dumpConvertedVarSet(compiler, block->bbLiveOut);
        JITDUMP("\n==============================\n");
    }

    // If "this" must be kept alive and reported, its references are never treated as
    // last uses (see the 'varNum != keepAliveVarNum' test in the loop below).
    unsigned keepAliveVarNum = BAD_VAR_NUM;
    if (compiler->lvaKeepAliveAndReportThis())
    {
        keepAliveVarNum = compiler->info.compThisArg;
        assert(compiler->info.compIsStatic == false);
    }

    // find which uses are lastUses

    // Work backwards starting with live out.
    // 'computedLive' is updated to include any exposed use (including those in this
    // block that we've already seen).  When we encounter a use, if it's
    // not in that set, then it's a last use.

    VARSET_TP computedLive(VarSetOps::MakeCopy(compiler, block->bbLiveOut));

    bool                       foundDiff       = false;
    RefPositionReverseIterator reverseIterator = refPositions.rbegin();
    RefPosition*               currentRefPosition;
    // Walk the RefPositions in reverse order, stopping at the RefTypeBB that marks
    // the start of this block.
    for (currentRefPosition = &reverseIterator; currentRefPosition->refType != RefTypeBB;
         reverseIterator++, currentRefPosition = &reverseIterator)
    {
        // We should never see ParamDefs or ZeroInits within a basic block.
        assert(currentRefPosition->refType != RefTypeParamDef && currentRefPosition->refType != RefTypeZeroInit);
        if (currentRefPosition->isIntervalRef() && currentRefPosition->getInterval()->isLocalVar)
        {
            unsigned varNum   = currentRefPosition->getInterval()->varNum;
            unsigned varIndex = currentRefPosition->getInterval()->getVarIndex(compiler);

            LsraLocation loc = currentRefPosition->nodeLocation;

            // We should always have a tree node for a localVar, except for the "special" RefPositions.
            GenTree* tree = currentRefPosition->treeNode;
            assert(tree != nullptr || currentRefPosition->refType == RefTypeExpUse ||
                   currentRefPosition->refType == RefTypeDummyDef);

            if (!VarSetOps::IsMember(compiler, computedLive, varIndex) && varNum != keepAliveVarNum)
            {
                // There was no exposed use, so this is a "last use" (and we mark it thus even if it's a def)

                if (extendLifetimes())
                {
                    // NOTE: this is a bit of a hack. When extending lifetimes, the "last use" bit will be clear.
                    // This bit, however, would normally be used during resolveLocalRef to set the value of
                    // GTF_VAR_DEATH on the node for a ref position. If this bit is not set correctly even when
                    // extending lifetimes, the code generator will assert as it expects to have accurate last
                    // use information. To avoid these asserts, set the GTF_VAR_DEATH bit here.
                    // Note also that extendLifetimes() is an LSRA stress mode, so it will only be true for
                    // Checked or Debug builds, for which this method will be executed.
                    if (tree != nullptr)
                    {
                        tree->gtFlags |= GTF_VAR_DEATH;
                    }
                }
                else if (!currentRefPosition->lastUse)
                {
                    JITDUMP("missing expected last use of V%02u @%u\n", compiler->lvaTrackedToVarNum[varIndex], loc);
                    foundDiff = true;
                }
                VarSetOps::AddElemD(compiler, computedLive, varIndex);
            }
            else if (currentRefPosition->lastUse)
            {
                // There is an exposed use after this RefPosition, so it must not be marked lastUse.
                JITDUMP("unexpected last use of V%02u @%u\n", compiler->lvaTrackedToVarNum[varIndex], loc);
                foundDiff = true;
            }
            else if (extendLifetimes() && tree != nullptr)
            {
                // NOTE: see the comment above re: the extendLifetimes hack.
                tree->gtFlags &= ~GTF_VAR_DEATH;
            }

            // Walking backward, a def (or dummy def) ends the variable's upward exposure,
            // so remove it from the computed live set.
            if (currentRefPosition->refType == RefTypeDef || currentRefPosition->refType == RefTypeDummyDef)
            {
                VarSetOps::RemoveElemD(compiler, computedLive, varIndex);
            }
        }

        assert(reverseIterator != refPositions.rend());
    }

    // At this point 'computedLive' should agree with bbLiveIn. First report any
    // candidate variables that are live-in but were not computed live...
    VARSET_TP liveInNotComputedLive(VarSetOps::Diff(compiler, block->bbLiveIn, computedLive));

    VarSetOps::Iter liveInNotComputedLiveIter(compiler, liveInNotComputedLive);
    unsigned        liveInNotComputedLiveIndex = 0;
    while (liveInNotComputedLiveIter.NextElem(&liveInNotComputedLiveIndex))
    {
        unsigned varNum = compiler->lvaTrackedToVarNum[liveInNotComputedLiveIndex];
        if (compiler->lvaTable[varNum].lvLRACandidate)
        {
            JITDUMP(FMT_BB ": V%02u is in LiveIn set, but not computed live.\n", block->bbNum, varNum);
            foundDiff = true;
        }
    }

    // ... then the converse: candidate variables computed live that are not in bbLiveIn.
    VarSetOps::DiffD(compiler, computedLive, block->bbLiveIn);
    const VARSET_TP& computedLiveNotLiveIn(computedLive); // reuse the buffer.
    VarSetOps::Iter  computedLiveNotLiveInIter(compiler, computedLiveNotLiveIn);
    unsigned         computedLiveNotLiveInIndex = 0;
    while (computedLiveNotLiveInIter.NextElem(&computedLiveNotLiveInIndex))
    {
        unsigned varNum = compiler->lvaTrackedToVarNum[computedLiveNotLiveInIndex];
        if (compiler->lvaTable[varNum].lvLRACandidate)
        {
            JITDUMP(FMT_BB ": V%02u is computed live, but not in LiveIn set.\n", block->bbNum, varNum);
            foundDiff = true;
        }
    }

    assert(!foundDiff);
}
2148 #endif // DEBUG
2149
2150 //------------------------------------------------------------------------
2151 // findPredBlockForLiveIn: Determine which block should be used for the register locations of the live-in variables.
2152 //
2153 // Arguments:
//    block                 - The block for which we're selecting a predecessor.
//    prevBlock             - The previous block in allocation order.
2156 //    pPredBlockIsAllocated - A debug-only argument that indicates whether any of the predecessors have been seen
2157 //                            in allocation order.
2158 //
2159 // Return Value:
2160 //    The selected predecessor.
2161 //
2162 // Assumptions:
2163 //    in DEBUG, caller initializes *pPredBlockIsAllocated to false, and it will be set to true if the block
2164 //    returned is in fact a predecessor.
2165 //
2166 // Notes:
2167 //    This will select a predecessor based on the heuristics obtained by getLsraBlockBoundaryLocations(), which can be
2168 //    one of:
2169 //      LSRA_BLOCK_BOUNDARY_PRED    - Use the register locations of a predecessor block (default)
2170 //      LSRA_BLOCK_BOUNDARY_LAYOUT  - Use the register locations of the previous block in layout order.
2171 //                                    This is the only case where this actually returns a different block.
2172 //      LSRA_BLOCK_BOUNDARY_ROTATE  - Rotate the register locations from a predecessor.
2173 //                                    For this case, the block returned is the same as for LSRA_BLOCK_BOUNDARY_PRED, but
2174 //                                    the register locations will be "rotated" to stress the resolution and allocation
2175 //                                    code.
2176
BasicBlock* LinearScan::findPredBlockForLiveIn(BasicBlock* block,
                                               BasicBlock* prevBlock DEBUGARG(bool* pPredBlockIsAllocated))
{
    BasicBlock* predBlock = nullptr;
#ifdef DEBUG
    assert(*pPredBlockIsAllocated == false);
    if (getLsraBlockBoundaryLocations() == LSRA_BLOCK_BOUNDARY_LAYOUT)
    {
        // Stress mode: use the previous block in layout order, if there is one.
        if (prevBlock != nullptr)
        {
            predBlock = prevBlock;
        }
    }
    else
#endif // DEBUG
        // Note: under DEBUG, this 'if' is the 'else' arm of the stress-mode check above.
        // The first block has no predecessor, so it returns nullptr.
        if (block != compiler->fgFirstBB)
    {
        predBlock = block->GetUniquePred(compiler);
        if (predBlock != nullptr)
        {
            // There is a unique predecessor; use it only if it has already been allocated.
            if (isBlockVisited(predBlock))
            {
                if (predBlock->bbJumpKind == BBJ_COND)
                {
                    // Special handling to improve matching on backedges.
                    BasicBlock* otherBlock = (block == predBlock->bbNext) ? predBlock->bbJumpDest : predBlock->bbNext;
                    noway_assert(otherBlock != nullptr);
                    if (isBlockVisited(otherBlock))
                    {
                        // This is the case when we have a conditional branch where one target has already
                        // been visited.  It would be best to use the same incoming regs as that block,
                        // so that we have less likelihood of having to move registers.
                        // For example, in determining the block to use for the starting register locations for
                        // "block" in the following example, we'd like to use the same predecessor for "block"
                        // as for "otherBlock", so that both successors of predBlock have the same locations, reducing
                        // the likelihood of needing a split block on a backedge:
                        //
                        //   otherPred
                        //       |
                        //   otherBlock <-+
                        //     . . .      |
                        //                |
                        //   predBlock----+
                        //       |
                        //     block
                        //
                        for (flowList* pred = otherBlock->bbPreds; pred != nullptr; pred = pred->flNext)
                        {
                            BasicBlock* otherPred = pred->flBlock;
                            // Use the same predecessor that was recorded for 'otherBlock'.
                            if (otherPred->bbNum == blockInfo[otherBlock->bbNum].predBBNum)
                            {
                                predBlock = otherPred;
                                break;
                            }
                        }
                    }
                }
            }
            else
            {
                // The unique predecessor has not yet been allocated; fall back below.
                predBlock = nullptr;
            }
        }
        else
        {
            // No unique predecessor: choose the heaviest predecessor that has
            // already been visited (allocated).
            for (flowList* pred = block->bbPreds; pred != nullptr; pred = pred->flNext)
            {
                BasicBlock* candidatePredBlock = pred->flBlock;
                if (isBlockVisited(candidatePredBlock))
                {
                    if (predBlock == nullptr || predBlock->bbWeight < candidatePredBlock->bbWeight)
                    {
                        predBlock = candidatePredBlock;
                        INDEBUG(*pPredBlockIsAllocated = true;)
                    }
                }
            }
        }
        // NOTE(review): unlike the multi-predecessor path above, the unique-predecessor
        // path never sets *pPredBlockIsAllocated - confirm whether that is intentional.
        if (predBlock == nullptr)
        {
            predBlock = prevBlock;
            assert(predBlock != nullptr);
            JITDUMP("\n\nNo allocated predecessor; ");
        }
    }
    return predBlock;
}
2264
2265 #ifdef DEBUG
2266 void LinearScan::dumpVarRefPositions(const char* title)
2267 {
2268     if (enregisterLocalVars)
2269     {
2270         printf("\nVAR REFPOSITIONS %s\n", title);
2271
2272         for (unsigned i = 0; i < compiler->lvaCount; i++)
2273         {
2274             printf("--- V%02u", i);
2275
2276             LclVarDsc* varDsc = compiler->lvaTable + i;
2277             if (varDsc->lvIsRegCandidate())
2278             {
2279                 Interval* interval = getIntervalForLocalVar(varDsc->lvVarIndex);
2280                 printf("  (Interval %d)\n", interval->intervalIndex);
2281                 for (RefPosition* ref = interval->firstRefPosition; ref != nullptr; ref = ref->nextRefPosition)
2282                 {
2283                     ref->dump();
2284                 }
2285             }
2286             else
2287             {
2288                 printf("\n");
2289             }
2290         }
2291         printf("\n");
2292     }
2293 }
2294
2295 #endif // DEBUG
2296
2297 // Set the default rpFrameType based upon codeGen->isFramePointerRequired()
2298 // This was lifted from the register predictor
2299 //
void LinearScan::setFrameType()
{
    FrameType frameType = FT_NOT_SET;
#if DOUBLE_ALIGN
    compiler->codeGen->setDoubleAlign(false);
    if (doDoubleAlign)
    {
        frameType = FT_DOUBLE_ALIGN_FRAME;
        compiler->codeGen->setDoubleAlign(true);
    }
    else
#endif // DOUBLE_ALIGN
        // Note: under DOUBLE_ALIGN, this 'if' is the 'else' arm of the doDoubleAlign
        // check above.
        if (compiler->codeGen->isFramePointerRequired())
    {
        frameType = FT_EBP_FRAME;
    }
    else
    {
        // Ask (at most once) whether an EBP-based frame must be created for ETW
        // stack walking, and record that decision via setFrameRequired.
        if (compiler->rpMustCreateEBPCalled == false)
        {
#ifdef DEBUG
            const char* reason;
#endif // DEBUG
            compiler->rpMustCreateEBPCalled = true;
            if (compiler->rpMustCreateEBPFrame(INDEBUG(&reason)))
            {
                JITDUMP("; Decided to create an EBP based frame for ETW stackwalking (%s)\n", reason);
                compiler->codeGen->setFrameRequired(true);
            }
        }

        if (compiler->codeGen->isFrameRequired())
        {
            frameType = FT_EBP_FRAME;
        }
        else
        {
            frameType = FT_ESP_FRAME;
        }
    }

    // Propagate the chosen frame type to codeGen's frame-pointer state, asserting
    // consistency with the requirements computed above.
    switch (frameType)
    {
        case FT_ESP_FRAME:
            noway_assert(!compiler->codeGen->isFramePointerRequired());
            noway_assert(!compiler->codeGen->isFrameRequired());
            compiler->codeGen->setFramePointerUsed(false);
            break;
        case FT_EBP_FRAME:
            compiler->codeGen->setFramePointerUsed(true);
            break;
#if DOUBLE_ALIGN
        case FT_DOUBLE_ALIGN_FRAME:
            noway_assert(!compiler->codeGen->isFramePointerRequired());
            compiler->codeGen->setFramePointerUsed(false);
            break;
#endif // DOUBLE_ALIGN
        default:
            noway_assert(!"rpFrameType not set correctly!");
            break;
    }

    // If we are using FPBASE as the frame register, we cannot also use it for
    // a local var.
    regMaskTP removeMask = RBM_NONE;
    if (frameType == FT_EBP_FRAME)
    {
        removeMask |= RBM_FPBASE;
    }

    compiler->rpFrameType = frameType;

#ifdef _TARGET_ARMARCH_
    // Determine whether we need to reserve a register for large lclVar offsets.
    if (compiler->compRsvdRegCheck(Compiler::REGALLOC_FRAME_LAYOUT))
    {
        // We reserve R10/IP1 in this case to hold the offsets in load/store instructions
        compiler->codeGen->regSet.rsMaskResvd |= RBM_OPT_RSVD;
        assert(REG_OPT_RSVD != REG_FP);
        JITDUMP("  Reserved REG_OPT_RSVD (%s) due to large frame\n", getRegName(REG_OPT_RSVD));
        removeMask |= RBM_OPT_RSVD;
    }
#endif // _TARGET_ARMARCH_

    if ((removeMask != RBM_NONE) && ((availableIntRegs & removeMask) != 0))
    {
        // We know that we're already in "read mode" for availableIntRegs. However,
        // we need to remove these registers, so subsequent users (like callers
        // to allRegs()) get the right thing. The RemoveRegistersFromMasks() code
        // fixes up everything that already took a dependency on the value that was
        // previously read, so this completes the picture.
        availableIntRegs.OverrideAssign(availableIntRegs & ~removeMask);
    }
}
2394
2395 //------------------------------------------------------------------------
2396 // copyOrMoveRegInUse: Is 'ref' a copyReg/moveReg that is still busy at the given location?
2397 //
2398 // Arguments:
2399 //    ref: The RefPosition of interest
2400 //    loc: The LsraLocation at which we're determining whether it's busy.
2401 //
2402 // Return Value:
2403 //    true iff 'ref' is active at the given location
2404 //
2405 bool copyOrMoveRegInUse(RefPosition* ref, LsraLocation loc)
2406 {
2407     if (!ref->copyReg && !ref->moveReg)
2408     {
2409         return false;
2410     }
2411     if (ref->getRefEndLocation() >= loc)
2412     {
2413         return true;
2414     }
2415     Interval*    interval = ref->getInterval();
2416     RefPosition* nextRef  = interval->getNextRefPosition();
2417     if (nextRef != nullptr && nextRef->treeNode == ref->treeNode && nextRef->getRefEndLocation() >= loc)
2418     {
2419         return true;
2420     }
2421     return false;
2422 }
2423
//------------------------------------------------------------------------
// registerIsAvailable: Determine whether the register represented by "physRegRecord"
//                      is available at least at the "currentLoc", and if so, return
//                      the next location at which it is in use in "nextRefLocationPtr".
//
// Arguments:
//    physRegRecord:      The RegRecord of the physical register of interest.
//    currentLoc:         The location at which we need the register to be free.
//    nextRefLocationPtr: [out] The next location at which the register is in use
//                        (remains MaxLocation if no upcoming reference limits it).
//    regType:            The type of the use; on ARM, a TYP_DOUBLE request also
//                        checks the other (TYP_FLOAT) half of the double register.
//
// Return Value:
//    true iff the register is free at least until 'currentLoc'.
//
bool LinearScan::registerIsAvailable(RegRecord*    physRegRecord,
                                     LsraLocation  currentLoc,
                                     LsraLocation* nextRefLocationPtr,
                                     RegisterType  regType)
{
    *nextRefLocationPtr          = MaxLocation;
    LsraLocation nextRefLocation = MaxLocation;
    regMaskTP    regMask         = genRegMask(physRegRecord->regNum);
    // A register that is busy until the next kill can never be handed out here.
    if (physRegRecord->isBusyUntilNextKill)
    {
        return false;
    }

    RefPosition* nextPhysReference = physRegRecord->getNextRefPosition();
    if (nextPhysReference != nullptr)
    {
        nextRefLocation = nextPhysReference->nodeLocation;
        // if (nextPhysReference->refType == RefTypeFixedReg) nextRefLocation--;
    }
    else if (!physRegRecord->isCalleeSave)
    {
        // A caller-save register with no explicit upcoming reference is still not
        // free "forever" — cap its availability just short of MaxLocation.
        nextRefLocation = MaxLocation - 1;
    }

    Interval* assignedInterval = physRegRecord->assignedInterval;

    if (assignedInterval != nullptr)
    {
        RefPosition* recentReference = assignedInterval->recentRefPosition;

        // The only case where we have an assignedInterval, but recentReference is null
        // is where this interval is live at procedure entry (i.e. an arg register), in which
        // case it's still live and its assigned register is not available
        // (Note that the ParamDef will be recorded as a recentReference when we encounter
        // it, but we will be allocating registers, potentially to other incoming parameters,
        // as we process the ParamDefs.)

        if (recentReference == nullptr)
        {
            return false;
        }

        // Is this a copyReg/moveReg?  It is if the register assignment doesn't match.
        // (the recentReference may not be a copyReg/moveReg, because we could have seen another
        // reference since the copyReg/moveReg)

        if (!assignedInterval->isAssignedTo(physRegRecord->regNum))
        {
            // If the recentReference is for a different register, it can be reassigned, but
            // otherwise don't reassign it if it's still in use.
            // (Note that it is unlikely that we have a recent copy or move to a different register,
            // where this physRegRecord is still pointing at an earlier copy or move, but it is possible,
            // especially in stress modes.)
            if ((recentReference->registerAssignment == regMask) && copyOrMoveRegInUse(recentReference, currentLoc))
            {
                return false;
            }
        }
        else if (!assignedInterval->isActive && assignedInterval->isConstant)
        {
            // Treat this as unassigned, i.e. do nothing.
            // TODO-CQ: Consider adjusting the heuristics (probably in the caller of this method)
            // to avoid reusing these registers.
        }
        // If this interval isn't active, it's available if it isn't referenced
        // at this location (or the previous location, if the recent RefPosition
        // is a delayRegFree).
        else if (!assignedInterval->isActive &&
                 (recentReference->refType == RefTypeExpUse || recentReference->getRefEndLocation() < currentLoc))
        {
            // This interval must have a next reference (otherwise it wouldn't be assigned to this register)
            RefPosition* nextReference = recentReference->nextRefPosition;
            if (nextReference != nullptr)
            {
                // The inactive interval's next use further constrains availability.
                if (nextReference->nodeLocation < nextRefLocation)
                {
                    nextRefLocation = nextReference->nodeLocation;
                }
            }
            else
            {
                assert(recentReference->copyReg && recentReference->registerAssignment != regMask);
            }
        }
        else
        {
            // The assigned interval is active (or otherwise still needs this register).
            return false;
        }
    }
    if (nextRefLocation < *nextRefLocationPtr)
    {
        *nextRefLocationPtr = nextRefLocation;
    }

#ifdef _TARGET_ARM_
    if (regType == TYP_DOUBLE)
    {
        // Recurse, but check the other half this time (TYP_FLOAT)
        if (!registerIsAvailable(findAnotherHalfRegRec(physRegRecord), currentLoc, nextRefLocationPtr, TYP_FLOAT))
            return false;
        nextRefLocation = *nextRefLocationPtr;
    }
#endif // _TARGET_ARM_

    return (nextRefLocation >= currentLoc);
}
2534
2535 //------------------------------------------------------------------------
2536 // getRegisterType: Get the RegisterType to use for the given RefPosition
2537 //
2538 // Arguments:
2539 //    currentInterval: The interval for the current allocation
2540 //    refPosition:     The RefPosition of the current Interval for which a register is being allocated
2541 //
2542 // Return Value:
2543 //    The RegisterType that should be allocated for this RefPosition
2544 //
2545 // Notes:
2546 //    This will nearly always be identical to the registerType of the interval, except in the case
2547 //    of SIMD types of 8 bytes (currently only Vector2) when they are passed and returned in integer
2548 //    registers, or copied to a return temp.
2549 //    This method need only be called in situations where we may be dealing with the register requirements
2550 //    of a RefTypeUse RefPosition (i.e. not when we are only looking at the type of an interval, nor when
2551 //    we are interested in the "defining" type of the interval).  This is because the situation of interest
2552 //    only happens at the use (where it must be copied to an integer register).
2553
2554 RegisterType LinearScan::getRegisterType(Interval* currentInterval, RefPosition* refPosition)
2555 {
2556     assert(refPosition->getInterval() == currentInterval);
2557     RegisterType regType    = currentInterval->registerType;
2558     regMaskTP    candidates = refPosition->registerAssignment;
2559
2560     assert((candidates & allRegs(regType)) != RBM_NONE);
2561     return regType;
2562 }
2563
2564 //------------------------------------------------------------------------
2565 // isMatchingConstant: Check to see whether a given register contains the constant referenced
2566 //                     by the given RefPosition
2567 //
2568 // Arguments:
2569 //    physRegRecord:   The RegRecord for the register we're interested in.
2570 //    refPosition:     The RefPosition for a constant interval.
2571 //
2572 // Return Value:
2573 //    True iff the register was defined by an identical constant node as the current interval.
2574 //
2575 bool LinearScan::isMatchingConstant(RegRecord* physRegRecord, RefPosition* refPosition)
2576 {
2577     if ((physRegRecord->assignedInterval == nullptr) || !physRegRecord->assignedInterval->isConstant)
2578     {
2579         return false;
2580     }
2581     noway_assert(refPosition->treeNode != nullptr);
2582     GenTree* otherTreeNode = physRegRecord->assignedInterval->firstRefPosition->treeNode;
2583     noway_assert(otherTreeNode != nullptr);
2584
2585     if (refPosition->treeNode->OperGet() == otherTreeNode->OperGet())
2586     {
2587         switch (otherTreeNode->OperGet())
2588         {
2589             case GT_CNS_INT:
2590                 if ((refPosition->treeNode->AsIntCon()->IconValue() == otherTreeNode->AsIntCon()->IconValue()) &&
2591                     (varTypeGCtype(refPosition->treeNode) == varTypeGCtype(otherTreeNode)))
2592                 {
2593 #ifdef _TARGET_64BIT_
2594                     // If the constant is negative, only reuse registers of the same type.
2595                     // This is because, on a 64-bit system, we do not sign-extend immediates in registers to
2596                     // 64-bits unless they are actually longs, as this requires a longer instruction.
2597                     // This doesn't apply to a 32-bit system, on which long values occupy multiple registers.
2598                     // (We could sign-extend, but we would have to always sign-extend, because if we reuse more
2599                     // than once, we won't have access to the instruction that originally defines the constant).
2600                     if ((refPosition->treeNode->TypeGet() == otherTreeNode->TypeGet()) ||
2601                         (refPosition->treeNode->AsIntCon()->IconValue() >= 0))
2602 #endif // _TARGET_64BIT_
2603                     {
2604                         return true;
2605                     }
2606                 }
2607                 break;
2608             case GT_CNS_DBL:
2609             {
2610                 // For floating point constants, the values must be identical, not simply compare
2611                 // equal.  So we compare the bits.
2612                 if (refPosition->treeNode->AsDblCon()->isBitwiseEqual(otherTreeNode->AsDblCon()) &&
2613                     (refPosition->treeNode->TypeGet() == otherTreeNode->TypeGet()))
2614                 {
2615                     return true;
2616                 }
2617                 break;
2618             }
2619             default:
2620                 break;
2621         }
2622     }
2623     return false;
2624 }
2625
2626 //------------------------------------------------------------------------
2627 // tryAllocateFreeReg: Find a free register that satisfies the requirements for refPosition,
2628 //                     and takes into account the preferences for the given Interval
2629 //
2630 // Arguments:
2631 //    currentInterval: The interval for the current allocation
2632 //    refPosition:     The RefPosition of the current Interval for which a register is being allocated
2633 //
2634 // Return Value:
//    The regNumber, if any, allocated to the RefPosition.  Returns REG_NA if no free register is found.
2636 //
2637 // Notes:
2638 //    TODO-CQ: Consider whether we need to use a different order for tree temps than for vars, as
2639 //    reg predict does
2640
// Register allocation order tables, expanded from the target-specific REG_VAR_ORDER
// and REG_VAR_ORDER_FLT macros: one ordering for integer registers and one for
// floating-point registers, each with its element count.
static const regNumber lsraRegOrder[]      = {REG_VAR_ORDER};
const unsigned         lsraRegOrderSize    = ArrLen(lsraRegOrder);
static const regNumber lsraRegOrderFlt[]   = {REG_VAR_ORDER_FLT};
const unsigned         lsraRegOrderFltSize = ArrLen(lsraRegOrderFlt);
2645
2646 regNumber LinearScan::tryAllocateFreeReg(Interval* currentInterval, RefPosition* refPosition)
2647 {
2648     regNumber foundReg = REG_NA;
2649
2650     RegisterType     regType = getRegisterType(currentInterval, refPosition);
2651     const regNumber* regOrder;
2652     unsigned         regOrderSize;
2653     if (useFloatReg(regType))
2654     {
2655         regOrder     = lsraRegOrderFlt;
2656         regOrderSize = lsraRegOrderFltSize;
2657     }
2658     else
2659     {
2660         regOrder     = lsraRegOrder;
2661         regOrderSize = lsraRegOrderSize;
2662     }
2663
2664     LsraLocation currentLocation = refPosition->nodeLocation;
2665     RefPosition* nextRefPos      = refPosition->nextRefPosition;
2666     LsraLocation nextLocation    = (nextRefPos == nullptr) ? currentLocation : nextRefPos->nodeLocation;
2667     regMaskTP    candidates      = refPosition->registerAssignment;
2668     regMaskTP    preferences     = currentInterval->registerPreferences;
2669
2670     if (RefTypeIsDef(refPosition->refType))
2671     {
2672         if (currentInterval->hasConflictingDefUse)
2673         {
2674             resolveConflictingDefAndUse(currentInterval, refPosition);
2675             candidates = refPosition->registerAssignment;
2676         }
2677         // Otherwise, check for the case of a fixed-reg def of a reg that will be killed before the
2678         // use, or interferes at the point of use (which shouldn't happen, but Lower doesn't mark
2679         // the contained nodes as interfering).
2680         // Note that we may have a ParamDef RefPosition that is marked isFixedRegRef, but which
2681         // has had its registerAssignment changed to no longer be a single register.
2682         else if (refPosition->isFixedRegRef && nextRefPos != nullptr && RefTypeIsUse(nextRefPos->refType) &&
2683                  !nextRefPos->isFixedRegRef && genMaxOneBit(refPosition->registerAssignment))
2684         {
2685             regNumber  defReg       = refPosition->assignedReg();
2686             RegRecord* defRegRecord = getRegisterRecord(defReg);
2687
2688             RefPosition* currFixedRegRefPosition = defRegRecord->recentRefPosition;
2689             assert(currFixedRegRefPosition != nullptr &&
2690                    currFixedRegRefPosition->nodeLocation == refPosition->nodeLocation);
2691
2692             // If there is another fixed reference to this register before the use, change the candidates
2693             // on this RefPosition to include that of nextRefPos.
2694             if (currFixedRegRefPosition->nextRefPosition != nullptr &&
2695                 currFixedRegRefPosition->nextRefPosition->nodeLocation <= nextRefPos->getRefEndLocation())
2696             {
2697                 candidates |= nextRefPos->registerAssignment;
2698                 if (preferences == refPosition->registerAssignment)
2699                 {
2700                     preferences = candidates;
2701                 }
2702             }
2703         }
2704     }
2705
2706     preferences &= candidates;
2707     if (preferences == RBM_NONE)
2708     {
2709         preferences = candidates;
2710     }
2711
2712 #ifdef DEBUG
2713     candidates = stressLimitRegs(refPosition, candidates);
2714 #endif
2715     assert(candidates != RBM_NONE);
2716
2717     Interval* relatedInterval = currentInterval->relatedInterval;
2718     if (currentInterval->isSpecialPutArg)
2719     {
2720         // This is not actually a preference, it's merely to track the lclVar that this
2721         // "specialPutArg" is using.
2722         relatedInterval = nullptr;
2723     }
2724     Interval* nextRelatedInterval  = relatedInterval;
2725     Interval* finalRelatedInterval = relatedInterval;
2726     Interval* rangeEndInterval     = relatedInterval;
2727     regMaskTP relatedPreferences   = (relatedInterval == nullptr) ? RBM_NONE : relatedInterval->getCurrentPreferences();
2728     LsraLocation rangeEndLocation  = refPosition->getRangeEndLocation();
2729     bool         preferCalleeSave  = currentInterval->preferCalleeSave;
2730     bool         avoidByteRegs     = false;
2731 #ifdef _TARGET_X86_
2732     if ((relatedPreferences & ~RBM_BYTE_REGS) != RBM_NONE)
2733     {
2734         avoidByteRegs = true;
2735     }
2736 #endif
2737
2738     // Follow the chain of related intervals, as long as:
2739     // - The next reference is a def. We don't want to use the relatedInterval for preferencing if its next reference
2740     //   is not a new definition (as it either is or will become live).
2741     // - The next (def) reference is downstream. Otherwise we could iterate indefinitely because the preferences can be
2742     // circular.
2743     // - The intersection of preferenced registers is non-empty.
2744     //
2745     while (nextRelatedInterval != nullptr)
2746     {
2747         RefPosition* nextRelatedRefPosition = nextRelatedInterval->getNextRefPosition();
2748
2749         // Only use the relatedInterval for preferencing if the related interval's next reference
2750         // is a new definition.
2751         if ((nextRelatedRefPosition != nullptr) && RefTypeIsDef(nextRelatedRefPosition->refType))
2752         {
2753             finalRelatedInterval = nextRelatedInterval;
2754             nextRelatedInterval  = nullptr;
2755
2756             // First, get the preferences for this interval
2757             regMaskTP thisRelatedPreferences = finalRelatedInterval->getCurrentPreferences();
2758             // Now, determine if they are compatible and update the relatedPreferences that we'll consider.
2759             regMaskTP newRelatedPreferences = thisRelatedPreferences & relatedPreferences;
2760             if (newRelatedPreferences != RBM_NONE && (!avoidByteRegs || thisRelatedPreferences != RBM_BYTE_REGS))
2761             {
2762                 bool thisIsSingleReg = isSingleRegister(newRelatedPreferences);
2763                 if (!thisIsSingleReg || (finalRelatedInterval->isLocalVar &&
2764                                          getRegisterRecord(genRegNumFromMask(newRelatedPreferences))->isFree()))
2765                 {
2766                     relatedPreferences = newRelatedPreferences;
2767                     // If this Interval has a downstream def without a single-register preference, continue to iterate.
2768                     if (nextRelatedRefPosition->nodeLocation > rangeEndLocation)
2769                     {
2770                         preferCalleeSave    = (preferCalleeSave || finalRelatedInterval->preferCalleeSave);
2771                         rangeEndLocation    = nextRelatedRefPosition->getRangeEndLocation();
2772                         rangeEndInterval    = finalRelatedInterval;
2773                         nextRelatedInterval = finalRelatedInterval->relatedInterval;
2774                     }
2775                 }
2776             }
2777         }
2778         else
2779         {
2780             if (nextRelatedInterval == relatedInterval)
2781             {
2782                 relatedInterval    = nullptr;
2783                 relatedPreferences = RBM_NONE;
2784             }
2785             nextRelatedInterval = nullptr;
2786         }
2787     }
2788
2789     // For floating point, we want to be less aggressive about using callee-save registers.
2790     // So in that case, we just need to ensure that the current RefPosition is covered.
2791     RefPosition* rangeEndRefPosition;
2792     RefPosition* lastRefPosition = currentInterval->lastRefPosition;
2793     if (useFloatReg(currentInterval->registerType))
2794     {
2795         rangeEndRefPosition = refPosition;
2796         preferCalleeSave    = currentInterval->preferCalleeSave;
2797     }
2798     else
2799     {
2800         rangeEndRefPosition = refPosition->getRangeEndRef();
2801         // If we have a chain of related intervals, and a finalRelatedInterval that
2802         // is not currently occupying a register, and whose lifetime begins after this one,
2803         // we want to try to select a register that will cover its lifetime.
2804         if ((rangeEndInterval != nullptr) && (rangeEndInterval->assignedReg == nullptr) &&
2805             (rangeEndInterval->getNextRefLocation() >= rangeEndRefPosition->nodeLocation))
2806         {
2807             lastRefPosition = rangeEndInterval->lastRefPosition;
2808         }
2809     }
2810
2811     // If this has a delayed use (due to being used in a rmw position of a
2812     // non-commutative operator), its endLocation is delayed until the "def"
2813     // position, which is one location past the use (getRefEndLocation() takes care of this).
2814     rangeEndLocation          = rangeEndRefPosition->getRefEndLocation();
2815     LsraLocation lastLocation = lastRefPosition->getRefEndLocation();
2816     regNumber    prevReg      = REG_NA;
2817
2818     if (currentInterval->assignedReg)
2819     {
2820         bool useAssignedReg = false;
2821         // This was an interval that was previously allocated to the given
2822         // physical register, and we should try to allocate it to that register
2823         // again, if possible and reasonable.
2824         // Use it preemptively (i.e. before checking other available regs)
2825         // only if it is preferred and available.
2826
2827         RegRecord* regRec    = currentInterval->assignedReg;
2828         prevReg              = regRec->regNum;
2829         regMaskTP prevRegBit = genRegMask(prevReg);
2830
2831         // Is it in the preferred set of regs?
2832         if ((prevRegBit & preferences) != RBM_NONE)
2833         {
2834             // Is it currently available?
2835             LsraLocation nextPhysRefLoc;
2836             if (registerIsAvailable(regRec, currentLocation, &nextPhysRefLoc, currentInterval->registerType))
2837             {
2838                 // If the register is next referenced at this location, only use it if
2839                 // this has a fixed reg requirement (i.e. this is the reference that caused
2840                 // the FixedReg ref to be created)
2841
2842                 if (!regRec->conflictingFixedRegReference(refPosition))
2843                 {
2844                     useAssignedReg = true;
2845                 }
2846             }
2847         }
2848         if (useAssignedReg)
2849         {
2850             regNumber foundReg = prevReg;
2851             assignPhysReg(regRec, currentInterval);
2852             refPosition->registerAssignment = genRegMask(foundReg);
2853             return foundReg;
2854         }
2855         else
2856         {
2857             // Don't keep trying to allocate to this register
2858             currentInterval->assignedReg = nullptr;
2859         }
2860     }
2861
2862     //-------------------------------------------------------------------------
2863     // Register Selection
2864
2865     RegRecord* availablePhysRegInterval = nullptr;
2866     bool       unassignInterval         = false;
2867
2868     // Each register will receive a score which is the sum of the scoring criteria below.
2869     // These were selected on the assumption that they will have an impact on the "goodness"
2870     // of a register selection, and have been tuned to a certain extent by observing the impact
2871     // of the ordering on asmDiffs.  However, there is probably much more room for tuning,
2872     // and perhaps additional criteria.
2873     //
2874     // These are FLAGS (bits) so that we can easily order them and add them together.
2875     // If the scores are equal, but one covers more of the current interval's range,
2876     // then it wins.  Otherwise, the one encountered earlier in the regOrder wins.
2877
2878     enum RegisterScore
2879     {
2880         VALUE_AVAILABLE = 0x40, // It is a constant value that is already in an acceptable register.
2881         COVERS          = 0x20, // It is in the interval's preference set and it covers the entire lifetime.
2882         OWN_PREFERENCE  = 0x10, // It is in the preference set of this interval.
2883         COVERS_RELATED  = 0x08, // It is in the preference set of the related interval and covers the entire lifetime.
2884         RELATED_PREFERENCE = 0x04, // It is in the preference set of the related interval.
2885         CALLER_CALLEE      = 0x02, // It is in the right "set" for the interval (caller or callee-save).
2886         UNASSIGNED         = 0x01, // It is not currently assigned to an inactive interval.
2887     };
2888
2889     int bestScore = 0;
2890
2891     // Compute the best possible score so we can stop looping early if we find it.
2892     // TODO-Throughput: At some point we may want to short-circuit the computation of each score, but
2893     // probably not until we've tuned the order of these criteria.  At that point,
2894     // we'll need to avoid the short-circuit if we've got a stress option to reverse
2895     // the selection.
2896     int bestPossibleScore = COVERS + UNASSIGNED + OWN_PREFERENCE + CALLER_CALLEE;
2897     if (relatedPreferences != RBM_NONE)
2898     {
2899         bestPossibleScore |= RELATED_PREFERENCE + COVERS_RELATED;
2900     }
2901
2902     LsraLocation bestLocation = MinLocation;
2903
2904     // In non-debug builds, this will simply get optimized away
2905     bool reverseSelect = false;
2906 #ifdef DEBUG
2907     reverseSelect = doReverseSelect();
2908 #endif // DEBUG
2909
2910     // An optimization for the common case where there is only one candidate -
2911     // avoid looping over all the other registers
2912
2913     regNumber singleReg = REG_NA;
2914
2915     if (genMaxOneBit(candidates))
2916     {
2917         regOrderSize = 1;
2918         singleReg    = genRegNumFromMask(candidates);
2919         regOrder     = &singleReg;
2920     }
2921
2922     for (unsigned i = 0; i < regOrderSize && (candidates != RBM_NONE); i++)
2923     {
2924         regNumber regNum       = regOrder[i];
2925         regMaskTP candidateBit = genRegMask(regNum);
2926
2927         if (!(candidates & candidateBit))
2928         {
2929             continue;
2930         }
2931
2932         candidates &= ~candidateBit;
2933
2934         RegRecord* physRegRecord = getRegisterRecord(regNum);
2935
2936         int          score               = 0;
2937         LsraLocation nextPhysRefLocation = MaxLocation;
2938
2939         // By chance, is this register already holding this interval, as a copyReg or having
2940         // been restored as inactive after a kill?
2941         if (physRegRecord->assignedInterval == currentInterval)
2942         {
2943             availablePhysRegInterval = physRegRecord;
2944             unassignInterval         = false;
2945             break;
2946         }
2947
2948         // Find the next RefPosition of the physical register
2949         if (!registerIsAvailable(physRegRecord, currentLocation, &nextPhysRefLocation, regType))
2950         {
2951             continue;
2952         }
2953
2954         // If the register is next referenced at this location, only use it if
2955         // this has a fixed reg requirement (i.e. this is the reference that caused
2956         // the FixedReg ref to be created)
2957
2958         if (physRegRecord->conflictingFixedRegReference(refPosition))
2959         {
2960             continue;
2961         }
2962
2963         // If this is a definition of a constant interval, check to see if its value is already in this register.
2964         if (currentInterval->isConstant && RefTypeIsDef(refPosition->refType) &&
2965             isMatchingConstant(physRegRecord, refPosition))
2966         {
2967             score |= VALUE_AVAILABLE;
2968         }
2969
2970         // If the nextPhysRefLocation is a fixedRef for the rangeEndRefPosition, increment it so that
2971         // we don't think it isn't covering the live range.
2972         // This doesn't handle the case where earlier RefPositions for this Interval are also
2973         // FixedRefs of this regNum, but at least those are only interesting in the case where those
2974         // are "local last uses" of the Interval - otherwise the liveRange would interfere with the reg.
2975         if (nextPhysRefLocation == rangeEndLocation && rangeEndRefPosition->isFixedRefOfReg(regNum))
2976         {
2977             INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_INCREMENT_RANGE_END, currentInterval));
2978             nextPhysRefLocation++;
2979         }
2980
2981         if ((candidateBit & preferences) != RBM_NONE)
2982         {
2983             score |= OWN_PREFERENCE;
2984             if (nextPhysRefLocation > rangeEndLocation)
2985             {
2986                 score |= COVERS;
2987             }
2988         }
2989         if ((candidateBit & relatedPreferences) != RBM_NONE)
2990         {
2991             score |= RELATED_PREFERENCE;
2992             if (nextPhysRefLocation > relatedInterval->lastRefPosition->nodeLocation)
2993             {
2994                 score |= COVERS_RELATED;
2995             }
2996         }
2997
2998         // If we had a fixed-reg def of a reg that will be killed before the use, prefer it to any other registers
2999         // with the same score.  (Note that we haven't changed the original registerAssignment on the RefPosition).
3000         // Overload the RELATED_PREFERENCE value.
3001         else if (candidateBit == refPosition->registerAssignment)
3002         {
3003             score |= RELATED_PREFERENCE;
3004         }
3005
3006         if ((preferCalleeSave && physRegRecord->isCalleeSave) || (!preferCalleeSave && !physRegRecord->isCalleeSave))
3007         {
3008             score |= CALLER_CALLEE;
3009         }
3010
3011         // The register is considered unassigned if it has no assignedInterval, OR
3012         // if its next reference is beyond the range of this interval.
3013         if (!isAssigned(physRegRecord, lastLocation ARM_ARG(currentInterval->registerType)))
3014         {
3015             score |= UNASSIGNED;
3016         }
3017
3018         bool foundBetterCandidate = false;
3019
3020         if (score > bestScore)
3021         {
3022             foundBetterCandidate = true;
3023         }
3024         else if (score == bestScore)
3025         {
3026             // Prefer a register that covers the range.
3027             if (bestLocation <= lastLocation)
3028             {
3029                 if (nextPhysRefLocation > bestLocation)
3030                 {
3031                     foundBetterCandidate = true;
3032                 }
3033             }
3034             // If both cover the range, prefer a register that is killed sooner (leaving the longer range register
3035             // available). If both cover the range and also getting killed at the same location, prefer the one which
3036             // is same as previous assignment.
3037             else if (nextPhysRefLocation > lastLocation)
3038             {
3039                 if (nextPhysRefLocation < bestLocation)
3040                 {
3041                     foundBetterCandidate = true;
3042                 }
3043                 else if (nextPhysRefLocation == bestLocation && prevReg == regNum)
3044                 {
3045                     foundBetterCandidate = true;
3046                 }
3047             }
3048         }
3049
3050 #ifdef DEBUG
3051         if (doReverseSelect() && bestScore != 0)
3052         {
3053             foundBetterCandidate = !foundBetterCandidate;
3054         }
3055 #endif // DEBUG
3056
3057         if (foundBetterCandidate)
3058         {
3059             bestLocation             = nextPhysRefLocation;
3060             availablePhysRegInterval = physRegRecord;
3061             unassignInterval         = true;
3062             bestScore                = score;
3063         }
3064
3065         // there is no way we can get a better score so break out
3066         if (!reverseSelect && score == bestPossibleScore && bestLocation == rangeEndLocation + 1)
3067         {
3068             break;
3069         }
3070     }
3071
3072     if (availablePhysRegInterval != nullptr)
3073     {
3074         if (unassignInterval && isAssigned(availablePhysRegInterval ARM_ARG(currentInterval->registerType)))
3075         {
3076             Interval* const intervalToUnassign = availablePhysRegInterval->assignedInterval;
3077             unassignPhysReg(availablePhysRegInterval ARM_ARG(currentInterval->registerType));
3078
3079             if ((bestScore & VALUE_AVAILABLE) != 0 && intervalToUnassign != nullptr)
3080             {
3081                 assert(intervalToUnassign->isConstant);
3082                 refPosition->treeNode->SetReuseRegVal();
3083             }
3084             // If we considered this "unassigned" because this interval's lifetime ends before
3085             // the next ref, remember it.
3086             else if ((bestScore & UNASSIGNED) != 0 && intervalToUnassign != nullptr)
3087             {
3088                 updatePreviousInterval(availablePhysRegInterval, intervalToUnassign, intervalToUnassign->registerType);
3089             }
3090         }
3091         else
3092         {
3093             assert((bestScore & VALUE_AVAILABLE) == 0);
3094         }
3095         assignPhysReg(availablePhysRegInterval, currentInterval);
3096         foundReg                        = availablePhysRegInterval->regNum;
3097         regMaskTP foundRegMask          = genRegMask(foundReg);
3098         refPosition->registerAssignment = foundRegMask;
3099     }
3100
3101     return foundReg;
3102 }
3103
3104 //------------------------------------------------------------------------
3105 // canSpillReg: Determine whether we can spill physRegRecord
3106 //
3107 // Arguments:
3108 //    physRegRecord             - reg to spill
3109 //    refLocation               - Location of RefPosition where this register will be spilled
3110 //    recentAssignedRefWeight   - Weight of recent assigned RefPosition which will be determined in this function
3111 //    farthestRefPosWeight      - Current farthestRefPosWeight at allocateBusyReg()
3112 //
3113 // Return Value:
3114 //    True  - if we can spill physRegRecord
3115 //    False - otherwise
3116 //
3117 // Note: This helper is designed to be used only from allocateBusyReg() and canSpillDoubleReg()
3118 //
3119 bool LinearScan::canSpillReg(RegRecord* physRegRecord, LsraLocation refLocation, unsigned* recentAssignedRefWeight)
3120 {
3121     assert(physRegRecord->assignedInterval != nullptr);
3122     RefPosition* recentAssignedRef = physRegRecord->assignedInterval->recentRefPosition;
3123
3124     if (recentAssignedRef != nullptr)
3125     {
3126         if (isRefPositionActive(recentAssignedRef, refLocation))
3127         {
3128             // We can't spill a register that's active at the current location
3129             return false;
3130         }
3131
3132         // We don't prefer to spill a register if the weight of recentAssignedRef > weight
3133         // of the spill candidate found so far.  We would consider spilling a greater weight
3134         // ref position only if the refPosition being allocated must need a reg.
3135         *recentAssignedRefWeight = getWeight(recentAssignedRef);
3136     }
3137     return true;
3138 }
3139
#ifdef _TARGET_ARM_
//------------------------------------------------------------------------
// canSpillDoubleReg: Determine whether we can spill physRegRecord
//
// Arguments:
//    physRegRecord             - reg to spill (must be a valid double register)
//    refLocation               - Location of RefPosition where this register will be spilled
//    recentAssignedRefWeight   - [out] The larger of the weights of the recent assigned
//                                RefPositions of the two half registers
//
// Return Value:
//    True  - if we can spill physRegRecord
//    False - otherwise
//
// Notes:
//    This helper is designed to be used only from allocateBusyReg().
//    The recentAssignedRefWeight is not updated if either register cannot be spilled.
//
bool LinearScan::canSpillDoubleReg(RegRecord*   physRegRecord,
                                   LsraLocation refLocation,
                                   unsigned*    recentAssignedRefWeight)
{
    assert(genIsValidDoubleReg(physRegRecord->regNum));
    unsigned weight  = BB_ZERO_WEIGHT;
    unsigned weight2 = BB_ZERO_WEIGHT;

    // The other (upper) half of the even/odd float register pair.
    RegRecord* physRegRecord2 = findAnotherHalfRegRec(physRegRecord);

    // Each half that has an assigned interval must itself be spillable.
    if ((physRegRecord->assignedInterval != nullptr) && !canSpillReg(physRegRecord, refLocation, &weight))
    {
        return false;
    }
    if ((physRegRecord2->assignedInterval != nullptr) && !canSpillReg(physRegRecord2, refLocation, &weight2))
    {
        return false;
    }

    // Report the heavier of the two halves' weights.
    *recentAssignedRefWeight = (weight2 > weight) ? weight2 : weight;
    return true;
}
#endif
3187
#ifdef _TARGET_ARM_
//------------------------------------------------------------------------
// unassignDoublePhysReg: unassign a double register (pair)
//
// Arguments:
//    doubleRegRecord - reg to unassign
//
// Note:
//    The given RegRecord must be a valid (even numbered) double register.
//
void LinearScan::unassignDoublePhysReg(RegRecord* doubleRegRecord)
{
    assert(genIsValidDoubleReg(doubleRegRecord->regNum));

    RegRecord* loHalfRec = doubleRegRecord;
    RegRecord* hiHalfRec = findAnotherHalfRegRec(loHalfRec);

    // A double register pair can be in one of four states:
    // Case 1: the lo half is assigned to a TYP_DOUBLE interval (which occupies both halves)
    // Case 2: the lo and hi halves are assigned to two different TYP_FLOAT intervals
    // Case 3: the lo half is assigned to a TYP_FLOAT interval and the hi half record is nullptr
    // Case 4: the lo half is unassigned and the hi half is assigned to a TYP_FLOAT interval
    if (loHalfRec->assignedInterval == nullptr)
    {
        // Case 4: only the hi half holds an interval, which must be a float.
        assert(hiHalfRec->assignedInterval != nullptr);
        assert(hiHalfRec->assignedInterval->registerType == TYP_FLOAT);
        unassignPhysReg(hiHalfRec, hiHalfRec->assignedInterval->recentRefPosition);
    }
    else if (loHalfRec->assignedInterval->registerType == TYP_DOUBLE)
    {
        // Case 1: unassigning the lo half releases the whole double.
        unassignPhysReg(loHalfRec, loHalfRec->assignedInterval->recentRefPosition);
    }
    else
    {
        // Cases 2 and 3: the lo half holds a float; the hi half may or may not hold one.
        assert(loHalfRec->assignedInterval->registerType == TYP_FLOAT);
        unassignPhysReg(loHalfRec, loHalfRec->assignedInterval->recentRefPosition);

        if ((hiHalfRec != nullptr) && (hiHalfRec->assignedInterval != nullptr))
        {
            assert(hiHalfRec->assignedInterval->registerType == TYP_FLOAT);
            unassignPhysReg(hiHalfRec, hiHalfRec->assignedInterval->recentRefPosition);
        }
    }
}

#endif // _TARGET_ARM_
3243
3244 //------------------------------------------------------------------------
3245 // isRefPositionActive: Determine whether a given RefPosition is active at the given location
3246 //
3247 // Arguments:
3248 //    refPosition - the RefPosition of interest
3249 //    refLocation - the LsraLocation at which we want to know if it is active
3250 //
3251 // Return Value:
3252 //    True  - if this RefPosition occurs at the given location, OR
3253 //            if it occurs at the previous location and is marked delayRegFree.
3254 //    False - otherwise
3255 //
3256 bool LinearScan::isRefPositionActive(RefPosition* refPosition, LsraLocation refLocation)
3257 {
3258     return (refPosition->nodeLocation == refLocation ||
3259             ((refPosition->nodeLocation + 1 == refLocation) && refPosition->delayRegFree));
3260 }
3261
3262 //----------------------------------------------------------------------------------------
3263 // isRegInUse: Test whether regRec is being used at the refPosition
3264 //
3265 // Arguments:
3266 //    regRec - A register to be tested
3267 //    refPosition - RefPosition where regRec is tested
3268 //
3269 // Return Value:
3270 //    True - if regRec is being used
3271 //    False - otherwise
3272 //
3273 // Notes:
3274 //    This helper is designed to be used only from allocateBusyReg(), where:
3275 //    - This register was *not* found when looking for a free register, and
3276 //    - The caller must have already checked for the case where 'refPosition' is a fixed ref
3277 //      (asserted at the beginning of this method).
3278 //
3279 bool LinearScan::isRegInUse(RegRecord* regRec, RefPosition* refPosition)
3280 {
3281     // We shouldn't reach this check if 'refPosition' is a FixedReg of this register.
3282     assert(!refPosition->isFixedRefOfReg(regRec->regNum));
3283     Interval* assignedInterval = regRec->assignedInterval;
3284     if (assignedInterval != nullptr)
3285     {
3286         if (!assignedInterval->isActive)
3287         {
3288             // This can only happen if we have a recentRefPosition active at this location that hasn't yet been freed.
3289             CLANG_FORMAT_COMMENT_ANCHOR;
3290
3291             if (isRefPositionActive(assignedInterval->recentRefPosition, refPosition->nodeLocation))
3292             {
3293                 return true;
3294             }
3295             else
3296             {
3297 #ifdef _TARGET_ARM_
3298                 // In the case of TYP_DOUBLE, we may have the case where 'assignedInterval' is inactive,
3299                 // but the other half register is active. If so, it must be have an active recentRefPosition,
3300                 // as above.
3301                 if (refPosition->getInterval()->registerType == TYP_DOUBLE)
3302                 {
3303                     RegRecord* otherHalfRegRec = findAnotherHalfRegRec(regRec);
3304                     if (!otherHalfRegRec->assignedInterval->isActive)
3305                     {
3306                         if (isRefPositionActive(otherHalfRegRec->assignedInterval->recentRefPosition,
3307                                                 refPosition->nodeLocation))
3308                         {
3309                             return true;
3310                         }
3311                         else
3312                         {
3313                             assert(!"Unexpected inactive assigned interval in isRegInUse");
3314                             return true;
3315                         }
3316                     }
3317                 }
3318                 else
3319 #endif
3320                 {
3321                     assert(!"Unexpected inactive assigned interval in isRegInUse");
3322                     return true;
3323                 }
3324             }
3325         }
3326         RefPosition* nextAssignedRef = assignedInterval->getNextRefPosition();
3327
3328         // We should never spill a register that's occupied by an Interval with its next use at the current
3329         // location.
3330         // Normally this won't occur (unless we actually had more uses in a single node than there are registers),
3331         // because we'll always find something with a later nextLocation, but it can happen in stress when
3332         // we have LSRA_SELECT_NEAREST.
3333         if ((nextAssignedRef != nullptr) && isRefPositionActive(nextAssignedRef, refPosition->nodeLocation) &&
3334             !nextAssignedRef->RegOptional())
3335         {
3336             return true;
3337         }
3338     }
3339     return false;
3340 }
3341
3342 //------------------------------------------------------------------------
3343 // isSpillCandidate: Determine if a register is a spill candidate for a given RefPosition.
3344 //
3345 // Arguments:
3346 //    current               The interval for the current allocation
3347 //    refPosition           The RefPosition of the current Interval for which a register is being allocated
3348 //    physRegRecord         The RegRecord for the register we're considering for spill
3349 //    nextLocation          An out (reference) parameter in which the next use location of the
3350 //                          given RegRecord will be returned.
3351 //
3352 // Return Value:
3353 //    True iff the given register can be spilled to accommodate the given RefPosition.
3354 //
3355 bool LinearScan::isSpillCandidate(Interval*     current,
3356                                   RefPosition*  refPosition,
3357                                   RegRecord*    physRegRecord,
3358                                   LsraLocation& nextLocation)
3359 {
3360     regMaskTP    candidateBit = genRegMask(physRegRecord->regNum);
3361     LsraLocation refLocation  = refPosition->nodeLocation;
3362     if (physRegRecord->isBusyUntilNextKill)
3363     {
3364         return false;
3365     }
3366     Interval* assignedInterval = physRegRecord->assignedInterval;
3367     if (assignedInterval != nullptr)
3368     {
3369         nextLocation = assignedInterval->getNextRefLocation();
3370     }
3371 #ifdef _TARGET_ARM_
3372     RegRecord* physRegRecord2    = nullptr;
3373     Interval*  assignedInterval2 = nullptr;
3374
3375     // For ARM32, a double occupies a consecutive even/odd pair of float registers.
3376     if (current->registerType == TYP_DOUBLE)
3377     {
3378         assert(genIsValidDoubleReg(physRegRecord->regNum));
3379         physRegRecord2 = findAnotherHalfRegRec(physRegRecord);
3380         if (physRegRecord2->isBusyUntilNextKill)
3381         {
3382             return false;
3383         }
3384         assignedInterval2 = physRegRecord2->assignedInterval;
3385         if ((assignedInterval2 != nullptr) && (assignedInterval2->getNextRefLocation() > nextLocation))
3386         {
3387             nextLocation = assignedInterval2->getNextRefLocation();
3388         }
3389     }
3390 #endif
3391
3392     // If there is a fixed reference at the same location (and it's not due to this reference),
3393     // don't use it.
3394     if (physRegRecord->conflictingFixedRegReference(refPosition))
3395     {
3396         return false;
3397     }
3398
3399     if (refPosition->isFixedRefOfRegMask(candidateBit))
3400     {
3401         // Either:
3402         // - there is a fixed reference due to this node, OR
3403         // - or there is a fixed use fed by a def at this node, OR
3404         // - or we have restricted the set of registers for stress.
3405         // In any case, we must use this register as it's the only candidate
3406         // TODO-CQ: At the time we allocate a register to a fixed-reg def, if it's not going
3407         // to remain live until the use, we should set the candidates to allRegs(regType)
3408         // to avoid a spill - codegen can then insert the copy.
3409         // If this is marked as allocateIfProfitable, the caller will compare the weights
3410         // of this RefPosition and the RefPosition to which it is currently assigned.
3411         assert(refPosition->isFixedRegRef ||
3412                (refPosition->nextRefPosition != nullptr && refPosition->nextRefPosition->isFixedRegRef) ||
3413                candidatesAreStressLimited());
3414         return true;
3415     }
3416
3417     // If this register is not assigned to an interval, either
3418     // - it has a FixedReg reference at the current location that is not this reference, OR
3419     // - this is the special case of a fixed loReg, where this interval has a use at the same location
3420     // In either case, we cannot use it
3421     CLANG_FORMAT_COMMENT_ANCHOR;
3422
3423 #ifdef _TARGET_ARM_
3424     if (assignedInterval == nullptr && assignedInterval2 == nullptr)
3425 #else
3426     if (assignedInterval == nullptr)
3427 #endif
3428     {
3429         RefPosition* nextPhysRegPosition = physRegRecord->getNextRefPosition();
3430         assert((nextPhysRegPosition != nullptr) && (nextPhysRegPosition->nodeLocation == refLocation) &&
3431                (candidateBit != refPosition->registerAssignment));
3432         return false;
3433     }
3434
3435     if (isRegInUse(physRegRecord, refPosition))
3436     {
3437         return false;
3438     }
3439
3440 #ifdef _TARGET_ARM_
3441     if (current->registerType == TYP_DOUBLE)
3442     {
3443         if (isRegInUse(physRegRecord2, refPosition))
3444         {
3445             return false;
3446         }
3447     }
3448 #endif
3449     return true;
3450 }
3451
3452 //------------------------------------------------------------------------
3453 // allocateBusyReg: Find a busy register that satisfies the requirements for refPosition,
3454 //                  and that can be spilled.
3455 //
3456 // Arguments:
3457 //    current               The interval for the current allocation
3458 //    refPosition           The RefPosition of the current Interval for which a register is being allocated
3459 //    allocateIfProfitable  If true, a reg may not be allocated if all other ref positions currently
3460 //                          occupying registers are more important than the 'refPosition'.
3461 //
3462 // Return Value:
3463 //    The regNumber allocated to the RefPositon.  Returns REG_NA if no free register is found.
3464 //
3465 // Note:  Currently this routine uses weight and farthest distance of next reference
3466 // to select a ref position for spilling.
3467 // a) if allocateIfProfitable = false
3468 //        The ref position chosen for spilling will be the lowest weight
3469 //        of all and if there is is more than one ref position with the
3470 //        same lowest weight, among them choses the one with farthest
3471 //        distance to its next reference.
3472 //
3473 // b) if allocateIfProfitable = true
3474 //        The ref position chosen for spilling will not only be lowest weight
3475 //        of all but also has a weight lower than 'refPosition'.  If there is
3476 //        no such ref position, reg will not be allocated.
3477 //
3478 regNumber LinearScan::allocateBusyReg(Interval* current, RefPosition* refPosition, bool allocateIfProfitable)
3479 {
3480     regNumber foundReg = REG_NA;
3481
3482     RegisterType regType     = getRegisterType(current, refPosition);
3483     regMaskTP    candidates  = refPosition->registerAssignment;
3484     regMaskTP    preferences = (current->registerPreferences & candidates);
3485     if (preferences == RBM_NONE)
3486     {
3487         preferences = candidates;
3488     }
3489     if (candidates == RBM_NONE)
3490     {
3491         // This assumes only integer and floating point register types
3492         // if we target a processor with additional register types,
3493         // this would have to change
3494         candidates = allRegs(regType);
3495     }
3496
3497 #ifdef DEBUG
3498     candidates = stressLimitRegs(refPosition, candidates);
3499 #endif // DEBUG
3500
3501     // TODO-CQ: Determine whether/how to take preferences into account in addition to
3502     // prefering the one with the furthest ref position when considering
3503     // a candidate to spill
3504     RegRecord* farthestRefPhysRegRecord = nullptr;
3505 #ifdef _TARGET_ARM_
3506     RegRecord* farthestRefPhysRegRecord2 = nullptr;
3507 #endif
3508     LsraLocation farthestLocation = MinLocation;
3509     LsraLocation refLocation      = refPosition->nodeLocation;
3510     unsigned     farthestRefPosWeight;
3511     if (allocateIfProfitable)
3512     {
3513         // If allocating a reg is optional, we will consider those ref positions
3514         // whose weight is less than 'refPosition' for spilling.
3515         farthestRefPosWeight = getWeight(refPosition);
3516     }
3517     else
3518     {
3519         // If allocating a reg is a must, we start off with max weight so
3520         // that the first spill candidate will be selected based on
3521         // farthest distance alone.  Since we start off with farthestLocation
3522         // initialized to MinLocation, the first available ref position
3523         // will be selected as spill candidate and its weight as the
3524         // fathestRefPosWeight.
3525         farthestRefPosWeight = BB_MAX_WEIGHT;
3526     }
3527
3528     for (regNumber regNum : Registers(regType))
3529     {
3530         regMaskTP candidateBit = genRegMask(regNum);
3531         if (!(candidates & candidateBit))
3532         {
3533             continue;
3534         }
3535         RegRecord*   physRegRecord  = getRegisterRecord(regNum);
3536         RegRecord*   physRegRecord2 = nullptr; // only used for _TARGET_ARM_
3537         LsraLocation nextLocation   = MinLocation;
3538         LsraLocation physRegNextLocation;
3539         if (!isSpillCandidate(current, refPosition, physRegRecord, nextLocation))
3540         {
3541             assert(candidates != candidateBit);
3542             continue;
3543         }
3544
3545         // We've passed the preliminary checks for a spill candidate.
3546         // Now, if we have a recentAssignedRef, check that it is going to be OK to spill it.
3547         Interval*    assignedInterval        = physRegRecord->assignedInterval;
3548         unsigned     recentAssignedRefWeight = BB_ZERO_WEIGHT;
3549         RefPosition* recentAssignedRef       = nullptr;
3550         RefPosition* recentAssignedRef2      = nullptr;
3551 #ifdef _TARGET_ARM_
3552         if (current->registerType == TYP_DOUBLE)
3553         {
3554             recentAssignedRef           = (assignedInterval == nullptr) ? nullptr : assignedInterval->recentRefPosition;
3555             physRegRecord2              = findAnotherHalfRegRec(physRegRecord);
3556             Interval* assignedInterval2 = physRegRecord2->assignedInterval;
3557             recentAssignedRef2 = (assignedInterval2 == nullptr) ? nullptr : assignedInterval2->recentRefPosition;
3558             if (!canSpillDoubleReg(physRegRecord, refLocation, &recentAssignedRefWeight))
3559             {
3560                 continue;
3561             }
3562         }
3563         else
3564 #endif
3565         {
3566             recentAssignedRef = assignedInterval->recentRefPosition;
3567             if (!canSpillReg(physRegRecord, refLocation, &recentAssignedRefWeight))
3568             {
3569                 continue;
3570             }
3571         }
3572         if (recentAssignedRefWeight > farthestRefPosWeight)
3573         {
3574             continue;
3575         }
3576
3577         physRegNextLocation = physRegRecord->getNextRefLocation();
3578         if (nextLocation > physRegNextLocation)
3579         {
3580             nextLocation = physRegNextLocation;
3581         }
3582
3583         bool isBetterLocation;
3584
3585 #ifdef DEBUG
3586         if (doSelectNearest() && farthestRefPhysRegRecord != nullptr)
3587         {
3588             isBetterLocation = (nextLocation <= farthestLocation);
3589         }
3590         else
3591 #endif
3592             // This if-stmt is associated with the above else
3593             if (recentAssignedRefWeight < farthestRefPosWeight)
3594         {
3595             isBetterLocation = true;
3596         }
3597         else
3598         {
3599             // This would mean the weight of spill ref position we found so far is equal
3600             // to the weight of the ref position that is being evaluated.  In this case
3601             // we prefer to spill ref position whose distance to its next reference is
3602             // the farthest.
3603             assert(recentAssignedRefWeight == farthestRefPosWeight);
3604
3605             // If allocateIfProfitable=true, the first spill candidate selected
3606             // will be based on weight alone. After we have found a spill
3607             // candidate whose weight is less than the 'refPosition', we will
3608             // consider farthest distance when there is a tie in weights.
3609             // This is to ensure that we don't spill a ref position whose
3610             // weight is equal to weight of 'refPosition'.
3611             if (allocateIfProfitable && farthestRefPhysRegRecord == nullptr)
3612             {
3613                 isBetterLocation = false;
3614             }
3615             else
3616             {
3617                 isBetterLocation = (nextLocation > farthestLocation);
3618
3619                 if (nextLocation > farthestLocation)
3620                 {
3621                     isBetterLocation = true;
3622                 }
3623                 else if (nextLocation == farthestLocation)
3624                 {
3625                     // Both weight and distance are equal.
3626                     // Prefer that ref position which is marked both reload and
3627                     // allocate if profitable.  These ref positions don't need
3628                     // need to be spilled as they are already in memory and
3629                     // codegen considers them as contained memory operands.
3630                     CLANG_FORMAT_COMMENT_ANCHOR;
3631 #ifdef _TARGET_ARM_
3632                     // TODO-CQ-ARM: Just conservatively "and" two conditions. We may implement a better condition later.
3633                     isBetterLocation = true;
3634                     if (recentAssignedRef != nullptr)
3635                         isBetterLocation &= (recentAssignedRef->reload && recentAssignedRef->RegOptional());
3636
3637                     if (recentAssignedRef2 != nullptr)
3638                         isBetterLocation &= (recentAssignedRef2->reload && recentAssignedRef2->RegOptional());
3639 #else
3640                     isBetterLocation =
3641                         (recentAssignedRef != nullptr) && recentAssignedRef->reload && recentAssignedRef->RegOptional();
3642 #endif
3643                 }
3644                 else
3645                 {
3646                     isBetterLocation = false;
3647                 }
3648             }
3649         }
3650
3651         if (isBetterLocation)
3652         {
3653             farthestLocation         = nextLocation;
3654             farthestRefPhysRegRecord = physRegRecord;
3655 #ifdef _TARGET_ARM_
3656             farthestRefPhysRegRecord2 = physRegRecord2;
3657 #endif
3658             farthestRefPosWeight = recentAssignedRefWeight;
3659         }
3660     }
3661
3662 #if DEBUG
3663     if (allocateIfProfitable)
3664     {
3665         // There may not be a spill candidate or if one is found
3666         // its weight must be less than the weight of 'refPosition'
3667         assert((farthestRefPhysRegRecord == nullptr) || (farthestRefPosWeight < getWeight(refPosition)));
3668     }
3669     else
3670     {
3671         // Must have found a spill candidate.
3672         assert(farthestRefPhysRegRecord != nullptr);
3673
3674         if (farthestLocation == refLocation)
3675         {
3676             // This must be a RefPosition that is constrained to use a single register, either directly,
3677             // or at the use, or by stress.
3678             bool isConstrained = (refPosition->isFixedRegRef || (refPosition->nextRefPosition != nullptr &&
3679                                                                  refPosition->nextRefPosition->isFixedRegRef) ||
3680                                   candidatesAreStressLimited());
3681             if (!isConstrained)
3682             {
3683 #ifdef _TARGET_ARM_
3684                 Interval* assignedInterval =
3685                     (farthestRefPhysRegRecord == nullptr) ? nullptr : farthestRefPhysRegRecord->assignedInterval;
3686                 Interval* assignedInterval2 =
3687                     (farthestRefPhysRegRecord2 == nullptr) ? nullptr : farthestRefPhysRegRecord2->assignedInterval;
3688                 RefPosition* nextRefPosition =
3689                     (assignedInterval == nullptr) ? nullptr : assignedInterval->getNextRefPosition();
3690                 RefPosition* nextRefPosition2 =
3691                     (assignedInterval2 == nullptr) ? nullptr : assignedInterval2->getNextRefPosition();
3692                 if (nextRefPosition != nullptr)
3693                 {
3694                     if (nextRefPosition2 != nullptr)
3695                     {
3696                         assert(nextRefPosition->RegOptional() || nextRefPosition2->RegOptional());
3697                     }
3698                     else
3699                     {
3700                         assert(nextRefPosition->RegOptional());
3701                     }
3702                 }
3703                 else
3704                 {
3705                     assert(nextRefPosition2 != nullptr && nextRefPosition2->RegOptional());
3706                 }
3707 #else  // !_TARGET_ARM_
3708                 Interval*    assignedInterval = farthestRefPhysRegRecord->assignedInterval;
3709                 RefPosition* nextRefPosition  = assignedInterval->getNextRefPosition();
3710                 assert(nextRefPosition->RegOptional());
3711 #endif // !_TARGET_ARM_
3712             }
3713         }
3714         else
3715         {
3716             assert(farthestLocation > refLocation);
3717         }
3718     }
3719 #endif // DEBUG
3720
3721     if (farthestRefPhysRegRecord != nullptr)
3722     {
3723         foundReg = farthestRefPhysRegRecord->regNum;
3724
3725 #ifdef _TARGET_ARM_
3726         if (current->registerType == TYP_DOUBLE)
3727         {
3728             assert(genIsValidDoubleReg(foundReg));
3729             unassignDoublePhysReg(farthestRefPhysRegRecord);
3730         }
3731         else
3732 #endif
3733         {
3734             unassignPhysReg(farthestRefPhysRegRecord, farthestRefPhysRegRecord->assignedInterval->recentRefPosition);
3735         }
3736
3737         assignPhysReg(farthestRefPhysRegRecord, current);
3738         refPosition->registerAssignment = genRegMask(foundReg);
3739     }
3740     else
3741     {
3742         foundReg                        = REG_NA;
3743         refPosition->registerAssignment = RBM_NONE;
3744     }
3745
3746     return foundReg;
3747 }
3748
3749 // Grab a register to use to copy and then immediately use.
3750 // This is called only for localVar intervals that already have a register
3751 // assignment that is not compatible with the current RefPosition.
3752 // This is not like regular assignment, because we don't want to change
3753 // any preferences or existing register assignments.
3754 // Prefer a free register that's got the earliest next use.
3755 // Otherwise, spill something with the farthest next use
3756 //
3757 regNumber LinearScan::assignCopyReg(RefPosition* refPosition)
3758 {
3759     Interval* currentInterval = refPosition->getInterval();
3760     assert(currentInterval != nullptr);
3761     assert(currentInterval->isActive);
3762
3763     bool         foundFreeReg = false;
3764     RegRecord*   bestPhysReg  = nullptr;
3765     LsraLocation bestLocation = MinLocation;
3766     regMaskTP    candidates   = refPosition->registerAssignment;
3767
3768     // Save the relatedInterval, if any, so that it doesn't get modified during allocation.
3769     Interval* savedRelatedInterval   = currentInterval->relatedInterval;
3770     currentInterval->relatedInterval = nullptr;
3771
3772     // We don't want really want to change the default assignment,
3773     // so 1) pretend this isn't active, and 2) remember the old reg
3774     regNumber  oldPhysReg   = currentInterval->physReg;
3775     RegRecord* oldRegRecord = currentInterval->assignedReg;
3776     assert(oldRegRecord->regNum == oldPhysReg);
3777     currentInterval->isActive = false;
3778
3779     regNumber allocatedReg = tryAllocateFreeReg(currentInterval, refPosition);
3780     if (allocatedReg == REG_NA)
3781     {
3782         allocatedReg = allocateBusyReg(currentInterval, refPosition, false);
3783     }
3784
3785     // Now restore the old info
3786     currentInterval->relatedInterval = savedRelatedInterval;
3787     currentInterval->physReg         = oldPhysReg;
3788     currentInterval->assignedReg     = oldRegRecord;
3789     currentInterval->isActive        = true;
3790
3791     refPosition->copyReg = true;
3792     return allocatedReg;
3793 }
3794
3795 //------------------------------------------------------------------------
3796 // isAssigned: This is the function to check if the given RegRecord has an assignedInterval
3797 //             regardless of lastLocation.
3798 //             So it would be call isAssigned() with Maxlocation value.
3799 //
3800 // Arguments:
3801 //    regRec       - The RegRecord to check that it is assigned.
3802 //    newRegType   - There are elements to judge according to the upcoming register type.
3803 //
3804 // Return Value:
3805 //    Returns true if the given RegRecord has an assignedInterval.
3806 //
3807 // Notes:
3808 //    There is the case to check if the RegRecord has an assignedInterval regardless of Lastlocation.
3809 //
3810 bool LinearScan::isAssigned(RegRecord* regRec ARM_ARG(RegisterType newRegType))
3811 {
3812     return isAssigned(regRec, MaxLocation ARM_ARG(newRegType));
3813 }
3814
3815 //------------------------------------------------------------------------
3816 // isAssigned: Check whether the given RegRecord has an assignedInterval
3817 //             that has a reference prior to the given location.
3818 //
3819 // Arguments:
3820 //    regRec       - The RegRecord of interest
3821 //    lastLocation - The LsraLocation up to which we want to check
3822 //    newRegType   - The `RegisterType` of interval we want to check
3823 //                   (this is for the purposes of checking the other half of a TYP_DOUBLE RegRecord)
3824 //
3825 // Return value:
3826 //    Returns true if the given RegRecord (and its other half, if TYP_DOUBLE) has an assignedInterval
3827 //    that is referenced prior to the given location
3828 //
3829 // Notes:
3830 //    The register is not considered to be assigned if it has no assignedInterval, or that Interval's
3831 //    next reference is beyond lastLocation
3832 //
3833 bool LinearScan::isAssigned(RegRecord* regRec, LsraLocation lastLocation ARM_ARG(RegisterType newRegType))
3834 {
3835     Interval* assignedInterval = regRec->assignedInterval;
3836
3837     if ((assignedInterval == nullptr) || assignedInterval->getNextRefLocation() > lastLocation)
3838     {
3839 #ifdef _TARGET_ARM_
3840         if (newRegType == TYP_DOUBLE)
3841         {
3842             RegRecord* anotherRegRec = findAnotherHalfRegRec(regRec);
3843
3844             if ((anotherRegRec->assignedInterval == nullptr) ||
3845                 (anotherRegRec->assignedInterval->getNextRefLocation() > lastLocation))
3846             {
3847                 // In case the newRegType is a double register,
3848                 // the score would be set UNASSIGNED if another register is also not set.
3849                 return false;
3850             }
3851         }
3852         else
3853 #endif
3854         {
3855             return false;
3856         }
3857     }
3858
3859     return true;
3860 }
3861
//------------------------------------------------------------------------
// checkAndAssignInterval: Check if the given register is already assigned to another
//                         interval; if so, unassign the physical register record.
//                         Then set its assignedInterval to 'interval'.
//
// Arguments:
//    regRec   - The RegRecord to which 'interval' is being assigned.
//    interval - The Interval being assigned to 'regRec'.
//
void LinearScan::checkAndAssignInterval(RegRecord* regRec, Interval* interval)
{
    Interval* assignedInterval = regRec->assignedInterval;
    if (assignedInterval != nullptr && assignedInterval != interval)
    {
        // This is allocated to another interval.  Either it is inactive, or it was allocated as a
        // copyReg and is therefore not the "assignedReg" of the other interval.  In the latter case,
        // we simply unassign it - in the former case we need to set the physReg on the interval to
        // REG_NA to indicate that it is no longer in that register.
        // The lack of checking for this case resulted in an assert in the retail version of System.dll,
        // in method SerialStream.GetDcbFlag.
        // Note that we can't check for the copyReg case, because we may have seen a more recent
        // RefPosition for the Interval that was NOT a copyReg.
        if (assignedInterval->assignedReg == regRec)
        {
            assert(assignedInterval->isActive == false);
            assignedInterval->physReg = REG_NA;
        }
        unassignPhysReg(regRec->regNum);
    }
#ifdef _TARGET_ARM_
    // If 'interval' and 'assignedInterval' were both TYP_DOUBLE, then we have unassigned 'assignedInterval'
    // from both halves. Otherwise, if 'interval' is TYP_DOUBLE, we now need to unassign the other half.
    if ((interval->registerType == TYP_DOUBLE) &&
        ((assignedInterval == nullptr) || (assignedInterval->registerType == TYP_FLOAT)))
    {
        RegRecord* otherRegRecord = getSecondHalfRegRec(regRec);
        assignedInterval          = otherRegRecord->assignedInterval;
        if (assignedInterval != nullptr && assignedInterval != interval)
        {
            // Same eviction logic as above, applied to the odd half of the pair.
            if (assignedInterval->assignedReg == otherRegRecord)
            {
                assert(assignedInterval->isActive == false);
                assignedInterval->physReg = REG_NA;
            }
            unassignPhysReg(otherRegRecord->regNum);
        }
    }
#endif

    updateAssignedInterval(regRec, interval, interval->registerType);
}
3907
3908 // Assign the given physical register interval to the given interval
3909 void LinearScan::assignPhysReg(RegRecord* regRec, Interval* interval)
3910 {
3911     regMaskTP assignedRegMask = genRegMask(regRec->regNum);
3912     compiler->codeGen->regSet.rsSetRegsModified(assignedRegMask DEBUGARG(true));
3913
3914     checkAndAssignInterval(regRec, interval);
3915     interval->assignedReg = regRec;
3916
3917     interval->physReg  = regRec->regNum;
3918     interval->isActive = true;
3919     if (interval->isLocalVar)
3920     {
3921         // Prefer this register for future references
3922         interval->updateRegisterPreferences(assignedRegMask);
3923     }
3924 }
3925
3926 //------------------------------------------------------------------------
3927 // setIntervalAsSplit: Set this Interval as being split
3928 //
3929 // Arguments:
3930 //    interval - The Interval which is being split
3931 //
3932 // Return Value:
3933 //    None.
3934 //
3935 // Notes:
3936 //    The given Interval will be marked as split, and it will be added to the
3937 //    set of splitOrSpilledVars.
3938 //
3939 // Assumptions:
3940 //    "interval" must be a lclVar interval, as tree temps are never split.
3941 //    This is asserted in the call to getVarIndex().
3942 //
3943 void LinearScan::setIntervalAsSplit(Interval* interval)
3944 {
3945     if (interval->isLocalVar)
3946     {
3947         unsigned varIndex = interval->getVarIndex(compiler);
3948         if (!interval->isSplit)
3949         {
3950             VarSetOps::AddElemD(compiler, splitOrSpilledVars, varIndex);
3951         }
3952         else
3953         {
3954             assert(VarSetOps::IsMember(compiler, splitOrSpilledVars, varIndex));
3955         }
3956     }
3957     interval->isSplit = true;
3958 }
3959
3960 //------------------------------------------------------------------------
3961 // setIntervalAsSpilled: Set this Interval as being spilled
3962 //
3963 // Arguments:
3964 //    interval - The Interval which is being spilled
3965 //
3966 // Return Value:
3967 //    None.
3968 //
3969 // Notes:
3970 //    The given Interval will be marked as spilled, and it will be added
3971 //    to the set of splitOrSpilledVars.
3972 //
3973 void LinearScan::setIntervalAsSpilled(Interval* interval)
3974 {
3975 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
3976     if (interval->isUpperVector)
3977     {
3978         assert(interval->relatedInterval->isLocalVar);
3979         interval->isSpilled = true;
3980         // Now we need to mark the local as spilled also, even if the lower half is never spilled,
3981         // as this will use the upper part of its home location.
3982         interval = interval->relatedInterval;
3983     }
3984 #endif
3985     if (interval->isLocalVar)
3986     {
3987         unsigned varIndex = interval->getVarIndex(compiler);
3988         if (!interval->isSpilled)
3989         {
3990             VarSetOps::AddElemD(compiler, splitOrSpilledVars, varIndex);
3991         }
3992         else
3993         {
3994             assert(VarSetOps::IsMember(compiler, splitOrSpilledVars, varIndex));
3995         }
3996     }
3997     interval->isSpilled = true;
3998 }
3999
4000 //------------------------------------------------------------------------
4001 // spill: Spill this Interval between "fromRefPosition" and "toRefPosition"
4002 //
4003 // Arguments:
4004 //    fromRefPosition - The RefPosition at which the Interval is to be spilled
4005 //    toRefPosition   - The RefPosition at which it must be reloaded
4006 //
4007 // Return Value:
4008 //    None.
4009 //
4010 // Assumptions:
4011 //    fromRefPosition and toRefPosition must not be null
4012 //
4013 void LinearScan::spillInterval(Interval* interval, RefPosition* fromRefPosition, RefPosition* toRefPosition)
4014 {
4015     assert(fromRefPosition != nullptr && toRefPosition != nullptr);
4016     assert(fromRefPosition->getInterval() == interval && toRefPosition->getInterval() == interval);
4017     assert(fromRefPosition->nextRefPosition == toRefPosition);
4018
4019     if (!fromRefPosition->lastUse)
4020     {
4021         // If not allocated a register, Lcl var def/use ref positions even if reg optional
4022         // should be marked as spillAfter.
4023         if (fromRefPosition->RegOptional() && !(interval->isLocalVar && fromRefPosition->IsActualRef()))
4024         {
4025             fromRefPosition->registerAssignment = RBM_NONE;
4026         }
4027         else
4028         {
4029             fromRefPosition->spillAfter = true;
4030         }
4031     }
4032     assert(toRefPosition != nullptr);
4033
4034 #ifdef DEBUG
4035     if (VERBOSE)
4036     {
4037         dumpLsraAllocationEvent(LSRA_EVENT_SPILL, interval);
4038     }
4039 #endif // DEBUG
4040
4041     INTRACK_STATS(updateLsraStat(LSRA_STAT_SPILL, fromRefPosition->bbNum));
4042
4043     interval->isActive = false;
4044     setIntervalAsSpilled(interval);
4045
4046     // If fromRefPosition occurs before the beginning of this block, mark this as living in the stack
4047     // on entry to this block.
4048     if (fromRefPosition->nodeLocation <= curBBStartLocation)
4049     {
4050         // This must be a lclVar interval
4051         assert(interval->isLocalVar);
4052         setInVarRegForBB(curBBNum, interval->varNum, REG_STK);
4053     }
4054 }
4055
4056 //------------------------------------------------------------------------
4057 // unassignPhysRegNoSpill: Unassign the given physical register record from
4058 //                         an active interval, without spilling.
4059 //
4060 // Arguments:
4061 //    regRec           - the RegRecord to be unasssigned
4062 //
4063 // Return Value:
4064 //    None.
4065 //
4066 // Assumptions:
4067 //    The assignedInterval must not be null, and must be active.
4068 //
4069 // Notes:
4070 //    This method is used to unassign a register when an interval needs to be moved to a
4071 //    different register, but not (yet) spilled.
4072
4073 void LinearScan::unassignPhysRegNoSpill(RegRecord* regRec)
4074 {
4075     Interval* assignedInterval = regRec->assignedInterval;
4076     assert(assignedInterval != nullptr && assignedInterval->isActive);
4077     assignedInterval->isActive = false;
4078     unassignPhysReg(regRec, nullptr);
4079     assignedInterval->isActive = true;
4080 }
4081
4082 //------------------------------------------------------------------------
4083 // checkAndClearInterval: Clear the assignedInterval for the given
4084 //                        physical register record
4085 //
4086 // Arguments:
4087 //    regRec           - the physical RegRecord to be unasssigned
4088 //    spillRefPosition - The RefPosition at which the assignedInterval is to be spilled
4089 //                       or nullptr if we aren't spilling
4090 //
4091 // Return Value:
4092 //    None.
4093 //
4094 // Assumptions:
4095 //    see unassignPhysReg
4096 //
4097 void LinearScan::checkAndClearInterval(RegRecord* regRec, RefPosition* spillRefPosition)
4098 {
4099     Interval* assignedInterval = regRec->assignedInterval;
4100     assert(assignedInterval != nullptr);
4101     regNumber thisRegNum = regRec->regNum;
4102
4103     if (spillRefPosition == nullptr)
4104     {
4105         // Note that we can't assert  for the copyReg case
4106         //
4107         if (assignedInterval->physReg == thisRegNum)
4108         {
4109             assert(assignedInterval->isActive == false);
4110         }
4111     }
4112     else
4113     {
4114         assert(spillRefPosition->getInterval() == assignedInterval);
4115     }
4116
4117     updateAssignedInterval(regRec, nullptr, assignedInterval->registerType);
4118 }
4119
4120 //------------------------------------------------------------------------
4121 // unassignPhysReg: Unassign the given physical register record, and spill the
4122 //                  assignedInterval at the given spillRefPosition, if any.
4123 //
4124 // Arguments:
4125 //    regRec           - The RegRecord to be unasssigned
4126 //    newRegType       - The RegisterType of interval that would be assigned
4127 //
4128 // Return Value:
4129 //    None.
4130 //
4131 // Notes:
4132 //    On ARM architecture, Intervals have to be unassigned considering
4133 //    with the register type of interval that would be assigned.
4134 //
4135 void LinearScan::unassignPhysReg(RegRecord* regRec ARM_ARG(RegisterType newRegType))
4136 {
4137     RegRecord* regRecToUnassign = regRec;
4138 #ifdef _TARGET_ARM_
4139     RegRecord* anotherRegRec = nullptr;
4140
4141     if ((regRecToUnassign->assignedInterval != nullptr) &&
4142         (regRecToUnassign->assignedInterval->registerType == TYP_DOUBLE))
4143     {
4144         // If the register type of interval(being unassigned or new) is TYP_DOUBLE,
4145         // It should have to be valid double register (even register)
4146         if (!genIsValidDoubleReg(regRecToUnassign->regNum))
4147         {
4148             regRecToUnassign = findAnotherHalfRegRec(regRec);
4149         }
4150     }
4151     else
4152     {
4153         if (newRegType == TYP_DOUBLE)
4154         {
4155             anotherRegRec = findAnotherHalfRegRec(regRecToUnassign);
4156         }
4157     }
4158 #endif
4159
4160     if (regRecToUnassign->assignedInterval != nullptr)
4161     {
4162         unassignPhysReg(regRecToUnassign, regRecToUnassign->assignedInterval->recentRefPosition);
4163     }
4164 #ifdef _TARGET_ARM_
4165     if ((anotherRegRec != nullptr) && (anotherRegRec->assignedInterval != nullptr))
4166     {
4167         unassignPhysReg(anotherRegRec, anotherRegRec->assignedInterval->recentRefPosition);
4168     }
4169 #endif
4170 }
4171
4172 //------------------------------------------------------------------------
4173 // unassignPhysReg: Unassign the given physical register record, and spill the
4174 //                  assignedInterval at the given spillRefPosition, if any.
4175 //
4176 // Arguments:
4177 //    regRec           - the RegRecord to be unasssigned
4178 //    spillRefPosition - The RefPosition at which the assignedInterval is to be spilled
4179 //
4180 // Return Value:
4181 //    None.
4182 //
4183 // Assumptions:
4184 //    The assignedInterval must not be null.
4185 //    If spillRefPosition is null, the assignedInterval must be inactive, or not currently
4186 //    assigned to this register (e.g. this is a copyReg for that Interval).
4187 //    Otherwise, spillRefPosition must be associated with the assignedInterval.
4188 //
4189 void LinearScan::unassignPhysReg(RegRecord* regRec, RefPosition* spillRefPosition)
4190 {
4191     Interval* assignedInterval = regRec->assignedInterval;
4192     assert(assignedInterval != nullptr);
4193     regNumber thisRegNum = regRec->regNum;
4194
4195     // Is assignedInterval actually still assigned to this register?
4196     bool intervalIsAssigned = (assignedInterval->physReg == thisRegNum);
4197
4198 #ifdef _TARGET_ARM_
4199     RegRecord* anotherRegRec = nullptr;
4200
4201     // Prepare second half RegRecord of a double register for TYP_DOUBLE
4202     if (assignedInterval->registerType == TYP_DOUBLE)
4203     {
4204         assert(isFloatRegType(regRec->registerType));
4205
4206         anotherRegRec = findAnotherHalfRegRec(regRec);
4207
4208         // Both two RegRecords should have been assigned to the same interval.
4209         assert(assignedInterval == anotherRegRec->assignedInterval);
4210         if (!intervalIsAssigned && (assignedInterval->physReg == anotherRegRec->regNum))
4211         {
4212             intervalIsAssigned = true;
4213         }
4214     }
4215 #endif // _TARGET_ARM_
4216
4217     checkAndClearInterval(regRec, spillRefPosition);
4218
4219 #ifdef _TARGET_ARM_
4220     if (assignedInterval->registerType == TYP_DOUBLE)
4221     {
4222         // Both two RegRecords should have been unassigned together.
4223         assert(regRec->assignedInterval == nullptr);
4224         assert(anotherRegRec->assignedInterval == nullptr);
4225     }
4226 #endif // _TARGET_ARM_
4227
4228     RefPosition* nextRefPosition = nullptr;
4229     if (spillRefPosition != nullptr)
4230     {
4231         nextRefPosition = spillRefPosition->nextRefPosition;
4232     }
4233
4234     if (!intervalIsAssigned && assignedInterval->physReg != REG_NA)
4235     {
4236         // This must have been a temporary copy reg, but we can't assert that because there
4237         // may have been intervening RefPositions that were not copyRegs.
4238
4239         // reg->assignedInterval has already been set to nullptr by checkAndClearInterval()
4240         assert(regRec->assignedInterval == nullptr);
4241         return;
4242     }
4243
4244     regNumber victimAssignedReg = assignedInterval->physReg;
4245     assignedInterval->physReg   = REG_NA;
4246
4247     bool spill = assignedInterval->isActive && nextRefPosition != nullptr;
4248     if (spill)
4249     {
4250         // If this is an active interval, it must have a recentRefPosition,
4251         // otherwise it would not be active
4252         assert(spillRefPosition != nullptr);
4253
4254 #if 0
4255         // TODO-CQ: Enable this and insert an explicit GT_COPY (otherwise there's no way to communicate
4256         // to codegen that we want the copyReg to be the new home location).
4257         // If the last reference was a copyReg, and we're spilling the register
4258         // it was copied from, then make the copyReg the new primary location
4259         // if possible
4260         if (spillRefPosition->copyReg)
4261         {
4262             regNumber copyFromRegNum = victimAssignedReg;
4263             regNumber copyRegNum = genRegNumFromMask(spillRefPosition->registerAssignment);
4264             if (copyFromRegNum == thisRegNum &&
4265                 getRegisterRecord(copyRegNum)->assignedInterval == assignedInterval)
4266             {
4267                 assert(copyRegNum != thisRegNum);
4268                 assignedInterval->physReg = copyRegNum;
4269                 assignedInterval->assignedReg = this->getRegisterRecord(copyRegNum);
4270                 return;
4271             }
4272         }
4273 #endif // 0
4274 #ifdef DEBUG
4275         // With JitStressRegs == 0x80 (LSRA_EXTEND_LIFETIMES), we may have a RefPosition
4276         // that is not marked lastUse even though the treeNode is a lastUse.  In that case
4277         // we must not mark it for spill because the register will have been immediately freed
4278         // after use.  While we could conceivably add special handling for this case in codegen,
4279         // it would be messy and undesirably cause the "bleeding" of LSRA stress modes outside
4280         // of LSRA.
4281         if (extendLifetimes() && assignedInterval->isLocalVar && RefTypeIsUse(spillRefPosition->refType) &&
4282             spillRefPosition->treeNode != nullptr && (spillRefPosition->treeNode->gtFlags & GTF_VAR_DEATH) != 0)
4283         {
4284             dumpLsraAllocationEvent(LSRA_EVENT_SPILL_EXTENDED_LIFETIME, assignedInterval);
4285             assignedInterval->isActive = false;
4286             spill                      = false;
4287             // If the spillRefPosition occurs before the beginning of this block, it will have
4288             // been marked as living in this register on entry to this block, but we now need
4289             // to mark this as living on the stack.
4290             if (spillRefPosition->nodeLocation <= curBBStartLocation)
4291             {
4292                 setInVarRegForBB(curBBNum, assignedInterval->varNum, REG_STK);
4293                 if (spillRefPosition->nextRefPosition != nullptr)
4294                 {
4295                     setIntervalAsSpilled(assignedInterval);
4296                 }
4297             }
4298             else
4299             {
4300                 // Otherwise, we need to mark spillRefPosition as lastUse, or the interval
4301                 // will remain active beyond its allocated range during the resolution phase.
4302                 spillRefPosition->lastUse = true;
4303             }
4304         }
4305         else
4306 #endif // DEBUG
4307         {
4308             spillInterval(assignedInterval, spillRefPosition, nextRefPosition);
4309         }
4310     }
4311     // Maintain the association with the interval, if it has more references.
4312     // Or, if we "remembered" an interval assigned to this register, restore it.
4313     if (nextRefPosition != nullptr)
4314     {
4315         assignedInterval->assignedReg = regRec;
4316     }
4317     else if (canRestorePreviousInterval(regRec, assignedInterval))
4318     {
4319         regRec->assignedInterval = regRec->previousInterval;
4320         regRec->previousInterval = nullptr;
4321
4322 #ifdef _TARGET_ARM_
4323         // Note:
4324         //   We can not use updateAssignedInterval() and updatePreviousInterval() here,
4325         //   because regRec may not be a even-numbered float register.
4326
4327         // Update second half RegRecord of a double register for TYP_DOUBLE
4328         if (regRec->assignedInterval->registerType == TYP_DOUBLE)
4329         {
4330             RegRecord* anotherHalfRegRec = findAnotherHalfRegRec(regRec);
4331
4332             anotherHalfRegRec->assignedInterval = regRec->assignedInterval;
4333             anotherHalfRegRec->previousInterval = nullptr;
4334         }
4335 #endif // _TARGET_ARM_
4336
4337 #ifdef DEBUG
4338         if (spill)
4339         {
4340             dumpLsraAllocationEvent(LSRA_EVENT_RESTORE_PREVIOUS_INTERVAL_AFTER_SPILL, regRec->assignedInterval,
4341                                     thisRegNum);
4342         }
4343         else
4344         {
4345             dumpLsraAllocationEvent(LSRA_EVENT_RESTORE_PREVIOUS_INTERVAL, regRec->assignedInterval, thisRegNum);
4346         }
4347 #endif // DEBUG
4348     }
4349     else
4350     {
4351         updateAssignedInterval(regRec, nullptr, assignedInterval->registerType);
4352         updatePreviousInterval(regRec, nullptr, assignedInterval->registerType);
4353     }
4354 }
4355
4356 //------------------------------------------------------------------------
4357 // spillGCRefs: Spill any GC-type intervals that are currently in registers.a
4358 //
4359 // Arguments:
4360 //    killRefPosition - The RefPosition for the kill
4361 //
4362 // Return Value:
4363 //    None.
4364 //
4365 void LinearScan::spillGCRefs(RefPosition* killRefPosition)
4366 {
4367     // For each physical register that can hold a GC type,
4368     // if it is occupied by an interval of a GC type, spill that interval.
4369     regMaskTP candidateRegs = killRefPosition->registerAssignment;
4370     while (candidateRegs != RBM_NONE)
4371     {
4372         regMaskTP nextRegBit = genFindLowestBit(candidateRegs);
4373         candidateRegs &= ~nextRegBit;
4374         regNumber  nextReg          = genRegNumFromMask(nextRegBit);
4375         RegRecord* regRecord        = getRegisterRecord(nextReg);
4376         Interval*  assignedInterval = regRecord->assignedInterval;
4377         if (assignedInterval == nullptr || (assignedInterval->isActive == false) ||
4378             !varTypeIsGC(assignedInterval->registerType))
4379         {
4380             continue;
4381         }
4382         unassignPhysReg(regRecord, assignedInterval->recentRefPosition);
4383     }
4384     INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DONE_KILL_GC_REFS, nullptr, REG_NA, nullptr));
4385 }
4386
4387 //------------------------------------------------------------------------
4388 // processBlockEndAllocation: Update var locations after 'currentBlock' has been allocated
4389 //
4390 // Arguments:
4391 //    currentBlock - the BasicBlock we have just finished allocating registers for
4392 //
4393 // Return Value:
4394 //    None
4395 //
4396 // Notes:
4397 //    Calls processBlockEndLocations() to set the outVarToRegMap, then gets the next block,
4398 //    and sets the inVarToRegMap appropriately.
4399
4400 void LinearScan::processBlockEndAllocation(BasicBlock* currentBlock)
4401 {
4402     assert(currentBlock != nullptr);
4403     if (enregisterLocalVars)
4404     {
4405         processBlockEndLocations(currentBlock);
4406     }
4407     markBlockVisited(currentBlock);
4408
4409     // Get the next block to allocate.
4410     // When the last block in the method has successors, there will be a final "RefTypeBB" to
4411     // ensure that we get the varToRegMap set appropriately, but in that case we don't need
4412     // to worry about "nextBlock".
4413     BasicBlock* nextBlock = getNextBlock();
4414     if (nextBlock != nullptr)
4415     {
4416         processBlockStartLocations(nextBlock);
4417     }
4418 }
4419
4420 //------------------------------------------------------------------------
4421 // rotateBlockStartLocation: When in the LSRA_BLOCK_BOUNDARY_ROTATE stress mode, attempt to
4422 //                           "rotate" the register assignment for a localVar to the next higher
4423 //                           register that is available.
4424 //
4425 // Arguments:
4426 //    interval      - the Interval for the variable whose register is getting rotated
4427 //    targetReg     - its register assignment from the predecessor block being used for live-in
4428 //    availableRegs - registers available for use
4429 //
4430 // Return Value:
4431 //    The new register to use.
4432
4433 #ifdef DEBUG
4434 regNumber LinearScan::rotateBlockStartLocation(Interval* interval, regNumber targetReg, regMaskTP availableRegs)
4435 {
4436     if (targetReg != REG_STK && getLsraBlockBoundaryLocations() == LSRA_BLOCK_BOUNDARY_ROTATE)
4437     {
4438         // If we're rotating the register locations at block boundaries, try to use
4439         // the next higher register number of the appropriate register type.
4440         regMaskTP candidateRegs = allRegs(interval->registerType) & availableRegs;
4441         regNumber firstReg      = REG_NA;
4442         regNumber newReg        = REG_NA;
4443         while (candidateRegs != RBM_NONE)
4444         {
4445             regMaskTP nextRegBit = genFindLowestBit(candidateRegs);
4446             candidateRegs &= ~nextRegBit;
4447             regNumber nextReg = genRegNumFromMask(nextRegBit);
4448             if (nextReg > targetReg)
4449             {
4450                 newReg = nextReg;
4451                 break;
4452             }
4453             else if (firstReg == REG_NA)
4454             {
4455                 firstReg = nextReg;
4456             }
4457         }
4458         if (newReg == REG_NA)
4459         {
4460             assert(firstReg != REG_NA);
4461             newReg = firstReg;
4462         }
4463         targetReg = newReg;
4464     }
4465     return targetReg;
4466 }
4467 #endif // DEBUG
4468
4469 #ifdef _TARGET_ARM_
4470 //--------------------------------------------------------------------------------------
4471 // isSecondHalfReg: Test if recRec is second half of double register
4472 //                  which is assigned to an interval.
4473 //
4474 // Arguments:
4475 //    regRec - a register to be tested
4476 //    interval - an interval which is assigned to some register
4477 //
4478 // Assumptions:
4479 //    None
4480 //
4481 // Return Value:
4482 //    True only if regRec is second half of assignedReg in interval
4483 //
4484 bool LinearScan::isSecondHalfReg(RegRecord* regRec, Interval* interval)
4485 {
4486     RegRecord* assignedReg = interval->assignedReg;
4487
4488     if (assignedReg != nullptr && interval->registerType == TYP_DOUBLE)
4489     {
4490         // interval should have been allocated to a valid double register
4491         assert(genIsValidDoubleReg(assignedReg->regNum));
4492
4493         // Find a second half RegRecord of double register
4494         regNumber firstRegNum  = assignedReg->regNum;
4495         regNumber secondRegNum = REG_NEXT(firstRegNum);
4496
4497         assert(genIsValidFloatReg(secondRegNum) && !genIsValidDoubleReg(secondRegNum));
4498
4499         RegRecord* secondRegRec = getRegisterRecord(secondRegNum);
4500
4501         return secondRegRec == regRec;
4502     }
4503
4504     return false;
4505 }
4506
4507 //------------------------------------------------------------------------------------------
4508 // getSecondHalfRegRec: Get the second (odd) half of an ARM32 double register
4509 //
4510 // Arguments:
4511 //    regRec - A float RegRecord
4512 //
4513 // Assumptions:
4514 //    regRec must be a valid double register (i.e. even)
4515 //
4516 // Return Value:
4517 //    The RegRecord for the second half of the double register
4518 //
4519 RegRecord* LinearScan::getSecondHalfRegRec(RegRecord* regRec)
4520 {
4521     regNumber  secondHalfRegNum;
4522     RegRecord* secondHalfRegRec;
4523
4524     assert(genIsValidDoubleReg(regRec->regNum));
4525
4526     secondHalfRegNum = REG_NEXT(regRec->regNum);
4527     secondHalfRegRec = getRegisterRecord(secondHalfRegNum);
4528
4529     return secondHalfRegRec;
4530 }
4531 //------------------------------------------------------------------------------------------
4532 // findAnotherHalfRegRec: Find another half RegRecord which forms same ARM32 double register
4533 //
4534 // Arguments:
4535 //    regRec - A float RegRecord
4536 //
4537 // Assumptions:
4538 //    None
4539 //
4540 // Return Value:
4541 //    A RegRecord which forms same double register with regRec
4542 //
4543 RegRecord* LinearScan::findAnotherHalfRegRec(RegRecord* regRec)
4544 {
4545     regNumber  anotherHalfRegNum;
4546     RegRecord* anotherHalfRegRec;
4547
4548     assert(genIsValidFloatReg(regRec->regNum));
4549
4550     // Find another half register for TYP_DOUBLE interval,
4551     // following same logic in canRestorePreviousInterval().
4552     if (genIsValidDoubleReg(regRec->regNum))
4553     {
4554         anotherHalfRegNum = REG_NEXT(regRec->regNum);
4555         assert(!genIsValidDoubleReg(anotherHalfRegNum));
4556     }
4557     else
4558     {
4559         anotherHalfRegNum = REG_PREV(regRec->regNum);
4560         assert(genIsValidDoubleReg(anotherHalfRegNum));
4561     }
4562     anotherHalfRegRec = getRegisterRecord(anotherHalfRegNum);
4563
4564     return anotherHalfRegRec;
4565 }
4566 #endif
4567
4568 //--------------------------------------------------------------------------------------
4569 // canRestorePreviousInterval: Test if we can restore previous interval
4570 //
4571 // Arguments:
4572 //    regRec - a register which contains previous interval to be restored
4573 //    assignedInterval - an interval just unassigned
4574 //
4575 // Assumptions:
4576 //    None
4577 //
4578 // Return Value:
4579 //    True only if previous interval of regRec can be restored
4580 //
4581 bool LinearScan::canRestorePreviousInterval(RegRecord* regRec, Interval* assignedInterval)
4582 {
4583     bool retVal =
4584         (regRec->previousInterval != nullptr && regRec->previousInterval != assignedInterval &&
4585          regRec->previousInterval->assignedReg == regRec && regRec->previousInterval->getNextRefPosition() != nullptr);
4586
4587 #ifdef _TARGET_ARM_
4588     if (retVal && regRec->previousInterval->registerType == TYP_DOUBLE)
4589     {
4590         RegRecord* anotherHalfRegRec = findAnotherHalfRegRec(regRec);
4591
4592         retVal = retVal && anotherHalfRegRec->assignedInterval == nullptr;
4593     }
4594 #endif
4595
4596     return retVal;
4597 }
4598
4599 bool LinearScan::isAssignedToInterval(Interval* interval, RegRecord* regRec)
4600 {
4601     bool isAssigned = (interval->assignedReg == regRec);
4602 #ifdef _TARGET_ARM_
4603     isAssigned |= isSecondHalfReg(regRec, interval);
4604 #endif
4605     return isAssigned;
4606 }
4607
4608 void LinearScan::unassignIntervalBlockStart(RegRecord* regRecord, VarToRegMap inVarToRegMap)
4609 {
4610     // Is there another interval currently assigned to this register?  If so unassign it.
4611     Interval* assignedInterval = regRecord->assignedInterval;
4612     if (assignedInterval != nullptr)
4613     {
4614         if (isAssignedToInterval(assignedInterval, regRecord))
4615         {
4616             // Only localVars, constants or vector upper halves should be assigned to registers at block boundaries.
4617             if (!assignedInterval->isLocalVar)
4618             {
4619                 assert(assignedInterval->isConstant || assignedInterval->IsUpperVector());
4620                 // Don't need to update the VarToRegMap.
4621                 inVarToRegMap = nullptr;
4622             }
4623
4624             regNumber assignedRegNum = assignedInterval->assignedReg->regNum;
4625
4626             // If the interval is active, it will be set to active when we reach its new
4627             // register assignment (which we must not yet have done, or it wouldn't still be
4628             // assigned to this register).
4629             assignedInterval->isActive = false;
4630             unassignPhysReg(assignedInterval->assignedReg, nullptr);
4631             if ((inVarToRegMap != nullptr) && inVarToRegMap[assignedInterval->getVarIndex(compiler)] == assignedRegNum)
4632             {
4633                 inVarToRegMap[assignedInterval->getVarIndex(compiler)] = REG_STK;
4634             }
4635         }
4636         else
4637         {
4638             // This interval is no longer assigned to this register.
4639             updateAssignedInterval(regRecord, nullptr, assignedInterval->registerType);
4640         }
4641     }
4642 }
4643
//------------------------------------------------------------------------
// processBlockStartLocations: Update var locations on entry to 'currentBlock' and clear constant
//                             registers.
//
// Arguments:
//    currentBlock   - the BasicBlock we are about to allocate registers for
//
// Return Value:
//    None
//
// Notes:
//    Whether we are in the allocation pass or the resolution (write-back) pass is
//    determined by the 'allocationPassComplete' member, not by a parameter.
//    During the allocation pass, we use the outVarToRegMap of the selected predecessor to
//    determine the lclVar locations for the inVarToRegMap.
//    During the resolution (write-back) pass, we only modify the inVarToRegMap in cases where
//    a lclVar was spilled after the block had been completed.
void LinearScan::processBlockStartLocations(BasicBlock* currentBlock)
{
    // If we have no register candidates we should only call this method during allocation.

    assert(enregisterLocalVars || !allocationPassComplete);

    if (!enregisterLocalVars)
    {
        // Just clear any constant registers and return.
        for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg))
        {
            RegRecord* physRegRecord    = getRegisterRecord(reg);
            Interval*  assignedInterval = physRegRecord->assignedInterval;

            if (assignedInterval != nullptr)
            {
                // With no enregistered locals, only constant intervals can occupy registers here.
                assert(assignedInterval->isConstant);
                physRegRecord->assignedInterval = nullptr;
            }
        }
        return;
    }

    // NOTE(review): 'hasCriticalInEdge' appears unused in this function — confirm before removing.
    unsigned    predBBNum         = blockInfo[currentBlock->bbNum].predBBNum;
    VarToRegMap predVarToRegMap   = getOutVarToRegMap(predBBNum);
    VarToRegMap inVarToRegMap     = getInVarToRegMap(currentBlock->bbNum);
    bool        hasCriticalInEdge = blockInfo[currentBlock->bbNum].hasCriticalInEdge;

    // Restrict attention to tracked register candidates that are live into this block.
    VarSetOps::AssignNoCopy(compiler, currentLiveVars,
                            VarSetOps::Intersection(compiler, registerCandidateVars, currentBlock->bbLiveIn));
#ifdef DEBUG
    if (getLsraExtendLifeTimes())
    {
        VarSetOps::AssignNoCopy(compiler, currentLiveVars, registerCandidateVars);
    }
    // If we are rotating register assignments at block boundaries, we want to make the
    // inactive registers available for the rotation.
    regMaskTP inactiveRegs = RBM_NONE;
#endif // DEBUG
    regMaskTP       liveRegs = RBM_NONE;
    VarSetOps::Iter iter(compiler, currentLiveVars);
    unsigned        varIndex = 0;
    // Establish the block-entry location (register or REG_STK) for each live-in lclVar.
    while (iter.NextElem(&varIndex))
    {
        unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
        if (!compiler->lvaTable[varNum].lvLRACandidate)
        {
            continue;
        }
        regNumber    targetReg;
        Interval*    interval        = getIntervalForLocalVar(varIndex);
        RefPosition* nextRefPosition = interval->getNextRefPosition();
        assert(nextRefPosition != nullptr);

        if (!allocationPassComplete)
        {
            // Allocation pass: take the location from the selected predecessor's exit map.
            targetReg = getVarReg(predVarToRegMap, varIndex);
#ifdef DEBUG
            regNumber newTargetReg = rotateBlockStartLocation(interval, targetReg, (~liveRegs | inactiveRegs));
            if (newTargetReg != targetReg)
            {
                targetReg = newTargetReg;
                setIntervalAsSplit(interval);
            }
#endif // DEBUG
            setVarReg(inVarToRegMap, varIndex, targetReg);
        }
        else // allocationPassComplete (i.e. resolution/write-back pass)
        {
            targetReg = getVarReg(inVarToRegMap, varIndex);
            // There are four cases that we need to consider during the resolution pass:
            // 1. This variable had a register allocated initially, and it was not spilled in the RefPosition
            //    that feeds this block.  In this case, both targetReg and predVarToRegMap[varIndex] will be targetReg.
            // 2. This variable had not been spilled prior to the end of predBB, but was later spilled, so
            //    predVarToRegMap[varIndex] will be REG_STK, but targetReg is its former allocated value.
            //    In this case, we will normally change it to REG_STK.  We will update its "spilled" status when we
            //    encounter it in resolveLocalRef().
            // 2a. If the next RefPosition is marked as a copyReg, we need to retain the allocated register.  This is
            //     because the copyReg RefPosition will not have recorded the "home" register, yet downstream
            //     RefPositions rely on the correct "home" register.
            // 3. This variable was spilled before we reached the end of predBB.  In this case, both targetReg and
            //    predVarToRegMap[varIndex] will be REG_STK, and the next RefPosition will have been marked
            //    as reload during allocation time if necessary (note that by the time we actually reach the next
            //    RefPosition, we may be using a different predecessor, at which it is still in a register).
            // 4. This variable was spilled during the allocation of this block, so targetReg is REG_STK
            //    (because we set inVarToRegMap at the time we spilled it), but predVarToRegMap[varIndex]
            //    is not REG_STK.  We retain the REG_STK value in the inVarToRegMap.
            if (targetReg != REG_STK)
            {
                if (getVarReg(predVarToRegMap, varIndex) != REG_STK)
                {
                    // Case #1 above.
                    assert(getVarReg(predVarToRegMap, varIndex) == targetReg ||
                           getLsraBlockBoundaryLocations() == LSRA_BLOCK_BOUNDARY_ROTATE);
                }
                else if (!nextRefPosition->copyReg)
                {
                    // case #2 above.
                    setVarReg(inVarToRegMap, varIndex, REG_STK);
                    targetReg = REG_STK;
                }
                // Else case 2a. - retain targetReg.
            }
            // Else case #3 or #4, we retain targetReg and nothing further to do or assert.
        }
        if (interval->physReg == targetReg)
        {
            // Already in the right register; if active, just record it as live and move on.
            if (interval->isActive)
            {
                assert(targetReg != REG_STK);
                assert(interval->assignedReg != nullptr && interval->assignedReg->regNum == targetReg &&
                       interval->assignedReg->assignedInterval == interval);
                liveRegs |= genRegMask(targetReg);
                continue;
            }
        }
        else if (interval->physReg != REG_NA)
        {
            // This can happen if we are using the locations from a basic block other than the
            // immediately preceding one - where the variable was in a different location.
            if (targetReg != REG_STK)
            {
                // Unassign it from the register (it will get a new register below).
                if (interval->assignedReg != nullptr && interval->assignedReg->assignedInterval == interval)
                {
                    interval->isActive = false;
                    unassignPhysReg(getRegisterRecord(interval->physReg), nullptr);
                }
                else
                {
                    // This interval was live in this register the last time we saw a reference to it,
                    // but has since been displaced.
                    interval->physReg = REG_NA;
                }
            }
            else if (!allocationPassComplete)
            {
                // Keep the register assignment - if another var has it, it will get unassigned.
                // Otherwise, resolution will fix it up later, and it will be more
                // likely to match other assignments this way.
                interval->isActive = true;
                liveRegs |= genRegMask(interval->physReg);
                INDEBUG(inactiveRegs |= genRegMask(interval->physReg));
                setVarReg(inVarToRegMap, varIndex, interval->physReg);
            }
            else
            {
                interval->physReg = REG_NA;
            }
        }
        if (targetReg != REG_STK)
        {
            // Make 'targetReg' the current home of this interval, evicting any prior occupant.
            RegRecord* targetRegRecord = getRegisterRecord(targetReg);
            liveRegs |= genRegMask(targetReg);
            if (!interval->isActive)
            {
                interval->isActive    = true;
                interval->physReg     = targetReg;
                interval->assignedReg = targetRegRecord;
            }
            if (targetRegRecord->assignedInterval != interval)
            {
#ifdef _TARGET_ARM_
                // If this is a TYP_DOUBLE interval, and the assigned interval is either null or is TYP_FLOAT,
                // we also need to unassign the other half of the register.
                // Note that if the assigned interval is TYP_DOUBLE, it will be unassigned below.
                if ((interval->registerType == TYP_DOUBLE) &&
                    ((targetRegRecord->assignedInterval == nullptr) ||
                     (targetRegRecord->assignedInterval->registerType == TYP_FLOAT)))
                {
                    assert(genIsValidDoubleReg(targetReg));
                    unassignIntervalBlockStart(findAnotherHalfRegRec(targetRegRecord),
                                               allocationPassComplete ? nullptr : inVarToRegMap);
                }
#endif // _TARGET_ARM_
                unassignIntervalBlockStart(targetRegRecord, allocationPassComplete ? nullptr : inVarToRegMap);
                assignPhysReg(targetRegRecord, interval);
            }
            // If the most recent (non-copyReg) reference used a different register, flag the
            // next RefPosition as out-of-order so resolution knows the home register changed.
            if (interval->recentRefPosition != nullptr && !interval->recentRefPosition->copyReg &&
                interval->recentRefPosition->registerAssignment != genRegMask(targetReg))
            {
                interval->getNextRefPosition()->outOfOrder = true;
            }
        }
    }

    // Unassign any registers that are no longer live.
    for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg))
    {
        if ((liveRegs & genRegMask(reg)) == 0)
        {
            RegRecord* physRegRecord    = getRegisterRecord(reg);
            Interval*  assignedInterval = physRegRecord->assignedInterval;

            if (assignedInterval != nullptr)
            {
                assert(assignedInterval->isLocalVar || assignedInterval->isConstant ||
                       assignedInterval->IsUpperVector());

                if (!assignedInterval->isConstant && assignedInterval->assignedReg == physRegRecord)
                {
                    assignedInterval->isActive = false;
                    if (assignedInterval->getNextRefPosition() == nullptr)
                    {
                        unassignPhysReg(physRegRecord, nullptr);
                    }
                    if (!assignedInterval->IsUpperVector())
                    {
                        inVarToRegMap[assignedInterval->getVarIndex(compiler)] = REG_STK;
                    }
                }
                else
                {
                    // This interval may still be active, but was in another register in an
                    // intervening block.
                    updateAssignedInterval(physRegRecord, nullptr, assignedInterval->registerType);
                }

#ifdef _TARGET_ARM_
                // unassignPhysReg, above, may have restored a 'previousInterval', in which case we need to
                // get the value of 'physRegRecord->assignedInterval' rather than using 'assignedInterval'.
                if (physRegRecord->assignedInterval != nullptr)
                {
                    assignedInterval = physRegRecord->assignedInterval;
                }
                if (assignedInterval->registerType == TYP_DOUBLE)
                {
                    // Skip next float register, because we already addressed a double register
                    assert(genIsValidDoubleReg(reg));
                    reg = REG_NEXT(reg);
                }
#endif // _TARGET_ARM_
            }
        }
#ifdef _TARGET_ARM_
        else
        {
            RegRecord* physRegRecord    = getRegisterRecord(reg);
            Interval*  assignedInterval = physRegRecord->assignedInterval;

            if (assignedInterval != nullptr && assignedInterval->registerType == TYP_DOUBLE)
            {
                // Skip next float register, because we already addressed a double register
                assert(genIsValidDoubleReg(reg));
                reg = REG_NEXT(reg);
            }
        }
#endif // _TARGET_ARM_
    }
}
4910
4911 //------------------------------------------------------------------------
4912 // processBlockEndLocations: Record the variables occupying registers after completing the current block.
4913 //
4914 // Arguments:
4915 //    currentBlock - the block we have just completed.
4916 //
4917 // Return Value:
4918 //    None
4919 //
4920 // Notes:
4921 //    This must be called both during the allocation and resolution (write-back) phases.
4922 //    This is because we need to have the outVarToRegMap locations in order to set the locations
4923 //    at successor blocks during allocation time, but if lclVars are spilled after a block has been
4924 //    completed, we need to record the REG_STK location for those variables at resolution time.
4925
4926 void LinearScan::processBlockEndLocations(BasicBlock* currentBlock)
4927 {
4928     assert(currentBlock != nullptr && currentBlock->bbNum == curBBNum);
4929     VarToRegMap outVarToRegMap = getOutVarToRegMap(curBBNum);
4930
4931     VarSetOps::AssignNoCopy(compiler, currentLiveVars,
4932                             VarSetOps::Intersection(compiler, registerCandidateVars, currentBlock->bbLiveOut));
4933 #ifdef DEBUG
4934     if (getLsraExtendLifeTimes())
4935     {
4936         VarSetOps::Assign(compiler, currentLiveVars, registerCandidateVars);
4937     }
4938 #endif // DEBUG
4939     regMaskTP       liveRegs = RBM_NONE;
4940     VarSetOps::Iter iter(compiler, currentLiveVars);
4941     unsigned        varIndex = 0;
4942     while (iter.NextElem(&varIndex))
4943     {
4944         Interval* interval = getIntervalForLocalVar(varIndex);
4945         if (interval->isActive)
4946         {
4947             assert(interval->physReg != REG_NA && interval->physReg != REG_STK);
4948             setVarReg(outVarToRegMap, varIndex, interval->physReg);
4949         }
4950         else
4951         {
4952             outVarToRegMap[varIndex] = REG_STK;
4953         }
4954 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
4955         // Ensure that we have no partially-spilled large vector locals.
4956         assert(!varTypeNeedsPartialCalleeSave(interval->registerType) || !interval->isPartiallySpilled);
4957 #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
4958     }
4959     INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_END_BB));
4960 }
4961
4962 #ifdef DEBUG
4963 void LinearScan::dumpRefPositions(const char* str)
4964 {
4965     printf("------------\n");
4966     printf("REFPOSITIONS %s: \n", str);
4967     printf("------------\n");
4968     for (RefPosition& refPos : refPositions)
4969     {
4970         refPos.dump();
4971     }
4972 }
4973 #endif // DEBUG
4974
4975 bool LinearScan::registerIsFree(regNumber regNum, RegisterType regType)
4976 {
4977     RegRecord* physRegRecord = getRegisterRecord(regNum);
4978
4979     bool isFree = physRegRecord->isFree();
4980
4981 #ifdef _TARGET_ARM_
4982     if (isFree && regType == TYP_DOUBLE)
4983     {
4984         isFree = getSecondHalfRegRec(physRegRecord)->isFree();
4985     }
4986 #endif // _TARGET_ARM_
4987
4988     return isFree;
4989 }
4990
4991 //------------------------------------------------------------------------
4992 // LinearScan::freeRegister: Make a register available for use
4993 //
4994 // Arguments:
4995 //    physRegRecord - the RegRecord for the register to be freed.
4996 //
4997 // Return Value:
4998 //    None.
4999 //
5000 // Assumptions:
5001 //    None.
5002 //    It may be that the RegRecord has already been freed, e.g. due to a kill,
5003 //    in which case this method has no effect.
5004 //
5005 // Notes:
5006 //    If there is currently an Interval assigned to this register, and it has
5007 //    more references (i.e. this is a local last-use, but more uses and/or
5008 //    defs remain), it will remain assigned to the physRegRecord.  However, since
5009 //    it is marked inactive, the register will be available, albeit less desirable
5010 //    to allocate.
5011 void LinearScan::freeRegister(RegRecord* physRegRecord)
5012 {
5013     Interval* assignedInterval = physRegRecord->assignedInterval;
5014     // It may have already been freed by a "Kill"
5015     if (assignedInterval != nullptr)
5016     {
5017         assignedInterval->isActive = false;
5018         // If this is a constant node, that we may encounter again (e.g. constant),
5019         // don't unassign it until we need the register.
5020         if (!assignedInterval->isConstant)
5021         {
5022             RefPosition* nextRefPosition = assignedInterval->getNextRefPosition();
5023             // Unassign the register only if there are no more RefPositions, or the next
5024             // one is a def.  Note that the latter condition doesn't actually ensure that
5025             // there aren't subsequent uses that could be reached by a def in the assigned
5026             // register, but is merely a heuristic to avoid tying up the register (or using
5027             // it when it's non-optimal).  A better alternative would be to use SSA, so that
5028             // we wouldn't unnecessarily link separate live ranges to the same register.
5029             if (nextRefPosition == nullptr || RefTypeIsDef(nextRefPosition->refType))
5030             {
5031 #ifdef _TARGET_ARM_
5032                 assert((assignedInterval->registerType != TYP_DOUBLE) || genIsValidDoubleReg(physRegRecord->regNum));
5033 #endif // _TARGET_ARM_
5034                 unassignPhysReg(physRegRecord, nullptr);
5035             }
5036         }
5037     }
5038 }
5039
5040 void LinearScan::freeRegisters(regMaskTP regsToFree)
5041 {
5042     if (regsToFree == RBM_NONE)
5043     {
5044         return;
5045     }
5046
5047     INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_FREE_REGS));
5048     while (regsToFree != RBM_NONE)
5049     {
5050         regMaskTP nextRegBit = genFindLowestBit(regsToFree);
5051         regsToFree &= ~nextRegBit;
5052         regNumber nextReg = genRegNumFromMask(nextRegBit);
5053         freeRegister(getRegisterRecord(nextReg));
5054     }
5055 }
5056
5057 // Actual register allocation, accomplished by iterating over all of the previously
5058 // constructed Intervals
5059 // Loosely based on raAssignVars()
5060 //
5061 void LinearScan::allocateRegisters()
5062 {
5063     JITDUMP("*************** In LinearScan::allocateRegisters()\n");
5064     DBEXEC(VERBOSE, lsraDumpIntervals("before allocateRegisters"));
5065
5066     // at start, nothing is active except for register args
5067     for (Interval& interval : intervals)
5068     {
5069         Interval* currentInterval          = &interval;
5070         currentInterval->recentRefPosition = nullptr;
5071         currentInterval->isActive          = false;
5072         if (currentInterval->isLocalVar)
5073         {
5074             LclVarDsc* varDsc = currentInterval->getLocalVar(compiler);
5075             if (varDsc->lvIsRegArg && currentInterval->firstRefPosition != nullptr)
5076             {
5077                 currentInterval->isActive = true;
5078             }
5079         }
5080     }
5081
5082 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
5083     if (enregisterLocalVars)
5084     {
5085         VarSetOps::Iter largeVectorVarsIter(compiler, largeVectorVars);
5086         unsigned        largeVectorVarIndex = 0;
5087         while (largeVectorVarsIter.NextElem(&largeVectorVarIndex))
5088         {
5089             Interval* lclVarInterval           = getIntervalForLocalVar(largeVectorVarIndex);
5090             lclVarInterval->isPartiallySpilled = false;
5091         }
5092     }
5093 #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
5094
5095     for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg))
5096     {
5097         getRegisterRecord(reg)->recentRefPosition = nullptr;
5098         getRegisterRecord(reg)->isActive          = false;
5099     }
5100
5101 #ifdef DEBUG
5102     regNumber lastAllocatedReg = REG_NA;
5103     if (VERBOSE)
5104     {
5105         dumpRefPositions("BEFORE ALLOCATION");
5106         dumpVarRefPositions("BEFORE ALLOCATION");
5107
5108         printf("\n\nAllocating Registers\n"
5109                "--------------------\n");
5110         // Start with a small set of commonly used registers, so that we don't keep having to print a new title.
5111         registersToDump = LsraLimitSmallIntSet | LsraLimitSmallFPSet;
5112         dumpRegRecordHeader();
5113         // Now print an empty "RefPosition", since we complete the dump of the regs at the beginning of the loop.
5114         printf(indentFormat, "");
5115     }
5116 #endif // DEBUG
5117
5118     BasicBlock* currentBlock = nullptr;
5119
5120     LsraLocation prevLocation    = MinLocation;
5121     regMaskTP    regsToFree      = RBM_NONE;
5122     regMaskTP    delayRegsToFree = RBM_NONE;
5123
5124     // This is the most recent RefPosition for which a register was allocated
5125     // - currently only used for DEBUG but maintained in non-debug, for clarity of code
5126     //   (and will be optimized away because in non-debug spillAlways() unconditionally returns false)
5127     RefPosition* lastAllocatedRefPosition = nullptr;
5128
5129     bool handledBlockEnd = false;
5130
5131     for (RefPosition& refPositionIterator : refPositions)
5132     {
5133         RefPosition* currentRefPosition = &refPositionIterator;
5134
5135 #ifdef DEBUG
5136         // Set the activeRefPosition to null until we're done with any boundary handling.
5137         activeRefPosition = nullptr;
5138         if (VERBOSE)
5139         {
5140             // We're really dumping the RegRecords "after" the previous RefPosition, but it's more convenient
5141             // to do this here, since there are a number of "continue"s in this loop.
5142             dumpRegRecords();
5143         }
5144 #endif // DEBUG
5145
5146         // This is the previousRefPosition of the current Referent, if any
5147         RefPosition* previousRefPosition = nullptr;
5148
5149         Interval*      currentInterval = nullptr;
5150         Referenceable* currentReferent = nullptr;
5151         bool           isInternalRef   = false;
5152         RefType        refType         = currentRefPosition->refType;
5153
5154         currentReferent = currentRefPosition->referent;
5155
5156         if (spillAlways() && lastAllocatedRefPosition != nullptr && !lastAllocatedRefPosition->isPhysRegRef &&
5157             !lastAllocatedRefPosition->getInterval()->isInternal &&
5158             (RefTypeIsDef(lastAllocatedRefPosition->refType) || lastAllocatedRefPosition->getInterval()->isLocalVar))
5159         {
            // NOTE(review): this unassigns the register of the last allocated RefPosition;
            // the guarding condition is above this chunk (presumably a spill-stress mode) -
            // confirm against the enclosing 'if'.
5160             assert(lastAllocatedRefPosition->registerAssignment != RBM_NONE);
5161             RegRecord* regRecord = lastAllocatedRefPosition->getInterval()->assignedReg;
5162             unassignPhysReg(regRecord, lastAllocatedRefPosition);
5163             // Now set lastAllocatedRefPosition to null, so that we don't try to spill it again
5164             lastAllocatedRefPosition = nullptr;
5165         }
5166
5167         // We wait to free any registers until we've completed all the
5168         // uses for the current node.
5169         // This avoids reusing registers too soon.
5170         // We free before the last true def (after all the uses & internal
5171         // registers), and then again at the beginning of the next node.
5172         // This is made easier by assigning two LsraLocations per node - one
5173         // for all the uses, internal registers & all but the last def, and
5174         // another for the final def (if any).
5175
5176         LsraLocation currentLocation = currentRefPosition->nodeLocation;
5177
5178         if ((regsToFree | delayRegsToFree) != RBM_NONE)
5179         {
5180             // Free at a new location, or at a basic block boundary
5181             if (refType == RefTypeBB)
5182             {
5183                 assert(currentLocation > prevLocation);
5184             }
5185             if (currentLocation > prevLocation)
5186             {
5187                 freeRegisters(regsToFree);
5188                 if ((currentLocation > (prevLocation + 1)) && (delayRegsToFree != RBM_NONE))
5189                 {
5190                     // We should never see a delayReg that is delayed until a Location that has no RefPosition
5191                     // (that would be the RefPosition that it was supposed to interfere with).
5192                     assert(!"Found a delayRegFree associated with Location with no reference");
5193                     // However, to be cautious for the Release build case, we will free them.
5194                     freeRegisters(delayRegsToFree);
5195                     delayRegsToFree = RBM_NONE;
5196                 }
5197                 // Delay-freed regs become the regs to free at the next location advance.
5198                 regsToFree      = delayRegsToFree;
5199                 delayRegsToFree = RBM_NONE;
5200             }
5201         }
5202         prevLocation = currentLocation;
5202
5203         // get previous refposition, then current refpos is the new previous
5204         if (currentReferent != nullptr)
5205         {
5206             previousRefPosition                = currentReferent->recentRefPosition;
5207             currentReferent->recentRefPosition = currentRefPosition;
5208         }
5209         else
5210         {
            // Only block boundaries and GC-kill positions have no referent
            // (no associated Interval or RegRecord).
5211             assert((refType == RefTypeBB) || (refType == RefTypeKillGCRefs));
5212         }
5213
5214 #ifdef DEBUG
5215         activeRefPosition = currentRefPosition;
5216
5217         // For the purposes of register resolution, we handle the DummyDefs before
5218         // the block boundary - so the RefTypeBB is after all the DummyDefs.
5219         // However, for the purposes of allocation, we want to handle the block
5220         // boundary first, so that we can free any registers occupied by lclVars
5221         // that aren't live in the next block and make them available for the
5222         // DummyDefs.
5223
5224         // If we've already handled the BlockEnd, but now we're seeing the RefTypeBB,
5225         // dump it now.
5226         if ((refType == RefTypeBB) && handledBlockEnd)
5227         {
5228             dumpNewBlock(currentBlock, currentRefPosition->nodeLocation);
5229         }
5230 #endif // DEBUG
5231
5232         if (!handledBlockEnd && (refType == RefTypeBB || refType == RefTypeDummyDef))
5233         {
5234             // Free any delayed regs (now in regsToFree) before processing the block boundary
5235             freeRegisters(regsToFree);
5236             regsToFree         = RBM_NONE;
5237             handledBlockEnd    = true;
5238             curBBStartLocation = currentRefPosition->nodeLocation;
            // A null currentBlock means this is the first boundary we've seen:
            // start the block sequence (at fgFirstBB).
5239             if (currentBlock == nullptr)
5240             {
5241                 currentBlock = startBlockSequence();
5242                 INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_START_BB, nullptr, REG_NA, compiler->fgFirstBB));
5243             }
5244             else
5245             {
5246                 processBlockEndAllocation(currentBlock);
5247                 currentBlock = moveToNextBlock();
5248                 INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_START_BB, nullptr, REG_NA, currentBlock));
5249             }
5250         }
5251
        // A RefTypeBB itself needs no allocation work; just reset for the next block end.
5252         if (refType == RefTypeBB)
5253         {
5254             handledBlockEnd = false;
5255             continue;
5256         }
5257
5258         if (refType == RefTypeKillGCRefs)
5259         {
5260             spillGCRefs(currentRefPosition);
5261             continue;
5262         }
5263
5264         // If this is a FixedReg, disassociate any inactive constant interval from this register.
5265         // Otherwise, do nothing.
5266         if (refType == RefTypeFixedReg)
5267         {
5268             RegRecord* regRecord        = currentRefPosition->getReg();
5269             Interval*  assignedInterval = regRecord->assignedInterval;
5270
            // Release an inactive constant interval's hold on the register
            // (presumably because constants can be rematerialized without a
            // spill - confirm against unassignPhysReg's constant handling).
5271             if (assignedInterval != nullptr && !assignedInterval->isActive && assignedInterval->isConstant)
5272             {
5273                 regRecord->assignedInterval = nullptr;
5274
5275 #ifdef _TARGET_ARM_
5276                 // Update overlapping floating point register for TYP_DOUBLE
5277                 if (assignedInterval->registerType == TYP_DOUBLE)
5278                 {
5279                     regRecord = findAnotherHalfRegRec(regRecord);
5280                     assert(regRecord->assignedInterval == assignedInterval);
5281                     regRecord->assignedInterval = nullptr;
5282                 }
5283 #endif
5284             }
5285             INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_FIXED_REG, nullptr, currentRefPosition->assignedReg()));
5286             continue;
5287         }
5288
5289         // If this is an exposed use, do nothing - this is merely a placeholder to attempt to
5290         // ensure that a register is allocated for the full lifetime.  The resolution logic
5291         // will take care of moving to the appropriate register if needed.
5292
5293         if (refType == RefTypeExpUse)
5294         {
5295             INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_EXP_USE));
5296             continue;
5297         }
5298
5299         regNumber assignedRegister = REG_NA;
5300
5301         if (currentRefPosition->isIntervalRef())
5302         {
5303             currentInterval  = currentRefPosition->getInterval();
5304             assignedRegister = currentInterval->physReg;
5305
5306             // Identify the special cases where we decide up-front not to allocate
5307             bool allocate = true;
5308             bool didDump  = false;
5309
5310             if (refType == RefTypeParamDef || refType == RefTypeZeroInit)
5311             {
5312                 // For a ParamDef with a weighted refCount less than unity, don't enregister it at entry.
5313                 // TODO-CQ: Consider doing this only for stack parameters, since otherwise we may be needlessly
5314                 // inserting a store.
5315                 LclVarDsc* varDsc = currentInterval->getLocalVar(compiler);
5316                 assert(varDsc != nullptr);
5317                 if (refType == RefTypeParamDef && varDsc->lvRefCntWtd() <= BB_UNITY_WEIGHT)
5318                 {
5319                     INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_ENTRY_REG_ALLOCATED, currentInterval));
5320                     didDump  = true;
5321                     allocate = false;
5322                     setIntervalAsSpilled(currentInterval);
5323                 }
5324                 // If it has no actual references, mark it as "lastUse"; since they're not actually part
5325                 // of any flow they won't have been marked during dataflow.  Otherwise, if we allocate a
5326                 // register we won't unassign it.
5327                 else if (currentRefPosition->nextRefPosition == nullptr)
5328                 {
5329                     INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_ZERO_REF, currentInterval));
5330                     currentRefPosition->lastUse = true;
5331                 }
5332             }
5333 #ifdef FEATURE_SIMD
5334 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
5335             else if (currentInterval->isUpperVector)
5336             {
5337                 // This is a save or restore of the upper half of a large vector lclVar.
5338                 Interval* lclVarInterval = currentInterval->relatedInterval;
5339                 assert(lclVarInterval->isLocalVar);
5340                 if (refType == RefTypeUpperVectorSave)
5341                 {
                    // No save is needed if the lclVar isn't in a register, or if its upper
                    // half is already saved to the stack.
5342                     if ((lclVarInterval->physReg == REG_NA) ||
5343                         (lclVarInterval->isPartiallySpilled && (currentInterval->physReg == REG_STK)))
5344                     {
5345                         allocate = false;
5346                     }
5347                     else
5348                     {
5349                         lclVarInterval->isPartiallySpilled = true;
5350                     }
5351                 }
5352                 else if (refType == RefTypeUpperVectorRestore)
5353                 {
5354                     assert(currentInterval->isUpperVector);
5355                     if (lclVarInterval->isPartiallySpilled)
5356                     {
5357                         lclVarInterval->isPartiallySpilled = false;
5358                     }
5359                     else
5360                     {
                        // Nothing was saved, so there is nothing to restore.
5361                         allocate = false;
5362                     }
5363                 }
5364             }
5365             else if (refType == RefTypeUpperVectorSave)
5366             {
5367                 assert(!currentInterval->isLocalVar);
5368                 // Note that this case looks a lot like the case below, but in this case we need to spill
5369                 // at the previous RefPosition.
5370                 // We may want to consider allocating two callee-save registers for this case, but it happens rarely
5371                 // enough that it may not warrant the additional complexity.
5372                 if (assignedRegister != REG_NA)
5373                 {
5374                     unassignPhysReg(getRegisterRecord(assignedRegister), currentInterval->firstRefPosition);
5375                     INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_REG_ALLOCATED, currentInterval));
5376                 }
5377                 currentRefPosition->registerAssignment = RBM_NONE;
5378                 continue;
5379             }
5380 #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
5381 #endif // FEATURE_SIMD
5382
5383             if (allocate == false)
5384             {
5385                 if (assignedRegister != REG_NA)
5386                 {
5387                     unassignPhysReg(getRegisterRecord(assignedRegister), currentRefPosition);
5388                 }
5389                 else if (!didDump)
5390                 {
5391                     INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_REG_ALLOCATED, currentInterval));
5392                     didDump = true;
5393                 }
5394                 currentRefPosition->registerAssignment = RBM_NONE;
5395                 continue;
5396             }
5397
5398             if (currentInterval->isSpecialPutArg)
5399             {
5400                 assert(!currentInterval->isLocalVar);
5401                 Interval* srcInterval = currentInterval->relatedInterval;
5402                 assert(srcInterval != nullptr && srcInterval->isLocalVar);
5403                 if (refType == RefTypeDef)
5404                 {
5405                     assert(srcInterval->recentRefPosition->nodeLocation == currentLocation - 1);
5406                     RegRecord* physRegRecord = srcInterval->assignedReg;
5407
5408                     // For a putarg_reg to be special, its next use location has to be the same
5409                     // as fixed reg's next kill location. Otherwise, if source lcl var's next use
5410                     // is after the kill of fixed reg but before putarg_reg's next use, fixed reg's
5411                     // kill would lead to spill of source but not the putarg_reg if it were treated
5412                     // as special.
5413                     if (srcInterval->isActive &&
5414                         genRegMask(srcInterval->physReg) == currentRefPosition->registerAssignment &&
5415                         currentInterval->getNextRefLocation() == physRegRecord->getNextRefLocation())
5416                     {
5417                         assert(physRegRecord->regNum == srcInterval->physReg);
5418
5419                         // Special putarg_reg acts as a pass-thru since both source lcl var
5420                         // and putarg_reg have the same register allocated.  Physical reg
5421                         // record of reg continue to point to source lcl var's interval
5422                         // instead of to putarg_reg's interval.  So if a spill of reg
5423                         // allocated to source lcl var happens, to reallocate to another
5424                         // tree node, before its use at call node it will lead to spill of
5425                         // lcl var instead of putarg_reg since physical reg record is pointing
5426                         // to lcl var's interval. As a result, arg reg would get trashed leading
5427                         // to bad codegen. The assumption here is that source lcl var of a
5428                         // special putarg_reg doesn't get spilled and re-allocated prior to
5429                         // its use at the call node.  This is ensured by marking physical reg
5430                         // record as busy until next kill.
5431                         physRegRecord->isBusyUntilNextKill = true;
5432                     }
5433                     else
5434                     {
5435                         currentInterval->isSpecialPutArg = false;
5436                     }
5437                 }
5438                 // If this is still a SpecialPutArg, continue;
5439                 if (currentInterval->isSpecialPutArg)
5440                 {
5441                     INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_SPECIAL_PUTARG, currentInterval,
5442                                                     currentRefPosition->assignedReg()));
5443                     continue;
5444                 }
5445             }
5446
            // The value is not currently in a register at this use, so it must be
            // reloaded from its spill location.
5447             if (assignedRegister == REG_NA && RefTypeIsUse(refType))
5448             {
5449                 currentRefPosition->reload = true;
5450                 INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_RELOAD, currentInterval, assignedRegister));
5451             }
5452         }
5453
5454         regMaskTP assignedRegBit = RBM_NONE;
5455         bool      isInRegister   = false;
5456         if (assignedRegister != REG_NA)
5457         {
5458             isInRegister   = true;
5459             assignedRegBit = genRegMask(assignedRegister);
5460             if (!currentInterval->isActive)
5461             {
5462                 // If this is a use, it must have started the block on the stack, but the register
5463                 // was available for use so we kept the association.
5464                 if (RefTypeIsUse(refType))
5465                 {
5466                     assert(enregisterLocalVars);
5467                     assert(inVarToRegMaps[curBBNum][currentInterval->getVarIndex(compiler)] == REG_STK &&
5468                            previousRefPosition->nodeLocation <= curBBStartLocation);
5469                     isInRegister = false;
5470                 }
5471                 else
5472                 {
5473                     currentInterval->isActive = true;
5474                 }
5475             }
            // Invariant: the interval and its assigned RegRecord must agree with each other.
5476             assert(currentInterval->assignedReg != nullptr &&
5477                    currentInterval->assignedReg->regNum == assignedRegister &&
5478                    currentInterval->assignedReg->assignedInterval == currentInterval);
5479         }
5480
5481         // If this is a physical register, we unconditionally assign it to itself!
5482         if (currentRefPosition->isPhysRegRef)
5483         {
5484             RegRecord* currentReg       = currentRefPosition->getReg();
5485             Interval*  assignedInterval = currentReg->assignedInterval;
5486
5487             if (assignedInterval != nullptr)
5488             {
5489                 unassignPhysReg(currentReg, assignedInterval->recentRefPosition);
5490             }
5491             currentReg->isActive = true;
5492             assignedRegister     = currentReg->regNum;
5493             assignedRegBit       = genRegMask(assignedRegister);
5494             if (refType == RefTypeKill)
5495             {
5496                 currentReg->isBusyUntilNextKill = false;
5497             }
5498         }
5499         else if (previousRefPosition != nullptr)
5500         {
            // Sanity: RefPositions on an interval form a chain, and the register (if any)
            // should match the previous assignment unless one of the listed exceptions applies.
5501             assert(previousRefPosition->nextRefPosition == currentRefPosition);
5502             assert(assignedRegister == REG_NA || assignedRegBit == previousRefPosition->registerAssignment ||
5503                    currentRefPosition->outOfOrder || previousRefPosition->copyReg ||
5504                    previousRefPosition->refType == RefTypeExpUse || currentRefPosition->refType == RefTypeDummyDef);
5505         }
5506         else if (assignedRegister != REG_NA)
5507         {
5508             // Handle the case where this is a preassigned register (i.e. parameter).
5509             // We don't want to actually use the preassigned register if it's not
5510             // going to cover the lifetime - but we had to preallocate it to ensure
5511             // that it remained live.
5512             // TODO-CQ: At some point we may want to refine the analysis here, in case
5513             // it might be beneficial to keep it in this reg for PART of the lifetime
5514             if (currentInterval->isLocalVar)
5515             {
5516                 regMaskTP preferences        = currentInterval->registerPreferences;
5517                 bool      keepAssignment     = true;
5518                 bool      matchesPreferences = (preferences & genRegMask(assignedRegister)) != RBM_NONE;
5519
5520                 // Will the assigned register cover the lifetime?  If not, does it at least
5521                 // meet the preferences for the next RefPosition?
5522                 RegRecord*   physRegRecord     = getRegisterRecord(currentInterval->physReg);
5523                 RefPosition* nextPhysRegRefPos = physRegRecord->getNextRefPosition();
5524                 if (nextPhysRegRefPos != nullptr &&
5525                     nextPhysRegRefPos->nodeLocation <= currentInterval->lastRefPosition->nodeLocation)
5526                 {
5527                     // Check to see if the existing assignment matches the preferences (e.g. callee save registers)
5528                     // and ensure that the next use of this localVar does not occur after the nextPhysRegRefPos
5529                     // There must be a next RefPosition, because we know that the Interval extends beyond the
5530                     // nextPhysRegRefPos.
5531                     RefPosition* nextLclVarRefPos = currentRefPosition->nextRefPosition;
5532                     assert(nextLclVarRefPos != nullptr);
5533                     if (!matchesPreferences || nextPhysRegRefPos->nodeLocation < nextLclVarRefPos->nodeLocation ||
5534                         physRegRecord->conflictingFixedRegReference(nextLclVarRefPos))
5535                     {
5536                         keepAssignment = false;
5537                     }
5538                 }
5539                 else if (refType == RefTypeParamDef && !matchesPreferences)
5540                 {
5541                     // Don't use the register, even if available, if it doesn't match the preferences.
5542                     // Note that this case is only for ParamDefs, for which we haven't yet taken preferences
5543                     // into account (we've just automatically got the initial location).  In other cases,
5544                     // we would already have put it in a preferenced register, if it was available.
5545                     // TODO-CQ: Consider expanding this to check availability - that would duplicate
5546                     // code here, but otherwise we may wind up in this register anyway.
5547                     keepAssignment = false;
5548                 }
5549
5550                 if (keepAssignment == false)
5551                 {
5552                     currentRefPosition->registerAssignment = allRegs(currentInterval->registerType);
5553                     unassignPhysRegNoSpill(physRegRecord);
5554
5555                     // If the preferences are currently set to just this register, reset them to allRegs
5556                     // of the appropriate type (just as we just reset the registerAssignment for this
5557                     // RefPosition).
5558                     // Otherwise, simply remove this register from the preferences, if it's there.
5559
5560                     if (currentInterval->registerPreferences == assignedRegBit)
5561                     {
5562                         currentInterval->registerPreferences = currentRefPosition->registerAssignment;
5563                     }
5564                     else
5565                     {
5566                         currentInterval->registerPreferences &= ~assignedRegBit;
5567                     }
5568
5569                     assignedRegister = REG_NA;
5570                     assignedRegBit   = RBM_NONE;
5571                 }
5572             }
5573         }
5574
5575         if (assignedRegister != REG_NA)
5576         {
5577             RegRecord* physRegRecord = getRegisterRecord(assignedRegister);
5578
5579             // If there is a conflicting fixed reference, insert a copy.
5580             if (physRegRecord->conflictingFixedRegReference(currentRefPosition))
5581             {
5582                 // We may have already reassigned the register to the conflicting reference.
5583                 // If not, we need to unassign this interval.
5584                 if (physRegRecord->assignedInterval == currentInterval)
5585                 {
5586                     unassignPhysRegNoSpill(physRegRecord);
5587                 }
5588                 currentRefPosition->moveReg = true;
5589                 assignedRegister            = REG_NA;
5590                 setIntervalAsSplit(currentInterval);
                // NOTE(review): assignedRegister was just set to REG_NA above, so this dump
                // reports REG_NA rather than the vacated register - confirm that is intended.
5591                 INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_MOVE_REG, currentInterval, assignedRegister));
5592             }
5593             else if ((genRegMask(assignedRegister) & currentRefPosition->registerAssignment) != 0)
5594             {
                // The current register satisfies this RefPosition's candidates; keep it.
5595                 currentRefPosition->registerAssignment = assignedRegBit;
5596                 if (!currentReferent->isActive)
5597                 {
5598                     // If we've got an exposed use at the top of a block, the
5599                     // interval might not have been active.  Otherwise if it's a use,
5600                     // the interval must be active.
5601                     if (refType == RefTypeDummyDef)
5602                     {
5603                         currentReferent->isActive = true;
5604                         assert(getRegisterRecord(assignedRegister)->assignedInterval == currentInterval);
5605                     }
5606                     else
5607                     {
5608                         currentRefPosition->reload = true;
5609                     }
5610                 }
5611                 INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, currentInterval, assignedRegister));
5612             }
5613             else
5614             {
5615                 assert(currentInterval != nullptr);
5616
5617                 // It's already in a register, but not one we need.
5618                 if (!RefTypeIsDef(currentRefPosition->refType))
5619                 {
5620                     regNumber copyReg = assignCopyReg(currentRefPosition);
5621                     assert(copyReg != REG_NA);
5622                     INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_COPY_REG, currentInterval, copyReg));
5623                     lastAllocatedRefPosition = currentRefPosition;
5624                     if (currentRefPosition->lastUse)
5625                     {
5626                         if (currentRefPosition->delayRegFree)
5627                         {
5628                             INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE_DELAYED, currentInterval,
5629                                                             assignedRegister));
5630                             delayRegsToFree |= (genRegMask(assignedRegister) | currentRefPosition->registerAssignment);
5631                         }
5632                         else
5633                         {
5634                             INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE, currentInterval, assignedRegister));
5635                             regsToFree |= (genRegMask(assignedRegister) | currentRefPosition->registerAssignment);
5636                         }
5637                     }
5638                     // If this is a tree temp (non-localVar) interval, we will need an explicit move.
5639                     if (!currentInterval->isLocalVar)
5640                     {
5641                         currentRefPosition->moveReg = true;
5642                         currentRefPosition->copyReg = false;
5643                     }
5644                     continue;
5645                 }
5646                 else
5647                 {
5648                     INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NEEDS_NEW_REG, nullptr, assignedRegister));
5649                     regsToFree |= genRegMask(assignedRegister);
5650                     // We want a new register, but we don't want this to be considered a spill.
5651                     assignedRegister = REG_NA;
5652                     if (physRegRecord->assignedInterval == currentInterval)
5653                     {
5654                         unassignPhysRegNoSpill(physRegRecord);
5655                     }
5656                 }
5657             }
5658         }
5659
5660         if (assignedRegister == REG_NA)
5661         {
5662             bool allocateReg = true;
5663
5664             if (currentRefPosition->RegOptional())
5665             {
5666                 // We can avoid allocating a register if it is the last use requiring a reload.
5667                 if (currentRefPosition->lastUse && currentRefPosition->reload)
5668                 {
5669                     allocateReg = false;
5670                 }
5671
5672 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE && defined(_TARGET_XARCH_)
5673                 // We can also avoid allocating a register (in fact we don't want to) if we have
5674                 // an UpperVectorRestore on xarch where the value is on the stack.
5675                 if ((currentRefPosition->refType == RefTypeUpperVectorRestore) && (currentInterval->physReg == REG_NA))
5676                 {
5677                     assert(currentRefPosition->regOptional);
5678                     allocateReg = false;
5679                 }
5680 #endif
5681
5682 #ifdef DEBUG
5683                 // Under stress mode, don't attempt to allocate a reg to
5684                 // reg optional ref position, unless it's a ParamDef.
5685                 if (allocateReg && regOptionalNoAlloc())
5686                 {
5687                     allocateReg = false;
5688                 }
5689 #endif
5690             }
5691
5692             if (allocateReg)
5693             {
5694                 // Try to allocate a register
5695                 assignedRegister = tryAllocateFreeReg(currentInterval, currentRefPosition);
5696             }
5697
5698             // If no register was found, and if the currentRefPosition must have a register,
5699             // then find a register to spill
5700             if (assignedRegister == REG_NA)
5701             {
5702                 bool isAllocatable = currentRefPosition->IsActualRef();
5703 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE && defined(_TARGET_ARM64_)
5704                 if (currentInterval->isUpperVector)
5705                 {
5706                     // On Arm64, we can't save the upper half to memory without a register.
5707                     isAllocatable = true;
5708                     assert(!currentRefPosition->RegOptional());
5709                 }
5710 #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE && _TARGET_ARM64_
5711                 if (isAllocatable)
5712                 {
5713                     if (allocateReg)
5714                     {
5715                         assignedRegister =
5716                             allocateBusyReg(currentInterval, currentRefPosition, currentRefPosition->RegOptional());
5717                     }
5718
5719                     if (assignedRegister != REG_NA)
5720                     {
5721                         INDEBUG(
5722                             dumpLsraAllocationEvent(LSRA_EVENT_ALLOC_SPILLED_REG, currentInterval, assignedRegister));
5723                     }
5724                     else
5725                     {
5726                         // This can happen only for those ref positions that are to be allocated
5727                         // only if profitable.
5728                         noway_assert(currentRefPosition->RegOptional());
5729
5730                         currentRefPosition->registerAssignment = RBM_NONE;
5731                         currentRefPosition->reload             = false;
5732                         setIntervalAsSpilled(currentInterval);
5733
5734                         INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_REG_ALLOCATED, currentInterval));
5735                     }
5736                 }
5737                 else
5738                 {
                    // Not an actual reference: leave the value on the stack.
5739                     INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_REG_ALLOCATED, currentInterval));
5740                     currentRefPosition->registerAssignment = RBM_NONE;
5741                     currentInterval->isActive              = false;
5742                     setIntervalAsSpilled(currentInterval);
5743                 }
5744             }
5745 #ifdef DEBUG
5746             else
5747             {
5748                 if (VERBOSE)
5749                 {
5750                     if (currentInterval->isConstant && (currentRefPosition->treeNode != nullptr) &&
5751                         currentRefPosition->treeNode->IsReuseRegVal())
5752                     {
5753                         dumpLsraAllocationEvent(LSRA_EVENT_REUSE_REG, currentInterval, assignedRegister, currentBlock);
5754                     }
5755                     else
5756                     {
5757                         dumpLsraAllocationEvent(LSRA_EVENT_ALLOC_REG, currentInterval, assignedRegister, currentBlock);
5758                     }
5759                 }
5760             }
5761 #endif // DEBUG
5762
5763             if (refType == RefTypeDummyDef && assignedRegister != REG_NA)
5764             {
5765                 setInVarRegForBB(curBBNum, currentInterval->varNum, assignedRegister);
5766             }
5767
5768             // If we allocated a register, and this is a use of a spilled value,
5769             // it should have been marked for reload above.
5770             if (assignedRegister != REG_NA && RefTypeIsUse(refType) && !isInRegister)
5771             {
5772                 assert(currentRefPosition->reload);
5773             }
5774         }
5775
5776         // If we allocated a register, record it
5777         if (currentInterval != nullptr && assignedRegister != REG_NA)
5778         {
5779             assignedRegBit                         = genRegMask(assignedRegister);
5780             currentRefPosition->registerAssignment = assignedRegBit;
5781             currentInterval->physReg               = assignedRegister;
5782             regsToFree &= ~assignedRegBit; // we'll set it again later if it's dead
5783
5784             // If this interval is dead, free the register.
5785             // The interval could be dead if this is a user variable, or if the
5786             // node is being evaluated for side effects, or a call whose result
5787             // is not used, etc.
5788             // If this is an UpperVector we'll neither free it nor preference it
5789             // (it will be freed when it is used).
5790             if (!currentInterval->IsUpperVector())
5791             {
5792                 if (currentRefPosition->lastUse || currentRefPosition->nextRefPosition == nullptr)
5793                 {
5794                     assert(currentRefPosition->isIntervalRef());
5795
5796                     if (refType != RefTypeExpUse && currentRefPosition->nextRefPosition == nullptr)
5797                     {
5798                         if (currentRefPosition->delayRegFree)
5799                         {
5800                             delayRegsToFree |= assignedRegBit;
5801
5802                             INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE_DELAYED));
5803                         }
5804                         else
5805                         {
5806                             regsToFree |= assignedRegBit;
5807
5808                             INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE));
5809                         }
5810                     }
5811                     else
5812                     {
                        // More RefPositions remain (or this is an ExpUse): deactivate
                        // the interval without freeing the register.
5813                         currentInterval->isActive = false;
5814                     }
5815
5816                     // Update the register preferences for the relatedInterval, if this is 'preferencedToDef'.
5817                     // Don't propagate to subsequent relatedIntervals; that will happen as they are allocated, and we
5818                     // don't know yet whether the register will be retained.
5819                     if (currentInterval->relatedInterval != nullptr)
5820                     {
5821                         currentInterval->relatedInterval->updateRegisterPreferences(assignedRegBit);
5822                     }
5823                 }
5824             }
5825
5826             lastAllocatedRefPosition = currentRefPosition;
5827         }
5828     }
5829
5830 #ifdef JIT32_GCENCODER
5831     // For the JIT32_GCENCODER, when lvaKeepAliveAndReportThis is true, we must either keep this "this" pointer
5832     // in the same register for the entire method, or keep it on the stack. Rather than imposing this constraint
5833     // as we allocate, we will force all refs to the stack if it is split or spilled.
5834     if (enregisterLocalVars && compiler->lvaKeepAliveAndReportThis())
5835     {
5836         LclVarDsc* thisVarDsc = compiler->lvaGetDesc(compiler->info.compThisArg);
5837         if (!thisVarDsc->lvDoNotEnregister)
5838         {
5839             Interval* interval = getIntervalForLocalVar(thisVarDsc->lvVarIndex);
5840             if (interval->isSplit)
5841             {
5842                 // We'll have to spill this.
5843                 setIntervalAsSpilled(interval);
5844             }
5845             if (interval->isSpilled)
5846             {
5847                 for (RefPosition* ref = interval->firstRefPosition; ref != nullptr; ref = ref->nextRefPosition)
5848                 {
5849                     if (ref->RegOptional())
5850                     {
5851                         ref->registerAssignment = RBM_NONE;
5852                         ref->reload             = false;
5853                         ref->spillAfter         = false;
5854                     }
5855                     switch (ref->refType)
5856                     {
5857                         case RefTypeDef:
5858                             if (ref->registerAssignment != RBM_NONE)
5859                             {
5860                                 ref->spillAfter = true;
5861                             }
5862                             break;
5863                         case RefTypeUse:
5864                             if (ref->registerAssignment != RBM_NONE)
5865                             {
5866                                 ref->reload     = true;
5867                                 ref->spillAfter = true;
5868                                 ref->copyReg    = false;
5869                                 ref->moveReg    = false;
5870                             }
5871                             break;
5872                     }
5873                 }
5874             }
5875         }
5876     }
5877 #endif // JIT32_GCENCODER
5878
5879     // Free registers to clear associated intervals for resolution phase
5880     CLANG_FORMAT_COMMENT_ANCHOR;
5881
5882 #ifdef DEBUG
5883     if (getLsraExtendLifeTimes())
5884     {
5885         // If we have extended lifetimes, we need to make sure all the registers are freed.
5886         for (int regNumIndex = 0; regNumIndex <= REG_FP_LAST; regNumIndex++)
5887         {
5888             RegRecord& regRecord = physRegs[regNumIndex];
5889             Interval*  interval  = regRecord.assignedInterval;
5890             if (interval != nullptr)
5891             {
5892                 interval->isActive = false;
5893                 unassignPhysReg(&regRecord, nullptr);
5894             }
5895         }
5896     }
5897     else
5898 #endif // DEBUG
5899     {
5900         freeRegisters(regsToFree | delayRegsToFree);
5901     }
5902
5903 #ifdef DEBUG
5904     if (VERBOSE)
5905     {
5906         // Dump the RegRecords after the last RefPosition is handled.
5907         dumpRegRecords();
5908         printf("\n");
5909
5910         dumpRefPositions("AFTER ALLOCATION");
5911         dumpVarRefPositions("AFTER ALLOCATION");
5912
5913         // Dump the intervals that remain active
5914         printf("Active intervals at end of allocation:\n");
5915
5916         // We COULD just reuse the intervalIter from above, but ArrayListIterator doesn't
5917         // provide a Reset function (!) - we'll probably replace this so don't bother
5918         // adding it
5919
5920         for (Interval& interval : intervals)
5921         {
5922             if (interval.isActive)
5923             {
5924                 printf("Active ");
5925                 interval.dump();
5926             }
5927         }
5928
5929         printf("\n");
5930     }
5931 #endif // DEBUG
5932 }
5933
5934 //-----------------------------------------------------------------------------
5935 // updateAssignedInterval: Update assigned interval of register.
5936 //
5937 // Arguments:
5938 //    reg      -    register to be updated
5939 //    interval -    interval to be assigned
5940 //    regType  -    register type
5941 //
5942 // Return Value:
5943 //    None
5944 //
5945 // Assumptions:
//    For ARM32, when "regType" is TYP_DOUBLE, "reg" should be an even-numbered
//    float register, i.e. the lower half of a double register.
5948 //
5949 // Note:
//    For ARM32, the two float registers constituting a double register are
//    updated together when "regType" is TYP_DOUBLE.
5952 //
5953 void LinearScan::updateAssignedInterval(RegRecord* reg, Interval* interval, RegisterType regType)
5954 {
5955 #ifdef _TARGET_ARM_
5956     // Update overlapping floating point register for TYP_DOUBLE.
5957     Interval* oldAssignedInterval = reg->assignedInterval;
5958     if (regType == TYP_DOUBLE)
5959     {
5960         RegRecord* anotherHalfReg = findAnotherHalfRegRec(reg);
5961
5962         anotherHalfReg->assignedInterval = interval;
5963     }
5964     else if ((oldAssignedInterval != nullptr) && (oldAssignedInterval->registerType == TYP_DOUBLE))
5965     {
5966         RegRecord* anotherHalfReg = findAnotherHalfRegRec(reg);
5967
5968         anotherHalfReg->assignedInterval = nullptr;
5969     }
5970 #endif
5971     reg->assignedInterval = interval;
5972 }
5973
5974 //-----------------------------------------------------------------------------
5975 // updatePreviousInterval: Update previous interval of register.
5976 //
5977 // Arguments:
5978 //    reg      -    register to be updated
5979 //    interval -    interval to be assigned
5980 //    regType  -    register type
5981 //
5982 // Return Value:
5983 //    None
5984 //
5985 // Assumptions:
//    For ARM32, when "regType" is TYP_DOUBLE, "reg" should be an even-numbered
//    float register, i.e. the lower half of a double register.
5988 //
5989 // Note:
//    For ARM32, the two float registers constituting a double register are
//    updated together when "regType" is TYP_DOUBLE.
5992 //
5993 void LinearScan::updatePreviousInterval(RegRecord* reg, Interval* interval, RegisterType regType)
5994 {
5995     reg->previousInterval = interval;
5996
5997 #ifdef _TARGET_ARM_
5998     // Update overlapping floating point register for TYP_DOUBLE
5999     if (regType == TYP_DOUBLE)
6000     {
6001         RegRecord* anotherHalfReg = findAnotherHalfRegRec(reg);
6002
6003         anotherHalfReg->previousInterval = interval;
6004     }
6005 #endif
6006 }
6007
6008 // LinearScan::resolveLocalRef
6009 // Description:
6010 //      Update the graph for a local reference.
6011 //      Also, track the register (if any) that is currently occupied.
6012 // Arguments:
6013 //      treeNode: The lclVar that's being resolved
6014 //      currentRefPosition: the RefPosition associated with the treeNode
6015 //
6016 // Details:
6017 // This method is called for each local reference, during the resolveRegisters
6018 // phase of LSRA.  It is responsible for keeping the following in sync:
6019 //   - varDsc->lvRegNum (and lvOtherReg) contain the unique register location.
6020 //     If it is not in the same register through its lifetime, it is set to REG_STK.
6021 //   - interval->physReg is set to the assigned register
6022 //     (i.e. at the code location which is currently being handled by resolveRegisters())
6023 //     - interval->isActive is true iff the interval is live and occupying a register
6024 //     - interval->isSpilled should have already been set to true if the interval is EVER spilled
6025 //     - interval->isSplit is set to true if the interval does not occupy the same
6026 //       register throughout the method
6027 //   - RegRecord->assignedInterval points to the interval which currently occupies
6028 //     the register
6029 //   - For each lclVar node:
6030 //     - gtRegNum/gtRegPair is set to the currently allocated register(s).
6031 //     - GTF_SPILLED is set on a use if it must be reloaded prior to use.
6032 //     - GTF_SPILL is set if it must be spilled after use.
6033 //
6034 // A copyReg is an ugly case where the variable must be in a specific (fixed) register,
6035 // but it currently resides elsewhere.  The register allocator must track the use of the
6036 // fixed register, but it marks the lclVar node with the register it currently lives in
6037 // and the code generator does the necessary move.
6038 //
6039 // Before beginning, the varDsc for each parameter must be set to its initial location.
6040 //
6041 // NICE: Consider tracking whether an Interval is always in the same location (register/stack)
6042 // in which case it will require no resolution.
6043 //
void LinearScan::resolveLocalRef(BasicBlock* block, GenTree* treeNode, RefPosition* currentRefPosition)
{
    // 'block' and 'treeNode' are both null for a RefTypeExpUse (no actual code location).
    assert((block == nullptr) == (treeNode == nullptr));
    assert(enregisterLocalVars);

    // Is this a tracked local?  Or just a register allocated for loading
    // a non-tracked one?
    Interval* interval = currentRefPosition->getInterval();
    assert(interval->isLocalVar);

    interval->recentRefPosition = currentRefPosition;
    LclVarDsc* varDsc           = interval->getLocalVar(compiler);

    // NOTE: we set the GTF_VAR_DEATH flag here unless we are extending lifetimes, in which case we write
    // this bit in checkLastUses. This is a bit of a hack, but is necessary because codegen requires
    // accurate last use info that is not reflected in the lastUse bit on ref positions when we are extending
    // lifetimes. See also the comments in checkLastUses.
    if ((treeNode != nullptr) && !extendLifetimes())
    {
        if (currentRefPosition->lastUse)
        {
            treeNode->gtFlags |= GTF_VAR_DEATH;
        }
        else
        {
            treeNode->gtFlags &= ~GTF_VAR_DEATH;
        }
    }

    // RBM_NONE means no register was allocated: this must be a reg-optional reference
    // that will be handled entirely from the stack home location (the node becomes contained).
    if (currentRefPosition->registerAssignment == RBM_NONE)
    {
        assert(currentRefPosition->RegOptional());
        assert(interval->isSpilled);

        varDsc->lvRegNum = REG_STK;
        // Sever any stale register->interval association before clearing our own state.
        if (interval->assignedReg != nullptr && interval->assignedReg->assignedInterval == interval)
        {
            updateAssignedInterval(interval->assignedReg, nullptr, interval->registerType);
        }
        interval->assignedReg = nullptr;
        interval->physReg     = REG_NA;
        if (treeNode != nullptr)
        {
            treeNode->SetContained();
        }

        return;
    }

    // In most cases, assigned and home registers will be the same
    // The exception is the copyReg case, where we've assigned a register
    // for a specific purpose, but will be keeping the register assignment
    regNumber assignedReg = currentRefPosition->assignedReg();
    regNumber homeReg     = assignedReg;

    // Undo any previous association with a physical register, UNLESS this
    // is a copyReg
    if (!currentRefPosition->copyReg)
    {
        regNumber oldAssignedReg = interval->physReg;
        if (oldAssignedReg != REG_NA && assignedReg != oldAssignedReg)
        {
            RegRecord* oldRegRecord = getRegisterRecord(oldAssignedReg);
            if (oldRegRecord->assignedInterval == interval)
            {
                updateAssignedInterval(oldRegRecord, nullptr, interval->registerType);
            }
        }
    }

    if (currentRefPosition->refType == RefTypeUse && !currentRefPosition->reload)
    {
        // Was this spilled after our predecessor was scheduled?
        if (interval->physReg == REG_NA)
        {
            assert(inVarToRegMaps[curBBNum][varDsc->lvVarIndex] == REG_STK);
            currentRefPosition->reload = true;
        }
    }

    bool reload     = currentRefPosition->reload;
    bool spillAfter = currentRefPosition->spillAfter;

    // In the reload case we either:
    // - Set the register to REG_STK if it will be referenced only from the home location, or
    // - Set the register to the assigned register and set GTF_SPILLED if it must be loaded into a register.
    if (reload)
    {
        assert(currentRefPosition->refType != RefTypeDef);
        assert(interval->isSpilled);
        varDsc->lvRegNum = REG_STK;
        if (!spillAfter)
        {
            interval->physReg = assignedReg;
        }

        // If there is no treeNode, this must be a RefTypeExpUse, in
        // which case we did the reload already
        if (treeNode != nullptr)
        {
            treeNode->gtFlags |= GTF_SPILLED;
            if (spillAfter)
            {
                if (currentRefPosition->RegOptional())
                {
                    // This is a use of lclVar that is flagged as reg-optional
                    // by lower/codegen and marked for both reload and spillAfter.
                    // In this case we can avoid unnecessary reload and spill
                    // by setting reg on lclVar to REG_STK and reg on tree node
                    // to REG_NA.  Codegen will generate the code by considering
                    // it as a contained memory operand.
                    //
                    // Note that varDsc->lvRegNum is already set to REG_STK above.
                    interval->physReg  = REG_NA;
                    treeNode->gtRegNum = REG_NA;
                    treeNode->gtFlags &= ~GTF_SPILLED;
                    treeNode->SetContained();
                }
                else
                {
                    treeNode->gtFlags |= GTF_SPILL;
                }
            }
        }
        else
        {
            assert(currentRefPosition->refType == RefTypeExpUse);
        }
    }
    else if (spillAfter && !RefTypeIsUse(currentRefPosition->refType))
    {
        // In the case of a pure def, don't bother spilling - just assign it to the
        // stack.  However, we need to remember that it was spilled.

        assert(interval->isSpilled);
        varDsc->lvRegNum  = REG_STK;
        interval->physReg = REG_NA;
        if (treeNode != nullptr)
        {
            treeNode->gtRegNum = REG_NA;
        }
    }
    else // Not reload and Not pure-def that's spillAfter
    {
        if (currentRefPosition->copyReg || currentRefPosition->moveReg)
        {
            // For a copyReg or moveReg, we have two cases:
            //  - In the first case, we have a fixedReg - i.e. a register which the code
            //    generator is constrained to use.
            //    The code generator will generate the appropriate move to meet the requirement.
            //  - In the second case, we were forced to use a different register because of
            //    interference (or JitStressRegs).
            //    In this case, we generate a GT_COPY.
            // In either case, we annotate the treeNode with the register in which the value
            // currently lives.  For moveReg, the homeReg is the new register (as assigned above).
            // But for copyReg, the homeReg remains unchanged.

            assert(treeNode != nullptr);
            treeNode->gtRegNum = interval->physReg;

            if (currentRefPosition->copyReg)
            {
                homeReg = interval->physReg;
            }
            else
            {
                assert(interval->isSplit);
                interval->physReg = assignedReg;
            }

            if (!currentRefPosition->isFixedRegRef || currentRefPosition->moveReg)
            {
                // This is the second case, where we need to generate a copy
                insertCopyOrReload(block, treeNode, currentRefPosition->getMultiRegIdx(), currentRefPosition);
            }
        }
        else
        {
            interval->physReg = assignedReg;

            if (!interval->isSpilled && !interval->isSplit)
            {
                if (varDsc->lvRegNum != REG_STK)
                {
                    // If the register assignments don't match, then this interval is split.
                    if (varDsc->lvRegNum != assignedReg)
                    {
                        setIntervalAsSplit(interval);
                        varDsc->lvRegNum = REG_STK;
                    }
                }
                else
                {
                    varDsc->lvRegNum = assignedReg;
                }
            }
        }
        if (spillAfter)
        {
            if (treeNode != nullptr)
            {
                treeNode->gtFlags |= GTF_SPILL;
            }
            assert(interval->isSpilled);
            interval->physReg = REG_NA;
            varDsc->lvRegNum  = REG_STK;
        }
    }

    // Update the physRegRecord for the register, so that we know what vars are in
    // regs at the block boundaries
    RegRecord* physRegRecord = getRegisterRecord(homeReg);
    if (spillAfter || currentRefPosition->lastUse)
    {
        // The value no longer occupies a register past this point.
        interval->isActive    = false;
        interval->assignedReg = nullptr;
        interval->physReg     = REG_NA;

        updateAssignedInterval(physRegRecord, nullptr, interval->registerType);
    }
    else
    {
        // The value remains live in 'homeReg'.
        interval->isActive    = true;
        interval->assignedReg = physRegRecord;

        updateAssignedInterval(physRegRecord, interval, interval->registerType);
    }
}
6272
6273 void LinearScan::writeRegisters(RefPosition* currentRefPosition, GenTree* tree)
6274 {
6275     lsraAssignRegToTree(tree, currentRefPosition->assignedReg(), currentRefPosition->getMultiRegIdx());
6276 }
6277
6278 //------------------------------------------------------------------------
6279 // insertCopyOrReload: Insert a copy in the case where a tree node value must be moved
6280 //   to a different register at the point of use (GT_COPY), or it is reloaded to a different register
6281 //   than the one it was spilled from (GT_RELOAD).
6282 //
6283 // Arguments:
6284 //    block             - basic block in which GT_COPY/GT_RELOAD is inserted.
6285 //    tree              - This is the node to copy or reload.
6286 //                        Insert copy or reload node between this node and its parent.
6287 //    multiRegIdx       - register position of tree node for which copy or reload is needed.
6288 //    refPosition       - The RefPosition at which copy or reload will take place.
6289 //
6290 // Notes:
6291 //    The GT_COPY or GT_RELOAD will be inserted in the proper spot in execution order where the reload is to occur.
6292 //
6293 // For example, for this tree (numbers are execution order, lower is earlier and higher is later):
6294 //
6295 //                                   +---------+----------+
6296 //                                   |       GT_ADD (3)   |
6297 //                                   +---------+----------+
6298 //                                             |
6299 //                                           /   \
6300 //                                         /       \
6301 //                                       /           \
6302 //                   +-------------------+           +----------------------+
6303 //                   |         x (1)     | "tree"    |         y (2)        |
6304 //                   +-------------------+           +----------------------+
6305 //
6306 // generate this tree:
6307 //
6308 //                                   +---------+----------+
6309 //                                   |       GT_ADD (4)   |
6310 //                                   +---------+----------+
6311 //                                             |
6312 //                                           /   \
6313 //                                         /       \
6314 //                                       /           \
6315 //                   +-------------------+           +----------------------+
6316 //                   |  GT_RELOAD (3)    |           |         y (2)        |
6317 //                   +-------------------+           +----------------------+
6318 //                             |
6319 //                   +-------------------+
6320 //                   |         x (1)     | "tree"
6321 //                   +-------------------+
6322 //
6323 // Note in particular that the GT_RELOAD node gets inserted in execution order immediately before the parent of "tree",
6324 // which seems a bit weird since normally a node's parent (in this case, the parent of "x", GT_RELOAD in the "after"
6325 // picture) immediately follows all of its children (that is, normally the execution ordering is postorder).
6326 // The ordering must be this weird "out of normal order" way because the "x" node is being spilled, probably
6327 // because the expression in the tree represented above by "y" has high register requirements. We don't want
6328 // to reload immediately, of course. So we put GT_RELOAD where the reload should actually happen.
6329 //
6330 // Note that GT_RELOAD is required when we reload to a different register than the one we spilled to. It can also be
6331 // used if we reload to the same register. Normally, though, in that case we just mark the node with GTF_SPILLED,
6332 // and the unspilling code automatically reuses the same register, and does the reload when it notices that flag
6333 // when considering a node's operands.
6334 //
6335 void LinearScan::insertCopyOrReload(BasicBlock* block, GenTree* tree, unsigned multiRegIdx, RefPosition* refPosition)
6336 {
6337     LIR::Range& blockRange = LIR::AsRange(block);
6338
6339     LIR::Use treeUse;
6340     bool     foundUse = blockRange.TryGetUse(tree, &treeUse);
6341     assert(foundUse);
6342
6343     GenTree* parent = treeUse.User();
6344
6345     genTreeOps oper;
6346     if (refPosition->reload)
6347     {
6348         oper = GT_RELOAD;
6349     }
6350     else
6351     {
6352         oper = GT_COPY;
6353
6354 #if TRACK_LSRA_STATS
6355         updateLsraStat(LSRA_STAT_COPY_REG, block->bbNum);
6356 #endif
6357     }
6358
6359     // If the parent is a reload/copy node, then tree must be a multi-reg node
6360     // that has already had one of its registers spilled.
6361     // It is possible that one of its RefTypeDef positions got spilled and the next
6362     // use of it requires it to be in a different register.
6363     //
6364     // In this case set the i'th position reg of reload/copy node to the reg allocated
6365     // for copy/reload refPosition.  Essentially a copy/reload node will have a reg
6366     // for each multi-reg position of its child. If there is a valid reg in i'th
6367     // position of GT_COPY or GT_RELOAD node then the corresponding result of its
6368     // child needs to be copied or reloaded to that reg.
6369     if (parent->IsCopyOrReload())
6370     {
6371         noway_assert(parent->OperGet() == oper);
6372         noway_assert(tree->IsMultiRegNode());
6373         GenTreeCopyOrReload* copyOrReload = parent->AsCopyOrReload();
6374         noway_assert(copyOrReload->GetRegNumByIdx(multiRegIdx) == REG_NA);
6375         copyOrReload->SetRegNumByIdx(refPosition->assignedReg(), multiRegIdx);
6376     }
6377     else
6378     {
6379         // Create the new node, with "tree" as its only child.
6380         var_types treeType = tree->TypeGet();
6381
6382         GenTreeCopyOrReload* newNode = new (compiler, oper) GenTreeCopyOrReload(oper, treeType, tree);
6383         assert(refPosition->registerAssignment != RBM_NONE);
6384         SetLsraAdded(newNode);
6385         newNode->SetRegNumByIdx(refPosition->assignedReg(), multiRegIdx);
6386         if (refPosition->copyReg)
6387         {
6388             // This is a TEMPORARY copy
6389             assert(isCandidateLocalRef(tree));
6390             newNode->gtFlags |= GTF_VAR_DEATH;
6391         }
6392
6393         // Insert the copy/reload after the spilled node and replace the use of the original node with a use
6394         // of the copy/reload.
6395         blockRange.InsertAfter(tree, newNode);
6396         treeUse.ReplaceWith(compiler, newNode);
6397     }
6398 }
6399
6400 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
6401 //------------------------------------------------------------------------
6402 // insertUpperVectorSave: Insert code to save the upper half of a vector that lives
6403 //                        in a callee-save register at the point of a kill (the upper half is
6404 //                        not preserved).
6405 //
6406 // Arguments:
6407 //    tree              - This is the node before which we will insert the Save.
6408 //                        It will be a call or some node that turns into a call.
6409 //    refPosition       - The RefTypeUpperVectorSave RefPosition.
6410 //    upperInterval     - The Interval for the upper half of the large vector lclVar.
6411 //    block             - the BasicBlock containing the call.
6412 //
void LinearScan::insertUpperVectorSave(GenTree*     tree,
                                       RefPosition* refPosition,
                                       Interval*    upperVectorInterval,
                                       BasicBlock*  block)
{
    JITDUMP("Inserting UpperVectorSave for RP #%d before %d.%s:\n", refPosition->rpNum, tree->gtTreeID,
            GenTree::OpName(tree->gtOper));
    // The upper-vector interval is related to the lclVar interval whose upper half we're saving.
    Interval* lclVarInterval = upperVectorInterval->relatedInterval;
    assert(lclVarInterval->isLocalVar == true);
    assert(refPosition->getInterval() == upperVectorInterval);
    regNumber lclVarReg = lclVarInterval->physReg;
    if (lclVarReg == REG_NA)
    {
        // The lclVar is not currently in a register, so there is no live upper half to save.
        return;
    }

    LclVarDsc* varDsc = compiler->lvaTable + lclVarInterval->varNum;
    assert(varTypeNeedsPartialCalleeSave(varDsc->lvType));
    // Only callee-saved float registers need their upper half saved across a call.
    assert((genRegMask(lclVarReg) & RBM_FLT_CALLEE_SAVED) != RBM_NONE);

    // On Arm64, we must always have a register to save the upper half,
    // while on x86 we can spill directly to memory.
    regNumber spillReg = refPosition->assignedReg();
#ifdef _TARGET_ARM64_
    bool spillToMem = refPosition->spillAfter;
    assert(spillReg != REG_NA);
#else
    bool spillToMem = (spillReg == REG_NA);
    assert(!refPosition->spillAfter);
#endif

    LIR::Range& blockRange = LIR::AsRange(block);

    // Insert the save before the call.

    // Build a use of the lclVar (in its current register) as the operand of the save intrinsic.
    GenTree* saveLcl  = compiler->gtNewLclvNode(lclVarInterval->varNum, varDsc->lvType);
    saveLcl->gtRegNum = lclVarReg;
    SetLsraAdded(saveLcl);

    GenTreeSIMD* simdNode =
        new (compiler, GT_SIMD) GenTreeSIMD(LargeVectorSaveType, saveLcl, nullptr, SIMDIntrinsicUpperSave,
                                            varDsc->lvBaseType, genTypeSize(varDsc->lvType));
    SetLsraAdded(simdNode);
    simdNode->gtRegNum = spillReg;
    if (spillToMem)
    {
        // The saved upper half goes to memory: mark for spill and record that it's not in a register.
        simdNode->gtFlags |= GTF_SPILL;
        upperVectorInterval->physReg = REG_NA;
    }
    else
    {
        // The saved upper half stays in a callee-saved float register.
        assert((genRegMask(spillReg) & RBM_FLT_CALLEE_SAVED) != RBM_NONE);
        upperVectorInterval->physReg = spillReg;
    }

    blockRange.InsertBefore(tree, LIR::SeqTree(compiler, simdNode));
    DISPTREE(simdNode);
    JITDUMP("\n");
}
6472
6473 //------------------------------------------------------------------------
6474 // insertUpperVectorRestore: Insert code to restore the upper half of a vector that has been partially spilled.
6475 //
6476 // Arguments:
6477 //    tree                - This is the node for which we will insert the Restore.
6478 //                          If non-null, it will be a use of the large vector lclVar.
6479 //                          If null, the Restore will be added to the end of the block.
6480 //    upperVectorInterval - The Interval for the upper vector for the lclVar.
6481 //    block               - the BasicBlock into which we will be inserting the code.
6482 //
6483 // Notes:
6484 //    In the case where 'tree' is non-null, we will insert the restore just prior to
6485 //    its use, in order to ensure the proper ordering.
6486 //
void LinearScan::insertUpperVectorRestore(GenTree*     tree,
                                          RefPosition* refPosition,
                                          Interval*    upperVectorInterval,
                                          BasicBlock*  block)
{
    // NOTE(review): "Adding UpperVectorRestore" is dumped again below with the insertion
    // location, so the prefix appears twice in the JIT dump — confirm whether intended.
    JITDUMP("Adding UpperVectorRestore for RP #%d ", refPosition->rpNum);
    Interval* lclVarInterval = upperVectorInterval->relatedInterval;
    assert(lclVarInterval->isLocalVar == true);
    regNumber lclVarReg = lclVarInterval->physReg;

    // We should not call this method if the lclVar is not in a register (we should have simply marked the entire
    // lclVar as spilled).
    assert(lclVarReg != REG_NA);
    LclVarDsc* varDsc = compiler->lvaTable + lclVarInterval->varNum;
    assert(varTypeNeedsPartialCalleeSave(varDsc->lvType));

    // Build a use of the lclVar (in its current register) as the operand of the restore intrinsic.
    GenTree* restoreLcl  = nullptr;
    restoreLcl           = compiler->gtNewLclvNode(lclVarInterval->varNum, varDsc->lvType);
    restoreLcl->gtRegNum = lclVarReg;
    SetLsraAdded(restoreLcl);

    GenTreeSIMD* simdNode =
        new (compiler, GT_SIMD) GenTreeSIMD(varDsc->lvType, restoreLcl, nullptr, SIMDIntrinsicUpperRestore,
                                            varDsc->lvBaseType, genTypeSize(varDsc->lvType));

    regNumber restoreReg = upperVectorInterval->physReg;
    SetLsraAdded(simdNode);

    if (restoreReg == REG_NA)
    {
        // We need a stack location for this.
        assert(lclVarInterval->isSpilled);
#ifdef _TARGET_AMD64_
        // On AMD64 the restore can read directly from memory; no register is needed.
        assert(refPosition->assignedReg() == REG_NA);
        simdNode->gtFlags |= GTF_NOREG_AT_USE;
#else
        // Elsewhere, reload the saved value into the register assigned to this RefPosition.
        simdNode->gtFlags |= GTF_SPILLED;
        assert(refPosition->assignedReg() != REG_NA);
        restoreReg = refPosition->assignedReg();
#endif
    }
    simdNode->gtRegNum = restoreReg;

    LIR::Range& blockRange = LIR::AsRange(block);
    JITDUMP("Adding UpperVectorRestore ");
    if (tree != nullptr)
    {
        JITDUMP("before %d.%s:\n", tree->gtTreeID, GenTree::OpName(tree->gtOper));
        LIR::Use treeUse;
        bool     foundUse = blockRange.TryGetUse(tree, &treeUse);
        assert(foundUse);
        // We need to insert the restore prior to the use, not (necessarily) immediately after the lclVar.
        blockRange.InsertBefore(treeUse.User(), LIR::SeqTree(compiler, simdNode));
    }
    else
    {
        JITDUMP("at end of BB%02u:\n", block->bbNum);
        if (block->bbJumpKind == BBJ_COND || block->bbJumpKind == BBJ_SWITCH)
        {
            // The restore must execute before the terminating branch/switch.
            noway_assert(!blockRange.IsEmpty());

            GenTree* branch = blockRange.LastNode();
            assert(branch->OperIsConditionalJump() || branch->OperGet() == GT_SWITCH_TABLE ||
                   branch->OperGet() == GT_SWITCH);

            blockRange.InsertBefore(branch, LIR::SeqTree(compiler, simdNode));
        }
        else
        {
            // Fall-through or unconditional-jump blocks have no terminator node to avoid.
            assert(block->bbJumpKind == BBJ_NONE || block->bbJumpKind == BBJ_ALWAYS);
            blockRange.InsertAtEnd(LIR::SeqTree(compiler, simdNode));
        }
    }
    DISPTREE(simdNode);
    JITDUMP("\n");
}
6563 #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
6564
6565 //------------------------------------------------------------------------
6566 // initMaxSpill: Initializes the LinearScan members used to track the max number
6567 //               of concurrent spills.  This is needed so that we can set the
6568 //               fields in Compiler, so that the code generator, in turn can
6569 //               allocate the right number of spill locations.
6570 //
6571 // Arguments:
6572 //    None.
6573 //
6574 // Return Value:
6575 //    None.
6576 //
6577 // Assumptions:
6578 //    This is called before any calls to updateMaxSpill().
6579
6580 void LinearScan::initMaxSpill()
6581 {
6582     needDoubleTmpForFPCall = false;
6583     needFloatTmpForFPCall  = false;
6584     for (int i = 0; i < TYP_COUNT; i++)
6585     {
6586         maxSpill[i]     = 0;
6587         currentSpill[i] = 0;
6588     }
6589 }
6590
6591 //------------------------------------------------------------------------
6592 // recordMaxSpill: Sets the fields in Compiler for the max number of concurrent spills.
6593 //                 (See the comment on initMaxSpill.)
6594 //
6595 // Arguments:
6596 //    None.
6597 //
6598 // Return Value:
6599 //    None.
6600 //
6601 // Assumptions:
6602 //    This is called after updateMaxSpill() has been called for all "real"
6603 //    RefPositions.
6604
6605 void LinearScan::recordMaxSpill()
6606 {
6607     // Note: due to the temp normalization process (see tmpNormalizeType)
6608     // only a few types should actually be seen here.
6609     JITDUMP("Recording the maximum number of concurrent spills:\n");
6610 #ifdef _TARGET_X86_
6611     var_types returnType = RegSet::tmpNormalizeType(compiler->info.compRetType);
6612     if (needDoubleTmpForFPCall || (returnType == TYP_DOUBLE))
6613     {
6614         JITDUMP("Adding a spill temp for moving a double call/return value between xmm reg and x87 stack.\n");
6615         maxSpill[TYP_DOUBLE] += 1;
6616     }
6617     if (needFloatTmpForFPCall || (returnType == TYP_FLOAT))
6618     {
6619         JITDUMP("Adding a spill temp for moving a float call/return value between xmm reg and x87 stack.\n");
6620         maxSpill[TYP_FLOAT] += 1;
6621     }
6622 #endif // _TARGET_X86_
6623     for (int i = 0; i < TYP_COUNT; i++)
6624     {
6625         if (var_types(i) != RegSet::tmpNormalizeType(var_types(i)))
6626         {
6627             // Only normalized types should have anything in the maxSpill array.
6628             // We assume here that if type 'i' does not normalize to itself, then
6629             // nothing else normalizes to 'i', either.
6630             assert(maxSpill[i] == 0);
6631         }
6632         if (maxSpill[i] != 0)
6633         {
6634             JITDUMP("  %s: %d\n", varTypeName(var_types(i)), maxSpill[i]);
6635             compiler->codeGen->regSet.tmpPreAllocateTemps(var_types(i), maxSpill[i]);
6636         }
6637     }
6638     JITDUMP("\n");
6639 }
6640
6641 //------------------------------------------------------------------------
6642 // updateMaxSpill: Update the maximum number of concurrent spills
6643 //
6644 // Arguments:
6645 //    refPosition - the current RefPosition being handled
6646 //
6647 // Return Value:
6648 //    None.
6649 //
6650 // Assumptions:
6651 //    The RefPosition has an associated interval (getInterval() will
6652 //    otherwise assert).
6653 //
6654 // Notes:
6655 //    This is called for each "real" RefPosition during the writeback
6656 //    phase of LSRA.  It keeps track of how many concurrently-live
6657 //    spills there are, and the largest number seen so far.
6658
void LinearScan::updateMaxSpill(RefPosition* refPosition)
{
    RefType refType = refPosition->refType;

#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
    // UpperVector save/restore RefPositions never consume a distinct spill temp:
    // they either spill to the lclVar's home location, or belong to an already
    // (fully) spilled tree temp. Either way there is nothing to count here.
    if ((refType == RefTypeUpperVectorSave) || (refType == RefTypeUpperVectorRestore))
    {
        Interval* interval = refPosition->getInterval();
        // If this is not an 'upperVector', it must be a tree temp that has been already
        // (fully) spilled.
        if (!interval->isUpperVector)
        {
            assert(interval->firstRefPosition->spillAfter);
        }
        else
        {
            // The UpperVector RefPositions spill to the localVar's home location.
            Interval* lclVarInterval = interval->relatedInterval;
            assert(lclVarInterval->isSpilled || (!refPosition->spillAfter && !refPosition->reload));
        }
        return;
    }
#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
    // Only spills, reloads, and reg-optional uses that didn't get a register
    // affect the count of concurrently-live spill temps.
    if (refPosition->spillAfter || refPosition->reload ||
        (refPosition->RegOptional() && refPosition->assignedReg() == REG_NA))
    {
        Interval* interval = refPosition->getInterval();
        // Local vars spill to their own home location, not to a temp, so only
        // tree temps are counted.
        if (!interval->isLocalVar)
        {
            // The tmp allocation logic 'normalizes' types to a small number of
            // types that need distinct stack locations from each other.
            // Those types are currently gc refs, byrefs, <= 4 byte non-GC items,
            // 8-byte non-GC items, and 16-byte or 32-byte SIMD vectors.
            // LSRA is agnostic to those choices but needs
            // to know what they are here.
            var_types typ;

#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
            if (refType == RefTypeUpperVectorSave)
            {
                typ = LargeVectorSaveType;
            }
            else
#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
            {
                // Determine the spilled type from the defining or using tree node.
                GenTree* treeNode = refPosition->treeNode;
                if (treeNode == nullptr)
                {
                    // Uses may not have a tree node; fall back to the defining RefPosition's node.
                    assert(RefTypeIsUse(refType));
                    treeNode = interval->firstRefPosition->treeNode;
                }
                assert(treeNode != nullptr);

                // In case of multi-reg call nodes, we need to use the type
                // of the return register given by multiRegIdx of the refposition.
                if (treeNode->IsMultiRegCall())
                {
                    ReturnTypeDesc* retTypeDesc = treeNode->AsCall()->GetReturnTypeDesc();
                    typ                         = retTypeDesc->GetReturnRegType(refPosition->getMultiRegIdx());
                }
#if FEATURE_ARG_SPLIT
                else if (treeNode->OperIsPutArgSplit())
                {
                    typ = treeNode->AsPutArgSplit()->GetRegType(refPosition->getMultiRegIdx());
                }
#if !defined(_TARGET_64BIT_)
                else if (treeNode->OperIsPutArgReg())
                {
                    // For double arg regs, the type is changed to long since they must be passed via `r0-r3`.
                    // However when they get spilled, they should be treated as separated int registers.
                    var_types typNode = treeNode->TypeGet();
                    typ               = (typNode == TYP_LONG) ? TYP_INT : typNode;
                }
#endif // !_TARGET_64BIT_
#endif // FEATURE_ARG_SPLIT
                else
                {
                    typ = treeNode->TypeGet();
                }
                // Map to the canonical type that determines the stack-temp kind.
                typ = RegSet::tmpNormalizeType(typ);
            }

            if (refPosition->spillAfter && !refPosition->reload)
            {
                // A new spill becomes live: bump the count and track the high-water mark.
                currentSpill[typ]++;
                if (currentSpill[typ] > maxSpill[typ])
                {
                    maxSpill[typ] = currentSpill[typ];
                }
            }
            else if (refPosition->reload)
            {
                // A reload consumes (frees) one outstanding spill temp.
                assert(currentSpill[typ] > 0);
                currentSpill[typ]--;
            }
            else if (refPosition->RegOptional() && refPosition->assignedReg() == REG_NA)
            {
                // A spill temp not getting reloaded into a reg because it is
                // marked as allocate if profitable and getting used from its
                // memory location.  To properly account max spill for typ we
                // decrement spill count.
                assert(RefTypeIsUse(refType));
                assert(currentSpill[typ] > 0);
                currentSpill[typ]--;
            }
            JITDUMP("  Max spill for %s is %d\n", varTypeName(typ), maxSpill[typ]);
        }
    }
}
6768
6769 // This is the final phase of register allocation.  It writes the register assignments to
6770 // the tree, and performs resolution across joins and backedges.
6771 //
6772 void LinearScan::resolveRegisters()
6773 {
6774     // Iterate over the tree and the RefPositions in lockstep
6775     //  - annotate the tree with register assignments by setting gtRegNum or gtRegPair (for longs)
6776     //    on the tree node
6777     //  - track globally-live var locations
6778     //  - add resolution points at split/merge/critical points as needed
6779
6780     // Need to use the same traversal order as the one that assigns the location numbers.
6781
6782     // Dummy RefPositions have been added at any split, join or critical edge, at the
6783     // point where resolution may be required.  These are located:
6784     //  - for a split, at the top of the non-adjacent block
6785     //  - for a join, at the bottom of the non-adjacent joining block
6786     //  - for a critical edge, at the top of the target block of each critical
6787     //    edge.
6788     // Note that a target block may have multiple incoming critical or split edges
6789     //
6790     // These RefPositions record the expected location of the Interval at that point.
6791     // At each branch, we identify the location of each liveOut interval, and check
6792     // against the RefPositions at the target.
6793
6794     BasicBlock*  block;
6795     LsraLocation currentLocation = MinLocation;
6796
6797     // Clear register assignments - these will be reestablished as lclVar defs (including RefTypeParamDefs)
6798     // are encountered.
6799     if (enregisterLocalVars)
6800     {
6801         for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg))
6802         {
6803             RegRecord* physRegRecord    = getRegisterRecord(reg);
6804             Interval*  assignedInterval = physRegRecord->assignedInterval;
6805             if (assignedInterval != nullptr)
6806             {
6807                 assignedInterval->assignedReg = nullptr;
6808                 assignedInterval->physReg     = REG_NA;
6809             }
6810             physRegRecord->assignedInterval  = nullptr;
6811             physRegRecord->recentRefPosition = nullptr;
6812         }
6813
6814         // Clear "recentRefPosition" for lclVar intervals
6815         for (unsigned varIndex = 0; varIndex < compiler->lvaTrackedCount; varIndex++)
6816         {
6817             if (localVarIntervals[varIndex] != nullptr)
6818             {
6819                 localVarIntervals[varIndex]->recentRefPosition = nullptr;
6820                 localVarIntervals[varIndex]->isActive          = false;
6821             }
6822             else
6823             {
6824                 assert(compiler->lvaTable[compiler->lvaTrackedToVarNum[varIndex]].lvLRACandidate == false);
6825             }
6826         }
6827     }
6828
6829     // handle incoming arguments and special temps
6830     RefPositionIterator refPosIterator     = refPositions.begin();
6831     RefPosition*        currentRefPosition = &refPosIterator;
6832
6833     if (enregisterLocalVars)
6834     {
6835         VarToRegMap entryVarToRegMap = inVarToRegMaps[compiler->fgFirstBB->bbNum];
6836         for (; refPosIterator != refPositions.end() &&
6837                (currentRefPosition->refType == RefTypeParamDef || currentRefPosition->refType == RefTypeZeroInit);
6838              ++refPosIterator, currentRefPosition = &refPosIterator)
6839         {
6840             Interval* interval = currentRefPosition->getInterval();
6841             assert(interval != nullptr && interval->isLocalVar);
6842             resolveLocalRef(nullptr, nullptr, currentRefPosition);
6843             regNumber reg      = REG_STK;
6844             int       varIndex = interval->getVarIndex(compiler);
6845
6846             if (!currentRefPosition->spillAfter && currentRefPosition->registerAssignment != RBM_NONE)
6847             {
6848                 reg = currentRefPosition->assignedReg();
6849             }
6850             else
6851             {
6852                 reg                = REG_STK;
6853                 interval->isActive = false;
6854             }
6855             setVarReg(entryVarToRegMap, varIndex, reg);
6856         }
6857     }
6858     else
6859     {
6860         assert(refPosIterator == refPositions.end() ||
6861                (refPosIterator->refType != RefTypeParamDef && refPosIterator->refType != RefTypeZeroInit));
6862     }
6863
6864     BasicBlock* insertionBlock = compiler->fgFirstBB;
6865     GenTree*    insertionPoint = LIR::AsRange(insertionBlock).FirstNonPhiNode();
6866
6867     // write back assignments
6868     for (block = startBlockSequence(); block != nullptr; block = moveToNextBlock())
6869     {
6870         assert(curBBNum == block->bbNum);
6871
6872         if (enregisterLocalVars)
6873         {
6874             // Record the var locations at the start of this block.
6875             // (If it's fgFirstBB, we've already done that above, see entryVarToRegMap)
6876
6877             curBBStartLocation = currentRefPosition->nodeLocation;
6878             if (block != compiler->fgFirstBB)
6879             {
6880                 processBlockStartLocations(block);
6881             }
6882
6883             // Handle the DummyDefs, updating the incoming var location.
6884             for (; refPosIterator != refPositions.end() && currentRefPosition->refType == RefTypeDummyDef;
6885                  ++refPosIterator, currentRefPosition = &refPosIterator)
6886             {
6887                 assert(currentRefPosition->isIntervalRef());
6888                 // Don't mark dummy defs as reload
6889                 currentRefPosition->reload = false;
6890                 resolveLocalRef(nullptr, nullptr, currentRefPosition);
6891                 regNumber reg;
6892                 if (currentRefPosition->registerAssignment != RBM_NONE)
6893                 {
6894                     reg = currentRefPosition->assignedReg();
6895                 }
6896                 else
6897                 {
6898                     reg                                         = REG_STK;
6899                     currentRefPosition->getInterval()->isActive = false;
6900                 }
6901                 setInVarRegForBB(curBBNum, currentRefPosition->getInterval()->varNum, reg);
6902             }
6903         }
6904
6905         // The next RefPosition should be for the block.  Move past it.
6906         assert(refPosIterator != refPositions.end());
6907         assert(currentRefPosition->refType == RefTypeBB);
6908         ++refPosIterator;
6909         currentRefPosition = &refPosIterator;
6910
6911         // Handle the RefPositions for the block
6912         for (; refPosIterator != refPositions.end() && currentRefPosition->refType != RefTypeBB &&
6913                currentRefPosition->refType != RefTypeDummyDef;
6914              ++refPosIterator, currentRefPosition = &refPosIterator)
6915         {
6916             currentLocation = currentRefPosition->nodeLocation;
6917
6918             // Ensure that the spill & copy info is valid.
6919             // First, if it's reload, it must not be copyReg or moveReg
6920             assert(!currentRefPosition->reload || (!currentRefPosition->copyReg && !currentRefPosition->moveReg));
6921             // If it's copyReg it must not be moveReg, and vice-versa
6922             assert(!currentRefPosition->copyReg || !currentRefPosition->moveReg);
6923
6924             switch (currentRefPosition->refType)
6925             {
6926 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
6927                 case RefTypeUpperVectorSave:
6928                 case RefTypeUpperVectorRestore:
6929 #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
6930                 case RefTypeUse:
6931                 case RefTypeDef:
6932                     // These are the ones we're interested in
6933                     break;
6934                 case RefTypeKill:
6935                 case RefTypeFixedReg:
6936                     // These require no handling at resolution time
6937                     assert(currentRefPosition->referent != nullptr);
6938                     currentRefPosition->referent->recentRefPosition = currentRefPosition;
6939                     continue;
6940                 case RefTypeExpUse:
6941                     // Ignore the ExpUse cases - a RefTypeExpUse would only exist if the
6942                     // variable is dead at the entry to the next block.  So we'll mark
6943                     // it as in its current location and resolution will take care of any
6944                     // mismatch.
6945                     assert(getNextBlock() == nullptr ||
6946                            !VarSetOps::IsMember(compiler, getNextBlock()->bbLiveIn,
6947                                                 currentRefPosition->getInterval()->getVarIndex(compiler)));
6948                     currentRefPosition->referent->recentRefPosition = currentRefPosition;
6949                     continue;
6950                 case RefTypeKillGCRefs:
6951                     // No action to take at resolution time, and no interval to update recentRefPosition for.
6952                     continue;
6953                 case RefTypeDummyDef:
6954                 case RefTypeParamDef:
6955                 case RefTypeZeroInit:
6956                 // Should have handled all of these already
6957                 default:
6958                     unreached();
6959                     break;
6960             }
6961             updateMaxSpill(currentRefPosition);
6962             GenTree* treeNode = currentRefPosition->treeNode;
6963
6964 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
6965             if (currentRefPosition->refType == RefTypeUpperVectorSave)
6966             {
6967                 // The treeNode is a call or something that might become one.
6968                 noway_assert(treeNode != nullptr);
6969                 // If the associated interval is an UpperVector, this must be a RefPosition for a LargeVectorType
6970                 // LocalVar.
6971                 // Otherwise, this  is a non-lclVar interval that has been spilled, and we don't need to do anything.
6972                 Interval* interval = currentRefPosition->getInterval();
6973                 if (interval->isUpperVector)
6974                 {
6975                     Interval* localVarInterval = interval->relatedInterval;
6976                     if ((localVarInterval->physReg != REG_NA) && !localVarInterval->isPartiallySpilled)
6977                     {
6978                         // If the localVar is in a register, it must be a callee-save register (otherwise it would have
6979                         // already been spilled).
6980                         assert(localVarInterval->assignedReg->isCalleeSave);
6981                         // If we have allocated a register to spill it to, we will use that; otherwise, we will spill it
6982                         // to the stack.  We can use as a temp register any non-arg caller-save register.
6983                         currentRefPosition->referent->recentRefPosition = currentRefPosition;
6984                         insertUpperVectorSave(treeNode, currentRefPosition, currentRefPosition->getInterval(), block);
6985                         localVarInterval->isPartiallySpilled = true;
6986                     }
6987                 }
6988                 else
6989                 {
6990                     // This is a non-lclVar interval that must have been spilled.
6991                     assert(!currentRefPosition->getInterval()->isLocalVar);
6992                     assert(currentRefPosition->getInterval()->firstRefPosition->spillAfter);
6993                 }
6994                 continue;
6995             }
6996             else if (currentRefPosition->refType == RefTypeUpperVectorRestore)
6997             {
6998                 // Since we don't do partial restores of tree temp intervals, this must be an upperVector.
6999                 Interval* interval         = currentRefPosition->getInterval();
7000                 Interval* localVarInterval = interval->relatedInterval;
7001                 assert(interval->isUpperVector && (localVarInterval != nullptr));
7002                 if (localVarInterval->physReg != REG_NA)
7003                 {
7004                     assert(localVarInterval->isPartiallySpilled);
7005                     assert((localVarInterval->assignedReg != nullptr) &&
7006                            (localVarInterval->assignedReg->regNum == localVarInterval->physReg) &&
7007                            (localVarInterval->assignedReg->assignedInterval == localVarInterval));
7008                     insertUpperVectorRestore(treeNode, currentRefPosition, interval, block);
7009                 }
7010                 localVarInterval->isPartiallySpilled = false;
7011             }
7012 #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
7013
7014             // Most uses won't actually need to be recorded (they're on the def).
7015             // In those cases, treeNode will be nullptr.
7016             if (treeNode == nullptr)
7017             {
7018                 // This is either a use, a dead def, or a field of a struct
7019                 Interval* interval = currentRefPosition->getInterval();
7020                 assert(currentRefPosition->refType == RefTypeUse ||
7021                        currentRefPosition->registerAssignment == RBM_NONE || interval->isStructField ||
7022                        interval->IsUpperVector());
7023
7024                 // TODO-Review: Need to handle the case where any of the struct fields
7025                 // are reloaded/spilled at this use
7026                 assert(!interval->isStructField ||
7027                        (currentRefPosition->reload == false && currentRefPosition->spillAfter == false));
7028
7029                 if (interval->isLocalVar && !interval->isStructField)
7030                 {
7031                     LclVarDsc* varDsc = interval->getLocalVar(compiler);
7032
7033                     // This must be a dead definition.  We need to mark the lclVar
7034                     // so that it's not considered a candidate for lvRegister, as
7035                     // this dead def will have to go to the stack.
7036                     assert(currentRefPosition->refType == RefTypeDef);
7037                     varDsc->lvRegNum = REG_STK;
7038                 }
7039                 continue;
7040             }
7041
7042             if (currentRefPosition->isIntervalRef() && currentRefPosition->getInterval()->isInternal)
7043             {
7044                 treeNode->gtRsvdRegs |= currentRefPosition->registerAssignment;
7045             }
7046             else
7047             {
7048                 writeRegisters(currentRefPosition, treeNode);
7049
7050                 if (treeNode->IsLocal() && currentRefPosition->getInterval()->isLocalVar)
7051                 {
7052                     resolveLocalRef(block, treeNode, currentRefPosition);
7053                 }
7054
7055                 // Mark spill locations on temps
7056                 // (local vars are handled in resolveLocalRef, above)
7057                 // Note that the tree node will be changed from GTF_SPILL to GTF_SPILLED
7058                 // in codegen, taking care of the "reload" case for temps
7059                 else if (currentRefPosition->spillAfter || (currentRefPosition->nextRefPosition != nullptr &&
7060                                                             currentRefPosition->nextRefPosition->moveReg))
7061                 {
7062                     if (treeNode != nullptr && currentRefPosition->isIntervalRef())
7063                     {
7064                         if (currentRefPosition->spillAfter)
7065                         {
7066                             treeNode->gtFlags |= GTF_SPILL;
7067
7068                             // If this is a constant interval that is reusing a pre-existing value, we actually need
7069                             // to generate the value at this point in order to spill it.
7070                             if (treeNode->IsReuseRegVal())
7071                             {
7072                                 treeNode->ResetReuseRegVal();
7073                             }
7074
7075                             // In case of multi-reg call node, also set spill flag on the
7076                             // register specified by multi-reg index of current RefPosition.
7077                             // Note that the spill flag on treeNode indicates that one or
7078                             // more its allocated registers are in that state.
7079                             if (treeNode->IsMultiRegCall())
7080                             {
7081                                 GenTreeCall* call = treeNode->AsCall();
7082                                 call->SetRegSpillFlagByIdx(GTF_SPILL, currentRefPosition->getMultiRegIdx());
7083                             }
7084 #if FEATURE_ARG_SPLIT
7085                             else if (treeNode->OperIsPutArgSplit())
7086                             {
7087                                 GenTreePutArgSplit* splitArg = treeNode->AsPutArgSplit();
7088                                 splitArg->SetRegSpillFlagByIdx(GTF_SPILL, currentRefPosition->getMultiRegIdx());
7089                             }
7090 #ifdef _TARGET_ARM_
7091                             else if (treeNode->OperIsMultiRegOp())
7092                             {
7093                                 GenTreeMultiRegOp* multiReg = treeNode->AsMultiRegOp();
7094                                 multiReg->SetRegSpillFlagByIdx(GTF_SPILL, currentRefPosition->getMultiRegIdx());
7095                             }
7096 #endif // _TARGET_ARM_
7097 #endif // FEATURE_ARG_SPLIT
7098                         }
7099
7100                         // If the value is reloaded or moved to a different register, we need to insert
7101                         // a node to hold the register to which it should be reloaded
7102                         RefPosition* nextRefPosition = currentRefPosition->nextRefPosition;
7103                         noway_assert(nextRefPosition != nullptr);
7104                         if (INDEBUG(alwaysInsertReload() ||)
7105                                 nextRefPosition->assignedReg() != currentRefPosition->assignedReg())
7106                         {
7107 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
7108                             // Note that we asserted above that this is an Interval RefPosition.
7109                             Interval* currentInterval = currentRefPosition->getInterval();
7110                             if (!currentInterval->isUpperVector && nextRefPosition->refType == RefTypeUpperVectorSave)
7111                             {
7112                                 // The currentRefPosition is a spill of a tree temp.
7113                                 // These have no associated Restore, as we always spill if the vector is
7114                                 // in a register when this is encountered.
7115                                 // The nextRefPosition we're interested in (where we may need to insert a
7116                                 // reload or flag as GTF_NOREG_AT_USE) is the subsequent RefPosition.
7117                                 assert(!currentInterval->isLocalVar);
7118                                 nextRefPosition = nextRefPosition->nextRefPosition;
7119                                 assert(nextRefPosition->refType != RefTypeUpperVectorSave);
7120                             }
7121                             // UpperVector intervals may have unique assignments at each reference.
7122                             if (!currentInterval->isUpperVector)
7123 #endif
7124                             {
7125                                 if (nextRefPosition->assignedReg() != REG_NA)
7126                                 {
7127                                     insertCopyOrReload(block, treeNode, currentRefPosition->getMultiRegIdx(),
7128                                                        nextRefPosition);
7129                                 }
7130                                 else
7131                                 {
7132                                     assert(nextRefPosition->RegOptional());
7133
7134                                     // In case of tree temps, if def is spilled and use didn't
7135                                     // get a register, set a flag on tree node to be treated as
7136                                     // contained at the point of its use.
7137                                     if (currentRefPosition->spillAfter && currentRefPosition->refType == RefTypeDef &&
7138                                         nextRefPosition->refType == RefTypeUse)
7139                                     {
7140                                         assert(nextRefPosition->treeNode == nullptr);
7141                                         treeNode->gtFlags |= GTF_NOREG_AT_USE;
7142                                     }
7143                                 }
7144                             }
7145                         }
7146                     }
7147
7148                     // We should never have to "spill after" a temp use, since
7149                     // they're single use
7150                     else
7151                     {
7152                         unreached();
7153                     }
7154                 }
7155             }
7156         }
7157
7158         if (enregisterLocalVars)
7159         {
7160             processBlockEndLocations(block);
7161         }
7162     }
7163
7164     if (enregisterLocalVars)
7165     {
7166 #ifdef DEBUG
7167         if (VERBOSE)
7168         {
7169             printf("-----------------------\n");
7170             printf("RESOLVING BB BOUNDARIES\n");
7171             printf("-----------------------\n");
7172
7173             printf("Resolution Candidates: ");
7174             dumpConvertedVarSet(compiler, resolutionCandidateVars);
7175             printf("\n");
7176             printf("Has %sCritical Edges\n\n", hasCriticalEdges ? "" : "No");
7177
7178             printf("Prior to Resolution\n");
7179             foreach_block(compiler, block)
7180             {
7181                 printf("\n" FMT_BB " use def in out\n", block->bbNum);
7182                 dumpConvertedVarSet(compiler, block->bbVarUse);
7183                 printf("\n");
7184                 dumpConvertedVarSet(compiler, block->bbVarDef);
7185                 printf("\n");
7186                 dumpConvertedVarSet(compiler, block->bbLiveIn);
7187                 printf("\n");
7188                 dumpConvertedVarSet(compiler, block->bbLiveOut);
7189                 printf("\n");
7190
7191                 dumpInVarToRegMap(block);
7192                 dumpOutVarToRegMap(block);
7193             }
7194
7195             printf("\n\n");
7196         }
7197 #endif // DEBUG
7198
7199         resolveEdges();
7200
7201         // Verify register assignments on variables
7202         unsigned   lclNum;
7203         LclVarDsc* varDsc;
7204         for (lclNum = 0, varDsc = compiler->lvaTable; lclNum < compiler->lvaCount; lclNum++, varDsc++)
7205         {
7206             if (!isCandidateVar(varDsc))
7207             {
7208                 varDsc->lvRegNum = REG_STK;
7209             }
7210             else
7211             {
7212                 Interval* interval = getIntervalForLocalVar(varDsc->lvVarIndex);
7213
7214                 // Determine initial position for parameters
7215
7216                 if (varDsc->lvIsParam)
7217                 {
7218                     regMaskTP initialRegMask = interval->firstRefPosition->registerAssignment;
7219                     regNumber initialReg     = (initialRegMask == RBM_NONE || interval->firstRefPosition->spillAfter)
7220                                                ? REG_STK
7221                                                : genRegNumFromMask(initialRegMask);
7222                     regNumber sourceReg = (varDsc->lvIsRegArg) ? varDsc->lvArgReg : REG_STK;
7223
7224 #ifdef _TARGET_ARM_
7225                     if (varTypeIsMultiReg(varDsc))
7226                     {
7227                         // TODO-ARM-NYI: Map the hi/lo intervals back to lvRegNum and lvOtherReg (these should NYI
7228                         // before this)
7229                         assert(!"Multi-reg types not yet supported");
7230                     }
7231                     else
7232 #endif // _TARGET_ARM_
7233                     {
7234                         varDsc->lvArgInitReg = initialReg;
7235                         JITDUMP("  Set V%02u argument initial register to %s\n", lclNum, getRegName(initialReg));
7236                     }
7237
7238                     // Stack args that are part of dependently-promoted structs should never be register candidates (see
7239                     // LinearScan::isRegCandidate).
7240                     assert(varDsc->lvIsRegArg || !compiler->lvaIsFieldOfDependentlyPromotedStruct(varDsc));
7241                 }
7242
7243                 // If lvRegNum is REG_STK, that means that either no register
7244                 // was assigned, or (more likely) that the same register was not
7245                 // used for all references.  In that case, codegen gets the register
7246                 // from the tree node.
7247                 if (varDsc->lvRegNum == REG_STK || interval->isSpilled || interval->isSplit)
7248                 {
7249                     // For codegen purposes, we'll set lvRegNum to whatever register
7250                     // it's currently in as we go.
7251                     // However, we never mark an interval as lvRegister if it has either been spilled
7252                     // or split.
7253                     varDsc->lvRegister = false;
7254
7255                     // Skip any dead defs or exposed uses
7256                     // (first use exposed will only occur when there is no explicit initialization)
7257                     RefPosition* firstRefPosition = interval->firstRefPosition;
7258                     while ((firstRefPosition != nullptr) && (firstRefPosition->refType == RefTypeExpUse))
7259                     {
7260                         firstRefPosition = firstRefPosition->nextRefPosition;
7261                     }
7262                     if (firstRefPosition == nullptr)
7263                     {
7264                         // Dead interval
7265                         varDsc->lvLRACandidate = false;
7266                         if (varDsc->lvRefCnt() == 0)
7267                         {
7268                             varDsc->lvOnFrame = false;
7269                         }
7270                         else
7271                         {
7272                             // We may encounter cases where a lclVar actually has no references, but
7273                             // a non-zero refCnt.  For safety (in case this is some "hidden" lclVar that we're
7274                             // not correctly recognizing), we'll mark those as needing a stack location.
7275                             // TODO-Cleanup: Make this an assert if/when we correct the refCnt
7276                             // updating.
7277                             varDsc->lvOnFrame = true;
7278                         }
7279                     }
7280                     else
7281                     {
7282                         // If the interval was not spilled, it doesn't need a stack location.
7283                         if (!interval->isSpilled)
7284                         {
7285                             varDsc->lvOnFrame = false;
7286                         }
7287                         if (firstRefPosition->registerAssignment == RBM_NONE || firstRefPosition->spillAfter)
7288                         {
7289                             // Either this RefPosition is spilled, or regOptional or it is not a "real" def or use
7290                             assert(
7291                                 firstRefPosition->spillAfter || firstRefPosition->RegOptional() ||
7292                                 (firstRefPosition->refType != RefTypeDef && firstRefPosition->refType != RefTypeUse));
7293                             varDsc->lvRegNum = REG_STK;
7294                         }
7295                         else
7296                         {
7297                             varDsc->lvRegNum = firstRefPosition->assignedReg();
7298                         }
7299                     }
7300                 }
7301                 else
7302                 {
7303                     {
7304                         varDsc->lvRegister = true;
7305                         varDsc->lvOnFrame  = false;
7306                     }
7307 #ifdef DEBUG
7308                     regMaskTP registerAssignment = genRegMask(varDsc->lvRegNum);
7309                     assert(!interval->isSpilled && !interval->isSplit);
7310                     RefPosition* refPosition = interval->firstRefPosition;
7311                     assert(refPosition != nullptr);
7312
7313                     while (refPosition != nullptr)
7314                     {
7315                         // All RefPositions must match, except for dead definitions,
7316                         // copyReg/moveReg and RefTypeExpUse positions
7317                         if (refPosition->registerAssignment != RBM_NONE && !refPosition->copyReg &&
7318                             !refPosition->moveReg && refPosition->refType != RefTypeExpUse)
7319                         {
7320                             assert(refPosition->registerAssignment == registerAssignment);
7321                         }
7322                         refPosition = refPosition->nextRefPosition;
7323                     }
7324 #endif // DEBUG
7325                 }
7326             }
7327         }
7328     }
7329
7330 #ifdef DEBUG
7331     if (VERBOSE)
7332     {
7333         printf("Trees after linear scan register allocator (LSRA)\n");
7334         compiler->fgDispBasicBlocks(true);
7335     }
7336
7337     verifyFinalAllocation();
7338 #endif // DEBUG
7339
7340     compiler->raMarkStkVars();
7341     recordMaxSpill();
7342
7343     // TODO-CQ: Review this comment and address as needed.
7344     // Change all unused promoted non-argument struct locals to a non-GC type (in this case TYP_INT)
7345     // so that the gc tracking logic and lvMustInit logic will ignore them.
7346     // Extract the code that does this from raAssignVars, and call it here.
7347     // PRECONDITIONS: Ensure that lvPromoted is set on promoted structs, if and
7348     // only if it is promoted on all paths.
7349     // Call might be something like:
7350     // compiler->BashUnusedStructLocals();
7351 }
7352
7353 //
7354 //------------------------------------------------------------------------
7355 // insertMove: Insert a move of a lclVar with the given lclNum into the given block.
7356 //
7357 // Arguments:
7358 //    block          - the BasicBlock into which the move will be inserted.
7359 //    insertionPoint - the instruction before which to insert the move
7360 //    lclNum         - the lclNum of the var to be moved
7361 //    fromReg        - the register from which the var is moving
7362 //    toReg          - the register to which the var is moving
7363 //
7364 // Return Value:
7365 //    None.
7366 //
7367 // Notes:
7368 //    If insertionPoint is non-NULL, insert before that instruction;
7369 //    otherwise, insert "near" the end (prior to the branch, if any).
7370 //    If fromReg or toReg is REG_STK, then move from/to memory, respectively.
7371
7372 void LinearScan::insertMove(
7373     BasicBlock* block, GenTree* insertionPoint, unsigned lclNum, regNumber fromReg, regNumber toReg)
7374 {
7375     LclVarDsc* varDsc = compiler->lvaTable + lclNum;
7376     // the lclVar must be a register candidate
7377     assert(isRegCandidate(varDsc));
7378     // One or both MUST be a register
7379     assert(fromReg != REG_STK || toReg != REG_STK);
7380     // They must not be the same register.
7381     assert(fromReg != toReg);
7382
7383     // This var can't be marked lvRegister now
7384     varDsc->lvRegNum = REG_STK;
7385
7386     GenTree* src = compiler->gtNewLclvNode(lclNum, varDsc->TypeGet());
7387     SetLsraAdded(src);
7388
7389     // There are three cases we need to handle:
7390     // - We are loading a lclVar from the stack.
7391     // - We are storing a lclVar to the stack.
7392     // - We are copying a lclVar between registers.
7393     //
7394     // In the first and second cases, the lclVar node will be marked with GTF_SPILLED and GTF_SPILL, respectively.
7395     // It is up to the code generator to ensure that any necessary normalization is done when loading or storing the
7396     // lclVar's value.
7397     //
7398     // In the third case, we generate GT_COPY(GT_LCL_VAR) and type each node with the normalized type of the lclVar.
7399     // This is safe because a lclVar is always normalized once it is in a register.
7400
7401     GenTree* dst = src;
7402     if (fromReg == REG_STK)
7403     {
7404         src->gtFlags |= GTF_SPILLED;
7405         src->gtRegNum = toReg;
7406     }
7407     else if (toReg == REG_STK)
7408     {
7409         src->gtFlags |= GTF_SPILL;
7410         src->gtRegNum = fromReg;
7411     }
7412     else
7413     {
7414         var_types movType = genActualType(varDsc->TypeGet());
7415         src->gtType       = movType;
7416
7417         dst = new (compiler, GT_COPY) GenTreeCopyOrReload(GT_COPY, movType, src);
7418         // This is the new home of the lclVar - indicate that by clearing the GTF_VAR_DEATH flag.
7419         // Note that if src is itself a lastUse, this will have no effect.
7420         dst->gtFlags &= ~(GTF_VAR_DEATH);
7421         src->gtRegNum = fromReg;
7422         dst->gtRegNum = toReg;
7423         SetLsraAdded(dst);
7424     }
7425     dst->SetUnusedValue();
7426
7427     LIR::Range  treeRange  = LIR::SeqTree(compiler, dst);
7428     LIR::Range& blockRange = LIR::AsRange(block);
7429
7430     if (insertionPoint != nullptr)
7431     {
7432         blockRange.InsertBefore(insertionPoint, std::move(treeRange));
7433     }
7434     else
7435     {
7436         // Put the copy at the bottom
7437         if (block->bbJumpKind == BBJ_COND || block->bbJumpKind == BBJ_SWITCH)
7438         {
7439             noway_assert(!blockRange.IsEmpty());
7440
7441             GenTree* branch = blockRange.LastNode();
7442             assert(branch->OperIsConditionalJump() || branch->OperGet() == GT_SWITCH_TABLE ||
7443                    branch->OperGet() == GT_SWITCH);
7444
7445             blockRange.InsertBefore(branch, std::move(treeRange));
7446         }
7447         else
7448         {
7449             assert(block->bbJumpKind == BBJ_NONE || block->bbJumpKind == BBJ_ALWAYS);
7450             blockRange.InsertAtEnd(std::move(treeRange));
7451         }
7452     }
7453 }
7454
//------------------------------------------------------------------------
// insertSwap: Insert a GT_SWAP node to exchange two lclVars between two registers.
//
// Arguments:
//    block          - the BasicBlock into which the swap will be inserted.
//    insertionPoint - the instruction before which to insert the swap;
//                     if nullptr, insert at the bottom (prior to any branch).
//    lclNum1        - the first lclVar, currently in reg1
//    reg1           - the register currently holding lclNum1
//    lclNum2        - the second lclVar, currently in reg2
//    reg2           - the register currently holding lclNum2
//
// Return Value:
//    None.
//
// Notes:
//    Both operands must be in actual registers (not REG_STK or REG_NA).
void LinearScan::insertSwap(
    BasicBlock* block, GenTree* insertionPoint, unsigned lclNum1, regNumber reg1, unsigned lclNum2, regNumber reg2)
{
#ifdef DEBUG
    if (VERBOSE)
    {
        const char* insertionPointString = "top";
        if (insertionPoint == nullptr)
        {
            insertionPointString = "bottom";
        }
        printf("   " FMT_BB " %s: swap V%02u in %s with V%02u in %s\n", block->bbNum, insertionPointString, lclNum1,
               getRegName(reg1), lclNum2, getRegName(reg2));
    }
#endif // DEBUG

    LclVarDsc* varDsc1 = compiler->lvaTable + lclNum1;
    LclVarDsc* varDsc2 = compiler->lvaTable + lclNum2;
    // A swap only makes sense between two real registers.
    assert(reg1 != REG_STK && reg1 != REG_NA && reg2 != REG_STK && reg2 != REG_NA);

    // Build the two lclVar operands, each annotated with its current register,
    // and mark all new nodes as LSRA-added.
    GenTree* lcl1  = compiler->gtNewLclvNode(lclNum1, varDsc1->TypeGet());
    lcl1->gtRegNum = reg1;
    SetLsraAdded(lcl1);

    GenTree* lcl2  = compiler->gtNewLclvNode(lclNum2, varDsc2->TypeGet());
    lcl2->gtRegNum = reg2;
    SetLsraAdded(lcl2);

    GenTree* swap  = compiler->gtNewOperNode(GT_SWAP, TYP_VOID, lcl1, lcl2);
    swap->gtRegNum = REG_NA;
    SetLsraAdded(swap);

    // Manually pre-link the operands and the swap node in execution order,
    // presumably so LIR::SeqTree sees a consistent chain — NOTE(review): confirm
    // whether SeqTree actually requires this pre-linking.
    lcl1->gtNext = lcl2;
    lcl2->gtPrev = lcl1;
    lcl2->gtNext = swap;
    swap->gtPrev = lcl2;

    LIR::Range  swapRange  = LIR::SeqTree(compiler, swap);
    LIR::Range& blockRange = LIR::AsRange(block);

    if (insertionPoint != nullptr)
    {
        blockRange.InsertBefore(insertionPoint, std::move(swapRange));
    }
    else
    {
        // Put the copy at the bottom
        // If there's a branch, make an embedded statement that executes just prior to the branch
        if (block->bbJumpKind == BBJ_COND || block->bbJumpKind == BBJ_SWITCH)
        {
            noway_assert(!blockRange.IsEmpty());

            GenTree* branch = blockRange.LastNode();
            assert(branch->OperIsConditionalJump() || branch->OperGet() == GT_SWITCH_TABLE ||
                   branch->OperGet() == GT_SWITCH);

            blockRange.InsertBefore(branch, std::move(swapRange));
        }
        else
        {
            assert(block->bbJumpKind == BBJ_NONE || block->bbJumpKind == BBJ_ALWAYS);
            blockRange.InsertAtEnd(std::move(swapRange));
        }
    }
}
7520
7521 //------------------------------------------------------------------------
7522 // getTempRegForResolution: Get a free register to use for resolution code.
7523 //
7524 // Arguments:
7525 //    fromBlock - The "from" block on the edge being resolved.
7526 //    toBlock   - The "to"block on the edge
7527 //    type      - the type of register required
7528 //
7529 // Return Value:
7530 //    Returns a register that is free on the given edge, or REG_NA if none is available.
7531 //
7532 // Notes:
7533 //    It is up to the caller to check the return value, and to determine whether a register is
7534 //    available, and to handle that case appropriately.
7535 //    It is also up to the caller to cache the return value, as this is not cheap to compute.
7536
7537 regNumber LinearScan::getTempRegForResolution(BasicBlock* fromBlock, BasicBlock* toBlock, var_types type)
7538 {
7539     // TODO-Throughput: This would be much more efficient if we add RegToVarMaps instead of VarToRegMaps
7540     // and they would be more space-efficient as well.
7541     VarToRegMap fromVarToRegMap = getOutVarToRegMap(fromBlock->bbNum);
7542     VarToRegMap toVarToRegMap   = getInVarToRegMap(toBlock->bbNum);
7543
7544 #ifdef _TARGET_ARM_
7545     regMaskTP freeRegs;
7546     if (type == TYP_DOUBLE)
7547     {
7548         // We have to consider all float registers for TYP_DOUBLE
7549         freeRegs = allRegs(TYP_FLOAT);
7550     }
7551     else
7552     {
7553         freeRegs = allRegs(type);
7554     }
7555 #else  // !_TARGET_ARM_
7556     regMaskTP freeRegs = allRegs(type);
7557 #endif // !_TARGET_ARM_
7558
7559 #ifdef DEBUG
7560     if (getStressLimitRegs() == LSRA_LIMIT_SMALL_SET)
7561     {
7562         return REG_NA;
7563     }
7564 #endif // DEBUG
7565     INDEBUG(freeRegs = stressLimitRegs(nullptr, freeRegs));
7566
7567     // We are only interested in the variables that are live-in to the "to" block.
7568     VarSetOps::Iter iter(compiler, toBlock->bbLiveIn);
7569     unsigned        varIndex = 0;
7570     while (iter.NextElem(&varIndex) && freeRegs != RBM_NONE)
7571     {
7572         regNumber fromReg = getVarReg(fromVarToRegMap, varIndex);
7573         regNumber toReg   = getVarReg(toVarToRegMap, varIndex);
7574         assert(fromReg != REG_NA && toReg != REG_NA);
7575         if (fromReg != REG_STK)
7576         {
7577             freeRegs &= ~genRegMask(fromReg, getIntervalForLocalVar(varIndex)->registerType);
7578         }
7579         if (toReg != REG_STK)
7580         {
7581             freeRegs &= ~genRegMask(toReg, getIntervalForLocalVar(varIndex)->registerType);
7582         }
7583     }
7584
7585 #ifdef _TARGET_ARM_
7586     if (type == TYP_DOUBLE)
7587     {
7588         // Exclude any doubles for which the odd half isn't in freeRegs.
7589         freeRegs = freeRegs & ((freeRegs << 1) & RBM_ALLDOUBLE);
7590     }
7591 #endif
7592
7593     if (freeRegs == RBM_NONE)
7594     {
7595         return REG_NA;
7596     }
7597     else
7598     {
7599         regNumber tempReg = genRegNumFromMask(genFindLowestBit(freeRegs));
7600         return tempReg;
7601     }
7602 }
7603
#ifdef _TARGET_ARM_
//------------------------------------------------------------------------
// addResolutionForDouble: Add resolution move(s) for TYP_DOUBLE interval
//                         and update location.
//
// Arguments:
//    block           - the BasicBlock into which the move will be inserted.
//    insertionPoint  - the instruction before which to insert the move
//    sourceIntervals - maintains sourceIntervals[reg] which each 'reg' is associated with
//    location        - maintains location[reg] which is the location of the var that was originally in 'reg'.
//    toReg           - the register to which the var is moving
//    fromReg         - the register from which the var is moving
//    resolveType     - the type of resolution to be performed
//
// Return Value:
//    None.
//
// Notes:
//    It inserts at least one move and updates incoming parameter 'location'.
//
void LinearScan::addResolutionForDouble(BasicBlock*     block,
                                        GenTree*        insertionPoint,
                                        Interval**      sourceIntervals,
                                        regNumberSmall* location,
                                        regNumber       toReg,
                                        regNumber       fromReg,
                                        ResolveType     resolveType)
{
    regNumber fromRegHi      = REG_NEXT(fromReg);
    Interval* loHalfInterval = sourceIntervals[fromReg];
    Interval* hiHalfInterval = sourceIntervals[fromRegHi];

    // At least one half of the pair must have an interval to move.
    assert(!(loHalfInterval == nullptr && hiHalfInterval == nullptr));

    if (loHalfInterval != nullptr)
    {
        if (loHalfInterval->registerType == TYP_DOUBLE)
        {
            // A TYP_DOUBLE interval occupies the whole double register (both float
            // halves), so nothing else can live in the odd half.
            assert(hiHalfInterval == nullptr);
            assert(genIsValidDoubleReg(toReg));
        }
        else
        {
            // A TYP_FLOAT interval occupies just the 1st half of the double register.
            assert(genIsValidFloatReg(toReg));
        }
        addResolution(block, insertionPoint, loHalfInterval, toReg, fromReg);
        JITDUMP(" (%s)\n", resolveTypeName[resolveType]);
        location[fromReg] = (regNumberSmall)toReg;
    }

    if (hiHalfInterval != nullptr)
    {
        // A TYP_FLOAT interval in the 2nd (odd) half of the double register moves
        // to the corresponding odd half of the target pair.
        assert(hiHalfInterval->registerType == TYP_FLOAT);
        regNumber toRegHi = REG_NEXT(toReg);

        addResolution(block, insertionPoint, hiHalfInterval, toRegHi, fromRegHi);
        JITDUMP(" (%s)\n", resolveTypeName[resolveType]);
        location[fromRegHi] = (regNumberSmall)toRegHi;
    }
}
#endif // _TARGET_ARM_
7670
7671 //------------------------------------------------------------------------
7672 // addResolution: Add a resolution move of the given interval
7673 //
7674 // Arguments:
7675 //    block          - the BasicBlock into which the move will be inserted.
7676 //    insertionPoint - the instruction before which to insert the move
7677 //    interval       - the interval of the var to be moved
7678 //    toReg          - the register to which the var is moving
7679 //    fromReg        - the register from which the var is moving
7680 //
7681 // Return Value:
7682 //    None.
7683 //
7684 // Notes:
7685 //    For joins, we insert at the bottom (indicated by an insertionPoint
7686 //    of nullptr), while for splits we insert at the top.
7687 //    This is because for joins 'block' is a pred of the join, while for splits it is a succ.
7688 //    For critical edges, this function may be called twice - once to move from
7689 //    the source (fromReg), if any, to the stack, in which case toReg will be
7690 //    REG_STK, and we insert at the bottom (leave insertionPoint as nullptr).
7691 //    The next time, we want to move from the stack to the destination (toReg),
7692 //    in which case fromReg will be REG_STK, and we insert at the top.
7693
7694 void LinearScan::addResolution(
7695     BasicBlock* block, GenTree* insertionPoint, Interval* interval, regNumber toReg, regNumber fromReg)
7696 {
7697 #ifdef DEBUG
7698     const char* insertionPointString = "top";
7699 #endif // DEBUG
7700     if (insertionPoint == nullptr)
7701     {
7702 #ifdef DEBUG
7703         insertionPointString = "bottom";
7704 #endif // DEBUG
7705     }
7706
7707     JITDUMP("   " FMT_BB " %s: move V%02u from ", block->bbNum, insertionPointString, interval->varNum);
7708     JITDUMP("%s to %s", getRegName(fromReg), getRegName(toReg));
7709
7710     insertMove(block, insertionPoint, interval->varNum, fromReg, toReg);
7711     if (fromReg == REG_STK || toReg == REG_STK)
7712     {
7713         assert(interval->isSpilled);
7714     }
7715     else
7716     {
7717         // We should have already marked this as spilled or split.
7718         assert((interval->isSpilled) || (interval->isSplit));
7719     }
7720
7721     INTRACK_STATS(updateLsraStat(LSRA_STAT_RESOLUTION_MOV, block->bbNum));
7722 }
7723
7724 //------------------------------------------------------------------------
7725 // handleOutgoingCriticalEdges: Performs the necessary resolution on all critical edges that feed out of 'block'
7726 //
7727 // Arguments:
7728 //    block     - the block with outgoing critical edges.
7729 //
7730 // Return Value:
//    None.
7732 //
7733 // Notes:
7734 //    For all outgoing critical edges (i.e. any successor of this block which is
7735 //    a join edge), if there are any conflicts, split the edge by adding a new block,
7736 //    and generate the resolution code into that block.
7737
7738 void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block)
7739 {
7740     VARSET_TP outResolutionSet(VarSetOps::Intersection(compiler, block->bbLiveOut, resolutionCandidateVars));
7741     if (VarSetOps::IsEmpty(compiler, outResolutionSet))
7742     {
7743         return;
7744     }
7745     VARSET_TP sameResolutionSet(VarSetOps::MakeEmpty(compiler));
7746     VARSET_TP sameLivePathsSet(VarSetOps::MakeEmpty(compiler));
7747     VARSET_TP singleTargetSet(VarSetOps::MakeEmpty(compiler));
7748     VARSET_TP diffResolutionSet(VarSetOps::MakeEmpty(compiler));
7749
7750     // Get the outVarToRegMap for this block
7751     VarToRegMap outVarToRegMap = getOutVarToRegMap(block->bbNum);
7752     unsigned    succCount      = block->NumSucc(compiler);
7753     assert(succCount > 1);
7754     VarToRegMap firstSuccInVarToRegMap = nullptr;
7755     BasicBlock* firstSucc              = nullptr;
7756
7757     // First, determine the live regs at the end of this block so that we know what regs are
7758     // available to copy into.
7759     // Note that for this purpose we use the full live-out set, because we must ensure that
7760     // even the registers that remain the same across the edge are preserved correctly.
7761     regMaskTP       liveOutRegs = RBM_NONE;
7762     VarSetOps::Iter liveOutIter(compiler, block->bbLiveOut);
7763     unsigned        liveOutVarIndex = 0;
7764     while (liveOutIter.NextElem(&liveOutVarIndex))
7765     {
7766         regNumber fromReg = getVarReg(outVarToRegMap, liveOutVarIndex);
7767         if (fromReg != REG_STK)
7768         {
7769             regMaskTP fromRegMask = genRegMask(fromReg, getIntervalForLocalVar(liveOutVarIndex)->registerType);
7770             liveOutRegs |= fromRegMask;
7771         }
7772     }
7773
7774     // Next, if this blocks ends with a switch table, we have to make sure not to copy
7775     // into the registers that it uses.
7776     regMaskTP switchRegs = RBM_NONE;
7777     if (block->bbJumpKind == BBJ_SWITCH)
7778     {
7779         // At this point, Lowering has transformed any non-switch-table blocks into
7780         // cascading ifs.
7781         GenTree* switchTable = LIR::AsRange(block).LastNode();
7782         assert(switchTable != nullptr && switchTable->OperGet() == GT_SWITCH_TABLE);
7783
7784         switchRegs   = switchTable->gtRsvdRegs;
7785         GenTree* op1 = switchTable->gtGetOp1();
7786         GenTree* op2 = switchTable->gtGetOp2();
7787         noway_assert(op1 != nullptr && op2 != nullptr);
7788         assert(op1->gtRegNum != REG_NA && op2->gtRegNum != REG_NA);
7789         // No floating point values, so no need to worry about the register type
7790         // (i.e. for ARM32, where we used the genRegMask overload with a type).
7791         assert(varTypeIsIntegralOrI(op1) && varTypeIsIntegralOrI(op2));
7792         switchRegs |= genRegMask(op1->gtRegNum);
7793         switchRegs |= genRegMask(op2->gtRegNum);
7794     }
7795
7796 #ifdef _TARGET_ARM64_
7797     // Next, if this blocks ends with a JCMP, we have to make sure not to copy
7798     // into the register that it uses or modify the local variable it must consume
7799     LclVarDsc* jcmpLocalVarDsc = nullptr;
7800     if (block->bbJumpKind == BBJ_COND)
7801     {
7802         GenTree* lastNode = LIR::AsRange(block).LastNode();
7803
7804         if (lastNode->OperIs(GT_JCMP))
7805         {
7806             GenTree* op1 = lastNode->gtGetOp1();
7807             switchRegs |= genRegMask(op1->gtRegNum);
7808
7809             if (op1->IsLocal())
7810             {
7811                 GenTreeLclVarCommon* lcl = op1->AsLclVarCommon();
7812                 jcmpLocalVarDsc          = &compiler->lvaTable[lcl->gtLclNum];
7813             }
7814         }
7815     }
7816 #endif
7817
7818     VarToRegMap sameVarToRegMap = sharedCriticalVarToRegMap;
7819     regMaskTP   sameWriteRegs   = RBM_NONE;
7820     regMaskTP   diffReadRegs    = RBM_NONE;
7821
7822     // For each var that may require resolution, classify them as:
7823     // - in the same register at the end of this block and at each target (no resolution needed)
7824     // - in different registers at different targets (resolve separately):
7825     //     diffResolutionSet
7826     // - in the same register at each target at which it's live, but different from the end of
7827     //   this block.  We may be able to resolve these as if it is "join", but only if they do not
7828     //   write to any registers that are read by those in the diffResolutionSet:
7829     //     sameResolutionSet
7830
7831     VarSetOps::Iter outResolutionSetIter(compiler, outResolutionSet);
7832     unsigned        outResolutionSetVarIndex = 0;
7833     while (outResolutionSetIter.NextElem(&outResolutionSetVarIndex))
7834     {
7835         regNumber fromReg             = getVarReg(outVarToRegMap, outResolutionSetVarIndex);
7836         bool      isMatch             = true;
7837         bool      isSame              = false;
7838         bool      maybeSingleTarget   = false;
7839         bool      maybeSameLivePaths  = false;
7840         bool      liveOnlyAtSplitEdge = true;
7841         regNumber sameToReg           = REG_NA;
7842         for (unsigned succIndex = 0; succIndex < succCount; succIndex++)
7843         {
7844             BasicBlock* succBlock = block->GetSucc(succIndex, compiler);
7845             if (!VarSetOps::IsMember(compiler, succBlock->bbLiveIn, outResolutionSetVarIndex))
7846             {
7847                 maybeSameLivePaths = true;
7848                 continue;
7849             }
7850             else if (liveOnlyAtSplitEdge)
7851             {
7852                 // Is the var live only at those target blocks which are connected by a split edge to this block
7853                 liveOnlyAtSplitEdge = ((succBlock->bbPreds->flNext == nullptr) && (succBlock != compiler->fgFirstBB));
7854             }
7855
7856             regNumber toReg = getVarReg(getInVarToRegMap(succBlock->bbNum), outResolutionSetVarIndex);
7857             if (sameToReg == REG_NA)
7858             {
7859                 sameToReg = toReg;
7860                 continue;
7861             }
7862             if (toReg == sameToReg)
7863             {
7864                 continue;
7865             }
7866             sameToReg = REG_NA;
7867             break;
7868         }
7869
7870         // Check for the cases where we can't write to a register.
7871         // We only need to check for these cases if sameToReg is an actual register (not REG_STK).
7872         if (sameToReg != REG_NA && sameToReg != REG_STK)
7873         {
7874             // If there's a path on which this var isn't live, it may use the original value in sameToReg.
7875             // In this case, sameToReg will be in the liveOutRegs of this block.
7876             // Similarly, if sameToReg is in sameWriteRegs, it has already been used (i.e. for a lclVar that's
7877             // live only at another target), and we can't copy another lclVar into that reg in this block.
7878             regMaskTP sameToRegMask =
7879                 genRegMask(sameToReg, getIntervalForLocalVar(outResolutionSetVarIndex)->registerType);
7880             if (maybeSameLivePaths &&
7881                 (((sameToRegMask & liveOutRegs) != RBM_NONE) || ((sameToRegMask & sameWriteRegs) != RBM_NONE)))
7882             {
7883                 sameToReg = REG_NA;
7884             }
7885             // If this register is used by a switch table at the end of the block, we can't do the copy
7886             // in this block (since we can't insert it after the switch).
7887             if ((sameToRegMask & switchRegs) != RBM_NONE)
7888             {
7889                 sameToReg = REG_NA;
7890             }
7891
7892 #ifdef _TARGET_ARM64_
7893             if (jcmpLocalVarDsc && (jcmpLocalVarDsc->lvVarIndex == outResolutionSetVarIndex))
7894             {
7895                 sameToReg = REG_NA;
7896             }
7897 #endif
7898
7899             // If the var is live only at those blocks connected by a split edge and not live-in at some of the
7900             // target blocks, we will resolve it the same way as if it were in diffResolutionSet and resolution
7901             // will be deferred to the handling of split edges, which means copy will only be at those target(s).
7902             //
7903             // Another way to achieve similar resolution for vars live only at split edges is by removing them
7904             // from consideration up-front but it requires that we traverse those edges anyway to account for
7905             // the registers that must not be overwritten.
7906             if (liveOnlyAtSplitEdge && maybeSameLivePaths)
7907             {
7908                 sameToReg = REG_NA;
7909             }
7910         }
7911
7912         if (sameToReg == REG_NA)
7913         {
7914             VarSetOps::AddElemD(compiler, diffResolutionSet, outResolutionSetVarIndex);
7915             if (fromReg != REG_STK)
7916             {
7917                 diffReadRegs |= genRegMask(fromReg, getIntervalForLocalVar(outResolutionSetVarIndex)->registerType);
7918             }
7919         }
7920         else if (sameToReg != fromReg)
7921         {
7922             VarSetOps::AddElemD(compiler, sameResolutionSet, outResolutionSetVarIndex);
7923             setVarReg(sameVarToRegMap, outResolutionSetVarIndex, sameToReg);
7924             if (sameToReg != REG_STK)
7925             {
7926                 sameWriteRegs |= genRegMask(sameToReg, getIntervalForLocalVar(outResolutionSetVarIndex)->registerType);
7927             }
7928         }
7929     }
7930
7931     if (!VarSetOps::IsEmpty(compiler, sameResolutionSet))
7932     {
7933         if ((sameWriteRegs & diffReadRegs) != RBM_NONE)
7934         {
7935             // We cannot split the "same" and "diff" regs if the "same" set writes registers
7936             // that must be read by the "diff" set.  (Note that when these are done as a "batch"
7937             // we carefully order them to ensure all the input regs are read before they are
7938             // overwritten.)
7939             VarSetOps::UnionD(compiler, diffResolutionSet, sameResolutionSet);
7940             VarSetOps::ClearD(compiler, sameResolutionSet);
7941         }
7942         else
7943         {
7944             // For any vars in the sameResolutionSet, we can simply add the move at the end of "block".
7945             resolveEdge(block, nullptr, ResolveSharedCritical, sameResolutionSet);
7946         }
7947     }
7948     if (!VarSetOps::IsEmpty(compiler, diffResolutionSet))
7949     {
7950         for (unsigned succIndex = 0; succIndex < succCount; succIndex++)
7951         {
7952             BasicBlock* succBlock = block->GetSucc(succIndex, compiler);
7953
7954             // Any "diffResolutionSet" resolution for a block with no other predecessors will be handled later
7955             // as split resolution.
7956             if ((succBlock->bbPreds->flNext == nullptr) && (succBlock != compiler->fgFirstBB))
7957             {
7958                 continue;
7959             }
7960
7961             // Now collect the resolution set for just this edge, if any.
7962             // Check only the vars in diffResolutionSet that are live-in to this successor.
7963             bool        needsResolution   = false;
7964             VarToRegMap succInVarToRegMap = getInVarToRegMap(succBlock->bbNum);
7965             VARSET_TP   edgeResolutionSet(VarSetOps::Intersection(compiler, diffResolutionSet, succBlock->bbLiveIn));
7966             VarSetOps::Iter iter(compiler, edgeResolutionSet);
7967             unsigned        varIndex = 0;
7968             while (iter.NextElem(&varIndex))
7969             {
7970                 regNumber fromReg = getVarReg(outVarToRegMap, varIndex);
7971                 regNumber toReg   = getVarReg(succInVarToRegMap, varIndex);
7972
7973                 if (fromReg == toReg)
7974                 {
7975                     VarSetOps::RemoveElemD(compiler, edgeResolutionSet, varIndex);
7976                 }
7977             }
7978             if (!VarSetOps::IsEmpty(compiler, edgeResolutionSet))
7979             {
7980                 resolveEdge(block, succBlock, ResolveCritical, edgeResolutionSet);
7981             }
7982         }
7983     }
7984 }
7985
7986 //------------------------------------------------------------------------
7987 // resolveEdges: Perform resolution across basic block edges
7988 //
7989 // Arguments:
7990 //    None.
7991 //
7992 // Return Value:
7993 //    None.
7994 //
7995 // Notes:
7996 //    Traverse the basic blocks.
7997 //    - If this block has a single predecessor that is not the immediately
7998 //      preceding block, perform any needed 'split' resolution at the beginning of this block
7999 //    - Otherwise if this block has critical incoming edges, handle them.
8000 //    - If this block has a single successor that has multiple predecessors, perform any needed
8001 //      'join' resolution at the end of this block.
8002 //    Note that a block may have both 'split' or 'critical' incoming edge(s) and 'join' outgoing
8003 //    edges.
8004
8005 void LinearScan::resolveEdges()
8006 {
8007     JITDUMP("RESOLVING EDGES\n");
8008
8009     // The resolutionCandidateVars set was initialized with all the lclVars that are live-in to
8010     // any block. We now intersect that set with any lclVars that ever spilled or split.
8011     // If there are no candidates for resoultion, simply return.
8012
8013     VarSetOps::IntersectionD(compiler, resolutionCandidateVars, splitOrSpilledVars);
8014     if (VarSetOps::IsEmpty(compiler, resolutionCandidateVars))
8015     {
8016         return;
8017     }
8018
8019     BasicBlock *block, *prevBlock = nullptr;
8020
8021     // Handle all the critical edges first.
8022     // We will try to avoid resolution across critical edges in cases where all the critical-edge
8023     // targets of a block have the same home.  We will then split the edges only for the
8024     // remaining mismatches.  We visit the out-edges, as that allows us to share the moves that are
8025     // common among all the targets.
8026
8027     if (hasCriticalEdges)
8028     {
8029         foreach_block(compiler, block)
8030         {
8031             if (block->bbNum > bbNumMaxBeforeResolution)
8032             {
8033                 // This is a new block added during resolution - we don't need to visit these now.
8034                 continue;
8035             }
8036             if (blockInfo[block->bbNum].hasCriticalOutEdge)
8037             {
8038                 handleOutgoingCriticalEdges(block);
8039             }
8040             prevBlock = block;
8041         }
8042     }
8043
8044     prevBlock = nullptr;
8045     foreach_block(compiler, block)
8046     {
8047         if (block->bbNum > bbNumMaxBeforeResolution)
8048         {
8049             // This is a new block added during resolution - we don't need to visit these now.
8050             continue;
8051         }
8052
8053         unsigned    succCount       = block->NumSucc(compiler);
8054         flowList*   preds           = block->bbPreds;
8055         BasicBlock* uniquePredBlock = block->GetUniquePred(compiler);
8056
8057         // First, if this block has a single predecessor,
8058         // we may need resolution at the beginning of this block.
8059         // This may be true even if it's the block we used for starting locations,
8060         // if a variable was spilled.
8061         VARSET_TP inResolutionSet(VarSetOps::Intersection(compiler, block->bbLiveIn, resolutionCandidateVars));
8062         if (!VarSetOps::IsEmpty(compiler, inResolutionSet))
8063         {
8064             if (uniquePredBlock != nullptr)
8065             {
8066                 // We may have split edges during critical edge resolution, and in the process split
8067                 // a non-critical edge as well.
8068                 // It is unlikely that we would ever have more than one of these in sequence (indeed,
8069                 // I don't think it's possible), but there's no need to assume that it can't.
8070                 while (uniquePredBlock->bbNum > bbNumMaxBeforeResolution)
8071                 {
8072                     uniquePredBlock = uniquePredBlock->GetUniquePred(compiler);
8073                     noway_assert(uniquePredBlock != nullptr);
8074                 }
8075                 resolveEdge(uniquePredBlock, block, ResolveSplit, inResolutionSet);
8076             }
8077         }
8078
8079         // Finally, if this block has a single successor:
8080         //  - and that has at least one other predecessor (otherwise we will do the resolution at the
8081         //    top of the successor),
8082         //  - and that is not the target of a critical edge (otherwise we've already handled it)
8083         // we may need resolution at the end of this block.
8084
8085         if (succCount == 1)
8086         {
8087             BasicBlock* succBlock = block->GetSucc(0, compiler);
8088             if (succBlock->GetUniquePred(compiler) == nullptr)
8089             {
8090                 VARSET_TP outResolutionSet(
8091                     VarSetOps::Intersection(compiler, succBlock->bbLiveIn, resolutionCandidateVars));
8092                 if (!VarSetOps::IsEmpty(compiler, outResolutionSet))
8093                 {
8094                     resolveEdge(block, succBlock, ResolveJoin, outResolutionSet);
8095                 }
8096             }
8097         }
8098     }
8099
8100     // Now, fixup the mapping for any blocks that were adding for edge splitting.
8101     // See the comment prior to the call to fgSplitEdge() in resolveEdge().
8102     // Note that we could fold this loop in with the checking code below, but that
8103     // would only improve the debug case, and would clutter up the code somewhat.
8104     if (compiler->fgBBNumMax > bbNumMaxBeforeResolution)
8105     {
8106         foreach_block(compiler, block)
8107         {
8108             if (block->bbNum > bbNumMaxBeforeResolution)
8109             {
8110                 // There may be multiple blocks inserted when we split.  But we must always have exactly
8111                 // one path (i.e. all blocks must be single-successor and single-predecessor),
8112                 // and only one block along the path may be non-empty.
8113                 // Note that we may have a newly-inserted block that is empty, but which connects
8114                 // two non-resolution blocks. This happens when an edge is split that requires it.
8115
8116                 BasicBlock* succBlock = block;
8117                 do
8118                 {
8119                     succBlock = succBlock->GetUniqueSucc();
8120                     noway_assert(succBlock != nullptr);
8121                 } while ((succBlock->bbNum > bbNumMaxBeforeResolution) && succBlock->isEmpty());
8122
8123                 BasicBlock* predBlock = block;
8124                 do
8125                 {
8126                     predBlock = predBlock->GetUniquePred(compiler);
8127                     noway_assert(predBlock != nullptr);
8128                 } while ((predBlock->bbNum > bbNumMaxBeforeResolution) && predBlock->isEmpty());
8129
8130                 unsigned succBBNum = succBlock->bbNum;
8131                 unsigned predBBNum = predBlock->bbNum;
8132                 if (block->isEmpty())
8133                 {
8134                     // For the case of the empty block, find the non-resolution block (succ or pred).
8135                     if (predBBNum > bbNumMaxBeforeResolution)
8136                     {
8137                         assert(succBBNum <= bbNumMaxBeforeResolution);
8138                         predBBNum = 0;
8139                     }
8140                     else
8141                     {
8142                         succBBNum = 0;
8143                     }
8144                 }
8145                 else
8146                 {
8147                     assert((succBBNum <= bbNumMaxBeforeResolution) && (predBBNum <= bbNumMaxBeforeResolution));
8148                 }
8149                 SplitEdgeInfo info = {predBBNum, succBBNum};
8150                 getSplitBBNumToTargetBBNumMap()->Set(block->bbNum, info);
8151             }
8152         }
8153     }
8154
8155 #ifdef DEBUG
8156     // Make sure the varToRegMaps match up on all edges.
8157     bool foundMismatch = false;
8158     foreach_block(compiler, block)
8159     {
8160         if (block->isEmpty() && block->bbNum > bbNumMaxBeforeResolution)
8161         {
8162             continue;
8163         }
8164         VarToRegMap toVarToRegMap = getInVarToRegMap(block->bbNum);
8165         for (flowList* pred = block->bbPreds; pred != nullptr; pred = pred->flNext)
8166         {
8167             BasicBlock*     predBlock       = pred->flBlock;
8168             VarToRegMap     fromVarToRegMap = getOutVarToRegMap(predBlock->bbNum);
8169             VarSetOps::Iter iter(compiler, block->bbLiveIn);
8170             unsigned        varIndex = 0;
8171             while (iter.NextElem(&varIndex))
8172             {
8173                 regNumber fromReg = getVarReg(fromVarToRegMap, varIndex);
8174                 regNumber toReg   = getVarReg(toVarToRegMap, varIndex);
8175                 if (fromReg != toReg)
8176                 {
8177                     if (!foundMismatch)
8178                     {
8179                         foundMismatch = true;
8180                         printf("Found mismatched var locations after resolution!\n");
8181                     }
8182                     unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
8183                     printf(" V%02u: " FMT_BB " to " FMT_BB ": %s to %s\n", varNum, predBlock->bbNum, block->bbNum,
8184                            getRegName(fromReg), getRegName(toReg));
8185                 }
8186             }
8187         }
8188     }
8189     assert(!foundMismatch);
8190 #endif
8191     JITDUMP("\n");
8192 }
8193
8194 //------------------------------------------------------------------------
8195 // resolveEdge: Perform the specified type of resolution between two blocks.
8196 //
8197 // Arguments:
8198 //    fromBlock     - the block from which the edge originates
8199 //    toBlock       - the block at which the edge terminates
8200 //    resolveType   - the type of resolution to be performed
8201 //    liveSet       - the set of tracked lclVar indices which may require resolution
8202 //
8203 // Return Value:
8204 //    None.
8205 //
8206 // Assumptions:
8207 //    The caller must have performed the analysis to determine the type of the edge.
8208 //
8209 // Notes:
8210 //    This method emits the correctly ordered moves necessary to place variables in the
8211 //    correct registers across a Split, Join or Critical edge.
8212 //    In order to avoid overwriting register values before they have been moved to their
8213 //    new home (register/stack), it first does the register-to-stack moves (to free those
8214 //    registers), then the register to register moves, ensuring that the target register
8215 //    is free before the move, and then finally the stack to register moves.
8216
8217 void LinearScan::resolveEdge(BasicBlock*      fromBlock,
8218                              BasicBlock*      toBlock,
8219                              ResolveType      resolveType,
8220                              VARSET_VALARG_TP liveSet)
8221 {
8222     VarToRegMap fromVarToRegMap = getOutVarToRegMap(fromBlock->bbNum);
8223     VarToRegMap toVarToRegMap;
8224     if (resolveType == ResolveSharedCritical)
8225     {
8226         toVarToRegMap = sharedCriticalVarToRegMap;
8227     }
8228     else
8229     {
8230         toVarToRegMap = getInVarToRegMap(toBlock->bbNum);
8231     }
8232
8233     // The block to which we add the resolution moves depends on the resolveType
8234     BasicBlock* block;
8235     switch (resolveType)
8236     {
8237         case ResolveJoin:
8238         case ResolveSharedCritical:
8239             block = fromBlock;
8240             break;
8241         case ResolveSplit:
8242             block = toBlock;
8243             break;
8244         case ResolveCritical:
8245             // fgSplitEdge may add one or two BasicBlocks.  It returns the block that splits
8246             // the edge from 'fromBlock' and 'toBlock', but if it inserts that block right after
8247             // a block with a fall-through it will have to create another block to handle that edge.
8248             // These new blocks can be mapped to existing blocks in order to correctly handle
8249             // the calls to recordVarLocationsAtStartOfBB() from codegen.  That mapping is handled
8250             // in resolveEdges(), after all the edge resolution has been done (by calling this
8251             // method for each edge).
8252             block = compiler->fgSplitEdge(fromBlock, toBlock);
8253
8254             // Split edges are counted against fromBlock.
8255             INTRACK_STATS(updateLsraStat(LSRA_STAT_SPLIT_EDGE, fromBlock->bbNum));
8256             break;
8257         default:
8258             unreached();
8259             break;
8260     }
8261
8262 #ifndef _TARGET_XARCH_
8263     // We record tempregs for beginning and end of each block.
8264     // For amd64/x86 we only need a tempReg for float - we'll use xchg for int.
8265     // TODO-Throughput: It would be better to determine the tempRegs on demand, but the code below
8266     // modifies the varToRegMaps so we don't have all the correct registers at the time
8267     // we need to get the tempReg.
8268     regNumber tempRegInt =
8269         (resolveType == ResolveSharedCritical) ? REG_NA : getTempRegForResolution(fromBlock, toBlock, TYP_INT);
8270 #endif // !_TARGET_XARCH_
8271     regNumber tempRegFlt = REG_NA;
8272     regNumber tempRegDbl = REG_NA; // Used only for ARM
8273     if ((compiler->compFloatingPointUsed) && (resolveType != ResolveSharedCritical))
8274     {
8275 #ifdef _TARGET_ARM_
8276         // Try to reserve a double register for TYP_DOUBLE and use it for TYP_FLOAT too if available.
8277         tempRegDbl = getTempRegForResolution(fromBlock, toBlock, TYP_DOUBLE);
8278         if (tempRegDbl != REG_NA)
8279         {
8280             tempRegFlt = tempRegDbl;
8281         }
8282         else
8283 #endif // _TARGET_ARM_
8284         {
8285             tempRegFlt = getTempRegForResolution(fromBlock, toBlock, TYP_FLOAT);
8286         }
8287     }
8288
8289     regMaskTP targetRegsToDo      = RBM_NONE;
8290     regMaskTP targetRegsReady     = RBM_NONE;
8291     regMaskTP targetRegsFromStack = RBM_NONE;
8292
8293     // The following arrays capture the location of the registers as they are moved:
8294     // - location[reg] gives the current location of the var that was originally in 'reg'.
8295     //   (Note that a var may be moved more than once.)
8296     // - source[reg] gives the original location of the var that needs to be moved to 'reg'.
8297     // For example, if a var is in rax and needs to be moved to rsi, then we would start with:
8298     //   location[rax] == rax
8299     //   source[rsi] == rax     -- this doesn't change
8300     // Then, if for some reason we need to move it temporary to rbx, we would have:
8301     //   location[rax] == rbx
8302     // Once we have completed the move, we will have:
8303     //   location[rax] == REG_NA
8304     // This indicates that the var originally in rax is now in its target register.
8305
8306     regNumberSmall location[REG_COUNT];
8307     C_ASSERT(sizeof(char) == sizeof(regNumberSmall)); // for memset to work
8308     memset(location, REG_NA, REG_COUNT);
8309     regNumberSmall source[REG_COUNT];
8310     memset(source, REG_NA, REG_COUNT);
8311
8312     // What interval is this register associated with?
8313     // (associated with incoming reg)
8314     Interval* sourceIntervals[REG_COUNT];
8315     memset(&sourceIntervals, 0, sizeof(sourceIntervals));
8316
8317     // Intervals for vars that need to be loaded from the stack
8318     Interval* stackToRegIntervals[REG_COUNT];
8319     memset(&stackToRegIntervals, 0, sizeof(stackToRegIntervals));
8320
8321     // Get the starting insertion point for the "to" resolution
8322     GenTree* insertionPoint = nullptr;
8323     if (resolveType == ResolveSplit || resolveType == ResolveCritical)
8324     {
8325         insertionPoint = LIR::AsRange(block).FirstNonPhiNode();
8326     }
8327
8328     // First:
8329     //   - Perform all moves from reg to stack (no ordering needed on these)
8330     //   - For reg to reg moves, record the current location, associating their
8331     //     source location with the target register they need to go into
8332     //   - For stack to reg moves (done last, no ordering needed between them)
8333     //     record the interval associated with the target reg
8334     // TODO-Throughput: We should be looping over the liveIn and liveOut registers, since
8335     // that will scale better than the live variables
8336
8337     VarSetOps::Iter iter(compiler, liveSet);
8338     unsigned        varIndex = 0;
8339     while (iter.NextElem(&varIndex))
8340     {
8341         regNumber fromReg = getVarReg(fromVarToRegMap, varIndex);
8342         regNumber toReg   = getVarReg(toVarToRegMap, varIndex);
8343         if (fromReg == toReg)
8344         {
8345             continue;
8346         }
8347
8348         // For Critical edges, the location will not change on either side of the edge,
8349         // since we'll add a new block to do the move.
8350         if (resolveType == ResolveSplit)
8351         {
8352             setVarReg(toVarToRegMap, varIndex, fromReg);
8353         }
8354         else if (resolveType == ResolveJoin || resolveType == ResolveSharedCritical)
8355         {
8356             setVarReg(fromVarToRegMap, varIndex, toReg);
8357         }
8358
8359         assert(fromReg < UCHAR_MAX && toReg < UCHAR_MAX);
8360
8361         Interval* interval = getIntervalForLocalVar(varIndex);
8362
8363         if (fromReg == REG_STK)
8364         {
8365             stackToRegIntervals[toReg] = interval;
8366             targetRegsFromStack |= genRegMask(toReg);
8367         }
8368         else if (toReg == REG_STK)
8369         {
8370             // Do the reg to stack moves now
8371             addResolution(block, insertionPoint, interval, REG_STK, fromReg);
8372             JITDUMP(" (%s)\n", resolveTypeName[resolveType]);
8373         }
8374         else
8375         {
8376             location[fromReg]        = (regNumberSmall)fromReg;
8377             source[toReg]            = (regNumberSmall)fromReg;
8378             sourceIntervals[fromReg] = interval;
8379             targetRegsToDo |= genRegMask(toReg);
8380         }
8381     }
8382
8383     // REGISTER to REGISTER MOVES
8384
8385     // First, find all the ones that are ready to move now
8386     regMaskTP targetCandidates = targetRegsToDo;
8387     while (targetCandidates != RBM_NONE)
8388     {
8389         regMaskTP targetRegMask = genFindLowestBit(targetCandidates);
8390         targetCandidates &= ~targetRegMask;
8391         regNumber targetReg = genRegNumFromMask(targetRegMask);
8392         if (location[targetReg] == REG_NA)
8393         {
8394 #ifdef _TARGET_ARM_
8395             regNumber sourceReg = (regNumber)source[targetReg];
8396             Interval* interval  = sourceIntervals[sourceReg];
8397             if (interval->registerType == TYP_DOUBLE)
8398             {
8399                 // For ARM32, make sure that both of the float halves of the double register are available.
8400                 assert(genIsValidDoubleReg(targetReg));
8401                 regNumber anotherHalfRegNum = REG_NEXT(targetReg);
8402                 if (location[anotherHalfRegNum] == REG_NA)
8403                 {
8404                     targetRegsReady |= targetRegMask;
8405                 }
8406             }
8407             else
8408 #endif // _TARGET_ARM_
8409             {
8410                 targetRegsReady |= targetRegMask;
8411             }
8412         }
8413     }
8414
8415     // Perform reg to reg moves
8416     while (targetRegsToDo != RBM_NONE)
8417     {
8418         while (targetRegsReady != RBM_NONE)
8419         {
8420             regMaskTP targetRegMask = genFindLowestBit(targetRegsReady);
8421             targetRegsToDo &= ~targetRegMask;
8422             targetRegsReady &= ~targetRegMask;
8423             regNumber targetReg = genRegNumFromMask(targetRegMask);
8424             assert(location[targetReg] != targetReg);
8425             regNumber sourceReg = (regNumber)source[targetReg];
8426             regNumber fromReg   = (regNumber)location[sourceReg];
8427             assert(fromReg < UCHAR_MAX && sourceReg < UCHAR_MAX);
8428             Interval* interval = sourceIntervals[sourceReg];
8429             assert(interval != nullptr);
8430             addResolution(block, insertionPoint, interval, targetReg, fromReg);
8431             JITDUMP(" (%s)\n", resolveTypeName[resolveType]);
8432             sourceIntervals[sourceReg] = nullptr;
8433             location[sourceReg]        = REG_NA;
8434
8435             // Do we have a free targetReg?
8436             if (fromReg == sourceReg)
8437             {
8438                 if (source[fromReg] != REG_NA)
8439                 {
8440                     regMaskTP fromRegMask = genRegMask(fromReg);
8441                     targetRegsReady |= fromRegMask;
8442 #ifdef _TARGET_ARM_
8443                     if (genIsValidDoubleReg(fromReg))
8444                     {
8445                         // Ensure that either:
8446                         // - the Interval targeting fromReg is not double, or
8447                         // - the other half of the double is free.
8448                         Interval* otherInterval = sourceIntervals[source[fromReg]];
8449                         regNumber upperHalfReg  = REG_NEXT(fromReg);
8450                         if ((otherInterval->registerType == TYP_DOUBLE) && (location[upperHalfReg] != REG_NA))
8451                         {
8452                             targetRegsReady &= ~fromRegMask;
8453                         }
8454                     }
8455                 }
8456                 else if (genIsValidFloatReg(fromReg) && !genIsValidDoubleReg(fromReg))
8457                 {
8458                     // We may have freed up the other half of a double where the lower half
8459                     // was already free.
8460                     regNumber lowerHalfReg    = REG_PREV(fromReg);
8461                     regNumber lowerHalfSrcReg = (regNumber)source[lowerHalfReg];
8462                     regNumber lowerHalfSrcLoc = (regNumber)location[lowerHalfReg];
8463                     // Necessary conditions:
8464                     // - There is a source register for this reg (lowerHalfSrcReg != REG_NA)
8465                     // - It is currently free                    (lowerHalfSrcLoc == REG_NA)
8466                     // - The source interval isn't yet completed (sourceIntervals[lowerHalfSrcReg] != nullptr)
8467                     // - It's not in the ready set               ((targetRegsReady & genRegMask(lowerHalfReg)) ==
8468                     //                                            RBM_NONE)
8469                     //
8470                     if ((lowerHalfSrcReg != REG_NA) && (lowerHalfSrcLoc == REG_NA) &&
8471                         (sourceIntervals[lowerHalfSrcReg] != nullptr) &&
8472                         ((targetRegsReady & genRegMask(lowerHalfReg)) == RBM_NONE))
8473                     {
8474                         // This must be a double interval, otherwise it would be in targetRegsReady, or already
8475                         // completed.
8476                         assert(sourceIntervals[lowerHalfSrcReg]->registerType == TYP_DOUBLE);
8477                         targetRegsReady |= genRegMask(lowerHalfReg);
8478                     }
8479 #endif // _TARGET_ARM_
8480                 }
8481             }
8482         }
8483         if (targetRegsToDo != RBM_NONE)
8484         {
8485             regMaskTP targetRegMask = genFindLowestBit(targetRegsToDo);
8486             regNumber targetReg     = genRegNumFromMask(targetRegMask);
8487
8488             // Is it already there due to other moves?
8489             // If not, move it to the temp reg, OR swap it with another register
8490             regNumber sourceReg = (regNumber)source[targetReg];
8491             regNumber fromReg   = (regNumber)location[sourceReg];
8492             if (targetReg == fromReg)
8493             {
8494                 targetRegsToDo &= ~targetRegMask;
8495             }
8496             else
8497             {
8498                 regNumber tempReg = REG_NA;
8499                 bool      useSwap = false;
8500                 if (emitter::isFloatReg(targetReg))
8501                 {
8502 #ifdef _TARGET_ARM_
8503                     if (sourceIntervals[fromReg]->registerType == TYP_DOUBLE)
8504                     {
8505                         // ARM32 requires a double temp register for TYP_DOUBLE.
8506                         tempReg = tempRegDbl;
8507                     }
8508                     else
8509 #endif // _TARGET_ARM_
8510                         tempReg = tempRegFlt;
8511                 }
8512 #ifdef _TARGET_XARCH_
8513                 else
8514                 {
8515                     useSwap = true;
8516                 }
8517 #else // !_TARGET_XARCH_
8518
8519                 else
8520                 {
8521                     tempReg = tempRegInt;
8522                 }
8523
8524 #endif // !_TARGET_XARCH_
8525                 if (useSwap || tempReg == REG_NA)
8526                 {
8527                     // First, we have to figure out the destination register for what's currently in fromReg,
8528                     // so that we can find its sourceInterval.
8529                     regNumber otherTargetReg = REG_NA;
8530
8531                     // By chance, is fromReg going where it belongs?
8532                     if (location[source[fromReg]] == targetReg)
8533                     {
8534                         otherTargetReg = fromReg;
8535                         // If we can swap, we will be done with otherTargetReg as well.
8536                         // Otherwise, we'll spill it to the stack and reload it later.
8537                         if (useSwap)
8538                         {
8539                             regMaskTP fromRegMask = genRegMask(fromReg);
8540                             targetRegsToDo &= ~fromRegMask;
8541                         }
8542                     }
8543                     else
8544                     {
8545                         // Look at the remaining registers from targetRegsToDo (which we expect to be relatively
8546                         // small at this point) to find out what's currently in targetReg.
8547                         regMaskTP mask = targetRegsToDo;
8548                         while (mask != RBM_NONE && otherTargetReg == REG_NA)
8549                         {
8550                             regMaskTP nextRegMask = genFindLowestBit(mask);
8551                             regNumber nextReg     = genRegNumFromMask(nextRegMask);
8552                             mask &= ~nextRegMask;
8553                             if (location[source[nextReg]] == targetReg)
8554                             {
8555                                 otherTargetReg = nextReg;
8556                             }
8557                         }
8558                     }
8559                     assert(otherTargetReg != REG_NA);
8560
8561                     if (useSwap)
8562                     {
8563                         // Generate a "swap" of fromReg and targetReg
8564                         insertSwap(block, insertionPoint, sourceIntervals[source[otherTargetReg]]->varNum, targetReg,
8565                                    sourceIntervals[sourceReg]->varNum, fromReg);
8566                         location[sourceReg]              = REG_NA;
8567                         location[source[otherTargetReg]] = (regNumberSmall)fromReg;
8568
8569                         INTRACK_STATS(updateLsraStat(LSRA_STAT_RESOLUTION_MOV, block->bbNum));
8570                     }
8571                     else
8572                     {
8573                         // Spill "targetReg" to the stack and add its eventual target (otherTargetReg)
8574                         // to "targetRegsFromStack", which will be handled below.
8575                         // NOTE: This condition is very rare.  Setting COMPlus_JitStressRegs=0x203
8576                         // has been known to trigger it in JIT SH.
8577
8578                         // First, spill "otherInterval" from targetReg to the stack.
8579                         Interval* otherInterval = sourceIntervals[source[otherTargetReg]];
8580                         setIntervalAsSpilled(otherInterval);
8581                         addResolution(block, insertionPoint, otherInterval, REG_STK, targetReg);
8582                         JITDUMP(" (%s)\n", resolveTypeName[resolveType]);
8583                         location[source[otherTargetReg]] = REG_STK;
8584
8585                         // Now, move the interval that is going to targetReg, and add its "fromReg" to
8586                         // "targetRegsReady".
8587                         addResolution(block, insertionPoint, sourceIntervals[sourceReg], targetReg, fromReg);
8588                         JITDUMP(" (%s)\n", resolveTypeName[resolveType]);
8589                         location[sourceReg] = REG_NA;
8590                         targetRegsReady |= genRegMask(fromReg);
8591                     }
8592                     targetRegsToDo &= ~targetRegMask;
8593                 }
8594                 else
8595                 {
8596                     compiler->codeGen->regSet.rsSetRegsModified(genRegMask(tempReg) DEBUGARG(true));
8597 #ifdef _TARGET_ARM_
8598                     if (sourceIntervals[fromReg]->registerType == TYP_DOUBLE)
8599                     {
8600                         assert(genIsValidDoubleReg(targetReg));
8601                         assert(genIsValidDoubleReg(tempReg));
8602
8603                         addResolutionForDouble(block, insertionPoint, sourceIntervals, location, tempReg, targetReg,
8604                                                resolveType);
8605                     }
8606                     else
8607 #endif // _TARGET_ARM_
8608                     {
8609                         assert(sourceIntervals[targetReg] != nullptr);
8610
8611                         addResolution(block, insertionPoint, sourceIntervals[targetReg], tempReg, targetReg);
8612                         JITDUMP(" (%s)\n", resolveTypeName[resolveType]);
8613                         location[targetReg] = (regNumberSmall)tempReg;
8614                     }
8615                     targetRegsReady |= targetRegMask;
8616                 }
8617             }
8618         }
8619     }
8620
8621     // Finally, perform stack to reg moves
8622     // All the target regs will be empty at this point
8623     while (targetRegsFromStack != RBM_NONE)
8624     {
8625         regMaskTP targetRegMask = genFindLowestBit(targetRegsFromStack);
8626         targetRegsFromStack &= ~targetRegMask;
8627         regNumber targetReg = genRegNumFromMask(targetRegMask);
8628
8629         Interval* interval = stackToRegIntervals[targetReg];
8630         assert(interval != nullptr);
8631
8632         addResolution(block, insertionPoint, interval, targetReg, REG_STK);
8633         JITDUMP(" (%s)\n", resolveTypeName[resolveType]);
8634     }
8635 }
8636
8637 #if TRACK_LSRA_STATS
8638 // ----------------------------------------------------------
8639 // updateLsraStat: Increment LSRA stat counter.
8640 //
8641 // Arguments:
8642 //    stat      -   LSRA stat enum
//    bbNum     -   Basic block with which the LSRA stat is to be
//                  associated.
8645 //
8646 void LinearScan::updateLsraStat(LsraStat stat, unsigned bbNum)
8647 {
8648     if (bbNum > bbNumMaxBeforeResolution)
8649     {
8650         // This is a newly created basic block as part of resolution.
8651         // These blocks contain resolution moves that are already accounted.
8652         return;
8653     }
8654
8655     switch (stat)
8656     {
8657         case LSRA_STAT_SPILL:
8658             ++(blockInfo[bbNum].spillCount);
8659             break;
8660
8661         case LSRA_STAT_COPY_REG:
8662             ++(blockInfo[bbNum].copyRegCount);
8663             break;
8664
8665         case LSRA_STAT_RESOLUTION_MOV:
8666             ++(blockInfo[bbNum].resolutionMovCount);
8667             break;
8668
8669         case LSRA_STAT_SPLIT_EDGE:
8670             ++(blockInfo[bbNum].splitEdgeCount);
8671             break;
8672
8673         default:
8674             break;
8675     }
8676 }
8677
8678 // -----------------------------------------------------------
8679 // dumpLsraStats - dumps Lsra stats to given file.
8680 //
8681 // Arguments:
8682 //    file    -  file to which stats are to be written.
8683 //
8684 void LinearScan::dumpLsraStats(FILE* file)
8685 {
8686     unsigned sumSpillCount         = 0;
8687     unsigned sumCopyRegCount       = 0;
8688     unsigned sumResolutionMovCount = 0;
8689     unsigned sumSplitEdgeCount     = 0;
8690     UINT64   wtdSpillCount         = 0;
8691     UINT64   wtdCopyRegCount       = 0;
8692     UINT64   wtdResolutionMovCount = 0;
8693
8694     fprintf(file, "----------\n");
8695     fprintf(file, "LSRA Stats");
8696 #ifdef DEBUG
8697     if (!VERBOSE)
8698     {
8699         fprintf(file, " : %s\n", compiler->info.compFullName);
8700     }
8701     else
8702     {
8703         // In verbose mode no need to print full name
8704         // while printing lsra stats.
8705         fprintf(file, "\n");
8706     }
8707 #else
8708     fprintf(file, " : %s\n", compiler->eeGetMethodFullName(compiler->info.compCompHnd));
8709 #endif
8710
8711     fprintf(file, "----------\n");
8712
8713     for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = block->bbNext)
8714     {
8715         if (block->bbNum > bbNumMaxBeforeResolution)
8716         {
8717             continue;
8718         }
8719
8720         unsigned spillCount         = blockInfo[block->bbNum].spillCount;
8721         unsigned copyRegCount       = blockInfo[block->bbNum].copyRegCount;
8722         unsigned resolutionMovCount = blockInfo[block->bbNum].resolutionMovCount;
8723         unsigned splitEdgeCount     = blockInfo[block->bbNum].splitEdgeCount;
8724
8725         if (spillCount != 0 || copyRegCount != 0 || resolutionMovCount != 0 || splitEdgeCount != 0)
8726         {
8727             fprintf(file, FMT_BB " [%8d]: ", block->bbNum, block->bbWeight);
8728             fprintf(file, "SpillCount = %d, ResolutionMovs = %d, SplitEdges = %d, CopyReg = %d\n", spillCount,
8729                     resolutionMovCount, splitEdgeCount, copyRegCount);
8730         }
8731
8732         sumSpillCount += spillCount;
8733         sumCopyRegCount += copyRegCount;
8734         sumResolutionMovCount += resolutionMovCount;
8735         sumSplitEdgeCount += splitEdgeCount;
8736
8737         wtdSpillCount += (UINT64)spillCount * block->bbWeight;
8738         wtdCopyRegCount += (UINT64)copyRegCount * block->bbWeight;
8739         wtdResolutionMovCount += (UINT64)resolutionMovCount * block->bbWeight;
8740     }
8741
8742     fprintf(file, "Total Tracked Vars:  %d\n", compiler->lvaTrackedCount);
8743     fprintf(file, "Total Reg Cand Vars: %d\n", regCandidateVarCount);
8744     fprintf(file, "Total number of Intervals: %d\n", static_cast<unsigned>(intervals.size() - 1));
8745     fprintf(file, "Total number of RefPositions: %d\n", static_cast<unsigned>(refPositions.size() - 1));
8746     fprintf(file, "Total Spill Count: %d    Weighted: %I64u\n", sumSpillCount, wtdSpillCount);
8747     fprintf(file, "Total CopyReg Count: %d   Weighted: %I64u\n", sumCopyRegCount, wtdCopyRegCount);
8748     fprintf(file, "Total ResolutionMov Count: %d    Weighted: %I64u\n", sumResolutionMovCount, wtdResolutionMovCount);
8749     fprintf(file, "Total number of split edges: %d\n", sumSplitEdgeCount);
8750
8751     // compute total number of spill temps created
8752     unsigned numSpillTemps = 0;
8753     for (int i = 0; i < TYP_COUNT; i++)
8754     {
8755         numSpillTemps += maxSpill[i];
8756     }
8757     fprintf(file, "Total Number of spill temps created: %d\n\n", numSpillTemps);
8758 }
8759 #endif // TRACK_LSRA_STATS
8760
8761 #ifdef DEBUG
8762 void dumpRegMask(regMaskTP regs)
8763 {
8764     if (regs == RBM_ALLINT)
8765     {
8766         printf("[allInt]");
8767     }
8768     else if (regs == (RBM_ALLINT & ~RBM_FPBASE))
8769     {
8770         printf("[allIntButFP]");
8771     }
8772     else if (regs == RBM_ALLFLOAT)
8773     {
8774         printf("[allFloat]");
8775     }
8776     else if (regs == RBM_ALLDOUBLE)
8777     {
8778         printf("[allDouble]");
8779     }
8780     else
8781     {
8782         dspRegMask(regs);
8783     }
8784 }
8785
// getRefTypeName: map a RefType value to its full enumerator name
// (e.g. "RefTypeUse"), or nullptr for an unrecognized value.
// The case labels are generated from the X-macro list in lsra_reftypes.h.
static const char* getRefTypeName(RefType refType)
{
    switch (refType)
    {
#define DEF_REFTYPE(memberName, memberValue, shortName)                                                                \
    case memberName:                                                                                                   \
        return #memberName;
#include "lsra_reftypes.h"
#undef DEF_REFTYPE
        default:
            return nullptr;
    }
}
8799
// getRefTypeShortName: map a RefType value to its short display name
// (the third column of the X-macro list in lsra_reftypes.h), or nullptr
// for an unrecognized value.
static const char* getRefTypeShortName(RefType refType)
{
    switch (refType)
    {
#define DEF_REFTYPE(memberName, memberValue, shortName)                                                                \
    case memberName:                                                                                                   \
        return shortName;
#include "lsra_reftypes.h"
#undef DEF_REFTYPE
        default:
            return nullptr;
    }
}
8813
8814 void RefPosition::dump()
8815 {
8816     printf("<RefPosition #%-3u @%-3u", rpNum, nodeLocation);
8817
8818     printf(" %s ", getRefTypeName(refType));
8819
8820     if (this->isPhysRegRef)
8821     {
8822         this->getReg()->tinyDump();
8823     }
8824     else if (getInterval())
8825     {
8826         this->getInterval()->tinyDump();
8827     }
8828
8829     if (this->treeNode)
8830     {
8831         printf("%s ", treeNode->OpName(treeNode->OperGet()));
8832     }
8833     printf(FMT_BB " ", this->bbNum);
8834
8835     printf("regmask=");
8836     dumpRegMask(registerAssignment);
8837
8838     printf(" minReg=%d", minRegCandidateCount);
8839
8840     if (this->lastUse)
8841     {
8842         printf(" last");
8843     }
8844     if (this->reload)
8845     {
8846         printf(" reload");
8847     }
8848     if (this->spillAfter)
8849     {
8850         printf(" spillAfter");
8851     }
8852     if (this->moveReg)
8853     {
8854         printf(" move");
8855     }
8856     if (this->copyReg)
8857     {
8858         printf(" copy");
8859     }
8860     if (this->isFixedRegRef)
8861     {
8862         printf(" fixed");
8863     }
8864     if (this->isLocalDefUse)
8865     {
8866         printf(" local");
8867     }
8868     if (this->delayRegFree)
8869     {
8870         printf(" delay");
8871     }
8872     if (this->outOfOrder)
8873     {
8874         printf(" outOfOrder");
8875     }
8876
8877     if (this->RegOptional())
8878     {
8879         printf(" regOptional");
8880     }
8881     printf(">\n");
8882 }
8883
8884 void RegRecord::dump()
8885 {
8886     tinyDump();
8887 }
8888
8889 void Interval::dump()
8890 {
8891     printf("Interval %2u:", intervalIndex);
8892
8893     if (isLocalVar)
8894     {
8895         printf(" (V%02u)", varNum);
8896     }
8897     else if (IsUpperVector())
8898     {
8899         assert(relatedInterval != nullptr);
8900         printf(" (U%02u)", relatedInterval->varNum);
8901     }
8902     printf(" %s", varTypeName(registerType));
8903     if (isInternal)
8904     {
8905         printf(" (INTERNAL)");
8906     }
8907     if (isSpilled)
8908     {
8909         printf(" (SPILLED)");
8910     }
8911     if (isSplit)
8912     {
8913         printf(" (SPLIT)");
8914     }
8915     if (isStructField)
8916     {
8917         printf(" (struct)");
8918     }
8919     if (isPromotedStruct)
8920     {
8921         printf(" (promoted struct)");
8922     }
8923     if (hasConflictingDefUse)
8924     {
8925         printf(" (def-use conflict)");
8926     }
8927     if (hasInterferingUses)
8928     {
8929         printf(" (interfering uses)");
8930     }
8931     if (isSpecialPutArg)
8932     {
8933         printf(" (specialPutArg)");
8934     }
8935     if (isConstant)
8936     {
8937         printf(" (constant)");
8938     }
8939
8940     printf(" RefPositions {");
8941     for (RefPosition* refPosition = this->firstRefPosition; refPosition != nullptr;
8942          refPosition              = refPosition->nextRefPosition)
8943     {
8944         printf("#%u@%u", refPosition->rpNum, refPosition->nodeLocation);
8945         if (refPosition->nextRefPosition)
8946         {
8947             printf(" ");
8948         }
8949     }
8950     printf("}");
8951
8952     // this is not used (yet?)
8953     // printf(" SpillOffset %d", this->spillOffset);
8954
8955     printf(" physReg:%s", getRegName(physReg));
8956
8957     printf(" Preferences=");
8958     dumpRegMask(this->registerPreferences);
8959
8960     if (relatedInterval)
8961     {
8962         printf(" RelatedInterval ");
8963         relatedInterval->microDump();
8964     }
8965
8966     printf("\n");
8967 }
8968
8969 // print out very concise representation
8970 void Interval::tinyDump()
8971 {
8972     printf("<Ivl:%u", intervalIndex);
8973     if (isLocalVar)
8974     {
8975         printf(" V%02u", varNum);
8976     }
8977     else if (IsUpperVector())
8978     {
8979         assert(relatedInterval != nullptr);
8980         printf(" (U%02u)", relatedInterval->varNum);
8981     }
8982     else if (isInternal)
8983     {
8984         printf(" internal");
8985     }
8986     printf("> ");
8987 }
8988
8989 // print out extremely concise representation
8990 void Interval::microDump()
8991 {
8992     if (isLocalVar)
8993     {
8994         printf("<V%02u/L%u>", varNum, intervalIndex);
8995         return;
8996     }
8997     else if (IsUpperVector())
8998     {
8999         assert(relatedInterval != nullptr);
9000         printf(" (U%02u)", relatedInterval->varNum);
9001     }
9002     char intervalTypeChar = 'I';
9003     if (isInternal)
9004     {
9005         intervalTypeChar = 'T';
9006     }
9007     printf("<%c%u>", intervalTypeChar, intervalIndex);
9008 }
9009
9010 void RegRecord::tinyDump()
9011 {
9012     printf("<Reg:%-3s> ", getRegName(regNum));
9013 }
9014
9015 void LinearScan::dumpNodeInfo(GenTree* node, regMaskTP dstCandidates, int srcCount, int dstCount)
9016 {
9017     if (!VERBOSE)
9018     {
9019         return;
9020     }
9021     // This is formatted like the old dump to make diffs easier. TODO-Cleanup: improve.
9022     int       internalIntCount   = 0;
9023     int       internalFloatCount = 0;
9024     regMaskTP internalCandidates = RBM_NONE;
9025     for (int i = 0; i < internalCount; i++)
9026     {
9027         RefPosition* def = internalDefs[i];
9028         if (def->getInterval()->registerType == TYP_INT)
9029         {
9030             internalIntCount++;
9031         }
9032         else
9033         {
9034             internalFloatCount++;
9035         }
9036         internalCandidates |= def->registerAssignment;
9037     }
9038     if (dstCandidates == RBM_NONE)
9039     {
9040         dstCandidates = varTypeIsFloating(node) ? allRegs(TYP_FLOAT) : allRegs(TYP_INT);
9041     }
9042     if (internalCandidates == RBM_NONE)
9043     {
9044         internalCandidates = allRegs(TYP_INT);
9045     }
9046     printf("    +<TreeNodeInfo %d=%d %di %df", dstCount, srcCount, internalIntCount, internalFloatCount);
9047     printf(" src=");
9048     dumpRegMask(varTypeIsFloating(node) ? allRegs(TYP_FLOAT) : allRegs(TYP_INT));
9049     printf(" int=");
9050     dumpRegMask(internalCandidates);
9051     printf(" dst=");
9052     dumpRegMask(dstCandidates);
9053     if (node->IsUnusedValue())
9054     {
9055         printf(" L");
9056     }
9057     printf(" I");
9058     if (pendingDelayFree)
9059     {
9060         printf(" D");
9061     }
9062     if (setInternalRegsDelayFree)
9063     {
9064         printf(" ID");
9065     }
9066     printf(">");
9067     node->dumpLIRFlags();
9068     printf("\n  consume= %d produce=%d\n", srcCount, dstCount);
9069 }
9070
9071 void LinearScan::dumpDefList()
9072 {
9073     if (!VERBOSE)
9074     {
9075         return;
9076     }
9077     JITDUMP("DefList: { ");
9078     bool first = true;
9079     for (RefInfoListNode *listNode = defList.Begin(), *end = defList.End(); listNode != end;
9080          listNode = listNode->Next())
9081     {
9082         GenTree* node = listNode->treeNode;
9083         JITDUMP("%sN%03u.t%d. %s", first ? "" : "; ", node->gtSeqNum, node->gtTreeID, GenTree::OpName(node->OperGet()));
9084         first = false;
9085     }
9086     JITDUMP(" }\n");
9087 }
9088
9089 void LinearScan::lsraDumpIntervals(const char* msg)
9090 {
9091     printf("\nLinear scan intervals %s:\n", msg);
9092     for (Interval& interval : intervals)
9093     {
9094         // only dump something if it has references
9095         // if (interval->firstRefPosition)
9096         interval.dump();
9097     }
9098
9099     printf("\n");
9100 }
9101
9102 // Dumps a tree node as a destination or source operand, with the style
9103 // of dump dependent on the mode
9104 void LinearScan::lsraGetOperandString(GenTree*          tree,
9105                                       LsraTupleDumpMode mode,
9106                                       char*             operandString,
9107                                       unsigned          operandStringLength)
9108 {
9109     const char* lastUseChar = "";
9110     if ((tree->gtFlags & GTF_VAR_DEATH) != 0)
9111     {
9112         lastUseChar = "*";
9113     }
9114     switch (mode)
9115     {
9116         case LinearScan::LSRA_DUMP_PRE:
9117             _snprintf_s(operandString, operandStringLength, operandStringLength, "t%d%s", tree->gtTreeID, lastUseChar);
9118             break;
9119         case LinearScan::LSRA_DUMP_REFPOS:
9120             _snprintf_s(operandString, operandStringLength, operandStringLength, "t%d%s", tree->gtTreeID, lastUseChar);
9121             break;
9122         case LinearScan::LSRA_DUMP_POST:
9123         {
9124             Compiler* compiler = JitTls::GetCompiler();
9125
9126             if (!tree->gtHasReg())
9127             {
9128                 _snprintf_s(operandString, operandStringLength, operandStringLength, "STK%s", lastUseChar);
9129             }
9130             else
9131             {
9132                 regNumber reg       = tree->gtRegNum;
9133                 int       charCount = _snprintf_s(operandString, operandStringLength, operandStringLength, "%s%s",
9134                                             getRegName(reg, genIsValidFloatReg(reg)), lastUseChar);
9135                 operandString += charCount;
9136                 operandStringLength -= charCount;
9137
9138                 if (tree->IsMultiRegNode())
9139                 {
9140                     unsigned regCount = tree->GetMultiRegCount();
9141                     for (unsigned regIndex = 1; regIndex < regCount; regIndex++)
9142                     {
9143                         regNumber reg = tree->GetRegByIndex(regIndex);
9144                         charCount     = _snprintf_s(operandString, operandStringLength, operandStringLength, ",%s%s",
9145                                                 getRegName(reg, genIsValidFloatReg(reg)), lastUseChar);
9146                         operandString += charCount;
9147                         operandStringLength -= charCount;
9148                     }
9149                 }
9150             }
9151         }
9152         break;
9153         default:
9154             printf("ERROR: INVALID TUPLE DUMP MODE\n");
9155             break;
9156     }
9157 }
// lsraDispNode: Display a node in tuple style: its spill state, sequence
// number, and destination operand (if any), formatted per the dump mode.
void LinearScan::lsraDispNode(GenTree* tree, LsraTupleDumpMode mode, bool hasDest)
{
    Compiler*      compiler            = JitTls::GetCompiler();
    const unsigned operandStringLength = 16;
    char           operandString[operandStringLength];
    const char*    emptyDestOperand = "               ";
    char           spillChar        = ' ';

    // The leading character encodes spill state in the POST dump:
    // 'S' = spilled; '$'/'*' = a localDefUse node holding a register
    // (with/without a spill).
    if (mode == LinearScan::LSRA_DUMP_POST)
    {
        if ((tree->gtFlags & GTF_SPILL) != 0)
        {
            spillChar = 'S';
        }
        if (!hasDest && tree->gtHasReg())
        {
            // A node can define a register, but not produce a value for a parent to consume,
            // i.e. in the "localDefUse" case.
            // There used to be an assert here that we wouldn't spill such a node.
            // However, we can have unused lclVars that wind up being the node at which
            // it is spilled. This probably indicates a bug, but we don't really want to
            // assert during a dump.
            if (spillChar == 'S')
            {
                spillChar = '$';
            }
            else
            {
                spillChar = '*';
            }
            hasDest = true;
        }
    }
    printf("%c N%03u. ", spillChar, tree->gtSeqNum);

    LclVarDsc* varDsc = nullptr;
    unsigned   varNum = UINT_MAX;
    if (tree->IsLocal())
    {
        varNum = tree->gtLclVarCommon.gtLclNum;
        varDsc = &(compiler->lvaTable[varNum]);
        // Register-candidate locals are displayed on the rhs (below), not as a dest.
        if (varDsc->lvLRACandidate)
        {
            hasDest = false;
        }
    }
    if (hasDest)
    {
        if (mode == LinearScan::LSRA_DUMP_POST && tree->gtFlags & GTF_SPILLED)
        {
            assert(tree->gtHasReg());
        }
        lsraGetOperandString(tree, mode, operandString, operandStringLength);
        printf("%-15s =", operandString);
    }
    else
    {
        printf("%-15s  ", emptyDestOperand);
    }
    if (varDsc != nullptr)
    {
        if (varDsc->lvLRACandidate)
        {
            if (mode == LSRA_DUMP_REFPOS)
            {
                printf("  V%02u(L%d)", varNum, getIntervalForLocalVar(varDsc->lvVarIndex)->intervalIndex);
            }
            else
            {
                lsraGetOperandString(tree, mode, operandString, operandStringLength);
                printf("  V%02u(%s)", varNum, operandString);
                // Trailing 'R' marks a value that was reloaded after a spill.
                if (mode == LinearScan::LSRA_DUMP_POST && tree->gtFlags & GTF_SPILLED)
                {
                    printf("R");
                }
            }
        }
        else
        {
            // Non-candidate locals live in memory.
            printf("  V%02u MEM", varNum);
        }
    }
    else if (tree->OperIs(GT_ASG))
    {
        assert(!tree->gtHasReg());
        printf("  asg%s  ", GenTree::OpName(tree->OperGet()));
    }
    else
    {
        compiler->gtDispNodeName(tree);
        if (tree->OperKind() & GTK_LEAF)
        {
            compiler->gtDispLeaf(tree, nullptr);
        }
    }
}
9254
9255 //------------------------------------------------------------------------
9256 // DumpOperandDefs: dumps the registers defined by a node.
9257 //
9258 // Arguments:
//    operand             - The operand whose defs are to be dumped.
//    first               - [in/out] True if no operand has yet been printed; cleared once one is.
//    mode                - The tuple dump mode.
//    operandString       - Scratch buffer used to format each operand.
//    operandStringLength - Size of 'operandString'.
//
// Returns:
//    None (this routine only prints).
9263 //
9264 void LinearScan::DumpOperandDefs(
9265     GenTree* operand, bool& first, LsraTupleDumpMode mode, char* operandString, const unsigned operandStringLength)
9266 {
9267     assert(operand != nullptr);
9268     assert(operandString != nullptr);
9269     if (!operand->IsLIR())
9270     {
9271         return;
9272     }
9273
9274     int dstCount = ComputeOperandDstCount(operand);
9275
9276     if (dstCount != 0)
9277     {
9278         // This operand directly produces registers; print it.
9279         if (!first)
9280         {
9281             printf(",");
9282         }
9283         lsraGetOperandString(operand, mode, operandString, operandStringLength);
9284         printf("%s", operandString);
9285         first = false;
9286     }
9287     else if (operand->isContained())
9288     {
9289         // This is a contained node. Dump the defs produced by its operands.
9290         for (GenTree* op : operand->Operands())
9291         {
9292             DumpOperandDefs(op, first, mode, operandString, operandStringLength);
9293         }
9294     }
9295 }
9296
9297 void LinearScan::TupleStyleDump(LsraTupleDumpMode mode)
9298 {
9299     BasicBlock*    block;
9300     LsraLocation   currentLoc          = 1; // 0 is the entry
9301     const unsigned operandStringLength = 16;
9302     char           operandString[operandStringLength];
9303
9304     // currentRefPosition is not used for LSRA_DUMP_PRE
9305     // We keep separate iterators for defs, so that we can print them
9306     // on the lhs of the dump
9307     RefPositionIterator refPosIterator     = refPositions.begin();
9308     RefPosition*        currentRefPosition = &refPosIterator;
9309
9310     switch (mode)
9311     {
9312         case LSRA_DUMP_PRE:
9313             printf("TUPLE STYLE DUMP BEFORE LSRA\n");
9314             break;
9315         case LSRA_DUMP_REFPOS:
9316             printf("TUPLE STYLE DUMP WITH REF POSITIONS\n");
9317             break;
9318         case LSRA_DUMP_POST:
9319             printf("TUPLE STYLE DUMP WITH REGISTER ASSIGNMENTS\n");
9320             break;
9321         default:
9322             printf("ERROR: INVALID TUPLE DUMP MODE\n");
9323             return;
9324     }
9325
9326     if (mode != LSRA_DUMP_PRE)
9327     {
9328         printf("Incoming Parameters: ");
9329         for (; refPosIterator != refPositions.end() && currentRefPosition->refType != RefTypeBB;
9330              ++refPosIterator, currentRefPosition = &refPosIterator)
9331         {
9332             Interval* interval = currentRefPosition->getInterval();
9333             assert(interval != nullptr && interval->isLocalVar);
9334             printf(" V%02d", interval->varNum);
9335             if (mode == LSRA_DUMP_POST)
9336             {
9337                 regNumber reg;
9338                 if (currentRefPosition->registerAssignment == RBM_NONE)
9339                 {
9340                     reg = REG_STK;
9341                 }
9342                 else
9343                 {
9344                     reg = currentRefPosition->assignedReg();
9345                 }
9346                 LclVarDsc* varDsc = &(compiler->lvaTable[interval->varNum]);
9347                 printf("(");
9348                 regNumber assignedReg = varDsc->lvRegNum;
9349                 regNumber argReg      = (varDsc->lvIsRegArg) ? varDsc->lvArgReg : REG_STK;
9350
9351                 assert(reg == assignedReg || varDsc->lvRegister == false);
9352                 if (reg != argReg)
9353                 {
9354                     printf(getRegName(argReg, isFloatRegType(interval->registerType)));
9355                     printf("=>");
9356                 }
9357                 printf("%s)", getRegName(reg, isFloatRegType(interval->registerType)));
9358             }
9359         }
9360         printf("\n");
9361     }
9362
9363     for (block = startBlockSequence(); block != nullptr; block = moveToNextBlock())
9364     {
9365         currentLoc += 2;
9366
9367         if (mode == LSRA_DUMP_REFPOS)
9368         {
9369             bool printedBlockHeader = false;
9370             // We should find the boundary RefPositions in the order of exposed uses, dummy defs, and the blocks
9371             for (; refPosIterator != refPositions.end() &&
9372                    (currentRefPosition->refType == RefTypeExpUse || currentRefPosition->refType == RefTypeDummyDef ||
9373                     (currentRefPosition->refType == RefTypeBB && !printedBlockHeader));
9374                  ++refPosIterator, currentRefPosition = &refPosIterator)
9375             {
9376                 Interval* interval = nullptr;
9377                 if (currentRefPosition->isIntervalRef())
9378                 {
9379                     interval = currentRefPosition->getInterval();
9380                 }
9381                 switch (currentRefPosition->refType)
9382                 {
9383                     case RefTypeExpUse:
9384                         assert(interval != nullptr);
9385                         assert(interval->isLocalVar);
9386                         printf("  Exposed use of V%02u at #%d\n", interval->varNum, currentRefPosition->rpNum);
9387                         break;
9388                     case RefTypeDummyDef:
9389                         assert(interval != nullptr);
9390                         assert(interval->isLocalVar);
9391                         printf("  Dummy def of V%02u at #%d\n", interval->varNum, currentRefPosition->rpNum);
9392                         break;
9393                     case RefTypeBB:
9394                         block->dspBlockHeader(compiler);
9395                         printedBlockHeader = true;
9396                         printf("=====\n");
9397                         break;
9398                     default:
9399                         printf("Unexpected RefPosition type at #%d\n", currentRefPosition->rpNum);
9400                         break;
9401                 }
9402             }
9403         }
9404         else
9405         {
9406             block->dspBlockHeader(compiler);
9407             printf("=====\n");
9408         }
9409         if (enregisterLocalVars && mode == LSRA_DUMP_POST && block != compiler->fgFirstBB &&
9410             block->bbNum <= bbNumMaxBeforeResolution)
9411         {
9412             printf("Predecessor for variable locations: " FMT_BB "\n", blockInfo[block->bbNum].predBBNum);
9413             dumpInVarToRegMap(block);
9414         }
9415         if (block->bbNum > bbNumMaxBeforeResolution)
9416         {
9417             SplitEdgeInfo splitEdgeInfo;
9418             splitBBNumToTargetBBNumMap->Lookup(block->bbNum, &splitEdgeInfo);
9419             assert(splitEdgeInfo.toBBNum <= bbNumMaxBeforeResolution);
9420             assert(splitEdgeInfo.fromBBNum <= bbNumMaxBeforeResolution);
9421             printf("New block introduced for resolution from " FMT_BB " to " FMT_BB "\n", splitEdgeInfo.fromBBNum,
9422                    splitEdgeInfo.toBBNum);
9423         }
9424
9425         for (GenTree* node : LIR::AsRange(block).NonPhiNodes())
9426         {
9427             GenTree* tree = node;
9428
9429             genTreeOps oper      = tree->OperGet();
9430             int        produce   = tree->IsValue() ? ComputeOperandDstCount(tree) : 0;
9431             int        consume   = ComputeAvailableSrcCount(tree);
9432             regMaskTP  killMask  = RBM_NONE;
9433             regMaskTP  fixedMask = RBM_NONE;
9434
9435             lsraDispNode(tree, mode, produce != 0 && mode != LSRA_DUMP_REFPOS);
9436
9437             if (mode != LSRA_DUMP_REFPOS)
9438             {
9439                 if (consume > 0)
9440                 {
9441                     printf("; ");
9442
9443                     bool first = true;
9444                     for (GenTree* operand : tree->Operands())
9445                     {
9446                         DumpOperandDefs(operand, first, mode, operandString, operandStringLength);
9447                     }
9448                 }
9449             }
9450             else
9451             {
9452                 // Print each RefPosition on a new line, but
9453                 // printing all the kills for each node on a single line
9454                 // and combining the fixed regs with their associated def or use
9455                 bool         killPrinted        = false;
9456                 RefPosition* lastFixedRegRefPos = nullptr;
9457                 for (; refPosIterator != refPositions.end() &&
9458                        (currentRefPosition->refType == RefTypeUse || currentRefPosition->refType == RefTypeFixedReg ||
9459                         currentRefPosition->refType == RefTypeKill || currentRefPosition->refType == RefTypeDef) &&
9460                        (currentRefPosition->nodeLocation == tree->gtSeqNum ||
9461                         currentRefPosition->nodeLocation == tree->gtSeqNum + 1);
9462                      ++refPosIterator, currentRefPosition = &refPosIterator)
9463                 {
9464                     Interval* interval = nullptr;
9465                     if (currentRefPosition->isIntervalRef())
9466                     {
9467                         interval = currentRefPosition->getInterval();
9468                     }
9469                     switch (currentRefPosition->refType)
9470                     {
9471                         case RefTypeUse:
9472                             if (currentRefPosition->isPhysRegRef)
9473                             {
9474                                 printf("\n                               Use:R%d(#%d)",
9475                                        currentRefPosition->getReg()->regNum, currentRefPosition->rpNum);
9476                             }
9477                             else
9478                             {
9479                                 assert(interval != nullptr);
9480                                 printf("\n                               Use:");
9481                                 interval->microDump();
9482                                 printf("(#%d)", currentRefPosition->rpNum);
9483                                 if (currentRefPosition->isFixedRegRef && !interval->isInternal)
9484                                 {
9485                                     assert(genMaxOneBit(currentRefPosition->registerAssignment));
9486                                     assert(lastFixedRegRefPos != nullptr);
9487                                     printf(" Fixed:%s(#%d)", getRegName(currentRefPosition->assignedReg(),
9488                                                                         isFloatRegType(interval->registerType)),
9489                                            lastFixedRegRefPos->rpNum);
9490                                     lastFixedRegRefPos = nullptr;
9491                                 }
9492                                 if (currentRefPosition->isLocalDefUse)
9493                                 {
9494                                     printf(" LocalDefUse");
9495                                 }
9496                                 if (currentRefPosition->lastUse)
9497                                 {
9498                                     printf(" *");
9499                                 }
9500                             }
9501                             break;
9502                         case RefTypeDef:
9503                         {
9504                             // Print each def on a new line
9505                             assert(interval != nullptr);
9506                             printf("\n        Def:");
9507                             interval->microDump();
9508                             printf("(#%d)", currentRefPosition->rpNum);
9509                             if (currentRefPosition->isFixedRegRef)
9510                             {
9511                                 assert(genMaxOneBit(currentRefPosition->registerAssignment));
9512                                 printf(" %s", getRegName(currentRefPosition->assignedReg(),
9513                                                          isFloatRegType(interval->registerType)));
9514                             }
9515                             if (currentRefPosition->isLocalDefUse)
9516                             {
9517                                 printf(" LocalDefUse");
9518                             }
9519                             if (currentRefPosition->lastUse)
9520                             {
9521                                 printf(" *");
9522                             }
9523                             if (interval->relatedInterval != nullptr)
9524                             {
9525                                 printf(" Pref:");
9526                                 interval->relatedInterval->microDump();
9527                             }
9528                         }
9529                         break;
9530                         case RefTypeKill:
9531                             if (!killPrinted)
9532                             {
9533                                 printf("\n        Kill: ");
9534                                 killPrinted = true;
9535                             }
9536                             printf(getRegName(currentRefPosition->assignedReg(),
9537                                               isFloatRegType(currentRefPosition->getReg()->registerType)));
9538                             printf(" ");
9539                             break;
9540                         case RefTypeFixedReg:
9541                             lastFixedRegRefPos = currentRefPosition;
9542                             break;
9543                         default:
9544                             printf("Unexpected RefPosition type at #%d\n", currentRefPosition->rpNum);
9545                             break;
9546                     }
9547                 }
9548             }
9549             printf("\n");
9550         }
9551         if (enregisterLocalVars && mode == LSRA_DUMP_POST)
9552         {
9553             dumpOutVarToRegMap(block);
9554         }
9555         printf("\n");
9556     }
9557     printf("\n\n");
9558 }
9559
9560 void LinearScan::dumpLsraAllocationEvent(LsraDumpEvent event,
9561                                          Interval*     interval,
9562                                          regNumber     reg,
9563                                          BasicBlock*   currentBlock)
9564 {
9565     if (!(VERBOSE))
9566     {
9567         return;
9568     }
9569     if ((interval != nullptr) && (reg != REG_NA) && (reg != REG_STK))
9570     {
9571         registersToDump |= genRegMask(reg);
9572         dumpRegRecordTitleIfNeeded();
9573     }
9574
9575     switch (event)
9576     {
9577         // Conflicting def/use
9578         case LSRA_EVENT_DEFUSE_CONFLICT:
9579             dumpRefPositionShort(activeRefPosition, currentBlock);
9580             printf("DUconflict ");
9581             dumpRegRecords();
9582             break;
9583         case LSRA_EVENT_DEFUSE_CASE1:
9584             printf(indentFormat, "  Case #1 use defRegAssignment");
9585             dumpRegRecords();
9586             break;
9587         case LSRA_EVENT_DEFUSE_CASE2:
9588             printf(indentFormat, "  Case #2 use useRegAssignment");
9589             dumpRegRecords();
9590             break;
9591         case LSRA_EVENT_DEFUSE_CASE3:
9592             printf(indentFormat, "  Case #3 use useRegAssignment");
9593             dumpRegRecords();
9594             dumpRegRecords();
9595             break;
9596         case LSRA_EVENT_DEFUSE_CASE4:
9597             printf(indentFormat, "  Case #4 use defRegAssignment");
9598             dumpRegRecords();
9599             break;
9600         case LSRA_EVENT_DEFUSE_CASE5:
9601             printf(indentFormat, "  Case #5 set def to all regs");
9602             dumpRegRecords();
9603             break;
9604         case LSRA_EVENT_DEFUSE_CASE6:
9605             printf(indentFormat, "  Case #6 need a copy");
9606             dumpRegRecords();
9607             if (interval == nullptr)
9608             {
9609                 printf(indentFormat, "    NULL interval");
9610                 dumpRegRecords();
9611             }
9612             else if (interval->firstRefPosition->multiRegIdx != 0)
9613             {
9614                 printf(indentFormat, "    (multiReg)");
9615                 dumpRegRecords();
9616             }
9617             break;
9618
9619         case LSRA_EVENT_SPILL:
9620             dumpRefPositionShort(activeRefPosition, currentBlock);
9621             assert(interval != nullptr && interval->assignedReg != nullptr);
9622             printf("Spill %-4s ", getRegName(interval->assignedReg->regNum));
9623             dumpRegRecords();
9624             break;
9625
9626         // Restoring the previous register
9627         case LSRA_EVENT_RESTORE_PREVIOUS_INTERVAL_AFTER_SPILL:
9628             assert(interval != nullptr);
9629             dumpRefPositionShort(activeRefPosition, currentBlock);
9630             printf("SRstr %-4s ", getRegName(reg));
9631             dumpRegRecords();
9632             break;
9633
9634         case LSRA_EVENT_RESTORE_PREVIOUS_INTERVAL:
9635             assert(interval != nullptr);
9636             if (activeRefPosition == nullptr)
9637             {
9638                 printf(emptyRefPositionFormat, "");
9639             }
9640             else
9641             {
9642                 dumpRefPositionShort(activeRefPosition, currentBlock);
9643             }
9644             printf("Restr %-4s ", getRegName(reg));
9645             dumpRegRecords();
9646             break;
9647
9648         // Done with GC Kills
9649         case LSRA_EVENT_DONE_KILL_GC_REFS:
9650             printf(indentFormat, "  DoneKillGC ");
9651             break;
9652
9653         // Block boundaries
9654         case LSRA_EVENT_START_BB:
9655             // The RefTypeBB comes after the RefTypeDummyDefs associated with that block,
9656             // so we may have a RefTypeDummyDef at the time we dump this event.
9657             // In that case we'll have another "EVENT" associated with it, so we need to
9658             // print the full line now.
9659             if (activeRefPosition->refType != RefTypeBB)
9660             {
9661                 dumpNewBlock(currentBlock, activeRefPosition->nodeLocation);
9662                 dumpRegRecords();
9663             }
9664             else
9665             {
9666                 dumpRefPositionShort(activeRefPosition, currentBlock);
9667             }
9668             break;
9669
9670         // Allocation decisions
9671         case LSRA_EVENT_NEEDS_NEW_REG:
9672             dumpRefPositionShort(activeRefPosition, currentBlock);
9673             printf("Free  %-4s ", getRegName(reg));
9674             dumpRegRecords();
9675             break;
9676
9677         case LSRA_EVENT_ZERO_REF:
9678             assert(interval != nullptr && interval->isLocalVar);
9679             dumpRefPositionShort(activeRefPosition, currentBlock);
9680             printf("NoRef      ");
9681             dumpRegRecords();
9682             break;
9683
9684         case LSRA_EVENT_FIXED_REG:
9685         case LSRA_EVENT_EXP_USE:
9686         case LSRA_EVENT_KEPT_ALLOCATION:
9687             dumpRefPositionShort(activeRefPosition, currentBlock);
9688             printf("Keep  %-4s ", getRegName(reg));
9689             break;
9690
9691         case LSRA_EVENT_COPY_REG:
9692             assert(interval != nullptr && interval->recentRefPosition != nullptr);
9693             dumpRefPositionShort(activeRefPosition, currentBlock);
9694             printf("Copy  %-4s ", getRegName(reg));
9695             break;
9696
9697         case LSRA_EVENT_MOVE_REG:
9698             assert(interval != nullptr && interval->recentRefPosition != nullptr);
9699             dumpRefPositionShort(activeRefPosition, currentBlock);
9700             printf("Move  %-4s ", getRegName(reg));
9701             dumpRegRecords();
9702             break;
9703
9704         case LSRA_EVENT_ALLOC_REG:
9705             dumpRefPositionShort(activeRefPosition, currentBlock);
9706             printf("Alloc %-4s ", getRegName(reg));
9707             break;
9708
9709         case LSRA_EVENT_REUSE_REG:
9710             dumpRefPositionShort(activeRefPosition, currentBlock);
9711             printf("Reuse %-4s ", getRegName(reg));
9712             break;
9713
9714         case LSRA_EVENT_ALLOC_SPILLED_REG:
9715             dumpRefPositionShort(activeRefPosition, currentBlock);
9716             printf("Steal %-4s ", getRegName(reg));
9717             break;
9718
9719         case LSRA_EVENT_NO_ENTRY_REG_ALLOCATED:
9720             assert(interval != nullptr && interval->isLocalVar);
9721             dumpRefPositionShort(activeRefPosition, currentBlock);
9722             printf("LoRef      ");
9723             break;
9724
9725         case LSRA_EVENT_NO_REG_ALLOCATED:
9726             dumpRefPositionShort(activeRefPosition, currentBlock);
9727             printf("NoReg      ");
9728             break;
9729
9730         case LSRA_EVENT_RELOAD:
9731             dumpRefPositionShort(activeRefPosition, currentBlock);
9732             printf("ReLod %-4s ", getRegName(reg));
9733             dumpRegRecords();
9734             break;
9735
9736         case LSRA_EVENT_SPECIAL_PUTARG:
9737             dumpRefPositionShort(activeRefPosition, currentBlock);
9738             printf("PtArg %-4s ", getRegName(reg));
9739             break;
9740
9741         case LSRA_EVENT_UPPER_VECTOR_SAVE:
9742             dumpRefPositionShort(activeRefPosition, currentBlock);
9743             printf("UVSav %-4s ", getRegName(reg));
9744             break;
9745
9746         case LSRA_EVENT_UPPER_VECTOR_RESTORE:
9747             dumpRefPositionShort(activeRefPosition, currentBlock);
9748             printf("UVRes %-4s ", getRegName(reg));
9749             dumpRegRecords();
9750             break;
9751
9752         // We currently don't dump anything for these events.
9753         case LSRA_EVENT_DEFUSE_FIXED_DELAY_USE:
9754         case LSRA_EVENT_SPILL_EXTENDED_LIFETIME:
9755         case LSRA_EVENT_END_BB:
9756         case LSRA_EVENT_FREE_REGS:
9757         case LSRA_EVENT_INCREMENT_RANGE_END:
9758         case LSRA_EVENT_LAST_USE:
9759         case LSRA_EVENT_LAST_USE_DELAYED:
9760             break;
9761
9762         default:
9763             unreached();
9764     }
9765 }
9766
9767 //------------------------------------------------------------------------
9768 // dumpRegRecordHeader: Dump the header for a column-based dump of the register state.
9769 //
9770 // Arguments:
9771 //    None.
9772 //
9773 // Return Value:
9774 //    None.
9775 //
9776 // Assumptions:
9777 //    Reg names fit in 4 characters (minimum width of the columns)
9778 //
9779 // Notes:
9780 //    In order to make the table as dense as possible (for ease of reading the dumps),
9781 //    we determine the minimum regColumnWidth width required to represent:
9782 //      regs, by name (e.g. eax or xmm0) - this is fixed at 4 characters.
9783 //      intervals, as Vnn for lclVar intervals, or as I<num> for other intervals.
9784 //    The table is indented by the amount needed for dumpRefPositionShort, which is
9785 //    captured in shortRefPositionDumpWidth.
9786 //
9787 void LinearScan::dumpRegRecordHeader()
9788 {
9789     printf("The following table has one or more rows for each RefPosition that is handled during allocation.\n"
9790            "The first column provides the basic information about the RefPosition, with its type (e.g. Def,\n"
9791            "Use, Fixd) followed by a '*' if it is a last use, and a 'D' if it is delayRegFree, and then the\n"
9792            "action taken during allocation (e.g. Alloc a new register, or Keep an existing one).\n"
9793            "The subsequent columns show the Interval occupying each register, if any, followed by 'a' if it is\n"
9794            "active, a 'p' if it is a large vector that has been partially spilled, and 'i'if it is inactive.\n"
9795            "Columns are only printed up to the last modifed register, which may increase during allocation,"
9796            "in which case additional columns will appear.  \n"
9797            "Registers which are not marked modified have ---- in their column.\n\n");
9798
9799     // First, determine the width of each register column (which holds a reg name in the
9800     // header, and an interval name in each subsequent row).
9801     int intervalNumberWidth = (int)log10((double)intervals.size()) + 1;
9802     // The regColumnWidth includes the identifying character (I or V) and an 'i', 'p' or 'a' (inactive,
9803     // partially-spilled or active)
9804     regColumnWidth = intervalNumberWidth + 2;
9805     if (regColumnWidth < 4)
9806     {
9807         regColumnWidth = 4;
9808     }
9809     sprintf_s(intervalNameFormat, MAX_FORMAT_CHARS, "%%c%%-%dd", regColumnWidth - 2);
9810     sprintf_s(regNameFormat, MAX_FORMAT_CHARS, "%%-%ds", regColumnWidth);
9811
9812     // Next, determine the width of the short RefPosition (see dumpRefPositionShort()).
9813     // This is in the form:
9814     // nnn.#mmm NAME TYPEld
9815     // Where:
9816     //    nnn is the Location, right-justified to the width needed for the highest location.
9817     //    mmm is the RefPosition rpNum, left-justified to the width needed for the highest rpNum.
9818     //    NAME is dumped by dumpReferentName(), and is "regColumnWidth".
9819     //    TYPE is RefTypeNameShort, and is 4 characters
9820     //    l is either '*' (if a last use) or ' ' (otherwise)
9821     //    d is either 'D' (if a delayed use) or ' ' (otherwise)
9822
9823     maxNodeLocation = (maxNodeLocation == 0)
9824                           ? 1
9825                           : maxNodeLocation; // corner case of a method with an infinite loop without any gentree nodes
9826     assert(maxNodeLocation >= 1);
9827     assert(refPositions.size() >= 1);
9828     int nodeLocationWidth         = (int)log10((double)maxNodeLocation) + 1;
9829     int refPositionWidth          = (int)log10((double)refPositions.size()) + 1;
9830     int refTypeInfoWidth          = 4 /*TYPE*/ + 2 /* last-use and delayed */ + 1 /* space */;
9831     int locationAndRPNumWidth     = nodeLocationWidth + 2 /* .# */ + refPositionWidth + 1 /* space */;
9832     int shortRefPositionDumpWidth = locationAndRPNumWidth + regColumnWidth + 1 /* space */ + refTypeInfoWidth;
9833     sprintf_s(shortRefPositionFormat, MAX_FORMAT_CHARS, "%%%dd.#%%-%dd ", nodeLocationWidth, refPositionWidth);
9834     sprintf_s(emptyRefPositionFormat, MAX_FORMAT_CHARS, "%%-%ds", shortRefPositionDumpWidth);
9835
9836     // The width of the "allocation info"
9837     //  - a 5-character allocation decision
9838     //  - a space
9839     //  - a 4-character register
9840     //  - a space
9841     int allocationInfoWidth = 5 + 1 + 4 + 1;
9842
9843     // Next, determine the width of the legend for each row.  This includes:
9844     //  - a short RefPosition dump (shortRefPositionDumpWidth), which includes a space
9845     //  - the allocation info (allocationInfoWidth), which also includes a space
9846
9847     regTableIndent = shortRefPositionDumpWidth + allocationInfoWidth;
9848
9849     // BBnn printed left-justified in the NAME Typeld and allocationInfo space.
9850     int bbDumpWidth = regColumnWidth + 1 + refTypeInfoWidth + allocationInfoWidth;
9851     int bbNumWidth  = (int)log10((double)compiler->fgBBNumMax) + 1;
9852     // In the unlikely event that BB numbers overflow the space, we'll simply omit the predBB
9853     int predBBNumDumpSpace = regTableIndent - locationAndRPNumWidth - bbNumWidth - 9; // 'BB' + ' PredBB'
9854     if (predBBNumDumpSpace < bbNumWidth)
9855     {
9856         sprintf_s(bbRefPosFormat, MAX_LEGEND_FORMAT_CHARS, "BB%%-%dd", shortRefPositionDumpWidth - 2);
9857     }
9858     else
9859     {
9860         sprintf_s(bbRefPosFormat, MAX_LEGEND_FORMAT_CHARS, "BB%%-%dd PredBB%%-%dd", bbNumWidth, predBBNumDumpSpace);
9861     }
9862
9863     if (compiler->shouldDumpASCIITrees())
9864     {
9865         columnSeparator = "|";
9866         line            = "-";
9867         leftBox         = "+";
9868         middleBox       = "+";
9869         rightBox        = "+";
9870     }
9871     else
9872     {
9873         columnSeparator = "\xe2\x94\x82";
9874         line            = "\xe2\x94\x80";
9875         leftBox         = "\xe2\x94\x9c";
9876         middleBox       = "\xe2\x94\xbc";
9877         rightBox        = "\xe2\x94\xa4";
9878     }
9879     sprintf_s(indentFormat, MAX_FORMAT_CHARS, "%%-%ds", regTableIndent);
9880
9881     // Now, set up the legend format for the RefPosition info
9882     sprintf_s(legendFormat, MAX_LEGEND_FORMAT_CHARS, "%%-%d.%ds%%-%d.%ds%%-%ds%%s", nodeLocationWidth + 1,
9883               nodeLocationWidth + 1, refPositionWidth + 2, refPositionWidth + 2, regColumnWidth + 1);
9884
9885     // Print a "title row" including the legend and the reg names.
9886     lastDumpedRegisters = RBM_NONE;
9887     dumpRegRecordTitleIfNeeded();
9888 }
9889
9890 void LinearScan::dumpRegRecordTitleIfNeeded()
9891 {
9892     if ((lastDumpedRegisters != registersToDump) || (rowCountSinceLastTitle > MAX_ROWS_BETWEEN_TITLES))
9893     {
9894         lastUsedRegNumIndex = 0;
9895         int lastRegNumIndex = compiler->compFloatingPointUsed ? REG_FP_LAST : REG_INT_LAST;
9896         for (int regNumIndex = 0; regNumIndex <= lastRegNumIndex; regNumIndex++)
9897         {
9898             if ((registersToDump & genRegMask((regNumber)regNumIndex)) != 0)
9899             {
9900                 lastUsedRegNumIndex = regNumIndex;
9901             }
9902         }
9903         dumpRegRecordTitle();
9904         lastDumpedRegisters = registersToDump;
9905     }
9906 }
9907
9908 void LinearScan::dumpRegRecordTitleLines()
9909 {
9910     for (int i = 0; i < regTableIndent; i++)
9911     {
9912         printf("%s", line);
9913     }
9914     for (int regNumIndex = 0; regNumIndex <= lastUsedRegNumIndex; regNumIndex++)
9915     {
9916         regNumber regNum = (regNumber)regNumIndex;
9917         if (shouldDumpReg(regNum))
9918         {
9919             printf("%s", middleBox);
9920             for (int i = 0; i < regColumnWidth; i++)
9921             {
9922                 printf("%s", line);
9923             }
9924         }
9925     }
9926     printf("%s\n", rightBox);
9927 }
9928 void LinearScan::dumpRegRecordTitle()
9929 {
9930     dumpRegRecordTitleLines();
9931
9932     // Print out the legend for the RefPosition info
9933     printf(legendFormat, "Loc ", "RP# ", "Name ", "Type  Action Reg  ");
9934
9935     // Print out the register name column headers
9936     char columnFormatArray[MAX_FORMAT_CHARS];
9937     sprintf_s(columnFormatArray, MAX_FORMAT_CHARS, "%s%%-%d.%ds", columnSeparator, regColumnWidth, regColumnWidth);
9938     for (int regNumIndex = 0; regNumIndex <= lastUsedRegNumIndex; regNumIndex++)
9939     {
9940         regNumber regNum = (regNumber)regNumIndex;
9941         if (shouldDumpReg(regNum))
9942         {
9943             const char* regName = getRegName(regNum);
9944             printf(columnFormatArray, regName);
9945         }
9946     }
9947     printf("%s\n", columnSeparator);
9948
9949     rowCountSinceLastTitle = 0;
9950
9951     dumpRegRecordTitleLines();
9952 }
9953
9954 void LinearScan::dumpRegRecords()
9955 {
9956     static char columnFormatArray[18];
9957
9958     for (int regNumIndex = 0; regNumIndex <= lastUsedRegNumIndex; regNumIndex++)
9959     {
9960         if (shouldDumpReg((regNumber)regNumIndex))
9961         {
9962             printf("%s", columnSeparator);
9963             RegRecord& regRecord = physRegs[regNumIndex];
9964             Interval*  interval  = regRecord.assignedInterval;
9965             if (interval != nullptr)
9966             {
9967                 dumpIntervalName(interval);
9968                 char activeChar = interval->isActive ? 'a' : 'i';
9969 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
9970                 if (interval->isPartiallySpilled)
9971                 {
9972                     activeChar = 'p';
9973                 }
9974 #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
9975                 printf("%c", activeChar);
9976             }
9977             else if (regRecord.isBusyUntilNextKill)
9978             {
9979                 printf(columnFormatArray, "Busy");
9980             }
9981             else
9982             {
9983                 sprintf_s(columnFormatArray, MAX_FORMAT_CHARS, "%%-%ds", regColumnWidth);
9984                 printf(columnFormatArray, "");
9985             }
9986         }
9987     }
9988     printf("%s\n", columnSeparator);
9989     rowCountSinceLastTitle++;
9990 }
9991
9992 void LinearScan::dumpIntervalName(Interval* interval)
9993 {
9994     if (interval->isLocalVar)
9995     {
9996         printf(intervalNameFormat, 'V', interval->varNum);
9997     }
9998     else if (interval->IsUpperVector())
9999     {
10000         printf(intervalNameFormat, 'U', interval->relatedInterval->varNum);
10001     }
10002     else if (interval->isConstant)
10003     {
10004         printf(intervalNameFormat, 'C', interval->intervalIndex);
10005     }
10006     else
10007     {
10008         printf(intervalNameFormat, 'I', interval->intervalIndex);
10009     }
10010 }
10011
// dumpEmptyRefPosition: Print blank padding in place of the short RefPosition info,
// used when the RefPosition has already been printed (or there is none) so that the
// register columns that follow stay aligned.
void LinearScan::dumpEmptyRefPosition()
{
    printf(emptyRefPositionFormat, "");
}
10016
10017 //------------------------------------------------------------------------
10018 // dumpNewBlock: Dump a line for a new block in a column-based dump of the register state.
10019 //
10020 // Arguments:
10021 //    currentBlock - the new block to be dumped
10022 //
void LinearScan::dumpNewBlock(BasicBlock* currentBlock, LsraLocation location)
{
    if (!VERBOSE)
    {
        return;
    }

    // Always print a title row before a RefTypeBB (except for the first, because we
    // will already have printed it before the parameters)
    if ((currentBlock != compiler->fgFirstBB) && (currentBlock != nullptr))
    {
        dumpRegRecordTitle();
    }
    // If the activeRefPosition is a DummyDef, then don't print anything further (printing the
    // title line makes it clearer that we're "about to" start the next block).
    if (activeRefPosition->refType == RefTypeDummyDef)
    {
        dumpEmptyRefPosition();
        printf("DDefs ");
        printf(regNameFormat, "");
        return;
    }
    // Print the location and RefPosition number for this block boundary.
    printf(shortRefPositionFormat, location, activeRefPosition->rpNum);
    if (currentBlock == nullptr)
    {
        // nullptr block indicates the final (end-of-method) boundary; pad out the
        // remaining legend space so the register columns still align.
        printf(regNameFormat, "END");
        printf("              ");
        printf(regNameFormat, "");
    }
    else
    {
        // Print "BBnn PredBBnn"; the first block has no predecessor, so dump 0.
        printf(bbRefPosFormat, currentBlock->bbNum,
               currentBlock == compiler->fgFirstBB ? 0 : blockInfo[currentBlock->bbNum].predBBNum);
    }
}
10058
10059 // Note that the size of this dump is computed in dumpRegRecordHeader().
10060 //
// dumpRefPositionShort: Print the abbreviated RefPosition info that begins each row of
// the register-state table: location, rpNum, referent name, and ref type (with last-use
// and delay-free markers). If this RefPosition was the last one printed, print blank
// padding instead so repeated events for the same RefPosition stay on aligned rows.
void LinearScan::dumpRefPositionShort(RefPosition* refPosition, BasicBlock* currentBlock)
{
    BasicBlock*         block                  = currentBlock;
    // Remember the last RefPosition printed, so subsequent events for the same
    // RefPosition print padding rather than duplicating the info.
    // NOTE(review): static local; assumes single-threaded dumping — confirm.
    static RefPosition* lastPrintedRefPosition = nullptr;
    if (refPosition == lastPrintedRefPosition)
    {
        dumpEmptyRefPosition();
        return;
    }
    lastPrintedRefPosition = refPosition;
    if (refPosition->refType == RefTypeBB)
    {
        // Block boundaries have their own dump format.
        dumpNewBlock(currentBlock, refPosition->nodeLocation);
        return;
    }
    printf(shortRefPositionFormat, refPosition->nodeLocation, refPosition->rpNum);
    if (refPosition->isIntervalRef())
    {
        // Interval reference: dump the interval name, then the ref type followed by
        // '*' if a last use and 'D' if the register is delay-freed.
        Interval* interval = refPosition->getInterval();
        dumpIntervalName(interval);
        char lastUseChar = ' ';
        char delayChar   = ' ';
        if (refPosition->lastUse)
        {
            lastUseChar = '*';
            if (refPosition->delayRegFree)
            {
                delayChar = 'D';
            }
        }
        printf("  %s%c%c ", getRefTypeShortName(refPosition->refType), lastUseChar, delayChar);
    }
    else if (refPosition->isPhysRegRef)
    {
        // Physical register reference: dump the register name in place of an interval name.
        RegRecord* regRecord = refPosition->getReg();
        printf(regNameFormat, getRegName(regRecord->regNum));
        printf(" %s   ", getRefTypeShortName(refPosition->refType));
    }
    else
    {
        assert(refPosition->refType == RefTypeKillGCRefs);
        // There's no interval or reg name associated with this.
        printf(regNameFormat, "   ");
        printf(" %s   ", getRefTypeShortName(refPosition->refType));
    }
}
10107
10108 //------------------------------------------------------------------------
10109 // LinearScan::IsResolutionMove:
10110 //     Returns true if the given node is a move inserted by LSRA
10111 //     resolution.
10112 //
10113 // Arguments:
10114 //     node - the node to check.
10115 //
10116 bool LinearScan::IsResolutionMove(GenTree* node)
10117 {
10118     if (!IsLsraAdded(node))
10119     {
10120         return false;
10121     }
10122
10123     switch (node->OperGet())
10124     {
10125         case GT_LCL_VAR:
10126         case GT_COPY:
10127             return node->IsUnusedValue();
10128
10129         case GT_SWAP:
10130             return true;
10131
10132         default:
10133             return false;
10134     }
10135 }
10136
10137 //------------------------------------------------------------------------
10138 // LinearScan::IsResolutionNode:
10139 //     Returns true if the given node is either a move inserted by LSRA
10140 //     resolution or an operand to such a move.
10141 //
10142 // Arguments:
10143 //     containingRange - the range that contains the node to check.
10144 //     node - the node to check.
10145 //
10146 bool LinearScan::IsResolutionNode(LIR::Range& containingRange, GenTree* node)
10147 {
10148     for (;;)
10149     {
10150         if (IsResolutionMove(node))
10151         {
10152             return true;
10153         }
10154
10155         if (!IsLsraAdded(node) || (node->OperGet() != GT_LCL_VAR))
10156         {
10157             return false;
10158         }
10159
10160         LIR::Use use;
10161         bool     foundUse = containingRange.TryGetUse(node, &use);
10162         assert(foundUse);
10163
10164         node = use.User();
10165     }
10166 }
10167
10168 //------------------------------------------------------------------------
10169 // verifyFinalAllocation: Traverse the RefPositions and verify various invariants.
10170 //
10171 // Arguments:
10172 //    None.
10173 //
10174 // Return Value:
10175 //    None.
10176 //
10177 // Notes:
10178 //    If verbose is set, this will also dump a table of the final allocations.
10179 void LinearScan::verifyFinalAllocation()
10180 {
10181     if (VERBOSE)
10182     {
10183         printf("\nFinal allocation\n");
10184     }
10185
10186     // Clear register assignments.
10187     for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg))
10188     {
10189         RegRecord* physRegRecord        = getRegisterRecord(reg);
10190         physRegRecord->assignedInterval = nullptr;
10191     }
10192
10193     for (Interval& interval : intervals)
10194     {
10195         interval.assignedReg = nullptr;
10196         interval.physReg     = REG_NA;
10197     }
10198
10199     DBEXEC(VERBOSE, dumpRegRecordTitle());
10200
10201     BasicBlock*  currentBlock                = nullptr;
10202     GenTree*     firstBlockEndResolutionNode = nullptr;
10203     regMaskTP    regsToFree                  = RBM_NONE;
10204     regMaskTP    delayRegsToFree             = RBM_NONE;
10205     LsraLocation currentLocation             = MinLocation;
10206     for (RefPosition& refPosition : refPositions)
10207     {
10208         RefPosition* currentRefPosition = &refPosition;
10209         Interval*    interval           = nullptr;
10210         RegRecord*   regRecord          = nullptr;
10211         regNumber    regNum             = REG_NA;
10212         activeRefPosition               = currentRefPosition;
10213
10214         if (currentRefPosition->refType == RefTypeBB)
10215         {
10216             regsToFree |= delayRegsToFree;
10217             delayRegsToFree = RBM_NONE;
10218         }
10219         else
10220         {
10221             if (currentRefPosition->isPhysRegRef)
10222             {
10223                 regRecord                    = currentRefPosition->getReg();
10224                 regRecord->recentRefPosition = currentRefPosition;
10225                 regNum                       = regRecord->regNum;
10226             }
10227             else if (currentRefPosition->isIntervalRef())
10228             {
10229                 interval                    = currentRefPosition->getInterval();
10230                 interval->recentRefPosition = currentRefPosition;
10231                 if (currentRefPosition->registerAssignment != RBM_NONE)
10232                 {
10233                     if (!genMaxOneBit(currentRefPosition->registerAssignment))
10234                     {
10235                         assert(currentRefPosition->refType == RefTypeExpUse ||
10236                                currentRefPosition->refType == RefTypeDummyDef);
10237                     }
10238                     else
10239                     {
10240                         regNum    = currentRefPosition->assignedReg();
10241                         regRecord = getRegisterRecord(regNum);
10242                     }
10243                 }
10244             }
10245         }
10246
10247         LsraLocation newLocation = currentRefPosition->nodeLocation;
10248
10249         if (newLocation > currentLocation)
10250         {
10251             // Free Registers.
10252             // We could use the freeRegisters() method, but we'd have to carefully manage the active intervals.
10253             for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg))
10254             {
10255                 regMaskTP regMask = genRegMask(reg);
10256                 if ((regsToFree & regMask) != RBM_NONE)
10257                 {
10258                     RegRecord* physRegRecord        = getRegisterRecord(reg);
10259                     physRegRecord->assignedInterval = nullptr;
10260                 }
10261             }
10262             regsToFree = delayRegsToFree;
10263             regsToFree = RBM_NONE;
10264         }
10265         currentLocation = newLocation;
10266
10267         switch (currentRefPosition->refType)
10268         {
10269             case RefTypeBB:
10270             {
10271                 if (currentBlock == nullptr)
10272                 {
10273                     currentBlock = startBlockSequence();
10274                 }
10275                 else
10276                 {
10277                     // Verify the resolution moves at the end of the previous block.
10278                     for (GenTree* node = firstBlockEndResolutionNode; node != nullptr; node = node->gtNext)
10279                     {
10280                         assert(enregisterLocalVars);
10281                         // Only verify nodes that are actually moves; don't bother with the nodes that are
10282                         // operands to moves.
10283                         if (IsResolutionMove(node))
10284                         {
10285                             verifyResolutionMove(node, currentLocation);
10286                         }
10287                     }
10288
10289                     // Validate the locations at the end of the previous block.
10290                     if (enregisterLocalVars)
10291                     {
10292                         VarToRegMap     outVarToRegMap = outVarToRegMaps[currentBlock->bbNum];
10293                         VarSetOps::Iter iter(compiler, currentBlock->bbLiveOut);
10294                         unsigned        varIndex = 0;
10295                         while (iter.NextElem(&varIndex))
10296                         {
10297                             if (localVarIntervals[varIndex] == nullptr)
10298                             {
10299                                 assert(!compiler->lvaTable[compiler->lvaTrackedToVarNum[varIndex]].lvLRACandidate);
10300                                 continue;
10301                             }
10302                             regNumber regNum = getVarReg(outVarToRegMap, varIndex);
10303                             interval         = getIntervalForLocalVar(varIndex);
10304                             assert(interval->physReg == regNum || (interval->physReg == REG_NA && regNum == REG_STK));
10305                             interval->physReg     = REG_NA;
10306                             interval->assignedReg = nullptr;
10307                             interval->isActive    = false;
10308                         }
10309                     }
10310
10311                     // Clear register assignments.
10312                     for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg))
10313                     {
10314                         RegRecord* physRegRecord        = getRegisterRecord(reg);
10315                         physRegRecord->assignedInterval = nullptr;
10316                     }
10317
10318                     // Now, record the locations at the beginning of this block.
10319                     currentBlock = moveToNextBlock();
10320                 }
10321
10322                 if (currentBlock != nullptr)
10323                 {
10324                     if (enregisterLocalVars)
10325                     {
10326                         VarToRegMap     inVarToRegMap = inVarToRegMaps[currentBlock->bbNum];
10327                         VarSetOps::Iter iter(compiler, currentBlock->bbLiveIn);
10328                         unsigned        varIndex = 0;
10329                         while (iter.NextElem(&varIndex))
10330                         {
10331                             if (localVarIntervals[varIndex] == nullptr)
10332                             {
10333                                 assert(!compiler->lvaTable[compiler->lvaTrackedToVarNum[varIndex]].lvLRACandidate);
10334                                 continue;
10335                             }
10336                             regNumber regNum                  = getVarReg(inVarToRegMap, varIndex);
10337                             interval                          = getIntervalForLocalVar(varIndex);
10338                             interval->physReg                 = regNum;
10339                             interval->assignedReg             = &(physRegs[regNum]);
10340                             interval->isActive                = true;
10341                             physRegs[regNum].assignedInterval = interval;
10342                         }
10343                     }
10344
10345                     if (VERBOSE)
10346                     {
10347                         dumpRefPositionShort(currentRefPosition, currentBlock);
10348                         dumpRegRecords();
10349                     }
10350
10351                     // Finally, handle the resolution moves, if any, at the beginning of the next block.
10352                     firstBlockEndResolutionNode = nullptr;
10353                     bool foundNonResolutionNode = false;
10354
10355                     LIR::Range& currentBlockRange = LIR::AsRange(currentBlock);
10356                     for (GenTree* node : currentBlockRange.NonPhiNodes())
10357                     {
10358                         if (IsResolutionNode(currentBlockRange, node))
10359                         {
10360                             assert(enregisterLocalVars);
10361                             if (foundNonResolutionNode)
10362                             {
10363                                 firstBlockEndResolutionNode = node;
10364                                 break;
10365                             }
10366                             else if (IsResolutionMove(node))
10367                             {
10368                                 // Only verify nodes that are actually moves; don't bother with the nodes that are
10369                                 // operands to moves.
10370                                 verifyResolutionMove(node, currentLocation);
10371                             }
10372                         }
10373                         else
10374                         {
10375                             foundNonResolutionNode = true;
10376                         }
10377                     }
10378                 }
10379             }
10380
10381             break;
10382
10383             case RefTypeKill:
10384                 assert(regRecord != nullptr);
10385                 assert(regRecord->assignedInterval == nullptr);
10386                 dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, nullptr, regRecord->regNum, currentBlock);
10387                 break;
10388             case RefTypeFixedReg:
10389                 assert(regRecord != nullptr);
10390                 dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, nullptr, regRecord->regNum, currentBlock);
10391                 break;
10392
10393             case RefTypeUpperVectorSave:
10394                 dumpLsraAllocationEvent(LSRA_EVENT_UPPER_VECTOR_SAVE, nullptr, REG_NA, currentBlock);
10395                 break;
10396
10397             case RefTypeUpperVectorRestore:
10398                 dumpLsraAllocationEvent(LSRA_EVENT_UPPER_VECTOR_RESTORE, nullptr, REG_NA, currentBlock);
10399                 break;
10400
10401             case RefTypeDef:
10402             case RefTypeUse:
10403             case RefTypeParamDef:
10404             case RefTypeZeroInit:
10405                 assert(interval != nullptr);
10406
10407                 if (interval->isSpecialPutArg)
10408                 {
10409                     dumpLsraAllocationEvent(LSRA_EVENT_SPECIAL_PUTARG, interval, regNum);
10410                     break;
10411                 }
10412                 if (currentRefPosition->reload)
10413                 {
10414                     interval->isActive = true;
10415                     assert(regNum != REG_NA);
10416                     interval->physReg           = regNum;
10417                     interval->assignedReg       = regRecord;
10418                     regRecord->assignedInterval = interval;
10419                     dumpLsraAllocationEvent(LSRA_EVENT_RELOAD, nullptr, regRecord->regNum, currentBlock);
10420                 }
10421                 if (regNum == REG_NA)
10422                 {
10423                     dumpLsraAllocationEvent(LSRA_EVENT_NO_REG_ALLOCATED, interval);
10424                 }
10425                 else if (RefTypeIsDef(currentRefPosition->refType))
10426                 {
10427                     interval->isActive = true;
10428                     if (VERBOSE)
10429                     {
10430                         if (interval->isConstant && (currentRefPosition->treeNode != nullptr) &&
10431                             currentRefPosition->treeNode->IsReuseRegVal())
10432                         {
10433                             dumpLsraAllocationEvent(LSRA_EVENT_REUSE_REG, nullptr, regRecord->regNum, currentBlock);
10434                         }
10435                         else
10436                         {
10437                             dumpLsraAllocationEvent(LSRA_EVENT_ALLOC_REG, nullptr, regRecord->regNum, currentBlock);
10438                         }
10439                     }
10440                 }
10441                 else if (currentRefPosition->copyReg)
10442                 {
10443                     dumpLsraAllocationEvent(LSRA_EVENT_COPY_REG, interval, regRecord->regNum, currentBlock);
10444                 }
10445                 else if (currentRefPosition->moveReg)
10446                 {
10447                     assert(interval->assignedReg != nullptr);
10448                     interval->assignedReg->assignedInterval = nullptr;
10449                     interval->physReg                       = regNum;
10450                     interval->assignedReg                   = regRecord;
10451                     regRecord->assignedInterval             = interval;
10452                     if (VERBOSE)
10453                     {
10454                         printf("Move  %-4s ", getRegName(regRecord->regNum));
10455                     }
10456                 }
10457                 else
10458                 {
10459                     dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, nullptr, regRecord->regNum, currentBlock);
10460                 }
10461                 if (currentRefPosition->lastUse || currentRefPosition->spillAfter)
10462                 {
10463                     interval->isActive = false;
10464                 }
10465                 if (regNum != REG_NA)
10466                 {
10467                     if (currentRefPosition->spillAfter)
10468                     {
10469                         if (VERBOSE)
10470                         {
10471                             // If refPos is marked as copyReg, then the reg that is spilled
10472                             // is the homeReg of the interval not the reg currently assigned
10473                             // to refPos.
10474                             regNumber spillReg = regNum;
10475                             if (currentRefPosition->copyReg)
10476                             {
10477                                 assert(interval != nullptr);
10478                                 spillReg = interval->physReg;
10479                             }
10480                             dumpRegRecords();
10481                             dumpEmptyRefPosition();
10482                             printf("Spill %-4s ", getRegName(spillReg));
10483                         }
10484                     }
10485                     else if (currentRefPosition->copyReg)
10486                     {
10487                         regRecord->assignedInterval = interval;
10488                     }
10489                     else
10490                     {
10491                         interval->physReg           = regNum;
10492                         interval->assignedReg       = regRecord;
10493                         regRecord->assignedInterval = interval;
10494                     }
10495                 }
10496                 break;
10497             case RefTypeKillGCRefs:
10498                 // No action to take.
10499                 // However, we will assert that, at resolution time, no registers contain GC refs.
10500                 {
10501                     DBEXEC(VERBOSE, printf("           "));
10502                     regMaskTP candidateRegs = currentRefPosition->registerAssignment;
10503                     while (candidateRegs != RBM_NONE)
10504                     {
10505                         regMaskTP nextRegBit = genFindLowestBit(candidateRegs);
10506                         candidateRegs &= ~nextRegBit;
10507                         regNumber  nextReg          = genRegNumFromMask(nextRegBit);
10508                         RegRecord* regRecord        = getRegisterRecord(nextReg);
10509                         Interval*  assignedInterval = regRecord->assignedInterval;
10510                         assert(assignedInterval == nullptr || !varTypeIsGC(assignedInterval->registerType));
10511                     }
10512                 }
10513                 break;
10514
10515             case RefTypeExpUse:
10516             case RefTypeDummyDef:
10517                 // Do nothing; these will be handled by the RefTypeBB.
10518                 DBEXEC(VERBOSE, dumpRefPositionShort(currentRefPosition, currentBlock));
10519                 DBEXEC(VERBOSE, printf("           "));
10520                 break;
10521
10522             case RefTypeInvalid:
10523                 // for these 'currentRefPosition->refType' values, No action to take
10524                 break;
10525         }
10526
10527         if (currentRefPosition->refType != RefTypeBB)
10528         {
10529             DBEXEC(VERBOSE, dumpRegRecords());
10530             if (interval != nullptr)
10531             {
10532                 if (currentRefPosition->copyReg)
10533                 {
10534                     assert(interval->physReg != regNum);
10535                     regRecord->assignedInterval = nullptr;
10536                     assert(interval->assignedReg != nullptr);
10537                     regRecord = interval->assignedReg;
10538                 }
10539                 if (currentRefPosition->spillAfter || currentRefPosition->lastUse)
10540                 {
10541                     interval->physReg     = REG_NA;
10542                     interval->assignedReg = nullptr;
10543
10544                     // regRegcord could be null if the RefPosition does not require a register.
10545                     if (regRecord != nullptr)
10546                     {
10547                         regRecord->assignedInterval = nullptr;
10548                     }
10549 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
10550                     else if (interval->isUpperVector && !currentRefPosition->RegOptional())
10551                     {
10552                         // These only require a register if they are not RegOptional, and their lclVar
10553                         // interval is living in a register and not already partially spilled.
10554                         if ((currentRefPosition->refType == RefTypeUpperVectorSave) ||
10555                             (currentRefPosition->refType == RefTypeUpperVectorRestore))
10556                         {
10557                             Interval* lclVarInterval = interval->relatedInterval;
10558                             assert((lclVarInterval->physReg == REG_NA) || lclVarInterval->isPartiallySpilled);
10559                         }
10560                     }
10561 #endif
10562                     else
10563                     {
10564                         assert(currentRefPosition->RegOptional());
10565                     }
10566                 }
10567             }
10568         }
10569     }
10570
10571     // Now, verify the resolution blocks.
10572     // Currently these are nearly always at the end of the method, but that may not always be the case.
10573     // So, we'll go through all the BBs looking for blocks whose bbNum is greater than bbNumMaxBeforeResolution.
10574     for (BasicBlock* currentBlock = compiler->fgFirstBB; currentBlock != nullptr; currentBlock = currentBlock->bbNext)
10575     {
10576         if (currentBlock->bbNum > bbNumMaxBeforeResolution)
10577         {
10578             // If we haven't enregistered an lclVars, we have no resolution blocks.
10579             assert(enregisterLocalVars);
10580
10581             if (VERBOSE)
10582             {
10583                 dumpRegRecordTitle();
10584                 printf(shortRefPositionFormat, 0, 0);
10585                 assert(currentBlock->bbPreds != nullptr && currentBlock->bbPreds->flBlock != nullptr);
10586                 printf(bbRefPosFormat, currentBlock->bbNum, currentBlock->bbPreds->flBlock->bbNum);
10587                 dumpRegRecords();
10588             }
10589
10590             // Clear register assignments.
10591             for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg))
10592             {
10593                 RegRecord* physRegRecord        = getRegisterRecord(reg);
10594                 physRegRecord->assignedInterval = nullptr;
10595             }
10596
10597             // Set the incoming register assignments
10598             VarToRegMap     inVarToRegMap = getInVarToRegMap(currentBlock->bbNum);
10599             VarSetOps::Iter iter(compiler, currentBlock->bbLiveIn);
10600             unsigned        varIndex = 0;
10601             while (iter.NextElem(&varIndex))
10602             {
10603                 if (localVarIntervals[varIndex] == nullptr)
10604                 {
10605                     assert(!compiler->lvaTable[compiler->lvaTrackedToVarNum[varIndex]].lvLRACandidate);
10606                     continue;
10607                 }
10608                 regNumber regNum                  = getVarReg(inVarToRegMap, varIndex);
10609                 Interval* interval                = getIntervalForLocalVar(varIndex);
10610                 interval->physReg                 = regNum;
10611                 interval->assignedReg             = &(physRegs[regNum]);
10612                 interval->isActive                = true;
10613                 physRegs[regNum].assignedInterval = interval;
10614             }
10615
10616             // Verify the moves in this block
10617             LIR::Range& currentBlockRange = LIR::AsRange(currentBlock);
10618             for (GenTree* node : currentBlockRange.NonPhiNodes())
10619             {
10620                 assert(IsResolutionNode(currentBlockRange, node));
10621                 if (IsResolutionMove(node))
10622                 {
10623                     // Only verify nodes that are actually moves; don't bother with the nodes that are
10624                     // operands to moves.
10625                     verifyResolutionMove(node, currentLocation);
10626                 }
10627             }
10628
10629             // Verify the outgoing register assignments
10630             {
10631                 VarToRegMap     outVarToRegMap = getOutVarToRegMap(currentBlock->bbNum);
10632                 VarSetOps::Iter iter(compiler, currentBlock->bbLiveOut);
10633                 unsigned        varIndex = 0;
10634                 while (iter.NextElem(&varIndex))
10635                 {
10636                     if (localVarIntervals[varIndex] == nullptr)
10637                     {
10638                         assert(!compiler->lvaTable[compiler->lvaTrackedToVarNum[varIndex]].lvLRACandidate);
10639                         continue;
10640                     }
10641                     regNumber regNum   = getVarReg(outVarToRegMap, varIndex);
10642                     Interval* interval = getIntervalForLocalVar(varIndex);
10643                     assert(interval->physReg == regNum || (interval->physReg == REG_NA && regNum == REG_STK));
10644                     interval->physReg     = REG_NA;
10645                     interval->assignedReg = nullptr;
10646                     interval->isActive    = false;
10647                 }
10648             }
10649         }
10650     }
10651
10652     DBEXEC(VERBOSE, printf("\n"));
10653 }
10654
10655 //------------------------------------------------------------------------
10656 // verifyResolutionMove: Verify a resolution statement.  Called by verifyFinalAllocation()
10657 //
10658 // Arguments:
10659 //    resolutionMove    - A GenTree* that must be a resolution move.
10660 //    currentLocation   - The LsraLocation of the most recent RefPosition that has been verified.
10661 //
10662 // Return Value:
10663 //    None.
10664 //
10665 // Notes:
10666 //    If verbose is set, this will also dump the moves into the table of final allocations.
void LinearScan::verifyResolutionMove(GenTree* resolutionMove, LsraLocation currentLocation)
{
    GenTree* dst = resolutionMove;
    assert(IsResolutionMove(dst));

    if (dst->OperGet() == GT_SWAP)
    {
        // A GT_SWAP exchanges the registers of two enregistered lclVars.
        // Mirror that exchange in the verifier's model: swap the two intervals'
        // physReg/assignedReg and the two RegRecords' assignedInterval.
        GenTreeLclVarCommon* left          = dst->gtGetOp1()->AsLclVarCommon();
        GenTreeLclVarCommon* right         = dst->gtGetOp2()->AsLclVarCommon();
        regNumber            leftRegNum    = left->gtRegNum;
        regNumber            rightRegNum   = right->gtRegNum;
        LclVarDsc*           leftVarDsc    = compiler->lvaTable + left->gtLclNum;
        LclVarDsc*           rightVarDsc   = compiler->lvaTable + right->gtLclNum;
        Interval*            leftInterval  = getIntervalForLocalVar(leftVarDsc->lvVarIndex);
        Interval*            rightInterval = getIntervalForLocalVar(rightVarDsc->lvVarIndex);
        // Each interval must currently live in the register its lclVar node names.
        assert(leftInterval->physReg == leftRegNum && rightInterval->physReg == rightRegNum);
        leftInterval->physReg                  = rightRegNum;
        rightInterval->physReg                 = leftRegNum;
        leftInterval->assignedReg              = &physRegs[rightRegNum];
        rightInterval->assignedReg             = &physRegs[leftRegNum];
        physRegs[rightRegNum].assignedInterval = leftInterval;
        physRegs[leftRegNum].assignedInterval  = rightInterval;
        if (VERBOSE)
        {
            // Dump the swap as two table rows, one per interval.
            printf(shortRefPositionFormat, currentLocation, 0);
            dumpIntervalName(leftInterval);
            printf("  Swap   ");
            printf("      %-4s ", getRegName(rightRegNum));
            dumpRegRecords();
            printf(shortRefPositionFormat, currentLocation, 0);
            dumpIntervalName(rightInterval);
            printf("  \"      ");
            printf("      %-4s ", getRegName(leftRegNum));
            dumpRegRecords();
        }
        return;
    }

    // Non-swap moves: determine the source and destination registers.
    // REG_STK is used to represent the stack home (spill/reload).
    regNumber            dstRegNum = dst->gtRegNum;
    regNumber            srcRegNum;
    GenTreeLclVarCommon* lcl;
    if (dst->OperGet() == GT_COPY)
    {
        // Register-to-register copy: the source register is on the operand lclVar.
        lcl       = dst->gtGetOp1()->AsLclVarCommon();
        srcRegNum = lcl->gtRegNum;
    }
    else
    {
        lcl = dst->AsLclVarCommon();
        if ((lcl->gtFlags & GTF_SPILLED) != 0)
        {
            // Reload: the value comes from the stack into dstRegNum.
            srcRegNum = REG_STK;
        }
        else
        {
            // Spill: the value moves from its current register to the stack.
            assert((lcl->gtFlags & GTF_SPILL) != 0);
            srcRegNum = dstRegNum;
            dstRegNum = REG_STK;
        }
    }

    Interval* interval = getIntervalForLocalVarNode(lcl);
    // The interval must currently be where the move says it is coming from
    // (REG_NA in the model corresponds to the stack).
    assert(interval->physReg == srcRegNum || (srcRegNum == REG_STK && interval->physReg == REG_NA));
    if (srcRegNum != REG_STK)
    {
        // Vacate the source register.
        physRegs[srcRegNum].assignedInterval = nullptr;
    }
    if (dstRegNum != REG_STK)
    {
        // Occupy the destination register and mark the interval live there.
        interval->physReg                    = dstRegNum;
        interval->assignedReg                = &(physRegs[dstRegNum]);
        physRegs[dstRegNum].assignedInterval = interval;
        interval->isActive                   = true;
    }
    else
    {
        // Spilled to the stack: the interval no longer occupies any register.
        interval->physReg     = REG_NA;
        interval->assignedReg = nullptr;
        interval->isActive    = false;
    }
    if (VERBOSE)
    {
        printf(shortRefPositionFormat, currentLocation, 0);
        dumpIntervalName(interval);
        printf("  Move   ");
        printf("      %-4s ", getRegName(dstRegNum));
        dumpRegRecords();
    }
}
10755 #endif // DEBUG