// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

/*
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

                 Linear Scan Register Allocation

                         a.k.a. LSRA

  Preconditions
    - All register requirements are expressed in the code stream, either as destination
      registers of tree nodes, or as internal registers.  These requirements are
      expressed in the RefPositions built for each node by BuildNode(), which include:
      - The register uses and definitions.
      - The register restrictions (candidates) of the target register, both from itself,
        as producer of the value (dstCandidates), and from its consuming node (srcCandidates).
        Note that when we talk about srcCandidates we are referring to the destination register
        (not any of its sources).
      - The number (internalCount) of registers required, and their register restrictions (internalCandidates).
        These are neither inputs nor outputs of the node, but are used in the sequence of code generated for the tree.
    "Internal registers" are registers used during the code sequence generated for the node.
    The register lifetimes must obey the following lifetime model:
    - First, any internal registers are defined.
    - Next, any source registers are used (and are then freed if they are last use and are not identified as
      "delayRegFree").
    - Next, the internal registers are used (and are then freed).
    - Next, any registers in the kill set for the instruction are killed.
    - Next, the destination register(s) are defined (multiple destination registers are only supported on ARM).
    - Finally, any "delayRegFree" source registers are freed.
  There are several things to note about this order:
    - The internal registers will never overlap any use, but they may overlap a destination register.
    - Internal registers are never live beyond the node.
    - The "delayRegFree" annotation is used for instructions that are only available in a Read-Modify-Write form.
      That is, the destination register is one of the sources.  In this case, we must not use the same register for
      the non-RMW operand as for the destination.

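  As a concrete sketch of this model (a hypothetical example; the locations are
  illustrative), consider an RMW instruction such as x86 "add t1, t2", where t1
  is both a source and the destination:
    - the uses of t1 and t2 are built first; t2, the non-RMW operand, is marked
      "delayRegFree" since it must not share the destination register
    - any registers in the instruction's kill set are then killed
    - the destination (the same register as t1) is then defined
    - t2 is freed last, one location past the def, which is what prevents the
      allocator from assigning it the destination register
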
  Overview (doLinearScan):
    - Walk all blocks, building intervals and RefPositions (buildIntervals)
    - Allocate registers (allocateRegisters)
    - Annotate nodes with register assignments (resolveRegisters)
    - Add move nodes as needed to resolve conflicting register
      assignments across non-adjacent edges. (resolveEdges, called from resolveRegisters)

  Postconditions:

    Tree nodes (GenTree):
    - GenTree::gtRegNum (and gtRegPair for ARM) is annotated with the register
      assignment for a node. If the node does not require a register, it is
      annotated as such (gtRegNum = REG_NA). For a variable definition or interior
      tree node (an "implicit" definition), this is the register in which to put the result.
      For an expression use, this is the place to find the value that has previously
      been computed.
      - In most cases, this register must satisfy the constraints specified for the RefPosition.
      - In some cases, this is difficult:
        - If a lclVar node currently lives in some register, it may not be desirable to move it
          (i.e. its current location may be desirable for future uses, e.g. if it's a callee-save register,
          but needs to be in a specific arg register for a call).
        - In other cases there may be conflicts between the restrictions placed by the defining node and the
          node which consumes it.
      - If such a node is constrained to a single fixed register (e.g. an arg register, or a return from a call),
        then LSRA is free to annotate the node with a different register.  The code generator must issue the appropriate
        move.
      - However, if such a node is constrained to a set of registers, and its current location does not satisfy that
        requirement, LSRA must insert a GT_COPY node between the node and its parent.  The gtRegNum on the GT_COPY node
        must satisfy the register requirement of the parent.
    - GenTree::gtRsvdRegs has a set of registers used for internal temps.
    - A tree node is marked GTF_SPILL if the tree node must be spilled by the code generator after it has been
      evaluated.
      - LSRA currently does not set GTF_SPILLED on such nodes, because it caused problems in the old code generator.
        In the new backend perhaps this should change (see also the note below under CodeGen).
    - A tree node is marked GTF_SPILLED if it is a lclVar that must be reloaded prior to use.
      - The register (gtRegNum) on the node indicates the register to which it must be reloaded.
      - For lclVar nodes, since the uses and defs are distinct tree nodes, it is always possible to annotate the node
        with the register to which the variable must be reloaded.
      - For other nodes, since they represent both the def and the use, if the value must be reloaded to a different
        register, LSRA must insert a GT_RELOAD node in order to specify the register to which it should be reloaded.

    Local variable table (LclVarDsc):
    - LclVarDsc::lvRegister is set to true if a local variable has the
      same register assignment for its entire lifetime.
    - LclVarDsc::lvRegNum / lvOtherReg: these are initialized to their
      first value at the end of LSRA (it looks like lvOtherReg isn't?
      This is probably a bug (ARM)). Codegen will set them to their current value
      as it processes the trees, since a variable can (now) be assigned different
      registers over its lifetime.

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
*/

#include "jitpch.h"
#ifdef _MSC_VER
#pragma hdrstop
#endif

#include "lsra.h"

#ifdef DEBUG
const char* LinearScan::resolveTypeName[] = {"Split", "Join", "Critical", "SharedCritical"};
#endif // DEBUG

/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XX                                                                           XX
XX                    Small Helper functions                                 XX
XX                                                                           XX
XX                                                                           XX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
*/

//--------------------------------------------------------------
// lsraAssignRegToTree: Assign the given reg to the given tree node.
//
// Arguments:
//    tree    -    GenTree node
//    reg     -    register to be assigned
//    regIdx  -    register index, for multi-reg nodes.
//                 regIdx will be zero for tree nodes producing a single-reg result.
//
// Return Value:
//    None
//
void lsraAssignRegToTree(GenTree* tree, regNumber reg, unsigned regIdx)
{
    if (regIdx == 0)
    {
        tree->gtRegNum = reg;
    }
#if !defined(_TARGET_64BIT_)
    else if (tree->OperIsMultiRegOp())
    {
        assert(regIdx == 1);
        GenTreeMultiRegOp* mul = tree->AsMultiRegOp();
        mul->gtOtherReg        = reg;
    }
#endif // !defined(_TARGET_64BIT_)
#if FEATURE_MULTIREG_RET
    else if (tree->OperGet() == GT_COPY)
    {
        assert(regIdx == 1);
        GenTreeCopyOrReload* copy = tree->AsCopyOrReload();
        copy->gtOtherRegs[0]      = (regNumberSmall)reg;
    }
#endif // FEATURE_MULTIREG_RET
#if FEATURE_ARG_SPLIT
    else if (tree->OperIsPutArgSplit())
    {
        GenTreePutArgSplit* putArg = tree->AsPutArgSplit();
        putArg->SetRegNumByIdx(reg, regIdx);
    }
#endif // FEATURE_ARG_SPLIT
    else
    {
        assert(tree->IsMultiRegCall());
        GenTreeCall* call = tree->AsCall();
        call->SetRegNumByIdx(reg, regIdx);
    }
}

//-------------------------------------------------------------
// getWeight: Returns the weight of the RefPosition.
//
// Arguments:
//    refPos   -   ref position
//
// Returns:
//    Weight of ref position.
unsigned LinearScan::getWeight(RefPosition* refPos)
{
    unsigned weight;
    GenTree* treeNode = refPos->treeNode;

    if (treeNode != nullptr)
    {
        if (isCandidateLocalRef(treeNode))
        {
            // Tracked locals: use the weighted ref count as the weight of the
            // ref position.
            GenTreeLclVarCommon* lclCommon = treeNode->AsLclVarCommon();
            LclVarDsc*           varDsc    = &(compiler->lvaTable[lclCommon->gtLclNum]);
            weight                         = varDsc->lvRefCntWtd();
            if (refPos->getInterval()->isSpilled)
            {
                // Decrease the weight if the interval has already been spilled.
                weight -= BB_UNITY_WEIGHT;
            }
        }
        else
        {
            // Non-candidate local ref or non-lcl tree node.
            // These are considered to have two references in the basic block:
            // a def and a use, and hence the weighted ref count would be 2 times
            // the weight of the basic block in which they appear.
            // However, it is generally more harmful to spill tree temps, so we
            // double that.
            const unsigned TREE_TEMP_REF_COUNT    = 2;
            const unsigned TREE_TEMP_BOOST_FACTOR = 2;
            weight = TREE_TEMP_REF_COUNT * TREE_TEMP_BOOST_FACTOR * blockInfo[refPos->bbNum].weight;
        }
    }
    else
    {
        // Non-tree node ref positions.  These will have a single
        // reference in the basic block and hence their weighted
        // refcount is equal to the weight of the block in which they
        // appear.
        weight = blockInfo[refPos->bbNum].weight;
    }

    return weight;
}
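
// For example (hypothetical numbers): in a block with weight 100, a tree temp
// gets weight 2 * 2 * 100 = 400, while a RefPosition with no tree node gets the
// block weight, 100. A tracked lclVar with a weighted ref count of 300 gets 300,
// reduced by BB_UNITY_WEIGHT if its interval has already been spilled.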

// allRegs represents a set of registers that can
// be used to allocate the specified type at any point
// in time (more of a 'bank' of registers).
regMaskTP LinearScan::allRegs(RegisterType rt)
{
    if (rt == TYP_FLOAT)
    {
        return availableFloatRegs;
    }
    else if (rt == TYP_DOUBLE)
    {
        return availableDoubleRegs;
    }
#ifdef FEATURE_SIMD
    // TODO-Cleanup: Add an RBM_ALLSIMD
    else if (varTypeIsSIMD(rt))
    {
        return availableDoubleRegs;
    }
#endif // FEATURE_SIMD
    else
    {
        return availableIntRegs;
    }
}

regMaskTP LinearScan::allByteRegs()
{
#ifdef _TARGET_X86_
    return availableIntRegs & RBM_BYTE_REGS;
#else
    return availableIntRegs;
#endif
}

regMaskTP LinearScan::allSIMDRegs()
{
    return availableFloatRegs;
}

//------------------------------------------------------------------------
// internalFloatRegCandidates: Return the set of registers that are appropriate
//                             for use as internal float registers.
//
// Return Value:
//    The set of registers (as a regMaskTP).
//
// Notes:
//    compFloatingPointUsed is only required to be set if it is possible that we
//    will use floating point callee-save registers.
//    It is unlikely, if an internal register is the only use of floating point,
//    that it will select a callee-save register.  But to be safe, we restrict
//    the set of candidates if compFloatingPointUsed is not already set.

regMaskTP LinearScan::internalFloatRegCandidates()
{
    if (compiler->compFloatingPointUsed)
    {
        return allRegs(TYP_FLOAT);
    }
    else
    {
        return RBM_FLT_CALLEE_TRASH;
    }
}

/*****************************************************************************
 * Inline functions for RegRecord
 *****************************************************************************/

bool RegRecord::isFree()
{
    return ((assignedInterval == nullptr || !assignedInterval->isActive) && !isBusyUntilNextKill);
}

/*****************************************************************************
 * Inline functions for LinearScan
 *****************************************************************************/
RegRecord* LinearScan::getRegisterRecord(regNumber regNum)
{
    assert((unsigned)regNum < ArrLen(physRegs));
    return &physRegs[regNum];
}

#ifdef DEBUG

//----------------------------------------------------------------------------
// getConstrainedRegMask: Returns a new regMask that is the intersection of
// regMaskActual and regMaskConstraint, if the new regMask has at least
// minRegCount registers; otherwise returns regMaskActual.
//
// Arguments:
//     regMaskActual      -  regMask that needs to be constrained
//     regMaskConstraint  -  regMask constraint that needs to be
//                           applied to regMaskActual
//     minRegCount        -  Minimum number of regs that should be
//                           present in the new regMask.
//
// Return Value:
//     The intersection, if it has at least minRegCount registers.
//     Otherwise returns regMaskActual.
regMaskTP LinearScan::getConstrainedRegMask(regMaskTP regMaskActual, regMaskTP regMaskConstraint, unsigned minRegCount)
{
    regMaskTP newMask = regMaskActual & regMaskConstraint;
    if (genCountBits(newMask) >= minRegCount)
    {
        return newMask;
    }

    return regMaskActual;
}
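
// For example (hypothetical masks): with regMaskActual = {rax, rcx, rdx},
// regMaskConstraint = {rcx, rdx, rsi} and minRegCount = 2, the intersection
// {rcx, rdx} has two registers, so it is returned. With minRegCount = 3, the
// intersection is too small, so regMaskActual is returned unchanged.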

//------------------------------------------------------------------------
// stressLimitRegs: Given a set of registers, expressed as a register mask, reduce
//            them based on the current stress options.
//
// Arguments:
//    mask      - The current mask of register candidates for a node
//
// Return Value:
//    A possibly-modified mask, based on the value of COMPlus_JitStressRegs.
//
// Notes:
//    This is the method used to implement the stress options that limit
//    the set of registers considered for allocation.

regMaskTP LinearScan::stressLimitRegs(RefPosition* refPosition, regMaskTP mask)
{
    if (getStressLimitRegs() != LSRA_LIMIT_NONE)
    {
        // The refPosition could be null, for example when called
        // by getTempRegForResolution().
        int minRegCount = (refPosition != nullptr) ? refPosition->minRegCandidateCount : 1;

        switch (getStressLimitRegs())
        {
            case LSRA_LIMIT_CALLEE:
                if (!compiler->opts.compDbgEnC)
                {
                    mask = getConstrainedRegMask(mask, RBM_CALLEE_SAVED, minRegCount);
                }
                break;

            case LSRA_LIMIT_CALLER:
            {
                mask = getConstrainedRegMask(mask, RBM_CALLEE_TRASH, minRegCount);
            }
            break;

            case LSRA_LIMIT_SMALL_SET:
                if ((mask & LsraLimitSmallIntSet) != RBM_NONE)
                {
                    mask = getConstrainedRegMask(mask, LsraLimitSmallIntSet, minRegCount);
                }
                else if ((mask & LsraLimitSmallFPSet) != RBM_NONE)
                {
                    mask = getConstrainedRegMask(mask, LsraLimitSmallFPSet, minRegCount);
                }
                break;

            default:
                unreached();
        }

        if (refPosition != nullptr && refPosition->isFixedRegRef)
        {
            mask |= refPosition->registerAssignment;
        }
    }

    return mask;
}
#endif // DEBUG

//------------------------------------------------------------------------
// conflictingFixedRegReference: Determine whether the current RegRecord has a
//                               fixed register use that conflicts with 'refPosition'
//
// Arguments:
//    refPosition - The RefPosition of interest
//
// Return Value:
//    Returns true iff the given RefPosition is NOT a fixed use of this register,
//    AND either:
//    - there is a RefPosition on this RegRecord at the nodeLocation of the given RefPosition, or
//    - the given RefPosition has a delayRegFree, and there is a RefPosition on this RegRecord at
//      the nodeLocation just past the given RefPosition.
//
// Assumptions:
//    'refPosition' is non-null.

bool RegRecord::conflictingFixedRegReference(RefPosition* refPosition)
{
    // Is this a fixed reference of this register?  If so, there is no conflict.
    if (refPosition->isFixedRefOfRegMask(genRegMask(regNum)))
    {
        return false;
    }
    // Otherwise, check for conflicts.
    // There is a conflict if:
    // 1. There is a recent RefPosition on this RegRecord that is at this location,
    //    except in the case where it is a special "putarg" that is associated with this interval, OR
    // 2. There is an upcoming RefPosition at this location, or at the next location
    //    if refPosition is a delayed use (i.e. must be kept live through the next/def location).

    LsraLocation refLocation = refPosition->nodeLocation;
    if (recentRefPosition != nullptr && recentRefPosition->refType != RefTypeKill &&
        recentRefPosition->nodeLocation == refLocation &&
        (!isBusyUntilNextKill || assignedInterval != refPosition->getInterval()))
    {
        return true;
    }
    LsraLocation nextPhysRefLocation = getNextRefLocation();
    if (nextPhysRefLocation == refLocation || (refPosition->delayRegFree && nextPhysRefLocation == (refLocation + 1)))
    {
        return true;
    }
    return false;
}
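
// For example (hypothetical locations): if this RegRecord has an upcoming fixed
// RefPosition at location 12, then a non-fixed refPosition at location 12
// conflicts, as does one at location 11 that is marked delayRegFree (since it
// must remain live through location 12).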

/*****************************************************************************
 * Inline functions for Interval
 *****************************************************************************/
RefPosition* Referenceable::getNextRefPosition()
{
    if (recentRefPosition == nullptr)
    {
        return firstRefPosition;
    }
    else
    {
        return recentRefPosition->nextRefPosition;
    }
}

LsraLocation Referenceable::getNextRefLocation()
{
    RefPosition* nextRefPosition = getNextRefPosition();
    if (nextRefPosition == nullptr)
    {
        return MaxLocation;
    }
    else
    {
        return nextRefPosition->nodeLocation;
    }
}

// Iterate through all the registers of the given type
class RegisterIterator
{
    friend class Registers;

public:
    RegisterIterator(RegisterType type) : regType(type)
    {
        if (useFloatReg(regType))
        {
            currentRegNum = REG_FP_FIRST;
        }
        else
        {
            currentRegNum = REG_INT_FIRST;
        }
    }

protected:
    static RegisterIterator Begin(RegisterType regType)
    {
        return RegisterIterator(regType);
    }
    static RegisterIterator End(RegisterType regType)
    {
        RegisterIterator endIter = RegisterIterator(regType);
        // This assumes only integer and floating point register types;
        // if we target a processor with additional register types,
        // this would have to change.
        if (useFloatReg(regType))
        {
            // This just happens to work for both double & float.
            endIter.currentRegNum = REG_NEXT(REG_FP_LAST);
        }
        else
        {
            endIter.currentRegNum = REG_NEXT(REG_INT_LAST);
        }
        return endIter;
    }

public:
    void operator++(int dummy) // int dummy is C++ for "this is postfix ++"
    {
        currentRegNum = REG_NEXT(currentRegNum);
#ifdef _TARGET_ARM_
        if (regType == TYP_DOUBLE)
            currentRegNum = REG_NEXT(currentRegNum);
#endif
    }
    void operator++() // prefix operator++
    {
        currentRegNum = REG_NEXT(currentRegNum);
#ifdef _TARGET_ARM_
        if (regType == TYP_DOUBLE)
            currentRegNum = REG_NEXT(currentRegNum);
#endif
    }
    regNumber operator*()
    {
        return currentRegNum;
    }
    bool operator!=(const RegisterIterator& other)
    {
        return other.currentRegNum != currentRegNum;
    }

private:
    regNumber    currentRegNum;
    RegisterType regType;
};

class Registers
{
public:
    friend class RegisterIterator;
    RegisterType type;
    Registers(RegisterType t)
    {
        type = t;
    }
    RegisterIterator begin()
    {
        return RegisterIterator::Begin(type);
    }
    RegisterIterator end()
    {
        return RegisterIterator::End(type);
    }
};
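
// A minimal usage sketch for the pair of classes above (the loop variable name
// is illustrative): because Registers exposes begin()/end() and the iterator
// supports ++, * and !=, a range-based for loop works directly.
//
//     for (regNumber reg : Registers(TYP_DOUBLE))
//     {
//         // On ARM, each step advances by two float registers.
//     }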

#ifdef DEBUG
void LinearScan::dumpVarToRegMap(VarToRegMap map)
{
    bool anyPrinted = false;
    for (unsigned varIndex = 0; varIndex < compiler->lvaTrackedCount; varIndex++)
    {
        unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
        if (map[varIndex] != REG_STK)
        {
            printf("V%02u=%s ", varNum, getRegName(map[varIndex]));
            anyPrinted = true;
        }
    }
    if (!anyPrinted)
    {
        printf("none");
    }
    printf("\n");
}

void LinearScan::dumpInVarToRegMap(BasicBlock* block)
{
    printf("Var=Reg beg of " FMT_BB ": ", block->bbNum);
    VarToRegMap map = getInVarToRegMap(block->bbNum);
    dumpVarToRegMap(map);
}

void LinearScan::dumpOutVarToRegMap(BasicBlock* block)
{
    printf("Var=Reg end of " FMT_BB ": ", block->bbNum);
    VarToRegMap map = getOutVarToRegMap(block->bbNum);
    dumpVarToRegMap(map);
}

#endif // DEBUG

LinearScanInterface* getLinearScanAllocator(Compiler* comp)
{
    return new (comp, CMK_LSRA) LinearScan(comp);
}

//------------------------------------------------------------------------
// LSRA constructor
//
// Arguments:
//    theCompiler
//
// Notes:
//    The constructor takes care of initializing the data structures that are used
//    during Lowering, including (in DEBUG) getting the stress environment variables,
//    as they may affect the block ordering.

LinearScan::LinearScan(Compiler* theCompiler)
    : compiler(theCompiler)
    , intervals(theCompiler->getAllocator(CMK_LSRA_Interval))
    , allocationPassComplete(false)
    , refPositions(theCompiler->getAllocator(CMK_LSRA_RefPosition))
    , listNodePool(theCompiler)
{
#ifdef DEBUG
    maxNodeLocation   = 0;
    activeRefPosition = nullptr;

    // Get the value of the environment variable that controls stress for register allocation.
    lsraStressMask = JitConfig.JitStressRegs();
#if 0
    if (lsraStressMask != 0)
    {
        // The code in this #if can be used to debug JitStressRegs issues according to
        // method hash.  To use, simply set environment variables JitStressRegsHashLo and JitStressRegsHashHi.
        unsigned methHash = compiler->info.compMethodHash();
        char* lostr = getenv("JitStressRegsHashLo");
        unsigned methHashLo = 0;
        bool dump = false;
        if (lostr != nullptr)
        {
            sscanf_s(lostr, "%x", &methHashLo);
            dump = true;
        }
        char* histr = getenv("JitStressRegsHashHi");
        unsigned methHashHi = UINT32_MAX;
        if (histr != nullptr)
        {
            sscanf_s(histr, "%x", &methHashHi);
            dump = true;
        }
        if (methHash < methHashLo || methHash > methHashHi)
        {
            lsraStressMask = 0;
        }
        else if (dump == true)
        {
            printf("JitStressRegs = %x for method %s, hash = 0x%x.\n",
                lsraStressMask, compiler->info.compFullName, compiler->info.compMethodHash());
            printf("");         // in our logic this causes a flush
        }
    }
#endif // 0
#endif // DEBUG

    // Assume that we will enregister local variables if it's not disabled. We'll reset it if we
    // have no tracked locals when we start allocating. Note that new tracked lclVars may be added
    // after the first liveness analysis - either by optimizations or by Lowering, and the tracked
    // set won't be recomputed until after Lowering (and this constructor is called prior to Lowering),
    // so we don't want to check that yet.
    enregisterLocalVars = ((compiler->opts.compFlags & CLFLG_REGVAR) != 0);
#ifdef _TARGET_ARM64_
    availableIntRegs = (RBM_ALLINT & ~(RBM_PR | RBM_FP | RBM_LR) & ~compiler->codeGen->regSet.rsMaskResvd);
#else
    availableIntRegs = (RBM_ALLINT & ~compiler->codeGen->regSet.rsMaskResvd);
#endif

#if ETW_EBP_FRAMED
    availableIntRegs &= ~RBM_FPBASE;
#endif // ETW_EBP_FRAMED

    availableFloatRegs  = RBM_ALLFLOAT;
    availableDoubleRegs = RBM_ALLDOUBLE;

#ifdef _TARGET_AMD64_
    if (compiler->opts.compDbgEnC)
    {
        // On x64 when the EnC option is set, we always save exactly RBP, RSI and RDI.
        // RBP is not available to the register allocator, so RSI and RDI are the only
        // callee-save registers available.
        availableIntRegs &= ~RBM_CALLEE_SAVED | RBM_RSI | RBM_RDI;
        availableFloatRegs &= ~RBM_CALLEE_SAVED;
        availableDoubleRegs &= ~RBM_CALLEE_SAVED;
    }
#endif // _TARGET_AMD64_
    compiler->rpFrameType           = FT_NOT_SET;
    compiler->rpMustCreateEBPCalled = false;

    compiler->codeGen->intRegState.rsIsFloat   = false;
    compiler->codeGen->floatRegState.rsIsFloat = true;

    // Block sequencing (the order in which we schedule).
    // Note that we don't initialize the bbVisitedSet until we do the first traversal.
    // This is so that any blocks that are added during the first traversal
    // are accounted for (and we don't have BasicBlockEpoch issues).
    blockSequencingDone   = false;
    blockSequence         = nullptr;
    blockSequenceWorkList = nullptr;
    curBBSeqNum           = 0;
    bbSeqCount            = 0;

    // Information about each block, including predecessor blocks used for variable locations at block entry.
    blockInfo = nullptr;

    pendingDelayFree = false;
    tgtPrefUse       = nullptr;
}

//------------------------------------------------------------------------
// getNextCandidateFromWorkList: Get the next candidate for block sequencing
//
// Arguments:
//    None.
//
// Return Value:
//    The next block to be placed in the sequence.
//
// Notes:
//    This method currently always returns the next block in the list, and relies on having
//    blocks added to the list only when they are "ready", and on the
//    addToBlockSequenceWorkList() method to insert them in the proper order.
//    However, a block may be in the list and already selected, if it was subsequently
//    encountered as both a flow and layout successor of the most recently selected
//    block.

BasicBlock* LinearScan::getNextCandidateFromWorkList()
{
    BasicBlockList* nextWorkList = nullptr;
    for (BasicBlockList* workList = blockSequenceWorkList; workList != nullptr; workList = nextWorkList)
    {
        nextWorkList          = workList->next;
        BasicBlock* candBlock = workList->block;
        removeFromBlockSequenceWorkList(workList, nullptr);
        if (!isBlockVisited(candBlock))
        {
            return candBlock;
        }
    }
    return nullptr;
}

//------------------------------------------------------------------------
// setBlockSequence: Determine the block order for register allocation.
//
// Arguments:
//    None
//
// Return Value:
//    None
//
// Notes:
//    On return, the blockSequence array contains the blocks, in the order in which they
//    will be allocated.
//    This method clears the bbVisitedSet on LinearScan, and when it returns the set
//    contains all the bbNums for the blocks.

void LinearScan::setBlockSequence()
{
    assert(!blockSequencingDone); // The method should be called only once.

    compiler->EnsureBasicBlockEpoch();
#ifdef DEBUG
    blockEpoch = compiler->GetCurBasicBlockEpoch();
#endif // DEBUG

    // Initialize the "visited" blocks set.
    bbVisitedSet = BlockSetOps::MakeEmpty(compiler);

    BlockSet readySet(BlockSetOps::MakeEmpty(compiler));
    BlockSet predSet(BlockSetOps::MakeEmpty(compiler));

    assert(blockSequence == nullptr && bbSeqCount == 0);
    blockSequence            = new (compiler, CMK_LSRA) BasicBlock*[compiler->fgBBcount];
    bbNumMaxBeforeResolution = compiler->fgBBNumMax;
    blockInfo                = new (compiler, CMK_LSRA) LsraBlockInfo[bbNumMaxBeforeResolution + 1];

    assert(blockSequenceWorkList == nullptr);

    bool addedInternalBlocks = false;
    verifiedAllBBs           = false;
    hasCriticalEdges         = false;
    BasicBlock* nextBlock;
    // We use a bbNum of 0 for entry RefPositions.
    // The other information in blockInfo[0] will never be used.
    blockInfo[0].weight = BB_UNITY_WEIGHT;
    for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = nextBlock)
    {
        blockSequence[bbSeqCount] = block;
        markBlockVisited(block);
        bbSeqCount++;
        nextBlock = nullptr;

        // Initialize the blockInfo.
        // predBBNum will be set later.  0 is never used as a bbNum.
        assert(block->bbNum != 0);
        blockInfo[block->bbNum].predBBNum = 0;
        // We check for critical edges below, but initialize to false.
        blockInfo[block->bbNum].hasCriticalInEdge  = false;
        blockInfo[block->bbNum].hasCriticalOutEdge = false;
        blockInfo[block->bbNum].weight             = block->getBBWeight(compiler);

#if TRACK_LSRA_STATS
        blockInfo[block->bbNum].spillCount         = 0;
        blockInfo[block->bbNum].copyRegCount       = 0;
        blockInfo[block->bbNum].resolutionMovCount = 0;
        blockInfo[block->bbNum].splitEdgeCount     = 0;
#endif // TRACK_LSRA_STATS

        if (block->GetUniquePred(compiler) == nullptr)
        {
            for (flowList* pred = block->bbPreds; pred != nullptr; pred = pred->flNext)
            {
                BasicBlock* predBlock = pred->flBlock;
                if (predBlock->NumSucc(compiler) > 1)
                {
                    blockInfo[block->bbNum].hasCriticalInEdge = true;
                    hasCriticalEdges                          = true;
                    break;
                }
                else if (predBlock->bbJumpKind == BBJ_SWITCH)
                {
                    assert(!"Switch with single successor");
                }
            }
        }

        // Determine which block to schedule next.

        // First, update the NORMAL successors of the current block, adding them to the worklist
        // according to the desired order.  We will handle the EH successors below.
        bool checkForCriticalOutEdge = (block->NumSucc(compiler) > 1);
        if (!checkForCriticalOutEdge && block->bbJumpKind == BBJ_SWITCH)
        {
            assert(!"Switch with single successor");
        }

        const unsigned numSuccs = block->NumSucc(compiler);
        for (unsigned succIndex = 0; succIndex < numSuccs; succIndex++)
        {
            BasicBlock* succ = block->GetSucc(succIndex, compiler);
            if (checkForCriticalOutEdge && succ->GetUniquePred(compiler) == nullptr)
            {
                blockInfo[block->bbNum].hasCriticalOutEdge = true;
                hasCriticalEdges                           = true;
                // We can stop checking now.
                checkForCriticalOutEdge = false;
            }

            if (isTraversalLayoutOrder() || isBlockVisited(succ))
            {
                continue;
            }

            // We've now seen a predecessor of 'succ', so add it to the work list and the "readySet".
            // It will be inserted in the worklist according to the specified traversal order
            // (i.e. pred-first or random, since layout order is handled above).
            if (!BlockSetOps::IsMember(compiler, readySet, succ->bbNum))
            {
                addToBlockSequenceWorkList(readySet, succ, predSet);
                BlockSetOps::AddElemD(compiler, readySet, succ->bbNum);
            }
        }

        // For layout order, simply use bbNext.
        if (isTraversalLayoutOrder())
        {
            nextBlock = block->bbNext;
            continue;
        }

        while (nextBlock == nullptr)
        {
            nextBlock = getNextCandidateFromWorkList();

            // TODO-Throughput: We would like to bypass this traversal if we know we've handled all
            // the blocks - but fgBBcount does not appear to be updated when blocks are removed.
            if (nextBlock == nullptr /* && bbSeqCount != compiler->fgBBcount*/ && !verifiedAllBBs)
            {
                // If we don't encounter all blocks by traversing the regular successor links, do a full
                // traversal of all the blocks, and add them in layout order.
                // This may include:
                //   - internal-only blocks (in the fgAddCodeList) which may not be in the flow graph
                //     (these are not even in the bbNext links).
                //   - blocks that have become unreachable due to optimizations, but that are strongly
                //     connected (these are not removed)
                //   - EH blocks

                for (Compiler::AddCodeDsc* desc = compiler->fgAddCodeList; desc != nullptr; desc = desc->acdNext)
                {
                    // Add the block to which this AddCodeDsc jumps, if it hasn't already been visited.
                    BasicBlock* addedBlock = desc->acdDstBlk;
                    if (!isBlockVisited(addedBlock))
                    {
                        addToBlockSequenceWorkList(readySet, addedBlock, predSet);
                        BlockSetOps::AddElemD(compiler, readySet, addedBlock->bbNum);
                    }
                }

                for (BasicBlock* block = compiler->fgFirstBB; block; block = block->bbNext)
                {
                    if (!isBlockVisited(block))
                    {
                        addToBlockSequenceWorkList(readySet, block, predSet);
                        BlockSetOps::AddElemD(compiler, readySet, block->bbNum);
                    }
                }
                verifiedAllBBs = true;
            }
            else
            {
                break;
            }
        }
    }
    blockSequencingDone = true;

#ifdef DEBUG
    // Make sure that we've visited all the blocks.
    for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = block->bbNext)
    {
        assert(isBlockVisited(block));
    }

    JITDUMP("LSRA Block Sequence: ");
    int i = 1;
    for (BasicBlock* block = startBlockSequence(); block != nullptr; ++i, block = moveToNextBlock())
    {
        JITDUMP(FMT_BB, block->bbNum);

        if (block->isMaxBBWeight())
        {
            JITDUMP("(MAX) ");
        }
        else
        {
            JITDUMP("(%6s) ", refCntWtd2str(block->getBBWeight(compiler)));
        }

        if (i % 10 == 0)
        {
            JITDUMP("\n                     ");
        }
    }
    JITDUMP("\n\n");
#endif
}

//------------------------------------------------------------------------
// compareBlocksForSequencing: Compare two basic blocks for sequencing order.
//
// Arguments:
//    block1            - the first block for comparison
//    block2            - the second block for comparison
//    useBlockWeights   - whether to use block weights for comparison
//
// Return Value:
//    -1 if block1 is preferred.
//     0 if the blocks are equivalent.
//     1 if block2 is preferred.
//
// Notes:
//    See addToBlockSequenceWorkList.
int LinearScan::compareBlocksForSequencing(BasicBlock* block1, BasicBlock* block2, bool useBlockWeights)
{
    if (useBlockWeights)
    {
        unsigned weight1 = block1->getBBWeight(compiler);
        unsigned weight2 = block2->getBBWeight(compiler);

        if (weight1 > weight2)
        {
            return -1;
        }
        else if (weight1 < weight2)
        {
            return 1;
        }
    }

    // If weights are the same, prefer the LOWER bbNum.
    if (block1->bbNum < block2->bbNum)
    {
        return -1;
    }
    else if (block1->bbNum == block2->bbNum)
    {
        return 0;
    }
    else
    {
        return 1;
    }
}
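
// For example (hypothetical blocks): with useBlockWeights, a block of weight 200
// is preferred (-1) over one of weight 100; at equal weights, BB03 is preferred
// over BB07, because the lower bbNum wins ties.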

//------------------------------------------------------------------------
// addToBlockSequenceWorkList: Add a BasicBlock to the work list for sequencing.
//
// Arguments:
//    sequencedBlockSet - the set of blocks that are already sequenced
//    block             - the new block to be added
//    predSet           - a temporary block set used to construct the predecessor set of 'block'.
//                        It is allocated by the caller, to avoid reallocating a new block set
//                        with every call to this function.
//
// Return Value:
//    None.
//
// Notes:
//    The first block in the list will be the next one to be sequenced, as soon
//    as we encounter a block whose successors have all been sequenced, in pred-first
//    order, or the very next block if we are traversing in random order (once implemented).
//    This method uses a comparison method to determine the order in which to place
//    the blocks in the list.  This method queries whether all predecessors of the
//    block are sequenced at the time it is added to the list, and if so uses block weights
//    for inserting the block.  A block is never inserted ahead of its predecessors.
//    A block at the time of insertion may not have all its predecessors sequenced, in
//    which case it will be sequenced based on its block number. Once a block is inserted,
//    its priority/order will not be changed later once its remaining predecessors are
//    sequenced.  This means that the work list may not be sorted entirely based on
//    block weights alone.
//
//    Note also that, when random traversal order is implemented, this method
//    should insert the blocks into the list in random order, so that we can always
//    simply select the first block in the list.
void LinearScan::addToBlockSequenceWorkList(BlockSet sequencedBlockSet, BasicBlock* block, BlockSet& predSet)
{
    // The block that is being added is not already sequenced.
    assert(!BlockSetOps::IsMember(compiler, sequencedBlockSet, block->bbNum));

    // Get the predSet of the block.
    BlockSetOps::ClearD(compiler, predSet);
    flowList* pred;
    for (pred = block->bbPreds; pred != nullptr; pred = pred->flNext)
    {
        BlockSetOps::AddElemD(compiler, predSet, pred->flBlock->bbNum);
    }

    // If this is either a rarely run block or all its preds are already sequenced, use the block's weight.
    bool useBlockWeight = block->isRunRarely() || BlockSetOps::IsSubset(compiler, sequencedBlockSet, predSet);

    BasicBlockList* prevNode = nullptr;
    BasicBlockList* nextNode = blockSequenceWorkList;

    while (nextNode != nullptr)
    {
        int seqResult;

        if (nextNode->block->isRunRarely())
        {
            // If the block that is yet to be sequenced is a rarely run block, always use block weights for sequencing.
            seqResult = compareBlocksForSequencing(nextNode->block, block, true);
        }
        else if (BlockSetOps::IsMember(compiler, predSet, nextNode->block->bbNum))
        {
            // Always prefer unsequenced pred blocks.
            seqResult = -1;
        }
        else
        {
            seqResult = compareBlocksForSequencing(nextNode->block, block, useBlockWeight);
        }

        if (seqResult > 0)
        {
            break;
        }

        prevNode = nextNode;
        nextNode = nextNode->next;
    }

    BasicBlockList* newListNode = new (compiler, CMK_LSRA) BasicBlockList(block, nextNode);
    if (prevNode == nullptr)
    {
        blockSequenceWorkList = newListNode;
    }
    else
    {
        prevNode->next = newListNode;
    }
}
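
// For example (a hypothetical worklist): inserting a block of weight 100 into the
// list [BB02(w=300), BB05(w=50)], where all of its preds are sequenced and no
// block is rarely run, places it between BB02 and BB05, since the list is kept
// in decreasing order of preference.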
1080
1081 void LinearScan::removeFromBlockSequenceWorkList(BasicBlockList* listNode, BasicBlockList* prevNode)
1082 {
1083     if (listNode == blockSequenceWorkList)
1084     {
1085         assert(prevNode == nullptr);
1086         blockSequenceWorkList = listNode->next;
1087     }
1088     else
1089     {
1090         assert(prevNode != nullptr && prevNode->next == listNode);
1091         prevNode->next = listNode->next;
1092     }
1093     // TODO-Cleanup: consider merging Compiler::BlockListNode and BasicBlockList
1094     // compiler->FreeBlockListNode(listNode);
1095 }
1096
1097 // Initialize the block order for allocation (called each time a new traversal begins).
1098 BasicBlock* LinearScan::startBlockSequence()
1099 {
1100     if (!blockSequencingDone)
1101     {
1102         setBlockSequence();
1103     }
1104     else
1105     {
1106         clearVisitedBlocks();
1107     }
1108
1109     BasicBlock* curBB = compiler->fgFirstBB;
1110     curBBSeqNum       = 0;
1111     curBBNum          = curBB->bbNum;
1112     assert(blockSequence[0] == compiler->fgFirstBB);
1113     markBlockVisited(curBB);
1114     return curBB;
1115 }
1116
1117 //------------------------------------------------------------------------
1118 // moveToNextBlock: Move to the next block in order for allocation or resolution.
1119 //
1120 // Arguments:
1121 //    None
1122 //
1123 // Return Value:
1124 //    The next block.
1125 //
1126 // Notes:
1127 //    This method is used when the next block is actually going to be handled.
1128 //    It changes curBBNum.
1129
1130 BasicBlock* LinearScan::moveToNextBlock()
1131 {
1132     BasicBlock* nextBlock = getNextBlock();
1133     curBBSeqNum++;
1134     if (nextBlock != nullptr)
1135     {
1136         curBBNum = nextBlock->bbNum;
1137     }
1138     return nextBlock;
1139 }
1140
1141 //------------------------------------------------------------------------
1142 // getNextBlock: Get the next block in order for allocation or resolution.
1143 //
1144 // Arguments:
1145 //    None
1146 //
1147 // Return Value:
1148 //    The next block.
1149 //
1150 // Notes:
1151 //    This method does not actually change the current block - it is used simply
1152 //    to determine which block will be next.
1153
1154 BasicBlock* LinearScan::getNextBlock()
1155 {
1156     assert(blockSequencingDone);
1157     unsigned int nextBBSeqNum = curBBSeqNum + 1;
1158     if (nextBBSeqNum < bbSeqCount)
1159     {
1160         return blockSequence[nextBBSeqNum];
1161     }
1162     return nullptr;
1163 }
1164
1165 //------------------------------------------------------------------------
1166 // doLinearScan: The main method for register allocation.
1167 //
1168 // Arguments:
1169 //    None
1170 //
1171 // Return Value:
1172 //    None.
1173 //
1174
1175 void LinearScan::doLinearScan()
1176 {
1177     // Check to see whether we have any local variables to enregister.
1178     // We initialize this in the constructor based on opt settings,
1179     // but we don't want to spend time on the lclVar parts of LinearScan
1180     // if we have no tracked locals.
1181     if (enregisterLocalVars && (compiler->lvaTrackedCount == 0))
1182     {
1183         enregisterLocalVars = false;
1184     }
1185
1186     splitBBNumToTargetBBNumMap = nullptr;
1187
1188     // This is complicated by the fact that physical registers have refs associated
1189     // with locations where they are killed (e.g. calls), but we don't want to
1190     // count these as being touched.
1191
1192     compiler->codeGen->regSet.rsClearRegsModified();
1193
1194     initMaxSpill();
1195     buildIntervals();
1196     DBEXEC(VERBOSE, TupleStyleDump(LSRA_DUMP_REFPOS));
1197     compiler->EndPhase(PHASE_LINEAR_SCAN_BUILD);
1198
1199     DBEXEC(VERBOSE, lsraDumpIntervals("after buildIntervals"));
1200
1201     initVarRegMaps();
1202     allocateRegisters();
1203     allocationPassComplete = true;
1204     compiler->EndPhase(PHASE_LINEAR_SCAN_ALLOC);
1205     resolveRegisters();
1206     compiler->EndPhase(PHASE_LINEAR_SCAN_RESOLVE);
1207
1208     assert(blockSequencingDone); // Should do at least one traversal.
1209     assert(blockEpoch == compiler->GetCurBasicBlockEpoch());
1210
1211 #if TRACK_LSRA_STATS
1212     if ((JitConfig.DisplayLsraStats() != 0)
1213 #ifdef DEBUG
1214         || VERBOSE
1215 #endif
1216         )
1217     {
1218         dumpLsraStats(jitstdout);
1219     }
1220 #endif // TRACK_LSRA_STATS
1221
1222     DBEXEC(VERBOSE, TupleStyleDump(LSRA_DUMP_POST));
1223
1224     compiler->compLSRADone = true;
1225 }
1226
1227 //------------------------------------------------------------------------
1228 // recordVarLocationsAtStartOfBB: Update live-in LclVarDscs with the appropriate
1229 //    register location at the start of a block, during codegen.
1230 //
1231 // Arguments:
1232 //    bb - the block for which code is about to be generated.
1233 //
1234 // Return Value:
1235 //    None.
1236 //
1237 // Assumptions:
1238 //    CodeGen will take care of updating the reg masks and the current var liveness,
1239 //    after calling this method.
1240 //    This is because we need to kill off the dead registers before setting the newly live ones.
1241
1242 void LinearScan::recordVarLocationsAtStartOfBB(BasicBlock* bb)
1243 {
1244     if (!enregisterLocalVars)
1245     {
1246         return;
1247     }
1248     JITDUMP("Recording Var Locations at start of " FMT_BB "\n", bb->bbNum);
1249     VarToRegMap map   = getInVarToRegMap(bb->bbNum);
1250     unsigned    count = 0;
1251
1252     VarSetOps::AssignNoCopy(compiler, currentLiveVars,
1253                             VarSetOps::Intersection(compiler, registerCandidateVars, bb->bbLiveIn));
1254     VarSetOps::Iter iter(compiler, currentLiveVars);
1255     unsigned        varIndex = 0;
1256     while (iter.NextElem(&varIndex))
1257     {
1258         unsigned   varNum = compiler->lvaTrackedToVarNum[varIndex];
1259         LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
1260         regNumber  regNum = getVarReg(map, varIndex);
1261
1262         regNumber oldRegNum = varDsc->lvRegNum;
1263         regNumber newRegNum = regNum;
1264
1265         if (oldRegNum != newRegNum)
1266         {
1267             JITDUMP("  V%02u(%s->%s)", varNum, compiler->compRegVarName(oldRegNum),
1268                     compiler->compRegVarName(newRegNum));
1269             varDsc->lvRegNum = newRegNum;
1270             count++;
1271
1272 #ifdef USING_VARIABLE_LIVE_RANGE
1273             if (bb->bbPrev != nullptr && VarSetOps::IsMember(compiler, bb->bbPrev->bbLiveOut, varIndex))
1274             {
1275                 // varDsc was alive on previous block end ("bb->bbPrev->bbLiveOut"), so it has an open
1276                 // "VariableLiveRange" which should change to be according "getInVarToRegMap"
1277                 compiler->codeGen->getVariableLiveKeeper()->siUpdateVariableLiveRange(varDsc, varNum);
1278             }
1279 #endif // USING_VARIABLE_LIVE_RANGE
1280         }
1281         else if (newRegNum != REG_STK)
1282         {
1283             JITDUMP("  V%02u(%s)", varNum, compiler->compRegVarName(newRegNum));
1284             count++;
1285         }
1286     }
1287
1288     if (count == 0)
1289     {
1290         JITDUMP("  <none>\n");
1291     }
1292
1293     JITDUMP("\n");
1294 }
1295
1296 void Interval::setLocalNumber(Compiler* compiler, unsigned lclNum, LinearScan* linScan)
1297 {
1298     LclVarDsc* varDsc = &compiler->lvaTable[lclNum];
1299     assert(varDsc->lvTracked);
1300     assert(varDsc->lvVarIndex < compiler->lvaTrackedCount);
1301
1302     linScan->localVarIntervals[varDsc->lvVarIndex] = this;
1303
1304     assert(linScan->getIntervalForLocalVar(varDsc->lvVarIndex) == this);
1305     this->isLocalVar = true;
1306     this->varNum     = lclNum;
1307 }
1308
1309 // identify the candidates which we are not going to enregister due to
1310 // being used in EH in a way we don't want to deal with
1311 // this logic cloned from fgInterBlockLocalVarLiveness
1312 void LinearScan::identifyCandidatesExceptionDataflow()
1313 {
1314     VARSET_TP   exceptVars(VarSetOps::MakeEmpty(compiler));
1315     VARSET_TP   filterVars(VarSetOps::MakeEmpty(compiler));
1316     VARSET_TP   finallyVars(VarSetOps::MakeEmpty(compiler));
1317     BasicBlock* block;
1318
1319     foreach_block(compiler, block)
1320     {
1321         if (block->bbCatchTyp != BBCT_NONE)
1322         {
1323             // live on entry to handler
1324             VarSetOps::UnionD(compiler, exceptVars, block->bbLiveIn);
1325         }
1326
1327         if (block->bbJumpKind == BBJ_EHFILTERRET)
1328         {
1329             // live on exit from filter
1330             VarSetOps::UnionD(compiler, filterVars, block->bbLiveOut);
1331         }
1332         else if (block->bbJumpKind == BBJ_EHFINALLYRET)
1333         {
1334             // live on exit from finally
1335             VarSetOps::UnionD(compiler, finallyVars, block->bbLiveOut);
1336         }
1337 #if FEATURE_EH_FUNCLETS
1338         // Funclets are called and returned from, as such we can only count on the frame
1339         // pointer being restored, and thus everything live in or live out must be on the
1340         // stack
1341         if (block->bbFlags & BBF_FUNCLET_BEG)
1342         {
1343             VarSetOps::UnionD(compiler, exceptVars, block->bbLiveIn);
1344         }
1345         if ((block->bbJumpKind == BBJ_EHFINALLYRET) || (block->bbJumpKind == BBJ_EHFILTERRET) ||
1346             (block->bbJumpKind == BBJ_EHCATCHRET))
1347         {
1348             VarSetOps::UnionD(compiler, exceptVars, block->bbLiveOut);
1349         }
1350 #endif // FEATURE_EH_FUNCLETS
1351     }
1352
1353     // slam them all together (there was really no need to use more than 2 bitvectors here)
1354     VarSetOps::UnionD(compiler, exceptVars, filterVars);
1355     VarSetOps::UnionD(compiler, exceptVars, finallyVars);
1356
1357     /* Mark all pointer variables live on exit from a 'finally'
1358         block as either volatile for non-GC ref types or as
1359         'explicitly initialized' (volatile and must-init) for GC-ref types */
1360
1361     VarSetOps::Iter iter(compiler, exceptVars);
1362     unsigned        varIndex = 0;
1363     while (iter.NextElem(&varIndex))
1364     {
1365         unsigned   varNum = compiler->lvaTrackedToVarNum[varIndex];
1366         LclVarDsc* varDsc = compiler->lvaTable + varNum;
1367
1368         compiler->lvaSetVarDoNotEnregister(varNum DEBUGARG(Compiler::DNER_LiveInOutOfHandler));
1369
1370         if (varTypeIsGC(varDsc))
1371         {
1372             if (VarSetOps::IsMember(compiler, finallyVars, varIndex) && !varDsc->lvIsParam)
1373             {
1374                 varDsc->lvMustInit = true;
1375             }
1376         }
1377     }
1378 }
1379
1380 bool LinearScan::isRegCandidate(LclVarDsc* varDsc)
1381 {
1382     if (!enregisterLocalVars)
1383     {
1384         return false;
1385     }
1386     assert((compiler->opts.compFlags & CLFLG_REGVAR) != 0);
1387
1388     if (!varDsc->lvTracked)
1389     {
1390         return false;
1391     }
1392
1393 #if !defined(_TARGET_64BIT_)
1394     if (varDsc->lvType == TYP_LONG)
1395     {
1396         // Long variables should not be register candidates.
1397         // Lowering will have split any candidate lclVars into lo/hi vars.
1398         return false;
1399     }
1400 #endif // !defined(_TARGET_64BIT)
1401
1402     // If we have JMP, reg args must be put on the stack
1403
1404     if (compiler->compJmpOpUsed && varDsc->lvIsRegArg)
1405     {
1406         return false;
1407     }
1408
1409     // Don't allocate registers for dependently promoted struct fields
1410     if (compiler->lvaIsFieldOfDependentlyPromotedStruct(varDsc))
1411     {
1412         return false;
1413     }
1414
1415     // Don't enregister if the ref count is zero.
1416     if (varDsc->lvRefCnt() == 0)
1417     {
1418         varDsc->setLvRefCntWtd(0);
1419         return false;
1420     }
1421
1422     // Variables that are address-exposed are never enregistered, or tracked.
1423     // A struct may be promoted, and a struct that fits in a register may be fully enregistered.
1424     // Pinned variables may not be tracked (a condition of the GCInfo representation)
1425     // or enregistered, on x86 -- it is believed that we can enregister pinned (more properly, "pinning")
1426     // references when using the general GC encoding.
1427     unsigned lclNum = (unsigned)(varDsc - compiler->lvaTable);
1428     if (varDsc->lvAddrExposed || !varTypeIsEnregisterable(varDsc))
1429     {
1430 #ifdef DEBUG
1431         Compiler::DoNotEnregisterReason dner = Compiler::DNER_AddrExposed;
1432         if (!varDsc->lvAddrExposed)
1433         {
1434             dner = Compiler::DNER_IsStruct;
1435         }
1436 #endif // DEBUG
1437         compiler->lvaSetVarDoNotEnregister(lclNum DEBUGARG(dner));
1438         return false;
1439     }
1440     else if (varDsc->lvPinned)
1441     {
1442         varDsc->lvTracked = 0;
1443 #ifdef JIT32_GCENCODER
1444         compiler->lvaSetVarDoNotEnregister(lclNum DEBUGARG(Compiler::DNER_PinningRef));
1445 #endif // JIT32_GCENCODER
1446         return false;
1447     }
1448
1449     // If we are not optimizing and the method has exception handlers,
1450     // mark all args and locals as 'do not enregister', so that they
1451     // won't ever get enregistered.
1452     //
1453     if (compiler->opts.MinOpts() && compiler->compHndBBtabCount > 0)
1454     {
1455         compiler->lvaSetVarDoNotEnregister(lclNum DEBUGARG(Compiler::DNER_LiveInOutOfHandler));
1456     }
1457
1458     if (varDsc->lvDoNotEnregister)
1459     {
1460         return false;
1461     }
1462
1463     switch (genActualType(varDsc->TypeGet()))
1464     {
1465 #if CPU_HAS_FP_SUPPORT
1466         case TYP_FLOAT:
1467         case TYP_DOUBLE:
1468             return !compiler->opts.compDbgCode;
1469
1470 #endif // CPU_HAS_FP_SUPPORT
1471
1472         case TYP_INT:
1473         case TYP_LONG:
1474         case TYP_REF:
1475         case TYP_BYREF:
1476             break;
1477
1478 #ifdef FEATURE_SIMD
1479         case TYP_SIMD12:
1480         case TYP_SIMD16:
1481         case TYP_SIMD32:
1482             return !varDsc->lvPromoted;
1483
1484         // TODO-1stClassStructs: Move TYP_SIMD8 up with the other SIMD types, after handling the param issue
1485         // (passing & returning as TYP_LONG).
1486         case TYP_SIMD8:
1487             return false;
1488 #endif // FEATURE_SIMD
1489
1490         case TYP_STRUCT:
1491             return false;
1492
1493         case TYP_UNDEF:
1494         case TYP_UNKNOWN:
1495             noway_assert(!"lvType not set correctly");
1496             varDsc->lvType = TYP_INT;
1497             return false;
1498
1499         default:
1500             return false;
1501     }
1502
1503     return true;
1504 }
1505
1506 // Identify locals & compiler temps that are register candidates
1507 // TODO-Cleanup: This was cloned from Compiler::lvaSortByRefCount() in lclvars.cpp in order
1508 // to avoid perturbation, but should be merged.
1509
1510 void LinearScan::identifyCandidates()
1511 {
1512     if (enregisterLocalVars)
1513     {
1514         // Initialize the set of lclVars that are candidates for register allocation.
1515         VarSetOps::AssignNoCopy(compiler, registerCandidateVars, VarSetOps::MakeEmpty(compiler));
1516
1517         // Initialize the sets of lclVars that are used to determine whether, and for which lclVars,
1518         // we need to perform resolution across basic blocks.
1519         // Note that we can't do this in the constructor because the number of tracked lclVars may
1520         // change between the constructor and the actual allocation.
1521         VarSetOps::AssignNoCopy(compiler, resolutionCandidateVars, VarSetOps::MakeEmpty(compiler));
1522         VarSetOps::AssignNoCopy(compiler, splitOrSpilledVars, VarSetOps::MakeEmpty(compiler));
1523
1524         // We set enregisterLocalVars to true only if there are tracked lclVars
1525         assert(compiler->lvaCount != 0);
1526     }
1527     else if (compiler->lvaCount == 0)
1528     {
1529         // Nothing to do. Note that even if enregisterLocalVars is false, we still need to set the
1530         // lvLRACandidate field on all the lclVars to false if we have any.
1531         return;
1532     }
1533
1534     if (compiler->compHndBBtabCount > 0)
1535     {
1536         identifyCandidatesExceptionDataflow();
1537     }
1538
1539     unsigned   lclNum;
1540     LclVarDsc* varDsc;
1541
1542     // While we build intervals for the candidate lclVars, we will determine the floating point
1543     // lclVars, if any, to consider for callee-save register preferencing.
1544     // We maintain two sets of FP vars - those that meet the first threshold of weighted ref Count,
1545     // and those that meet the second.
1546     // The first threshold is used for methods that are heuristically deemed either to have light
1547     // fp usage, or other factors that encourage conservative use of callee-save registers, such
1548     // as multiple exits (where there might be an early exit that would be excessively penalized by
1549     // lots of prolog/epilog saves & restores).
1550     // The second threshold is used where there are factors deemed to make it more likely that
1551     // fp callee-save registers will be needed, such as loops or many fp vars.
1552     // We keep two sets of vars, since we collect some of the information to determine which set to
1553     // use as we iterate over the vars.
1554     // When we are generating AVX code on non-Unix (FEATURE_PARTIAL_SIMD_CALLEE_SAVE), we maintain an
1555     // additional set of LargeVectorType vars, and there is a separate threshold defined for those.
1556     // It is assumed that if we encounter these, we should consider this a "high use" scenario,
1557     // so we don't maintain two sets of these vars.
1558     // This is defined as thresholdLargeVectorRefCntWtd, as we are likely to use the same mechanism
1559     // for vectors on Arm64, though the actual value may differ.
1560
1561     unsigned int floatVarCount        = 0;
1562     unsigned int thresholdFPRefCntWtd = 4 * BB_UNITY_WEIGHT;
1563     unsigned int maybeFPRefCntWtd     = 2 * BB_UNITY_WEIGHT;
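    // For example (an illustrative calculation, assuming BB_UNITY_WEIGHT == 100): a var whose
    // weighted ref count is >= 400 qualifies directly for fpCalleeSaveCandidateVars, while one
    // with a weighted ref count in [200, 400) only enters fpMaybeCandidateVars, and is promoted
    // to a full candidate later only if the "high fp use" heuristic below fires.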
1564     VARSET_TP    fpMaybeCandidateVars(VarSetOps::UninitVal());
1565 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
1566     unsigned int largeVectorVarCount           = 0;
1567     unsigned int thresholdLargeVectorRefCntWtd = 4 * BB_UNITY_WEIGHT;
1568 #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
1569     if (enregisterLocalVars)
1570     {
1571         VarSetOps::AssignNoCopy(compiler, fpCalleeSaveCandidateVars, VarSetOps::MakeEmpty(compiler));
1572         VarSetOps::AssignNoCopy(compiler, fpMaybeCandidateVars, VarSetOps::MakeEmpty(compiler));
1573 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
1574         VarSetOps::AssignNoCopy(compiler, largeVectorVars, VarSetOps::MakeEmpty(compiler));
1575         VarSetOps::AssignNoCopy(compiler, largeVectorCalleeSaveCandidateVars, VarSetOps::MakeEmpty(compiler));
1576 #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
1577     }
1578 #if DOUBLE_ALIGN
1579     unsigned refCntStk       = 0;
1580     unsigned refCntReg       = 0;
1581     unsigned refCntWtdReg    = 0;
1582     unsigned refCntStkParam  = 0; // sum of     ref counts for all stack based parameters
1583     unsigned refCntWtdStkDbl = 0; // sum of wtd ref counts for stack based doubles
1584     doDoubleAlign            = false;
1585     bool checkDoubleAlign    = true;
1586     if (compiler->codeGen->isFramePointerRequired() || compiler->opts.MinOpts())
1587     {
1588         checkDoubleAlign = false;
1589     }
1590     else
1591     {
1592         switch (compiler->getCanDoubleAlign())
1593         {
1594             case MUST_DOUBLE_ALIGN:
1595                 doDoubleAlign    = true;
1596                 checkDoubleAlign = false;
1597                 break;
1598             case CAN_DOUBLE_ALIGN:
1599                 break;
1600             case CANT_DOUBLE_ALIGN:
1601                 doDoubleAlign    = false;
1602                 checkDoubleAlign = false;
1603                 break;
1604             default:
1605                 unreached();
1606         }
1607     }
1608 #endif // DOUBLE_ALIGN
1609
1610     // Check whether register variables are permitted.
1611     if (!enregisterLocalVars)
1612     {
1613         localVarIntervals = nullptr;
1614     }
1615     else if (compiler->lvaTrackedCount > 0)
1616     {
1617         // initialize mapping from tracked local to interval
1618         localVarIntervals = new (compiler, CMK_LSRA) Interval*[compiler->lvaTrackedCount];
1619     }
1620
1621     INTRACK_STATS(regCandidateVarCount = 0);
1622     for (lclNum = 0, varDsc = compiler->lvaTable; lclNum < compiler->lvaCount; lclNum++, varDsc++)
1623     {
1624         // Initialize all variables to REG_STK
1625         varDsc->lvRegNum = REG_STK;
1626 #ifndef _TARGET_64BIT_
1627         varDsc->lvOtherReg = REG_STK;
1628 #endif // !_TARGET_64BIT_
1629
1630         if (!enregisterLocalVars)
1631         {
1632             varDsc->lvLRACandidate = false;
1633             continue;
1634         }
1635
1636 #if DOUBLE_ALIGN
1637         if (checkDoubleAlign)
1638         {
1639             if (varDsc->lvIsParam && !varDsc->lvIsRegArg)
1640             {
1641                 refCntStkParam += varDsc->lvRefCnt();
1642             }
1643             else if (!isRegCandidate(varDsc) || varDsc->lvDoNotEnregister)
1644             {
1645                 refCntStk += varDsc->lvRefCnt();
1646                 if ((varDsc->lvType == TYP_DOUBLE) ||
1647                     ((varTypeIsStruct(varDsc) && varDsc->lvStructDoubleAlign &&
1648                       (compiler->lvaGetPromotionType(varDsc) != Compiler::PROMOTION_TYPE_INDEPENDENT))))
1649                 {
1650                     refCntWtdStkDbl += varDsc->lvRefCntWtd();
1651                 }
1652             }
1653             else
1654             {
1655                 refCntReg += varDsc->lvRefCnt();
1656                 refCntWtdReg += varDsc->lvRefCntWtd();
1657             }
1658         }
1659 #endif // DOUBLE_ALIGN
1660
1661         // Start with the assumption that it's a candidate.
1662
1663         varDsc->lvLRACandidate = 1;
1664
1665         // Start with lvRegister as false - set it true only if the variable gets
1666         // the same register assignment throughout
1667         varDsc->lvRegister = false;
1668
1669         if (!isRegCandidate(varDsc))
1670         {
1671             varDsc->lvLRACandidate = 0;
1672             if (varDsc->lvTracked)
1673             {
1674                 localVarIntervals[varDsc->lvVarIndex] = nullptr;
1675             }
1676             continue;
1677         }
1678
1679         if (varDsc->lvLRACandidate)
1680         {
1681             var_types type   = genActualType(varDsc->TypeGet());
1682             Interval* newInt = newInterval(type);
1683             newInt->setLocalNumber(compiler, lclNum, this);
1684             VarSetOps::AddElemD(compiler, registerCandidateVars, varDsc->lvVarIndex);
1685
1686             // we will set this later when we have determined liveness
1687             varDsc->lvMustInit = false;
1688
1689             if (varDsc->lvIsStructField)
1690             {
1691                 newInt->isStructField = true;
1692             }
1693
1694             INTRACK_STATS(regCandidateVarCount++);
1695
1696             // We maintain two sets of FP vars - those that meet the first threshold of weighted ref Count,
1697             // and those that meet the second (see the definitions of thresholdFPRefCntWtd and maybeFPRefCntWtd
1698             // above).
1699             CLANG_FORMAT_COMMENT_ANCHOR;
1700
1701 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
1702             // Additionally, when we are generating code for a target with partial SIMD callee-save
1703             // (AVX on non-UNIX amd64 and 16-byte vectors on arm64), we keep a separate set of the
1704             // LargeVectorType vars.
1705             if (varTypeNeedsPartialCalleeSave(varDsc->lvType))
1706             {
1707                 largeVectorVarCount++;
1708                 VarSetOps::AddElemD(compiler, largeVectorVars, varDsc->lvVarIndex);
1709                 unsigned refCntWtd = varDsc->lvRefCntWtd();
1710                 if (refCntWtd >= thresholdLargeVectorRefCntWtd)
1711                 {
1712                     VarSetOps::AddElemD(compiler, largeVectorCalleeSaveCandidateVars, varDsc->lvVarIndex);
1713                 }
1714             }
1715             else
1716 #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
1717                 if (regType(type) == FloatRegisterType)
1718             {
1719                 floatVarCount++;
1720                 unsigned refCntWtd = varDsc->lvRefCntWtd();
1721                 if (varDsc->lvIsRegArg)
1722                 {
1723                     // Don't count the initial reference for register params.  In those cases,
1724                     // using a callee-save causes an extra copy.
1725                     refCntWtd -= BB_UNITY_WEIGHT;
1726                 }
1727                 if (refCntWtd >= thresholdFPRefCntWtd)
1728                 {
1729                     VarSetOps::AddElemD(compiler, fpCalleeSaveCandidateVars, varDsc->lvVarIndex);
1730                 }
1731                 else if (refCntWtd >= maybeFPRefCntWtd)
1732                 {
1733                     VarSetOps::AddElemD(compiler, fpMaybeCandidateVars, varDsc->lvVarIndex);
1734                 }
1735             }
1736         }
1737         else
1738         {
1739             localVarIntervals[varDsc->lvVarIndex] = nullptr;
1740         }
1741     }
1742
1743 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
1744     // Create Intervals to use for the save & restore of the upper halves of large vector lclVars.
1745     if (enregisterLocalVars)
1746     {
1747         VarSetOps::Iter largeVectorVarsIter(compiler, largeVectorVars);
1748         unsigned        largeVectorVarIndex = 0;
1749         while (largeVectorVarsIter.NextElem(&largeVectorVarIndex))
1750         {
1751             makeUpperVectorInterval(largeVectorVarIndex);
1752         }
1753     }
1754 #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
1755
1756 #if DOUBLE_ALIGN
1757     if (checkDoubleAlign)
1758     {
1759         // TODO-CQ: Fine-tune this:
1760         // In the legacy reg predictor, this runs after allocation, and then demotes any lclVars
1761         // allocated to the frame pointer, which is probably the wrong order.
1762         // However, because it runs after allocation, it can determine the impact of demoting
1763         // the lclVars allocated to the frame pointer.
1764         // => Here, estimate of the EBP refCnt and weighted refCnt is a wild guess.
1765         //
1766         unsigned refCntEBP    = refCntReg / 8;
1767         unsigned refCntWtdEBP = refCntWtdReg / 8;
1768
1769         doDoubleAlign =
1770             compiler->shouldDoubleAlign(refCntStk, refCntEBP, refCntWtdEBP, refCntStkParam, refCntWtdStkDbl);
1771     }
1772 #endif // DOUBLE_ALIGN
1773
1774     // The factors we consider to determine which set of fp vars to use as candidates for callee save
1775     // registers currently include the number of fp vars, whether there are loops, and whether there are
1776     // multiple exits.  These have been selected somewhat empirically, but there is probably room for
1777     // more tuning.
1778     CLANG_FORMAT_COMMENT_ANCHOR;
1779
1780 #ifdef DEBUG
1781     if (VERBOSE)
1782     {
1783         printf("\nFP callee save candidate vars: ");
1784         if (enregisterLocalVars && !VarSetOps::IsEmpty(compiler, fpCalleeSaveCandidateVars))
1785         {
1786             dumpConvertedVarSet(compiler, fpCalleeSaveCandidateVars);
1787             printf("\n");
1788         }
1789         else
1790         {
1791             printf("None\n\n");
1792         }
1793     }
1794 #endif
1795
1796     JITDUMP("floatVarCount = %d; hasLoops = %d, singleExit = %d\n", floatVarCount, compiler->fgHasLoops,
1797             (compiler->fgReturnBlocks == nullptr || compiler->fgReturnBlocks->next == nullptr));
1798
1799     // Determine whether to use the 2nd, more aggressive, threshold for fp callee saves.
1800     if (floatVarCount > 6 && compiler->fgHasLoops &&
1801         (compiler->fgReturnBlocks == nullptr || compiler->fgReturnBlocks->next == nullptr))
1802     {
1803         assert(enregisterLocalVars);
1804 #ifdef DEBUG
1805         if (VERBOSE)
1806         {
1807             printf("Adding additional fp callee save candidates: \n");
1808             if (!VarSetOps::IsEmpty(compiler, fpMaybeCandidateVars))
1809             {
1810                 dumpConvertedVarSet(compiler, fpMaybeCandidateVars);
1811                 printf("\n");
1812             }
1813             else
1814             {
1815                 printf("None\n\n");
1816             }
1817         }
1818 #endif
1819         VarSetOps::UnionD(compiler, fpCalleeSaveCandidateVars, fpMaybeCandidateVars);
1820     }
1821
1822 #ifdef _TARGET_ARM_
1823 #ifdef DEBUG
1824     if (VERBOSE)
1825     {
1826         // Frame layout is only pre-computed for ARM
1827         printf("\nlvaTable after IdentifyCandidates\n");
1828         compiler->lvaTableDump(Compiler::FrameLayoutState::PRE_REGALLOC_FRAME_LAYOUT);
1829     }
1830 #endif // DEBUG
1831 #endif // _TARGET_ARM_
1832 }
1833
1834 // TODO-Throughput: This mapping can surely be more efficiently done
1835 void LinearScan::initVarRegMaps()
1836 {
1837     if (!enregisterLocalVars)
1838     {
1839         inVarToRegMaps  = nullptr;
1840         outVarToRegMaps = nullptr;
1841         return;
1842     }
1843     assert(compiler->lvaTrackedFixed); // We should have already set this to prevent us from adding any new tracked
1844                                        // variables.
1845
1846     // The compiler memory allocator requires that the allocation be an
1847     // even multiple of int-sized objects
1848     unsigned int varCount = compiler->lvaTrackedCount;
1849     regMapCount           = roundUp(varCount, (unsigned)sizeof(int));
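    // For example (illustrative): with 10 tracked lclVars and sizeof(int) == 4,
    // regMapCount = roundUp(10, 4) = 12, so each map gets 12 regNumberSmall slots
    // even though only the first 10 are meaningful.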
1850
1851     // Not sure why blocks aren't numbered from zero, but they don't appear to be.
1852     // So, if we want to index by bbNum we have to know the maximum value.
1853     unsigned int bbCount = compiler->fgBBNumMax + 1;
1854
1855     inVarToRegMaps  = new (compiler, CMK_LSRA) regNumberSmall*[bbCount];
1856     outVarToRegMaps = new (compiler, CMK_LSRA) regNumberSmall*[bbCount];
1857
1858     if (varCount > 0)
1859     {
1860         // This VarToRegMap is used during the resolution of critical edges.
1861         sharedCriticalVarToRegMap = new (compiler, CMK_LSRA) regNumberSmall[regMapCount];
1862
1863         for (unsigned int i = 0; i < bbCount; i++)
1864         {
1865             VarToRegMap inVarToRegMap  = new (compiler, CMK_LSRA) regNumberSmall[regMapCount];
1866             VarToRegMap outVarToRegMap = new (compiler, CMK_LSRA) regNumberSmall[regMapCount];
1867
1868             for (unsigned int j = 0; j < regMapCount; j++)
1869             {
1870                 inVarToRegMap[j]  = REG_STK;
1871                 outVarToRegMap[j] = REG_STK;
1872             }
1873             inVarToRegMaps[i]  = inVarToRegMap;
1874             outVarToRegMaps[i] = outVarToRegMap;
1875         }
1876     }
1877     else
1878     {
1879         sharedCriticalVarToRegMap = nullptr;
1880         for (unsigned int i = 0; i < bbCount; i++)
1881         {
1882             inVarToRegMaps[i]  = nullptr;
1883             outVarToRegMaps[i] = nullptr;
1884         }
1885     }
1886 }
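// Illustrative usage (a sketch, not code from this file): once the maps are initialized,
// the register assigned to tracked var 'varIndex' on entry to a block is read as
//
//     regNumber reg = (regNumber)inVarToRegMaps[block->bbNum][varIndex];
//
// where REG_STK indicates that the variable is on the stack at that block boundary.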
1887
1888 void LinearScan::setInVarRegForBB(unsigned int bbNum, unsigned int varNum, regNumber reg)
1889 {
1890     assert(enregisterLocalVars);
1891     assert(reg < UCHAR_MAX && varNum < compiler->lvaCount);
1892     inVarToRegMaps[bbNum][compiler->lvaTable[varNum].lvVarIndex] = (regNumberSmall)reg;
1893 }
1894
1895 void LinearScan::setOutVarRegForBB(unsigned int bbNum, unsigned int varNum, regNumber reg)
1896 {
1897     assert(enregisterLocalVars);
1898     assert(reg < UCHAR_MAX && varNum < compiler->lvaCount);
1899     outVarToRegMaps[bbNum][compiler->lvaTable[varNum].lvVarIndex] = (regNumberSmall)reg;
1900 }
1901
1902 LinearScan::SplitEdgeInfo LinearScan::getSplitEdgeInfo(unsigned int bbNum)
1903 {
1904     assert(enregisterLocalVars);
1905     SplitEdgeInfo splitEdgeInfo;
1906     assert(bbNum <= compiler->fgBBNumMax);
1907     assert(bbNum > bbNumMaxBeforeResolution);
1908     assert(splitBBNumToTargetBBNumMap != nullptr);
1909     splitBBNumToTargetBBNumMap->Lookup(bbNum, &splitEdgeInfo);
1910     assert(splitEdgeInfo.toBBNum <= bbNumMaxBeforeResolution);
1911     assert(splitEdgeInfo.fromBBNum <= bbNumMaxBeforeResolution);
1912     return splitEdgeInfo;
1913 }
1914
1915 VarToRegMap LinearScan::getInVarToRegMap(unsigned int bbNum)
1916 {
1917     assert(enregisterLocalVars);
1918     assert(bbNum <= compiler->fgBBNumMax);
1919     // For the blocks inserted to split critical edges, the inVarToRegMap is
1920     // equal to the outVarToRegMap at the "from" block.
1921     if (bbNum > bbNumMaxBeforeResolution)
1922     {
1923         SplitEdgeInfo splitEdgeInfo = getSplitEdgeInfo(bbNum);
1924         unsigned      fromBBNum     = splitEdgeInfo.fromBBNum;
1925         if (fromBBNum == 0)
1926         {
1927             assert(splitEdgeInfo.toBBNum != 0);
1928             return inVarToRegMaps[splitEdgeInfo.toBBNum];
1929         }
1930         else
1931         {
1932             return outVarToRegMaps[fromBBNum];
1933         }
1934     }
1935
1936     return inVarToRegMaps[bbNum];
1937 }
1938
1939 VarToRegMap LinearScan::getOutVarToRegMap(unsigned int bbNum)
1940 {
1941     assert(enregisterLocalVars);
1942     assert(bbNum <= compiler->fgBBNumMax);
1943     // For the blocks inserted to split critical edges, the outVarToRegMap is
1944     // equal to the inVarToRegMap at the target.
1945     if (bbNum > bbNumMaxBeforeResolution)
1946     {
1947         // If this is an empty block, its in and out maps are both the same.
1948         // We identify this case by setting fromBBNum or toBBNum to 0, and using only the other.
1949         SplitEdgeInfo splitEdgeInfo = getSplitEdgeInfo(bbNum);
1950         unsigned      toBBNum       = splitEdgeInfo.toBBNum;
1951         if (toBBNum == 0)
1952         {
1953             assert(splitEdgeInfo.fromBBNum != 0);
1954             return outVarToRegMaps[splitEdgeInfo.fromBBNum];
1955         }
1956         else
1957         {
1958             return inVarToRegMaps[toBBNum];
1959         }
1960     }
1961     return outVarToRegMaps[bbNum];
1962 }
1963
1964 //------------------------------------------------------------------------
1965 // setVarReg: Set the register associated with a variable in the given 'bbVarToRegMap'.
1966 //
1967 // Arguments:
1968 //    bbVarToRegMap   - the map of interest
1969 //    trackedVarIndex - the lvVarIndex for the variable
1970 //    reg             - the register to which it is being mapped
1971 //
1972 // Return Value:
1973 //    None
1974 //
1975 void LinearScan::setVarReg(VarToRegMap bbVarToRegMap, unsigned int trackedVarIndex, regNumber reg)
1976 {
1977     assert(trackedVarIndex < compiler->lvaTrackedCount);
1978     regNumberSmall regSmall = (regNumberSmall)reg;
1979     assert((regNumber)regSmall == reg);
1980     bbVarToRegMap[trackedVarIndex] = regSmall;
1981 }
1982
1983 //------------------------------------------------------------------------
1984 // getVarReg: Get the register associated with a variable in the given 'bbVarToRegMap'.
1985 //
1986 // Arguments:
1987 //    bbVarToRegMap   - the map of interest
1988 //    trackedVarIndex - the lvVarIndex for the variable
1989 //
1990 // Return Value:
1991 //    The register to which 'trackedVarIndex' is mapped
1992 //
1993 regNumber LinearScan::getVarReg(VarToRegMap bbVarToRegMap, unsigned int trackedVarIndex)
1994 {
1995     assert(enregisterLocalVars);
1996     assert(trackedVarIndex < compiler->lvaTrackedCount);
1997     return (regNumber)bbVarToRegMap[trackedVarIndex];
1998 }
1999
2000 // Initialize the incoming VarToRegMap to the given map values (generally a predecessor of
2001 // the block)
2002 VarToRegMap LinearScan::setInVarToRegMap(unsigned int bbNum, VarToRegMap srcVarToRegMap)
2003 {
2004     assert(enregisterLocalVars);
2005     VarToRegMap inVarToRegMap = inVarToRegMaps[bbNum];
2006     memcpy(inVarToRegMap, srcVarToRegMap, (regMapCount * sizeof(regNumberSmall)));
2007     return inVarToRegMap;
2008 }
2009
2010 //------------------------------------------------------------------------
2011 // checkLastUses: Check correctness of last use flags
2012 //
2013 // Arguments:
2014 //    The block for which we are checking last uses.
2015 //
2016 // Notes:
2017 //    This does a backward walk of the RefPositions, starting from the liveOut set.
2018 //    This method was previously used to set the last uses, which were computed by
2019 //    liveness, but were not created in some cases of multiple lclVar references in the
2020 //    same tree. However, now that last uses are computed as RefPositions are created,
2021 //    that is no longer necessary, and this method is simply retained as a check.
2022 //    The exception to the check-only behavior is when LSRA_EXTEND_LIFETIMES is set via
2023 //    COMPlus_JitStressRegs. In that case, this method is required, because even though
2024 //    the RefPositions will not be marked lastUse in that case, we still need to correctly
2025 //    mark the last uses on the tree nodes, which is done by this method.
2026 //
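// A minimal sketch of the backward walk (illustrative; the real loop below also handles
// defs, the keep-alive 'this' pointer, and the extendLifetimes stress mode):
//
//     VARSET_TP live = block->bbLiveOut;
//     for each RefPosition ref, walking backwards until the RefTypeBB entry:
//         if ref is a use of varIndex and varIndex is not in live:
//             ref should be marked lastUse; add varIndex to live
//         if ref is a def of varIndex:
//             remove varIndex from live
//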
2027 #ifdef DEBUG
2028 void LinearScan::checkLastUses(BasicBlock* block)
2029 {
2030     if (VERBOSE)
2031     {
2032         JITDUMP("\n\nCHECKING LAST USES for " FMT_BB ", liveout=", block->bbNum);
2033         dumpConvertedVarSet(compiler, block->bbLiveOut);
2034         JITDUMP("\n==============================\n");
2035     }
2036
2037     unsigned keepAliveVarNum = BAD_VAR_NUM;
2038     if (compiler->lvaKeepAliveAndReportThis())
2039     {
2040         keepAliveVarNum = compiler->info.compThisArg;
2041         assert(compiler->info.compIsStatic == false);
2042     }
2043
2044     // find which uses are lastUses
2045
2046     // Work backwards starting with live out.
2047     // 'computedLive' is updated to include any exposed use (including those in this
2048     // block that we've already seen).  When we encounter a use, if it's
2049     // not in that set, then it's a last use.
2050
2051     VARSET_TP computedLive(VarSetOps::MakeCopy(compiler, block->bbLiveOut));
2052
2053     bool                       foundDiff       = false;
2054     RefPositionReverseIterator reverseIterator = refPositions.rbegin();
2055     RefPosition*               currentRefPosition;
2056     for (currentRefPosition = &reverseIterator; currentRefPosition->refType != RefTypeBB;
2057          reverseIterator++, currentRefPosition = &reverseIterator)
2058     {
2059         // We should never see ParamDefs or ZeroInits within a basic block.
2060         assert(currentRefPosition->refType != RefTypeParamDef && currentRefPosition->refType != RefTypeZeroInit);
2061         if (currentRefPosition->isIntervalRef() && currentRefPosition->getInterval()->isLocalVar)
2062         {
2063             unsigned varNum   = currentRefPosition->getInterval()->varNum;
2064             unsigned varIndex = currentRefPosition->getInterval()->getVarIndex(compiler);
2065
2066             LsraLocation loc = currentRefPosition->nodeLocation;
2067
2068             // We should always have a tree node for a localVar, except for the "special" RefPositions.
2069             GenTree* tree = currentRefPosition->treeNode;
2070             assert(tree != nullptr || currentRefPosition->refType == RefTypeExpUse ||
2071                    currentRefPosition->refType == RefTypeDummyDef);
2072
2073             if (!VarSetOps::IsMember(compiler, computedLive, varIndex) && varNum != keepAliveVarNum)
2074             {
2075                 // There was no exposed use, so this is a "last use" (and we mark it thus even if it's a def)
2076
2077                 if (extendLifetimes())
2078                 {
2079                     // NOTE: this is a bit of a hack. When extending lifetimes, the "last use" bit will be clear.
2080                     // This bit, however, would normally be used during resolveLocalRef to set the value of
2081                     // GTF_VAR_DEATH on the node for a ref position. If this bit is not set correctly even when
2082                     // extending lifetimes, the code generator will assert as it expects to have accurate last
2083                     // use information. To avoid these asserts, set the GTF_VAR_DEATH bit here.
2084                     // Note also that extendLifetimes() is an LSRA stress mode, so it will only be true for
2085                     // Checked or Debug builds, for which this method will be executed.
2086                     if (tree != nullptr)
2087                     {
2088                         tree->gtFlags |= GTF_VAR_DEATH;
2089                     }
2090                 }
2091                 else if (!currentRefPosition->lastUse)
2092                 {
2093                     JITDUMP("missing expected last use of V%02u @%u\n", compiler->lvaTrackedToVarNum[varIndex], loc);
2094                     foundDiff = true;
2095                 }
2096                 VarSetOps::AddElemD(compiler, computedLive, varIndex);
2097             }
2098             else if (currentRefPosition->lastUse)
2099             {
2100                 JITDUMP("unexpected last use of V%02u @%u\n", compiler->lvaTrackedToVarNum[varIndex], loc);
2101                 foundDiff = true;
2102             }
2103             else if (extendLifetimes() && tree != nullptr)
2104             {
2105                 // NOTE: see the comment above re: the extendLifetimes hack.
2106                 tree->gtFlags &= ~GTF_VAR_DEATH;
2107             }
2108
2109             if (currentRefPosition->refType == RefTypeDef || currentRefPosition->refType == RefTypeDummyDef)
2110             {
2111                 VarSetOps::RemoveElemD(compiler, computedLive, varIndex);
2112             }
2113         }
2114
2115         assert(reverseIterator != refPositions.rend());
2116     }
2117
2118     VARSET_TP liveInNotComputedLive(VarSetOps::Diff(compiler, block->bbLiveIn, computedLive));
2119
2120     VarSetOps::Iter liveInNotComputedLiveIter(compiler, liveInNotComputedLive);
2121     unsigned        liveInNotComputedLiveIndex = 0;
2122     while (liveInNotComputedLiveIter.NextElem(&liveInNotComputedLiveIndex))
2123     {
2124         unsigned varNum = compiler->lvaTrackedToVarNum[liveInNotComputedLiveIndex];
2125         if (compiler->lvaTable[varNum].lvLRACandidate)
2126         {
2127             JITDUMP(FMT_BB ": V%02u is in LiveIn set, but not computed live.\n", block->bbNum, varNum);
2128             foundDiff = true;
2129         }
2130     }
2131
2132     VarSetOps::DiffD(compiler, computedLive, block->bbLiveIn);
2133     const VARSET_TP& computedLiveNotLiveIn(computedLive); // reuse the buffer.
2134     VarSetOps::Iter  computedLiveNotLiveInIter(compiler, computedLiveNotLiveIn);
2135     unsigned         computedLiveNotLiveInIndex = 0;
2136     while (computedLiveNotLiveInIter.NextElem(&computedLiveNotLiveInIndex))
2137     {
2138         unsigned varNum = compiler->lvaTrackedToVarNum[computedLiveNotLiveInIndex];
2139         if (compiler->lvaTable[varNum].lvLRACandidate)
2140         {
2141             JITDUMP(FMT_BB ": V%02u is computed live, but not in LiveIn set.\n", block->bbNum, varNum);
2142             foundDiff = true;
2143         }
2144     }
2145
2146     assert(!foundDiff);
2147 }
2148 #endif // DEBUG
2149
2150 //------------------------------------------------------------------------
2151 // findPredBlockForLiveIn: Determine which block should be used for the register locations of the live-in variables.
2152 //
2153 // Arguments:
2154 //    block                 - The block for which we're selecting a predecessor.
2155 //    prevBlock             - The previous block in allocation order.
2156 //    pPredBlockIsAllocated - A debug-only argument that indicates whether any of the predecessors have been seen
2157 //                            in allocation order.
2158 //
2159 // Return Value:
2160 //    The selected predecessor.
2161 //
2162 // Assumptions:
2163 //    in DEBUG, caller initializes *pPredBlockIsAllocated to false, and it will be set to true if the block
2164 //    returned is in fact a predecessor.
2165 //
2166 // Notes:
2167 //    This will select a predecessor based on the heuristics obtained by getLsraBlockBoundaryLocations(), which can be
2168 //    one of:
2169 //      LSRA_BLOCK_BOUNDARY_PRED    - Use the register locations of a predecessor block (default)
2170 //      LSRA_BLOCK_BOUNDARY_LAYOUT  - Use the register locations of the previous block in layout order.
2171 //                                    This is the only case where this actually returns a different block.
2172 //      LSRA_BLOCK_BOUNDARY_ROTATE  - Rotate the register locations from a predecessor.
2173 //                                    For this case, the block returned is the same as for LSRA_BLOCK_BOUNDARY_PRED, but
2174 //                                    the register locations will be "rotated" to stress the resolution and allocation
2175 //                                    code.
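// For example (illustrative): under the default LSRA_BLOCK_BOUNDARY_PRED, if the only
// allocated predecessor of BB05 is BB03, the live-in vars of BB05 start in the registers
// recorded in BB03's outVarToRegMap, minimizing resolution moves along that edge.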
2176
2177 BasicBlock* LinearScan::findPredBlockForLiveIn(BasicBlock* block,
2178                                                BasicBlock* prevBlock DEBUGARG(bool* pPredBlockIsAllocated))
2179 {
2180     BasicBlock* predBlock = nullptr;
2181 #ifdef DEBUG
2182     assert(*pPredBlockIsAllocated == false);
2183     if (getLsraBlockBoundaryLocations() == LSRA_BLOCK_BOUNDARY_LAYOUT)
2184     {
2185         if (prevBlock != nullptr)
2186         {
2187             predBlock = prevBlock;
2188         }
2189     }
2190     else
2191 #endif // DEBUG
2192         if (block != compiler->fgFirstBB)
2193     {
2194         predBlock = block->GetUniquePred(compiler);
2195         if (predBlock != nullptr)
2196         {
2197             if (isBlockVisited(predBlock))
2198             {
2199                 if (predBlock->bbJumpKind == BBJ_COND)
2200                 {
2201                     // Special handling to improve matching on backedges.
2202                     BasicBlock* otherBlock = (block == predBlock->bbNext) ? predBlock->bbJumpDest : predBlock->bbNext;
2203                     noway_assert(otherBlock != nullptr);
2204                     if (isBlockVisited(otherBlock))
2205                     {
2206                         // This is the case when we have a conditional branch where one target has already
2207                         // been visited.  It would be best to use the same incoming regs as that block,
2208                         // so that we have less likelihood of having to move registers.
2209                         // For example, in determining the block to use for the starting register locations for
2210                         // "block" in the following example, we'd like to use the same predecessor for "block"
2211                         // as for "otherBlock", so that both successors of predBlock have the same locations, reducing
2212                         // the likelihood of needing a split block on a backedge:
2213                         //
2214                         //   otherPred
2215                         //       |
2216                         //   otherBlock <-+
2217                         //     . . .      |
2218                         //                |
2219                         //   predBlock----+
2220                         //       |
2221                         //     block
2222                         //
2223                         for (flowList* pred = otherBlock->bbPreds; pred != nullptr; pred = pred->flNext)
2224                         {
2225                             BasicBlock* otherPred = pred->flBlock;
2226                             if (otherPred->bbNum == blockInfo[otherBlock->bbNum].predBBNum)
2227                             {
2228                                 predBlock = otherPred;
2229                                 break;
2230                             }
2231                         }
2232                     }
2233                 }
2234             }
2235             else
2236             {
2237                 predBlock = nullptr;
2238             }
2239         }
2240         else
2241         {
2242             for (flowList* pred = block->bbPreds; pred != nullptr; pred = pred->flNext)
2243             {
2244                 BasicBlock* candidatePredBlock = pred->flBlock;
2245                 if (isBlockVisited(candidatePredBlock))
2246                 {
2247                     if (predBlock == nullptr || predBlock->bbWeight < candidatePredBlock->bbWeight)
2248                     {
2249                         predBlock = candidatePredBlock;
2250                         INDEBUG(*pPredBlockIsAllocated = true;)
2251                     }
2252                 }
2253             }
2254         }
2255         if (predBlock == nullptr)
2256         {
2257             predBlock = prevBlock;
2258             assert(predBlock != nullptr);
2259             JITDUMP("\n\nNo allocated predecessor; ");
2260         }
2261     }
2262     return predBlock;
2263 }
2264
2265 #ifdef DEBUG
2266 void LinearScan::dumpVarRefPositions(const char* title)
2267 {
2268     if (enregisterLocalVars)
2269     {
2270         printf("\nVAR REFPOSITIONS %s\n", title);
2271
2272         for (unsigned i = 0; i < compiler->lvaCount; i++)
2273         {
2274             printf("--- V%02u", i);
2275
2276             LclVarDsc* varDsc = compiler->lvaTable + i;
2277             if (varDsc->lvIsRegCandidate())
2278             {
2279                 Interval* interval = getIntervalForLocalVar(varDsc->lvVarIndex);
2280                 printf("  (Interval %d)\n", interval->intervalIndex);
2281                 for (RefPosition* ref = interval->firstRefPosition; ref != nullptr; ref = ref->nextRefPosition)
2282                 {
2283                     ref->dump();
2284                 }
2285             }
2286             else
2287             {
2288                 printf("\n");
2289             }
2290         }
2291         printf("\n");
2292     }
2293 }
2294
2295 #endif // DEBUG
2296
2297 // Set the default rpFrameType based upon codeGen->isFramePointerRequired()
2298 // This was lifted from the register predictor
2299 //
2300 void LinearScan::setFrameType()
2301 {
2302     FrameType frameType = FT_NOT_SET;
2303 #if DOUBLE_ALIGN
2304     compiler->codeGen->setDoubleAlign(false);
2305     if (doDoubleAlign)
2306     {
2307         frameType = FT_DOUBLE_ALIGN_FRAME;
2308         compiler->codeGen->setDoubleAlign(true);
2309     }
2310     else
2311 #endif // DOUBLE_ALIGN
2312         if (compiler->codeGen->isFramePointerRequired())
2313     {
2314         frameType = FT_EBP_FRAME;
2315     }
2316     else
2317     {
2318         if (compiler->rpMustCreateEBPCalled == false)
2319         {
2320 #ifdef DEBUG
2321             const char* reason;
2322 #endif // DEBUG
2323             compiler->rpMustCreateEBPCalled = true;
2324             if (compiler->rpMustCreateEBPFrame(INDEBUG(&reason)))
2325             {
2326                 JITDUMP("; Decided to create an EBP based frame for ETW stackwalking (%s)\n", reason);
2327                 compiler->codeGen->setFrameRequired(true);
2328             }
2329         }
2330
2331         if (compiler->codeGen->isFrameRequired())
2332         {
2333             frameType = FT_EBP_FRAME;
2334         }
2335         else
2336         {
2337             frameType = FT_ESP_FRAME;
2338         }
2339     }
2340
2341     switch (frameType)
2342     {
2343         case FT_ESP_FRAME:
2344             noway_assert(!compiler->codeGen->isFramePointerRequired());
2345             noway_assert(!compiler->codeGen->isFrameRequired());
2346             compiler->codeGen->setFramePointerUsed(false);
2347             break;
2348         case FT_EBP_FRAME:
2349             compiler->codeGen->setFramePointerUsed(true);
2350             break;
2351 #if DOUBLE_ALIGN
2352         case FT_DOUBLE_ALIGN_FRAME:
2353             noway_assert(!compiler->codeGen->isFramePointerRequired());
2354             compiler->codeGen->setFramePointerUsed(false);
2355             break;
2356 #endif // DOUBLE_ALIGN
2357         default:
2358             noway_assert(!"rpFrameType not set correctly!");
2359             break;
2360     }
2361
2362     // If we are using FPBASE as the frame register, we cannot also use it for
2363     // a local var.
2364     regMaskTP removeMask = RBM_NONE;
2365     if (frameType == FT_EBP_FRAME)
2366     {
2367         removeMask |= RBM_FPBASE;
2368     }
2369
2370     compiler->rpFrameType = frameType;
2371
2372 #ifdef _TARGET_ARMARCH_
2373     // Determine whether we need to reserve a register for large lclVar offsets.
2374     if (compiler->compRsvdRegCheck(Compiler::REGALLOC_FRAME_LAYOUT))
2375     {
2376         // We reserve R10/IP1 in this case to hold the offsets in load/store instructions
2377         compiler->codeGen->regSet.rsMaskResvd |= RBM_OPT_RSVD;
2378         assert(REG_OPT_RSVD != REG_FP);
2379         JITDUMP("  Reserved REG_OPT_RSVD (%s) due to large frame\n", getRegName(REG_OPT_RSVD));
2380         removeMask |= RBM_OPT_RSVD;
2381     }
2382 #endif // _TARGET_ARMARCH_
2383
2384     if ((removeMask != RBM_NONE) && ((availableIntRegs & removeMask) != 0))
2385     {
2386         // We know that we're already in "read mode" for availableIntRegs. However,
2387         // we need to remove these registers, so subsequent users (like callers
2388         // to allRegs()) get the right thing. The RemoveRegistersFromMasks() code
2389         // fixes up everything that already took a dependency on the value that was
2390         // previously read, so this completes the picture.
2391         availableIntRegs.OverrideAssign(availableIntRegs & ~removeMask);
2392     }
2393 }
2394
2395 //------------------------------------------------------------------------
2396 // copyOrMoveRegInUse: Is 'ref' a copyReg/moveReg that is still busy at the given location?
2397 //
2398 // Arguments:
2399 //    ref: The RefPosition of interest
2400 //    loc: The LsraLocation at which we're determining whether it's busy.
2401 //
2402 // Return Value:
2403 //    true iff 'ref' is active at the given location
2404 //
2405 bool copyOrMoveRegInUse(RefPosition* ref, LsraLocation loc)
2406 {
2407     if (!ref->copyReg && !ref->moveReg)
2408     {
2409         return false;
2410     }
2411     if (ref->getRefEndLocation() >= loc)
2412     {
2413         return true;
2414     }
2415     Interval*    interval = ref->getInterval();
2416     RefPosition* nextRef  = interval->getNextRefPosition();
2417     if (nextRef != nullptr && nextRef->treeNode == ref->treeNode && nextRef->getRefEndLocation() >= loc)
2418     {
2419         return true;
2420     }
2421     return false;
2422 }
2423
2424 // Determine whether the register represented by "physRegRecord" is available at least
2425 // at the "currentLoc", and if so, return the next location at which it is in use in
2426 // "nextRefLocationPtr"
2427 //
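// A usage sketch (illustrative):
//
//     LsraLocation nextRefLocation;
//     if (registerIsAvailable(physRegRecord, currentLoc, &nextRefLocation, regType))
//     {
//         // The register is free at currentLoc, and remains free until nextRefLocation.
//     }
//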
2428 bool LinearScan::registerIsAvailable(RegRecord*    physRegRecord,
2429                                      LsraLocation  currentLoc,
2430                                      LsraLocation* nextRefLocationPtr,
2431                                      RegisterType  regType)
2432 {
2433     *nextRefLocationPtr          = MaxLocation;
2434     LsraLocation nextRefLocation = MaxLocation;
2435     regMaskTP    regMask         = genRegMask(physRegRecord->regNum);
2436     if (physRegRecord->isBusyUntilNextKill)
2437     {
2438         return false;
2439     }
2440
2441     RefPosition* nextPhysReference = physRegRecord->getNextRefPosition();
2442     if (nextPhysReference != nullptr)
2443     {
2444         nextRefLocation = nextPhysReference->nodeLocation;
2445         // if (nextPhysReference->refType == RefTypeFixedReg) nextRefLocation--;
2446     }
2447     else if (!physRegRecord->isCalleeSave)
2448     {
2449         nextRefLocation = MaxLocation - 1;
2450     }
2451
2452     Interval* assignedInterval = physRegRecord->assignedInterval;
2453
2454     if (assignedInterval != nullptr)
2455     {
2456         RefPosition* recentReference = assignedInterval->recentRefPosition;
2457
2458         // The only case where we have an assignedInterval, but recentReference is null
2459         // is where this interval is live at procedure entry (i.e. an arg register), in which
2460         // case it's still live and its assigned register is not available
2461         // (Note that the ParamDef will be recorded as a recentReference when we encounter
2462         // it, but we will be allocating registers, potentially to other incoming parameters,
2463         // as we process the ParamDefs.)
2464
2465         if (recentReference == nullptr)
2466         {
2467             return false;
2468         }
2469
2470         // Is this a copyReg/moveReg?  It is if the register assignment doesn't match.
2471         // (the recentReference may not be a copyReg/moveReg, because we could have seen another
2472         // reference since the copyReg/moveReg)
2473
2474         if (!assignedInterval->isAssignedTo(physRegRecord->regNum))
2475         {
2476             // If the recentReference is for a different register, it can be reassigned, but
2477             // otherwise don't reassign it if it's still in use.
2478             // (Note that it is unlikely that we have a recent copy or move to a different register,
2479             // where this physRegRecord is still pointing at an earlier copy or move, but it is possible,
2480             // especially in stress modes.)
2481             if ((recentReference->registerAssignment == regMask) && copyOrMoveRegInUse(recentReference, currentLoc))
2482             {
2483                 return false;
2484             }
2485         }
2486         else if (!assignedInterval->isActive && assignedInterval->isConstant)
2487         {
2488             // Treat this as unassigned, i.e. do nothing.
2489             // TODO-CQ: Consider adjusting the heuristics (probably in the caller of this method)
2490             // to avoid reusing these registers.
2491         }
2492         // If this interval isn't active, it's available if it isn't referenced
2493         // at this location (or the previous location, if the recent RefPosition
2494         // is a delayRegFree).
2495         else if (!assignedInterval->isActive &&
2496                  (recentReference->refType == RefTypeExpUse || recentReference->getRefEndLocation() < currentLoc))
2497         {
2498             // This interval must have a next reference (otherwise it wouldn't be assigned to this register)
2499             RefPosition* nextReference = recentReference->nextRefPosition;
2500             if (nextReference != nullptr)
2501             {
2502                 if (nextReference->nodeLocation < nextRefLocation)
2503                 {
2504                     nextRefLocation = nextReference->nodeLocation;
2505                 }
2506             }
2507             else
2508             {
2509                 assert(recentReference->copyReg && recentReference->registerAssignment != regMask);
2510             }
2511         }
2512         else
2513         {
2514             return false;
2515         }
2516     }
2517     if (nextRefLocation < *nextRefLocationPtr)
2518     {
2519         *nextRefLocationPtr = nextRefLocation;
2520     }
2521
2522 #ifdef _TARGET_ARM_
2523     if (regType == TYP_DOUBLE)
2524     {
2525         // Recurse, but check the other half this time (TYP_FLOAT)
2526         if (!registerIsAvailable(findAnotherHalfRegRec(physRegRecord), currentLoc, nextRefLocationPtr, TYP_FLOAT))
2527             return false;
2528         nextRefLocation = *nextRefLocationPtr;
2529     }
2530 #endif // _TARGET_ARM_
2531
2532     return (nextRefLocation >= currentLoc);
2533 }
2534
2535 //------------------------------------------------------------------------
2536 // getRegisterType: Get the RegisterType to use for the given RefPosition
2537 //
2538 // Arguments:
2539 //    currentInterval: The interval for the current allocation
2540 //    refPosition:     The RefPosition of the current Interval for which a register is being allocated
2541 //
2542 // Return Value:
2543 //    The RegisterType that should be allocated for this RefPosition
2544 //
2545 // Notes:
2546 //    This will nearly always be identical to the registerType of the interval, except in the case
2547 //    of SIMD types of 8 bytes (currently only Vector2) when they are passed and returned in integer
2548 //    registers, or copied to a return temp.
2549 //    This method need only be called in situations where we may be dealing with the register requirements
2550 //    of a RefTypeUse RefPosition (i.e. not when we are only looking at the type of an interval, nor when
2551 //    we are interested in the "defining" type of the interval).  This is because the situation of interest
2552 //    only happens at the use (where it must be copied to an integer register).
2553
2554 RegisterType LinearScan::getRegisterType(Interval* currentInterval, RefPosition* refPosition)
2555 {
2556     assert(refPosition->getInterval() == currentInterval);
2557     RegisterType regType    = currentInterval->registerType;
2558     regMaskTP    candidates = refPosition->registerAssignment;
2559
2560     assert((candidates & allRegs(regType)) != RBM_NONE);
2561     return regType;
2562 }
2563
2564 //------------------------------------------------------------------------
2565 // isMatchingConstant: Check to see whether a given register contains the constant referenced
2566 //                     by the given RefPosition
2567 //
2568 // Arguments:
2569 //    physRegRecord:   The RegRecord for the register we're interested in.
2570 //    refPosition:     The RefPosition for a constant interval.
2571 //
2572 // Return Value:
2573 //    True iff the register was defined by an identical constant node as the current interval.
2574 //
2575 bool LinearScan::isMatchingConstant(RegRecord* physRegRecord, RefPosition* refPosition)
2576 {
2577     if ((physRegRecord->assignedInterval == nullptr) || !physRegRecord->assignedInterval->isConstant)
2578     {
2579         return false;
2580     }
2581     noway_assert(refPosition->treeNode != nullptr);
2582     GenTree* otherTreeNode = physRegRecord->assignedInterval->firstRefPosition->treeNode;
2583     noway_assert(otherTreeNode != nullptr);
2584
2585     if (refPosition->treeNode->OperGet() == otherTreeNode->OperGet())
2586     {
2587         switch (otherTreeNode->OperGet())
2588         {
2589             case GT_CNS_INT:
2590                 if ((refPosition->treeNode->AsIntCon()->IconValue() == otherTreeNode->AsIntCon()->IconValue()) &&
2591                     (varTypeGCtype(refPosition->treeNode) == varTypeGCtype(otherTreeNode)))
2592                 {
2593 #ifdef _TARGET_64BIT_
2594                     // If the constant is negative, only reuse registers of the same type.
2595                     // This is because, on a 64-bit system, we do not sign-extend immediates in registers to
2596                     // 64-bits unless they are actually longs, as this requires a longer instruction.
2597                     // This doesn't apply to a 32-bit system, on which long values occupy multiple registers.
2598                     // (We could sign-extend, but we would have to always sign-extend, because if we reuse more
2599                     // than once, we won't have access to the instruction that originally defines the constant).
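                    // Illustrative example (ours, not from the original comment): on x64,
                    // 'mov eax, -1' zero-extends, leaving 0x00000000FFFFFFFF in RAX rather
                    // than the TYP_LONG value -1; a non-negative constant such as 5 has the
                    // same 64-bit pattern for both types, hence the IconValue() >= 0 escape
                    // below.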
2600                     if ((refPosition->treeNode->TypeGet() == otherTreeNode->TypeGet()) ||
2601                         (refPosition->treeNode->AsIntCon()->IconValue() >= 0))
2602 #endif // _TARGET_64BIT_
2603                     {
2604                         return true;
2605                     }
2606                 }
2607                 break;
2608             case GT_CNS_DBL:
2609             {
2610                 // For floating point constants, the values must be identical, not simply compare
2611                 // equal.  So we compare the bits.
2612                 if (refPosition->treeNode->AsDblCon()->isBitwiseEqual(otherTreeNode->AsDblCon()) &&
2613                     (refPosition->treeNode->TypeGet() == otherTreeNode->TypeGet()))
2614                 {
2615                     return true;
2616                 }
2617                 break;
2618             }
2619             default:
2620                 break;
2621         }
2622     }
2623     return false;
2624 }
2625
2626 //------------------------------------------------------------------------
2627 // tryAllocateFreeReg: Find a free register that satisfies the requirements for refPosition,
2628 //                     and takes into account the preferences for the given Interval
2629 //
2630 // Arguments:
2631 //    currentInterval: The interval for the current allocation
2632 //    refPosition:     The RefPosition of the current Interval for which a register is being allocated
2633 //
2634 // Return Value:
2635 //    The regNumber, if any, allocated to the RefPosition.  Returns REG_NA if no free register is found.
2636 //
2637 // Notes:
2638 //    TODO-CQ: Consider whether we need to use a different order for tree temps than for vars, as
2639 //    reg predict does
2640
2641 static const regNumber lsraRegOrder[]      = {REG_VAR_ORDER};
2642 const unsigned         lsraRegOrderSize    = ArrLen(lsraRegOrder);
2643 static const regNumber lsraRegOrderFlt[]   = {REG_VAR_ORDER_FLT};
2644 const unsigned         lsraRegOrderFltSize = ArrLen(lsraRegOrderFlt);
2645
2646 regNumber LinearScan::tryAllocateFreeReg(Interval* currentInterval, RefPosition* refPosition)
2647 {
2648     regNumber foundReg = REG_NA;
2649
2650     RegisterType     regType = getRegisterType(currentInterval, refPosition);
2651     const regNumber* regOrder;
2652     unsigned         regOrderSize;
2653     if (useFloatReg(regType))
2654     {
2655         regOrder     = lsraRegOrderFlt;
2656         regOrderSize = lsraRegOrderFltSize;
2657     }
2658     else
2659     {
2660         regOrder     = lsraRegOrder;
2661         regOrderSize = lsraRegOrderSize;
2662     }
2663
2664     LsraLocation currentLocation = refPosition->nodeLocation;
2665     RefPosition* nextRefPos      = refPosition->nextRefPosition;
2666     LsraLocation nextLocation    = (nextRefPos == nullptr) ? currentLocation : nextRefPos->nodeLocation;
2667     regMaskTP    candidates      = refPosition->registerAssignment;
2668     regMaskTP    preferences     = currentInterval->registerPreferences;
2669
2670     if (RefTypeIsDef(refPosition->refType))
2671     {
2672         if (currentInterval->hasConflictingDefUse)
2673         {
2674             resolveConflictingDefAndUse(currentInterval, refPosition);
2675             candidates = refPosition->registerAssignment;
2676         }
2677         // Otherwise, check for the case of a fixed-reg def of a reg that will be killed before the
2678         // use, or interferes at the point of use (which shouldn't happen, but Lower doesn't mark
2679         // the contained nodes as interfering).
2680         // Note that we may have a ParamDef RefPosition that is marked isFixedRegRef, but which
2681         // has had its registerAssignment changed to no longer be a single register.
2682         else if (refPosition->isFixedRegRef && nextRefPos != nullptr && RefTypeIsUse(nextRefPos->refType) &&
2683                  !nextRefPos->isFixedRegRef && genMaxOneBit(refPosition->registerAssignment))
2684         {
2685             regNumber  defReg       = refPosition->assignedReg();
2686             RegRecord* defRegRecord = getRegisterRecord(defReg);
2687
2688             RefPosition* currFixedRegRefPosition = defRegRecord->recentRefPosition;
2689             assert(currFixedRegRefPosition != nullptr &&
2690                    currFixedRegRefPosition->nodeLocation == refPosition->nodeLocation);
2691
2692             // If there is another fixed reference to this register before the use, change the candidates
2693             // on this RefPosition to include that of nextRefPos.
2694             if (currFixedRegRefPosition->nextRefPosition != nullptr &&
2695                 currFixedRegRefPosition->nextRefPosition->nodeLocation <= nextRefPos->getRefEndLocation())
2696             {
2697                 candidates |= nextRefPos->registerAssignment;
2698                 if (preferences == refPosition->registerAssignment)
2699                 {
2700                     preferences = candidates;
2701                 }
2702             }
2703         }
2704     }
2705
2706     preferences &= candidates;
2707     if (preferences == RBM_NONE)
2708     {
2709         preferences = candidates;
2710     }
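    // For example (illustrative, using x86/x64 register masks): if candidates is
    // (RBM_EAX | RBM_ECX) but the interval prefers only RBM_EDI, the intersection
    // above is empty, so we fall back to treating the full candidate set as the
    // preference.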
2711
2712 #ifdef DEBUG
2713     candidates = stressLimitRegs(refPosition, candidates);
2714 #endif
2715     assert(candidates != RBM_NONE);
2716
2717     Interval* relatedInterval = currentInterval->relatedInterval;
2718     if (currentInterval->isSpecialPutArg)
2719     {
2720         // This is not actually a preference, it's merely to track the lclVar that this
2721         // "specialPutArg" is using.
2722         relatedInterval = nullptr;
2723     }
2724     Interval* nextRelatedInterval  = relatedInterval;
2725     Interval* finalRelatedInterval = relatedInterval;
2726     Interval* rangeEndInterval     = relatedInterval;
2727     regMaskTP relatedPreferences   = (relatedInterval == nullptr) ? RBM_NONE : relatedInterval->getCurrentPreferences();
2728     LsraLocation rangeEndLocation  = refPosition->getRangeEndLocation();
2729     bool         preferCalleeSave  = currentInterval->preferCalleeSave;
2730     bool         avoidByteRegs     = false;
2731 #ifdef _TARGET_X86_
2732     if ((relatedPreferences & ~RBM_BYTE_REGS) != RBM_NONE)
2733     {
2734         avoidByteRegs = true;
2735     }
2736 #endif
2737
2738     // Follow the chain of related intervals, as long as:
2739     // - The next reference is a def. We don't want to use the relatedInterval for preferencing if its next reference
2740     //   is not a new definition (as it either is, or will become, live).
2741     // - The next (def) reference is downstream. Otherwise we could iterate indefinitely because the preferences can
2742     //   be circular.
2743     // - The intersection of preferenced registers is non-empty.
2744     //
2745     while (nextRelatedInterval != nullptr)
2746     {
2747         RefPosition* nextRelatedRefPosition = nextRelatedInterval->getNextRefPosition();
2748
2749         // Only use the relatedInterval for preferencing if the related interval's next reference
2750         // is a new definition.
2751         if ((nextRelatedRefPosition != nullptr) && RefTypeIsDef(nextRelatedRefPosition->refType))
2752         {
2753             finalRelatedInterval = nextRelatedInterval;
2754             nextRelatedInterval  = nullptr;
2755
2756             // First, get the preferences for this interval
2757             regMaskTP thisRelatedPreferences = finalRelatedInterval->getCurrentPreferences();
2758             // Now, determine if they are compatible and update the relatedPreferences that we'll consider.
2759             regMaskTP newRelatedPreferences = thisRelatedPreferences & relatedPreferences;
2760             if (newRelatedPreferences != RBM_NONE && (!avoidByteRegs || thisRelatedPreferences != RBM_BYTE_REGS))
2761             {
2762                 bool thisIsSingleReg = isSingleRegister(newRelatedPreferences);
2763                 if (!thisIsSingleReg || (finalRelatedInterval->isLocalVar &&
2764                                          getRegisterRecord(genRegNumFromMask(newRelatedPreferences))->isFree()))
2765                 {
2766                     relatedPreferences = newRelatedPreferences;
2767                     // If this Interval has a downstream def without a single-register preference, continue to iterate.
2768                     if (nextRelatedRefPosition->nodeLocation > rangeEndLocation)
2769                     {
2770                         preferCalleeSave    = (preferCalleeSave || finalRelatedInterval->preferCalleeSave);
2771                         rangeEndLocation    = nextRelatedRefPosition->getRangeEndLocation();
2772                         rangeEndInterval    = finalRelatedInterval;
2773                         nextRelatedInterval = finalRelatedInterval->relatedInterval;
2774                     }
2775                 }
2776             }
2777         }
2778         else
2779         {
2780             if (nextRelatedInterval == relatedInterval)
2781             {
2782                 relatedInterval    = nullptr;
2783                 relatedPreferences = RBM_NONE;
2784             }
2785             nextRelatedInterval = nullptr;
2786         }
2787     }
2788
2789     // For floating point, we want to be less aggressive about using callee-save registers.
2790     // So in that case, we just need to ensure that the current RefPosition is covered.
2791     RefPosition* rangeEndRefPosition;
2792     RefPosition* lastRefPosition = currentInterval->lastRefPosition;
2793     if (useFloatReg(currentInterval->registerType))
2794     {
2795         rangeEndRefPosition = refPosition;
2796         preferCalleeSave    = currentInterval->preferCalleeSave;
2797     }
2798     else
2799     {
2800         rangeEndRefPosition = refPosition->getRangeEndRef();
2801         // If we have a chain of related intervals, and a finalRelatedInterval that
2802         // is not currently occupying a register, and whose lifetime begins after this one,
2803         // we want to try to select a register that will cover its lifetime.
2804         if ((rangeEndInterval != nullptr) && (rangeEndInterval->assignedReg == nullptr) &&
2805             (rangeEndInterval->getNextRefLocation() >= rangeEndRefPosition->nodeLocation))
2806         {
2807             lastRefPosition = rangeEndInterval->lastRefPosition;
2808         }
2809     }
2810
2811     // If this has a delayed use (due to being used in a rmw position of a
2812     // non-commutative operator), its endLocation is delayed until the "def"
2813     // position, which is one location past the use (getRefEndLocation() takes care of this).
2814     rangeEndLocation          = rangeEndRefPosition->getRefEndLocation();
2815     LsraLocation lastLocation = lastRefPosition->getRefEndLocation();
2816     regNumber    prevReg      = REG_NA;
2817
2818     if (currentInterval->assignedReg)
2819     {
2820         bool useAssignedReg = false;
2821         // This was an interval that was previously allocated to the given
2822         // physical register, and we should try to allocate it to that register
2823         // again, if possible and reasonable.
2824         // Use it preemptively (i.e. before checking other available regs)
2825         // only if it is preferred and available.
2826
2827         RegRecord* regRec    = currentInterval->assignedReg;
2828         prevReg              = regRec->regNum;
2829         regMaskTP prevRegBit = genRegMask(prevReg);
2830
2831         // Is it in the preferred set of regs?
2832         if ((prevRegBit & preferences) != RBM_NONE)
2833         {
2834             // Is it currently available?
2835             LsraLocation nextPhysRefLoc;
2836             if (registerIsAvailable(regRec, currentLocation, &nextPhysRefLoc, currentInterval->registerType))
2837             {
2838                 // If the register is next referenced at this location, only use it if
2839                 // this has a fixed reg requirement (i.e. this is the reference that caused
2840                 // the FixedReg ref to be created)
2841
2842                 if (!regRec->conflictingFixedRegReference(refPosition))
2843                 {
2844                     useAssignedReg = true;
2845                 }
2846             }
2847         }
2848         if (useAssignedReg)
2849         {
2850             regNumber foundReg = prevReg;
2851             assignPhysReg(regRec, currentInterval);
2852             refPosition->registerAssignment = genRegMask(foundReg);
2853             return foundReg;
2854         }
2855         else
2856         {
2857             // Don't keep trying to allocate to this register
2858             currentInterval->assignedReg = nullptr;
2859         }
2860     }
2861
2862     //-------------------------------------------------------------------------
2863     // Register Selection
2864
2865     RegRecord* availablePhysRegInterval = nullptr;
2866     bool       unassignInterval         = false;
2867
2868     // Each register will receive a score which is the sum of the scoring criteria below.
2869     // These were selected on the assumption that they will have an impact on the "goodness"
2870     // of a register selection, and have been tuned to a certain extent by observing the impact
2871     // of the ordering on asmDiffs.  However, there is probably much more room for tuning,
2872     // and perhaps additional criteria.
2873     //
2874     // These are FLAGS (bits) so that we can easily order them and add them together.
2875     // If the scores are equal, but one covers more of the current interval's range,
2876     // then it wins.  Otherwise, the one encountered earlier in the regOrder wins.
2877
2878     enum RegisterScore
2879     {
2880         VALUE_AVAILABLE = 0x40, // It is a constant value that is already in an acceptable register.
2881         COVERS          = 0x20, // It is in the interval's preference set and it covers the entire lifetime.
2882         OWN_PREFERENCE  = 0x10, // It is in the preference set of this interval.
2883         COVERS_RELATED  = 0x08, // It is in the preference set of the related interval and covers the entire lifetime.
2884         RELATED_PREFERENCE = 0x04, // It is in the preference set of the related interval.
2885         CALLER_CALLEE      = 0x02, // It is in the right "set" for the interval (caller or callee-save).
2886         UNASSIGNED         = 0x01, // It is not currently assigned to an inactive interval.
2887     };
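
    // Because each criterion is a distinct power-of-two flag, any single criterion
    // outweighs the sum of all lower-priority criteria combined, so comparing the
    // summed scores ranks candidates lexicographically by criterion. A compile-time
    // sketch verifying that property for the two highest-priority flags
    // (illustrative only, not part of the selection logic):
    static_assert(VALUE_AVAILABLE > (COVERS | OWN_PREFERENCE | COVERS_RELATED | RELATED_PREFERENCE | CALLER_CALLEE |
                                     UNASSIGNED),
                  "VALUE_AVAILABLE outweighs all lower-priority criteria combined");
    static_assert(COVERS > (OWN_PREFERENCE | COVERS_RELATED | RELATED_PREFERENCE | CALLER_CALLEE | UNASSIGNED),
                  "COVERS outweighs all lower-priority criteria combined");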
2888
2889     int bestScore = 0;
2890
2891     // Compute the best possible score so we can stop looping early if we find it.
2892     // TODO-Throughput: At some point we may want to short-circuit the computation of each score, but
2893     // probably not until we've tuned the order of these criteria.  At that point,
2894     // we'll need to avoid the short-circuit if we've got a stress option to reverse
2895     // the selection.
2896     int bestPossibleScore = COVERS + UNASSIGNED + OWN_PREFERENCE + CALLER_CALLEE;
2897     if (relatedPreferences != RBM_NONE)
2898     {
2899         bestPossibleScore |= RELATED_PREFERENCE + COVERS_RELATED;
2900     }
2901
2902     LsraLocation bestLocation = MinLocation;
2903
2904     // In non-debug builds, this will simply get optimized away
2905     bool reverseSelect = false;
2906 #ifdef DEBUG
2907     reverseSelect = doReverseSelect();
2908 #endif // DEBUG
2909
2910     // An optimization for the common case where there is only one candidate:
2911     // avoid looping over all the other registers.
2912
2913     regNumber singleReg = REG_NA;
2914
2915     if (genMaxOneBit(candidates))
2916     {
2917         regOrderSize = 1;
2918         singleReg    = genRegNumFromMask(candidates);
2919         regOrder     = &singleReg;
2920     }
2921
2922     for (unsigned i = 0; i < regOrderSize && (candidates != RBM_NONE); i++)
2923     {
2924         regNumber regNum       = regOrder[i];
2925         regMaskTP candidateBit = genRegMask(regNum);
2926
2927         if (!(candidates & candidateBit))
2928         {
2929             continue;
2930         }
2931
2932         candidates &= ~candidateBit;
2933
2934         RegRecord* physRegRecord = getRegisterRecord(regNum);
2935
2936         int          score               = 0;
2937         LsraLocation nextPhysRefLocation = MaxLocation;
2938
2939         // By chance, is this register already holding this interval, as a copyReg or having
2940         // been restored as inactive after a kill?
2941         if (physRegRecord->assignedInterval == currentInterval)
2942         {
2943             availablePhysRegInterval = physRegRecord;
2944             unassignInterval         = false;
2945             break;
2946         }
2947
2948         // Find the next RefPosition of the physical register
2949         if (!registerIsAvailable(physRegRecord, currentLocation, &nextPhysRefLocation, regType))
2950         {
2951             continue;
2952         }
2953
2954         // If the register is next referenced at this location, only use it if
2955         // this has a fixed reg requirement (i.e. this is the reference that caused
2956         // the FixedReg ref to be created)
2957
2958         if (physRegRecord->conflictingFixedRegReference(refPosition))
2959         {
2960             continue;
2961         }
2962
2963         // If this is a definition of a constant interval, check to see if its value is already in this register.
2964         if (currentInterval->isConstant && RefTypeIsDef(refPosition->refType) &&
2965             isMatchingConstant(physRegRecord, refPosition))
2966         {
2967             score |= VALUE_AVAILABLE;
2968         }
2969
2970         // If the nextPhysRefLocation is a fixedRef for the rangeEndRefPosition, increment it so that
2971         // we don't mistakenly conclude that it fails to cover the live range.
2972         // This doesn't handle the case where earlier RefPositions for this Interval are also
2973         // FixedRefs of this regNum, but at least those are only interesting in the case where those
2974         // are "local last uses" of the Interval - otherwise the liveRange would interfere with the reg.
2975         if (nextPhysRefLocation == rangeEndLocation && rangeEndRefPosition->isFixedRefOfReg(regNum))
2976         {
2977             INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_INCREMENT_RANGE_END, currentInterval));
2978             nextPhysRefLocation++;
2979         }
2980
2981         if ((candidateBit & preferences) != RBM_NONE)
2982         {
2983             score |= OWN_PREFERENCE;
2984             if (nextPhysRefLocation > rangeEndLocation)
2985             {
2986                 score |= COVERS;
2987             }
2988         }
2989         if ((candidateBit & relatedPreferences) != RBM_NONE)
2990         {
2991             score |= RELATED_PREFERENCE;
2992             if (nextPhysRefLocation > relatedInterval->lastRefPosition->nodeLocation)
2993             {
2994                 score |= COVERS_RELATED;
2995             }
2996         }
2997
2998         // If we had a fixed-reg def of a reg that will be killed before the use, prefer it to any other registers
2999         // with the same score.  (Note that we haven't changed the original registerAssignment on the RefPosition).
3000         // Overload the RELATED_PREFERENCE value.
3001         else if (candidateBit == refPosition->registerAssignment)
3002         {
3003             score |= RELATED_PREFERENCE;
3004         }
3005
3006         if ((preferCalleeSave && physRegRecord->isCalleeSave) || (!preferCalleeSave && !physRegRecord->isCalleeSave))
3007         {
3008             score |= CALLER_CALLEE;
3009         }
3010
3011         // The register is considered unassigned if it has no assignedInterval, OR
3012         // if its next reference is beyond the range of this interval.
3013         if (!isAssigned(physRegRecord, lastLocation ARM_ARG(currentInterval->registerType)))
3014         {
3015             score |= UNASSIGNED;
3016         }
3017
3018         bool foundBetterCandidate = false;
3019
3020         if (score > bestScore)
3021         {
3022             foundBetterCandidate = true;
3023         }
3024         else if (score == bestScore)
3025         {
3026             // Prefer a register that covers the range.
3027             if (bestLocation <= lastLocation)
3028             {
3029                 if (nextPhysRefLocation > bestLocation)
3030                 {
3031                     foundBetterCandidate = true;
3032                 }
3033             }
3034             // If both cover the range, prefer the register that is killed sooner (leaving the longer-range register
3035             // available). If both cover the range and are killed at the same location, prefer the one that matches
3036             // the previous assignment.
3037             else if (nextPhysRefLocation > lastLocation)
3038             {
3039                 if (nextPhysRefLocation < bestLocation)
3040                 {
3041                     foundBetterCandidate = true;
3042                 }
3043                 else if (nextPhysRefLocation == bestLocation && prevReg == regNum)
3044                 {
3045                     foundBetterCandidate = true;
3046                 }
3047             }
3048         }
3049
3050 #ifdef DEBUG
3051         if (doReverseSelect() && bestScore != 0)
3052         {
3053             foundBetterCandidate = !foundBetterCandidate;
3054         }
3055 #endif // DEBUG
3056
3057         if (foundBetterCandidate)
3058         {
3059             bestLocation             = nextPhysRefLocation;
3060             availablePhysRegInterval = physRegRecord;
3061             unassignInterval         = true;
3062             bestScore                = score;
3063         }
3064
3065         // There is no way we can get a better score, so break out early.
3066         if (!reverseSelect && score == bestPossibleScore && bestLocation == rangeEndLocation + 1)
3067         {
3068             break;
3069         }
3070     }
3071
3072     if (availablePhysRegInterval != nullptr)
3073     {
3074         if (unassignInterval && isAssigned(availablePhysRegInterval ARM_ARG(currentInterval->registerType)))
3075         {
3076             Interval* const intervalToUnassign = availablePhysRegInterval->assignedInterval;
3077             unassignPhysReg(availablePhysRegInterval ARM_ARG(currentInterval->registerType));
3078
3079             if ((bestScore & VALUE_AVAILABLE) != 0 && intervalToUnassign != nullptr)
3080             {
3081                 assert(intervalToUnassign->isConstant);
3082                 refPosition->treeNode->SetReuseRegVal();
3083             }
3084             // If we considered this "unassigned" because this interval's lifetime ends before
3085             // the next ref, remember it.
3086             else if ((bestScore & UNASSIGNED) != 0 && intervalToUnassign != nullptr)
3087             {
3088                 updatePreviousInterval(availablePhysRegInterval, intervalToUnassign, intervalToUnassign->registerType);
3089             }
3090         }
3091         else
3092         {
3093             assert((bestScore & VALUE_AVAILABLE) == 0);
3094         }
3095         assignPhysReg(availablePhysRegInterval, currentInterval);
3096         foundReg                        = availablePhysRegInterval->regNum;
3097         regMaskTP foundRegMask          = genRegMask(foundReg);
3098         refPosition->registerAssignment = foundRegMask;
3099     }
3100
3101     return foundReg;
3102 }
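
// The selection loop above amounts to a lexicographic comparison: first the summed
// flag score, then range coverage as the tie-break. A condensed, standalone sketch
// of the tie-break rule, with plain unsigned locations and the prevReg preference
// omitted (illustrative only; a larger nextRefLocation means the register is free
// for longer):
namespace lsra_select_sketch
{
struct Candidate
{
    int      score;           // summed RegisterScore flags
    unsigned nextRefLocation; // next location at which the physical register is needed
};

// Returns true if 'cand' should replace 'best' for an interval whose range ends at 'lastLocation'.
static bool isBetter(const Candidate& cand, const Candidate& best, unsigned lastLocation)
{
    if (cand.score != best.score)
    {
        return cand.score > best.score;
    }
    if (best.nextRefLocation <= lastLocation)
    {
        // The best so far doesn't cover the range; prefer whichever is free for longer.
        return cand.nextRefLocation > best.nextRefLocation;
    }
    // The best so far covers the range; prefer a covering register that is needed
    // again sooner, leaving the longer-available register for other intervals.
    return (cand.nextRefLocation > lastLocation) && (cand.nextRefLocation < best.nextRefLocation);
}
} // namespace lsra_select_sketch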
3103
3104 //------------------------------------------------------------------------
3105 // canSpillReg: Determine whether we can spill physRegRecord
3106 //
3107 // Arguments:
3108 //    physRegRecord             - reg to spill
3109 //    refLocation               - Location of RefPosition where this register will be spilled
3110 //    recentAssignedRefWeight   - Weight of recent assigned RefPosition which will be determined in this function
3111 //    farthestRefPosWeight      - Current farthestRefPosWeight at allocateBusyReg()
3112 //
3113 // Return Value:
3114 //    True  - if we can spill physRegRecord
3115 //    False - otherwise
3116 //
3117 // Note: This helper is designed to be used only from allocateBusyReg() and canSpillDoubleReg()
3118 //
3119 bool LinearScan::canSpillReg(RegRecord* physRegRecord, LsraLocation refLocation, unsigned* recentAssignedRefWeight)
3120 {
3121     assert(physRegRecord->assignedInterval != nullptr);
3122     RefPosition* recentAssignedRef = physRegRecord->assignedInterval->recentRefPosition;
3123
3124     if (recentAssignedRef != nullptr)
3125     {
3126         if (isRefPositionActive(recentAssignedRef, refLocation))
3127         {
3128             // We can't spill a register that's active at the current location
3129             return false;
3130         }
3131
3132         // We prefer not to spill a register if the weight of recentAssignedRef > weight
3133         // of the spill candidate found so far.  We would consider spilling a greater-weight
3134         // ref position only if the refPosition being allocated must have a register.
3135         *recentAssignedRefWeight = getWeight(recentAssignedRef);
3136     }
3137     return true;
3138 }
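
// A sketch of the caller-side pattern (mirroring the loop in allocateBusyReg below):
// query the weight of the register's most recent assignment, then skip the register
// if spilling it would cost more than the best spill candidate found so far. Shown
// as a comment because it depends on the allocator's state (illustrative only):
//
//     unsigned weight = BB_ZERO_WEIGHT;
//     if (!canSpillReg(physRegRecord, refLocation, &weight) || (weight > farthestRefPosWeight))
//     {
//         continue; // not spillable at this location, or too expensive to spill
//     }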
3139
3140 #ifdef _TARGET_ARM_
3141 //------------------------------------------------------------------------
3142 // canSpillDoubleReg: Determine whether we can spill physRegRecord
3143 //
3144 // Arguments:
3145 //    physRegRecord             - reg to spill (must be a valid double register)
3146 //    refLocation               - Location of RefPosition where this register will be spilled
3147 //    recentAssignedRefWeight   - Weight of recent assigned RefPosition which will be determined in this function
3148 //
3149 // Return Value:
3150 //    True  - if we can spill physRegRecord
3151 //    False - otherwise
3152 //
3153 // Notes:
3154 //    This helper is designed to be used only from allocateBusyReg().
3155 //    The recentAssignedRefWeight is not updated if either register cannot be spilled.
3156 //
3157 bool LinearScan::canSpillDoubleReg(RegRecord*   physRegRecord,
3158                                    LsraLocation refLocation,
3159                                    unsigned*    recentAssignedRefWeight)
3160 {
3161     assert(genIsValidDoubleReg(physRegRecord->regNum));
3162     bool     retVal  = true;
3163     unsigned weight  = BB_ZERO_WEIGHT;
3164     unsigned weight2 = BB_ZERO_WEIGHT;
3165
3166     RegRecord* physRegRecord2 = findAnotherHalfRegRec(physRegRecord);
3167
3168     if ((physRegRecord->assignedInterval != nullptr) && !canSpillReg(physRegRecord, refLocation, &weight))
3169     {
3170         return false;
3171     }
3172     if (physRegRecord2->assignedInterval != nullptr)
3173     {
3174         if (!canSpillReg(physRegRecord2, refLocation, &weight2))
3175         {
3176             return false;
3177         }
3178         if (weight2 > weight)
3179         {
3180             weight = weight2;
3181         }
3182     }
3183     *recentAssignedRefWeight = weight;
3184     return true;
3185 }
3186 #endif
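
// The double-register rule above reduces to: the pair is spillable only if every
// assigned half is, and the pair's spill weight is that of the heavier half.
// A standalone sketch over plain unsigned weights (illustrative only; loOk/hiOk
// stand in for the per-half canSpillReg results):
namespace lsra_double_weight_sketch
{
static bool pairSpillWeight(bool loOk, unsigned loWeight, bool hiOk, unsigned hiWeight, unsigned* pairWeight)
{
    if (!loOk || !hiOk)
    {
        return false; // if either assigned half can't be spilled, the pair can't be
    }
    *pairWeight = (hiWeight > loWeight) ? hiWeight : loWeight;
    return true;
}
} // namespace lsra_double_weight_sketch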
3187
3188 #ifdef _TARGET_ARM_
3189 //------------------------------------------------------------------------
3190 // unassignDoublePhysReg: unassign a double register (pair)
3191 //
3192 // Arguments:
3193 //    doubleRegRecord - reg to unassign
3194 //
3195 // Note:
3196 //    The given RegRecord must be a valid (even numbered) double register.
3197 //
3198 void LinearScan::unassignDoublePhysReg(RegRecord* doubleRegRecord)
3199 {
3200     assert(genIsValidDoubleReg(doubleRegRecord->regNum));
3201
3202     RegRecord* doubleRegRecordLo = doubleRegRecord;
3203     RegRecord* doubleRegRecordHi = findAnotherHalfRegRec(doubleRegRecordLo);
3204     // For a double register, there are four cases to handle:
3205     // Case 1: doubleRegRecLo is assigned to a TYP_DOUBLE interval
3206     // Case 2: doubleRegRecLo and doubleRegRecHi are assigned to different TYP_FLOAT intervals
3207     // Case 3: doubleRegRecLo is assigned to a TYP_FLOAT interval and doubleRegRecHi is nullptr
3208     // Case 4: doubleRegRecLo has no assigned interval, and doubleRegRecHi is assigned to a TYP_FLOAT interval
3209     if (doubleRegRecordLo->assignedInterval != nullptr)
3210     {
3211         if (doubleRegRecordLo->assignedInterval->registerType == TYP_DOUBLE)
3212         {
3213             // Case 1: doubleRegRecLo is assigned to TYP_DOUBLE interval
3214             unassignPhysReg(doubleRegRecordLo, doubleRegRecordLo->assignedInterval->recentRefPosition);
3215         }
3216         else
3217         {
3218             // Case 2: doubleRegRecLo and doubleRegRecHi are assigned to different TYP_FLOAT intervals
3219             // Case 3: doubleRegRecLo is assigned to a TYP_FLOAT interval and doubleRegRecHi is nullptr
3220             assert(doubleRegRecordLo->assignedInterval->registerType == TYP_FLOAT);
3221             unassignPhysReg(doubleRegRecordLo, doubleRegRecordLo->assignedInterval->recentRefPosition);
3222
3223             if (doubleRegRecordHi != nullptr)
3224             {
3225                 if (doubleRegRecordHi->assignedInterval != nullptr)
3226                 {
3227                     assert(doubleRegRecordHi->assignedInterval->registerType == TYP_FLOAT);
3228                     unassignPhysReg(doubleRegRecordHi, doubleRegRecordHi->assignedInterval->recentRefPosition);
3229                 }
3230             }
3231         }
3232     }
3233     else
3234     {
3235         // Case 4: doubleRegRecordLo has no assigned interval, and doubleRegRecordHi is assigned to a TYP_FLOAT interval
3236         assert(doubleRegRecordHi->assignedInterval != nullptr);
3237         assert(doubleRegRecordHi->assignedInterval->registerType == TYP_FLOAT);
3238         unassignPhysReg(doubleRegRecordHi, doubleRegRecordHi->assignedInterval->recentRefPosition);
3239     }
3240 }
3241
3242 #endif // _TARGET_ARM_
3243
3244 //------------------------------------------------------------------------
3245 // isRefPositionActive: Determine whether a given RefPosition is active at the given location
3246 //
3247 // Arguments:
3248 //    refPosition - the RefPosition of interest
3249 //    refLocation - the LsraLocation at which we want to know if it is active
3250 //
3251 // Return Value:
3252 //    True  - if this RefPosition occurs at the given location, OR
3253 //            if it occurs at the previous location and is marked delayRegFree.
3254 //    False - otherwise
3255 //
3256 bool LinearScan::isRefPositionActive(RefPosition* refPosition, LsraLocation refLocation)
3257 {
3258     return (refPosition->nodeLocation == refLocation ||
3259             ((refPosition->nodeLocation + 1 == refLocation) && refPosition->delayRegFree));
3260 }
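
// Concretely: a RefPosition at location L is active at L and, if marked delayRegFree
// (an RMW use whose register must survive through the def), also at L+1. A standalone
// sketch of the same predicate over plain unsigneds (illustrative only):
namespace lsra_active_sketch
{
static bool isActiveAt(unsigned nodeLocation, bool delayRegFree, unsigned refLocation)
{
    // Active at its own location, or one past it when the use is delay-freed.
    return (nodeLocation == refLocation) || (delayRegFree && (nodeLocation + 1 == refLocation));
}
} // namespace lsra_active_sketch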
3261
3262 //----------------------------------------------------------------------------------------
3263 // isRegInUse: Test whether regRec is being used at the refPosition
3264 //
3265 // Arguments:
3266 //    regRec - A register to be tested
3267 //    refPosition - RefPosition where regRec is tested
3268 //
3269 // Return Value:
3270 //    True - if regRec is being used
3271 //    False - otherwise
3272 //
3273 // Notes:
3274 //    This helper is designed to be used only from allocateBusyReg(), where:
3275 //    - This register was *not* found when looking for a free register, and
3276 //    - The caller must have already checked for the case where 'refPosition' is a fixed ref
3277 //      (asserted at the beginning of this method).
3278 //
3279 bool LinearScan::isRegInUse(RegRecord* regRec, RefPosition* refPosition)
3280 {
3281     // We shouldn't reach this check if 'refPosition' is a FixedReg of this register.
3282     assert(!refPosition->isFixedRefOfReg(regRec->regNum));
3283     Interval* assignedInterval = regRec->assignedInterval;
3284     if (assignedInterval != nullptr)
3285     {
3286         if (!assignedInterval->isActive)
3287         {
3288             // This can only happen if we have a recentRefPosition active at this location that hasn't yet been freed.
3289             CLANG_FORMAT_COMMENT_ANCHOR;
3290
3291             if (isRefPositionActive(assignedInterval->recentRefPosition, refPosition->nodeLocation))
3292             {
3293                 return true;
3294             }
3295             else
3296             {
3297 #ifdef _TARGET_ARM_
3298                 // In the case of TYP_DOUBLE, we may have the case where 'assignedInterval' is inactive,
3299                 // but the other half register is active. If so, it must have an active recentRefPosition,
3300                 // as above.
3301                 if (refPosition->getInterval()->registerType == TYP_DOUBLE)
3302                 {
3303                     RegRecord* otherHalfRegRec = findAnotherHalfRegRec(regRec);
3304                     if (!otherHalfRegRec->assignedInterval->isActive)
3305                     {
3306                         if (isRefPositionActive(otherHalfRegRec->assignedInterval->recentRefPosition,
3307                                                 refPosition->nodeLocation))
3308                         {
3309                             return true;
3310                         }
3311                         else
3312                         {
3313                             assert(!"Unexpected inactive assigned interval in isRegInUse");
3314                             return true;
3315                         }
3316                     }
3317                 }
3318                 else
3319 #endif
3320                 {
3321                     assert(!"Unexpected inactive assigned interval in isRegInUse");
3322                     return true;
3323                 }
3324             }
3325         }
3326         RefPosition* nextAssignedRef = assignedInterval->getNextRefPosition();
3327
3328         // We should never spill a register that's occupied by an Interval with its next use at the current
3329         // location.
3330         // Normally this won't occur (unless we actually had more uses in a single node than there are registers),
3331         // because we'll always find something with a later nextLocation, but it can happen in stress when
3332         // we have LSRA_SELECT_NEAREST.
3333         if ((nextAssignedRef != nullptr) && isRefPositionActive(nextAssignedRef, refPosition->nodeLocation) &&
3334             !nextAssignedRef->RegOptional())
3335         {
3336             return true;
3337         }
3338     }
3339     return false;
3340 }
3341
3342 //------------------------------------------------------------------------
3343 // isSpillCandidate: Determine if a register is a spill candidate for a given RefPosition.
3344 //
3345 // Arguments:
3346 //    current               The interval for the current allocation
3347 //    refPosition           The RefPosition of the current Interval for which a register is being allocated
3348 //    physRegRecord         The RegRecord for the register we're considering for spill
3349 //    nextLocation          An out (reference) parameter in which the next use location of the
3350 //                          given RegRecord will be returned.
3351 //
3352 // Return Value:
3353 //    True iff the given register can be spilled to accommodate the given RefPosition.
3354 //
3355 bool LinearScan::isSpillCandidate(Interval*     current,
3356                                   RefPosition*  refPosition,
3357                                   RegRecord*    physRegRecord,
3358                                   LsraLocation& nextLocation)
3359 {
3360     regMaskTP    candidateBit = genRegMask(physRegRecord->regNum);
3361     LsraLocation refLocation  = refPosition->nodeLocation;
3362     if (physRegRecord->isBusyUntilNextKill)
3363     {
3364         return false;
3365     }
3366     Interval* assignedInterval = physRegRecord->assignedInterval;
3367     if (assignedInterval != nullptr)
3368     {
3369         nextLocation = assignedInterval->getNextRefLocation();
3370     }
3371 #ifdef _TARGET_ARM_
3372     RegRecord* physRegRecord2    = nullptr;
3373     Interval*  assignedInterval2 = nullptr;
3374
3375     // For ARM32, a double occupies a consecutive even/odd pair of float registers.
3376     if (current->registerType == TYP_DOUBLE)
3377     {
3378         assert(genIsValidDoubleReg(physRegRecord->regNum));
3379         physRegRecord2 = findAnotherHalfRegRec(physRegRecord);
3380         if (physRegRecord2->isBusyUntilNextKill)
3381         {
3382             return false;
3383         }
3384         assignedInterval2 = physRegRecord2->assignedInterval;
3385         if ((assignedInterval2 != nullptr) && (assignedInterval2->getNextRefLocation() > nextLocation))
3386         {
3387             nextLocation = assignedInterval2->getNextRefLocation();
3388         }
3389     }
3390 #endif
3391
3392     // If there is a fixed reference at the same location (and it's not due to this reference),
3393     // don't use it.
3394     if (physRegRecord->conflictingFixedRegReference(refPosition))
3395     {
3396         return false;
3397     }
3398
3399     if (refPosition->isFixedRefOfRegMask(candidateBit))
3400     {
3401         // Either:
3402         // - there is a fixed reference due to this node, OR
3403         // - there is a fixed use fed by a def at this node, OR
3404         // - we have restricted the set of registers for stress.
3405         // In any case, we must use this register, as it's the only candidate.
3406         // TODO-CQ: At the time we allocate a register to a fixed-reg def, if it's not going
3407         // to remain live until the use, we should set the candidates to allRegs(regType)
3408         // to avoid a spill - codegen can then insert the copy.
3409         // If this is marked as allocateIfProfitable, the caller will compare the weights
3410         // of this RefPosition and the RefPosition to which it is currently assigned.
3411         assert(refPosition->isFixedRegRef ||
3412                (refPosition->nextRefPosition != nullptr && refPosition->nextRefPosition->isFixedRegRef) ||
3413                candidatesAreStressLimited());
3414         return true;
3415     }
3416
3417     // If this register is not assigned to an interval, either
3418     // - it has a FixedReg reference at the current location that is not this reference, OR
3419     // - this is the special case of a fixed loReg, where this interval has a use at the same location
3420     // In either case, we cannot use it
3421     CLANG_FORMAT_COMMENT_ANCHOR;
3422
3423 #ifdef _TARGET_ARM_
3424     if (assignedInterval == nullptr && assignedInterval2 == nullptr)
3425 #else
3426     if (assignedInterval == nullptr)
3427 #endif
3428     {
3429         RefPosition* nextPhysRegPosition = physRegRecord->getNextRefPosition();
3430         assert((nextPhysRegPosition != nullptr) && (nextPhysRegPosition->nodeLocation == refLocation) &&
3431                (candidateBit != refPosition->registerAssignment));
3432         return false;
3433     }
3434
3435     if (isRegInUse(physRegRecord, refPosition))
3436     {
3437         return false;
3438     }
3439
3440 #ifdef _TARGET_ARM_
3441     if (current->registerType == TYP_DOUBLE)
3442     {
3443         if (isRegInUse(physRegRecord2, refPosition))
3444         {
3445             return false;
3446         }
3447     }
3448 #endif
3449     return true;
3450 }
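
// A condensed sketch of the accept/reject ordering above, with the ARM double-register
// checks elided and bools standing in for the real predicates (illustrative only):
namespace lsra_spill_filter_sketch
{
static bool sketchIsSpillCandidate(
    bool busyUntilNextKill, bool conflictingFixedRef, bool fixedRefOfThisReg, bool hasAssignedInterval, bool inUse)
{
    if (busyUntilNextKill)
        return false; // reserved until the next kill
    if (conflictingFixedRef)
        return false; // a fixed reference at this location (not ours) needs it
    if (fixedRefOfThisReg)
        return true; // the only register that can satisfy this RefPosition
    if (!hasAssignedInterval)
        return false; // nothing to spill, and it's unusable here for other reasons
    return !inUse; // spillable unless actively in use at this location
}
} // namespace lsra_spill_filter_sketch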
3451
3452 //------------------------------------------------------------------------
3453 // allocateBusyReg: Find a busy register that satisfies the requirements for refPosition,
3454 //                  and that can be spilled.
3455 //
3456 // Arguments:
3457 //    current               The interval for the current allocation
3458 //    refPosition           The RefPosition of the current Interval for which a register is being allocated
3459 //    allocateIfProfitable  If true, a reg may not be allocated if all other ref positions currently
3460 //                          occupying registers are more important than the 'refPosition'.
3461 //
3462 // Return Value:
3463 //    The regNumber allocated to the RefPosition.  Returns REG_NA if no suitable register is found.
3464 //
3465 // Note:  Currently this routine uses weight and farthest distance of next reference
3466 // to select a ref position for spilling.
3467 // a) if allocateIfProfitable = false
3468 //        The ref position chosen for spilling will be the one with the lowest
3469 //        weight of all; if there is more than one ref position with the same
3470 //        lowest weight, the one with the farthest distance to its next
3471 //        reference is chosen among them.
3472 //
3473 // b) if allocateIfProfitable = true
3474 //        The ref position chosen for spilling will not only have the lowest
3475 //        weight of all, but must also have a weight lower than 'refPosition'.  If there
3476 //        is no such ref position, no register will be allocated.
3477 //
3478 regNumber LinearScan::allocateBusyReg(Interval* current, RefPosition* refPosition, bool allocateIfProfitable)
3479 {
3480     regNumber foundReg = REG_NA;
3481
3482     RegisterType regType     = getRegisterType(current, refPosition);
3483     regMaskTP    candidates  = refPosition->registerAssignment;
3484     regMaskTP    preferences = (current->registerPreferences & candidates);
3485     if (preferences == RBM_NONE)
3486     {
3487         preferences = candidates;
3488     }
3489     if (candidates == RBM_NONE)
3490     {
3491         // This assumes only integer and floating point register types;
3492         // if we target a processor with additional register types,
3493         // this would have to change.
3494         candidates = allRegs(regType);
3495     }
3496
3497 #ifdef DEBUG
3498     candidates = stressLimitRegs(refPosition, candidates);
3499 #endif // DEBUG
3500
3501     // TODO-CQ: Determine whether/how to take preferences into account in addition to
3502     // preferring the one with the furthest ref position when considering
3503     // a candidate to spill.
3504     RegRecord* farthestRefPhysRegRecord = nullptr;
3505 #ifdef _TARGET_ARM_
3506     RegRecord* farthestRefPhysRegRecord2 = nullptr;
3507 #endif
3508     LsraLocation farthestLocation = MinLocation;
3509     LsraLocation refLocation      = refPosition->nodeLocation;
3510     unsigned     farthestRefPosWeight;
3511     if (allocateIfProfitable)
3512     {
3513         // If allocating a reg is optional, we will consider those ref positions
3514         // whose weight is less than 'refPosition' for spilling.
3515         farthestRefPosWeight = getWeight(refPosition);
3516     }
3517     else
3518     {
3519         // If allocating a reg is a must, we start off with max weight so
3520         // that the first spill candidate will be selected based on
3521         // farthest distance alone.  Since we start off with farthestLocation
3522         // initialized to MinLocation, the first available ref position
3523         // will be selected as spill candidate and its weight as the
3524         // farthestRefPosWeight.
3525         farthestRefPosWeight = BB_MAX_WEIGHT;
3526     }
3527
3528     for (regNumber regNum : Registers(regType))
3529     {
3530         regMaskTP candidateBit = genRegMask(regNum);
3531         if (!(candidates & candidateBit))
3532         {
3533             continue;
3534         }
3535         RegRecord*   physRegRecord  = getRegisterRecord(regNum);
3536         RegRecord*   physRegRecord2 = nullptr; // only used for _TARGET_ARM_
3537         LsraLocation nextLocation   = MinLocation;
3538         LsraLocation physRegNextLocation;
3539         if (!isSpillCandidate(current, refPosition, physRegRecord, nextLocation))
3540         {
3541             assert(candidates != candidateBit);
3542             continue;
3543         }
3544
3545         // We've passed the preliminary checks for a spill candidate.
3546         // Now, if we have a recentAssignedRef, check that it is going to be OK to spill it.
3547         Interval*    assignedInterval        = physRegRecord->assignedInterval;
3548         unsigned     recentAssignedRefWeight = BB_ZERO_WEIGHT;
3549         RefPosition* recentAssignedRef       = nullptr;
3550         RefPosition* recentAssignedRef2      = nullptr;
3551 #ifdef _TARGET_ARM_
3552         if (current->registerType == TYP_DOUBLE)
3553         {
3554             recentAssignedRef           = (assignedInterval == nullptr) ? nullptr : assignedInterval->recentRefPosition;
3555             physRegRecord2              = findAnotherHalfRegRec(physRegRecord);
3556             Interval* assignedInterval2 = physRegRecord2->assignedInterval;
3557             recentAssignedRef2 = (assignedInterval2 == nullptr) ? nullptr : assignedInterval2->recentRefPosition;
3558             if (!canSpillDoubleReg(physRegRecord, refLocation, &recentAssignedRefWeight))
3559             {
3560                 continue;
3561             }
3562         }
3563         else
3564 #endif
3565         {
3566             recentAssignedRef = assignedInterval->recentRefPosition;
3567             if (!canSpillReg(physRegRecord, refLocation, &recentAssignedRefWeight))
3568             {
3569                 continue;
3570             }
3571         }
3572         if (recentAssignedRefWeight > farthestRefPosWeight)
3573         {
3574             continue;
3575         }
3576
3577         physRegNextLocation = physRegRecord->getNextRefLocation();
3578         if (nextLocation > physRegNextLocation)
3579         {
3580             nextLocation = physRegNextLocation;
3581         }
3582
3583         bool isBetterLocation;
3584
3585 #ifdef DEBUG
3586         if (doSelectNearest() && farthestRefPhysRegRecord != nullptr)
3587         {
3588             isBetterLocation = (nextLocation <= farthestLocation);
3589         }
3590         else
3591 #endif
3592             // This if-stmt is associated with the above else
3593             if (recentAssignedRefWeight < farthestRefPosWeight)
3594         {
3595             isBetterLocation = true;
3596         }
3597         else
3598         {
3599             // This would mean the weight of the spill ref position we found so far is equal
3600             // to the weight of the ref position that is being evaluated.  In this case
3601             // we prefer to spill the ref position whose distance to its next reference is
3602             // the farthest.
3603             assert(recentAssignedRefWeight == farthestRefPosWeight);
3604
3605             // If allocateIfProfitable=true, the first spill candidate selected
3606             // will be based on weight alone. After we have found a spill
3607             // candidate whose weight is less than the 'refPosition', we will
3608             // consider farthest distance when there is a tie in weights.
3609             // This is to ensure that we don't spill a ref position whose
3610             // weight is equal to weight of 'refPosition'.
3611             if (allocateIfProfitable && farthestRefPhysRegRecord == nullptr)
3612             {
3613                 isBetterLocation = false;
3614             }
3615             else
3616             {
3619                 if (nextLocation > farthestLocation)
3620                 {
3621                     isBetterLocation = true;
3622                 }
3623                 else if (nextLocation == farthestLocation)
3624                 {
3625                     // Both weight and distance are equal.
3626                     // Prefer a ref position that is marked both reload and
3627                     // allocate-if-profitable.  These ref positions don't need
3628                     // to be spilled, as they are already in memory and
3629                     // codegen considers them as contained memory operands.
3630                     CLANG_FORMAT_COMMENT_ANCHOR;
3631 #ifdef _TARGET_ARM_
3632                     // TODO-CQ-ARM: Just conservatively "and" two conditions. We may implement a better condition later.
3633                     isBetterLocation = true;
3634                     if (recentAssignedRef != nullptr)
3635                         isBetterLocation &= (recentAssignedRef->reload && recentAssignedRef->RegOptional());
3636
3637                     if (recentAssignedRef2 != nullptr)
3638                         isBetterLocation &= (recentAssignedRef2->reload && recentAssignedRef2->RegOptional());
3639 #else
3640                     isBetterLocation =
3641                         (recentAssignedRef != nullptr) && recentAssignedRef->reload && recentAssignedRef->RegOptional();
3642 #endif
3643                 }
3644                 else
3645                 {
3646                     isBetterLocation = false;
3647                 }
3648             }
3649         }
3650
3651         if (isBetterLocation)
3652         {
3653             farthestLocation         = nextLocation;
3654             farthestRefPhysRegRecord = physRegRecord;
3655 #ifdef _TARGET_ARM_
3656             farthestRefPhysRegRecord2 = physRegRecord2;
3657 #endif
3658             farthestRefPosWeight = recentAssignedRefWeight;
3659         }
3660     }
3661
3662 #ifdef DEBUG
3663     if (allocateIfProfitable)
3664     {
3665         // There may not be a spill candidate; if one is found,
3666         // its weight must be less than the weight of 'refPosition'.
3667         assert((farthestRefPhysRegRecord == nullptr) || (farthestRefPosWeight < getWeight(refPosition)));
3668     }
3669     else
3670     {
3671         // Must have found a spill candidate.
3672         assert(farthestRefPhysRegRecord != nullptr);
3673
3674         if (farthestLocation == refLocation)
3675         {
3676             // This must be a RefPosition that is constrained to use a single register, either directly,
3677             // or at the use, or by stress.
3678             bool isConstrained = (refPosition->isFixedRegRef || (refPosition->nextRefPosition != nullptr &&
3679                                                                  refPosition->nextRefPosition->isFixedRegRef) ||
3680                                   candidatesAreStressLimited());
3681             if (!isConstrained)
3682             {
3683 #ifdef _TARGET_ARM_
3684                 Interval* assignedInterval =
3685                     (farthestRefPhysRegRecord == nullptr) ? nullptr : farthestRefPhysRegRecord->assignedInterval;
3686                 Interval* assignedInterval2 =
3687                     (farthestRefPhysRegRecord2 == nullptr) ? nullptr : farthestRefPhysRegRecord2->assignedInterval;
3688                 RefPosition* nextRefPosition =
3689                     (assignedInterval == nullptr) ? nullptr : assignedInterval->getNextRefPosition();
3690                 RefPosition* nextRefPosition2 =
3691                     (assignedInterval2 == nullptr) ? nullptr : assignedInterval2->getNextRefPosition();
3692                 if (nextRefPosition != nullptr)
3693                 {
3694                     if (nextRefPosition2 != nullptr)
3695                     {
3696                         assert(nextRefPosition->RegOptional() || nextRefPosition2->RegOptional());
3697                     }
3698                     else
3699                     {
3700                         assert(nextRefPosition->RegOptional());
3701                     }
3702                 }
3703                 else
3704                 {
3705                     assert(nextRefPosition2 != nullptr && nextRefPosition2->RegOptional());
3706                 }
3707 #else  // !_TARGET_ARM_
3708                 Interval*    assignedInterval = farthestRefPhysRegRecord->assignedInterval;
3709                 RefPosition* nextRefPosition  = assignedInterval->getNextRefPosition();
3710                 assert(nextRefPosition->RegOptional());
3711 #endif // !_TARGET_ARM_
3712             }
3713         }
3714         else
3715         {
3716             assert(farthestLocation > refLocation);
3717         }
3718     }
3719 #endif // DEBUG
3720
3721     if (farthestRefPhysRegRecord != nullptr)
3722     {
3723         foundReg = farthestRefPhysRegRecord->regNum;
3724
3725 #ifdef _TARGET_ARM_
3726         if (current->registerType == TYP_DOUBLE)
3727         {
3728             assert(genIsValidDoubleReg(foundReg));
3729             unassignDoublePhysReg(farthestRefPhysRegRecord);
3730         }
3731         else
3732 #endif
3733         {
3734             unassignPhysReg(farthestRefPhysRegRecord, farthestRefPhysRegRecord->assignedInterval->recentRefPosition);
3735         }
3736
3737         assignPhysReg(farthestRefPhysRegRecord, current);
3738         refPosition->registerAssignment = genRegMask(foundReg);
3739     }
3740     else
3741     {
3742         foundReg                        = REG_NA;
3743         refPosition->registerAssignment = RBM_NONE;
3744     }
3745
3746     return foundReg;
3747 }
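
// The spill-candidate comparison above is, at its core, lexicographic: lower spill
// weight wins, and on equal weight the farther next reference wins. A standalone
// sketch over plain unsigneds, omitting the allocateIfProfitable, reload/RegOptional,
// and stress (LSRA_SELECT_NEAREST) refinements (illustrative only):
namespace lsra_spill_choice_sketch
{
struct SpillCandidate
{
    unsigned weight;       // weight of the most recent RefPosition assigned to the register
    unsigned nextLocation; // location of the occupying interval's next reference
};

static bool isBetterSpill(const SpillCandidate& cand, const SpillCandidate& best)
{
    if (cand.weight != best.weight)
    {
        return cand.weight < best.weight; // cheaper to spill
    }
    return cand.nextLocation > best.nextLocation; // stays out of the way for longer
}
} // namespace lsra_spill_choice_sketch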
3748
3749 // Grab a register to use for a copy, which will then be used immediately.
3750 // This is called only for localVar intervals that already have a register
3751 // assignment that is not compatible with the current RefPosition.
3752 // This is not like regular assignment, because we don't want to change
3753 // any preferences or existing register assignments.
3754 // Prefer a free register that has the earliest next use.
3755 // Otherwise, spill something with the farthest next use.
3756 //
3757 regNumber LinearScan::assignCopyReg(RefPosition* refPosition)
3758 {
3759     Interval* currentInterval = refPosition->getInterval();
3760     assert(currentInterval != nullptr);
3761     assert(currentInterval->isActive);
3762
3763     bool         foundFreeReg = false;
3764     RegRecord*   bestPhysReg  = nullptr;
3765     LsraLocation bestLocation = MinLocation;
3766     regMaskTP    candidates   = refPosition->registerAssignment;
3767
3768     // Save the relatedInterval, if any, so that it doesn't get modified during allocation.
3769     Interval* savedRelatedInterval   = currentInterval->relatedInterval;
3770     currentInterval->relatedInterval = nullptr;
3771
3772     // We don't really want to change the default assignment,
3773     // so 1) pretend this isn't active, and 2) remember the old reg
3774     regNumber  oldPhysReg   = currentInterval->physReg;
3775     RegRecord* oldRegRecord = currentInterval->assignedReg;
3776     assert(oldRegRecord->regNum == oldPhysReg);
3777     currentInterval->isActive = false;
3778
3779     regNumber allocatedReg = tryAllocateFreeReg(currentInterval, refPosition);
3780     if (allocatedReg == REG_NA)
3781     {
3782         allocatedReg = allocateBusyReg(currentInterval, refPosition, false);
3783     }
3784
3785     // Now restore the old info
3786     currentInterval->relatedInterval = savedRelatedInterval;
3787     currentInterval->physReg         = oldPhysReg;
3788     currentInterval->assignedReg     = oldRegRecord;
3789     currentInterval->isActive        = true;
3790
3791     refPosition->copyReg = true;
3792     return allocatedReg;
3793 }
3794
3795 //------------------------------------------------------------------------
3796 // isAssigned: Check whether the given RegRecord has an assignedInterval,
3797 //             regardless of lastLocation.
3798 //             This is equivalent to calling isAssigned() with a MaxLocation value.
3799 //
3800 // Arguments:
3801 //    regRec       - The RegRecord to check.
3802 //    newRegType   - The RegisterType of the upcoming interval (used to check the other half of a TYP_DOUBLE RegRecord).
3803 //
3804 // Return Value:
3805 //    Returns true if the given RegRecord has an assignedInterval.
3806 //
3807 // Notes:
3808 //    Use this overload to check whether the RegRecord has an assignedInterval at all, regardless of location.
3809 //
3810 bool LinearScan::isAssigned(RegRecord* regRec ARM_ARG(RegisterType newRegType))
3811 {
3812     return isAssigned(regRec, MaxLocation ARM_ARG(newRegType));
3813 }
3814
3815 //------------------------------------------------------------------------
3816 // isAssigned: Check whether the given RegRecord has an assignedInterval
3817 //             that has a reference prior to the given location.
3818 //
3819 // Arguments:
3820 //    regRec       - The RegRecord of interest
3821 //    lastLocation - The LsraLocation up to which we want to check
3822 //    newRegType   - The `RegisterType` of interval we want to check
3823 //                   (this is for the purposes of checking the other half of a TYP_DOUBLE RegRecord)
3824 //
3825 // Return value:
3826 //    Returns true if the given RegRecord (and its other half, if TYP_DOUBLE) has an assignedInterval
3827 //    that is referenced prior to the given location
3828 //
3829 // Notes:
3830 //    The register is not considered to be assigned if it has no assignedInterval, or that Interval's
3831 //    next reference is beyond lastLocation
3832 //
3833 bool LinearScan::isAssigned(RegRecord* regRec, LsraLocation lastLocation ARM_ARG(RegisterType newRegType))
3834 {
3835     Interval* assignedInterval = regRec->assignedInterval;
3836
3837     if ((assignedInterval == nullptr) || assignedInterval->getNextRefLocation() > lastLocation)
3838     {
3839 #ifdef _TARGET_ARM_
3840         if (newRegType == TYP_DOUBLE)
3841         {
3842             RegRecord* anotherRegRec = findAnotherHalfRegRec(regRec);
3843
3844             if ((anotherRegRec->assignedInterval == nullptr) ||
3845                 (anotherRegRec->assignedInterval->getNextRefLocation() > lastLocation))
3846             {
3847                 // In the case where newRegType is a double register, the register is only
3848                 // considered unassigned (scoring UNASSIGNED) if the other half is also unassigned.
3849                 return false;
3850             }
3851         }
3852         else
3853 #endif
3854         {
3855             return false;
3856         }
3857     }
3858
3859     return true;
3860 }
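
// Equivalently: a register counts as assigned only when it holds an interval whose
// next reference is at or before lastLocation. A one-function standalone sketch,
// with the ARM double-half check omitted (illustrative only; hasInterval and
// nextRefLocation stand in for the real fields):
namespace lsra_assigned_sketch
{
static bool sketchIsAssigned(bool hasInterval, unsigned nextRefLocation, unsigned lastLocation)
{
    return hasInterval && (nextRefLocation <= lastLocation);
}
} // namespace lsra_assigned_sketch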
3861
3862 // Check whether the given register record is already assigned to a different interval; if so, unassign it,
3863 // then set its assignedInterval to 'interval'.
3864 //
3865 void LinearScan::checkAndAssignInterval(RegRecord* regRec, Interval* interval)
3866 {
3867     Interval* assignedInterval = regRec->assignedInterval;
3868     if (assignedInterval != nullptr && assignedInterval != interval)
3869     {
3870         // This is allocated to another interval.  Either it is inactive, or it was allocated as a
3871         // copyReg and is therefore not the "assignedReg" of the other interval.  In the latter case,
3872         // we simply unassign it - in the former case we need to set the physReg on the interval to
3873         // REG_NA to indicate that it is no longer in that register.
3874         // The lack of checking for this case resulted in an assert in the retail version of System.dll,
3875         // in method SerialStream.GetDcbFlag.
3876         // Note that we can't check for the copyReg case, because we may have seen a more recent
3877         // RefPosition for the Interval that was NOT a copyReg.
3878         if (assignedInterval->assignedReg == regRec)
3879         {
3880             assert(assignedInterval->isActive == false);
3881             assignedInterval->physReg = REG_NA;
3882         }
3883         unassignPhysReg(regRec->regNum);
3884     }
3885 #ifdef _TARGET_ARM_
3886     // If 'interval' and 'assignedInterval' were both TYP_DOUBLE, then we have unassigned 'assignedInterval'
3887     // from both halves. Otherwise, if 'interval' is TYP_DOUBLE, we now need to unassign the other half.
3888     if ((interval->registerType == TYP_DOUBLE) &&
3889         ((assignedInterval == nullptr) || (assignedInterval->registerType == TYP_FLOAT)))
3890     {
3891         RegRecord* otherRegRecord = getSecondHalfRegRec(regRec);
3892         assignedInterval          = otherRegRecord->assignedInterval;
3893         if (assignedInterval != nullptr && assignedInterval != interval)
3894         {
3895             if (assignedInterval->assignedReg == otherRegRecord)
3896             {
3897                 assert(assignedInterval->isActive == false);
3898                 assignedInterval->physReg = REG_NA;
3899             }
3900             unassignPhysReg(otherRegRecord->regNum);
3901         }
3902     }
3903 #endif
3904
3905     updateAssignedInterval(regRec, interval, interval->registerType);
3906 }
3907
3908 // Assign the given physical register interval to the given interval
3909 void LinearScan::assignPhysReg(RegRecord* regRec, Interval* interval)
3910 {
3911     regMaskTP assignedRegMask = genRegMask(regRec->regNum);
3912     compiler->codeGen->regSet.rsSetRegsModified(assignedRegMask DEBUGARG(true));
3913
3914     checkAndAssignInterval(regRec, interval);
3915     interval->assignedReg = regRec;
3916
3917     interval->physReg  = regRec->regNum;
3918     interval->isActive = true;
3919     if (interval->isLocalVar)
3920     {
3921         // Prefer this register for future references
3922         interval->updateRegisterPreferences(assignedRegMask);
3923     }
3924 }
3925
3926 //------------------------------------------------------------------------
3927 // setIntervalAsSplit: Set this Interval as being split
3928 //
3929 // Arguments:
3930 //    interval - The Interval which is being split
3931 //
3932 // Return Value:
3933 //    None.
3934 //
3935 // Notes:
3936 //    The given Interval will be marked as split, and it will be added to the
3937 //    set of splitOrSpilledVars.
3938 //
3939 // Assumptions:
3940 //    "interval" must be a lclVar interval, as tree temps are never split.
3941 //    This is asserted in the call to getVarIndex().
3942 //
3943 void LinearScan::setIntervalAsSplit(Interval* interval)
3944 {
3945     if (interval->isLocalVar)
3946     {
3947         unsigned varIndex = interval->getVarIndex(compiler);
3948         if (!interval->isSplit)
3949         {
3950             VarSetOps::AddElemD(compiler, splitOrSpilledVars, varIndex);
3951         }
3952         else
3953         {
3954             assert(VarSetOps::IsMember(compiler, splitOrSpilledVars, varIndex));
3955         }
3956     }
3957     interval->isSplit = true;
3958 }
3959
3960 //------------------------------------------------------------------------
3961 // setIntervalAsSpilled: Set this Interval as being spilled
3962 //
3963 // Arguments:
3964 //    interval - The Interval which is being spilled
3965 //
3966 // Return Value:
3967 //    None.
3968 //
3969 // Notes:
3970 //    The given Interval will be marked as spilled, and it will be added
3971 //    to the set of splitOrSpilledVars.
3972 //
3973 void LinearScan::setIntervalAsSpilled(Interval* interval)
3974 {
3975 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
3976     if (interval->isUpperVector)
3977     {
3978         assert(interval->relatedInterval->isLocalVar);
3979         interval->isSpilled = true;
3980         // Now we need to mark the local as spilled also, even if the lower half is never spilled,
3981         // as this will use the upper part of its home location.
3982         interval = interval->relatedInterval;
3983     }
3984 #endif
3985     if (interval->isLocalVar)
3986     {
3987         unsigned varIndex = interval->getVarIndex(compiler);
3988         if (!interval->isSpilled)
3989         {
3990             VarSetOps::AddElemD(compiler, splitOrSpilledVars, varIndex);
3991         }
3992         else
3993         {
3994             assert(VarSetOps::IsMember(compiler, splitOrSpilledVars, varIndex));
3995         }
3996     }
3997     interval->isSpilled = true;
3998 }
3999
4000 //------------------------------------------------------------------------
4001 // spillInterval: Spill the given Interval between "fromRefPosition" and "toRefPosition"
4002 //
4003 // Arguments:
//    interval        - The Interval to be spilled
4004 //    fromRefPosition - The RefPosition at which the Interval is to be spilled
4005 //    toRefPosition   - The RefPosition at which it must be reloaded
4006 //
4007 // Return Value:
4008 //    None.
4009 //
4010 // Assumptions:
4011 //    fromRefPosition and toRefPosition must not be null
4012 //
4013 void LinearScan::spillInterval(Interval* interval, RefPosition* fromRefPosition, RefPosition* toRefPosition)
4014 {
4015     assert(fromRefPosition != nullptr && toRefPosition != nullptr);
4016     assert(fromRefPosition->getInterval() == interval && toRefPosition->getInterval() == interval);
4017     assert(fromRefPosition->nextRefPosition == toRefPosition);
4018
4019     if (!fromRefPosition->lastUse)
4020     {
4021         // Lcl var def/use ref positions, even if reg-optional, should be marked
4022         // as spillAfter if they are not allocated a register.
4023         if (fromRefPosition->RegOptional() && !(interval->isLocalVar && fromRefPosition->IsActualRef()))
4024         {
4025             fromRefPosition->registerAssignment = RBM_NONE;
4026         }
4027         else
4028         {
4029             fromRefPosition->spillAfter = true;
4030         }
4031     }
4032     assert(toRefPosition != nullptr);
4033
4034 #ifdef DEBUG
4035     if (VERBOSE)
4036     {
4037         dumpLsraAllocationEvent(LSRA_EVENT_SPILL, interval);
4038     }
4039 #endif // DEBUG
4040
4041     INTRACK_STATS(updateLsraStat(LSRA_STAT_SPILL, fromRefPosition->bbNum));
4042
4043     interval->isActive = false;
4044     setIntervalAsSpilled(interval);
4045
4046     // If fromRefPosition occurs before the beginning of this block, mark this as living in the stack
4047     // on entry to this block.
4048     if (fromRefPosition->nodeLocation <= curBBStartLocation)
4049     {
4050         // This must be a lclVar interval
4051         assert(interval->isLocalVar);
4052         setInVarRegForBB(curBBNum, interval->varNum, REG_STK);
4053     }
4054 }
4055
4056 //------------------------------------------------------------------------
4057 // unassignPhysRegNoSpill: Unassign the given physical register record from
4058 //                         an active interval, without spilling.
4059 //
4060 // Arguments:
4061 //    regRec           - the RegRecord to be unassigned
4062 //
4063 // Return Value:
4064 //    None.
4065 //
4066 // Assumptions:
4067 //    The assignedInterval must not be null, and must be active.
4068 //
4069 // Notes:
4070 //    This method is used to unassign a register when an interval needs to be moved to a
4071 //    different register, but not (yet) spilled.
4072
4073 void LinearScan::unassignPhysRegNoSpill(RegRecord* regRec)
4074 {
4075     Interval* assignedInterval = regRec->assignedInterval;
4076     assert(assignedInterval != nullptr && assignedInterval->isActive);
4077     assignedInterval->isActive = false;
4078     unassignPhysReg(regRec, nullptr);
4079     assignedInterval->isActive = true;
4080 }
4081
4082 //------------------------------------------------------------------------
4083 // checkAndClearInterval: Clear the assignedInterval for the given
4084 //                        physical register record
4085 //
4086 // Arguments:
4087 //    regRec           - the physical RegRecord to be unassigned
4088 //    spillRefPosition - The RefPosition at which the assignedInterval is to be spilled
4089 //                       or nullptr if we aren't spilling
4090 //
4091 // Return Value:
4092 //    None.
4093 //
4094 // Assumptions:
4095 //    see unassignPhysReg
4096 //
4097 void LinearScan::checkAndClearInterval(RegRecord* regRec, RefPosition* spillRefPosition)
4098 {
4099     Interval* assignedInterval = regRec->assignedInterval;
4100     assert(assignedInterval != nullptr);
4101     regNumber thisRegNum = regRec->regNum;
4102
4103     if (spillRefPosition == nullptr)
4104     {
4105         // Note that we can't assert that the interval is inactive in the copyReg case,
4106         // so we only assert it when this register is still the interval's assigned register.
4107         if (assignedInterval->physReg == thisRegNum)
4108         {
4109             assert(assignedInterval->isActive == false);
4110         }
4111     }
4112     else
4113     {
4114         assert(spillRefPosition->getInterval() == assignedInterval);
4115     }
4116
4117     updateAssignedInterval(regRec, nullptr, assignedInterval->registerType);
4118 }
4119
4120 //------------------------------------------------------------------------
4121 // unassignPhysReg: Unassign the given physical register record, and spill the
4122 //                  assignedInterval at the given spillRefPosition, if any.
4123 //
4124 // Arguments:
4125 //    regRec           - The RegRecord to be unassigned
4126 //    newRegType       - The RegisterType of the interval that would be assigned
4127 //
4128 // Return Value:
4129 //    None.
4130 //
4131 // Notes:
4132 //    On ARM, Intervals must be unassigned taking into account the register
4133 //    type of the interval that would subsequently be assigned.
4134 //
4135 void LinearScan::unassignPhysReg(RegRecord* regRec ARM_ARG(RegisterType newRegType))
4136 {
4137     RegRecord* regRecToUnassign = regRec;
4138 #ifdef _TARGET_ARM_
4139     RegRecord* anotherRegRec = nullptr;
4140
4141     if ((regRecToUnassign->assignedInterval != nullptr) &&
4142         (regRecToUnassign->assignedInterval->registerType == TYP_DOUBLE))
4143     {
4144         // If the register type of the interval (either the one being unassigned or the new one)
4145         // is TYP_DOUBLE, it must be a valid (i.e. even-numbered) double register.
4146         if (!genIsValidDoubleReg(regRecToUnassign->regNum))
4147         {
4148             regRecToUnassign = findAnotherHalfRegRec(regRec);
4149         }
4150     }
4151     else
4152     {
4153         if (newRegType == TYP_DOUBLE)
4154         {
4155             anotherRegRec = findAnotherHalfRegRec(regRecToUnassign);
4156         }
4157     }
4158 #endif
4159
4160     if (regRecToUnassign->assignedInterval != nullptr)
4161     {
4162         unassignPhysReg(regRecToUnassign, regRecToUnassign->assignedInterval->recentRefPosition);
4163     }
4164 #ifdef _TARGET_ARM_
4165     if ((anotherRegRec != nullptr) && (anotherRegRec->assignedInterval != nullptr))
4166     {
4167         unassignPhysReg(anotherRegRec, anotherRegRec->assignedInterval->recentRefPosition);
4168     }
4169 #endif
4170 }
4171
4172 //------------------------------------------------------------------------
4173 // unassignPhysReg: Unassign the given physical register record, and spill the
4174 //                  assignedInterval at the given spillRefPosition, if any.
4175 //
4176 // Arguments:
4177 //    regRec           - the RegRecord to be unassigned
4178 //    spillRefPosition - The RefPosition at which the assignedInterval is to be spilled
4179 //
4180 // Return Value:
4181 //    None.
4182 //
4183 // Assumptions:
4184 //    The assignedInterval must not be null.
4185 //    If spillRefPosition is null, the assignedInterval must be inactive, or not currently
4186 //    assigned to this register (e.g. this is a copyReg for that Interval).
4187 //    Otherwise, spillRefPosition must be associated with the assignedInterval.
4188 //
4189 void LinearScan::unassignPhysReg(RegRecord* regRec, RefPosition* spillRefPosition)
4190 {
4191     Interval* assignedInterval = regRec->assignedInterval;
4192     assert(assignedInterval != nullptr);
4193     regNumber thisRegNum = regRec->regNum;
4194
4195     // Is assignedInterval actually still assigned to this register?
4196     bool intervalIsAssigned = (assignedInterval->physReg == thisRegNum);
4197
4198 #ifdef _TARGET_ARM_
4199     RegRecord* anotherRegRec = nullptr;
4200
4201     // Prepare second half RegRecord of a double register for TYP_DOUBLE
4202     if (assignedInterval->registerType == TYP_DOUBLE)
4203     {
4204         assert(isFloatRegType(regRec->registerType));
4205
4206         anotherRegRec = findAnotherHalfRegRec(regRec);
4207
4208         // Both RegRecords should have been assigned to the same interval.
4209         assert(assignedInterval == anotherRegRec->assignedInterval);
4210         if (!intervalIsAssigned && (assignedInterval->physReg == anotherRegRec->regNum))
4211         {
4212             intervalIsAssigned = true;
4213         }
4214     }
4215 #endif // _TARGET_ARM_
4216
4217     checkAndClearInterval(regRec, spillRefPosition);
4218
4219 #ifdef _TARGET_ARM_
4220     if (assignedInterval->registerType == TYP_DOUBLE)
4221     {
4222         // Both RegRecords should have been unassigned together.
4223         assert(regRec->assignedInterval == nullptr);
4224         assert(anotherRegRec->assignedInterval == nullptr);
4225     }
4226 #endif // _TARGET_ARM_
4227
4228     RefPosition* nextRefPosition = nullptr;
4229     if (spillRefPosition != nullptr)
4230     {
4231         nextRefPosition = spillRefPosition->nextRefPosition;
4232     }
4233
4234     if (!intervalIsAssigned && assignedInterval->physReg != REG_NA)
4235     {
4236         // This must have been a temporary copy reg, but we can't assert that because there
4237         // may have been intervening RefPositions that were not copyRegs.
4238
4239         // reg->assignedInterval has already been set to nullptr by checkAndClearInterval()
4240         assert(regRec->assignedInterval == nullptr);
4241         return;
4242     }
4243
4244     regNumber victimAssignedReg = assignedInterval->physReg;
4245     assignedInterval->physReg   = REG_NA;
4246
4247     bool spill = assignedInterval->isActive && nextRefPosition != nullptr;
4248     if (spill)
4249     {
4250         // If this is an active interval, it must have a recentRefPosition,
4251         // otherwise it would not be active
4252         assert(spillRefPosition != nullptr);
4253
4254 #if 0
4255         // TODO-CQ: Enable this and insert an explicit GT_COPY (otherwise there's no way to communicate
4256         // to codegen that we want the copyReg to be the new home location).
4257         // If the last reference was a copyReg, and we're spilling the register
4258         // it was copied from, then make the copyReg the new primary location
4259         // if possible
4260         if (spillRefPosition->copyReg)
4261         {
4262             regNumber copyFromRegNum = victimAssignedReg;
4263             regNumber copyRegNum = genRegNumFromMask(spillRefPosition->registerAssignment);
4264             if (copyFromRegNum == thisRegNum &&
4265                 getRegisterRecord(copyRegNum)->assignedInterval == assignedInterval)
4266             {
4267                 assert(copyRegNum != thisRegNum);
4268                 assignedInterval->physReg = copyRegNum;
4269                 assignedInterval->assignedReg = this->getRegisterRecord(copyRegNum);
4270                 return;
4271             }
4272         }
4273 #endif // 0
4274 #ifdef DEBUG
4275         // With JitStressRegs == 0x80 (LSRA_EXTEND_LIFETIMES), we may have a RefPosition
4276         // that is not marked lastUse even though the treeNode is a lastUse.  In that case
4277         // we must not mark it for spill because the register will have been immediately freed
4278         // after use.  While we could conceivably add special handling for this case in codegen,
4279         // it would be messy and undesirably cause the "bleeding" of LSRA stress modes outside
4280         // of LSRA.
4281         if (extendLifetimes() && assignedInterval->isLocalVar && RefTypeIsUse(spillRefPosition->refType) &&
4282             spillRefPosition->treeNode != nullptr && (spillRefPosition->treeNode->gtFlags & GTF_VAR_DEATH) != 0)
4283         {
4284             dumpLsraAllocationEvent(LSRA_EVENT_SPILL_EXTENDED_LIFETIME, assignedInterval);
4285             assignedInterval->isActive = false;
4286             spill                      = false;
4287             // If the spillRefPosition occurs before the beginning of this block, it will have
4288             // been marked as living in this register on entry to this block, but we now need
4289             // to mark this as living on the stack.
4290             if (spillRefPosition->nodeLocation <= curBBStartLocation)
4291             {
4292                 setInVarRegForBB(curBBNum, assignedInterval->varNum, REG_STK);
4293                 if (spillRefPosition->nextRefPosition != nullptr)
4294                 {
4295                     setIntervalAsSpilled(assignedInterval);
4296                 }
4297             }
4298             else
4299             {
4300                 // Otherwise, we need to mark spillRefPosition as lastUse, or the interval
4301                 // will remain active beyond its allocated range during the resolution phase.
4302                 spillRefPosition->lastUse = true;
4303             }
4304         }
4305         else
4306 #endif // DEBUG
4307         {
4308             spillInterval(assignedInterval, spillRefPosition, nextRefPosition);
4309         }
4310     }
4311     // Maintain the association with the interval, if it has more references.
4312     // Or, if we "remembered" an interval assigned to this register, restore it.
4313     if (nextRefPosition != nullptr)
4314     {
4315         assignedInterval->assignedReg = regRec;
4316     }
4317     else if (canRestorePreviousInterval(regRec, assignedInterval))
4318     {
4319         regRec->assignedInterval = regRec->previousInterval;
4320         regRec->previousInterval = nullptr;
4321
4322 #ifdef _TARGET_ARM_
4323         // Note:
4324         //   We cannot use updateAssignedInterval() and updatePreviousInterval() here,
4325         //   because regRec may not be an even-numbered float register.
4326
4327         // Update second half RegRecord of a double register for TYP_DOUBLE
4328         if (regRec->assignedInterval->registerType == TYP_DOUBLE)
4329         {
4330             RegRecord* anotherHalfRegRec = findAnotherHalfRegRec(regRec);
4331
4332             anotherHalfRegRec->assignedInterval = regRec->assignedInterval;
4333             anotherHalfRegRec->previousInterval = nullptr;
4334         }
4335 #endif // _TARGET_ARM_
4336
4337 #ifdef DEBUG
4338         if (spill)
4339         {
4340             dumpLsraAllocationEvent(LSRA_EVENT_RESTORE_PREVIOUS_INTERVAL_AFTER_SPILL, regRec->assignedInterval,
4341                                     thisRegNum);
4342         }
4343         else
4344         {
4345             dumpLsraAllocationEvent(LSRA_EVENT_RESTORE_PREVIOUS_INTERVAL, regRec->assignedInterval, thisRegNum);
4346         }
4347 #endif // DEBUG
4348     }
4349     else
4350     {
4351         updateAssignedInterval(regRec, nullptr, assignedInterval->registerType);
4352         updatePreviousInterval(regRec, nullptr, assignedInterval->registerType);
4353     }
4354 }
4355
4356 //------------------------------------------------------------------------
4357 // spillGCRefs: Spill any GC-type intervals that are currently in registers.
4358 //
4359 // Arguments:
4360 //    killRefPosition - The RefPosition for the kill
4361 //
4362 // Return Value:
4363 //    None.
4364 //
4365 void LinearScan::spillGCRefs(RefPosition* killRefPosition)
4366 {
4367     // For each physical register that can hold a GC type,
4368     // if it is occupied by an interval of a GC type, spill that interval.
4369     regMaskTP candidateRegs = killRefPosition->registerAssignment;
4370     while (candidateRegs != RBM_NONE)
4371     {
4372         regMaskTP nextRegBit = genFindLowestBit(candidateRegs);
4373         candidateRegs &= ~nextRegBit;
4374         regNumber  nextReg          = genRegNumFromMask(nextRegBit);
4375         RegRecord* regRecord        = getRegisterRecord(nextReg);
4376         Interval*  assignedInterval = regRecord->assignedInterval;
4377         if (assignedInterval == nullptr || (assignedInterval->isActive == false) ||
4378             !varTypeIsGC(assignedInterval->registerType))
4379         {
4380             continue;
4381         }
4382         unassignPhysReg(regRecord, assignedInterval->recentRefPosition);
4383     }
4384     INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DONE_KILL_GC_REFS, nullptr, REG_NA, nullptr));
4385 }
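
// A note on the mask-walking loop above, which is the idiom used throughout this
// file for visiting each register in a regMaskTP. As an illustrative sketch only
// (assuming just the genFindLowestBit/genRegNumFromMask helpers already used above):
//
//     regMaskTP mask = ...; // some set of registers
//     while (mask != RBM_NONE)
//     {
//         regMaskTP lowestBit = genFindLowestBit(mask); // isolate the lowest set bit
//         mask &= ~lowestBit;                           // remove it from the mask
//         regNumber reg = genRegNumFromMask(lowestBit); // single-bit mask -> regNumber
//         // ... process 'reg', e.g. via getRegisterRecord(reg) ...
//     }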
4386
4387 //------------------------------------------------------------------------
4388 // processBlockEndAllocation: Update var locations after 'currentBlock' has been allocated
4389 //
4390 // Arguments:
4391 //    currentBlock - the BasicBlock we have just finished allocating registers for
4392 //
4393 // Return Value:
4394 //    None
4395 //
4396 // Notes:
4397 //    Calls processBlockEndLocations() to set the outVarToRegMap, then gets the next block,
4398 //    and sets the inVarToRegMap appropriately.
4399
4400 void LinearScan::processBlockEndAllocation(BasicBlock* currentBlock)
4401 {
4402     assert(currentBlock != nullptr);
4403     if (enregisterLocalVars)
4404     {
4405         processBlockEndLocations(currentBlock);
4406     }
4407     markBlockVisited(currentBlock);
4408
4409     // Get the next block to allocate.
4410     // When the last block in the method has successors, there will be a final "RefTypeBB" to
4411     // ensure that we get the varToRegMap set appropriately, but in that case we don't need
4412     // to worry about "nextBlock".
4413     BasicBlock* nextBlock = getNextBlock();
4414     if (nextBlock != nullptr)
4415     {
4416         processBlockStartLocations(nextBlock);
4417     }
4418 }
4419
4420 //------------------------------------------------------------------------
4421 // rotateBlockStartLocation: When in the LSRA_BLOCK_BOUNDARY_ROTATE stress mode, attempt to
4422 //                           "rotate" the register assignment for a localVar to the next higher
4423 //                           register that is available.
4424 //
4425 // Arguments:
4426 //    interval      - the Interval for the variable whose register is getting rotated
4427 //    targetReg     - its register assignment from the predecessor block being used for live-in
4428 //    availableRegs - registers available for use
4429 //
4430 // Return Value:
4431 //    The new register to use.
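//
// Notes:
//    An illustrative example (not from the source): if the candidate registers of
//    the appropriate type are {r3, r5, r8} and targetReg is r5, we rotate to r8;
//    if targetReg is r8, we wrap around to the first candidate, r3.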
4432
4433 #ifdef DEBUG
4434 regNumber LinearScan::rotateBlockStartLocation(Interval* interval, regNumber targetReg, regMaskTP availableRegs)
4435 {
4436     if (targetReg != REG_STK && getLsraBlockBoundaryLocations() == LSRA_BLOCK_BOUNDARY_ROTATE)
4437     {
4438         // If we're rotating the register locations at block boundaries, try to use
4439         // the next higher register number of the appropriate register type.
4440         regMaskTP candidateRegs = allRegs(interval->registerType) & availableRegs;
4441         regNumber firstReg      = REG_NA;
4442         regNumber newReg        = REG_NA;
4443         while (candidateRegs != RBM_NONE)
4444         {
4445             regMaskTP nextRegBit = genFindLowestBit(candidateRegs);
4446             candidateRegs &= ~nextRegBit;
4447             regNumber nextReg = genRegNumFromMask(nextRegBit);
4448             if (nextReg > targetReg)
4449             {
4450                 newReg = nextReg;
4451                 break;
4452             }
4453             else if (firstReg == REG_NA)
4454             {
4455                 firstReg = nextReg;
4456             }
4457         }
4458         if (newReg == REG_NA)
4459         {
4460             assert(firstReg != REG_NA);
4461             newReg = firstReg;
4462         }
4463         targetReg = newReg;
4464     }
4465     return targetReg;
4466 }
4467 #endif // DEBUG
4468
4469 #ifdef _TARGET_ARM_
4470 //--------------------------------------------------------------------------------------
4471 // isSecondHalfReg: Test if regRec is the second half of a double register
4472 //                  which is assigned to an interval.
4473 //
4474 // Arguments:
4475 //    regRec - a register to be tested
4476 //    interval - an interval which is assigned to some register
4477 //
4478 // Assumptions:
4479 //    None
4480 //
4481 // Return Value:
4482 //    True only if regRec is the second half of the interval's assignedReg
4483 //
4484 bool LinearScan::isSecondHalfReg(RegRecord* regRec, Interval* interval)
4485 {
4486     RegRecord* assignedReg = interval->assignedReg;
4487
4488     if (assignedReg != nullptr && interval->registerType == TYP_DOUBLE)
4489     {
4490         // interval should have been allocated to a valid double register
4491         assert(genIsValidDoubleReg(assignedReg->regNum));
4492
4493         // Find a second half RegRecord of double register
4494         regNumber firstRegNum  = assignedReg->regNum;
4495         regNumber secondRegNum = REG_NEXT(firstRegNum);
4496
4497         assert(genIsValidFloatReg(secondRegNum) && !genIsValidDoubleReg(secondRegNum));
4498
4499         RegRecord* secondRegRec = getRegisterRecord(secondRegNum);
4500
4501         return secondRegRec == regRec;
4502     }
4503
4504     return false;
4505 }
4506
4507 //------------------------------------------------------------------------------------------
4508 // getSecondHalfRegRec: Get the second (odd) half of an ARM32 double register
4509 //
4510 // Arguments:
4511 //    regRec - A float RegRecord
4512 //
4513 // Assumptions:
4514 //    regRec must be a valid double register (i.e. even)
4515 //
4516 // Return Value:
4517 //    The RegRecord for the second half of the double register
4518 //
4519 RegRecord* LinearScan::getSecondHalfRegRec(RegRecord* regRec)
4520 {
4521     regNumber  secondHalfRegNum;
4522     RegRecord* secondHalfRegRec;
4523
4524     assert(genIsValidDoubleReg(regRec->regNum));
4525
4526     secondHalfRegNum = REG_NEXT(regRec->regNum);
4527     secondHalfRegRec = getRegisterRecord(secondHalfRegNum);
4528
4529     return secondHalfRegRec;
4530 }

4531 //------------------------------------------------------------------------------------------
4532 // findAnotherHalfRegRec: Find the other half RegRecord which forms the same ARM32 double register
4533 //
4534 // Arguments:
4535 //    regRec - A float RegRecord
4536 //
4537 // Assumptions:
4538 //    None
4539 //
4540 // Return Value:
4541 //    The RegRecord which forms the same double register as regRec
4542 //
4543 RegRecord* LinearScan::findAnotherHalfRegRec(RegRecord* regRec)
4544 {
4545     regNumber  anotherHalfRegNum;
4546     RegRecord* anotherHalfRegRec;
4547
4548     assert(genIsValidFloatReg(regRec->regNum));
4549
4550     // Find the other half register for a TYP_DOUBLE interval,
4551     // following the same logic as in canRestorePreviousInterval().
4552     if (genIsValidDoubleReg(regRec->regNum))
4553     {
4554         anotherHalfRegNum = REG_NEXT(regRec->regNum);
4555         assert(!genIsValidDoubleReg(anotherHalfRegNum));
4556     }
4557     else
4558     {
4559         anotherHalfRegNum = REG_PREV(regRec->regNum);
4560         assert(genIsValidDoubleReg(anotherHalfRegNum));
4561     }
4562     anotherHalfRegRec = getRegisterRecord(anotherHalfRegNum);
4563
4564     return anotherHalfRegRec;
4565 }
4566 #endif
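
// To make the even/odd pairing above concrete: on ARM32, a TYP_DOUBLE value occupies
// two consecutive float registers, and only the even-numbered register is a valid
// "double" register. An illustrative sketch only, assuming the ARM32 register names
// REG_F0/REG_F1:
//
//     RegRecord* firstHalf  = getRegisterRecord(REG_F0);      // even (double-valid) half
//     RegRecord* secondHalf = getSecondHalfRegRec(firstHalf); // REG_F1, the odd half
//     assert(findAnotherHalfRegRec(secondHalf) == firstHalf); // the odd half maps back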
4567
4568 //--------------------------------------------------------------------------------------
4569 // canRestorePreviousInterval: Test if we can restore the previous interval
4570 //
4571 // Arguments:
4572 //    regRec - a register which contains previous interval to be restored
4573 //    assignedInterval - an interval just unassigned
4574 //
4575 // Assumptions:
4576 //    None
4577 //
4578 // Return Value:
4579 //    True only if the previous interval of regRec can be restored
4580 //
4581 bool LinearScan::canRestorePreviousInterval(RegRecord* regRec, Interval* assignedInterval)
4582 {
4583     bool retVal =
4584         (regRec->previousInterval != nullptr && regRec->previousInterval != assignedInterval &&
4585          regRec->previousInterval->assignedReg == regRec && regRec->previousInterval->getNextRefPosition() != nullptr);
4586
4587 #ifdef _TARGET_ARM_
4588     if (retVal && regRec->previousInterval->registerType == TYP_DOUBLE)
4589     {
4590         RegRecord* anotherHalfRegRec = findAnotherHalfRegRec(regRec);
4591
4592         retVal = retVal && anotherHalfRegRec->assignedInterval == nullptr;
4593     }
4594 #endif
4595
4596     return retVal;
4597 }
4598
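//------------------------------------------------------------------------
// isAssignedToInterval: Determine whether the given RegRecord is assigned to the given Interval
//
// Arguments:
//    interval - the Interval to check
//    regRec   - the RegRecord in question
//
// Return Value:
//    True if interval->assignedReg is regRec or, on ARM, if regRec is the
//    second half of a double register that is assigned to interval.
//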
4599 bool LinearScan::isAssignedToInterval(Interval* interval, RegRecord* regRec)
4600 {
4601     bool isAssigned = (interval->assignedReg == regRec);
4602 #ifdef _TARGET_ARM_
4603     isAssigned |= isSecondHalfReg(regRec, interval);
4604 #endif
4605     return isAssigned;
4606 }
4607
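//------------------------------------------------------------------------
// unassignIntervalBlockStart: At a block boundary, unassign any Interval currently
//                             occupying the given register.
//
// Arguments:
//    regRecord     - the RegRecord that may need to be freed
//    inVarToRegMap - the live-in variable-to-register map to update if the displaced
//                    Interval is a lclVar, or nullptr if no update is needed
//
// Return Value:
//    None.
//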
4608 void LinearScan::unassignIntervalBlockStart(RegRecord* regRecord, VarToRegMap inVarToRegMap)
4609 {
4610     // Is there another interval currently assigned to this register?  If so unassign it.
4611     Interval* assignedInterval = regRecord->assignedInterval;
4612     if (assignedInterval != nullptr)
4613     {
4614         if (isAssignedToInterval(assignedInterval, regRecord))
4615         {
4616             // Only localVars, constants or vector upper halves should be assigned to registers at block boundaries.
4617             if (!assignedInterval->isLocalVar)
4618             {
4619                 assert(assignedInterval->isConstant || assignedInterval->IsUpperVector());
4620                 // Don't need to update the VarToRegMap.
4621                 inVarToRegMap = nullptr;
4622             }
4623
4624             regNumber assignedRegNum = assignedInterval->assignedReg->regNum;
4625
4626             // If the interval is active, it will be set to active when we reach its new
4627             // register assignment (which we must not yet have done, or it wouldn't still be
4628             // assigned to this register).
4629             assignedInterval->isActive = false;
4630             unassignPhysReg(assignedInterval->assignedReg, nullptr);
4631             if ((inVarToRegMap != nullptr) && inVarToRegMap[assignedInterval->getVarIndex(compiler)] == assignedRegNum)
4632             {
4633                 inVarToRegMap[assignedInterval->getVarIndex(compiler)] = REG_STK;
4634             }
4635         }
4636         else
4637         {
4638             // This interval is no longer assigned to this register.
4639             updateAssignedInterval(regRecord, nullptr, assignedInterval->registerType);
4640         }
4641     }
4642 }
4643
4644 //------------------------------------------------------------------------
4645 // processBlockStartLocations: Update var locations on entry to 'currentBlock' and clear constant
4646 //                             registers.
4647 //
4648 // Arguments:
4649 //    currentBlock   - the BasicBlock we are about to allocate registers for
4651 //
4652 // Return Value:
4653 //    None
4654 //
4655 // Notes:
4656 //    During the allocation pass, we use the outVarToRegMap of the selected predecessor to
4657 //    determine the lclVar locations for the inVarToRegMap.
4658 //    During the resolution (write-back) pass, we only modify the inVarToRegMap in cases where
4659 //    a lclVar was spilled after the block had been completed.
4660 void LinearScan::processBlockStartLocations(BasicBlock* currentBlock)
4661 {
4662     // If we have no register candidates we should only call this method during allocation.
4663
4664     assert(enregisterLocalVars || !allocationPassComplete);
4665
4666     if (!enregisterLocalVars)
4667     {
4668         // Just clear any constant registers and return.
4669         for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg))
4670         {
4671             RegRecord* physRegRecord    = getRegisterRecord(reg);
4672             Interval*  assignedInterval = physRegRecord->assignedInterval;
4673
4674             if (assignedInterval != nullptr)
4675             {
4676                 assert(assignedInterval->isConstant);
4677                 physRegRecord->assignedInterval = nullptr;
4678             }
4679         }
4680         return;
4681     }
4682
4683     unsigned    predBBNum         = blockInfo[currentBlock->bbNum].predBBNum;
4684     VarToRegMap predVarToRegMap   = getOutVarToRegMap(predBBNum);
4685     VarToRegMap inVarToRegMap     = getInVarToRegMap(currentBlock->bbNum);
4686     bool        hasCriticalInEdge = blockInfo[currentBlock->bbNum].hasCriticalInEdge;
4687
4688     VarSetOps::AssignNoCopy(compiler, currentLiveVars,
4689                             VarSetOps::Intersection(compiler, registerCandidateVars, currentBlock->bbLiveIn));
4690 #ifdef DEBUG
4691     if (getLsraExtendLifeTimes())
4692     {
4693         VarSetOps::AssignNoCopy(compiler, currentLiveVars, registerCandidateVars);
4694     }
4695     // If we are rotating register assignments at block boundaries, we want to make the
4696     // inactive registers available for the rotation.
4697     regMaskTP inactiveRegs = RBM_NONE;
4698 #endif // DEBUG
4699     regMaskTP       liveRegs = RBM_NONE;
4700     VarSetOps::Iter iter(compiler, currentLiveVars);
4701     unsigned        varIndex = 0;
4702     while (iter.NextElem(&varIndex))
4703     {
4704         unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
4705         if (!compiler->lvaTable[varNum].lvLRACandidate)
4706         {
4707             continue;
4708         }
4709         regNumber    targetReg;
4710         Interval*    interval        = getIntervalForLocalVar(varIndex);
4711         RefPosition* nextRefPosition = interval->getNextRefPosition();
4712         assert(nextRefPosition != nullptr);
4713
4714         if (!allocationPassComplete)
4715         {
4716             targetReg = getVarReg(predVarToRegMap, varIndex);
4717 #ifdef DEBUG
4718             regNumber newTargetReg = rotateBlockStartLocation(interval, targetReg, (~liveRegs | inactiveRegs));
4719             if (newTargetReg != targetReg)
4720             {
4721                 targetReg = newTargetReg;
4722                 setIntervalAsSplit(interval);
4723             }
4724 #endif // DEBUG
4725             setVarReg(inVarToRegMap, varIndex, targetReg);
4726         }
4727         else // allocationPassComplete (i.e. resolution/write-back pass)
4728         {
4729             targetReg = getVarReg(inVarToRegMap, varIndex);
4730             // There are four cases that we need to consider during the resolution pass:
4731             // 1. This variable had a register allocated initially, and it was not spilled in the RefPosition
4732             //    that feeds this block.  In this case, both targetReg and predVarToRegMap[varIndex] will be targetReg.
4733             // 2. This variable had not been spilled prior to the end of predBB, but was later spilled, so
4734             //    predVarToRegMap[varIndex] will be REG_STK, but targetReg is its former allocated value.
4735             //    In this case, we will normally change it to REG_STK.  We will update its "spilled" status when we
4736             //    encounter it in resolveLocalRef().
4737             // 2a. If the next RefPosition is marked as a copyReg, we need to retain the allocated register.  This is
4738             //     because the copyReg RefPosition will not have recorded the "home" register, yet downstream
4739             //     RefPositions rely on the correct "home" register.
4740             // 3. This variable was spilled before we reached the end of predBB.  In this case, both targetReg and
4741             //    predVarToRegMap[varIndex] will be REG_STK, and the next RefPosition will have been marked
4742             //    as reload during allocation time if necessary (note that by the time we actually reach the next
4743 //    RefPosition, we may be using a different predecessor, in which the variable may still be in a register).
4744             // 4. This variable was spilled during the allocation of this block, so targetReg is REG_STK
4745             //    (because we set inVarToRegMap at the time we spilled it), but predVarToRegMap[varIndex]
4746             //    is not REG_STK.  We retain the REG_STK value in the inVarToRegMap.
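            // Summarizing the cases above (recap only; "pred" is predVarToRegMap[varIndex]):
            //    case  pred      targetReg  action
            //    1     reg       same reg   keep targetReg
            //    2     REG_STK   reg        set targetReg to REG_STK (2a: keep if next is copyReg)
            //    3     REG_STK   REG_STK    keep
            //    4     reg       REG_STK    keep REG_STK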
4747             if (targetReg != REG_STK)
4748             {
4749                 if (getVarReg(predVarToRegMap, varIndex) != REG_STK)
4750                 {
4751                     // Case #1 above.
4752                     assert(getVarReg(predVarToRegMap, varIndex) == targetReg ||
4753                            getLsraBlockBoundaryLocations() == LSRA_BLOCK_BOUNDARY_ROTATE);
4754                 }
4755                 else if (!nextRefPosition->copyReg)
4756                 {
4757                     // case #2 above.
4758                     setVarReg(inVarToRegMap, varIndex, REG_STK);
4759                     targetReg = REG_STK;
4760                 }
4761                 // Else case 2a. - retain targetReg.
4762             }
4763             // Else case #3 or #4, we retain targetReg and nothing further to do or assert.
4764         }
4765         if (interval->physReg == targetReg)
4766         {
4767             if (interval->isActive)
4768             {
4769                 assert(targetReg != REG_STK);
4770                 assert(interval->assignedReg != nullptr && interval->assignedReg->regNum == targetReg &&
4771                        interval->assignedReg->assignedInterval == interval);
4772                 liveRegs |= genRegMask(targetReg);
4773                 continue;
4774             }
4775         }
4776         else if (interval->physReg != REG_NA)
4777         {
4778             // This can happen if we are using the locations from a basic block other than the
4779             // immediately preceding one - where the variable was in a different location.
4780             if (targetReg != REG_STK)
4781             {
4782                 // Unassign it from the register (it will get a new register below).
4783                 if (interval->assignedReg != nullptr && interval->assignedReg->assignedInterval == interval)
4784                 {
4785                     interval->isActive = false;
4786                     unassignPhysReg(getRegisterRecord(interval->physReg), nullptr);
4787                 }
4788                 else
4789                 {
4790                     // This interval was live in this register the last time we saw a reference to it,
4791                     // but has since been displaced.
4792                     interval->physReg = REG_NA;
4793                 }
4794             }
4795             else if (!allocationPassComplete)
4796             {
4797                 // Keep the register assignment - if another var has it, it will get unassigned.
4798                 // Otherwise, resolution will fix it up later, and it will be more
4799                 // likely to match other assignments this way.
4800                 interval->isActive = true;
4801                 liveRegs |= genRegMask(interval->physReg);
4802                 INDEBUG(inactiveRegs |= genRegMask(interval->physReg));
4803                 setVarReg(inVarToRegMap, varIndex, interval->physReg);
4804             }
4805             else
4806             {
4807                 interval->physReg = REG_NA;
4808             }
4809         }
4810         if (targetReg != REG_STK)
4811         {
4812             RegRecord* targetRegRecord = getRegisterRecord(targetReg);
4813             liveRegs |= genRegMask(targetReg);
4814             if (!interval->isActive)
4815             {
4816                 interval->isActive    = true;
4817                 interval->physReg     = targetReg;
4818                 interval->assignedReg = targetRegRecord;
4819             }
4820             if (targetRegRecord->assignedInterval != interval)
4821             {
4822 #ifdef _TARGET_ARM_
4823                 // If this is a TYP_DOUBLE interval, and the assigned interval is either null or is TYP_FLOAT,
4824                 // we also need to unassign the other half of the register.
4825                 // Note that if the assigned interval is TYP_DOUBLE, it will be unassigned below.
4826                 if ((interval->registerType == TYP_DOUBLE) &&
4827                     ((targetRegRecord->assignedInterval == nullptr) ||
4828                      (targetRegRecord->assignedInterval->registerType == TYP_FLOAT)))
4829                 {
4830                     assert(genIsValidDoubleReg(targetReg));
4831                     unassignIntervalBlockStart(findAnotherHalfRegRec(targetRegRecord),
4832                                                allocationPassComplete ? nullptr : inVarToRegMap);
4833                 }
4834 #endif // _TARGET_ARM_
4835                 unassignIntervalBlockStart(targetRegRecord, allocationPassComplete ? nullptr : inVarToRegMap);
4836                 assignPhysReg(targetRegRecord, interval);
4837             }
4838             if (interval->recentRefPosition != nullptr && !interval->recentRefPosition->copyReg &&
4839                 interval->recentRefPosition->registerAssignment != genRegMask(targetReg))
4840             {
4841                 interval->getNextRefPosition()->outOfOrder = true;
4842             }
4843         }
4844     }
4845
4846     // Unassign any registers that are no longer live.
4847     for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg))
4848     {
4849         if ((liveRegs & genRegMask(reg)) == 0)
4850         {
4851             RegRecord* physRegRecord    = getRegisterRecord(reg);
4852             Interval*  assignedInterval = physRegRecord->assignedInterval;
4853
4854             if (assignedInterval != nullptr)
4855             {
4856                 assert(assignedInterval->isLocalVar || assignedInterval->isConstant ||
4857                        assignedInterval->IsUpperVector());
4858
4859                 if (!assignedInterval->isConstant && assignedInterval->assignedReg == physRegRecord)
4860                 {
4861                     assignedInterval->isActive = false;
4862                     if (assignedInterval->getNextRefPosition() == nullptr)
4863                     {
4864                         unassignPhysReg(physRegRecord, nullptr);
4865                     }
4866                     if (!assignedInterval->IsUpperVector())
4867                     {
4868                         inVarToRegMap[assignedInterval->getVarIndex(compiler)] = REG_STK;
4869                     }
4870                 }
4871                 else
4872                 {
4873                     // This interval may still be active, but was in another register in an
4874                     // intervening block.
4875                     updateAssignedInterval(physRegRecord, nullptr, assignedInterval->registerType);
4876                 }
4877
4878 #ifdef _TARGET_ARM_
4879                 // unassignPhysReg, above, may have restored a 'previousInterval', in which case we need to
4880                 // get the value of 'physRegRecord->assignedInterval' rather than using 'assignedInterval'.
4881                 if (physRegRecord->assignedInterval != nullptr)
4882                 {
4883                     assignedInterval = physRegRecord->assignedInterval;
4884                 }
4885                 if (assignedInterval->registerType == TYP_DOUBLE)
4886                 {
4887                     // Skip next float register, because we already addressed a double register
4888                     assert(genIsValidDoubleReg(reg));
4889                     reg = REG_NEXT(reg);
4890                 }
4891 #endif // _TARGET_ARM_
4892             }
4893         }
4894 #ifdef _TARGET_ARM_
4895         else
4896         {
4897             RegRecord* physRegRecord    = getRegisterRecord(reg);
4898             Interval*  assignedInterval = physRegRecord->assignedInterval;
4899
4900             if (assignedInterval != nullptr && assignedInterval->registerType == TYP_DOUBLE)
4901             {
4902                 // Skip next float register, because we already addressed a double register
4903                 assert(genIsValidDoubleReg(reg));
4904                 reg = REG_NEXT(reg);
4905             }
4906         }
4907 #endif // _TARGET_ARM_
4908     }
4909 }
4910
4911 //------------------------------------------------------------------------
4912 // processBlockEndLocations: Record the variables occupying registers after completing the current block.
4913 //
4914 // Arguments:
4915 //    currentBlock - the block we have just completed.
4916 //
4917 // Return Value:
4918 //    None
4919 //
4920 // Notes:
4921 //    This must be called both during the allocation and resolution (write-back) phases.
4922 //    This is because we need to have the outVarToRegMap locations in order to set the locations
4923 //    at successor blocks during allocation time, but if lclVars are spilled after a block has been
4924 //    completed, we need to record the REG_STK location for those variables at resolution time.
4925
4926 void LinearScan::processBlockEndLocations(BasicBlock* currentBlock)
4927 {
4928     assert(currentBlock != nullptr && currentBlock->bbNum == curBBNum);
4929     VarToRegMap outVarToRegMap = getOutVarToRegMap(curBBNum);
4930
4931     VarSetOps::AssignNoCopy(compiler, currentLiveVars,
4932                             VarSetOps::Intersection(compiler, registerCandidateVars, currentBlock->bbLiveOut));
4933 #ifdef DEBUG
4934     if (getLsraExtendLifeTimes())
4935     {
4936         VarSetOps::Assign(compiler, currentLiveVars, registerCandidateVars);
4937     }
4938 #endif // DEBUG
4939     regMaskTP       liveRegs = RBM_NONE;
4940     VarSetOps::Iter iter(compiler, currentLiveVars);
4941     unsigned        varIndex = 0;
4942     while (iter.NextElem(&varIndex))
4943     {
4944         Interval* interval = getIntervalForLocalVar(varIndex);
4945         if (interval->isActive)
4946         {
4947             assert(interval->physReg != REG_NA && interval->physReg != REG_STK);
4948             setVarReg(outVarToRegMap, varIndex, interval->physReg);
4949         }
4950         else
4951         {
4952             outVarToRegMap[varIndex] = REG_STK;
4953         }
4954 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
4955         // Ensure that we have no partially-spilled large vector locals.
4956         assert(!varTypeNeedsPartialCalleeSave(interval->registerType) || !interval->isPartiallySpilled);
4957 #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
4958     }
4959     INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_END_BB));
4960 }
4961
4962 #ifdef DEBUG
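//------------------------------------------------------------------------
// dumpRefPositions: Dump all of the RefPositions (DEBUG only)
//
// Arguments:
//    str - a title string identifying this dump in the output
//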
4963 void LinearScan::dumpRefPositions(const char* str)
4964 {
4965     printf("------------\n");
4966     printf("REFPOSITIONS %s: \n", str);
4967     printf("------------\n");
4968     for (RefPosition& refPos : refPositions)
4969     {
4970         refPos.dump();
4971     }
4972 }
4973 #endif // DEBUG
4974
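//------------------------------------------------------------------------
// registerIsFree: Determine whether the given register is free
//
// Arguments:
//    regNum  - the register in question
//    regType - the RegisterType of the interval that would occupy it
//
// Return Value:
//    True if the register is free; on ARM, a TYP_DOUBLE additionally requires
//    that the second (odd) half of the double register be free.
//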
4975 bool LinearScan::registerIsFree(regNumber regNum, RegisterType regType)
4976 {
4977     RegRecord* physRegRecord = getRegisterRecord(regNum);
4978
4979     bool isFree = physRegRecord->isFree();
4980
4981 #ifdef _TARGET_ARM_
4982     if (isFree && regType == TYP_DOUBLE)
4983     {
4984         isFree = getSecondHalfRegRec(physRegRecord)->isFree();
4985     }
4986 #endif // _TARGET_ARM_
4987
4988     return isFree;
4989 }
4990
4991 //------------------------------------------------------------------------
4992 // LinearScan::freeRegister: Make a register available for use
4993 //
4994 // Arguments:
4995 //    physRegRecord - the RegRecord for the register to be freed.
4996 //
4997 // Return Value:
4998 //    None.
4999 //
5000 // Assumptions:
5002 //    It may be that the RegRecord has already been freed, e.g. due to a kill,
5003 //    in which case this method has no effect.
5004 //
5005 // Notes:
5006 //    If there is currently an Interval assigned to this register, and it has
5007 //    more references (i.e. this is a local last-use, but more uses and/or
5008 //    defs remain), it will remain assigned to the physRegRecord.  However, since
5009 //    it is marked inactive, the register will be available, albeit less desirable
5010 //    to allocate.
5011 void LinearScan::freeRegister(RegRecord* physRegRecord)
5012 {
5013     Interval* assignedInterval = physRegRecord->assignedInterval;
5014     // It may have already been freed by a "Kill"
5015     if (assignedInterval != nullptr)
5016     {
5017         assignedInterval->isActive = false;
5018         // If this is a constant interval, whose value we may encounter again,
5019         // don't unassign it until we need the register.
5020         if (!assignedInterval->isConstant)
5021         {
5022             RefPosition* nextRefPosition = assignedInterval->getNextRefPosition();
5023             // Unassign the register only if there are no more RefPositions, or the next
5024             // one is a def.  Note that the latter condition doesn't actually ensure that
5025             // there aren't subsequent uses that could be reached by a def in the assigned
5026             // register, but is merely a heuristic to avoid tying up the register (or using
5027             // it when it's non-optimal).  A better alternative would be to use SSA, so that
5028             // we wouldn't unnecessarily link separate live ranges to the same register.
5029             if (nextRefPosition == nullptr || RefTypeIsDef(nextRefPosition->refType))
5030             {
5031 #ifdef _TARGET_ARM_
5032                 assert((assignedInterval->registerType != TYP_DOUBLE) || genIsValidDoubleReg(physRegRecord->regNum));
5033 #endif // _TARGET_ARM_
5034                 unassignPhysReg(physRegRecord, nullptr);
5035             }
5036         }
5037     }
5038 }
5039
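//------------------------------------------------------------------------
// freeRegisters: Free the registers in the given mask, via freeRegister()
//
// Arguments:
//    regsToFree - the mask of registers to be freed
//
// Return Value:
//    None.
//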
5040 void LinearScan::freeRegisters(regMaskTP regsToFree)
5041 {
5042     if (regsToFree == RBM_NONE)
5043     {
5044         return;
5045     }
5046
5047     INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_FREE_REGS));
5048     while (regsToFree != RBM_NONE)
5049     {
5050         regMaskTP nextRegBit = genFindLowestBit(regsToFree);
5051         regsToFree &= ~nextRegBit;
5052         regNumber nextReg = genRegNumFromMask(nextRegBit);
5053         freeRegister(getRegisterRecord(nextReg));
5054     }
5055 }
5056
5057 //------------------------------------------------------------------------
5058 // allocateRegisters: Perform the actual register allocation by iterating over
5059 //                    all of the previously constructed RefPositions. Loosely based on raAssignVars().
5060 //
5061 void LinearScan::allocateRegisters()
5062 {
5063     JITDUMP("*************** In LinearScan::allocateRegisters()\n");
5064     DBEXEC(VERBOSE, lsraDumpIntervals("before allocateRegisters"));
5065
5066     // At start, nothing is active except for register args.
5067     for (Interval& interval : intervals)
5068     {
5069         Interval* currentInterval          = &interval;
5070         currentInterval->recentRefPosition = nullptr;
5071         currentInterval->isActive          = false;
5072         if (currentInterval->isLocalVar)
5073         {
5074             LclVarDsc* varDsc = currentInterval->getLocalVar(compiler);
5075             if (varDsc->lvIsRegArg && currentInterval->firstRefPosition != nullptr)
5076             {
5077                 currentInterval->isActive = true;
5078             }
5079         }
5080     }
5081
5082 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
5083     if (enregisterLocalVars)
5084     {
5085         VarSetOps::Iter largeVectorVarsIter(compiler, largeVectorVars);
5086         unsigned        largeVectorVarIndex = 0;
5087         while (largeVectorVarsIter.NextElem(&largeVectorVarIndex))
5088         {
5089             Interval* lclVarInterval           = getIntervalForLocalVar(largeVectorVarIndex);
5090             lclVarInterval->isPartiallySpilled = false;
5091         }
5092     }
5093 #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
5094
5095     for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg))
5096     {
5097         getRegisterRecord(reg)->recentRefPosition = nullptr;
5098         getRegisterRecord(reg)->isActive          = false;
5099     }
5100
5101 #ifdef DEBUG
5102     regNumber lastAllocatedReg = REG_NA;
5103     if (VERBOSE)
5104     {
5105         dumpRefPositions("BEFORE ALLOCATION");
5106         dumpVarRefPositions("BEFORE ALLOCATION");
5107
5108         printf("\n\nAllocating Registers\n"
5109                "--------------------\n");
5110         // Start with a small set of commonly used registers, so that we don't keep having to print a new title.
5111         registersToDump = LsraLimitSmallIntSet | LsraLimitSmallFPSet;
5112         dumpRegRecordHeader();
5113         // Now print an empty "RefPosition", since we complete the dump of the regs at the beginning of the loop.
5114         printf(indentFormat, "");
5115     }
5116 #endif // DEBUG
5117
5118     BasicBlock* currentBlock = nullptr;
5119
5120     LsraLocation prevLocation    = MinLocation;
5121     regMaskTP    regsToFree      = RBM_NONE;
5122     regMaskTP    delayRegsToFree = RBM_NONE;
5123
5124     // This is the most recent RefPosition for which a register was allocated
5125     // - currently only used for DEBUG but maintained in non-debug, for clarity of code
5126     //   (and will be optimized away because in non-debug spillAlways() unconditionally returns false)
5127     RefPosition* lastAllocatedRefPosition = nullptr;
5128
5129     bool handledBlockEnd = false;
5130
5131     for (RefPosition& refPositionIterator : refPositions)
5132     {
5133         RefPosition* currentRefPosition = &refPositionIterator;
5134
5135 #ifdef DEBUG
5136         // Set the activeRefPosition to null until we're done with any boundary handling.
5137         activeRefPosition = nullptr;
5138         if (VERBOSE)
5139         {
5140             // We're really dumping the RegRecords "after" the previous RefPosition, but it's more convenient
5141             // to do this here, since there are a number of "continue"s in this loop.
5142             dumpRegRecords();
5143         }
5144 #endif // DEBUG
5145
5146         // This is the previousRefPosition of the current Referent, if any
5147         RefPosition* previousRefPosition = nullptr;
5148
5149         Interval*      currentInterval = nullptr;
5150         Referenceable* currentReferent = nullptr;
5151         bool           isInternalRef   = false;
5152         RefType        refType         = currentRefPosition->refType;
5153
5154         currentReferent = currentRefPosition->referent;
5155
5156         if (spillAlways() && lastAllocatedRefPosition != nullptr && !lastAllocatedRefPosition->isPhysRegRef &&
5157             !lastAllocatedRefPosition->getInterval()->isInternal &&
5158             (RefTypeIsDef(lastAllocatedRefPosition->refType) || lastAllocatedRefPosition->getInterval()->isLocalVar))
5159         {
5160             assert(lastAllocatedRefPosition->registerAssignment != RBM_NONE);
5161             RegRecord* regRecord = lastAllocatedRefPosition->getInterval()->assignedReg;
5162             unassignPhysReg(regRecord, lastAllocatedRefPosition);
5163             // Now set lastAllocatedRefPosition to null, so that we don't try to spill it again
5164             lastAllocatedRefPosition = nullptr;
5165         }
5166
5167         // We wait to free any registers until we've completed all the
5168         // uses for the current node.
5169         // This avoids reusing registers too soon.
5170         // We free before the last true def (after all the uses & internal
5171         // registers), and then again at the beginning of the next node.
5172         // This is made easier by assigning two LsraLocations per node - one
5173         // for all the uses, internal registers & all but the last def, and
5174         // another for the final def (if any).
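        // As an illustration (using the two-locations-per-node scheme described above):
        // for a node at location L, the uses, internal registers and all but the last
        // def are at L, and the final def (if any) is at L + 1.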
5175
5176         LsraLocation currentLocation = currentRefPosition->nodeLocation;
5177
5178         if ((regsToFree | delayRegsToFree) != RBM_NONE)
5179         {
5180             // Free at a new location, or at a basic block boundary
5181             if (refType == RefTypeBB)
5182             {
5183                 assert(currentLocation > prevLocation);
5184             }
5185             if (currentLocation > prevLocation)
5186             {
5187                 freeRegisters(regsToFree);
5188                 if ((currentLocation > (prevLocation + 1)) && (delayRegsToFree != RBM_NONE))
5189                 {
5190                     // We should never see a delayReg that is delayed until a Location that has no RefPosition
5191                     // (that would be the RefPosition that it was supposed to interfere with).
5192                     assert(!"Found a delayRegFree associated with Location with no reference");
5193                     // However, to be cautious for the Release build case, we will free them.
5194                     freeRegisters(delayRegsToFree);
5195                     delayRegsToFree = RBM_NONE;
5196                 }
5197                 regsToFree      = delayRegsToFree;
5198                 delayRegsToFree = RBM_NONE;
5199             }
5200         }
5201         prevLocation = currentLocation;
5202
5203         // Get the previous RefPosition; the current RefPosition becomes the new "previous".
5204         if (currentReferent != nullptr)
5205         {
5206             previousRefPosition                = currentReferent->recentRefPosition;
5207             currentReferent->recentRefPosition = currentRefPosition;
5208         }
5209         else
5210         {
5211             assert((refType == RefTypeBB) || (refType == RefTypeKillGCRefs));
5212         }
5213
5214 #ifdef DEBUG
5215         activeRefPosition = currentRefPosition;
5216
5217         // For the purposes of register resolution, we handle the DummyDefs before
5218         // the block boundary - so the RefTypeBB is after all the DummyDefs.
5219         // However, for the purposes of allocation, we want to handle the block
5220         // boundary first, so that we can free any registers occupied by lclVars
5221         // that aren't live in the next block and make them available for the
5222         // DummyDefs.
5223
5224         // If we've already handled the BlockEnd, but now we're seeing the RefTypeBB,
5225         // dump it now.
5226         if ((refType == RefTypeBB) && handledBlockEnd)
5227         {
5228             dumpNewBlock(currentBlock, currentRefPosition->nodeLocation);
5229         }
5230 #endif // DEBUG
5231
5232         if (!handledBlockEnd && (refType == RefTypeBB || refType == RefTypeDummyDef))
5233         {
5234             // Free any delayed regs (now in regsToFree) before processing the block boundary
5235             freeRegisters(regsToFree);
5236             regsToFree         = RBM_NONE;
5237             handledBlockEnd    = true;
5238             curBBStartLocation = currentRefPosition->nodeLocation;
5239             if (currentBlock == nullptr)
5240             {
5241                 currentBlock = startBlockSequence();
5242                 INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_START_BB, nullptr, REG_NA, compiler->fgFirstBB));
5243             }
5244             else
5245             {
5246                 processBlockEndAllocation(currentBlock);
5247                 currentBlock = moveToNextBlock();
5248                 INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_START_BB, nullptr, REG_NA, currentBlock));
5249             }
5250         }
5251
5252         if (refType == RefTypeBB)
5253         {
5254             handledBlockEnd = false;
5255             continue;
5256         }
5257
5258         if (refType == RefTypeKillGCRefs)
5259         {
5260             spillGCRefs(currentRefPosition);
5261             continue;
5262         }
5263
5264         // If this is a FixedReg, disassociate any inactive constant interval from this register.
5265         // Otherwise, do nothing.
5266         if (refType == RefTypeFixedReg)
5267         {
5268             RegRecord* regRecord        = currentRefPosition->getReg();
5269             Interval*  assignedInterval = regRecord->assignedInterval;
5270
5271             if (assignedInterval != nullptr && !assignedInterval->isActive && assignedInterval->isConstant)
5272             {
5273                 regRecord->assignedInterval = nullptr;
5274
5275 #ifdef _TARGET_ARM_
5276                 // Update overlapping floating point register for TYP_DOUBLE
5277                 if (assignedInterval->registerType == TYP_DOUBLE)
5278                 {
5279                     regRecord = findAnotherHalfRegRec(regRecord);
5280                     assert(regRecord->assignedInterval == assignedInterval);
5281                     regRecord->assignedInterval = nullptr;
5282                 }
5283 #endif
5284             }
5285             INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_FIXED_REG, nullptr, currentRefPosition->assignedReg()));
5286             continue;
5287         }
5288
5289         // If this is an exposed use, do nothing - this is merely a placeholder to attempt to
5290         // ensure that a register is allocated for the full lifetime.  The resolution logic
5291         // will take care of moving to the appropriate register if needed.
5292
5293         if (refType == RefTypeExpUse)
5294         {
5295             INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_EXP_USE));
5296             continue;
5297         }
5298
5299         regNumber assignedRegister = REG_NA;
5300
5301         if (currentRefPosition->isIntervalRef())
5302         {
5303             currentInterval  = currentRefPosition->getInterval();
5304             assignedRegister = currentInterval->physReg;
5305
5306             // Identify the special cases where we decide up-front not to allocate
5307             bool allocate = true;
5308             bool didDump  = false;
5309
5310             if (refType == RefTypeParamDef || refType == RefTypeZeroInit)
5311             {
5312                 // For a ParamDef with a weighted refCount at or below unity, don't enregister it at entry.
5313                 // TODO-CQ: Consider doing this only for stack parameters, since otherwise we may be needlessly
5314                 // inserting a store.
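                // For example (illustrative): a parameter referenced only once, in a
                // rarely-executed block, has a weighted refCount at or below
                // BB_UNITY_WEIGHT; it is not enregistered at entry, so a stack-passed
                // parameter incurs no register move in the prolog.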
5315                 LclVarDsc* varDsc = currentInterval->getLocalVar(compiler);
5316                 assert(varDsc != nullptr);
5317                 if (refType == RefTypeParamDef && varDsc->lvRefCntWtd() <= BB_UNITY_WEIGHT)
5318                 {
5319                     INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_ENTRY_REG_ALLOCATED, currentInterval));
5320                     didDump  = true;
5321                     allocate = false;
5322                     setIntervalAsSpilled(currentInterval);
5323                 }
5324                 // If it has no actual references, mark it as "lastUse"; since such references are not part
5325                 // of any flow, they won't have been marked during dataflow.  Otherwise, if we allocate a
5326                 // register, we would never unassign it.
5327                 else if (currentRefPosition->nextRefPosition == nullptr)
5328                 {
5329                     INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_ZERO_REF, currentInterval));
5330                     currentRefPosition->lastUse = true;
5331                 }
5332             }
5333 #ifdef FEATURE_SIMD
5334 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
5335             else if (currentInterval->isUpperVector)
5336             {
5337                 // This is a save or restore of the upper half of a large vector lclVar.
5338                 Interval* lclVarInterval = currentInterval->relatedInterval;
5339                 assert(lclVarInterval->isLocalVar);
5340                 if (refType == RefTypeUpperVectorSave)
5341                 {
5342                     if ((lclVarInterval->physReg == REG_NA) ||
5343                         (lclVarInterval->isPartiallySpilled && (currentInterval->physReg == REG_STK)))
5344                     {
5345                         allocate = false;
5346                     }
5347                     else
5348                     {
5349                         lclVarInterval->isPartiallySpilled = true;
5350                     }
5351                 }
5352                 else if (refType == RefTypeUpperVectorRestore)
5353                 {
5354                     assert(currentInterval->isUpperVector);
5355                     if (lclVarInterval->isPartiallySpilled)
5356                     {
5357                         lclVarInterval->isPartiallySpilled = false;
5358                     }
5359                     else
5360                     {
5361                         allocate = false;
5362                     }
5363                 }
5364             }
5365             else if (refType == RefTypeUpperVectorSave)
5366             {
5367                 assert(!currentInterval->isLocalVar);
5368                 // Note that this case looks a lot like the case below, but in this case we need to spill
5369                 // at the previous RefPosition.
5370                 // We may want to consider allocating two callee-save registers for this case, but it happens rarely
5371                 // enough that it may not warrant the additional complexity.
5372                 if (assignedRegister != REG_NA)
5373                 {
5374                     unassignPhysReg(getRegisterRecord(assignedRegister), currentInterval->firstRefPosition);
5375                     INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_REG_ALLOCATED, currentInterval));
5376                 }
5377                 currentRefPosition->registerAssignment = RBM_NONE;
5378                 continue;
5379             }
5380 #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
5381 #endif // FEATURE_SIMD
5382
5383             if (allocate == false)
5384             {
5385                 if (assignedRegister != REG_NA)
5386                 {
5387                     unassignPhysReg(getRegisterRecord(assignedRegister), currentRefPosition);
5388                 }
5389                 else if (!didDump)
5390                 {
5391                     INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_REG_ALLOCATED, currentInterval));
5392                     didDump = true;
5393                 }
5394                 currentRefPosition->registerAssignment = RBM_NONE;
5395                 continue;
5396             }
5397
5398             if (currentInterval->isSpecialPutArg)
5399             {
5400                 assert(!currentInterval->isLocalVar);
5401                 Interval* srcInterval = currentInterval->relatedInterval;
5402                 assert(srcInterval != nullptr && srcInterval->isLocalVar);
5403                 if (refType == RefTypeDef)
5404                 {
5405                     assert(srcInterval->recentRefPosition->nodeLocation == currentLocation - 1);
5406                     RegRecord* physRegRecord = srcInterval->assignedReg;
5407
5408                     // For a putarg_reg to be special, its next use location has to be the same
5409                     // as the fixed reg's next kill location. Otherwise, if the source lcl var's
5410                     // next use came after the kill of the fixed reg but before putarg_reg's next
5411                     // use, the fixed reg's kill would spill the source but not the putarg_reg,
5412                     // were it treated as special.
5413                     if (srcInterval->isActive &&
5414                         genRegMask(srcInterval->physReg) == currentRefPosition->registerAssignment &&
5415                         currentInterval->getNextRefLocation() == physRegRecord->getNextRefLocation())
5416                     {
5417                         assert(physRegRecord->regNum == srcInterval->physReg);
5418
5419                         // A special putarg_reg acts as a pass-thru, since both the source lcl var
5420                         // and the putarg_reg have the same register allocated.  The physical reg
5421                         // record continues to point to the source lcl var's interval rather than
5422                         // to the putarg_reg's interval.  So, if the register allocated to the
5423                         // source lcl var were spilled and reallocated to another tree node before
5424                         // its use at the call node, the lcl var (and not the putarg_reg) would be
5425                         // spilled, since the physical reg record points to the lcl var's interval.
5426                         // As a result, the arg reg would get trashed, leading to bad codegen. The
5427                         // assumption here is that the source lcl var of a special putarg_reg
5428                         // doesn't get spilled and reallocated prior to its use at the call node.
5429                         // This is ensured by marking the physical reg record as busy until the
5430                         // next kill.
5431                         physRegRecord->isBusyUntilNextKill = true;
5432                     }
5433                     else
5434                     {
5435                         currentInterval->isSpecialPutArg = false;
5436                     }
5437                 }
5438                 // If this is still a SpecialPutArg, continue;
5439                 if (currentInterval->isSpecialPutArg)
5440                 {
5441                     INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_SPECIAL_PUTARG, currentInterval,
5442                                                     currentRefPosition->assignedReg()));
5443                     continue;
5444                 }
5445             }
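            // For example (an illustrative x64 sketch): for a call consuming
            // "putarg_reg RCX <- V01" where V01 is already live in RCX, the putarg_reg
            // above acts as a pass-thru: no copy is generated, and RCX is marked
            // busyUntilNextKill so that V01 cannot be displaced before the call.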
5446
5447             if (assignedRegister == REG_NA && RefTypeIsUse(refType))
5448             {
5449                 currentRefPosition->reload = true;
5450                 INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_RELOAD, currentInterval, assignedRegister));
5451             }
5452         }
5453
5454         regMaskTP assignedRegBit = RBM_NONE;
5455         bool      isInRegister   = false;
5456         if (assignedRegister != REG_NA)
5457         {
5458             isInRegister   = true;
5459             assignedRegBit = genRegMask(assignedRegister);
5460             if (!currentInterval->isActive)
5461             {
5462                 // If this is a use, it must have started the block on the stack, but the register
5463                 // was available for use so we kept the association.
5464                 if (RefTypeIsUse(refType))
5465                 {
5466                     assert(enregisterLocalVars);
5467                     assert(inVarToRegMaps[curBBNum][currentInterval->getVarIndex(compiler)] == REG_STK &&
5468                            previousRefPosition->nodeLocation <= curBBStartLocation);
5469                     isInRegister = false;
5470                 }
5471                 else
5472                 {
5473                     currentInterval->isActive = true;
5474                 }
5475             }
5476             assert(currentInterval->assignedReg != nullptr &&
5477                    currentInterval->assignedReg->regNum == assignedRegister &&
5478                    currentInterval->assignedReg->assignedInterval == currentInterval);
5479         }
5480
5481         // If this is a physical register, we unconditionally assign it to itself!
5482         if (currentRefPosition->isPhysRegRef)
5483         {
5484             RegRecord* currentReg       = currentRefPosition->getReg();
5485             Interval*  assignedInterval = currentReg->assignedInterval;
5486
5487             if (assignedInterval != nullptr)
5488             {
5489                 unassignPhysReg(currentReg, assignedInterval->recentRefPosition);
5490             }
5491             currentReg->isActive = true;
5492             assignedRegister     = currentReg->regNum;
5493             assignedRegBit       = genRegMask(assignedRegister);
5494             if (refType == RefTypeKill)
5495             {
5496                 currentReg->isBusyUntilNextKill = false;
5497             }
5498         }
5499         else if (previousRefPosition != nullptr)
5500         {
5501             assert(previousRefPosition->nextRefPosition == currentRefPosition);
5502             assert(assignedRegister == REG_NA || assignedRegBit == previousRefPosition->registerAssignment ||
5503                    currentRefPosition->outOfOrder || previousRefPosition->copyReg ||
5504                    previousRefPosition->refType == RefTypeExpUse || currentRefPosition->refType == RefTypeDummyDef);
5505         }
5506         else if (assignedRegister != REG_NA)
5507         {
5508             // Handle the case where this is a preassigned register (i.e. parameter).
5509             // We don't want to actually use the preassigned register if it's not
5510             // going to cover the lifetime - but we had to preallocate it to ensure
5511             // that it remained live.
5512             // TODO-CQ: At some point we may want to refine the analysis here, in case
5513             // it might be beneficial to keep it in this reg for PART of the lifetime
5514             if (currentInterval->isLocalVar)
5515             {
5516                 regMaskTP preferences        = currentInterval->registerPreferences;
5517                 bool      keepAssignment     = true;
5518                 bool      matchesPreferences = (preferences & genRegMask(assignedRegister)) != RBM_NONE;
5519
5520                 // Will the assigned register cover the lifetime?  If not, does it at least
5521                 // meet the preferences for the next RefPosition?
5522                 RegRecord*   physRegRecord     = getRegisterRecord(currentInterval->physReg);
5523                 RefPosition* nextPhysRegRefPos = physRegRecord->getNextRefPosition();
5524                 if (nextPhysRegRefPos != nullptr &&
5525                     nextPhysRegRefPos->nodeLocation <= currentInterval->lastRefPosition->nodeLocation)
5526                 {
5527                     // Check to see if the existing assignment matches the preferences (e.g. callee save registers)
5528                     // and ensure that the next use of this localVar does not occur after the nextPhysRegRefPos
5529                     // There must be a next RefPosition, because we know that the Interval extends beyond the
5530                     // nextPhysRegRefPos.
5531                     RefPosition* nextLclVarRefPos = currentRefPosition->nextRefPosition;
5532                     assert(nextLclVarRefPos != nullptr);
5533                     if (!matchesPreferences || nextPhysRegRefPos->nodeLocation < nextLclVarRefPos->nodeLocation ||
5534                         physRegRecord->conflictingFixedRegReference(nextLclVarRefPos))
5535                     {
5536                         keepAssignment = false;
5537                     }
5538                 }
5539                 else if (refType == RefTypeParamDef && !matchesPreferences)
5540                 {
5541                     // Don't use the register, even if available, if it doesn't match the preferences.
5542                     // Note that this case is only for ParamDefs, for which we haven't yet taken preferences
5543                     // into account (we've just automatically got the initial location).  In other cases,
5544                     // we would already have put it in a preferenced register, if it was available.
5545                     // TODO-CQ: Consider expanding this to check availability - that would duplicate
5546                     // code here, but otherwise we may wind up in this register anyway.
5547                     keepAssignment = false;
5548                 }
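                // For example (an illustrative x64 sketch): an integer parameter that
                // arrives in RCX but is live across a call, and so prefers the
                // callee-saved registers, does not match its preferences; we drop the
                // RCX assignment below and allocate it like any other interval.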
5549
5550                 if (keepAssignment == false)
5551                 {
5552                     currentRefPosition->registerAssignment = allRegs(currentInterval->registerType);
5553                     unassignPhysRegNoSpill(physRegRecord);
5554
5555                     // If the preferences are currently set to just this register, reset them to allRegs
5556                     // of the appropriate type (just as we reset the registerAssignment for this
5557                     // RefPosition above).
5558                     // Otherwise, simply remove this register from the preferences, if it's there.
5559
5560                     if (currentInterval->registerPreferences == assignedRegBit)
5561                     {
5562                         currentInterval->registerPreferences = currentRefPosition->registerAssignment;
5563                     }
5564                     else
5565                     {
5566                         currentInterval->registerPreferences &= ~assignedRegBit;
5567                     }
5568
5569                     assignedRegister = REG_NA;
5570                     assignedRegBit   = RBM_NONE;
5571                 }
5572             }
5573         }
5574
5575         if (assignedRegister != REG_NA)
5576         {
5577             RegRecord* physRegRecord = getRegisterRecord(assignedRegister);
5578
5579             // If there is a conflicting fixed reference, insert a copy.
5580             if (physRegRecord->conflictingFixedRegReference(currentRefPosition))
5581             {
5582                 // We may have already reassigned the register to the conflicting reference.
5583                 // If not, we need to unassign this interval.
5584                 if (physRegRecord->assignedInterval == currentInterval)
5585                 {
5586                     unassignPhysRegNoSpill(physRegRecord);
5587                 }
5588                 currentRefPosition->moveReg = true;
5589                 assignedRegister            = REG_NA;
5590                 setIntervalAsSplit(currentInterval);
5591                 INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_MOVE_REG, currentInterval, assignedRegister));
5592             }
5593             else if ((genRegMask(assignedRegister) & currentRefPosition->registerAssignment) != 0)
5594             {
5595                 currentRefPosition->registerAssignment = assignedRegBit;
5596                 if (!currentReferent->isActive)
5597                 {
5598                     // If we've got an exposed use at the top of a block, the
5599                     // interval might not have been active.  Otherwise, if it's a use,
5600                     // the interval must be active.
5601                     if (refType == RefTypeDummyDef)
5602                     {
5603                         currentReferent->isActive = true;
5604                         assert(getRegisterRecord(assignedRegister)->assignedInterval == currentInterval);
5605                     }
5606                     else
5607                     {
5608                         currentRefPosition->reload = true;
5609                     }
5610                 }
5611                 INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, currentInterval, assignedRegister));
5612             }
5613             else
5614             {
5615                 assert(currentInterval != nullptr);
5616
5617                 // It's already in a register, but not one we need.
5618                 if (!RefTypeIsDef(currentRefPosition->refType))
5619                 {
5620                     regNumber copyReg = assignCopyReg(currentRefPosition);
5621                     assert(copyReg != REG_NA);
5622                     INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_COPY_REG, currentInterval, copyReg));
5623                     lastAllocatedRefPosition = currentRefPosition;
5624                     if (currentRefPosition->lastUse)
5625                     {
5626                         if (currentRefPosition->delayRegFree)
5627                         {
5628                             INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE_DELAYED, currentInterval,
5629                                                             assignedRegister));
5630                             delayRegsToFree |= (genRegMask(assignedRegister) | currentRefPosition->registerAssignment);
5631                         }
5632                         else
5633                         {
5634                             INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE, currentInterval, assignedRegister));
5635                             regsToFree |= (genRegMask(assignedRegister) | currentRefPosition->registerAssignment);
5636                         }
5637                     }
5638                     // If this is a tree temp (non-localVar) interval, we will need an explicit move.
5639                     if (!currentInterval->isLocalVar)
5640                     {
5641                         currentRefPosition->moveReg = true;
5642                         currentRefPosition->copyReg = false;
5643                     }
5644                     continue;
5645                 }
5646                 else
5647                 {
5648                     INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NEEDS_NEW_REG, nullptr, assignedRegister));
5649                     regsToFree |= genRegMask(assignedRegister);
5650                     // We want a new register, but we don't want this to be considered a spill.
5651                     assignedRegister = REG_NA;
5652                     if (physRegRecord->assignedInterval == currentInterval)
5653                     {
5654                         unassignPhysRegNoSpill(physRegRecord);
5655                     }
5656                 }
5657             }
5658         }
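        // For example (an illustrative x64 sketch): if the interval lives in RSI but
        // this use requires RCX, the copyReg path above leaves the interval's home in
        // RSI and inserts a copy to RCX; a tree temp is instead "moved" to the new
        // register, while a def frees the old register and falls through to allocate.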
5659
5660         if (assignedRegister == REG_NA)
5661         {
5662             bool allocateReg = true;
5663
5664             if (currentRefPosition->RegOptional())
5665             {
5666                 // We can avoid allocating a register if it is the last use requiring a reload.
5667                 if (currentRefPosition->lastUse && currentRefPosition->reload)
5668                 {
5669                     allocateReg = false;
5670                 }
5671
5672 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE && defined(_TARGET_XARCH_)
5673                 // We can also avoid allocating a register (in fact we don't want to) if we have
5674                 // an UpperVectorRestore on xarch where the value is on the stack.
5675                 if ((currentRefPosition->refType == RefTypeUpperVectorRestore) && (currentInterval->physReg == REG_NA))
5676                 {
5677                     assert(currentRefPosition->regOptional);
5678                     allocateReg = false;
5679                 }
5680 #endif
5681
5682 #ifdef DEBUG
5683                 // Under stress mode, don't attempt to allocate a reg to
5684                 // reg optional ref position, unless it's a ParamDef.
5685                 if (allocateReg && regOptionalNoAlloc())
5686                 {
5687                     allocateReg = false;
5688                 }
5689 #endif
5690             }
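            // For example (illustrative): a reg-optional last use of a spilled
            // interval can stay in memory, since codegen can typically fold the stack
            // access into the consuming instruction on xarch.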
5691
5692             if (allocateReg)
5693             {
5694                 // Try to allocate a register
5695                 assignedRegister = tryAllocateFreeReg(currentInterval, currentRefPosition);
5696             }
5697
5698             // If no register was found, and if the currentRefPosition must have a register,
5699             // then find a register to spill
5700             if (assignedRegister == REG_NA)
5701             {
5702                 bool isAllocatable = currentRefPosition->IsActualRef();
5703 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE && defined(_TARGET_ARM64_)
5704                 if (currentInterval->isUpperVector)
5705                 {
5706                     // On Arm64, we can't save the upper half to memory without a register.
5707                     isAllocatable = true;
5708                     assert(!currentRefPosition->RegOptional());
5709                 }
5710 #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE && _TARGET_ARM64_
5711                 if (isAllocatable)
5712                 {
5713                     if (allocateReg)
5714                     {
5715                         assignedRegister =
5716                             allocateBusyReg(currentInterval, currentRefPosition, currentRefPosition->RegOptional());
5717                     }
5718
5719                     if (assignedRegister != REG_NA)
5720                     {
5721                         INDEBUG(
5722                             dumpLsraAllocationEvent(LSRA_EVENT_ALLOC_SPILLED_REG, currentInterval, assignedRegister));
5723                     }
5724                     else
5725                     {
5726                         // This can happen only for those ref positions that are to be allocated
5727                         // only if profitable.
5728                         noway_assert(currentRefPosition->RegOptional());
5729
5730                         currentRefPosition->registerAssignment = RBM_NONE;
5731                         currentRefPosition->reload             = false;
5732                         setIntervalAsSpilled(currentInterval);
5733
5734                         INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_REG_ALLOCATED, currentInterval));
5735                     }
5736                 }
5737                 else
5738                 {
5739                     INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_REG_ALLOCATED, currentInterval));
5740                     currentRefPosition->registerAssignment = RBM_NONE;
5741                     currentInterval->isActive              = false;
5742                     setIntervalAsSpilled(currentInterval);
5743                 }
5744             }
5745 #ifdef DEBUG
5746             else
5747             {
5748                 if (VERBOSE)
5749                 {
5750                     if (currentInterval->isConstant && (currentRefPosition->treeNode != nullptr) &&
5751                         currentRefPosition->treeNode->IsReuseRegVal())
5752                     {
5753                         dumpLsraAllocationEvent(LSRA_EVENT_REUSE_REG, currentInterval, assignedRegister, currentBlock);
5754                     }
5755                     else
5756                     {
5757                         dumpLsraAllocationEvent(LSRA_EVENT_ALLOC_REG, currentInterval, assignedRegister, currentBlock);
5758                     }
5759                 }
5760             }
5761 #endif // DEBUG
5762
5763             if (refType == RefTypeDummyDef && assignedRegister != REG_NA)
5764             {
5765                 setInVarRegForBB(curBBNum, currentInterval->varNum, assignedRegister);
5766             }
5767
5768             // If we allocated a register, and this is a use of a spilled value,
5769             // it should have been marked for reload above.
5770             if (assignedRegister != REG_NA && RefTypeIsUse(refType) && !isInRegister)
5771             {
5772                 assert(currentRefPosition->reload);
5773             }
5774         }
5775
5776         // If we allocated a register, record it
5777         if (currentInterval != nullptr && assignedRegister != REG_NA)
5778         {
5779             assignedRegBit                         = genRegMask(assignedRegister);
5780             currentRefPosition->registerAssignment = assignedRegBit;
5781             currentInterval->physReg               = assignedRegister;
5782             regsToFree &= ~assignedRegBit; // we'll set it again later if it's dead
5783
5784             // If this interval is dead, free the register.
5785             // The interval could be dead if this is a user variable, or if the
5786             // node is being evaluated for side effects, or a call whose result
5787             // is not used, etc.
5788             // If this is an UpperVector we'll neither free it nor preference it
5789             // (it will be freed when it is used).
5790             if (!currentInterval->IsUpperVector())
5791             {
5792                 if (currentRefPosition->lastUse || currentRefPosition->nextRefPosition == nullptr)
5793                 {
5794                     assert(currentRefPosition->isIntervalRef());
5795
5796                     if (refType != RefTypeExpUse && currentRefPosition->nextRefPosition == nullptr)
5797                     {
5798                         if (currentRefPosition->delayRegFree)
5799                         {
5800                             delayRegsToFree |= assignedRegBit;
5801
5802                             INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE_DELAYED));
5803                         }
5804                         else
5805                         {
5806                             regsToFree |= assignedRegBit;
5807
5808                             INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE));
5809                         }
5810                     }
5811                     else
5812                     {
5813                         currentInterval->isActive = false;
5814                     }
5815
5816                     // Update the register preferences for the relatedInterval, if this is 'preferencedToDef'.
5817                     // Don't propagate to subsequent relatedIntervals; that will happen as they are allocated, and we
5818                     // don't know yet whether the register will be retained.
5819                     if (currentInterval->relatedInterval != nullptr)
5820                     {
5821                         currentInterval->relatedInterval->updateRegisterPreferences(assignedRegBit);
5822                     }
5823                 }
5824             }
5825
5826             lastAllocatedRefPosition = currentRefPosition;
5827         }
5828     }
5829
5830 #ifdef JIT32_GCENCODER
5831     // For the JIT32_GCENCODER, when lvaKeepAliveAndReportThis is true, we must either keep this "this" pointer
5832     // in the same register for the entire method, or keep it on the stack. Rather than imposing this constraint
5833     // as we allocate, we will force all refs to the stack if it is split or spilled.
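    // For example (illustrative): if the "this" interval ended up split (say, under
    // JitStressRegs), it is marked spilled here, and each of its uses below is forced
    // to reload and spill again, so that "this" can be reported at a single stack
    // location for the entire method.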
5834     if (enregisterLocalVars && compiler->lvaKeepAliveAndReportThis())
5835     {
5836         LclVarDsc* thisVarDsc = compiler->lvaGetDesc(compiler->info.compThisArg);
5837         if (!thisVarDsc->lvDoNotEnregister)
5838         {
5839             Interval* interval = getIntervalForLocalVar(thisVarDsc->lvVarIndex);
5840             if (interval->isSplit)
5841             {
5842                 // We'll have to spill this.
5843                 setIntervalAsSpilled(interval);
5844             }
5845             if (interval->isSpilled)
5846             {
5847                 for (RefPosition* ref = interval->firstRefPosition; ref != nullptr; ref = ref->nextRefPosition)
5848                 {
5849                     if (ref->RegOptional())
5850                     {
5851                         ref->registerAssignment = RBM_NONE;
5852                         ref->reload             = false;
5853                         ref->spillAfter         = false;
5854                     }
5855                     switch (ref->refType)
5856                     {
5857                         case RefTypeDef:
5858                             if (ref->registerAssignment != RBM_NONE)
5859                             {
5860                                 ref->spillAfter = true;
5861                             }
5862                             break;
5863                         case RefTypeUse:
5864                             if (ref->registerAssignment != RBM_NONE)
5865                             {
5866                                 ref->reload     = true;
5867                                 ref->spillAfter = true;
5868                                 ref->copyReg    = false;
5869                                 ref->moveReg    = false;
5870                             }
5871                             break;
5872                         default:
5873                             break;
5874                     }
5875                 }
5876             }
5877         }
5878     }
5879 #endif // JIT32_GCENCODER
5880
5881     // Free registers to clear associated intervals for resolution phase
5882     CLANG_FORMAT_COMMENT_ANCHOR;
5883
5884 #ifdef DEBUG
5885     if (getLsraExtendLifeTimes())
5886     {
5887         // If we have extended lifetimes, we need to make sure all the registers are freed.
5888         for (int regNumIndex = 0; regNumIndex <= REG_FP_LAST; regNumIndex++)
5889         {
5890             RegRecord& regRecord = physRegs[regNumIndex];
5891             Interval*  interval  = regRecord.assignedInterval;
5892             if (interval != nullptr)
5893             {
5894                 interval->isActive = false;
5895                 unassignPhysReg(&regRecord, nullptr);
5896             }
5897         }
5898     }
5899     else
5900 #endif // DEBUG
5901     {
5902         freeRegisters(regsToFree | delayRegsToFree);
5903     }
5904
5905 #ifdef DEBUG
5906     if (VERBOSE)
5907     {
5908         // Dump the RegRecords after the last RefPosition is handled.
5909         dumpRegRecords();
5910         printf("\n");
5911
5912         dumpRefPositions("AFTER ALLOCATION");
5913         dumpVarRefPositions("AFTER ALLOCATION");
5914
5915         // Dump the intervals that remain active
5916         printf("Active intervals at end of allocation:\n");
5917
5918         // We COULD just reuse the intervalIter from above, but ArrayListIterator doesn't
5919         // provide a Reset function (!) - we'll probably replace this so don't bother
5920         // adding it
5921
5922         for (Interval& interval : intervals)
5923         {
5924             if (interval.isActive)
5925             {
5926                 printf("Active ");
5927                 interval.dump();
5928             }
5929         }
5930
5931         printf("\n");
5932     }
5933 #endif // DEBUG
5934 }
5935
5936 //-----------------------------------------------------------------------------
5937 // updateAssignedInterval: Update assigned interval of register.
5938 //
5939 // Arguments:
5940 //    reg      -    register to be updated
5941 //    interval -    interval to be assigned
5942 //    regType  -    register type
5943 //
5944 // Return Value:
5945 //    None
5946 //
5947 // Assumptions:
5948 //    For ARM32, when "regType" is TYP_DOUBLE, "reg" should be an even-numbered
5949 //    float register, i.e. the lower half of the double register.
5950 //
5951 // Note:
5952 //    For ARM32, the two float registers that make up a double register are updated
5953 //    together when "regType" is TYP_DOUBLE.
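//    For example (ARM32, illustrative): updating an even-numbered float register
//    with a TYP_DOUBLE interval also updates the odd-numbered register that forms
//    the other half of the double register (e.g. f2/f3 together form d1).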
5954 //
5955 void LinearScan::updateAssignedInterval(RegRecord* reg, Interval* interval, RegisterType regType)
5956 {
5957 #ifdef _TARGET_ARM_
5958     // Update overlapping floating point register for TYP_DOUBLE.
5959     Interval* oldAssignedInterval = reg->assignedInterval;
5960     if (regType == TYP_DOUBLE)
5961     {
5962         RegRecord* anotherHalfReg = findAnotherHalfRegRec(reg);
5963
5964         anotherHalfReg->assignedInterval = interval;
5965     }
5966     else if ((oldAssignedInterval != nullptr) && (oldAssignedInterval->registerType == TYP_DOUBLE))
5967     {
5968         RegRecord* anotherHalfReg = findAnotherHalfRegRec(reg);
5969
5970         anotherHalfReg->assignedInterval = nullptr;
5971     }
5972 #endif
5973     reg->assignedInterval = interval;
5974 }
5975
5976 //-----------------------------------------------------------------------------
5977 // updatePreviousInterval: Update previous interval of register.
5978 //
5979 // Arguments:
5980 //    reg      -    register to be updated
5981 //    interval -    interval to be assigned
5982 //    regType  -    register type
5983 //
5984 // Return Value:
5985 //    None
5986 //
5987 // Assumptions:
5988 //    For ARM32, when "regType" is TYP_DOUBLE, "reg" should be an even-numbered
5989 //    float register, i.e. the lower half of the double register.
5990 //
5991 // Note:
5992 //    For ARM32, the two float registers that make up a double register are updated
5993 //    together when "regType" is TYP_DOUBLE.
5994 //
5995 void LinearScan::updatePreviousInterval(RegRecord* reg, Interval* interval, RegisterType regType)
5996 {
5997     reg->previousInterval = interval;
5998
5999 #ifdef _TARGET_ARM_
6000     // Update overlapping floating point register for TYP_DOUBLE
6001     if (regType == TYP_DOUBLE)
6002     {
6003         RegRecord* anotherHalfReg = findAnotherHalfRegRec(reg);
6004
6005         anotherHalfReg->previousInterval = interval;
6006     }
6007 #endif
6008 }
6009
6010 //------------------------------------------------------------------------
6011 // resolveLocalRef: Update the graph for a local reference, and track the
6012 //      register (if any) that is currently occupied.
6013 // Arguments:
6014 //      block:              The block containing treeNode (nullptr iff treeNode is nullptr)
6015 //      treeNode:           The lclVar that's being resolved
6016 //      currentRefPosition: the RefPosition associated with the treeNode
6017 //
6018 // Details:
6019 // This method is called for each local reference, during the resolveRegisters
6020 // phase of LSRA.  It is responsible for keeping the following in sync:
6021 //   - varDsc->lvRegNum (and lvOtherReg) contain the unique register location.
6022 //     If it is not in the same register through its lifetime, it is set to REG_STK.
6023 //   - interval->physReg is set to the assigned register
6024 //     (i.e. at the code location which is currently being handled by resolveRegisters())
6025 //     - interval->isActive is true iff the interval is live and occupying a register
6026 //     - interval->isSpilled should have already been set to true if the interval is EVER spilled
6027 //     - interval->isSplit is set to true if the interval does not occupy the same
6028 //       register throughout the method
6029 //   - RegRecord->assignedInterval points to the interval which currently occupies
6030 //     the register
6031 //   - For each lclVar node:
6032 //     - gtRegNum/gtRegPair is set to the currently allocated register(s).
6033 //     - GTF_SPILLED is set on a use if it must be reloaded prior to use.
6034 //     - GTF_SPILL is set if it must be spilled after use.
6035 //
6036 // A copyReg is an ugly case where the variable must be in a specific (fixed) register,
6037 // but it currently resides elsewhere.  The register allocator must track the use of the
6038 // fixed register, but it marks the lclVar node with the register it currently lives in
6039 // and the code generator does the necessary move.
6040 //
6041 // Before beginning, the varDsc for each parameter must be set to its initial location.
6042 //
6043 // NICE: Consider tracking whether an Interval is always in the same location (register/stack)
6044 // in which case it will require no resolution.
6045 //
6046 void LinearScan::resolveLocalRef(BasicBlock* block, GenTree* treeNode, RefPosition* currentRefPosition)
6047 {
6048     assert((block == nullptr) == (treeNode == nullptr));
6049     assert(enregisterLocalVars);
6050
6051     // Is this a tracked local?  Or just a register allocated for loading
6052     // a non-tracked one?
6053     Interval* interval = currentRefPosition->getInterval();
6054     assert(interval->isLocalVar);
6055
6056     interval->recentRefPosition = currentRefPosition;
6057     LclVarDsc* varDsc           = interval->getLocalVar(compiler);
6058
6059     // NOTE: we set the GTF_VAR_DEATH flag here unless we are extending lifetimes, in which case we write
6060     // this bit in checkLastUses. This is a bit of a hack, but is necessary because codegen requires
6061     // accurate last use info that is not reflected in the lastUse bit on ref positions when we are extending
6062     // lifetimes. See also the comments in checkLastUses.
6063     if ((treeNode != nullptr) && !extendLifetimes())
6064     {
6065         if (currentRefPosition->lastUse)
6066         {
6067             treeNode->gtFlags |= GTF_VAR_DEATH;
6068         }
6069         else
6070         {
6071             treeNode->gtFlags &= ~GTF_VAR_DEATH;
6072         }
6073     }
6074
6075     if (currentRefPosition->registerAssignment == RBM_NONE)
6076     {
6077         assert(currentRefPosition->RegOptional());
6078         assert(interval->isSpilled);
6079
6080         varDsc->lvRegNum = REG_STK;
6081         if (interval->assignedReg != nullptr && interval->assignedReg->assignedInterval == interval)
6082         {
6083             updateAssignedInterval(interval->assignedReg, nullptr, interval->registerType);
6084         }
6085         interval->assignedReg = nullptr;
6086         interval->physReg     = REG_NA;
6087         if (treeNode != nullptr)
6088         {
6089             treeNode->SetContained();
6090         }
6091
6092         return;
6093     }
6094
6095     // In most cases, the assigned and home registers will be the same.
6096     // The exception is the copyReg case, where we've assigned a register for a
6097     // specific purpose, but will be keeping the lclVar's home register unchanged.
6098     regNumber assignedReg = currentRefPosition->assignedReg();
6099     regNumber homeReg     = assignedReg;
6100
6101     // Undo any previous association with a physical register, UNLESS this
6102     // is a copyReg
6103     if (!currentRefPosition->copyReg)
6104     {
6105         regNumber oldAssignedReg = interval->physReg;
6106         if (oldAssignedReg != REG_NA && assignedReg != oldAssignedReg)
6107         {
6108             RegRecord* oldRegRecord = getRegisterRecord(oldAssignedReg);
6109             if (oldRegRecord->assignedInterval == interval)
6110             {
6111                 updateAssignedInterval(oldRegRecord, nullptr, interval->registerType);
6112             }
6113         }
6114     }
6115
6116     if (currentRefPosition->refType == RefTypeUse && !currentRefPosition->reload)
6117     {
6118         // Was this spilled after our predecessor was scheduled?
6119         if (interval->physReg == REG_NA)
6120         {
6121             assert(inVarToRegMaps[curBBNum][varDsc->lvVarIndex] == REG_STK);
6122             currentRefPosition->reload = true;
6123         }
6124     }
6125
6126     bool reload     = currentRefPosition->reload;
6127     bool spillAfter = currentRefPosition->spillAfter;
6128
6129     // In the reload case we either:
6130     // - Set the register to REG_STK if it will be referenced only from the home location, or
6131     // - Set the register to the assigned register and set GTF_SPILLED if it must be loaded into a register.
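    // For example (illustrative): a spilled lclVar reloaded for this use has
    // varDsc->lvRegNum == REG_STK (its unique home is the stack), interval->physReg
    // records the reload register (unless it is also spilled after this use), and the
    // tree node is marked GTF_SPILLED so that codegen emits the load.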
6132     if (reload)
6133     {
6134         assert(currentRefPosition->refType != RefTypeDef);
6135         assert(interval->isSpilled);
6136         varDsc->lvRegNum = REG_STK;
6137         if (!spillAfter)
6138         {
6139             interval->physReg = assignedReg;
6140         }
6141
6142         // If there is no treeNode, this must be a RefTypeExpUse, in
6143         // which case we did the reload already
6144         if (treeNode != nullptr)
6145         {
6146             treeNode->gtFlags |= GTF_SPILLED;
6147             if (spillAfter)
6148             {
6149                 if (currentRefPosition->RegOptional())
6150                 {
6151                     // This is a use of lclVar that is flagged as reg-optional
6152                     // by lower/codegen and marked for both reload and spillAfter.
6153                     // In this case we can avoid unnecessary reload and spill
6154                     // by setting reg on lclVar to REG_STK and reg on tree node
6155                     // to REG_NA.  Codegen will generate the code by considering
6156                     // it as a contained memory operand.
6157                     //
6158                     // Note that varDsc->lvRegNum is already set to REG_STK above.
6159                     interval->physReg  = REG_NA;
6160                     treeNode->gtRegNum = REG_NA;
6161                     treeNode->gtFlags &= ~GTF_SPILLED;
6162                     treeNode->SetContained();
6163                 }
6164                 else
6165                 {
6166                     treeNode->gtFlags |= GTF_SPILL;
6167                 }
6168             }
6169         }
6170         else
6171         {
6172             assert(currentRefPosition->refType == RefTypeExpUse);
6173         }
6174     }
6175     else if (spillAfter && !RefTypeIsUse(currentRefPosition->refType))
6176     {
6177         // In the case of a pure def, don't bother spilling - just assign it to the
6178         // stack.  However, we need to remember that it was spilled.
6179
6180         assert(interval->isSpilled);
6181         varDsc->lvRegNum  = REG_STK;
6182         interval->physReg = REG_NA;
6183         if (treeNode != nullptr)
6184         {
6185             treeNode->gtRegNum = REG_NA;
6186         }
6187     }
6188     else // Not reload and Not pure-def that's spillAfter
6189     {
6190         if (currentRefPosition->copyReg || currentRefPosition->moveReg)
6191         {
6192             // For a copyReg or moveReg, we have two cases:
6193             //  - In the first case, we have a fixedReg - i.e. a register which the code
6194             //    generator is constrained to use.
6195             //    The code generator will generate the appropriate move to meet the requirement.
6196             //  - In the second case, we were forced to use a different register because of
6197             //    interference (or JitStressRegs).
6198             //    In this case, we generate a GT_COPY.
6199             // In either case, we annotate the treeNode with the register in which the value
6200             // currently lives.  For moveReg, the homeReg is the new register (as assigned above).
6201             // But for copyReg, the homeReg remains unchanged.
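            // For example (an illustrative x64 sketch): for a copyReg where the
            // interval currently lives in RSI but this use must be in RDI, we set
            // gtRegNum to RSI here and insertCopyOrReload (below) creates the GT_COPY
            // to RDI; the home register for subsequent uses remains RSI.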
6202
6203             assert(treeNode != nullptr);
6204             treeNode->gtRegNum = interval->physReg;
6205
6206             if (currentRefPosition->copyReg)
6207             {
6208                 homeReg = interval->physReg;
6209             }
6210             else
6211             {
6212                 assert(interval->isSplit);
6213                 interval->physReg = assignedReg;
6214             }
6215
6216             if (!currentRefPosition->isFixedRegRef || currentRefPosition->moveReg)
6217             {
6218                 // This is the second case, where we need to generate a copy
6219                 insertCopyOrReload(block, treeNode, currentRefPosition->getMultiRegIdx(), currentRefPosition);
6220             }
6221         }
6222         else
6223         {
6224             interval->physReg = assignedReg;
6225
6226             if (!interval->isSpilled && !interval->isSplit)
6227             {
6228                 if (varDsc->lvRegNum != REG_STK)
6229                 {
6230                     // If the register assignments don't match, then this interval is split.
6231                     if (varDsc->lvRegNum != assignedReg)
6232                     {
6233                         setIntervalAsSplit(interval);
6234                         varDsc->lvRegNum = REG_STK;
6235                     }
6236                 }
6237                 else
6238                 {
6239                     varDsc->lvRegNum = assignedReg;
6240                 }
6241             }
6242         }
6243         if (spillAfter)
6244         {
6245             if (treeNode != nullptr)
6246             {
6247                 treeNode->gtFlags |= GTF_SPILL;
6248             }
6249             assert(interval->isSpilled);
6250             interval->physReg = REG_NA;
6251             varDsc->lvRegNum  = REG_STK;
6252         }
6253     }
6254
6255     // Update the physRegRecord for the register, so that we know what vars are in
6256     // regs at the block boundaries
6257     RegRecord* physRegRecord = getRegisterRecord(homeReg);
6258     if (spillAfter || currentRefPosition->lastUse)
6259     {
6260         interval->isActive    = false;
6261         interval->assignedReg = nullptr;
6262         interval->physReg     = REG_NA;
6263
6264         updateAssignedInterval(physRegRecord, nullptr, interval->registerType);
6265     }
6266     else
6267     {
6268         interval->isActive    = true;
6269         interval->assignedReg = physRegRecord;
6270
6271         updateAssignedInterval(physRegRecord, interval, interval->registerType);
6272     }
6273 }
6274
6275 void LinearScan::writeRegisters(RefPosition* currentRefPosition, GenTree* tree)
6276 {
6277     lsraAssignRegToTree(tree, currentRefPosition->assignedReg(), currentRefPosition->getMultiRegIdx());
6278 }
6279
6280 //------------------------------------------------------------------------
6281 // insertCopyOrReload: Insert a copy in the case where a tree node value must be moved
6282 //   to a different register at the point of use (GT_COPY), or it is reloaded to a different register
6283 //   than the one it was spilled from (GT_RELOAD).
6284 //
6285 // Arguments:
6286 //    block             - basic block in which GT_COPY/GT_RELOAD is inserted.
6287 //    tree              - This is the node to copy or reload.
6288 //                        Insert copy or reload node between this node and its parent.
6289 //    multiRegIdx       - register position of tree node for which copy or reload is needed.
6290 //    refPosition       - The RefPosition at which copy or reload will take place.
6291 //
6292 // Notes:
6293 //    The GT_COPY or GT_RELOAD will be inserted in the proper spot in execution order where the reload is to occur.
6294 //
6295 // For example, for this tree (numbers are execution order, lower is earlier and higher is later):
6296 //
6297 //                                   +---------+----------+
6298 //                                   |       GT_ADD (3)   |
6299 //                                   +---------+----------+
6300 //                                             |
6301 //                                           /   \
6302 //                                         /       \
6303 //                                       /           \
6304 //                   +-------------------+           +----------------------+
6305 //                   |         x (1)     | "tree"    |         y (2)        |
6306 //                   +-------------------+           +----------------------+
6307 //
6308 // generate this tree:
6309 //
6310 //                                   +---------+----------+
6311 //                                   |       GT_ADD (4)   |
6312 //                                   +---------+----------+
6313 //                                             |
6314 //                                           /   \
6315 //                                         /       \
6316 //                                       /           \
6317 //                   +-------------------+           +----------------------+
6318 //                   |  GT_RELOAD (3)    |           |         y (2)        |
6319 //                   +-------------------+           +----------------------+
6320 //                             |
6321 //                   +-------------------+
6322 //                   |         x (1)     | "tree"
6323 //                   +-------------------+
6324 //
6325 // Note in particular that the GT_RELOAD node gets inserted in execution order immediately before the parent of "tree",
6326 // which seems a bit weird since normally a node's parent (in this case, the parent of "x", GT_RELOAD in the "after"
6327 // picture) immediately follows all of its children (that is, normally the execution ordering is postorder).
6328 // The ordering must be this weird "out of normal order" way because the "x" node is being spilled, probably
6329 // because the expression in the tree represented above by "y" has high register requirements. We don't want
6330 // to reload immediately, of course. So we put GT_RELOAD where the reload should actually happen.
6331 //
6332 // Note that GT_RELOAD is required when we reload to a different register than the one we spilled to. It can also be
6333 // used if we reload to the same register. Normally, though, in that case we just mark the node with GTF_SPILLED,
6334 // and the unspilling code automatically reuses the same register, and does the reload when it notices that flag
6335 // when considering a node's operands.
6336 //
6337 void LinearScan::insertCopyOrReload(BasicBlock* block, GenTree* tree, unsigned multiRegIdx, RefPosition* refPosition)
6338 {
6339     LIR::Range& blockRange = LIR::AsRange(block);
6340
6341     LIR::Use treeUse;
6342     bool     foundUse = blockRange.TryGetUse(tree, &treeUse);
6343     assert(foundUse);
6344
6345     GenTree* parent = treeUse.User();
6346
6347     genTreeOps oper;
6348     if (refPosition->reload)
6349     {
6350         oper = GT_RELOAD;
6351     }
6352     else
6353     {
6354         oper = GT_COPY;
6355
6356 #if TRACK_LSRA_STATS
6357         updateLsraStat(LSRA_STAT_COPY_REG, block->bbNum);
6358 #endif
6359     }
6360
6361     // If the parent is a reload/copy node, then tree must be a multi-reg node
6362     // that has already had one of its registers spilled.
6363     // It is possible that one of its RefTypeDef positions got spilled and the next
6364     // use of it requires it to be in a different register.
6365     //
6366     // In this case set the i'th position reg of reload/copy node to the reg allocated
6367     // for copy/reload refPosition.  Essentially a copy/reload node will have a reg
6368     // for each multi-reg position of its child. If there is a valid reg in i'th
6369     // position of GT_COPY or GT_RELOAD node then the corresponding result of its
6370     // child needs to be copied or reloaded to that reg.
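    // For example (illustrative): for a multi-reg node whose second result must be
    // copied, the parent GT_COPY carries one reg slot per position; this RefPosition
    // fills in only the slot at multiRegIdx, leaving the other slots REG_NA (meaning
    // no copy is needed for those positions).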
6371     if (parent->IsCopyOrReload())
6372     {
6373         noway_assert(parent->OperGet() == oper);
6374         noway_assert(tree->IsMultiRegNode());
6375         GenTreeCopyOrReload* copyOrReload = parent->AsCopyOrReload();
6376         noway_assert(copyOrReload->GetRegNumByIdx(multiRegIdx) == REG_NA);
6377         copyOrReload->SetRegNumByIdx(refPosition->assignedReg(), multiRegIdx);
6378     }
6379     else
6380     {
6381         // Create the new node, with "tree" as its only child.
6382         var_types treeType = tree->TypeGet();
6383
6384         GenTreeCopyOrReload* newNode = new (compiler, oper) GenTreeCopyOrReload(oper, treeType, tree);
6385         assert(refPosition->registerAssignment != RBM_NONE);
6386         SetLsraAdded(newNode);
6387         newNode->SetRegNumByIdx(refPosition->assignedReg(), multiRegIdx);
6388         if (refPosition->copyReg)
6389         {
6390             // This is a TEMPORARY copy
6391             assert(isCandidateLocalRef(tree));
6392             newNode->gtFlags |= GTF_VAR_DEATH;
6393         }
6394
6395         // Insert the copy/reload after the spilled node and replace the use of the original node with a use
6396         // of the copy/reload.
6397         blockRange.InsertAfter(tree, newNode);
6398         treeUse.ReplaceWith(compiler, newNode);
6399     }
6400 }
6401
6402 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
6403 //------------------------------------------------------------------------
6404 // insertUpperVectorSave: Insert code to save the upper half of a vector that lives
6405 //                        in a callee-save register at the point of a kill (the upper half is
6406 //                        not preserved).
6407 //
6408 // Arguments:
6409 //    tree              - This is the node before which we will insert the Save.
6410 //                        It will be a call or some node that turns into a call.
6411 //    refPosition       - The RefTypeUpperVectorSave RefPosition.
6412 //    upperInterval     - The Interval for the upper half of the large vector lclVar.
6413 //    block             - the BasicBlock containing the call.
6414 //
6415 void LinearScan::insertUpperVectorSave(GenTree*     tree,
6416                                        RefPosition* refPosition,
6417                                        Interval*    upperVectorInterval,
6418                                        BasicBlock*  block)
6419 {
6420     JITDUMP("Inserting UpperVectorSave for RP #%d before %d.%s:\n", refPosition->rpNum, tree->gtTreeID,
6421             GenTree::OpName(tree->gtOper));
6422     Interval* lclVarInterval = upperVectorInterval->relatedInterval;
6423     assert(lclVarInterval->isLocalVar == true);
6424     assert(refPosition->getInterval() == upperVectorInterval);
6425     regNumber lclVarReg = lclVarInterval->physReg;
6426     if (lclVarReg == REG_NA)
6427     {
6428         return;
6429     }
6430
6431     LclVarDsc* varDsc = compiler->lvaTable + lclVarInterval->varNum;
6432     assert(varTypeNeedsPartialCalleeSave(varDsc->lvType));
6433     assert((genRegMask(lclVarReg) & RBM_FLT_CALLEE_SAVED) != RBM_NONE);
6434
6435     // On Arm64, we must always have a register to save the upper half,
6436     // while on x86 we can spill directly to memory.
6437     regNumber spillReg = refPosition->assignedReg();
6438 #ifdef _TARGET_ARM64_
6439     bool spillToMem = refPosition->spillAfter;
6440     assert(spillReg != REG_NA);
6441 #else
6442     bool spillToMem = (spillReg == REG_NA);
6443     assert(!refPosition->spillAfter);
6444 #endif
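    // For example (illustrative): on x86/x64 an assigned reg of REG_NA means the
    // upper half is stored directly to memory (GTF_SPILL is set on the save node
    // below), while on Arm64 the upper half is always copied to spillReg, and is
    // additionally spilled from there when spillAfter is set.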
6445
6446     LIR::Range& blockRange = LIR::AsRange(block);
6447
6448     // Insert the save before the call.
6449
6450     GenTree* saveLcl  = compiler->gtNewLclvNode(lclVarInterval->varNum, varDsc->lvType);
6451     saveLcl->gtRegNum = lclVarReg;
6452     SetLsraAdded(saveLcl);
6453
6454     GenTreeSIMD* simdNode =
6455         new (compiler, GT_SIMD) GenTreeSIMD(LargeVectorSaveType, saveLcl, nullptr, SIMDIntrinsicUpperSave,
6456                                             varDsc->lvBaseType, genTypeSize(varDsc->lvType));
6457     SetLsraAdded(simdNode);
6458     simdNode->gtRegNum = spillReg;
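    // If we are spilling to memory, mark the save node so that codegen will store the upper
    // half; otherwise, record on the interval the callee-save register that now holds it.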
6459     if (spillToMem)
6460     {
6461         simdNode->gtFlags |= GTF_SPILL;
6462         upperVectorInterval->physReg = REG_NA;
6463     }
6464     else
6465     {
6466         assert((genRegMask(spillReg) & RBM_FLT_CALLEE_SAVED) != RBM_NONE);
6467         upperVectorInterval->physReg = spillReg;
6468     }
6469
6470     blockRange.InsertBefore(tree, LIR::SeqTree(compiler, simdNode));
6471     DISPTREE(simdNode);
6472     JITDUMP("\n");
6473 }
6474
6475 //------------------------------------------------------------------------
6476 // insertUpperVectorRestore: Insert code to restore the upper half of a vector that has been partially spilled.
6477 //
6478 // Arguments:
6479 //    tree                - This is the node for which we will insert the Restore.
6480 //                          If non-null, it will be a use of the large vector lclVar.
6481 //                          If null, the Restore will be added to the end of the block.
//    refPosition         - The RefTypeUpperVectorRestore RefPosition.
6482 //    upperVectorInterval - The Interval for the upper vector for the lclVar.
6483 //    block               - the BasicBlock into which we will be inserting the code.
6484 //
6485 // Notes:
6486 //    In the case where 'tree' is non-null, we will insert the restore just prior to
6487 //    its use, in order to ensure the proper ordering.
6488 //
6489 void LinearScan::insertUpperVectorRestore(GenTree*     tree,
6490                                           RefPosition* refPosition,
6491                                           Interval*    upperVectorInterval,
6492                                           BasicBlock*  block)
6493 {
6494     JITDUMP("Adding UpperVectorRestore for RP #%d ", refPosition->rpNum);
6495     Interval* lclVarInterval = upperVectorInterval->relatedInterval;
6496     assert(lclVarInterval->isLocalVar == true);
6497     regNumber lclVarReg = lclVarInterval->physReg;
6498
6499     // We should not call this method if the lclVar is not in a register (we should have simply marked the entire
6500     // lclVar as spilled).
6501     assert(lclVarReg != REG_NA);
6502     LclVarDsc* varDsc = compiler->lvaTable + lclVarInterval->varNum;
6503     assert(varTypeNeedsPartialCalleeSave(varDsc->lvType));
6504
6505     GenTree* restoreLcl  = compiler->gtNewLclvNode(lclVarInterval->varNum, varDsc->lvType);
6507     restoreLcl->gtRegNum = lclVarReg;
6508     SetLsraAdded(restoreLcl);
6509
6510     GenTreeSIMD* simdNode =
6511         new (compiler, GT_SIMD) GenTreeSIMD(varDsc->lvType, restoreLcl, nullptr, SIMDIntrinsicUpperRestore,
6512                                             varDsc->lvBaseType, genTypeSize(varDsc->lvType));
6513
6514     regNumber restoreReg = upperVectorInterval->physReg;
6515     SetLsraAdded(simdNode);
6516
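    // If the upper half is not currently in a register, it must be restored from the stack
    // (the lclVar's home location).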
6517     if (restoreReg == REG_NA)
6518     {
6519         // We need a stack location for this.
6520         assert(lclVarInterval->isSpilled);
6521 #ifdef _TARGET_AMD64_
6522         assert(refPosition->assignedReg() == REG_NA);
6523         simdNode->gtFlags |= GTF_NOREG_AT_USE;
6524 #else
6525         simdNode->gtFlags |= GTF_SPILLED;
6526         assert(refPosition->assignedReg() != REG_NA);
6527         restoreReg = refPosition->assignedReg();
6528 #endif
6529     }
6530     simdNode->gtRegNum = restoreReg;
6531
6532     LIR::Range& blockRange = LIR::AsRange(block);
6533     JITDUMP("Adding UpperVectorRestore ");
6534     if (tree != nullptr)
6535     {
6536         JITDUMP("before %d.%s:\n", tree->gtTreeID, GenTree::OpName(tree->gtOper));
6537         LIR::Use treeUse;
6538         bool     foundUse = blockRange.TryGetUse(tree, &treeUse);
6539         assert(foundUse);
6540         // We need to insert the restore prior to the use, not (necessarily) immediately after the lclVar.
6541         blockRange.InsertBefore(treeUse.User(), LIR::SeqTree(compiler, simdNode));
6542     }
6543     else
6544     {
6545         JITDUMP("at end of BB%02u:\n", block->bbNum);
6546         if (block->bbJumpKind == BBJ_COND || block->bbJumpKind == BBJ_SWITCH)
6547         {
6548             noway_assert(!blockRange.IsEmpty());
6549
6550             GenTree* branch = blockRange.LastNode();
6551             assert(branch->OperIsConditionalJump() || branch->OperGet() == GT_SWITCH_TABLE ||
6552                    branch->OperGet() == GT_SWITCH);
6553
6554             blockRange.InsertBefore(branch, LIR::SeqTree(compiler, simdNode));
6555         }
6556         else
6557         {
6558             assert(block->bbJumpKind == BBJ_NONE || block->bbJumpKind == BBJ_ALWAYS);
6559             blockRange.InsertAtEnd(LIR::SeqTree(compiler, simdNode));
6560         }
6561     }
6562     DISPTREE(simdNode);
6563     JITDUMP("\n");
6564 }
6565 #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
6566
6567 //------------------------------------------------------------------------
6568 // initMaxSpill: Initializes the LinearScan members used to track the max number
6569 //               of concurrent spills.  This is needed so that we can set the
6570 //               fields in Compiler, so that the code generator, in turn can
6571 //               allocate the right number of spill locations.
6572 //
6573 // Arguments:
6574 //    None.
6575 //
6576 // Return Value:
6577 //    None.
6578 //
6579 // Assumptions:
6580 //    This is called before any calls to updateMaxSpill().
6581
6582 void LinearScan::initMaxSpill()
6583 {
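    // These flags record whether an FP call will need an extra temp for moving a
    // float/double value between an xmm register and the x87 stack; they are consumed
    // by recordMaxSpill() (see its _TARGET_X86_ handling).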
6584     needDoubleTmpForFPCall = false;
6585     needFloatTmpForFPCall  = false;
6586     for (int i = 0; i < TYP_COUNT; i++)
6587     {
6588         maxSpill[i]     = 0;
6589         currentSpill[i] = 0;
6590     }
6591 }
6592
6593 //------------------------------------------------------------------------
6594 // recordMaxSpill: Sets the fields in Compiler for the max number of concurrent spills.
6595 //                 (See the comment on initMaxSpill.)
6596 //
6597 // Arguments:
6598 //    None.
6599 //
6600 // Return Value:
6601 //    None.
6602 //
6603 // Assumptions:
6604 //    This is called after updateMaxSpill() has been called for all "real"
6605 //    RefPositions.
6606
6607 void LinearScan::recordMaxSpill()
6608 {
6609     // Note: due to the temp normalization process (see tmpNormalizeType)
6610     // only a few types should actually be seen here.
6611     JITDUMP("Recording the maximum number of concurrent spills:\n");
6612 #ifdef _TARGET_X86_
6613     var_types returnType = RegSet::tmpNormalizeType(compiler->info.compRetType);
6614     if (needDoubleTmpForFPCall || (returnType == TYP_DOUBLE))
6615     {
6616         JITDUMP("Adding a spill temp for moving a double call/return value between xmm reg and x87 stack.\n");
6617         maxSpill[TYP_DOUBLE] += 1;
6618     }
6619     if (needFloatTmpForFPCall || (returnType == TYP_FLOAT))
6620     {
6621         JITDUMP("Adding a spill temp for moving a float call/return value between xmm reg and x87 stack.\n");
6622         maxSpill[TYP_FLOAT] += 1;
6623     }
6624 #endif // _TARGET_X86_
6625     for (int i = 0; i < TYP_COUNT; i++)
6626     {
6627         if (var_types(i) != RegSet::tmpNormalizeType(var_types(i)))
6628         {
6629             // Only normalized types should have anything in the maxSpill array.
6630             // We assume here that if type 'i' does not normalize to itself, then
6631             // nothing else normalizes to 'i', either.
6632             assert(maxSpill[i] == 0);
6633         }
6634         if (maxSpill[i] != 0)
6635         {
6636             JITDUMP("  %s: %d\n", varTypeName(var_types(i)), maxSpill[i]);
6637             compiler->codeGen->regSet.tmpPreAllocateTemps(var_types(i), maxSpill[i]);
6638         }
6639     }
6640     JITDUMP("\n");
6641 }
6642
6643 //------------------------------------------------------------------------
6644 // updateMaxSpill: Update the maximum number of concurrent spills
6645 //
6646 // Arguments:
6647 //    refPosition - the current RefPosition being handled
6648 //
6649 // Return Value:
6650 //    None.
6651 //
6652 // Assumptions:
6653 //    The RefPosition has an associated interval (getInterval() will
6654 //    otherwise assert).
6655 //
6656 // Notes:
6657 //    This is called for each "real" RefPosition during the writeback
6658 //    phase of LSRA.  It keeps track of how many concurrently-live
6659 //    spills there are, and the largest number seen so far.
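//    For example (illustrative): if two TYP_INT tree temps are spilled (spillAfter)
//    before either is reloaded, currentSpill[TYP_INT] reaches 2, maxSpill[TYP_INT]
//    becomes 2, and codegen will pre-allocate two int-sized spill temps.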
6660
6661 void LinearScan::updateMaxSpill(RefPosition* refPosition)
6662 {
6663     RefType refType = refPosition->refType;
6664
6665 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
6666     if ((refType == RefTypeUpperVectorSave) || (refType == RefTypeUpperVectorRestore))
6667     {
6668         Interval* interval = refPosition->getInterval();
6669         // If this is not an 'upperVector', it must be a tree temp that has been already
6670         // (fully) spilled.
6671         if (!interval->isUpperVector)
6672         {
6673             assert(interval->firstRefPosition->spillAfter);
6674         }
6675         else
6676         {
6677             // The UpperVector RefPositions spill to the localVar's home location.
6678             Interval* lclVarInterval = interval->relatedInterval;
6679             assert(lclVarInterval->isSpilled || (!refPosition->spillAfter && !refPosition->reload));
6680         }
6681         return;
6682     }
6683 #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
6684     if (refPosition->spillAfter || refPosition->reload ||
6685         (refPosition->RegOptional() && refPosition->assignedReg() == REG_NA))
6686     {
6687         Interval* interval = refPosition->getInterval();
6688         if (!interval->isLocalVar)
6689         {
6690             // The tmp allocation logic 'normalizes' types to a small number of
6691             // types that need distinct stack locations from each other.
6692             // Those types are currently gc refs, byrefs, <= 4 byte non-GC items,
6693             // 8-byte non-GC items, and 16-byte or 32-byte SIMD vectors.
6694             // LSRA is agnostic to those choices but needs
6695             // to know what they are here.
6696             var_types typ;
6697
6698 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
6699             if (refType == RefTypeUpperVectorSave)
6700             {
6701                 typ = LargeVectorSaveType;
6702             }
6703             else
6704 #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
6705             {
6706                 GenTree* treeNode = refPosition->treeNode;
6707                 if (treeNode == nullptr)
6708                 {
6709                     assert(RefTypeIsUse(refType));
6710                     treeNode = interval->firstRefPosition->treeNode;
6711                 }
6712                 assert(treeNode != nullptr);
6713
6714                 // In case of multi-reg call nodes, we need to use the type
6715                 // of the return register given by multiRegIdx of the refposition.
6716                 if (treeNode->IsMultiRegCall())
6717                 {
6718                     ReturnTypeDesc* retTypeDesc = treeNode->AsCall()->GetReturnTypeDesc();
6719                     typ                         = retTypeDesc->GetReturnRegType(refPosition->getMultiRegIdx());
6720                 }
6721 #if FEATURE_ARG_SPLIT
6722                 else if (treeNode->OperIsPutArgSplit())
6723                 {
6724                     typ = treeNode->AsPutArgSplit()->GetRegType(refPosition->getMultiRegIdx());
6725                 }
6726 #if !defined(_TARGET_64BIT_)
6727                 else if (treeNode->OperIsPutArgReg())
6728                 {
6729                     // For double arg regs, the type is changed to long since they must be passed via `r0-r3`.
6730                     // However, when they get spilled, they should be treated as separate int registers.
6731                     var_types typNode = treeNode->TypeGet();
6732                     typ               = (typNode == TYP_LONG) ? TYP_INT : typNode;
6733                 }
6734 #endif // !_TARGET_64BIT_
6735 #endif // FEATURE_ARG_SPLIT
6736                 else
6737                 {
6738                     typ = treeNode->TypeGet();
6739                 }
6740                 typ = RegSet::tmpNormalizeType(typ);
6741             }
6742
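            // Maintain the running count of concurrently-live spill temps of this
            // normalized type, tracking its high-water mark in maxSpill.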
6743             if (refPosition->spillAfter && !refPosition->reload)
6744             {
6745                 currentSpill[typ]++;
6746                 if (currentSpill[typ] > maxSpill[typ])
6747                 {
6748                     maxSpill[typ] = currentSpill[typ];
6749                 }
6750             }
6751             else if (refPosition->reload)
6752             {
6753                 assert(currentSpill[typ] > 0);
6754                 currentSpill[typ]--;
6755             }
6756             else if (refPosition->RegOptional() && refPosition->assignedReg() == REG_NA)
6757             {
6758                 // This is a spill temp that is not getting reloaded into a register because
6759                 // it is marked "allocate if profitable" (regOptional) and is being used
6760                 // directly from its memory location.  To properly account for the max spill
6761                 // of 'typ', decrement the spill count.
6762                 assert(RefTypeIsUse(refType));
6763                 assert(currentSpill[typ] > 0);
6764                 currentSpill[typ]--;
6765             }
6766             JITDUMP("  Max spill for %s is %d\n", varTypeName(typ), maxSpill[typ]);
6767         }
6768     }
6769 }
6770
6771 //------------------------------------------------------------------------
6772 // resolveRegisters: This is the final phase of register allocation.  It writes the register
6773 //    assignments to the tree, and performs resolution across joins and backedges.
//
6774 void LinearScan::resolveRegisters()
6775 {
6776     // Iterate over the tree and the RefPositions in lockstep
6777     //  - annotate the tree with register assignments by setting gtRegNum or gtRegPair (for longs)
6778     //    on the tree node
6779     //  - track globally-live var locations
6780     //  - add resolution points at split/merge/critical points as needed
6781
6782     // Need to use the same traversal order as the one that assigns the location numbers.
6783
6784     // Dummy RefPositions have been added at any split, join or critical edge, at the
6785     // point where resolution may be required.  These are located:
6786     //  - for a split, at the top of the non-adjacent block
6787     //  - for a join, at the bottom of the non-adjacent joining block
6788     //  - for a critical edge, at the top of the target block of each critical
6789     //    edge.
6790     // Note that a target block may have multiple incoming critical or split edges.
6791     //
6792     // These RefPositions record the expected location of the Interval at that point.
6793     // At each branch, we identify the location of each liveOut interval, and check
6794     // against the RefPositions at the target.
6795
6796     BasicBlock*  block;
6797     LsraLocation currentLocation = MinLocation;
6798
6799     // Clear register assignments - these will be reestablished as lclVar defs (including RefTypeParamDefs)
6800     // are encountered.
6801     if (enregisterLocalVars)
6802     {
6803         for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg))
6804         {
6805             RegRecord* physRegRecord    = getRegisterRecord(reg);
6806             Interval*  assignedInterval = physRegRecord->assignedInterval;
6807             if (assignedInterval != nullptr)
6808             {
6809                 assignedInterval->assignedReg = nullptr;
6810                 assignedInterval->physReg     = REG_NA;
6811             }
6812             physRegRecord->assignedInterval  = nullptr;
6813             physRegRecord->recentRefPosition = nullptr;
6814         }
6815
6816         // Clear "recentRefPosition" for lclVar intervals
6817         for (unsigned varIndex = 0; varIndex < compiler->lvaTrackedCount; varIndex++)
6818         {
6819             if (localVarIntervals[varIndex] != nullptr)
6820             {
6821                 localVarIntervals[varIndex]->recentRefPosition = nullptr;
6822                 localVarIntervals[varIndex]->isActive          = false;
6823             }
6824             else
6825             {
6826                 assert(compiler->lvaTable[compiler->lvaTrackedToVarNum[varIndex]].lvLRACandidate == false);
6827             }
6828         }
6829     }
6830
6831     // handle incoming arguments and special temps
6832     RefPositionIterator refPosIterator     = refPositions.begin();
6833     RefPosition*        currentRefPosition = &refPosIterator;
6834
6835     if (enregisterLocalVars)
6836     {
6837         VarToRegMap entryVarToRegMap = inVarToRegMaps[compiler->fgFirstBB->bbNum];
6838         for (; refPosIterator != refPositions.end() &&
6839                (currentRefPosition->refType == RefTypeParamDef || currentRefPosition->refType == RefTypeZeroInit);
6840              ++refPosIterator, currentRefPosition = &refPosIterator)
6841         {
6842             Interval* interval = currentRefPosition->getInterval();
6843             assert(interval != nullptr && interval->isLocalVar);
6844             resolveLocalRef(nullptr, nullptr, currentRefPosition);
6845             regNumber reg      = REG_STK;
6846             int       varIndex = interval->getVarIndex(compiler);
6847
6848             if (!currentRefPosition->spillAfter && currentRefPosition->registerAssignment != RBM_NONE)
6849             {
6850                 reg = currentRefPosition->assignedReg();
6851             }
6852             else
6853             {
6854                 reg                = REG_STK;
6855                 interval->isActive = false;
6856             }
6857             setVarReg(entryVarToRegMap, varIndex, reg);
6858         }
6859     }
6860     else
6861     {
6862         assert(refPosIterator == refPositions.end() ||
6863                (refPosIterator->refType != RefTypeParamDef && refPosIterator->refType != RefTypeZeroInit));
6864     }
6865
6866     BasicBlock* insertionBlock = compiler->fgFirstBB;
6867     GenTree*    insertionPoint = LIR::AsRange(insertionBlock).FirstNonPhiNode();
6868
6869     // write back assignments
6870     for (block = startBlockSequence(); block != nullptr; block = moveToNextBlock())
6871     {
6872         assert(curBBNum == block->bbNum);
6873
6874         if (enregisterLocalVars)
6875         {
6876             // Record the var locations at the start of this block.
6877             // (If it's fgFirstBB, we've already done that above, see entryVarToRegMap)
6878
6879             curBBStartLocation = currentRefPosition->nodeLocation;
6880             if (block != compiler->fgFirstBB)
6881             {
6882                 processBlockStartLocations(block);
6883             }
6884
6885             // Handle the DummyDefs, updating the incoming var location.
6886             for (; refPosIterator != refPositions.end() && currentRefPosition->refType == RefTypeDummyDef;
6887                  ++refPosIterator, currentRefPosition = &refPosIterator)
6888             {
6889                 assert(currentRefPosition->isIntervalRef());
6890                 // Don't mark dummy defs as reload
6891                 currentRefPosition->reload = false;
6892                 resolveLocalRef(nullptr, nullptr, currentRefPosition);
6893                 regNumber reg;
6894                 if (currentRefPosition->registerAssignment != RBM_NONE)
6895                 {
6896                     reg = currentRefPosition->assignedReg();
6897                 }
6898                 else
6899                 {
6900                     reg                                         = REG_STK;
6901                     currentRefPosition->getInterval()->isActive = false;
6902                 }
6903                 setInVarRegForBB(curBBNum, currentRefPosition->getInterval()->varNum, reg);
6904             }
6905         }
6906
6907         // The next RefPosition should be for the block.  Move past it.
6908         assert(refPosIterator != refPositions.end());
6909         assert(currentRefPosition->refType == RefTypeBB);
6910         ++refPosIterator;
6911         currentRefPosition = &refPosIterator;
6912
6913         // Handle the RefPositions for the block
6914         for (; refPosIterator != refPositions.end() && currentRefPosition->refType != RefTypeBB &&
6915                currentRefPosition->refType != RefTypeDummyDef;
6916              ++refPosIterator, currentRefPosition = &refPosIterator)
6917         {
6918             currentLocation = currentRefPosition->nodeLocation;
6919
6920             // Ensure that the spill & copy info is valid.
6921             // First, if it's reload, it must not be copyReg or moveReg
6922             assert(!currentRefPosition->reload || (!currentRefPosition->copyReg && !currentRefPosition->moveReg));
6923             // If it's copyReg it must not be moveReg, and vice-versa
6924             assert(!currentRefPosition->copyReg || !currentRefPosition->moveReg);
6925
6926             switch (currentRefPosition->refType)
6927             {
6928 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
6929                 case RefTypeUpperVectorSave:
6930                 case RefTypeUpperVectorRestore:
6931 #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
6932                 case RefTypeUse:
6933                 case RefTypeDef:
6934                     // These are the ones we're interested in
6935                     break;
6936                 case RefTypeKill:
6937                 case RefTypeFixedReg:
6938                     // These require no handling at resolution time
6939                     assert(currentRefPosition->referent != nullptr);
6940                     currentRefPosition->referent->recentRefPosition = currentRefPosition;
6941                     continue;
6942                 case RefTypeExpUse:
6943                     // Ignore the ExpUse cases - a RefTypeExpUse would only exist if the
6944                     // variable is dead at the entry to the next block.  So we'll mark
6945                     // it as in its current location and resolution will take care of any
6946                     // mismatch.
6947                     assert(getNextBlock() == nullptr ||
6948                            !VarSetOps::IsMember(compiler, getNextBlock()->bbLiveIn,
6949                                                 currentRefPosition->getInterval()->getVarIndex(compiler)));
6950                     currentRefPosition->referent->recentRefPosition = currentRefPosition;
6951                     continue;
6952                 case RefTypeKillGCRefs:
6953                     // No action to take at resolution time, and no interval to update recentRefPosition for.
6954                     continue;
6955                 case RefTypeDummyDef:
6956                 case RefTypeParamDef:
6957                 case RefTypeZeroInit:
6958                 // Should have handled all of these already
6959                 default:
6960                     unreached();
6961                     break;
6962             }
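            // Update the max-concurrent-spill accounting for this RefPosition.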
6963             updateMaxSpill(currentRefPosition);
6964             GenTree* treeNode = currentRefPosition->treeNode;
6965
6966 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
6967             if (currentRefPosition->refType == RefTypeUpperVectorSave)
6968             {
6969                 // The treeNode is a call or something that might become one.
6970                 noway_assert(treeNode != nullptr);
6971                 // If the associated interval is an UpperVector, this must be a RefPosition for a LargeVectorType
6972                 // LocalVar.
6973                 // Otherwise, this is a non-lclVar interval that has been spilled, and we don't need to do anything.
6974                 Interval* interval = currentRefPosition->getInterval();
6975                 if (interval->isUpperVector)
6976                 {
6977                     Interval* localVarInterval = interval->relatedInterval;
6978                     if ((localVarInterval->physReg != REG_NA) && !localVarInterval->isPartiallySpilled)
6979                     {
6980                         // If the localVar is in a register, it must be a callee-save register (otherwise it would have
6981                         // already been spilled).
6982                         assert(localVarInterval->assignedReg->isCalleeSave);
6983                         // If we have allocated a register to spill it to, we will use that; otherwise, we will spill it
6984                         // to the stack.  Any non-arg caller-save register can be used as a temp register.
6985                         currentRefPosition->referent->recentRefPosition = currentRefPosition;
6986                         insertUpperVectorSave(treeNode, currentRefPosition, currentRefPosition->getInterval(), block);
6987                         localVarInterval->isPartiallySpilled = true;
6988                     }
6989                 }
6990                 else
6991                 {
6992                     // This is a non-lclVar interval that must have been spilled.
6993                     assert(!currentRefPosition->getInterval()->isLocalVar);
6994                     assert(currentRefPosition->getInterval()->firstRefPosition->spillAfter);
6995                 }
6996                 continue;
6997             }
6998             else if (currentRefPosition->refType == RefTypeUpperVectorRestore)
6999             {
7000                 // Since we don't do partial restores of tree temp intervals, this must be an upperVector.
7001                 Interval* interval         = currentRefPosition->getInterval();
7002                 Interval* localVarInterval = interval->relatedInterval;
7003                 assert(interval->isUpperVector && (localVarInterval != nullptr));
7004                 if (localVarInterval->physReg != REG_NA)
7005                 {
7006                     assert(localVarInterval->isPartiallySpilled);
7007                     assert((localVarInterval->assignedReg != nullptr) &&
7008                            (localVarInterval->assignedReg->regNum == localVarInterval->physReg) &&
7009                            (localVarInterval->assignedReg->assignedInterval == localVarInterval));
7010                     insertUpperVectorRestore(treeNode, currentRefPosition, interval, block);
7011                 }
7012                 localVarInterval->isPartiallySpilled = false;
7013             }
7014 #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
7015
7016             // Most uses won't actually need to be recorded (they're on the def).
7017             // In those cases, treeNode will be nullptr.
7018             if (treeNode == nullptr)
7019             {
7020                 // This is either a use, a dead def, or a field of a struct
7021                 Interval* interval = currentRefPosition->getInterval();
7022                 assert(currentRefPosition->refType == RefTypeUse ||
7023                        currentRefPosition->registerAssignment == RBM_NONE || interval->isStructField ||
7024                        interval->IsUpperVector());
7025
7026                 // TODO-Review: Need to handle the case where any of the struct fields
7027                 // are reloaded/spilled at this use
7028                 assert(!interval->isStructField ||
7029                        (currentRefPosition->reload == false && currentRefPosition->spillAfter == false));
7030
7031                 if (interval->isLocalVar && !interval->isStructField)
7032                 {
7033                     LclVarDsc* varDsc = interval->getLocalVar(compiler);
7034
7035                     // This must be a dead definition.  We need to mark the lclVar
7036                     // so that it's not considered a candidate for lvRegister, as
7037                     // this dead def will have to go to the stack.
7038                     assert(currentRefPosition->refType == RefTypeDef);
7039                     varDsc->lvRegNum = REG_STK;
7040                 }
7041                 continue;
7042             }
7043
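            // An internal (temporary) register is not recorded as the node's gtRegNum;
            // instead, it is accumulated into the node's gtRsvdRegs mask.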
7044             if (currentRefPosition->isIntervalRef() && currentRefPosition->getInterval()->isInternal)
7045             {
7046                 treeNode->gtRsvdRegs |= currentRefPosition->registerAssignment;
7047             }
7048             else
7049             {
7050                 writeRegisters(currentRefPosition, treeNode);
7051
7052                 if (treeNode->IsLocal() && currentRefPosition->getInterval()->isLocalVar)
7053                 {
7054                     resolveLocalRef(block, treeNode, currentRefPosition);
7055                 }
7056
7057                 // Mark spill locations on temps
7058                 // (local vars are handled in resolveLocalRef, above)
7059                 // Note that the tree node will be changed from GTF_SPILL to GTF_SPILLED
7060                 // in codegen, taking care of the "reload" case for temps
7061                 else if (currentRefPosition->spillAfter || (currentRefPosition->nextRefPosition != nullptr &&
7062                                                             currentRefPosition->nextRefPosition->moveReg))
7063                 {
7064                     if (treeNode != nullptr && currentRefPosition->isIntervalRef())
7065                     {
7066                         if (currentRefPosition->spillAfter)
7067                         {
7068                             treeNode->gtFlags |= GTF_SPILL;
7069
7070                             // If this is a constant interval that is reusing a pre-existing value, we actually need
7071                             // to generate the value at this point in order to spill it.
7072                             if (treeNode->IsReuseRegVal())
7073                             {
7074                                 treeNode->ResetReuseRegVal();
7075                             }
7076
7077                             // In case of multi-reg call node, also set spill flag on the
7078                             // register specified by multi-reg index of current RefPosition.
7079                             // Note that the spill flag on treeNode indicates that one or
7080                             // more of its allocated registers are in that state.
7081                             if (treeNode->IsMultiRegCall())
7082                             {
7083                                 GenTreeCall* call = treeNode->AsCall();
7084                                 call->SetRegSpillFlagByIdx(GTF_SPILL, currentRefPosition->getMultiRegIdx());
7085                             }
7086 #if FEATURE_ARG_SPLIT
7087                             else if (treeNode->OperIsPutArgSplit())
7088                             {
7089                                 GenTreePutArgSplit* splitArg = treeNode->AsPutArgSplit();
7090                                 splitArg->SetRegSpillFlagByIdx(GTF_SPILL, currentRefPosition->getMultiRegIdx());
7091                             }
7092 #ifdef _TARGET_ARM_
7093                             else if (treeNode->OperIsMultiRegOp())
7094                             {
7095                                 GenTreeMultiRegOp* multiReg = treeNode->AsMultiRegOp();
7096                                 multiReg->SetRegSpillFlagByIdx(GTF_SPILL, currentRefPosition->getMultiRegIdx());
7097                             }
7098 #endif // _TARGET_ARM_
7099 #endif // FEATURE_ARG_SPLIT
7100                         }
7101
7102                         // If the value is reloaded or moved to a different register, we need to insert
7103                         // a node to hold the register to which it should be reloaded
7104                         RefPosition* nextRefPosition = currentRefPosition->nextRefPosition;
7105                         noway_assert(nextRefPosition != nullptr);
7106                         if (INDEBUG(alwaysInsertReload() ||)
7107                                 nextRefPosition->assignedReg() != currentRefPosition->assignedReg())
7108                         {
7109 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
7110                             // Note that we asserted above that this is an Interval RefPosition.
7111                             Interval* currentInterval = currentRefPosition->getInterval();
7112                             if (!currentInterval->isUpperVector && nextRefPosition->refType == RefTypeUpperVectorSave)
7113                             {
7114                                 // The currentRefPosition is a spill of a tree temp.
7115                                 // These have no associated Restore, as we always spill if the vector is
7116                                 // in a register when this is encountered.
7117                                 // The nextRefPosition we're interested in (where we may need to insert a
7118                                 // reload or flag as GTF_NOREG_AT_USE) is the subsequent RefPosition.
7119                                 assert(!currentInterval->isLocalVar);
7120                                 nextRefPosition = nextRefPosition->nextRefPosition;
7121                                 assert(nextRefPosition->refType != RefTypeUpperVectorSave);
7122                             }
7123                             // UpperVector intervals may have unique assignments at each reference.
7124                             if (!currentInterval->isUpperVector)
7125 #endif
7126                             {
7127                                 if (nextRefPosition->assignedReg() != REG_NA)
7128                                 {
7129                                     insertCopyOrReload(block, treeNode, currentRefPosition->getMultiRegIdx(),
7130                                                        nextRefPosition);
7131                                 }
7132                                 else
7133                                 {
7134                                     assert(nextRefPosition->RegOptional());
7135
7136                                     // In case of tree temps, if def is spilled and use didn't
7137                                     // get a register, set a flag on tree node to be treated as
7138                                     // contained at the point of its use.
7139                                     if (currentRefPosition->spillAfter && currentRefPosition->refType == RefTypeDef &&
7140                                         nextRefPosition->refType == RefTypeUse)
7141                                     {
7142                                         assert(nextRefPosition->treeNode == nullptr);
7143                                         treeNode->gtFlags |= GTF_NOREG_AT_USE;
7144                                     }
7145                                 }
7146                             }
7147                         }
7148                     }
7149
7150                     // We should never have to "spill after" a temp use, since
7151                     // they're single use
7152                     else
7153                     {
7154                         unreached();
7155                     }
7156                 }
7157             }
7158         }
7159
7160         if (enregisterLocalVars)
7161         {
7162             processBlockEndLocations(block);
7163         }
7164     }
7165
7166     if (enregisterLocalVars)
7167     {
7168 #ifdef DEBUG
7169         if (VERBOSE)
7170         {
7171             printf("-----------------------\n");
7172             printf("RESOLVING BB BOUNDARIES\n");
7173             printf("-----------------------\n");
7174
7175             printf("Resolution Candidates: ");
7176             dumpConvertedVarSet(compiler, resolutionCandidateVars);
7177             printf("\n");
7178             printf("Has %sCritical Edges\n\n", hasCriticalEdges ? "" : "No");
7179
7180             printf("Prior to Resolution\n");
7181             foreach_block(compiler, block)
7182             {
7183                 printf("\n" FMT_BB " use def in out\n", block->bbNum);
7184                 dumpConvertedVarSet(compiler, block->bbVarUse);
7185                 printf("\n");
7186                 dumpConvertedVarSet(compiler, block->bbVarDef);
7187                 printf("\n");
7188                 dumpConvertedVarSet(compiler, block->bbLiveIn);
7189                 printf("\n");
7190                 dumpConvertedVarSet(compiler, block->bbLiveOut);
7191                 printf("\n");
7192
7193                 dumpInVarToRegMap(block);
7194                 dumpOutVarToRegMap(block);
7195             }
7196
7197             printf("\n\n");
7198         }
7199 #endif // DEBUG
7200
7201         resolveEdges();
7202
7203         // Verify register assignments on variables
7204         unsigned   lclNum;
7205         LclVarDsc* varDsc;
7206         for (lclNum = 0, varDsc = compiler->lvaTable; lclNum < compiler->lvaCount; lclNum++, varDsc++)
7207         {
7208             if (!isCandidateVar(varDsc))
7209             {
7210                 varDsc->lvRegNum = REG_STK;
7211             }
7212             else
7213             {
7214                 Interval* interval = getIntervalForLocalVar(varDsc->lvVarIndex);
7215
7216                 // Determine initial position for parameters
7217
7218                 if (varDsc->lvIsParam)
7219                 {
7220                     regMaskTP initialRegMask = interval->firstRefPosition->registerAssignment;
7221                     regNumber initialReg     = (initialRegMask == RBM_NONE || interval->firstRefPosition->spillAfter)
7222                                                ? REG_STK
7223                                                : genRegNumFromMask(initialRegMask);
7224                     regNumber sourceReg = (varDsc->lvIsRegArg) ? varDsc->lvArgReg : REG_STK;
7225
7226 #ifdef _TARGET_ARM_
7227                     if (varTypeIsMultiReg(varDsc))
7228                     {
7229                         // TODO-ARM-NYI: Map the hi/lo intervals back to lvRegNum and lvOtherReg (these should NYI
7230                         // before this)
7231                         assert(!"Multi-reg types not yet supported");
7232                     }
7233                     else
7234 #endif // _TARGET_ARM_
7235                     {
7236                         varDsc->lvArgInitReg = initialReg;
7237                         JITDUMP("  Set V%02u argument initial register to %s\n", lclNum, getRegName(initialReg));
7238                     }
7239
7240                     // Stack args that are part of dependently-promoted structs should never be register candidates (see
7241                     // LinearScan::isRegCandidate).
7242                     assert(varDsc->lvIsRegArg || !compiler->lvaIsFieldOfDependentlyPromotedStruct(varDsc));
7243                 }
7244
7245                 // If lvRegNum is REG_STK, that means that either no register
7246                 // was assigned, or (more likely) that the same register was not
7247                 // used for all references.  In that case, codegen gets the register
7248                 // from the tree node.
7249                 if (varDsc->lvRegNum == REG_STK || interval->isSpilled || interval->isSplit)
7250                 {
7251                     // For codegen purposes, we'll set lvRegNum to whatever register
7252                     // it's currently in as we go.
7253                     // However, we never mark an interval as lvRegister if it has either been spilled
7254                     // or split.
7255                     varDsc->lvRegister = false;
7256
7257                     // Skip any dead defs or exposed uses
7258                     // (first use exposed will only occur when there is no explicit initialization)
7259                     RefPosition* firstRefPosition = interval->firstRefPosition;
7260                     while ((firstRefPosition != nullptr) && (firstRefPosition->refType == RefTypeExpUse))
7261                     {
7262                         firstRefPosition = firstRefPosition->nextRefPosition;
7263                     }
7264                     if (firstRefPosition == nullptr)
7265                     {
7266                         // Dead interval
7267                         varDsc->lvLRACandidate = false;
7268                         if (varDsc->lvRefCnt() == 0)
7269                         {
7270                             varDsc->lvOnFrame = false;
7271                         }
7272                         else
7273                         {
7274                             // We may encounter cases where a lclVar actually has no references, but
7275                             // a non-zero refCnt.  For safety (in case this is some "hidden" lclVar that we're
7276                             // not correctly recognizing), we'll mark those as needing a stack location.
7277                             // TODO-Cleanup: Make this an assert if/when we correct the refCnt
7278                             // updating.
7279                             varDsc->lvOnFrame = true;
7280                         }
7281                     }
7282                     else
7283                     {
7284                         // If the interval was not spilled, it doesn't need a stack location.
7285                         if (!interval->isSpilled)
7286                         {
7287                             varDsc->lvOnFrame = false;
7288                         }
7289                         if (firstRefPosition->registerAssignment == RBM_NONE || firstRefPosition->spillAfter)
7290                         {
7291                             // Either this RefPosition is spilled or regOptional, or it is not a "real" def or use.
7292                             assert(
7293                                 firstRefPosition->spillAfter || firstRefPosition->RegOptional() ||
7294                                 (firstRefPosition->refType != RefTypeDef && firstRefPosition->refType != RefTypeUse));
7295                             varDsc->lvRegNum = REG_STK;
7296                         }
7297                         else
7298                         {
7299                             varDsc->lvRegNum = firstRefPosition->assignedReg();
7300                         }
7301                     }
7302                 }
7303                 else
7304                 {
7305                     varDsc->lvRegister = true;
7306                     varDsc->lvOnFrame  = false;
7309 #ifdef DEBUG
7310                     regMaskTP registerAssignment = genRegMask(varDsc->lvRegNum);
7311                     assert(!interval->isSpilled && !interval->isSplit);
7312                     RefPosition* refPosition = interval->firstRefPosition;
7313                     assert(refPosition != nullptr);
7314
7315                     while (refPosition != nullptr)
7316                     {
7317                         // All RefPositions must match, except for dead definitions,
7318                         // copyReg/moveReg and RefTypeExpUse positions
7319                         if (refPosition->registerAssignment != RBM_NONE && !refPosition->copyReg &&
7320                             !refPosition->moveReg && refPosition->refType != RefTypeExpUse)
7321                         {
7322                             assert(refPosition->registerAssignment == registerAssignment);
7323                         }
7324                         refPosition = refPosition->nextRefPosition;
7325                     }
7326 #endif // DEBUG
7327                 }
7328             }
7329         }
7330     }
7331
7332 #ifdef DEBUG
7333     if (VERBOSE)
7334     {
7335         printf("Trees after linear scan register allocator (LSRA)\n");
7336         compiler->fgDispBasicBlocks(true);
7337     }
7338
7339     verifyFinalAllocation();
7340 #endif // DEBUG
7341
7342     compiler->raMarkStkVars();
7343     recordMaxSpill();
7344
7345     // TODO-CQ: Review this comment and address as needed.
7346     // Change all unused promoted non-argument struct locals to a non-GC type (in this case TYP_INT)
7347     // so that the gc tracking logic and lvMustInit logic will ignore them.
7348     // Extract the code that does this from raAssignVars, and call it here.
7349     // PRECONDITIONS: Ensure that lvPromoted is set on promoted structs, if and
7350     // only if it is promoted on all paths.
7351     // Call might be something like:
7352     // compiler->BashUnusedStructLocals();
7353 }
7354
7356 //------------------------------------------------------------------------
7357 // insertMove: Insert a move of a lclVar with the given lclNum into the given block.
7358 //
7359 // Arguments:
7360 //    block          - the BasicBlock into which the move will be inserted.
7361 //    insertionPoint - the instruction before which to insert the move
7362 //    lclNum         - the lclNum of the var to be moved
7363 //    fromReg        - the register from which the var is moving
7364 //    toReg          - the register to which the var is moving
7365 //
7366 // Return Value:
7367 //    None.
7368 //
7369 // Notes:
7370 //    If insertionPoint is non-NULL, insert before that instruction;
7371 //    otherwise, insert "near" the end (prior to the branch, if any).
7372 //    If fromReg or toReg is REG_STK, then move from/to memory, respectively.
7373
7374 void LinearScan::insertMove(
7375     BasicBlock* block, GenTree* insertionPoint, unsigned lclNum, regNumber fromReg, regNumber toReg)
7376 {
7377     LclVarDsc* varDsc = compiler->lvaTable + lclNum;
7378     // the lclVar must be a register candidate
7379     assert(isRegCandidate(varDsc));
7380     // One or both MUST be a register
7381     assert(fromReg != REG_STK || toReg != REG_STK);
7382     // They must not be the same register.
7383     assert(fromReg != toReg);
7384
7385     // This var can't be marked lvRegister now
7386     varDsc->lvRegNum = REG_STK;
7387
7388     GenTree* src = compiler->gtNewLclvNode(lclNum, varDsc->TypeGet());
7389     SetLsraAdded(src);
7390
7391     // There are three cases we need to handle:
7392     // - We are loading a lclVar from the stack.
7393     // - We are storing a lclVar to the stack.
7394     // - We are copying a lclVar between registers.
7395     //
7396     // In the first and second cases, the lclVar node will be marked with GTF_SPILLED and GTF_SPILL, respectively.
7397     // It is up to the code generator to ensure that any necessary normalization is done when loading or storing the
7398     // lclVar's value.
7399     //
7400     // In the third case, we generate GT_COPY(GT_LCL_VAR) and type each node with the normalized type of the lclVar.
7401     // This is safe because a lclVar is always normalized once it is in a register.
7402
7403     GenTree* dst = src;
7404     if (fromReg == REG_STK)
7405     {
7406         src->gtFlags |= GTF_SPILLED;
7407         src->gtRegNum = toReg;
7408     }
7409     else if (toReg == REG_STK)
7410     {
7411         src->gtFlags |= GTF_SPILL;
7412         src->gtRegNum = fromReg;
7413     }
7414     else
7415     {
7416         var_types movType = genActualType(varDsc->TypeGet());
7417         src->gtType       = movType;
7418
7419         dst = new (compiler, GT_COPY) GenTreeCopyOrReload(GT_COPY, movType, src);
7420         // This is the new home of the lclVar - indicate that by clearing the GTF_VAR_DEATH flag.
7421         // Note that if src is itself a lastUse, this will have no effect.
7422         dst->gtFlags &= ~(GTF_VAR_DEATH);
7423         src->gtRegNum = fromReg;
7424         dst->gtRegNum = toReg;
7425         SetLsraAdded(dst);
7426     }
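    // The move's result is not consumed by any node, so mark it as an unused value in LIR.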
7427     dst->SetUnusedValue();
7428
7429     LIR::Range  treeRange  = LIR::SeqTree(compiler, dst);
7430     LIR::Range& blockRange = LIR::AsRange(block);
7431
7432     if (insertionPoint != nullptr)
7433     {
7434         blockRange.InsertBefore(insertionPoint, std::move(treeRange));
7435     }
7436     else
7437     {
7438         // Put the copy at the bottom
7439         if (block->bbJumpKind == BBJ_COND || block->bbJumpKind == BBJ_SWITCH)
7440         {
7441             noway_assert(!blockRange.IsEmpty());
7442
7443             GenTree* branch = blockRange.LastNode();
7444             assert(branch->OperIsConditionalJump() || branch->OperGet() == GT_SWITCH_TABLE ||
7445                    branch->OperGet() == GT_SWITCH);
7446
7447             blockRange.InsertBefore(branch, std::move(treeRange));
7448         }
7449         else
7450         {
7451             assert(block->bbJumpKind == BBJ_NONE || block->bbJumpKind == BBJ_ALWAYS);
7452             blockRange.InsertAtEnd(std::move(treeRange));
7453         }
7454     }
7455 }
7456
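//------------------------------------------------------------------------
// insertSwap: Insert a GT_SWAP node to exchange the contents of two registers,
//             each currently holding one of the given lclVars.
//
// Arguments:
//    block          - the BasicBlock into which the swap will be inserted.
//    insertionPoint - the instruction before which to insert the swap
//    lclNum1        - the lclNum of the var in reg1
//    reg1           - the first register
//    lclNum2        - the lclNum of the var in reg2
//    reg2           - the second register
//
// Notes:
//    If insertionPoint is non-null, the swap is inserted before that instruction;
//    otherwise it is inserted at the bottom of the block, prior to any branch.
//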
7457 void LinearScan::insertSwap(
7458     BasicBlock* block, GenTree* insertionPoint, unsigned lclNum1, regNumber reg1, unsigned lclNum2, regNumber reg2)
7459 {
7460 #ifdef DEBUG
7461     if (VERBOSE)
7462     {
7463         const char* insertionPointString = "top";
7464         if (insertionPoint == nullptr)
7465         {
7466             insertionPointString = "bottom";
7467         }
7468         printf("   " FMT_BB " %s: swap V%02u in %s with V%02u in %s\n", block->bbNum, insertionPointString, lclNum1,
7469                getRegName(reg1), lclNum2, getRegName(reg2));
7470     }
7471 #endif // DEBUG
7472
7473     LclVarDsc* varDsc1 = compiler->lvaTable + lclNum1;
7474     LclVarDsc* varDsc2 = compiler->lvaTable + lclNum2;
7475     assert(reg1 != REG_STK && reg1 != REG_NA && reg2 != REG_STK && reg2 != REG_NA);
7476
7477     GenTree* lcl1  = compiler->gtNewLclvNode(lclNum1, varDsc1->TypeGet());
7478     lcl1->gtRegNum = reg1;
7479     SetLsraAdded(lcl1);
7480
7481     GenTree* lcl2  = compiler->gtNewLclvNode(lclNum2, varDsc2->TypeGet());
7482     lcl2->gtRegNum = reg2;
7483     SetLsraAdded(lcl2);
7484
7485     GenTree* swap  = compiler->gtNewOperNode(GT_SWAP, TYP_VOID, lcl1, lcl2);
7486     swap->gtRegNum = REG_NA;
7487     SetLsraAdded(swap);
7488
7489     lcl1->gtNext = lcl2;
7490     lcl2->gtPrev = lcl1;
7491     lcl2->gtNext = swap;
7492     swap->gtPrev = lcl2;
7493
7494     LIR::Range  swapRange  = LIR::SeqTree(compiler, swap);
7495     LIR::Range& blockRange = LIR::AsRange(block);
7496
7497     if (insertionPoint != nullptr)
7498     {
7499         blockRange.InsertBefore(insertionPoint, std::move(swapRange));
7500     }
7501     else
7502     {
7503         // Put the copy at the bottom
7504         // If there's a branch, make an embedded statement that executes just prior to the branch
7505         if (block->bbJumpKind == BBJ_COND || block->bbJumpKind == BBJ_SWITCH)
7506         {
7507             noway_assert(!blockRange.IsEmpty());
7508
7509             GenTree* branch = blockRange.LastNode();
7510             assert(branch->OperIsConditionalJump() || branch->OperGet() == GT_SWITCH_TABLE ||
7511                    branch->OperGet() == GT_SWITCH);
7512
7513             blockRange.InsertBefore(branch, std::move(swapRange));
7514         }
7515         else
7516         {
7517             assert(block->bbJumpKind == BBJ_NONE || block->bbJumpKind == BBJ_ALWAYS);
7518             blockRange.InsertAtEnd(std::move(swapRange));
7519         }
7520     }
7521 }
7522
7523 //------------------------------------------------------------------------
7524 // getTempRegForResolution: Get a free register to use for resolution code.
7525 //
7526 // Arguments:
7527 //    fromBlock - The "from" block on the edge being resolved.
7528 //    toBlock   - The "to" block on the edge.
7529 //    type      - the type of register required
7530 //
7531 // Return Value:
7532 //    Returns a register that is free on the given edge, or REG_NA if none is available.
7533 //
7534 // Notes:
7535 //    It is up to the caller to check the return value, and to handle appropriately the
7536 //    case where no register is available.
7537 //    It is also up to the caller to cache the return value, as this is not cheap to compute.
7538
7539 regNumber LinearScan::getTempRegForResolution(BasicBlock* fromBlock, BasicBlock* toBlock, var_types type)
7540 {
7541     // TODO-Throughput: This would be much more efficient if we add RegToVarMaps instead of VarToRegMaps
7542     // and they would be more space-efficient as well.
7543     VarToRegMap fromVarToRegMap = getOutVarToRegMap(fromBlock->bbNum);
7544     VarToRegMap toVarToRegMap   = getInVarToRegMap(toBlock->bbNum);
7545
7546 #ifdef _TARGET_ARM_
7547     regMaskTP freeRegs;
7548     if (type == TYP_DOUBLE)
7549     {
7550         // We have to consider all float registers for TYP_DOUBLE
7551         freeRegs = allRegs(TYP_FLOAT);
7552     }
7553     else
7554     {
7555         freeRegs = allRegs(type);
7556     }
7557 #else  // !_TARGET_ARM_
7558     regMaskTP freeRegs = allRegs(type);
7559 #endif // !_TARGET_ARM_
7560
7561 #ifdef DEBUG
7562     if (getStressLimitRegs() == LSRA_LIMIT_SMALL_SET)
7563     {
7564         return REG_NA;
7565     }
7566 #endif // DEBUG
7567     INDEBUG(freeRegs = stressLimitRegs(nullptr, freeRegs));
7568
7569     // We are only interested in the variables that are live-in to the "to" block.
7570     VarSetOps::Iter iter(compiler, toBlock->bbLiveIn);
7571     unsigned        varIndex = 0;
7572     while (iter.NextElem(&varIndex) && freeRegs != RBM_NONE)
7573     {
7574         regNumber fromReg = getVarReg(fromVarToRegMap, varIndex);
7575         regNumber toReg   = getVarReg(toVarToRegMap, varIndex);
7576         assert(fromReg != REG_NA && toReg != REG_NA);
7577         if (fromReg != REG_STK)
7578         {
7579             freeRegs &= ~genRegMask(fromReg, getIntervalForLocalVar(varIndex)->registerType);
7580         }
7581         if (toReg != REG_STK)
7582         {
7583             freeRegs &= ~genRegMask(toReg, getIntervalForLocalVar(varIndex)->registerType);
7584         }
7585     }
7586
7587 #ifdef _TARGET_ARM_
7588     if (type == TYP_DOUBLE)
7589     {
7590         // Exclude any doubles for which the odd half isn't in freeRegs.
7591         freeRegs = freeRegs & ((freeRegs << 1) & RBM_ALLDOUBLE);
7592     }
7593 #endif
7594
7595     if (freeRegs == RBM_NONE)
7596     {
7597         return REG_NA;
7598     }
7599     else
7600     {
7601         regNumber tempReg = genRegNumFromMask(genFindLowestBit(freeRegs));
7602         return tempReg;
7603     }
7604 }
7605
7606 #ifdef _TARGET_ARM_
7607 //------------------------------------------------------------------------
7608 // addResolutionForDouble: Add resolution move(s) for TYP_DOUBLE interval
7609 //                         and update location.
7610 //
7611 // Arguments:
7612 //    block           - the BasicBlock into which the move will be inserted.
7613 //    insertionPoint  - the instruction before which to insert the move
7614 //    sourceIntervals - maintains sourceIntervals[reg], the Interval currently associated with each 'reg'
7615 //    location        - maintains location[reg] which is the location of the var that was originally in 'reg'.
7616 //    toReg           - the register to which the var is moving
7617 //    fromReg         - the register from which the var is moving
7618 //    resolveType     - the type of resolution to be performed
7619 //
7620 // Return Value:
7621 //    None.
7622 //
7623 // Notes:
7624 //    It inserts at least one move and updates incoming parameter 'location'.
7625 //
7626 void LinearScan::addResolutionForDouble(BasicBlock*     block,
7627                                         GenTree*        insertionPoint,
7628                                         Interval**      sourceIntervals,
7629                                         regNumberSmall* location,
7630                                         regNumber       toReg,
7631                                         regNumber       fromReg,
7632                                         ResolveType     resolveType)
7633 {
7634     regNumber secondHalfTargetReg = REG_NEXT(fromReg);
7635     Interval* intervalToBeMoved1  = sourceIntervals[fromReg];
7636     Interval* intervalToBeMoved2  = sourceIntervals[secondHalfTargetReg];
7637
7638     assert(!(intervalToBeMoved1 == nullptr && intervalToBeMoved2 == nullptr));
7639
7640     if (intervalToBeMoved1 != nullptr)
7641     {
7642         if (intervalToBeMoved1->registerType == TYP_DOUBLE)
7643         {
7644             // TYP_DOUBLE interval occupies a double register, i.e. two float registers.
7645             assert(intervalToBeMoved2 == nullptr);
7646             assert(genIsValidDoubleReg(toReg));
7647         }
7648         else
7649         {
7650             // TYP_FLOAT interval occupies 1st half of double register, i.e. 1st float register
7651             assert(genIsValidFloatReg(toReg));
7652         }
7653         addResolution(block, insertionPoint, intervalToBeMoved1, toReg, fromReg);
7654         JITDUMP(" (%s)\n", resolveTypeName[resolveType]);
7655         location[fromReg] = (regNumberSmall)toReg;
7656     }
7657
7658     if (intervalToBeMoved2 != nullptr)
7659     {
7660         // TYP_FLOAT interval occupies 2nd half of double register.
7661         assert(intervalToBeMoved2->registerType == TYP_FLOAT);
7662         regNumber secondHalfTempReg = REG_NEXT(toReg);
7663
7664         addResolution(block, insertionPoint, intervalToBeMoved2, secondHalfTempReg, secondHalfTargetReg);
7665         JITDUMP(" (%s)\n", resolveTypeName[resolveType]);
7666         location[secondHalfTargetReg] = (regNumberSmall)secondHalfTempReg;
7667     }
7668
7669     return;
7670 }
7671 #endif // _TARGET_ARM_
7672
7673 //------------------------------------------------------------------------
7674 // addResolution: Add a resolution move of the given interval
7675 //
7676 // Arguments:
7677 //    block          - the BasicBlock into which the move will be inserted.
7678 //    insertionPoint - the instruction before which to insert the move
7679 //    interval       - the interval of the var to be moved
7680 //    toReg          - the register to which the var is moving
7681 //    fromReg        - the register from which the var is moving
7682 //
7683 // Return Value:
7684 //    None.
7685 //
7686 // Notes:
7687 //    For joins, we insert at the bottom (indicated by an insertionPoint
7688 //    of nullptr), while for splits we insert at the top.
7689 //    This is because for joins 'block' is a pred of the join, while for splits it is a succ.
7690 //    For critical edges, this function may be called twice - once to move from
7691 //    the source (fromReg), if any, to the stack, in which case toReg will be
7692 //    REG_STK, and we insert at the bottom (leave insertionPoint as nullptr).
7693 //    The next time, we want to move from the stack to the destination (toReg),
7694 //    in which case fromReg will be REG_STK, and we insert at the top.
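     //    For instance (hypothetical registers): if V05 must move from rdx to rsi across a
     //    critical edge via the stack, the first call emits "rdx -> stack" (toReg == REG_STK,
     //    inserted at the bottom) and the second emits "stack -> rsi" (fromReg == REG_STK,
     //    inserted at the top).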
7695
7696 void LinearScan::addResolution(
7697     BasicBlock* block, GenTree* insertionPoint, Interval* interval, regNumber toReg, regNumber fromReg)
7698 {
7699 #ifdef DEBUG
7700     const char* insertionPointString = "top";
7701 #endif // DEBUG
7702     if (insertionPoint == nullptr)
7703     {
7704 #ifdef DEBUG
7705         insertionPointString = "bottom";
7706 #endif // DEBUG
7707     }
7708
7709     JITDUMP("   " FMT_BB " %s: move V%02u from ", block->bbNum, insertionPointString, interval->varNum);
7710     JITDUMP("%s to %s", getRegName(fromReg), getRegName(toReg));
7711
7712     insertMove(block, insertionPoint, interval->varNum, fromReg, toReg);
7713     if (fromReg == REG_STK || toReg == REG_STK)
7714     {
7715         assert(interval->isSpilled);
7716     }
7717     else
7718     {
7719         // We should have already marked this as spilled or split.
7720         assert((interval->isSpilled) || (interval->isSplit));
7721     }
7722
7723     INTRACK_STATS(updateLsraStat(LSRA_STAT_RESOLUTION_MOV, block->bbNum));
7724 }
7725
7726 //------------------------------------------------------------------------
7727 // handleOutgoingCriticalEdges: Performs the necessary resolution on all critical edges that feed out of 'block'
7728 //
7729 // Arguments:
7730 //    block     - the block with outgoing critical edges.
7731 //
7732 // Return Value:
7733 //    None.
7734 //
7735 // Notes:
7736 //    For all outgoing critical edges (i.e. any successor of this block which is
7737 //    a join edge), if there are any conflicts, split the edge by adding a new block,
7738 //    and generate the resolution code into that block.
7739
7740 void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block)
7741 {
7742     VARSET_TP outResolutionSet(VarSetOps::Intersection(compiler, block->bbLiveOut, resolutionCandidateVars));
7743     if (VarSetOps::IsEmpty(compiler, outResolutionSet))
7744     {
7745         return;
7746     }
7747     VARSET_TP sameResolutionSet(VarSetOps::MakeEmpty(compiler));
7748     VARSET_TP sameLivePathsSet(VarSetOps::MakeEmpty(compiler));
7749     VARSET_TP singleTargetSet(VarSetOps::MakeEmpty(compiler));
7750     VARSET_TP diffResolutionSet(VarSetOps::MakeEmpty(compiler));
7751
7752     // Get the outVarToRegMap for this block
7753     VarToRegMap outVarToRegMap = getOutVarToRegMap(block->bbNum);
7754     unsigned    succCount      = block->NumSucc(compiler);
7755     assert(succCount > 1);
7756     VarToRegMap firstSuccInVarToRegMap = nullptr;
7757     BasicBlock* firstSucc              = nullptr;
7758
7759     // First, determine the live regs at the end of this block so that we know what regs are
7760     // available to copy into.
7761     // Note that for this purpose we use the full live-out set, because we must ensure that
7762     // even the registers that remain the same across the edge are preserved correctly.
7763     regMaskTP       liveOutRegs = RBM_NONE;
7764     VarSetOps::Iter liveOutIter(compiler, block->bbLiveOut);
7765     unsigned        liveOutVarIndex = 0;
7766     while (liveOutIter.NextElem(&liveOutVarIndex))
7767     {
7768         regNumber fromReg = getVarReg(outVarToRegMap, liveOutVarIndex);
7769         if (fromReg != REG_STK)
7770         {
7771             regMaskTP fromRegMask = genRegMask(fromReg, getIntervalForLocalVar(liveOutVarIndex)->registerType);
7772             liveOutRegs |= fromRegMask;
7773         }
7774     }
7775
7776     // Next, if this block ends with a switch table, we have to make sure not to copy
7777     // into the registers that it uses.
7778     regMaskTP switchRegs = RBM_NONE;
7779     if (block->bbJumpKind == BBJ_SWITCH)
7780     {
7781         // At this point, Lowering has transformed any non-switch-table blocks into
7782         // cascading ifs.
7783         GenTree* switchTable = LIR::AsRange(block).LastNode();
7784         assert(switchTable != nullptr && switchTable->OperGet() == GT_SWITCH_TABLE);
7785
7786         switchRegs   = switchTable->gtRsvdRegs;
7787         GenTree* op1 = switchTable->gtGetOp1();
7788         GenTree* op2 = switchTable->gtGetOp2();
7789         noway_assert(op1 != nullptr && op2 != nullptr);
7790         assert(op1->gtRegNum != REG_NA && op2->gtRegNum != REG_NA);
7791         // No floating point values, so no need to worry about the register type
7792         // (i.e. for ARM32, where we used the genRegMask overload with a type).
7793         assert(varTypeIsIntegralOrI(op1) && varTypeIsIntegralOrI(op2));
7794         switchRegs |= genRegMask(op1->gtRegNum);
7795         switchRegs |= genRegMask(op2->gtRegNum);
7796     }
7797
7798 #ifdef _TARGET_ARM64_
7799     // Next, if this block ends with a JCMP, we have to make sure not to copy
7800     // into the register that it uses or modify the local variable it must consume
7801     LclVarDsc* jcmpLocalVarDsc = nullptr;
7802     if (block->bbJumpKind == BBJ_COND)
7803     {
7804         GenTree* lastNode = LIR::AsRange(block).LastNode();
7805
7806         if (lastNode->OperIs(GT_JCMP))
7807         {
7808             GenTree* op1 = lastNode->gtGetOp1();
7809             switchRegs |= genRegMask(op1->gtRegNum);
7810
7811             if (op1->IsLocal())
7812             {
7813                 GenTreeLclVarCommon* lcl = op1->AsLclVarCommon();
7814                 jcmpLocalVarDsc          = &compiler->lvaTable[lcl->gtLclNum];
7815             }
7816         }
7817     }
7818 #endif
7819
7820     VarToRegMap sameVarToRegMap = sharedCriticalVarToRegMap;
7821     regMaskTP   sameWriteRegs   = RBM_NONE;
7822     regMaskTP   diffReadRegs    = RBM_NONE;
7823
7824     // For each var that may require resolution, classify them as:
7825     // - in the same register at the end of this block and at each target (no resolution needed)
7826     // - in different registers at different targets (resolve separately):
7827     //     diffResolutionSet
7828     // - in the same register at each target at which it's live, but different from the end of
7829     //   this block.  We may be able to resolve these as if it is "join", but only if they do not
7830     //   write to any registers that are read by those in the diffResolutionSet:
7831     //     sameResolutionSet
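         // A concrete (hypothetical) illustration with successors S1 and S2: a var in rax here
         // and in rax at both S1 and S2 needs no resolution; one expected in rcx at both S1 and
         // S2 is a sameResolutionSet candidate (subject to the checks below); one expected in
         // rcx at S1 but rdx at S2 goes to diffResolutionSet and is resolved on each edge
         // separately.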
7832
7833     VarSetOps::Iter outResolutionSetIter(compiler, outResolutionSet);
7834     unsigned        outResolutionSetVarIndex = 0;
7835     while (outResolutionSetIter.NextElem(&outResolutionSetVarIndex))
7836     {
7837         regNumber fromReg             = getVarReg(outVarToRegMap, outResolutionSetVarIndex);
7838         bool      isMatch             = true;
7839         bool      isSame              = false;
7840         bool      maybeSingleTarget   = false;
7841         bool      maybeSameLivePaths  = false;
7842         bool      liveOnlyAtSplitEdge = true;
7843         regNumber sameToReg           = REG_NA;
7844         for (unsigned succIndex = 0; succIndex < succCount; succIndex++)
7845         {
7846             BasicBlock* succBlock = block->GetSucc(succIndex, compiler);
7847             if (!VarSetOps::IsMember(compiler, succBlock->bbLiveIn, outResolutionSetVarIndex))
7848             {
7849                 maybeSameLivePaths = true;
7850                 continue;
7851             }
7852             else if (liveOnlyAtSplitEdge)
7853             {
7854                 // Check whether the var is live only at target blocks connected to this block by a split edge.
7855                 liveOnlyAtSplitEdge = ((succBlock->bbPreds->flNext == nullptr) && (succBlock != compiler->fgFirstBB));
7856             }
7857
7858             regNumber toReg = getVarReg(getInVarToRegMap(succBlock->bbNum), outResolutionSetVarIndex);
7859             if (sameToReg == REG_NA)
7860             {
7861                 sameToReg = toReg;
7862                 continue;
7863             }
7864             if (toReg == sameToReg)
7865             {
7866                 continue;
7867             }
7868             sameToReg = REG_NA;
7869             break;
7870         }
7871
7872         // Check for the cases where we can't write to a register.
7873         // We only need to check for these cases if sameToReg is an actual register (not REG_STK).
7874         if (sameToReg != REG_NA && sameToReg != REG_STK)
7875         {
7876             // If there's a path on which this var isn't live, it may use the original value in sameToReg.
7877             // In this case, sameToReg will be in the liveOutRegs of this block.
7878             // Similarly, if sameToReg is in sameWriteRegs, it has already been used (i.e. for a lclVar that's
7879             // live only at another target), and we can't copy another lclVar into that reg in this block.
7880             regMaskTP sameToRegMask =
7881                 genRegMask(sameToReg, getIntervalForLocalVar(outResolutionSetVarIndex)->registerType);
7882             if (maybeSameLivePaths &&
7883                 (((sameToRegMask & liveOutRegs) != RBM_NONE) || ((sameToRegMask & sameWriteRegs) != RBM_NONE)))
7884             {
7885                 sameToReg = REG_NA;
7886             }
7887             // If this register is used by a switch table at the end of the block, we can't do the copy
7888             // in this block (since we can't insert it after the switch).
7889             if ((sameToRegMask & switchRegs) != RBM_NONE)
7890             {
7891                 sameToReg = REG_NA;
7892             }
7893
7894 #ifdef _TARGET_ARM64_
7895             if (jcmpLocalVarDsc && (jcmpLocalVarDsc->lvVarIndex == outResolutionSetVarIndex))
7896             {
7897                 sameToReg = REG_NA;
7898             }
7899 #endif
7900
7901             // If the var is live only at those blocks connected by a split edge and not live-in at some of the
7902             // target blocks, we will resolve it the same way as if it were in diffResolutionSet, and resolution
7903             // will be deferred to the handling of split edges, so the copy will only be made at those target(s).
7904             //
7905             // Another way to achieve similar resolution for vars live only at split edges is by removing them
7906             // from consideration up-front, but that requires traversing those edges anyway to account for
7907             // the registers that must not be overwritten.
7908             if (liveOnlyAtSplitEdge && maybeSameLivePaths)
7909             {
7910                 sameToReg = REG_NA;
7911             }
7912         }
7913
7914         if (sameToReg == REG_NA)
7915         {
7916             VarSetOps::AddElemD(compiler, diffResolutionSet, outResolutionSetVarIndex);
7917             if (fromReg != REG_STK)
7918             {
7919                 diffReadRegs |= genRegMask(fromReg, getIntervalForLocalVar(outResolutionSetVarIndex)->registerType);
7920             }
7921         }
7922         else if (sameToReg != fromReg)
7923         {
7924             VarSetOps::AddElemD(compiler, sameResolutionSet, outResolutionSetVarIndex);
7925             setVarReg(sameVarToRegMap, outResolutionSetVarIndex, sameToReg);
7926             if (sameToReg != REG_STK)
7927             {
7928                 sameWriteRegs |= genRegMask(sameToReg, getIntervalForLocalVar(outResolutionSetVarIndex)->registerType);
7929             }
7930         }
7931     }
7932
7933     if (!VarSetOps::IsEmpty(compiler, sameResolutionSet))
7934     {
7935         if ((sameWriteRegs & diffReadRegs) != RBM_NONE)
7936         {
7937             // We cannot split the "same" and "diff" regs if the "same" set writes registers
7938             // that must be read by the "diff" set.  (Note that when these are done as a "batch"
7939             // we carefully order them to ensure all the input regs are read before they are
7940             // overwritten.)
7941             VarSetOps::UnionD(compiler, diffResolutionSet, sameResolutionSet);
7942             VarSetOps::ClearD(compiler, sameResolutionSet);
7943         }
7944         else
7945         {
7946             // For any vars in the sameResolutionSet, we can simply add the move at the end of "block".
7947             resolveEdge(block, nullptr, ResolveSharedCritical, sameResolutionSet);
7948         }
7949     }
7950     if (!VarSetOps::IsEmpty(compiler, diffResolutionSet))
7951     {
7952         for (unsigned succIndex = 0; succIndex < succCount; succIndex++)
7953         {
7954             BasicBlock* succBlock = block->GetSucc(succIndex, compiler);
7955
7956             // Any "diffResolutionSet" resolution for a block with no other predecessors will be handled later
7957             // as split resolution.
7958             if ((succBlock->bbPreds->flNext == nullptr) && (succBlock != compiler->fgFirstBB))
7959             {
7960                 continue;
7961             }
7962
7963             // Now collect the resolution set for just this edge, if any.
7964             // Check only the vars in diffResolutionSet that are live-in to this successor.
7965             bool        needsResolution   = false;
7966             VarToRegMap succInVarToRegMap = getInVarToRegMap(succBlock->bbNum);
7967             VARSET_TP   edgeResolutionSet(VarSetOps::Intersection(compiler, diffResolutionSet, succBlock->bbLiveIn));
7968             VarSetOps::Iter iter(compiler, edgeResolutionSet);
7969             unsigned        varIndex = 0;
7970             while (iter.NextElem(&varIndex))
7971             {
7972                 regNumber fromReg = getVarReg(outVarToRegMap, varIndex);
7973                 regNumber toReg   = getVarReg(succInVarToRegMap, varIndex);
7974
7975                 if (fromReg == toReg)
7976                 {
7977                     VarSetOps::RemoveElemD(compiler, edgeResolutionSet, varIndex);
7978                 }
7979             }
7980             if (!VarSetOps::IsEmpty(compiler, edgeResolutionSet))
7981             {
7982                 resolveEdge(block, succBlock, ResolveCritical, edgeResolutionSet);
7983             }
7984         }
7985     }
7986 }
7987
7988 //------------------------------------------------------------------------
7989 // resolveEdges: Perform resolution across basic block edges
7990 //
7991 // Arguments:
7992 //    None.
7993 //
7994 // Return Value:
7995 //    None.
7996 //
7997 // Notes:
7998 //    Traverse the basic blocks.
7999 //    - If this block has a single predecessor that is not the immediately
8000 //      preceding block, perform any needed 'split' resolution at the beginning of this block
8001 //    - Otherwise if this block has critical incoming edges, handle them.
8002 //    - If this block has a single successor that has multiple predecessors, perform any needed
8003 //      'join' resolution at the end of this block.
8004 //    Note that a block may have both 'split' (or 'critical') incoming edge(s) and 'join' outgoing
8005 //    edges.
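     //    For example (a sketch, not an exhaustive case list): a block whose single predecessor
     //    has several successors receives 'split' moves at its top, while a block whose single
     //    successor has several predecessors emits 'join' moves at its bottom.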
8006
8007 void LinearScan::resolveEdges()
8008 {
8009     JITDUMP("RESOLVING EDGES\n");
8010
8011     // The resolutionCandidateVars set was initialized with all the lclVars that are live-in to
8012     // any block. We now intersect that set with any lclVars that ever spilled or split.
8013     // If there are no candidates for resolution, simply return.
8014
8015     VarSetOps::IntersectionD(compiler, resolutionCandidateVars, splitOrSpilledVars);
8016     if (VarSetOps::IsEmpty(compiler, resolutionCandidateVars))
8017     {
8018         return;
8019     }
8020
8021     BasicBlock *block, *prevBlock = nullptr;
8022
8023     // Handle all the critical edges first.
8024     // We will try to avoid resolution across critical edges in cases where all the critical-edge
8025     // targets of a block have the same home.  We will then split the edges only for the
8026     // remaining mismatches.  We visit the out-edges, as that allows us to share the moves that are
8027     // common among all the targets.
8028
8029     if (hasCriticalEdges)
8030     {
8031         foreach_block(compiler, block)
8032         {
8033             if (block->bbNum > bbNumMaxBeforeResolution)
8034             {
8035                 // This is a new block added during resolution - we don't need to visit it now.
8036                 continue;
8037             }
8038             if (blockInfo[block->bbNum].hasCriticalOutEdge)
8039             {
8040                 handleOutgoingCriticalEdges(block);
8041             }
8042             prevBlock = block;
8043         }
8044     }
8045
8046     prevBlock = nullptr;
8047     foreach_block(compiler, block)
8048     {
8049         if (block->bbNum > bbNumMaxBeforeResolution)
8050         {
8051             // This is a new block added during resolution - we don't need to visit it now.
8052             continue;
8053         }
8054
8055         unsigned    succCount       = block->NumSucc(compiler);
8056         flowList*   preds           = block->bbPreds;
8057         BasicBlock* uniquePredBlock = block->GetUniquePred(compiler);
8058
8059         // First, if this block has a single predecessor,
8060         // we may need resolution at the beginning of this block.
8061         // This may be true even if it's the block we used for starting locations,
8062         // if a variable was spilled.
8063         VARSET_TP inResolutionSet(VarSetOps::Intersection(compiler, block->bbLiveIn, resolutionCandidateVars));
8064         if (!VarSetOps::IsEmpty(compiler, inResolutionSet))
8065         {
8066             if (uniquePredBlock != nullptr)
8067             {
8068                 // We may have split edges during critical edge resolution, and in the process split
8069                 // a non-critical edge as well.
8070                 // It is unlikely that we would ever have more than one of these in sequence (indeed,
8071                 // I don't think it's possible), but there's no need to assume that it can't.
8072                 while (uniquePredBlock->bbNum > bbNumMaxBeforeResolution)
8073                 {
8074                     uniquePredBlock = uniquePredBlock->GetUniquePred(compiler);
8075                     noway_assert(uniquePredBlock != nullptr);
8076                 }
8077                 resolveEdge(uniquePredBlock, block, ResolveSplit, inResolutionSet);
8078             }
8079         }
8080
8081         // Finally, if this block has a single successor:
8082         //  - and that has at least one other predecessor (otherwise we will do the resolution at the
8083         //    top of the successor),
8084         //  - and that is not the target of a critical edge (otherwise we've already handled it)
8085         // we may need resolution at the end of this block.
8086
8087         if (succCount == 1)
8088         {
8089             BasicBlock* succBlock = block->GetSucc(0, compiler);
8090             if (succBlock->GetUniquePred(compiler) == nullptr)
8091             {
8092                 VARSET_TP outResolutionSet(
8093                     VarSetOps::Intersection(compiler, succBlock->bbLiveIn, resolutionCandidateVars));
8094                 if (!VarSetOps::IsEmpty(compiler, outResolutionSet))
8095                 {
8096                     resolveEdge(block, succBlock, ResolveJoin, outResolutionSet);
8097                 }
8098             }
8099         }
8100     }
8101
8102     // Now, fixup the mapping for any blocks that were added for edge splitting.
8103     // See the comment prior to the call to fgSplitEdge() in resolveEdge().
8104     // Note that we could fold this loop in with the checking code below, but that
8105     // would only improve the debug case, and would clutter up the code somewhat.
8106     if (compiler->fgBBNumMax > bbNumMaxBeforeResolution)
8107     {
8108         foreach_block(compiler, block)
8109         {
8110             if (block->bbNum > bbNumMaxBeforeResolution)
8111             {
8112                 // There may be multiple blocks inserted when we split.  But we must always have exactly
8113                 // one path (i.e. all blocks must be single-successor and single-predecessor),
8114                 // and only one block along the path may be non-empty.
8115                 // Note that we may have a newly-inserted block that is empty, but which connects
8116                 // two non-resolution blocks. This happens when an edge is split that requires it.
8117
8118                 BasicBlock* succBlock = block;
8119                 do
8120                 {
8121                     succBlock = succBlock->GetUniqueSucc();
8122                     noway_assert(succBlock != nullptr);
8123                 } while ((succBlock->bbNum > bbNumMaxBeforeResolution) && succBlock->isEmpty());
8124
8125                 BasicBlock* predBlock = block;
8126                 do
8127                 {
8128                     predBlock = predBlock->GetUniquePred(compiler);
8129                     noway_assert(predBlock != nullptr);
8130                 } while ((predBlock->bbNum > bbNumMaxBeforeResolution) && predBlock->isEmpty());
8131
8132                 unsigned succBBNum = succBlock->bbNum;
8133                 unsigned predBBNum = predBlock->bbNum;
8134                 if (block->isEmpty())
8135                 {
8136                     // For the case of the empty block, find the non-resolution block (succ or pred).
8137                     if (predBBNum > bbNumMaxBeforeResolution)
8138                     {
8139                         assert(succBBNum <= bbNumMaxBeforeResolution);
8140                         predBBNum = 0;
8141                     }
8142                     else
8143                     {
8144                         succBBNum = 0;
8145                     }
8146                 }
8147                 else
8148                 {
8149                     assert((succBBNum <= bbNumMaxBeforeResolution) && (predBBNum <= bbNumMaxBeforeResolution));
8150                 }
8151                 SplitEdgeInfo info = {predBBNum, succBBNum};
8152                 getSplitBBNumToTargetBBNumMap()->Set(block->bbNum, info);
8153             }
8154         }
8155     }
8156
8157 #ifdef DEBUG
8158     // Make sure the varToRegMaps match up on all edges.
8159     bool foundMismatch = false;
8160     foreach_block(compiler, block)
8161     {
8162         if (block->isEmpty() && block->bbNum > bbNumMaxBeforeResolution)
8163         {
8164             continue;
8165         }
8166         VarToRegMap toVarToRegMap = getInVarToRegMap(block->bbNum);
8167         for (flowList* pred = block->bbPreds; pred != nullptr; pred = pred->flNext)
8168         {
8169             BasicBlock*     predBlock       = pred->flBlock;
8170             VarToRegMap     fromVarToRegMap = getOutVarToRegMap(predBlock->bbNum);
8171             VarSetOps::Iter iter(compiler, block->bbLiveIn);
8172             unsigned        varIndex = 0;
8173             while (iter.NextElem(&varIndex))
8174             {
8175                 regNumber fromReg = getVarReg(fromVarToRegMap, varIndex);
8176                 regNumber toReg   = getVarReg(toVarToRegMap, varIndex);
8177                 if (fromReg != toReg)
8178                 {
8179                     if (!foundMismatch)
8180                     {
8181                         foundMismatch = true;
8182                         printf("Found mismatched var locations after resolution!\n");
8183                     }
8184                     unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
8185                     printf(" V%02u: " FMT_BB " to " FMT_BB ": %s to %s\n", varNum, predBlock->bbNum, block->bbNum,
8186                            getRegName(fromReg), getRegName(toReg));
8187                 }
8188             }
8189         }
8190     }
8191     assert(!foundMismatch);
8192 #endif
8193     JITDUMP("\n");
8194 }
8195
8196 //------------------------------------------------------------------------
8197 // resolveEdge: Perform the specified type of resolution between two blocks.
8198 //
8199 // Arguments:
8200 //    fromBlock     - the block from which the edge originates
8201 //    toBlock       - the block at which the edge terminates
8202 //    resolveType   - the type of resolution to be performed
8203 //    liveSet       - the set of tracked lclVar indices which may require resolution
8204 //
8205 // Return Value:
8206 //    None.
8207 //
8208 // Assumptions:
8209 //    The caller must have performed the analysis to determine the type of the edge.
8210 //
8211 // Notes:
8212 //    This method emits the correctly ordered moves necessary to place variables in the
8213 //    correct registers across a Split, Join or Critical edge.
8214 //    In order to avoid overwriting register values before they have been moved to their
8215 //    new home (register/stack), it first does the register-to-stack moves (to free those
8216 //    registers), then the register to register moves, ensuring that the target register
8217 //    is free before the move, and then finally the stack to register moves.
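     //    A minimal sketch with hypothetical assignments: V01: rdx -> stack is emitted first
     //    (freeing rdx), then V02: rax -> rdx once its target is free, and finally
     //    V03: stack -> rax after rax has been vacated by the register-to-register pass.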
8218
8219 void LinearScan::resolveEdge(BasicBlock*      fromBlock,
8220                              BasicBlock*      toBlock,
8221                              ResolveType      resolveType,
8222                              VARSET_VALARG_TP liveSet)
8223 {
8224     VarToRegMap fromVarToRegMap = getOutVarToRegMap(fromBlock->bbNum);
8225     VarToRegMap toVarToRegMap;
8226     if (resolveType == ResolveSharedCritical)
8227     {
8228         toVarToRegMap = sharedCriticalVarToRegMap;
8229     }
8230     else
8231     {
8232         toVarToRegMap = getInVarToRegMap(toBlock->bbNum);
8233     }
8234
8235     // The block to which we add the resolution moves depends on the resolveType
8236     BasicBlock* block;
8237     switch (resolveType)
8238     {
8239         case ResolveJoin:
8240         case ResolveSharedCritical:
8241             block = fromBlock;
8242             break;
8243         case ResolveSplit:
8244             block = toBlock;
8245             break;
8246         case ResolveCritical:
8247             // fgSplitEdge may add one or two BasicBlocks.  It returns the block that splits
8248             // the edge from 'fromBlock' to 'toBlock', but if it inserts that block right after
8249             // a block with a fall-through it will have to create another block to handle that edge.
8250             // These new blocks can be mapped to existing blocks in order to correctly handle
8251             // the calls to recordVarLocationsAtStartOfBB() from codegen.  That mapping is handled
8252             // in resolveEdges(), after all the edge resolution has been done (by calling this
8253             // method for each edge).
8254             block = compiler->fgSplitEdge(fromBlock, toBlock);
8255
8256             // Split edges are counted against fromBlock.
8257             INTRACK_STATS(updateLsraStat(LSRA_STAT_SPLIT_EDGE, fromBlock->bbNum));
8258             break;
8259         default:
8260             unreached();
8261             break;
8262     }
8263
8264 #ifndef _TARGET_XARCH_
8265     // We record tempregs for beginning and end of each block.
8266     // For amd64/x86 we only need a tempReg for float - we'll use xchg for int.
8267     // TODO-Throughput: It would be better to determine the tempRegs on demand, but the code below
8268     // modifies the varToRegMaps so we don't have all the correct registers at the time
8269     // we need to get the tempReg.
8270     regNumber tempRegInt =
8271         (resolveType == ResolveSharedCritical) ? REG_NA : getTempRegForResolution(fromBlock, toBlock, TYP_INT);
8272 #endif // !_TARGET_XARCH_
8273     regNumber tempRegFlt = REG_NA;
8274     regNumber tempRegDbl = REG_NA; // Used only for ARM
8275     if ((compiler->compFloatingPointUsed) && (resolveType != ResolveSharedCritical))
8276     {
8277 #ifdef _TARGET_ARM_
8278         // Try to reserve a double register for TYP_DOUBLE and use it for TYP_FLOAT too if available.
8279         tempRegDbl = getTempRegForResolution(fromBlock, toBlock, TYP_DOUBLE);
8280         if (tempRegDbl != REG_NA)
8281         {
8282             tempRegFlt = tempRegDbl;
8283         }
8284         else
8285 #endif // _TARGET_ARM_
8286         {
8287             tempRegFlt = getTempRegForResolution(fromBlock, toBlock, TYP_FLOAT);
8288         }
8289     }
8290
8291     regMaskTP targetRegsToDo      = RBM_NONE;
8292     regMaskTP targetRegsReady     = RBM_NONE;
8293     regMaskTP targetRegsFromStack = RBM_NONE;
8294
8295     // The following arrays capture the location of the registers as they are moved:
8296     // - location[reg] gives the current location of the var that was originally in 'reg'.
8297     //   (Note that a var may be moved more than once.)
8298     // - source[reg] gives the original location of the var that needs to be moved to 'reg'.
8299     // For example, if a var is in rax and needs to be moved to rsi, then we would start with:
8300     //   location[rax] == rax
8301     //   source[rsi] == rax     -- this doesn't change
8302     // Then, if for some reason we need to move it temporarily to rbx, we would have:
8303     //   location[rax] == rbx
8304     // Once we have completed the move, we will have:
8305     //   location[rax] == REG_NA
8306     // This indicates that the var originally in rax is now in its target register.
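         // If the reg-to-reg moves form a cycle (e.g. V01: rax -> rsi while V02: rsi -> rax),
         // no target register starts out free; the code below breaks such cycles with a swap
         // (xchg) on xarch, or with a temp register elsewhere, spilling to the stack in the
         // rare case where no temp is available.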
8307
8308     regNumberSmall location[REG_COUNT];
8309     C_ASSERT(sizeof(char) == sizeof(regNumberSmall)); // for memset to work
8310     memset(location, REG_NA, REG_COUNT);
8311     regNumberSmall source[REG_COUNT];
8312     memset(source, REG_NA, REG_COUNT);
8313
8314     // What interval is this register associated with?
8315     // (associated with incoming reg)
8316     Interval* sourceIntervals[REG_COUNT];
8317     memset(&sourceIntervals, 0, sizeof(sourceIntervals));
8318
8319     // Intervals for vars that need to be loaded from the stack
8320     Interval* stackToRegIntervals[REG_COUNT];
8321     memset(&stackToRegIntervals, 0, sizeof(stackToRegIntervals));
8322
8323     // Get the starting insertion point for the "to" resolution
8324     GenTree* insertionPoint = nullptr;
8325     if (resolveType == ResolveSplit || resolveType == ResolveCritical)
8326     {
8327         insertionPoint = LIR::AsRange(block).FirstNonPhiNode();
8328     }
8329
8330     // First:
8331     //   - Perform all moves from reg to stack (no ordering needed on these)
8332     //   - For reg to reg moves, record the current location, associating their
8333     //     source location with the target register they need to go into
8334     //   - For stack to reg moves (done last, no ordering needed between them)
8335     //     record the interval associated with the target reg
8336     // TODO-Throughput: We should be looping over the liveIn and liveOut registers, since
8337     // that will scale better than the live variables
8338
8339     VarSetOps::Iter iter(compiler, liveSet);
8340     unsigned        varIndex = 0;
8341     while (iter.NextElem(&varIndex))
8342     {
8343         regNumber fromReg = getVarReg(fromVarToRegMap, varIndex);
8344         regNumber toReg   = getVarReg(toVarToRegMap, varIndex);
8345         if (fromReg == toReg)
8346         {
8347             continue;
8348         }
8349
8350         // For Critical edges, the location will not change on either side of the edge,
8351         // since we'll add a new block to do the move.
8352         if (resolveType == ResolveSplit)
8353         {
8354             setVarReg(toVarToRegMap, varIndex, fromReg);
8355         }
8356         else if (resolveType == ResolveJoin || resolveType == ResolveSharedCritical)
8357         {
8358             setVarReg(fromVarToRegMap, varIndex, toReg);
8359         }
8360
8361         assert(fromReg < UCHAR_MAX && toReg < UCHAR_MAX);
8362
8363         Interval* interval = getIntervalForLocalVar(varIndex);
8364
8365         if (fromReg == REG_STK)
8366         {
8367             stackToRegIntervals[toReg] = interval;
8368             targetRegsFromStack |= genRegMask(toReg);
8369         }
8370         else if (toReg == REG_STK)
8371         {
8372             // Do the reg to stack moves now
8373             addResolution(block, insertionPoint, interval, REG_STK, fromReg);
8374             JITDUMP(" (%s)\n", resolveTypeName[resolveType]);
8375         }
8376         else
8377         {
8378             location[fromReg]        = (regNumberSmall)fromReg;
8379             source[toReg]            = (regNumberSmall)fromReg;
8380             sourceIntervals[fromReg] = interval;
8381             targetRegsToDo |= genRegMask(toReg);
8382         }
8383     }
8384
8385     // REGISTER to REGISTER MOVES
8386
8387     // First, find all the ones that are ready to move now
8388     regMaskTP targetCandidates = targetRegsToDo;
8389     while (targetCandidates != RBM_NONE)
8390     {
8391         regMaskTP targetRegMask = genFindLowestBit(targetCandidates);
8392         targetCandidates &= ~targetRegMask;
8393         regNumber targetReg = genRegNumFromMask(targetRegMask);
8394         if (location[targetReg] == REG_NA)
8395         {
8396 #ifdef _TARGET_ARM_
8397             regNumber sourceReg = (regNumber)source[targetReg];
8398             Interval* interval  = sourceIntervals[sourceReg];
8399             if (interval->registerType == TYP_DOUBLE)
8400             {
8401                 // For ARM32, make sure that both of the float halves of the double register are available.
8402                 assert(genIsValidDoubleReg(targetReg));
8403                 regNumber anotherHalfRegNum = REG_NEXT(targetReg);
8404                 if (location[anotherHalfRegNum] == REG_NA)
8405                 {
8406                     targetRegsReady |= targetRegMask;
8407                 }
8408             }
8409             else
8410 #endif // _TARGET_ARM_
8411             {
8412                 targetRegsReady |= targetRegMask;
8413             }
8414         }
8415     }
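         // A target register is "ready" when no pending value still occupies it: e.g. if
         // source[rdx] == rax and location[rdx] == REG_NA, the move rax -> rdx can be
         // emitted immediately without overwriting anything.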
8416
8417     // Perform reg to reg moves
8418     while (targetRegsToDo != RBM_NONE)
8419     {
8420         while (targetRegsReady != RBM_NONE)
8421         {
8422             regMaskTP targetRegMask = genFindLowestBit(targetRegsReady);
8423             targetRegsToDo &= ~targetRegMask;
8424             targetRegsReady &= ~targetRegMask;
8425             regNumber targetReg = genRegNumFromMask(targetRegMask);
8426             assert(location[targetReg] != targetReg);
8427             regNumber sourceReg = (regNumber)source[targetReg];
8428             regNumber fromReg   = (regNumber)location[sourceReg];
8429             assert(fromReg < UCHAR_MAX && sourceReg < UCHAR_MAX);
8430             Interval* interval = sourceIntervals[sourceReg];
8431             assert(interval != nullptr);
8432             addResolution(block, insertionPoint, interval, targetReg, fromReg);
8433             JITDUMP(" (%s)\n", resolveTypeName[resolveType]);
8434             sourceIntervals[sourceReg] = nullptr;
8435             location[sourceReg]        = REG_NA;
8436
8437             // Do we have a free targetReg?
8438             if (fromReg == sourceReg)
8439             {
8440                 if (source[fromReg] != REG_NA)
8441                 {
8442                     regMaskTP fromRegMask = genRegMask(fromReg);
8443                     targetRegsReady |= fromRegMask;
8444 #ifdef _TARGET_ARM_
8445                     if (genIsValidDoubleReg(fromReg))
8446                     {
8447                         // Ensure that either:
8448                         // - the Interval targeting fromReg is not double, or
8449                         // - the other half of the double is free.
8450                         Interval* otherInterval = sourceIntervals[source[fromReg]];
8451                         regNumber upperHalfReg  = REG_NEXT(fromReg);
8452                         if ((otherInterval->registerType == TYP_DOUBLE) && (location[upperHalfReg] != REG_NA))
8453                         {
8454                             targetRegsReady &= ~fromRegMask;
8455                         }
8456                     }
8457                 }
8458                 else if (genIsValidFloatReg(fromReg) && !genIsValidDoubleReg(fromReg))
8459                 {
8460                     // We may have freed up the other half of a double where the lower half
8461                     // was already free.
8462                     regNumber lowerHalfReg    = REG_PREV(fromReg);
8463                     regNumber lowerHalfSrcReg = (regNumber)source[lowerHalfReg];
8464                     regNumber lowerHalfSrcLoc = (regNumber)location[lowerHalfReg];
8465                     // Necessary conditions:
8466                     // - There is a source register for this reg (lowerHalfSrcReg != REG_NA)
8467                     // - It is currently free                    (lowerHalfSrcLoc == REG_NA)
8468                     // - The source interval isn't yet completed (sourceIntervals[lowerHalfSrcReg] != nullptr)
8469                     // - It's not in the ready set               ((targetRegsReady & genRegMask(lowerHalfReg)) ==
8470                     //                                            RBM_NONE)
8471                     //
8472                     if ((lowerHalfSrcReg != REG_NA) && (lowerHalfSrcLoc == REG_NA) &&
8473                         (sourceIntervals[lowerHalfSrcReg] != nullptr) &&
8474                         ((targetRegsReady & genRegMask(lowerHalfReg)) == RBM_NONE))
8475                     {
8476                         // This must be a double interval, otherwise it would be in targetRegsReady, or already
8477                         // completed.
8478                         assert(sourceIntervals[lowerHalfSrcReg]->registerType == TYP_DOUBLE);
8479                         targetRegsReady |= genRegMask(lowerHalfReg);
8480                     }
8481 #endif // _TARGET_ARM_
8482                 }
8483             }
8484         }
8485         if (targetRegsToDo != RBM_NONE)
8486         {
8487             regMaskTP targetRegMask = genFindLowestBit(targetRegsToDo);
8488             regNumber targetReg     = genRegNumFromMask(targetRegMask);
8489
8490             // Is it already there due to other moves?
8491             // If not, move it to the temp reg, OR swap it with another register
8492             regNumber sourceReg = (regNumber)source[targetReg];
8493             regNumber fromReg   = (regNumber)location[sourceReg];
8494             if (targetReg == fromReg)
8495             {
8496                 targetRegsToDo &= ~targetRegMask;
8497             }
8498             else
8499             {
8500                 regNumber tempReg = REG_NA;
8501                 bool      useSwap = false;
8502                 if (emitter::isFloatReg(targetReg))
8503                 {
8504 #ifdef _TARGET_ARM_
8505                     if (sourceIntervals[fromReg]->registerType == TYP_DOUBLE)
8506                     {
8507                         // ARM32 requires a double temp register for TYP_DOUBLE.
8508                         tempReg = tempRegDbl;
8509                     }
8510                     else
8511 #endif // _TARGET_ARM_
8512                         tempReg = tempRegFlt;
8513                 }
8514 #ifdef _TARGET_XARCH_
8515                 else
8516                 {
8517                     useSwap = true;
8518                 }
8519 #else // !_TARGET_XARCH_
8520
8521                 else
8522                 {
8523                     tempReg = tempRegInt;
8524                 }
8525
8526 #endif // !_TARGET_XARCH_
8527                 if (useSwap || tempReg == REG_NA)
8528                 {
8529                     // First, we have to figure out the destination register for what's currently in fromReg,
8530                     // so that we can find its sourceInterval.
8531                     regNumber otherTargetReg = REG_NA;
8532
8533                     // By chance, is fromReg going where it belongs?
8534                     if (location[source[fromReg]] == targetReg)
8535                     {
8536                         otherTargetReg = fromReg;
8537                         // If we can swap, we will be done with otherTargetReg as well.
8538                         // Otherwise, we'll spill it to the stack and reload it later.
8539                         if (useSwap)
8540                         {
8541                             regMaskTP fromRegMask = genRegMask(fromReg);
8542                             targetRegsToDo &= ~fromRegMask;
8543                         }
8544                     }
8545                     else
8546                     {
8547                         // Look at the remaining registers from targetRegsToDo (which we expect to be relatively
8548                         // small at this point) to find out what's currently in targetReg.
8549                         regMaskTP mask = targetRegsToDo;
8550                         while (mask != RBM_NONE && otherTargetReg == REG_NA)
8551                         {
8552                             regMaskTP nextRegMask = genFindLowestBit(mask);
8553                             regNumber nextReg     = genRegNumFromMask(nextRegMask);
8554                             mask &= ~nextRegMask;
8555                             if (location[source[nextReg]] == targetReg)
8556                             {
8557                                 otherTargetReg = nextReg;
8558                             }
8559                         }
8560                     }
8561                     assert(otherTargetReg != REG_NA);
8562
8563                     if (useSwap)
8564                     {
8565                         // Generate a "swap" of fromReg and targetReg
8566                         insertSwap(block, insertionPoint, sourceIntervals[source[otherTargetReg]]->varNum, targetReg,
8567                                    sourceIntervals[sourceReg]->varNum, fromReg);
8568                         location[sourceReg]              = REG_NA;
8569                         location[source[otherTargetReg]] = (regNumberSmall)fromReg;
8570
8571                         INTRACK_STATS(updateLsraStat(LSRA_STAT_RESOLUTION_MOV, block->bbNum));
8572                     }
8573                     else
8574                     {
8575                         // Spill "targetReg" to the stack and add its eventual target (otherTargetReg)
8576                         // to "targetRegsFromStack", which will be handled below.
8577                         // NOTE: This condition is very rare.  Setting COMPlus_JitStressRegs=0x203
8578                         // has been known to trigger it in JIT SH.
8579
8580                         // First, spill "otherInterval" from targetReg to the stack.
8581                         Interval* otherInterval = sourceIntervals[source[otherTargetReg]];
8582                         setIntervalAsSpilled(otherInterval);
8583                         addResolution(block, insertionPoint, otherInterval, REG_STK, targetReg);
8584                         JITDUMP(" (%s)\n", resolveTypeName[resolveType]);
8585                         location[source[otherTargetReg]] = REG_STK;
8586
8587                         // Now, move the interval that is going to targetReg, and add its "fromReg" to
8588                         // "targetRegsReady".
8589                         addResolution(block, insertionPoint, sourceIntervals[sourceReg], targetReg, fromReg);
8590                         JITDUMP(" (%s)\n", resolveTypeName[resolveType]);
8591                         location[sourceReg] = REG_NA;
8592                         targetRegsReady |= genRegMask(fromReg);
8593                     }
8594                     targetRegsToDo &= ~targetRegMask;
8595                 }
8596                 else
8597                 {
8598                     compiler->codeGen->regSet.rsSetRegsModified(genRegMask(tempReg) DEBUGARG(true));
8599 #ifdef _TARGET_ARM_
8600                     if (sourceIntervals[fromReg]->registerType == TYP_DOUBLE)
8601                     {
8602                         assert(genIsValidDoubleReg(targetReg));
8603                         assert(genIsValidDoubleReg(tempReg));
8604
8605                         addResolutionForDouble(block, insertionPoint, sourceIntervals, location, tempReg, targetReg,
8606                                                resolveType);
8607                     }
8608                     else
8609 #endif // _TARGET_ARM_
8610                     {
8611                         assert(sourceIntervals[targetReg] != nullptr);
8612
8613                         addResolution(block, insertionPoint, sourceIntervals[targetReg], tempReg, targetReg);
8614                         JITDUMP(" (%s)\n", resolveTypeName[resolveType]);
8615                         location[targetReg] = (regNumberSmall)tempReg;
8616                     }
8617                     targetRegsReady |= targetRegMask;
8618                 }
8619             }
8620         }
8621     }
8622
8623     // Finally, perform stack to reg moves
8624     // All the target regs will be empty at this point
8625     while (targetRegsFromStack != RBM_NONE)
8626     {
8627         regMaskTP targetRegMask = genFindLowestBit(targetRegsFromStack);
8628         targetRegsFromStack &= ~targetRegMask;
8629         regNumber targetReg = genRegNumFromMask(targetRegMask);
8630
8631         Interval* interval = stackToRegIntervals[targetReg];
8632         assert(interval != nullptr);
8633
8634         addResolution(block, insertionPoint, interval, targetReg, REG_STK);
8635         JITDUMP(" (%s)\n", resolveTypeName[resolveType]);
8636     }
8637 }
8638
8639 #if TRACK_LSRA_STATS
8640 // ----------------------------------------------------------
8641 // updateLsraStat: Increment LSRA stat counter.
8642 //
8643 // Arguments:
8644 //    stat      -   LSRA stat enum
8645 //    bbNum     -   Basic block with which the LSRA stat is to be
8646 //                  associated.
8647 //
8648 void LinearScan::updateLsraStat(LsraStat stat, unsigned bbNum)
8649 {
8650     if (bbNum > bbNumMaxBeforeResolution)
8651     {
8652         // This is a newly created basic block as part of resolution.
8653         // These blocks contain resolution moves that are already accounted for.
8654         return;
8655     }
8656
8657     switch (stat)
8658     {
8659         case LSRA_STAT_SPILL:
8660             ++(blockInfo[bbNum].spillCount);
8661             break;
8662
8663         case LSRA_STAT_COPY_REG:
8664             ++(blockInfo[bbNum].copyRegCount);
8665             break;
8666
8667         case LSRA_STAT_RESOLUTION_MOV:
8668             ++(blockInfo[bbNum].resolutionMovCount);
8669             break;
8670
8671         case LSRA_STAT_SPLIT_EDGE:
8672             ++(blockInfo[bbNum].splitEdgeCount);
8673             break;
8674
8675         default:
8676             break;
8677     }
8678 }
8679
8680 // -----------------------------------------------------------
8681 // dumpLsraStats - dumps Lsra stats to given file.
8682 //
8683 // Arguments:
8684 //    file    -  file to which stats are to be written.
8685 //
8686 void LinearScan::dumpLsraStats(FILE* file)
8687 {
8688     unsigned sumSpillCount         = 0;
8689     unsigned sumCopyRegCount       = 0;
8690     unsigned sumResolutionMovCount = 0;
8691     unsigned sumSplitEdgeCount     = 0;
8692     UINT64   wtdSpillCount         = 0;
8693     UINT64   wtdCopyRegCount       = 0;
8694     UINT64   wtdResolutionMovCount = 0;
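         // The weighted counters scale each block's count by bbWeight; e.g. two spills in a
         // block with weight 100 contribute 200 to wtdSpillCount below.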
8695
8696     fprintf(file, "----------\n");
8697     fprintf(file, "LSRA Stats");
8698 #ifdef DEBUG
8699     if (!VERBOSE)
8700     {
8701         fprintf(file, " : %s\n", compiler->info.compFullName);
8702     }
8703     else
8704     {
8705         // In verbose mode there is no need to print the full
8706         // method name while printing lsra stats.
8707         fprintf(file, "\n");
8708     }
8709 #else
8710     fprintf(file, " : %s\n", compiler->eeGetMethodFullName(compiler->info.compCompHnd));
8711 #endif
8712
8713     fprintf(file, "----------\n");
8714
8715     for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = block->bbNext)
8716     {
8717         if (block->bbNum > bbNumMaxBeforeResolution)
8718         {
8719             continue;
8720         }
8721
8722         unsigned spillCount         = blockInfo[block->bbNum].spillCount;
8723         unsigned copyRegCount       = blockInfo[block->bbNum].copyRegCount;
8724         unsigned resolutionMovCount = blockInfo[block->bbNum].resolutionMovCount;
8725         unsigned splitEdgeCount     = blockInfo[block->bbNum].splitEdgeCount;
8726
8727         if (spillCount != 0 || copyRegCount != 0 || resolutionMovCount != 0 || splitEdgeCount != 0)
8728         {
8729             fprintf(file, FMT_BB " [%8d]: ", block->bbNum, block->bbWeight);
8730             fprintf(file, "SpillCount = %d, ResolutionMovs = %d, SplitEdges = %d, CopyReg = %d\n", spillCount,
8731                     resolutionMovCount, splitEdgeCount, copyRegCount);
8732         }
8733
8734         sumSpillCount += spillCount;
8735         sumCopyRegCount += copyRegCount;
8736         sumResolutionMovCount += resolutionMovCount;
8737         sumSplitEdgeCount += splitEdgeCount;
8738
8739         wtdSpillCount += (UINT64)spillCount * block->bbWeight;
8740         wtdCopyRegCount += (UINT64)copyRegCount * block->bbWeight;
8741         wtdResolutionMovCount += (UINT64)resolutionMovCount * block->bbWeight;
8742     }
8743
8744     fprintf(file, "Total Tracked Vars:  %d\n", compiler->lvaTrackedCount);
8745     fprintf(file, "Total Reg Cand Vars: %d\n", regCandidateVarCount);
8746     fprintf(file, "Total number of Intervals: %d\n", static_cast<unsigned>(intervals.size() - 1));
8747     fprintf(file, "Total number of RefPositions: %d\n", static_cast<unsigned>(refPositions.size() - 1));
8748     fprintf(file, "Total Spill Count: %d    Weighted: %I64u\n", sumSpillCount, wtdSpillCount);
8749     fprintf(file, "Total CopyReg Count: %d   Weighted: %I64u\n", sumCopyRegCount, wtdCopyRegCount);
8750     fprintf(file, "Total ResolutionMov Count: %d    Weighted: %I64u\n", sumResolutionMovCount, wtdResolutionMovCount);
8751     fprintf(file, "Total number of split edges: %d\n", sumSplitEdgeCount);
8752
8753     // compute total number of spill temps created
8754     unsigned numSpillTemps = 0;
8755     for (int i = 0; i < TYP_COUNT; i++)
8756     {
8757         numSpillTemps += maxSpill[i];
8758     }
8759     fprintf(file, "Total Number of spill temps created: %d\n\n", numSpillTemps);
8760 }
8761 #endif // TRACK_LSRA_STATS
8762
8763 #ifdef DEBUG
8764 void dumpRegMask(regMaskTP regs)
8765 {
8766     if (regs == RBM_ALLINT)
8767     {
8768         printf("[allInt]");
8769     }
8770     else if (regs == (RBM_ALLINT & ~RBM_FPBASE))
8771     {
8772         printf("[allIntButFP]");
8773     }
8774     else if (regs == RBM_ALLFLOAT)
8775     {
8776         printf("[allFloat]");
8777     }
8778     else if (regs == RBM_ALLDOUBLE)
8779     {
8780         printf("[allDouble]");
8781     }
8782     else
8783     {
8784         dspRegMask(regs);
8785     }
8786 }
8787
8788 static const char* getRefTypeName(RefType refType)
8789 {
8790     switch (refType)
8791     {
8792 #define DEF_REFTYPE(memberName, memberValue, shortName)                                                                \
8793     case memberName:                                                                                                   \
8794         return #memberName;
8795 #include "lsra_reftypes.h"
8796 #undef DEF_REFTYPE
8797         default:
8798             return nullptr;
8799     }
8800 }
8801
8802 static const char* getRefTypeShortName(RefType refType)
8803 {
8804     switch (refType)
8805     {
8806 #define DEF_REFTYPE(memberName, memberValue, shortName)                                                                \
8807     case memberName:                                                                                                   \
8808         return shortName;
8809 #include "lsra_reftypes.h"
8810 #undef DEF_REFTYPE
8811         default:
8812             return nullptr;
8813     }
8814 }
8815
8816 void RefPosition::dump()
8817 {
8818     printf("<RefPosition #%-3u @%-3u", rpNum, nodeLocation);
8819
8820     printf(" %s ", getRefTypeName(refType));
8821
8822     if (this->isPhysRegRef)
8823     {
8824         this->getReg()->tinyDump();
8825     }
8826     else if (getInterval())
8827     {
8828         this->getInterval()->tinyDump();
8829     }
8830
8831     if (this->treeNode)
8832     {
8833         printf("%s ", treeNode->OpName(treeNode->OperGet()));
8834     }
8835     printf(FMT_BB " ", this->bbNum);
8836
8837     printf("regmask=");
8838     dumpRegMask(registerAssignment);
8839
8840     printf(" minReg=%d", minRegCandidateCount);
8841
8842     if (this->lastUse)
8843     {
8844         printf(" last");
8845     }
8846     if (this->reload)
8847     {
8848         printf(" reload");
8849     }
8850     if (this->spillAfter)
8851     {
8852         printf(" spillAfter");
8853     }
8854     if (this->moveReg)
8855     {
8856         printf(" move");
8857     }
8858     if (this->copyReg)
8859     {
8860         printf(" copy");
8861     }
8862     if (this->isFixedRegRef)
8863     {
8864         printf(" fixed");
8865     }
8866     if (this->isLocalDefUse)
8867     {
8868         printf(" local");
8869     }
8870     if (this->delayRegFree)
8871     {
8872         printf(" delay");
8873     }
8874     if (this->outOfOrder)
8875     {
8876         printf(" outOfOrder");
8877     }
8878
8879     if (this->RegOptional())
8880     {
8881         printf(" regOptional");
8882     }
8883     printf(">\n");
8884 }
8885
8886 void RegRecord::dump()
8887 {
8888     tinyDump();
8889 }
8890
8891 void Interval::dump()
8892 {
8893     printf("Interval %2u:", intervalIndex);
8894
8895     if (isLocalVar)
8896     {
8897         printf(" (V%02u)", varNum);
8898     }
8899     else if (IsUpperVector())
8900     {
8901         assert(relatedInterval != nullptr);
8902         printf(" (U%02u)", relatedInterval->varNum);
8903     }
8904     printf(" %s", varTypeName(registerType));
8905     if (isInternal)
8906     {
8907         printf(" (INTERNAL)");
8908     }
8909     if (isSpilled)
8910     {
8911         printf(" (SPILLED)");
8912     }
8913     if (isSplit)
8914     {
8915         printf(" (SPLIT)");
8916     }
8917     if (isStructField)
8918     {
8919         printf(" (struct)");
8920     }
8921     if (isPromotedStruct)
8922     {
8923         printf(" (promoted struct)");
8924     }
8925     if (hasConflictingDefUse)
8926     {
8927         printf(" (def-use conflict)");
8928     }
8929     if (hasInterferingUses)
8930     {
8931         printf(" (interfering uses)");
8932     }
8933     if (isSpecialPutArg)
8934     {
8935         printf(" (specialPutArg)");
8936     }
8937     if (isConstant)
8938     {
8939         printf(" (constant)");
8940     }
8941
8942     printf(" RefPositions {");
8943     for (RefPosition* refPosition = this->firstRefPosition; refPosition != nullptr;
8944          refPosition              = refPosition->nextRefPosition)
8945     {
8946         printf("#%u@%u", refPosition->rpNum, refPosition->nodeLocation);
8947         if (refPosition->nextRefPosition)
8948         {
8949             printf(" ");
8950         }
8951     }
8952     printf("}");
8953
8954     // this is not used (yet?)
8955     // printf(" SpillOffset %d", this->spillOffset);
8956
8957     printf(" physReg:%s", getRegName(physReg));
8958
8959     printf(" Preferences=");
8960     dumpRegMask(this->registerPreferences);
8961
8962     if (relatedInterval)
8963     {
8964         printf(" RelatedInterval ");
8965         relatedInterval->microDump();
8966     }
8967
8968     printf("\n");
8969 }
8970
8971 // print out very concise representation
8972 void Interval::tinyDump()
8973 {
8974     printf("<Ivl:%u", intervalIndex);
8975     if (isLocalVar)
8976     {
8977         printf(" V%02u", varNum);
8978     }
8979     else if (IsUpperVector())
8980     {
8981         assert(relatedInterval != nullptr);
8982         printf(" (U%02u)", relatedInterval->varNum);
8983     }
8984     else if (isInternal)
8985     {
8986         printf(" internal");
8987     }
8988     printf("> ");
8989 }
8990
8991 // print out extremely concise representation
8992 void Interval::microDump()
8993 {
8994     if (isLocalVar)
8995     {
8996         printf("<V%02u/L%u>", varNum, intervalIndex);
8997         return;
8998     }
8999     else if (IsUpperVector())
9000     {
9001         assert(relatedInterval != nullptr);
9002         printf(" (U%02u)", relatedInterval->varNum);
9003     }
9004     char intervalTypeChar = 'I';
9005     if (isInternal)
9006     {
9007         intervalTypeChar = 'T';
9008     }
9009     printf("<%c%u>", intervalTypeChar, intervalIndex);
9010 }
9011
9012 void RegRecord::tinyDump()
9013 {
9014     printf("<Reg:%-3s> ", getRegName(regNum));
9015 }
9016
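//------------------------------------------------------------------------
// dumpNodeInfo: dump the register requirements that were just built for a node,
// in a format resembling the old TreeNodeInfo dump.
//
// Arguments:
//    node          - the node whose requirements are being dumped
//    dstCandidates - the candidate registers for the node's definition(s)
//    srcCount      - the number of registers consumed by the node
//    dstCount      - the number of registers defined by the node
//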
9017 void LinearScan::dumpNodeInfo(GenTree* node, regMaskTP dstCandidates, int srcCount, int dstCount)
9018 {
9019     if (!VERBOSE)
9020     {
9021         return;
9022     }
9023     // This is formatted like the old dump to make diffs easier. TODO-Cleanup: improve.
9024     int       internalIntCount   = 0;
9025     int       internalFloatCount = 0;
9026     regMaskTP internalCandidates = RBM_NONE;
9027     for (int i = 0; i < internalCount; i++)
9028     {
9029         RefPosition* def = internalDefs[i];
9030         if (def->getInterval()->registerType == TYP_INT)
9031         {
9032             internalIntCount++;
9033         }
9034         else
9035         {
9036             internalFloatCount++;
9037         }
9038         internalCandidates |= def->registerAssignment;
9039     }
9040     if (dstCandidates == RBM_NONE)
9041     {
9042         dstCandidates = varTypeIsFloating(node) ? allRegs(TYP_FLOAT) : allRegs(TYP_INT);
9043     }
9044     if (internalCandidates == RBM_NONE)
9045     {
9046         internalCandidates = allRegs(TYP_INT);
9047     }
9048     printf("    +<TreeNodeInfo %d=%d %di %df", dstCount, srcCount, internalIntCount, internalFloatCount);
9049     printf(" src=");
9050     dumpRegMask(varTypeIsFloating(node) ? allRegs(TYP_FLOAT) : allRegs(TYP_INT));
9051     printf(" int=");
9052     dumpRegMask(internalCandidates);
9053     printf(" dst=");
9054     dumpRegMask(dstCandidates);
9055     if (node->IsUnusedValue())
9056     {
9057         printf(" L");
9058     }
9059     printf(" I");
9060     if (pendingDelayFree)
9061     {
9062         printf(" D");
9063     }
9064     if (setInternalRegsDelayFree)
9065     {
9066         printf(" ID");
9067     }
9068     printf(">");
9069     node->dumpLIRFlags();
9070     printf("\n  consume= %d produce=%d\n", srcCount, dstCount);
9071 }
9072
9073 void LinearScan::dumpDefList()
9074 {
9075     if (!VERBOSE)
9076     {
9077         return;
9078     }
9079     JITDUMP("DefList: { ");
9080     bool first = true;
9081     for (RefInfoListNode *listNode = defList.Begin(), *end = defList.End(); listNode != end;
9082          listNode = listNode->Next())
9083     {
9084         GenTree* node = listNode->treeNode;
9085         JITDUMP("%sN%03u.t%d. %s", first ? "" : "; ", node->gtSeqNum, node->gtTreeID, GenTree::OpName(node->OperGet()));
9086         first = false;
9087     }
9088     JITDUMP(" }\n");
9089 }
9090
9091 void LinearScan::lsraDumpIntervals(const char* msg)
9092 {
9093     printf("\nLinear scan intervals %s:\n", msg);
9094     for (Interval& interval : intervals)
9095     {
9096         // only dump something if it has references
9097         // if (interval->firstRefPosition)
9098         interval.dump();
9099     }
9100
9101     printf("\n");
9102 }
9103
9104 // Dumps a tree node as a destination or source operand, with the style
9105 // of dump dependent on the mode
9106 void LinearScan::lsraGetOperandString(GenTree*          tree,
9107                                       LsraTupleDumpMode mode,
9108                                       char*             operandString,
9109                                       unsigned          operandStringLength)
9110 {
9111     const char* lastUseChar = "";
9112     if ((tree->gtFlags & GTF_VAR_DEATH) != 0)
9113     {
9114         lastUseChar = "*";
9115     }
9116     switch (mode)
9117     {
        case LinearScan::LSRA_DUMP_PRE:
        case LinearScan::LSRA_DUMP_REFPOS:
            _snprintf_s(operandString, operandStringLength, operandStringLength, "t%d%s", tree->gtTreeID, lastUseChar);
            break;
9124         case LinearScan::LSRA_DUMP_POST:
9125         {
9126             Compiler* compiler = JitTls::GetCompiler();
9127
9128             if (!tree->gtHasReg())
9129             {
9130                 _snprintf_s(operandString, operandStringLength, operandStringLength, "STK%s", lastUseChar);
9131             }
9132             else
9133             {
9134                 regNumber reg       = tree->gtRegNum;
9135                 int       charCount = _snprintf_s(operandString, operandStringLength, operandStringLength, "%s%s",
9136                                             getRegName(reg, genIsValidFloatReg(reg)), lastUseChar);
9137                 operandString += charCount;
9138                 operandStringLength -= charCount;
9139
9140                 if (tree->IsMultiRegNode())
9141                 {
9142                     unsigned regCount = tree->GetMultiRegCount();
9143                     for (unsigned regIndex = 1; regIndex < regCount; regIndex++)
9144                     {
9145                         regNumber reg = tree->GetRegByIndex(regIndex);
9146                         charCount     = _snprintf_s(operandString, operandStringLength, operandStringLength, ",%s%s",
9147                                                 getRegName(reg, genIsValidFloatReg(reg)), lastUseChar);
9148                         operandString += charCount;
9149                         operandStringLength -= charCount;
9150                     }
9151                 }
9152             }
9153         }
9154         break;
9155         default:
9156             printf("ERROR: INVALID TUPLE DUMP MODE\n");
9157             break;
9158     }
9159 }

void LinearScan::lsraDispNode(GenTree* tree, LsraTupleDumpMode mode, bool hasDest)
9161 {
9162     Compiler*      compiler            = JitTls::GetCompiler();
9163     const unsigned operandStringLength = 16;
9164     char           operandString[operandStringLength];
9165     const char*    emptyDestOperand = "               ";
9166     char           spillChar        = ' ';
9167
9168     if (mode == LinearScan::LSRA_DUMP_POST)
9169     {
9170         if ((tree->gtFlags & GTF_SPILL) != 0)
9171         {
9172             spillChar = 'S';
9173         }
9174         if (!hasDest && tree->gtHasReg())
9175         {
9176             // A node can define a register, but not produce a value for a parent to consume,
9177             // i.e. in the "localDefUse" case.
9178             // There used to be an assert here that we wouldn't spill such a node.
9179             // However, we can have unused lclVars that wind up being the node at which
            // it is spilled. This probably indicates a bug, but we don't really want to
9181             // assert during a dump.
9182             if (spillChar == 'S')
9183             {
9184                 spillChar = '$';
9185             }
9186             else
9187             {
9188                 spillChar = '*';
9189             }
9190             hasDest = true;
9191         }
9192     }
9193     printf("%c N%03u. ", spillChar, tree->gtSeqNum);
9194
9195     LclVarDsc* varDsc = nullptr;
9196     unsigned   varNum = UINT_MAX;
9197     if (tree->IsLocal())
9198     {
9199         varNum = tree->gtLclVarCommon.gtLclNum;
9200         varDsc = &(compiler->lvaTable[varNum]);
9201         if (varDsc->lvLRACandidate)
9202         {
9203             hasDest = false;
9204         }
9205     }
9206     if (hasDest)
9207     {
        if ((mode == LinearScan::LSRA_DUMP_POST) && ((tree->gtFlags & GTF_SPILLED) != 0))
9209         {
9210             assert(tree->gtHasReg());
9211         }
9212         lsraGetOperandString(tree, mode, operandString, operandStringLength);
9213         printf("%-15s =", operandString);
9214     }
9215     else
9216     {
9217         printf("%-15s  ", emptyDestOperand);
9218     }
9219     if (varDsc != nullptr)
9220     {
9221         if (varDsc->lvLRACandidate)
9222         {
9223             if (mode == LSRA_DUMP_REFPOS)
9224             {
9225                 printf("  V%02u(L%d)", varNum, getIntervalForLocalVar(varDsc->lvVarIndex)->intervalIndex);
9226             }
9227             else
9228             {
9229                 lsraGetOperandString(tree, mode, operandString, operandStringLength);
9230                 printf("  V%02u(%s)", varNum, operandString);
                if ((mode == LinearScan::LSRA_DUMP_POST) && ((tree->gtFlags & GTF_SPILLED) != 0))
9232                 {
9233                     printf("R");
9234                 }
9235             }
9236         }
9237         else
9238         {
9239             printf("  V%02u MEM", varNum);
9240         }
9241     }
9242     else if (tree->OperIs(GT_ASG))
9243     {
9244         assert(!tree->gtHasReg());
9245         printf("  asg%s  ", GenTree::OpName(tree->OperGet()));
9246     }
9247     else
9248     {
9249         compiler->gtDispNodeName(tree);
9250         if (tree->OperKind() & GTK_LEAF)
9251         {
9252             compiler->gtDispLeaf(tree, nullptr);
9253         }
9254     }
9255 }
9256
9257 //------------------------------------------------------------------------
9258 // DumpOperandDefs: dumps the registers defined by a node.
9259 //
9260 // Arguments:
//    operand             - The operand whose defs are to be dumped.
//    first               - [in, out] true until the first operand def has been printed.
//    mode                - The tuple dump mode.
//    operandString       - The buffer used to format the operand text.
//    operandStringLength - The size of `operandString`.
9265 //
9266 void LinearScan::DumpOperandDefs(
9267     GenTree* operand, bool& first, LsraTupleDumpMode mode, char* operandString, const unsigned operandStringLength)
9268 {
9269     assert(operand != nullptr);
9270     assert(operandString != nullptr);
9271     if (!operand->IsLIR())
9272     {
9273         return;
9274     }
9275
9276     int dstCount = ComputeOperandDstCount(operand);
9277
9278     if (dstCount != 0)
9279     {
9280         // This operand directly produces registers; print it.
9281         if (!first)
9282         {
9283             printf(",");
9284         }
9285         lsraGetOperandString(operand, mode, operandString, operandStringLength);
9286         printf("%s", operandString);
9287         first = false;
9288     }
9289     else if (operand->isContained())
9290     {
9291         // This is a contained node. Dump the defs produced by its operands.
9292         for (GenTree* op : operand->Operands())
9293         {
9294             DumpOperandDefs(op, first, mode, operandString, operandStringLength);
9295         }
9296     }
9297 }
9298
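//------------------------------------------------------------------------
// TupleStyleDump: dump the LIR for each block in "tuple style", one node per line,
// interleaved with the RefPositions (LSRA_DUMP_REFPOS) or annotated with the final
// register assignments (LSRA_DUMP_POST).
//
// Arguments:
//    mode - the dump mode: LSRA_DUMP_PRE, LSRA_DUMP_REFPOS, or LSRA_DUMP_POST
//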
9299 void LinearScan::TupleStyleDump(LsraTupleDumpMode mode)
9300 {
9301     BasicBlock*    block;
9302     LsraLocation   currentLoc          = 1; // 0 is the entry
9303     const unsigned operandStringLength = 16;
9304     char           operandString[operandStringLength];
9305
9306     // currentRefPosition is not used for LSRA_DUMP_PRE
9307     // We keep separate iterators for defs, so that we can print them
9308     // on the lhs of the dump
9309     RefPositionIterator refPosIterator     = refPositions.begin();
9310     RefPosition*        currentRefPosition = &refPosIterator;
9311
9312     switch (mode)
9313     {
9314         case LSRA_DUMP_PRE:
9315             printf("TUPLE STYLE DUMP BEFORE LSRA\n");
9316             break;
9317         case LSRA_DUMP_REFPOS:
9318             printf("TUPLE STYLE DUMP WITH REF POSITIONS\n");
9319             break;
9320         case LSRA_DUMP_POST:
9321             printf("TUPLE STYLE DUMP WITH REGISTER ASSIGNMENTS\n");
9322             break;
9323         default:
9324             printf("ERROR: INVALID TUPLE DUMP MODE\n");
9325             return;
9326     }
9327
9328     if (mode != LSRA_DUMP_PRE)
9329     {
9330         printf("Incoming Parameters: ");
9331         for (; refPosIterator != refPositions.end() && currentRefPosition->refType != RefTypeBB;
9332              ++refPosIterator, currentRefPosition = &refPosIterator)
9333         {
9334             Interval* interval = currentRefPosition->getInterval();
9335             assert(interval != nullptr && interval->isLocalVar);
            printf(" V%02u", interval->varNum);
9337             if (mode == LSRA_DUMP_POST)
9338             {
9339                 regNumber reg;
9340                 if (currentRefPosition->registerAssignment == RBM_NONE)
9341                 {
9342                     reg = REG_STK;
9343                 }
9344                 else
9345                 {
9346                     reg = currentRefPosition->assignedReg();
9347                 }
9348                 LclVarDsc* varDsc = &(compiler->lvaTable[interval->varNum]);
9349                 printf("(");
9350                 regNumber assignedReg = varDsc->lvRegNum;
9351                 regNumber argReg      = (varDsc->lvIsRegArg) ? varDsc->lvArgReg : REG_STK;
9352
9353                 assert(reg == assignedReg || varDsc->lvRegister == false);
9354                 if (reg != argReg)
9355                 {
                    printf("%s", getRegName(argReg, isFloatRegType(interval->registerType)));
9357                     printf("=>");
9358                 }
9359                 printf("%s)", getRegName(reg, isFloatRegType(interval->registerType)));
9360             }
9361         }
9362         printf("\n");
9363     }
9364
9365     for (block = startBlockSequence(); block != nullptr; block = moveToNextBlock())
9366     {
9367         currentLoc += 2;
9368
9369         if (mode == LSRA_DUMP_REFPOS)
9370         {
9371             bool printedBlockHeader = false;
            // We should find the boundary RefPositions in the order of exposed uses, dummy defs,
            // and then the block's RefTypeBB.
9373             for (; refPosIterator != refPositions.end() &&
9374                    (currentRefPosition->refType == RefTypeExpUse || currentRefPosition->refType == RefTypeDummyDef ||
9375                     (currentRefPosition->refType == RefTypeBB && !printedBlockHeader));
9376                  ++refPosIterator, currentRefPosition = &refPosIterator)
9377             {
9378                 Interval* interval = nullptr;
9379                 if (currentRefPosition->isIntervalRef())
9380                 {
9381                     interval = currentRefPosition->getInterval();
9382                 }
9383                 switch (currentRefPosition->refType)
9384                 {
9385                     case RefTypeExpUse:
9386                         assert(interval != nullptr);
9387                         assert(interval->isLocalVar);
9388                         printf("  Exposed use of V%02u at #%d\n", interval->varNum, currentRefPosition->rpNum);
9389                         break;
9390                     case RefTypeDummyDef:
9391                         assert(interval != nullptr);
9392                         assert(interval->isLocalVar);
9393                         printf("  Dummy def of V%02u at #%d\n", interval->varNum, currentRefPosition->rpNum);
9394                         break;
9395                     case RefTypeBB:
9396                         block->dspBlockHeader(compiler);
9397                         printedBlockHeader = true;
9398                         printf("=====\n");
9399                         break;
9400                     default:
9401                         printf("Unexpected RefPosition type at #%d\n", currentRefPosition->rpNum);
9402                         break;
9403                 }
9404             }
9405         }
9406         else
9407         {
9408             block->dspBlockHeader(compiler);
9409             printf("=====\n");
9410         }
9411         if (enregisterLocalVars && mode == LSRA_DUMP_POST && block != compiler->fgFirstBB &&
9412             block->bbNum <= bbNumMaxBeforeResolution)
9413         {
9414             printf("Predecessor for variable locations: " FMT_BB "\n", blockInfo[block->bbNum].predBBNum);
9415             dumpInVarToRegMap(block);
9416         }
9417         if (block->bbNum > bbNumMaxBeforeResolution)
9418         {
9419             SplitEdgeInfo splitEdgeInfo;
9420             splitBBNumToTargetBBNumMap->Lookup(block->bbNum, &splitEdgeInfo);
9421             assert(splitEdgeInfo.toBBNum <= bbNumMaxBeforeResolution);
9422             assert(splitEdgeInfo.fromBBNum <= bbNumMaxBeforeResolution);
9423             printf("New block introduced for resolution from " FMT_BB " to " FMT_BB "\n", splitEdgeInfo.fromBBNum,
9424                    splitEdgeInfo.toBBNum);
9425         }
9426
9427         for (GenTree* node : LIR::AsRange(block).NonPhiNodes())
9428         {
9429             GenTree* tree = node;
9430
9431             genTreeOps oper      = tree->OperGet();
9432             int        produce   = tree->IsValue() ? ComputeOperandDstCount(tree) : 0;
9433             int        consume   = ComputeAvailableSrcCount(tree);
9434             regMaskTP  killMask  = RBM_NONE;
9435             regMaskTP  fixedMask = RBM_NONE;
9436
9437             lsraDispNode(tree, mode, produce != 0 && mode != LSRA_DUMP_REFPOS);
9438
9439             if (mode != LSRA_DUMP_REFPOS)
9440             {
9441                 if (consume > 0)
9442                 {
9443                     printf("; ");
9444
9445                     bool first = true;
9446                     for (GenTree* operand : tree->Operands())
9447                     {
9448                         DumpOperandDefs(operand, first, mode, operandString, operandStringLength);
9449                     }
9450                 }
9451             }
9452             else
9453             {
                // Print each RefPosition on a new line, but print all the kills for each node
                // on a single line, and combine the fixed regs with their associated def or use.
9457                 bool         killPrinted        = false;
9458                 RefPosition* lastFixedRegRefPos = nullptr;
9459                 for (; refPosIterator != refPositions.end() &&
9460                        (currentRefPosition->refType == RefTypeUse || currentRefPosition->refType == RefTypeFixedReg ||
9461                         currentRefPosition->refType == RefTypeKill || currentRefPosition->refType == RefTypeDef) &&
9462                        (currentRefPosition->nodeLocation == tree->gtSeqNum ||
9463                         currentRefPosition->nodeLocation == tree->gtSeqNum + 1);
9464                      ++refPosIterator, currentRefPosition = &refPosIterator)
9465                 {
9466                     Interval* interval = nullptr;
9467                     if (currentRefPosition->isIntervalRef())
9468                     {
9469                         interval = currentRefPosition->getInterval();
9470                     }
9471                     switch (currentRefPosition->refType)
9472                     {
9473                         case RefTypeUse:
9474                             if (currentRefPosition->isPhysRegRef)
9475                             {
9476                                 printf("\n                               Use:R%d(#%d)",
9477                                        currentRefPosition->getReg()->regNum, currentRefPosition->rpNum);
9478                             }
9479                             else
9480                             {
9481                                 assert(interval != nullptr);
9482                                 printf("\n                               Use:");
9483                                 interval->microDump();
9484                                 printf("(#%d)", currentRefPosition->rpNum);
9485                                 if (currentRefPosition->isFixedRegRef && !interval->isInternal)
9486                                 {
9487                                     assert(genMaxOneBit(currentRefPosition->registerAssignment));
9488                                     assert(lastFixedRegRefPos != nullptr);
9489                                     printf(" Fixed:%s(#%d)", getRegName(currentRefPosition->assignedReg(),
9490                                                                         isFloatRegType(interval->registerType)),
9491                                            lastFixedRegRefPos->rpNum);
9492                                     lastFixedRegRefPos = nullptr;
9493                                 }
9494                                 if (currentRefPosition->isLocalDefUse)
9495                                 {
9496                                     printf(" LocalDefUse");
9497                                 }
9498                                 if (currentRefPosition->lastUse)
9499                                 {
9500                                     printf(" *");
9501                                 }
9502                             }
9503                             break;
9504                         case RefTypeDef:
9505                         {
9506                             // Print each def on a new line
9507                             assert(interval != nullptr);
9508                             printf("\n        Def:");
9509                             interval->microDump();
9510                             printf("(#%d)", currentRefPosition->rpNum);
9511                             if (currentRefPosition->isFixedRegRef)
9512                             {
9513                                 assert(genMaxOneBit(currentRefPosition->registerAssignment));
9514                                 printf(" %s", getRegName(currentRefPosition->assignedReg(),
9515                                                          isFloatRegType(interval->registerType)));
9516                             }
9517                             if (currentRefPosition->isLocalDefUse)
9518                             {
9519                                 printf(" LocalDefUse");
9520                             }
9521                             if (currentRefPosition->lastUse)
9522                             {
9523                                 printf(" *");
9524                             }
9525                             if (interval->relatedInterval != nullptr)
9526                             {
9527                                 printf(" Pref:");
9528                                 interval->relatedInterval->microDump();
9529                             }
9530                         }
9531                         break;
9532                         case RefTypeKill:
9533                             if (!killPrinted)
9534                             {
9535                                 printf("\n        Kill: ");
9536                                 killPrinted = true;
9537                             }
                            printf("%s ", getRegName(currentRefPosition->assignedReg(),
                                                     isFloatRegType(currentRefPosition->getReg()->registerType)));
9541                             break;
9542                         case RefTypeFixedReg:
9543                             lastFixedRegRefPos = currentRefPosition;
9544                             break;
9545                         default:
9546                             printf("Unexpected RefPosition type at #%d\n", currentRefPosition->rpNum);
9547                             break;
9548                     }
9549                 }
9550             }
9551             printf("\n");
9552         }
9553         if (enregisterLocalVars && mode == LSRA_DUMP_POST)
9554         {
9555             dumpOutVarToRegMap(block);
9556         }
9557         printf("\n");
9558     }
9559     printf("\n\n");
9560 }
9561
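//------------------------------------------------------------------------
// dumpLsraAllocationEvent: dump a single line describing an allocation event,
// using the column-based format set up by dumpRegRecordHeader().
//
// Arguments:
//    event        - the allocation event being dumped
//    interval     - the affected Interval, if any
//    reg          - the affected register, if any
//    currentBlock - the BasicBlock being allocated, used for the short RefPosition dump
//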
9562 void LinearScan::dumpLsraAllocationEvent(LsraDumpEvent event,
9563                                          Interval*     interval,
9564                                          regNumber     reg,
9565                                          BasicBlock*   currentBlock)
9566 {
    if (!VERBOSE)
9568     {
9569         return;
9570     }
9571     if ((interval != nullptr) && (reg != REG_NA) && (reg != REG_STK))
9572     {
9573         registersToDump |= genRegMask(reg);
9574         dumpRegRecordTitleIfNeeded();
9575     }
9576
9577     switch (event)
9578     {
9579         // Conflicting def/use
9580         case LSRA_EVENT_DEFUSE_CONFLICT:
9581             dumpRefPositionShort(activeRefPosition, currentBlock);
9582             printf("DUconflict ");
9583             dumpRegRecords();
9584             break;
9585         case LSRA_EVENT_DEFUSE_CASE1:
9586             printf(indentFormat, "  Case #1 use defRegAssignment");
9587             dumpRegRecords();
9588             break;
9589         case LSRA_EVENT_DEFUSE_CASE2:
9590             printf(indentFormat, "  Case #2 use useRegAssignment");
9591             dumpRegRecords();
9592             break;
9593         case LSRA_EVENT_DEFUSE_CASE3:
9594             printf(indentFormat, "  Case #3 use useRegAssignment");
            dumpRegRecords();
9597             break;
9598         case LSRA_EVENT_DEFUSE_CASE4:
9599             printf(indentFormat, "  Case #4 use defRegAssignment");
9600             dumpRegRecords();
9601             break;
9602         case LSRA_EVENT_DEFUSE_CASE5:
9603             printf(indentFormat, "  Case #5 set def to all regs");
9604             dumpRegRecords();
9605             break;
9606         case LSRA_EVENT_DEFUSE_CASE6:
9607             printf(indentFormat, "  Case #6 need a copy");
9608             dumpRegRecords();
9609             if (interval == nullptr)
9610             {
9611                 printf(indentFormat, "    NULL interval");
9612                 dumpRegRecords();
9613             }
9614             else if (interval->firstRefPosition->multiRegIdx != 0)
9615             {
9616                 printf(indentFormat, "    (multiReg)");
9617                 dumpRegRecords();
9618             }
9619             break;
9620
9621         case LSRA_EVENT_SPILL:
9622             dumpRefPositionShort(activeRefPosition, currentBlock);
9623             assert(interval != nullptr && interval->assignedReg != nullptr);
9624             printf("Spill %-4s ", getRegName(interval->assignedReg->regNum));
9625             dumpRegRecords();
9626             break;
9627
9628         // Restoring the previous register
9629         case LSRA_EVENT_RESTORE_PREVIOUS_INTERVAL_AFTER_SPILL:
9630             assert(interval != nullptr);
9631             dumpRefPositionShort(activeRefPosition, currentBlock);
9632             printf("SRstr %-4s ", getRegName(reg));
9633             dumpRegRecords();
9634             break;
9635
9636         case LSRA_EVENT_RESTORE_PREVIOUS_INTERVAL:
9637             assert(interval != nullptr);
9638             if (activeRefPosition == nullptr)
9639             {
9640                 printf(emptyRefPositionFormat, "");
9641             }
9642             else
9643             {
9644                 dumpRefPositionShort(activeRefPosition, currentBlock);
9645             }
9646             printf("Restr %-4s ", getRegName(reg));
9647             dumpRegRecords();
9648             break;
9649
9650         // Done with GC Kills
9651         case LSRA_EVENT_DONE_KILL_GC_REFS:
9652             printf(indentFormat, "  DoneKillGC ");
9653             break;
9654
9655         // Block boundaries
9656         case LSRA_EVENT_START_BB:
9657             // The RefTypeBB comes after the RefTypeDummyDefs associated with that block,
9658             // so we may have a RefTypeDummyDef at the time we dump this event.
9659             // In that case we'll have another "EVENT" associated with it, so we need to
9660             // print the full line now.
9661             if (activeRefPosition->refType != RefTypeBB)
9662             {
9663                 dumpNewBlock(currentBlock, activeRefPosition->nodeLocation);
9664                 dumpRegRecords();
9665             }
9666             else
9667             {
9668                 dumpRefPositionShort(activeRefPosition, currentBlock);
9669             }
9670             break;
9671
9672         // Allocation decisions
9673         case LSRA_EVENT_NEEDS_NEW_REG:
9674             dumpRefPositionShort(activeRefPosition, currentBlock);
9675             printf("Free  %-4s ", getRegName(reg));
9676             dumpRegRecords();
9677             break;
9678
9679         case LSRA_EVENT_ZERO_REF:
9680             assert(interval != nullptr && interval->isLocalVar);
9681             dumpRefPositionShort(activeRefPosition, currentBlock);
9682             printf("NoRef      ");
9683             dumpRegRecords();
9684             break;
9685
9686         case LSRA_EVENT_FIXED_REG:
9687         case LSRA_EVENT_EXP_USE:
9688         case LSRA_EVENT_KEPT_ALLOCATION:
9689             dumpRefPositionShort(activeRefPosition, currentBlock);
9690             printf("Keep  %-4s ", getRegName(reg));
9691             break;
9692
9693         case LSRA_EVENT_COPY_REG:
9694             assert(interval != nullptr && interval->recentRefPosition != nullptr);
9695             dumpRefPositionShort(activeRefPosition, currentBlock);
9696             printf("Copy  %-4s ", getRegName(reg));
9697             break;
9698
9699         case LSRA_EVENT_MOVE_REG:
9700             assert(interval != nullptr && interval->recentRefPosition != nullptr);
9701             dumpRefPositionShort(activeRefPosition, currentBlock);
9702             printf("Move  %-4s ", getRegName(reg));
9703             dumpRegRecords();
9704             break;
9705
9706         case LSRA_EVENT_ALLOC_REG:
9707             dumpRefPositionShort(activeRefPosition, currentBlock);
9708             printf("Alloc %-4s ", getRegName(reg));
9709             break;
9710
9711         case LSRA_EVENT_REUSE_REG:
9712             dumpRefPositionShort(activeRefPosition, currentBlock);
9713             printf("Reuse %-4s ", getRegName(reg));
9714             break;
9715
9716         case LSRA_EVENT_ALLOC_SPILLED_REG:
9717             dumpRefPositionShort(activeRefPosition, currentBlock);
9718             printf("Steal %-4s ", getRegName(reg));
9719             break;
9720
9721         case LSRA_EVENT_NO_ENTRY_REG_ALLOCATED:
9722             assert(interval != nullptr && interval->isLocalVar);
9723             dumpRefPositionShort(activeRefPosition, currentBlock);
9724             printf("LoRef      ");
9725             break;
9726
9727         case LSRA_EVENT_NO_REG_ALLOCATED:
9728             dumpRefPositionShort(activeRefPosition, currentBlock);
9729             printf("NoReg      ");
9730             break;
9731
9732         case LSRA_EVENT_RELOAD:
9733             dumpRefPositionShort(activeRefPosition, currentBlock);
9734             printf("ReLod %-4s ", getRegName(reg));
9735             dumpRegRecords();
9736             break;
9737
9738         case LSRA_EVENT_SPECIAL_PUTARG:
9739             dumpRefPositionShort(activeRefPosition, currentBlock);
9740             printf("PtArg %-4s ", getRegName(reg));
9741             break;
9742
9743         case LSRA_EVENT_UPPER_VECTOR_SAVE:
9744             dumpRefPositionShort(activeRefPosition, currentBlock);
9745             printf("UVSav %-4s ", getRegName(reg));
9746             break;
9747
9748         case LSRA_EVENT_UPPER_VECTOR_RESTORE:
9749             dumpRefPositionShort(activeRefPosition, currentBlock);
9750             printf("UVRes %-4s ", getRegName(reg));
9751             dumpRegRecords();
9752             break;
9753
9754         // We currently don't dump anything for these events.
9755         case LSRA_EVENT_DEFUSE_FIXED_DELAY_USE:
9756         case LSRA_EVENT_SPILL_EXTENDED_LIFETIME:
9757         case LSRA_EVENT_END_BB:
9758         case LSRA_EVENT_FREE_REGS:
9759         case LSRA_EVENT_INCREMENT_RANGE_END:
9760         case LSRA_EVENT_LAST_USE:
9761         case LSRA_EVENT_LAST_USE_DELAYED:
9762             break;
9763
9764         default:
9765             unreached();
9766     }
9767 }
9768
9769 //------------------------------------------------------------------------
9770 // dumpRegRecordHeader: Dump the header for a column-based dump of the register state.
9771 //
9772 // Arguments:
9773 //    None.
9774 //
9775 // Return Value:
9776 //    None.
9777 //
9778 // Assumptions:
9779 //    Reg names fit in 4 characters (minimum width of the columns)
9780 //
9781 // Notes:
9782 //    In order to make the table as dense as possible (for ease of reading the dumps),
9783 //    we determine the minimum regColumnWidth width required to represent:
9784 //      regs, by name (e.g. eax or xmm0) - this is fixed at 4 characters.
9785 //      intervals, as Vnn for lclVar intervals, or as I<num> for other intervals.
9786 //    The table is indented by the amount needed for dumpRefPositionShort, which is
9787 //    captured in shortRefPositionDumpWidth.
9788 //
9789 void LinearScan::dumpRegRecordHeader()
9790 {
9791     printf("The following table has one or more rows for each RefPosition that is handled during allocation.\n"
9792            "The first column provides the basic information about the RefPosition, with its type (e.g. Def,\n"
9793            "Use, Fixd) followed by a '*' if it is a last use, and a 'D' if it is delayRegFree, and then the\n"
9794            "action taken during allocation (e.g. Alloc a new register, or Keep an existing one).\n"
9795            "The subsequent columns show the Interval occupying each register, if any, followed by 'a' if it is\n"
           "active, a 'p' if it is a large vector that has been partially spilled, and 'i' if it is inactive.\n"
           "Columns are only printed up to the last modified register, which may increase during allocation,\n"
           "in which case additional columns will appear.\n"
9799            "Registers which are not marked modified have ---- in their column.\n\n");
9800
9801     // First, determine the width of each register column (which holds a reg name in the
9802     // header, and an interval name in each subsequent row).
9803     int intervalNumberWidth = (int)log10((double)intervals.size()) + 1;
9804     // The regColumnWidth includes the identifying character (I or V) and an 'i', 'p' or 'a' (inactive,
9805     // partially-spilled or active)
9806     regColumnWidth = intervalNumberWidth + 2;
9807     if (regColumnWidth < 4)
9808     {
9809         regColumnWidth = 4;
9810     }
9811     sprintf_s(intervalNameFormat, MAX_FORMAT_CHARS, "%%c%%-%dd", regColumnWidth - 2);
9812     sprintf_s(regNameFormat, MAX_FORMAT_CHARS, "%%-%ds", regColumnWidth);
9813
9814     // Next, determine the width of the short RefPosition (see dumpRefPositionShort()).
9815     // This is in the form:
9816     // nnn.#mmm NAME TYPEld
9817     // Where:
9818     //    nnn is the Location, right-justified to the width needed for the highest location.
9819     //    mmm is the RefPosition rpNum, left-justified to the width needed for the highest rpNum.
9820     //    NAME is dumped by dumpReferentName(), and is "regColumnWidth".
9821     //    TYPE is RefTypeNameShort, and is 4 characters
9822     //    l is either '*' (if a last use) or ' ' (otherwise)
9823     //    d is either 'D' (if a delayed use) or ' ' (otherwise)
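    // For example (a sketch; the exact spacing depends on the widths computed below):
    //    12.#45 V01  Use *
    // would describe RefPosition #45 at location 12: a last use of lclVar V01, not delayed.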
9824
    // Corner case: a method with an infinite loop but no GenTree nodes has no node locations.
    if (maxNodeLocation == 0)
    {
        maxNodeLocation = 1;
    }
9828     assert(maxNodeLocation >= 1);
9829     assert(refPositions.size() >= 1);
9830     int nodeLocationWidth         = (int)log10((double)maxNodeLocation) + 1;
9831     int refPositionWidth          = (int)log10((double)refPositions.size()) + 1;
9832     int refTypeInfoWidth          = 4 /*TYPE*/ + 2 /* last-use and delayed */ + 1 /* space */;
9833     int locationAndRPNumWidth     = nodeLocationWidth + 2 /* .# */ + refPositionWidth + 1 /* space */;
9834     int shortRefPositionDumpWidth = locationAndRPNumWidth + regColumnWidth + 1 /* space */ + refTypeInfoWidth;
9835     sprintf_s(shortRefPositionFormat, MAX_FORMAT_CHARS, "%%%dd.#%%-%dd ", nodeLocationWidth, refPositionWidth);
9836     sprintf_s(emptyRefPositionFormat, MAX_FORMAT_CHARS, "%%-%ds", shortRefPositionDumpWidth);
9837
9838     // The width of the "allocation info"
9839     //  - a 5-character allocation decision
9840     //  - a space
9841     //  - a 4-character register
9842     //  - a space
9843     int allocationInfoWidth = 5 + 1 + 4 + 1;
9844
9845     // Next, determine the width of the legend for each row.  This includes:
9846     //  - a short RefPosition dump (shortRefPositionDumpWidth), which includes a space
9847     //  - the allocation info (allocationInfoWidth), which also includes a space
9848
9849     regTableIndent = shortRefPositionDumpWidth + allocationInfoWidth;
9850
9851     // BBnn printed left-justified in the NAME Typeld and allocationInfo space.
9852     int bbDumpWidth = regColumnWidth + 1 + refTypeInfoWidth + allocationInfoWidth;
9853     int bbNumWidth  = (int)log10((double)compiler->fgBBNumMax) + 1;
9854     // In the unlikely event that BB numbers overflow the space, we'll simply omit the predBB
9855     int predBBNumDumpSpace = regTableIndent - locationAndRPNumWidth - bbNumWidth - 9; // 'BB' + ' PredBB'
9856     if (predBBNumDumpSpace < bbNumWidth)
9857     {
9858         sprintf_s(bbRefPosFormat, MAX_LEGEND_FORMAT_CHARS, "BB%%-%dd", shortRefPositionDumpWidth - 2);
9859     }
9860     else
9861     {
9862         sprintf_s(bbRefPosFormat, MAX_LEGEND_FORMAT_CHARS, "BB%%-%dd PredBB%%-%dd", bbNumWidth, predBBNumDumpSpace);
9863     }
9864
9865     if (compiler->shouldDumpASCIITrees())
9866     {
9867         columnSeparator = "|";
9868         line            = "-";
9869         leftBox         = "+";
9870         middleBox       = "+";
9871         rightBox        = "+";
9872     }
9873     else
9874     {
9875         columnSeparator = "\xe2\x94\x82";
9876         line            = "\xe2\x94\x80";
9877         leftBox         = "\xe2\x94\x9c";
9878         middleBox       = "\xe2\x94\xbc";
9879         rightBox        = "\xe2\x94\xa4";
9880     }
9881     sprintf_s(indentFormat, MAX_FORMAT_CHARS, "%%-%ds", regTableIndent);
9882
9883     // Now, set up the legend format for the RefPosition info
9884     sprintf_s(legendFormat, MAX_LEGEND_FORMAT_CHARS, "%%-%d.%ds%%-%d.%ds%%-%ds%%s", nodeLocationWidth + 1,
9885               nodeLocationWidth + 1, refPositionWidth + 2, refPositionWidth + 2, regColumnWidth + 1);
9886
9887     // Print a "title row" including the legend and the reg names.
9888     lastDumpedRegisters = RBM_NONE;
9889     dumpRegRecordTitleIfNeeded();
9890 }
9891
9892 void LinearScan::dumpRegRecordTitleIfNeeded()
9893 {
9894     if ((lastDumpedRegisters != registersToDump) || (rowCountSinceLastTitle > MAX_ROWS_BETWEEN_TITLES))
9895     {
9896         lastUsedRegNumIndex = 0;
9897         int lastRegNumIndex = compiler->compFloatingPointUsed ? REG_FP_LAST : REG_INT_LAST;
9898         for (int regNumIndex = 0; regNumIndex <= lastRegNumIndex; regNumIndex++)
9899         {
9900             if ((registersToDump & genRegMask((regNumber)regNumIndex)) != 0)
9901             {
9902                 lastUsedRegNumIndex = regNumIndex;
9903             }
9904         }
9905         dumpRegRecordTitle();
9906         lastDumpedRegisters = registersToDump;
9907     }
9908 }
9909
9910 void LinearScan::dumpRegRecordTitleLines()
9911 {
9912     for (int i = 0; i < regTableIndent; i++)
9913     {
9914         printf("%s", line);
9915     }
9916     for (int regNumIndex = 0; regNumIndex <= lastUsedRegNumIndex; regNumIndex++)
9917     {
9918         regNumber regNum = (regNumber)regNumIndex;
9919         if (shouldDumpReg(regNum))
9920         {
9921             printf("%s", middleBox);
9922             for (int i = 0; i < regColumnWidth; i++)
9923             {
9924                 printf("%s", line);
9925             }
9926         }
9927     }
9928     printf("%s\n", rightBox);
9929 }

void LinearScan::dumpRegRecordTitle()
9931 {
9932     dumpRegRecordTitleLines();
9933
9934     // Print out the legend for the RefPosition info
9935     printf(legendFormat, "Loc ", "RP# ", "Name ", "Type  Action Reg  ");
9936
9937     // Print out the register name column headers
9938     char columnFormatArray[MAX_FORMAT_CHARS];
9939     sprintf_s(columnFormatArray, MAX_FORMAT_CHARS, "%s%%-%d.%ds", columnSeparator, regColumnWidth, regColumnWidth);
9940     for (int regNumIndex = 0; regNumIndex <= lastUsedRegNumIndex; regNumIndex++)
9941     {
9942         regNumber regNum = (regNumber)regNumIndex;
9943         if (shouldDumpReg(regNum))
9944         {
9945             const char* regName = getRegName(regNum);
9946             printf(columnFormatArray, regName);
9947         }
9948     }
9949     printf("%s\n", columnSeparator);
9950
9951     rowCountSinceLastTitle = 0;
9952
9953     dumpRegRecordTitleLines();
9954 }
9955
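//------------------------------------------------------------------------
// dumpRegRecords: dump one row of the register table: for each dumped register,
// the Interval (if any) occupying it and whether that Interval is active,
// inactive, or partially spilled, or "Busy" if the register is held until the
// next kill.
//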
9956 void LinearScan::dumpRegRecords()
9957 {
    static char columnFormatArray[MAX_FORMAT_CHARS];
9959
9960     for (int regNumIndex = 0; regNumIndex <= lastUsedRegNumIndex; regNumIndex++)
9961     {
9962         if (shouldDumpReg((regNumber)regNumIndex))
9963         {
9964             printf("%s", columnSeparator);
9965             RegRecord& regRecord = physRegs[regNumIndex];
9966             Interval*  interval  = regRecord.assignedInterval;
9967             if (interval != nullptr)
9968             {
9969                 dumpIntervalName(interval);
9970                 char activeChar = interval->isActive ? 'a' : 'i';
9971 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
9972                 if (interval->isPartiallySpilled)
9973                 {
9974                     activeChar = 'p';
9975                 }
9976 #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
9977                 printf("%c", activeChar);
9978             }
            else if (regRecord.isBusyUntilNextKill)
            {
                // Format the column before printing "Busy"; otherwise the first "Busy"
                // column could be printed with a not-yet-initialized format string.
                sprintf_s(columnFormatArray, MAX_FORMAT_CHARS, "%%-%ds", regColumnWidth);
                printf(columnFormatArray, "Busy");
            }
9983             else
9984             {
9985                 sprintf_s(columnFormatArray, MAX_FORMAT_CHARS, "%%-%ds", regColumnWidth);
9986                 printf(columnFormatArray, "");
9987             }
9988         }
9989     }
9990     printf("%s\n", columnSeparator);
9991     rowCountSinceLastTitle++;
9992 }
9993
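//------------------------------------------------------------------------
// dumpIntervalName: print the interval in the table's column width, prefixed by
// 'V' (lclVar), 'U' (upper vector), 'C' (constant), or 'I' (any other interval).
//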
9994 void LinearScan::dumpIntervalName(Interval* interval)
9995 {
9996     if (interval->isLocalVar)
9997     {
9998         printf(intervalNameFormat, 'V', interval->varNum);
9999     }
10000     else if (interval->IsUpperVector())
10001     {
10002         printf(intervalNameFormat, 'U', interval->relatedInterval->varNum);
10003     }
10004     else if (interval->isConstant)
10005     {
10006         printf(intervalNameFormat, 'C', interval->intervalIndex);
10007     }
10008     else
10009     {
10010         printf(intervalNameFormat, 'I', interval->intervalIndex);
10011     }
10012 }
10013
10014 void LinearScan::dumpEmptyRefPosition()
10015 {
10016     printf(emptyRefPositionFormat, "");
10017 }
10018
10019 //------------------------------------------------------------------------
10020 // dumpNewBlock: Dump a line for a new block in a column-based dump of the register state.
10021 //
10022 // Arguments:
10023 //    currentBlock - the new block to be dumped
10024 //
10025 void LinearScan::dumpNewBlock(BasicBlock* currentBlock, LsraLocation location)
10026 {
10027     if (!VERBOSE)
10028     {
10029         return;
10030     }
10031
10032     // Always print a title row before a RefTypeBB (except for the first, because we
10033     // will already have printed it before the parameters)
10034     if ((currentBlock != compiler->fgFirstBB) && (currentBlock != nullptr))
10035     {
10036         dumpRegRecordTitle();
10037     }
10038     // If the activeRefPosition is a DummyDef, then don't print anything further (printing the
10039     // title line makes it clearer that we're "about to" start the next block).
10040     if (activeRefPosition->refType == RefTypeDummyDef)
10041     {
10042         dumpEmptyRefPosition();
10043         printf("DDefs ");
10044         printf(regNameFormat, "");
10045         return;
10046     }
10047     printf(shortRefPositionFormat, location, activeRefPosition->rpNum);
10048     if (currentBlock == nullptr)
10049     {
10050         printf(regNameFormat, "END");
10051         printf("              ");
10052         printf(regNameFormat, "");
10053     }
10054     else
10055     {
10056         printf(bbRefPosFormat, currentBlock->bbNum,
10057                currentBlock == compiler->fgFirstBB ? 0 : blockInfo[currentBlock->bbNum].predBBNum);
10058     }
10059 }
10060
10061 // Note that the size of this dump is computed in dumpRegRecordHeader().
10062 //
10063 void LinearScan::dumpRefPositionShort(RefPosition* refPosition, BasicBlock* currentBlock)
10064 {
10065     BasicBlock*         block                  = currentBlock;
10066     static RefPosition* lastPrintedRefPosition = nullptr;
10067     if (refPosition == lastPrintedRefPosition)
10068     {
10069         dumpEmptyRefPosition();
10070         return;
10071     }
10072     lastPrintedRefPosition = refPosition;
10073     if (refPosition->refType == RefTypeBB)
10074     {
10075         dumpNewBlock(currentBlock, refPosition->nodeLocation);
10076         return;
10077     }
10078     printf(shortRefPositionFormat, refPosition->nodeLocation, refPosition->rpNum);
10079     if (refPosition->isIntervalRef())
10080     {
10081         Interval* interval = refPosition->getInterval();
10082         dumpIntervalName(interval);
10083         char lastUseChar = ' ';
10084         char delayChar   = ' ';
10085         if (refPosition->lastUse)
10086         {
10087             lastUseChar = '*';
10088             if (refPosition->delayRegFree)
10089             {
10090                 delayChar = 'D';
10091             }
10092         }
10093         printf("  %s%c%c ", getRefTypeShortName(refPosition->refType), lastUseChar, delayChar);
10094     }
10095     else if (refPosition->isPhysRegRef)
10096     {
10097         RegRecord* regRecord = refPosition->getReg();
10098         printf(regNameFormat, getRegName(regRecord->regNum));
10099         printf(" %s   ", getRefTypeShortName(refPosition->refType));
10100     }
10101     else
10102     {
10103         assert(refPosition->refType == RefTypeKillGCRefs);
10104         // There's no interval or reg name associated with this.
10105         printf(regNameFormat, "   ");
10106         printf(" %s   ", getRefTypeShortName(refPosition->refType));
10107     }
10108 }
10109
10110 //------------------------------------------------------------------------
10111 // LinearScan::IsResolutionMove:
10112 //     Returns true if the given node is a move inserted by LSRA
10113 //     resolution.
10114 //
10115 // Arguments:
10116 //     node - the node to check.
10117 //
10118 bool LinearScan::IsResolutionMove(GenTree* node)
10119 {
10120     if (!IsLsraAdded(node))
10121     {
10122         return false;
10123     }
10124
10125     switch (node->OperGet())
10126     {
10127         case GT_LCL_VAR:
10128         case GT_COPY:
10129             return node->IsUnusedValue();
10130
10131         case GT_SWAP:
10132             return true;
10133
10134         default:
10135             return false;
10136     }
10137 }
10138
10139 //------------------------------------------------------------------------
10140 // LinearScan::IsResolutionNode:
10141 //     Returns true if the given node is either a move inserted by LSRA
10142 //     resolution or an operand to such a move.
10143 //
10144 // Arguments:
10145 //     containingRange - the range that contains the node to check.
10146 //     node - the node to check.
10147 //
10148 bool LinearScan::IsResolutionNode(LIR::Range& containingRange, GenTree* node)
10149 {
10150     for (;;)
10151     {
10152         if (IsResolutionMove(node))
10153         {
10154             return true;
10155         }
10156
10157         if (!IsLsraAdded(node) || (node->OperGet() != GT_LCL_VAR))
10158         {
10159             return false;
10160         }
10161
10162         LIR::Use use;
10163         bool     foundUse = containingRange.TryGetUse(node, &use);
10164         assert(foundUse);
10165
10166         node = use.User();
10167     }
10168 }
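
// For example, edge resolution may insert LIR resembling (a sketch, not verbatim output):
//
//     t1 = LCL_VAR V01    // LSRA-added; operand of the move below
//          GT_COPY t1     // the resolution move itself (an unused value)
//
// IsResolutionNode accepts both nodes: the GT_COPY directly (via IsResolutionMove),
// and the LCL_VAR by walking up to its user.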
10169
10170 //------------------------------------------------------------------------
10171 // verifyFinalAllocation: Traverse the RefPositions and verify various invariants.
10172 //
10173 // Arguments:
10174 //    None.
10175 //
10176 // Return Value:
10177 //    None.
10178 //
10179 // Notes:
10180 //    If verbose is set, this will also dump a table of the final allocations.
//
void LinearScan::verifyFinalAllocation()
10182 {
10183     if (VERBOSE)
10184     {
10185         printf("\nFinal allocation\n");
10186     }
10187
10188     // Clear register assignments.
10189     for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg))
10190     {
10191         RegRecord* physRegRecord        = getRegisterRecord(reg);
10192         physRegRecord->assignedInterval = nullptr;
10193     }
10194
10195     for (Interval& interval : intervals)
10196     {
10197         interval.assignedReg = nullptr;
10198         interval.physReg     = REG_NA;
10199     }
10200
10201     DBEXEC(VERBOSE, dumpRegRecordTitle());
10202
10203     BasicBlock*  currentBlock                = nullptr;
10204     GenTree*     firstBlockEndResolutionNode = nullptr;
10205     regMaskTP    regsToFree                  = RBM_NONE;
10206     regMaskTP    delayRegsToFree             = RBM_NONE;
10207     LsraLocation currentLocation             = MinLocation;
10208     for (RefPosition& refPosition : refPositions)
10209     {
10210         RefPosition* currentRefPosition = &refPosition;
10211         Interval*    interval           = nullptr;
10212         RegRecord*   regRecord          = nullptr;
10213         regNumber    regNum             = REG_NA;
10214         activeRefPosition               = currentRefPosition;
10215
10216         if (currentRefPosition->refType == RefTypeBB)
10217         {
10218             regsToFree |= delayRegsToFree;
10219             delayRegsToFree = RBM_NONE;
10220         }
10221         else
10222         {
10223             if (currentRefPosition->isPhysRegRef)
10224             {
10225                 regRecord                    = currentRefPosition->getReg();
10226                 regRecord->recentRefPosition = currentRefPosition;
10227                 regNum                       = regRecord->regNum;
10228             }
10229             else if (currentRefPosition->isIntervalRef())
10230             {
10231                 interval                    = currentRefPosition->getInterval();
10232                 interval->recentRefPosition = currentRefPosition;
10233                 if (currentRefPosition->registerAssignment != RBM_NONE)
10234                 {
10235                     if (!genMaxOneBit(currentRefPosition->registerAssignment))
10236                     {
10237                         assert(currentRefPosition->refType == RefTypeExpUse ||
10238                                currentRefPosition->refType == RefTypeDummyDef);
10239                     }
10240                     else
10241                     {
10242                         regNum    = currentRefPosition->assignedReg();
10243                         regRecord = getRegisterRecord(regNum);
10244                     }
10245                 }
10246             }
10247         }
10248
10249         LsraLocation newLocation = currentRefPosition->nodeLocation;
10250
10251         if (newLocation > currentLocation)
10252         {
10253             // Free Registers.
10254             // We could use the freeRegisters() method, but we'd have to carefully manage the active intervals.
10255             for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg))
10256             {
10257                 regMaskTP regMask = genRegMask(reg);
10258                 if ((regsToFree & regMask) != RBM_NONE)
10259                 {
10260                     RegRecord* physRegRecord        = getRegisterRecord(reg);
10261                     physRegRecord->assignedInterval = nullptr;
10262                 }
10263             }
            regsToFree      = delayRegsToFree;
            delayRegsToFree = RBM_NONE;
10266         }
10267         currentLocation = newLocation;
10268
10269         switch (currentRefPosition->refType)
10270         {
            case RefTypeBB:
            {
                if (currentBlock == nullptr)
                {
                    currentBlock = startBlockSequence();
                }
                else
                {
                    // Verify the resolution moves at the end of the previous block.
                    for (GenTree* node = firstBlockEndResolutionNode; node != nullptr; node = node->gtNext)
                    {
                        assert(enregisterLocalVars);
                        // Only verify nodes that are actually moves; don't bother with the nodes that are
                        // operands to moves.
                        if (IsResolutionMove(node))
                        {
                            verifyResolutionMove(node, currentLocation);
                        }
                    }

                    // Validate the locations at the end of the previous block.
                    if (enregisterLocalVars)
                    {
                        VarToRegMap     outVarToRegMap = outVarToRegMaps[currentBlock->bbNum];
                        VarSetOps::Iter iter(compiler, currentBlock->bbLiveOut);
                        unsigned        varIndex = 0;
                        while (iter.NextElem(&varIndex))
                        {
                            if (localVarIntervals[varIndex] == nullptr)
                            {
                                assert(!compiler->lvaTable[compiler->lvaTrackedToVarNum[varIndex]].lvLRACandidate);
                                continue;
                            }
                            regNumber regNum = getVarReg(outVarToRegMap, varIndex);
                            interval         = getIntervalForLocalVar(varIndex);
                            assert(interval->physReg == regNum || (interval->physReg == REG_NA && regNum == REG_STK));
                            interval->physReg     = REG_NA;
                            interval->assignedReg = nullptr;
                            interval->isActive    = false;
                        }
                    }

                    // Clear register assignments.
                    for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg))
                    {
                        RegRecord* physRegRecord        = getRegisterRecord(reg);
                        physRegRecord->assignedInterval = nullptr;
                    }

                    // Now, record the locations at the beginning of this block.
                    currentBlock = moveToNextBlock();
                }

                if (currentBlock != nullptr)
                {
                    if (enregisterLocalVars)
                    {
                        VarToRegMap     inVarToRegMap = inVarToRegMaps[currentBlock->bbNum];
                        VarSetOps::Iter iter(compiler, currentBlock->bbLiveIn);
                        unsigned        varIndex = 0;
                        while (iter.NextElem(&varIndex))
                        {
                            if (localVarIntervals[varIndex] == nullptr)
                            {
                                assert(!compiler->lvaTable[compiler->lvaTrackedToVarNum[varIndex]].lvLRACandidate);
                                continue;
                            }
                            regNumber regNum                  = getVarReg(inVarToRegMap, varIndex);
                            interval                          = getIntervalForLocalVar(varIndex);
                            interval->physReg                 = regNum;
                            interval->assignedReg             = &(physRegs[regNum]);
                            interval->isActive                = true;
                            physRegs[regNum].assignedInterval = interval;
                        }
                    }

                    if (VERBOSE)
                    {
                        dumpRefPositionShort(currentRefPosition, currentBlock);
                        dumpRegRecords();
                    }

                    // Finally, handle the resolution moves, if any, at the beginning of the next block.
                    firstBlockEndResolutionNode = nullptr;
                    bool foundNonResolutionNode = false;

                    LIR::Range& currentBlockRange = LIR::AsRange(currentBlock);
                    for (GenTree* node : currentBlockRange.NonPhiNodes())
                    {
                        if (IsResolutionNode(currentBlockRange, node))
                        {
                            assert(enregisterLocalVars);
                            if (foundNonResolutionNode)
                            {
                                firstBlockEndResolutionNode = node;
                                break;
                            }
                            else if (IsResolutionMove(node))
                            {
                                // Only verify nodes that are actually moves; don't bother with the nodes that are
                                // operands to moves.
                                verifyResolutionMove(node, currentLocation);
                            }
                        }
                        else
                        {
                            foundNonResolutionNode = true;
                        }
                    }
                }
            }
            break;

            case RefTypeKill:
                assert(regRecord != nullptr);
                assert(regRecord->assignedInterval == nullptr);
                dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, nullptr, regRecord->regNum, currentBlock);
                break;
            case RefTypeFixedReg:
                assert(regRecord != nullptr);
                dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, nullptr, regRecord->regNum, currentBlock);
                break;

            case RefTypeUpperVectorSave:
                dumpLsraAllocationEvent(LSRA_EVENT_UPPER_VECTOR_SAVE, nullptr, REG_NA, currentBlock);
                break;

            case RefTypeUpperVectorRestore:
                dumpLsraAllocationEvent(LSRA_EVENT_UPPER_VECTOR_RESTORE, nullptr, REG_NA, currentBlock);
                break;

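            // For defs and uses, replay the allocator's recorded decision (reload,
            // copy, move, or keep) against the interval and RegRecord state so that
            // the final assignments can be checked for consistency.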
            case RefTypeDef:
            case RefTypeUse:
            case RefTypeParamDef:
            case RefTypeZeroInit:
                assert(interval != nullptr);

                if (interval->isSpecialPutArg)
                {
                    dumpLsraAllocationEvent(LSRA_EVENT_SPECIAL_PUTARG, interval, regNum);
                    break;
                }
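                // A reload brings a previously spilled interval back into a register;
                // reactivate it and re-establish the interval <-> RegRecord linkage.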
                if (currentRefPosition->reload)
                {
                    interval->isActive = true;
                    assert(regNum != REG_NA);
                    interval->physReg           = regNum;
                    interval->assignedReg       = regRecord;
                    regRecord->assignedInterval = interval;
                    dumpLsraAllocationEvent(LSRA_EVENT_RELOAD, nullptr, regRecord->regNum, currentBlock);
                }
                if (regNum == REG_NA)
                {
                    dumpLsraAllocationEvent(LSRA_EVENT_NO_REG_ALLOCATED, interval);
                }
                else if (RefTypeIsDef(currentRefPosition->refType))
                {
                    interval->isActive = true;
                    if (VERBOSE)
                    {
                        if (interval->isConstant && (currentRefPosition->treeNode != nullptr) &&
                            currentRefPosition->treeNode->IsReuseRegVal())
                        {
                            dumpLsraAllocationEvent(LSRA_EVENT_REUSE_REG, nullptr, regRecord->regNum, currentBlock);
                        }
                        else
                        {
                            dumpLsraAllocationEvent(LSRA_EVENT_ALLOC_REG, nullptr, regRecord->regNum, currentBlock);
                        }
                    }
                }
                else if (currentRefPosition->copyReg)
                {
                    dumpLsraAllocationEvent(LSRA_EVENT_COPY_REG, interval, regRecord->regNum, currentBlock);
                }
                else if (currentRefPosition->moveReg)
                {
                    assert(interval->assignedReg != nullptr);
                    interval->assignedReg->assignedInterval = nullptr;
                    interval->physReg                       = regNum;
                    interval->assignedReg                   = regRecord;
                    regRecord->assignedInterval             = interval;
                    if (VERBOSE)
                    {
                        printf("Move  %-4s ", getRegName(regRecord->regNum));
                    }
                }
                else
                {
                    dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, nullptr, regRecord->regNum, currentBlock);
                }
                if (currentRefPosition->lastUse || currentRefPosition->spillAfter)
                {
                    interval->isActive = false;
                }
                if (regNum != REG_NA)
                {
                    if (currentRefPosition->spillAfter)
                    {
                        if (VERBOSE)
                        {
                            // If refPos is marked as copyReg, then the reg that is spilled
                            // is the homeReg of the interval, not the reg currently assigned
                            // to refPos.
                            regNumber spillReg = regNum;
                            if (currentRefPosition->copyReg)
                            {
                                assert(interval != nullptr);
                                spillReg = interval->physReg;
                            }
                            dumpRegRecords();
                            dumpEmptyRefPosition();
                            printf("Spill %-4s ", getRegName(spillReg));
                        }
                    }
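                    // For a copyReg, the interval keeps its home register; only the
                    // copy target's RegRecord temporarily refers to this interval.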
                    else if (currentRefPosition->copyReg)
                    {
                        regRecord->assignedInterval = interval;
                    }
                    else
                    {
                        interval->physReg           = regNum;
                        interval->assignedReg       = regRecord;
                        regRecord->assignedInterval = interval;
                    }
                }
                break;
            case RefTypeKillGCRefs:
                // No action to take.
                // However, we will assert that, at resolution time, no registers contain GC refs.
                {
                    DBEXEC(VERBOSE, printf("           "));
                    regMaskTP candidateRegs = currentRefPosition->registerAssignment;
                    while (candidateRegs != RBM_NONE)
                    {
                        regMaskTP nextRegBit = genFindLowestBit(candidateRegs);
                        candidateRegs &= ~nextRegBit;
                        regNumber  nextReg          = genRegNumFromMask(nextRegBit);
                        RegRecord* regRecord        = getRegisterRecord(nextReg);
                        Interval*  assignedInterval = regRecord->assignedInterval;
                        assert(assignedInterval == nullptr || !varTypeIsGC(assignedInterval->registerType));
                    }
                }
                break;

            case RefTypeExpUse:
            case RefTypeDummyDef:
                // Do nothing; these will be handled by the RefTypeBB.
                DBEXEC(VERBOSE, dumpRefPositionShort(currentRefPosition, currentBlock));
                DBEXEC(VERBOSE, printf("           "));
                break;

            case RefTypeInvalid:
                // For these 'currentRefPosition->refType' values, no action is required.
                break;
        }

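        // Common post-processing for all RefPositions other than RefTypeBB: undo any
        // temporary copyReg association and, for last uses and spills, detach the
        // interval from its register.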
        if (currentRefPosition->refType != RefTypeBB)
        {
            DBEXEC(VERBOSE, dumpRegRecords());
            if (interval != nullptr)
            {
                if (currentRefPosition->copyReg)
                {
                    assert(interval->physReg != regNum);
                    regRecord->assignedInterval = nullptr;
                    assert(interval->assignedReg != nullptr);
                    regRecord = interval->assignedReg;
                }
                if (currentRefPosition->spillAfter || currentRefPosition->lastUse)
                {
                    interval->physReg     = REG_NA;
                    interval->assignedReg = nullptr;

                    // regRecord could be null if the RefPosition does not require a register.
                    if (regRecord != nullptr)
                    {
                        regRecord->assignedInterval = nullptr;
                    }
#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
                    else if (interval->isUpperVector && !currentRefPosition->RegOptional())
                    {
                        // These only require a register if they are not RegOptional, and their lclVar
                        // interval is living in a register and not already partially spilled.
                        if ((currentRefPosition->refType == RefTypeUpperVectorSave) ||
                            (currentRefPosition->refType == RefTypeUpperVectorRestore))
                        {
                            Interval* lclVarInterval = interval->relatedInterval;
                            assert((lclVarInterval->physReg == REG_NA) || lclVarInterval->isPartiallySpilled);
                        }
                    }
#endif
                    else
                    {
                        assert(currentRefPosition->RegOptional());
                    }
                }
            }
        }
    }

    // Now, verify the resolution blocks.
    // Currently these are nearly always at the end of the method, but that may not always be the case.
    // So, we'll go through all the BBs looking for blocks whose bbNum is greater than bbNumMaxBeforeResolution.
    for (BasicBlock* currentBlock = compiler->fgFirstBB; currentBlock != nullptr; currentBlock = currentBlock->bbNext)
    {
        if (currentBlock->bbNum > bbNumMaxBeforeResolution)
        {
            // If we haven't enregistered any lclVars, we have no resolution blocks.
            assert(enregisterLocalVars);

            if (VERBOSE)
            {
                dumpRegRecordTitle();
                printf(shortRefPositionFormat, 0, 0);
                assert(currentBlock->bbPreds != nullptr && currentBlock->bbPreds->flBlock != nullptr);
                printf(bbRefPosFormat, currentBlock->bbNum, currentBlock->bbPreds->flBlock->bbNum);
                dumpRegRecords();
            }

            // Clear register assignments.
            for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg))
            {
                RegRecord* physRegRecord        = getRegisterRecord(reg);
                physRegRecord->assignedInterval = nullptr;
            }

            // Set the incoming register assignments
            VarToRegMap     inVarToRegMap = getInVarToRegMap(currentBlock->bbNum);
            VarSetOps::Iter iter(compiler, currentBlock->bbLiveIn);
            unsigned        varIndex = 0;
            while (iter.NextElem(&varIndex))
            {
                if (localVarIntervals[varIndex] == nullptr)
                {
                    assert(!compiler->lvaTable[compiler->lvaTrackedToVarNum[varIndex]].lvLRACandidate);
                    continue;
                }
                regNumber regNum                  = getVarReg(inVarToRegMap, varIndex);
                Interval* interval                = getIntervalForLocalVar(varIndex);
                interval->physReg                 = regNum;
                interval->assignedReg             = &(physRegs[regNum]);
                interval->isActive                = true;
                physRegs[regNum].assignedInterval = interval;
            }

            // Verify the moves in this block
            LIR::Range& currentBlockRange = LIR::AsRange(currentBlock);
            for (GenTree* node : currentBlockRange.NonPhiNodes())
            {
                assert(IsResolutionNode(currentBlockRange, node));
                if (IsResolutionMove(node))
                {
                    // Only verify nodes that are actually moves; don't bother with the nodes that are
                    // operands to moves.
                    verifyResolutionMove(node, currentLocation);
                }
            }

            // Verify the outgoing register assignments
            {
                VarToRegMap     outVarToRegMap = getOutVarToRegMap(currentBlock->bbNum);
                VarSetOps::Iter iter(compiler, currentBlock->bbLiveOut);
                unsigned        varIndex = 0;
                while (iter.NextElem(&varIndex))
                {
                    if (localVarIntervals[varIndex] == nullptr)
                    {
                        assert(!compiler->lvaTable[compiler->lvaTrackedToVarNum[varIndex]].lvLRACandidate);
                        continue;
                    }
                    regNumber regNum   = getVarReg(outVarToRegMap, varIndex);
                    Interval* interval = getIntervalForLocalVar(varIndex);
                    assert(interval->physReg == regNum || (interval->physReg == REG_NA && regNum == REG_STK));
                    interval->physReg     = REG_NA;
                    interval->assignedReg = nullptr;
                    interval->isActive    = false;
                }
            }
        }
    }

    DBEXEC(VERBOSE, printf("\n"));
}

//------------------------------------------------------------------------
// verifyResolutionMove: Verify a resolution statement.  Called by verifyFinalAllocation()
//
// Arguments:
//    resolutionMove    - A GenTree* that must be a resolution move.
//    currentLocation   - The LsraLocation of the most recent RefPosition that has been verified.
//
// Return Value:
//    None.
//
// Notes:
//    If verbose is set, this will also dump the moves into the table of final allocations.
void LinearScan::verifyResolutionMove(GenTree* resolutionMove, LsraLocation currentLocation)
{
    GenTree* dst = resolutionMove;
    assert(IsResolutionMove(dst));

    if (dst->OperGet() == GT_SWAP)
    {
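        // A GT_SWAP exchanges the registers of two enregistered lclVars; mirror the
        // exchange in both intervals and both RegRecords.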
        GenTreeLclVarCommon* left          = dst->gtGetOp1()->AsLclVarCommon();
        GenTreeLclVarCommon* right         = dst->gtGetOp2()->AsLclVarCommon();
        regNumber            leftRegNum    = left->gtRegNum;
        regNumber            rightRegNum   = right->gtRegNum;
        LclVarDsc*           leftVarDsc    = compiler->lvaTable + left->gtLclNum;
        LclVarDsc*           rightVarDsc   = compiler->lvaTable + right->gtLclNum;
        Interval*            leftInterval  = getIntervalForLocalVar(leftVarDsc->lvVarIndex);
        Interval*            rightInterval = getIntervalForLocalVar(rightVarDsc->lvVarIndex);
        assert(leftInterval->physReg == leftRegNum && rightInterval->physReg == rightRegNum);
        leftInterval->physReg                  = rightRegNum;
        rightInterval->physReg                 = leftRegNum;
        leftInterval->assignedReg              = &physRegs[rightRegNum];
        rightInterval->assignedReg             = &physRegs[leftRegNum];
        physRegs[rightRegNum].assignedInterval = leftInterval;
        physRegs[leftRegNum].assignedInterval  = rightInterval;
        if (VERBOSE)
        {
            printf(shortRefPositionFormat, currentLocation, 0);
            dumpIntervalName(leftInterval);
            printf("  Swap   ");
            printf("      %-4s ", getRegName(rightRegNum));
            dumpRegRecords();
            printf(shortRefPositionFormat, currentLocation, 0);
            dumpIntervalName(rightInterval);
            printf("  \"      ");
            printf("      %-4s ", getRegName(leftRegNum));
            dumpRegRecords();
        }
        return;
    }
    regNumber            dstRegNum = dst->gtRegNum;
    regNumber            srcRegNum;
    GenTreeLclVarCommon* lcl;
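    // Classify the move: a GT_COPY is a register-to-register move; otherwise, a
    // lclVar marked GTF_SPILLED is a reload from the stack, and one marked
    // GTF_SPILL is a store to the stack.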
    if (dst->OperGet() == GT_COPY)
    {
        lcl       = dst->gtGetOp1()->AsLclVarCommon();
        srcRegNum = lcl->gtRegNum;
    }
    else
    {
        lcl = dst->AsLclVarCommon();
        if ((lcl->gtFlags & GTF_SPILLED) != 0)
        {
            srcRegNum = REG_STK;
        }
        else
        {
            assert((lcl->gtFlags & GTF_SPILL) != 0);
            srcRegNum = dstRegNum;
            dstRegNum = REG_STK;
        }
    }

    Interval* interval = getIntervalForLocalVarNode(lcl);
    assert(interval->physReg == srcRegNum || (srcRegNum == REG_STK && interval->physReg == REG_NA));
    if (srcRegNum != REG_STK)
    {
        physRegs[srcRegNum].assignedInterval = nullptr;
    }
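    // If the destination is a register, the interval becomes (or remains) active
    // there; if it is the stack, the interval no longer occupies any register.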
    if (dstRegNum != REG_STK)
    {
        interval->physReg                    = dstRegNum;
        interval->assignedReg                = &(physRegs[dstRegNum]);
        physRegs[dstRegNum].assignedInterval = interval;
        interval->isActive                   = true;
    }
    else
    {
        interval->physReg     = REG_NA;
        interval->assignedReg = nullptr;
        interval->isActive    = false;
    }
    if (VERBOSE)
    {
        printf(shortRefPositionFormat, currentLocation, 0);
        dumpIntervalName(interval);
        printf("  Move   ");
        printf("      %-4s ", getRegName(dstRegNum));
        dumpRegRecords();
    }
}
#endif // DEBUG