Delete unused variables in jit. Part 2. (#23481)
[platform/upstream/coreclr.git] / src / jit / lsraarm64.cpp
1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
4
5 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
7 XX                                                                           XX
8 XX                    Register Requirements for ARM64                        XX
9 XX                                                                           XX
10 XX  This encapsulates all the logic for setting register requirements for    XX
11 XX  the ARM64 architecture.                                                  XX
12 XX                                                                           XX
13 XX                                                                           XX
14 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
15 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
16 */
17
18 #include "jitpch.h"
19 #ifdef _MSC_VER
20 #pragma hdrstop
21 #endif
22
23 #ifdef _TARGET_ARM64_
24
25 #include "jit.h"
26 #include "sideeffects.h"
27 #include "lower.h"
28
29 //------------------------------------------------------------------------
30 // BuildNode: Build the RefPositions for a node
31 //
32 // Arguments:
33 //    treeNode - the node of interest
34 //
35 // Return Value:
36 //    The number of sources consumed by this node.
37 //
38 // Notes:
39 // Preconditions:
40 //    LSRA Has been initialized.
41 //
42 // Postconditions:
43 //    RefPositions have been built for all the register defs and uses required
44 //    for this node.
45 //
46 int LinearScan::BuildNode(GenTree* tree)
47 {
48     assert(!tree->isContained());
49     int       srcCount;
50     int       dstCount      = 0;
51     regMaskTP dstCandidates = RBM_NONE;
52     regMaskTP killMask      = RBM_NONE;
53     bool      isLocalDefUse = false;
54
55     // Reset the build-related members of LinearScan.
56     clearBuildState();
57
58     // Set the default dstCount. This may be modified below.
59     if (tree->IsValue())
60     {
61         dstCount = 1;
62         if (tree->IsUnusedValue())
63         {
64             isLocalDefUse = true;
65         }
66     }
67     else
68     {
69         dstCount = 0;
70     }
71
72     switch (tree->OperGet())
73     {
74         default:
75             srcCount = BuildSimple(tree);
76             break;
77
78         case GT_LCL_VAR:
79         case GT_LCL_FLD:
80         {
81             // We handle tracked variables differently from non-tracked ones.  If it is tracked,
82             // we will simply add a use of the tracked variable at its parent/consumer.
83             // Otherwise, for a use we need to actually add the appropriate references for loading
84             // or storing the variable.
85             //
86             // A tracked variable won't actually get used until the appropriate ancestor tree node
87             // is processed, unless this is marked "isLocalDefUse" because it is a stack-based argument
88             // to a call or an orphaned dead node.
89             //
90             LclVarDsc* const varDsc = &compiler->lvaTable[tree->AsLclVarCommon()->gtLclNum];
91             if (isCandidateVar(varDsc))
92             {
93                 INDEBUG(dumpNodeInfo(tree, dstCandidates, 0, 1));
94                 return 0;
95             }
96             srcCount = 0;
97 #ifdef FEATURE_SIMD
98             // Need an additional register to read upper 4 bytes of Vector3.
99             if (tree->TypeGet() == TYP_SIMD12)
100             {
101                 // We need an internal register different from targetReg in which 'tree' produces its result
102                 // because both targetReg and internal reg will be in use at the same time.
103                 buildInternalFloatRegisterDefForNode(tree, allSIMDRegs());
104                 setInternalRegsDelayFree = true;
105                 buildInternalRegisterUses();
106             }
107 #endif
108             BuildDef(tree);
109         }
110         break;
111
112         case GT_STORE_LCL_FLD:
113         case GT_STORE_LCL_VAR:
114             srcCount = 1;
115             assert(dstCount == 0);
116             srcCount = BuildStoreLoc(tree->AsLclVarCommon());
117             break;
118
119         case GT_FIELD_LIST:
120             // These should always be contained. We don't correctly allocate or
121             // generate code for a non-contained GT_FIELD_LIST.
122             noway_assert(!"Non-contained GT_FIELD_LIST");
123             srcCount = 0;
124             break;
125
126         case GT_LIST:
127         case GT_ARGPLACE:
128         case GT_NO_OP:
129         case GT_START_NONGC:
130         case GT_PROF_HOOK:
131             srcCount = 0;
132             assert(dstCount == 0);
133             break;
134
135         case GT_START_PREEMPTGC:
136             // This kills GC refs in callee save regs
137             srcCount = 0;
138             assert(dstCount == 0);
139             BuildDefsWithKills(tree, 0, RBM_NONE, RBM_NONE);
140             break;
141
142         case GT_CNS_DBL:
143         {
144             GenTreeDblCon* dblConst   = tree->AsDblCon();
145             double         constValue = dblConst->gtDblCon.gtDconVal;
146
147             if (emitter::emitIns_valid_imm_for_fmov(constValue))
148             {
149                 // Directly encode constant to instructions.
150             }
151             else
152             {
153                 // Reserve int to load constant from memory (IF_LARGELDC)
154                 buildInternalIntRegisterDefForNode(tree);
155                 buildInternalRegisterUses();
156             }
157         }
158             __fallthrough;
159
160         case GT_CNS_INT:
161         {
162             srcCount = 0;
163             assert(dstCount == 1);
164             RefPosition* def               = BuildDef(tree);
165             def->getInterval()->isConstant = true;
166         }
167         break;
168
169         case GT_BOX:
170         case GT_COMMA:
171         case GT_QMARK:
172         case GT_COLON:
173             srcCount = 0;
174             assert(dstCount == 0);
175             unreached();
176             break;
177
178         case GT_RETURN:
179             srcCount = BuildReturn(tree);
180             break;
181
182         case GT_RETFILT:
183             assert(dstCount == 0);
184             if (tree->TypeGet() == TYP_VOID)
185             {
186                 srcCount = 0;
187             }
188             else
189             {
190                 assert(tree->TypeGet() == TYP_INT);
191                 srcCount = 1;
192                 BuildUse(tree->gtGetOp1(), RBM_INTRET);
193             }
194             break;
195
196         case GT_NOP:
197             // A GT_NOP is either a passthrough (if it is void, or if it has
198             // a child), but must be considered to produce a dummy value if it
199             // has a type but no child.
200             srcCount = 0;
201             if (tree->TypeGet() != TYP_VOID && tree->gtGetOp1() == nullptr)
202             {
203                 assert(dstCount == 1);
204                 BuildDef(tree);
205             }
206             else
207             {
208                 assert(dstCount == 0);
209             }
210             break;
211
212         case GT_JTRUE:
213             srcCount = 0;
214             assert(dstCount == 0);
215             break;
216
217         case GT_JMP:
218             srcCount = 0;
219             assert(dstCount == 0);
220             break;
221
222         case GT_SWITCH:
223             // This should never occur since switch nodes must not be visible at this
224             // point in the JIT.
225             srcCount = 0;
226             noway_assert(!"Switch must be lowered at this point");
227             break;
228
229         case GT_JMPTABLE:
230             srcCount = 0;
231             assert(dstCount == 1);
232             BuildDef(tree);
233             break;
234
235         case GT_SWITCH_TABLE:
236             buildInternalIntRegisterDefForNode(tree);
237             srcCount = BuildBinaryUses(tree->AsOp());
238             assert(dstCount == 0);
239             break;
240
241         case GT_ASG:
242             noway_assert(!"We should never hit any assignment operator in lowering");
243             srcCount = 0;
244             break;
245
246         case GT_ADD:
247         case GT_SUB:
248             if (varTypeIsFloating(tree->TypeGet()))
249             {
250                 // overflow operations aren't supported on float/double types.
251                 assert(!tree->gtOverflow());
252
253                 // No implicit conversions at this stage as the expectation is that
254                 // everything is made explicit by adding casts.
255                 assert(tree->gtGetOp1()->TypeGet() == tree->gtGetOp2()->TypeGet());
256             }
257
258             __fallthrough;
259
260         case GT_AND:
261         case GT_OR:
262         case GT_XOR:
263         case GT_LSH:
264         case GT_RSH:
265         case GT_RSZ:
266         case GT_ROR:
267             srcCount = BuildBinaryUses(tree->AsOp());
268             assert(dstCount == 1);
269             BuildDef(tree);
270             break;
271
272         case GT_RETURNTRAP:
273             // this just turns into a compare of its child with an int
274             // + a conditional call
275             BuildUse(tree->gtGetOp1());
276             srcCount = 1;
277             assert(dstCount == 0);
278             killMask = compiler->compHelperCallKillSet(CORINFO_HELP_STOP_FOR_GC);
279             BuildDefsWithKills(tree, 0, RBM_NONE, killMask);
280             break;
281
282         case GT_MOD:
283         case GT_UMOD:
284             NYI_IF(varTypeIsFloating(tree->TypeGet()), "FP Remainder in ARM64");
285             assert(!"Shouldn't see an integer typed GT_MOD node in ARM64");
286             srcCount = 0;
287             break;
288
289         case GT_MUL:
290             if (tree->gtOverflow())
291             {
292                 // Need a register different from target reg to check for overflow.
293                 buildInternalIntRegisterDefForNode(tree);
294                 setInternalRegsDelayFree = true;
295             }
296             __fallthrough;
297
298         case GT_DIV:
299         case GT_MULHI:
300         case GT_UDIV:
301         {
302             srcCount = BuildBinaryUses(tree->AsOp());
303             buildInternalRegisterUses();
304             assert(dstCount == 1);
305             BuildDef(tree);
306         }
307         break;
308
309         case GT_INTRINSIC:
310         {
311             noway_assert((tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Abs) ||
312                          (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Ceiling) ||
313                          (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Floor) ||
314                          (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round) ||
315                          (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Sqrt));
316
317             // Both operand and its result must be of the same floating point type.
318             GenTree* op1 = tree->gtGetOp1();
319             assert(varTypeIsFloating(op1));
320             assert(op1->TypeGet() == tree->TypeGet());
321
322             BuildUse(op1);
323             srcCount = 1;
324             assert(dstCount == 1);
325             BuildDef(tree);
326         }
327         break;
328
329 #ifdef FEATURE_SIMD
330         case GT_SIMD:
331             srcCount = BuildSIMD(tree->AsSIMD());
332             break;
333 #endif // FEATURE_SIMD
334
335 #ifdef FEATURE_HW_INTRINSICS
336         case GT_HWIntrinsic:
337             srcCount = BuildHWIntrinsic(tree->AsHWIntrinsic());
338             break;
339 #endif // FEATURE_HW_INTRINSICS
340
341         case GT_CAST:
342             assert(dstCount == 1);
343             srcCount = BuildCast(tree->AsCast());
344             break;
345
346         case GT_NEG:
347         case GT_NOT:
348             BuildUse(tree->gtGetOp1());
349             srcCount = 1;
350             assert(dstCount == 1);
351             BuildDef(tree);
352             break;
353
354         case GT_EQ:
355         case GT_NE:
356         case GT_LT:
357         case GT_LE:
358         case GT_GE:
359         case GT_GT:
360         case GT_TEST_EQ:
361         case GT_TEST_NE:
362         case GT_JCMP:
363             srcCount = BuildCmp(tree);
364             break;
365
366         case GT_CKFINITE:
367             srcCount = 1;
368             assert(dstCount == 1);
369             buildInternalIntRegisterDefForNode(tree);
370             BuildUse(tree->gtGetOp1());
371             BuildDef(tree);
372             buildInternalRegisterUses();
373             break;
374
375         case GT_CMPXCHG:
376         {
377             GenTreeCmpXchg* cmpXchgNode = tree->AsCmpXchg();
378             srcCount                    = cmpXchgNode->gtOpComparand->isContained() ? 2 : 3;
379             assert(dstCount == 1);
380
381             if (!compiler->compSupports(InstructionSet_Atomics))
382             {
383                 // For ARMv8 exclusives requires a single internal register
384                 buildInternalIntRegisterDefForNode(tree);
385             }
386
387             // For ARMv8 exclusives the lifetime of the addr and data must be extended because
388             // it may be used used multiple during retries
389
390             // For ARMv8.1 atomic cas the lifetime of the addr and data must be extended to prevent
391             // them being reused as the target register which must be destroyed early
392
393             RefPosition* locationUse = BuildUse(tree->gtCmpXchg.gtOpLocation);
394             setDelayFree(locationUse);
395             RefPosition* valueUse = BuildUse(tree->gtCmpXchg.gtOpValue);
396             setDelayFree(valueUse);
397             if (!cmpXchgNode->gtOpComparand->isContained())
398             {
399                 RefPosition* comparandUse = BuildUse(tree->gtCmpXchg.gtOpComparand);
400
401                 // For ARMv8 exclusives the lifetime of the comparand must be extended because
402                 // it may be used used multiple during retries
403                 if (!compiler->compSupports(InstructionSet_Atomics))
404                 {
405                     setDelayFree(comparandUse);
406                 }
407             }
408
409             // Internals may not collide with target
410             setInternalRegsDelayFree = true;
411             buildInternalRegisterUses();
412             BuildDef(tree);
413         }
414         break;
415
416         case GT_LOCKADD:
417         case GT_XADD:
418         case GT_XCHG:
419         {
420             assert(dstCount == (tree->TypeGet() == TYP_VOID) ? 0 : 1);
421             srcCount = tree->gtGetOp2()->isContained() ? 1 : 2;
422
423             if (!compiler->compSupports(InstructionSet_Atomics))
424             {
425                 // GT_XCHG requires a single internal register; the others require two.
426                 buildInternalIntRegisterDefForNode(tree);
427                 if (tree->OperGet() != GT_XCHG)
428                 {
429                     buildInternalIntRegisterDefForNode(tree);
430                 }
431             }
432
433             assert(!tree->gtGetOp1()->isContained());
434             RefPosition* op1Use = BuildUse(tree->gtGetOp1());
435             RefPosition* op2Use = nullptr;
436             if (!tree->gtGetOp2()->isContained())
437             {
438                 op2Use = BuildUse(tree->gtGetOp2());
439             }
440
441             // For ARMv8 exclusives the lifetime of the addr and data must be extended because
442             // it may be used used multiple during retries
443             if (!compiler->compSupports(InstructionSet_Atomics))
444             {
445                 // Internals may not collide with target
446                 if (dstCount == 1)
447                 {
448                     setDelayFree(op1Use);
449                     if (op2Use != nullptr)
450                     {
451                         setDelayFree(op2Use);
452                     }
453                     setInternalRegsDelayFree = true;
454                 }
455                 buildInternalRegisterUses();
456             }
457             if (dstCount == 1)
458             {
459                 BuildDef(tree);
460             }
461         }
462         break;
463
464 #if FEATURE_ARG_SPLIT
465         case GT_PUTARG_SPLIT:
466             srcCount = BuildPutArgSplit(tree->AsPutArgSplit());
467             dstCount = tree->AsPutArgSplit()->gtNumRegs;
468             break;
469 #endif // FEATURE _SPLIT_ARG
470
471         case GT_PUTARG_STK:
472             srcCount = BuildPutArgStk(tree->AsPutArgStk());
473             break;
474
475         case GT_PUTARG_REG:
476             srcCount = BuildPutArgReg(tree->AsUnOp());
477             break;
478
479         case GT_CALL:
480             srcCount = BuildCall(tree->AsCall());
481             if (tree->AsCall()->HasMultiRegRetVal())
482             {
483                 dstCount = tree->AsCall()->GetReturnTypeDesc()->GetReturnRegCount();
484             }
485             break;
486
487         case GT_ADDR:
488         {
489             // For a GT_ADDR, the child node should not be evaluated into a register
490             GenTree* child = tree->gtGetOp1();
491             assert(!isCandidateLocalRef(child));
492             assert(child->isContained());
493             assert(dstCount == 1);
494             srcCount = 0;
495             BuildDef(tree);
496         }
497         break;
498
499         case GT_BLK:
500         case GT_DYN_BLK:
501             // These should all be eliminated prior to Lowering.
502             assert(!"Non-store block node in Lowering");
503             srcCount = 0;
504             break;
505
506         case GT_STORE_BLK:
507         case GT_STORE_OBJ:
508         case GT_STORE_DYN_BLK:
509             srcCount = BuildBlockStore(tree->AsBlk());
510             break;
511
512         case GT_INIT_VAL:
513             // Always a passthrough of its child's value.
514             assert(!"INIT_VAL should always be contained");
515             srcCount = 0;
516             break;
517
518         case GT_LCLHEAP:
519         {
520             assert(dstCount == 1);
521
522             // Need a variable number of temp regs (see genLclHeap() in codegenamd64.cpp):
523             // Here '-' means don't care.
524             //
525             //  Size?                   Init Memory?    # temp regs
526             //   0                          -               0
527             //   const and <=6 ptr words    -               0
528             //   const and <PageSize        No              0
529             //   >6 ptr words               Yes             0
530             //   Non-const                  Yes             0
531             //   Non-const                  No              2
532             //
533
534             GenTree* size = tree->gtGetOp1();
535             if (size->IsCnsIntOrI())
536             {
537                 assert(size->isContained());
538                 srcCount = 0;
539
540                 size_t sizeVal = size->gtIntCon.gtIconVal;
541
542                 if (sizeVal != 0)
543                 {
544                     // Compute the amount of memory to properly STACK_ALIGN.
545                     // Note: The Gentree node is not updated here as it is cheap to recompute stack aligned size.
546                     // This should also help in debugging as we can examine the original size specified with
547                     // localloc.
548                     sizeVal         = AlignUp(sizeVal, STACK_ALIGN);
549                     size_t stpCount = sizeVal / (REGSIZE_BYTES * 2);
550
551                     // For small allocations up to 4 'stp' instructions (i.e. 16 to 64 bytes of localloc)
552                     //
553                     if (stpCount <= 4)
554                     {
555                         // Need no internal registers
556                     }
557                     else if (!compiler->info.compInitMem)
558                     {
559                         // No need to initialize allocated stack space.
560                         if (sizeVal < compiler->eeGetPageSize())
561                         {
562                             // Need no internal registers
563                         }
564                         else
565                         {
566                             // We need two registers: regCnt and RegTmp
567                             buildInternalIntRegisterDefForNode(tree);
568                             buildInternalIntRegisterDefForNode(tree);
569                         }
570                     }
571                 }
572             }
573             else
574             {
575                 srcCount = 1;
576                 if (!compiler->info.compInitMem)
577                 {
578                     buildInternalIntRegisterDefForNode(tree);
579                     buildInternalIntRegisterDefForNode(tree);
580                 }
581             }
582
583             if (!size->isContained())
584             {
585                 BuildUse(size);
586             }
587             buildInternalRegisterUses();
588             BuildDef(tree);
589         }
590         break;
591
592         case GT_ARR_BOUNDS_CHECK:
593 #ifdef FEATURE_SIMD
594         case GT_SIMD_CHK:
595 #endif // FEATURE_SIMD
596 #ifdef FEATURE_HW_INTRINSICS
597         case GT_HW_INTRINSIC_CHK:
598 #endif // FEATURE_HW_INTRINSICS
599         {
600             GenTreeBoundsChk* node = tree->AsBoundsChk();
601             // Consumes arrLen & index - has no result
602             assert(dstCount == 0);
603             srcCount = BuildOperandUses(node->gtIndex);
604             srcCount += BuildOperandUses(node->gtArrLen);
605         }
606         break;
607
608         case GT_ARR_ELEM:
609             // These must have been lowered to GT_ARR_INDEX
610             noway_assert(!"We should never see a GT_ARR_ELEM in lowering");
611             srcCount = 0;
612             assert(dstCount == 0);
613             break;
614
615         case GT_ARR_INDEX:
616         {
617             srcCount = 2;
618             assert(dstCount == 1);
619             buildInternalIntRegisterDefForNode(tree);
620             setInternalRegsDelayFree = true;
621
622             // For GT_ARR_INDEX, the lifetime of the arrObj must be extended because it is actually used multiple
623             // times while the result is being computed.
624             RefPosition* arrObjUse = BuildUse(tree->AsArrIndex()->ArrObj());
625             setDelayFree(arrObjUse);
626             BuildUse(tree->AsArrIndex()->IndexExpr());
627             buildInternalRegisterUses();
628             BuildDef(tree);
629         }
630         break;
631
632         case GT_ARR_OFFSET:
633             // This consumes the offset, if any, the arrObj and the effective index,
634             // and produces the flattened offset for this dimension.
635             srcCount = 2;
636             if (!tree->gtArrOffs.gtOffset->isContained())
637             {
638                 BuildUse(tree->AsArrOffs()->gtOffset);
639                 srcCount++;
640             }
641             BuildUse(tree->AsArrOffs()->gtIndex);
642             BuildUse(tree->AsArrOffs()->gtArrObj);
643             assert(dstCount == 1);
644             buildInternalIntRegisterDefForNode(tree);
645             buildInternalRegisterUses();
646             BuildDef(tree);
647             break;
648
649         case GT_LEA:
650         {
651             GenTreeAddrMode* lea = tree->AsAddrMode();
652
653             GenTree* base  = lea->Base();
654             GenTree* index = lea->Index();
655             int      cns   = lea->Offset();
656
657             // This LEA is instantiating an address, so we set up the srcCount here.
658             srcCount = 0;
659             if (base != nullptr)
660             {
661                 srcCount++;
662                 BuildUse(base);
663             }
664             if (index != nullptr)
665             {
666                 srcCount++;
667                 BuildUse(index);
668             }
669             assert(dstCount == 1);
670
671             // On ARM64 we may need a single internal register
672             // (when both conditions are true then we still only need a single internal register)
673             if ((index != nullptr) && (cns != 0))
674             {
675                 // ARM64 does not support both Index and offset so we need an internal register
676                 buildInternalIntRegisterDefForNode(tree);
677             }
678             else if (!emitter::emitIns_valid_imm_for_add(cns, EA_8BYTE))
679             {
680                 // This offset can't be contained in the add instruction, so we need an internal register
681                 buildInternalIntRegisterDefForNode(tree);
682             }
683             buildInternalRegisterUses();
684             BuildDef(tree);
685         }
686         break;
687
688         case GT_STOREIND:
689         {
690             assert(dstCount == 0);
691
692             if (compiler->codeGen->gcInfo.gcIsWriteBarrierStoreIndNode(tree))
693             {
694                 srcCount = BuildGCWriteBarrier(tree);
695                 break;
696             }
697
698             srcCount = BuildIndir(tree->AsIndir());
699             if (!tree->gtGetOp2()->isContained())
700             {
701                 BuildUse(tree->gtGetOp2());
702                 srcCount++;
703             }
704         }
705         break;
706
707         case GT_NULLCHECK:
708             // Unlike ARM, ARM64 implements NULLCHECK as a load to REG_ZR, so no internal register
709             // is required, and it is not a localDefUse.
710             assert(dstCount == 0);
711             assert(!tree->gtGetOp1()->isContained());
712             BuildUse(tree->gtGetOp1());
713             srcCount = 1;
714             break;
715
716         case GT_IND:
717             assert(dstCount == 1);
718             srcCount = BuildIndir(tree->AsIndir());
719             break;
720
721         case GT_CATCH_ARG:
722             srcCount = 0;
723             assert(dstCount == 1);
724             BuildDef(tree, RBM_EXCEPTION_OBJECT);
725             break;
726
727         case GT_CLS_VAR:
728             srcCount = 0;
729             // GT_CLS_VAR, by the time we reach the backend, must always
730             // be a pure use.
731             // It will produce a result of the type of the
732             // node, and use an internal register for the address.
733
734             assert(dstCount == 1);
735             assert((tree->gtFlags & (GTF_VAR_DEF | GTF_VAR_USEASG)) == 0);
736             buildInternalIntRegisterDefForNode(tree);
737             buildInternalRegisterUses();
738             BuildDef(tree);
739             break;
740
741         case GT_INDEX_ADDR:
742             assert(dstCount == 1);
743             srcCount = BuildBinaryUses(tree->AsOp());
744             buildInternalIntRegisterDefForNode(tree);
745             buildInternalRegisterUses();
746             BuildDef(tree);
747             break;
748
749     } // end switch (tree->OperGet())
750
751     if (tree->IsUnusedValue() && (dstCount != 0))
752     {
753         isLocalDefUse = true;
754     }
755     // We need to be sure that we've set srcCount and dstCount appropriately
756     assert((dstCount < 2) || tree->IsMultiRegCall());
757     assert(isLocalDefUse == (tree->IsValue() && tree->IsUnusedValue()));
758     assert(!tree->IsUnusedValue() || (dstCount != 0));
759     assert(dstCount == tree->GetRegisterDstCount());
760     INDEBUG(dumpNodeInfo(tree, dstCandidates, srcCount, dstCount));
761     return srcCount;
762 }
763
764 #ifdef FEATURE_SIMD
765 //------------------------------------------------------------------------
766 // BuildSIMD: Set the NodeInfo for a GT_SIMD tree.
767 //
768 // Arguments:
769 //    simdTree   - The GT_SIMD node of interest
770 //
771 // Return Value:
772 //    The number of sources consumed by this node.
773 //
774 int LinearScan::BuildSIMD(GenTreeSIMD* simdTree)
775 {
776     int srcCount = 0;
777     // Only SIMDIntrinsicInit can be contained
778     if (simdTree->isContained())
779     {
780         assert(simdTree->gtSIMDIntrinsicID == SIMDIntrinsicInit);
781     }
782     int dstCount = simdTree->IsValue() ? 1 : 0;
783     assert(dstCount == 1);
784
785     bool buildUses = true;
786
787     GenTree* op1 = simdTree->gtGetOp1();
788     GenTree* op2 = simdTree->gtGetOp2();
789
790     switch (simdTree->gtSIMDIntrinsicID)
791     {
792         case SIMDIntrinsicInit:
793         case SIMDIntrinsicCast:
794         case SIMDIntrinsicSqrt:
795         case SIMDIntrinsicAbs:
796         case SIMDIntrinsicConvertToSingle:
797         case SIMDIntrinsicConvertToInt32:
798         case SIMDIntrinsicConvertToDouble:
799         case SIMDIntrinsicConvertToInt64:
800         case SIMDIntrinsicWidenLo:
801         case SIMDIntrinsicWidenHi:
802             // No special handling required.
803             break;
804
805         case SIMDIntrinsicGetItem:
806         {
807             op1 = simdTree->gtGetOp1();
808             op2 = simdTree->gtGetOp2();
809
810             // We have an object and an index, either of which may be contained.
811             bool setOp2DelayFree = false;
812             if (!op2->IsCnsIntOrI() && (!op1->isContained() || op1->OperIsLocal()))
813             {
814                 // If the index is not a constant and the object is not contained or is a local
815                 // we will need a general purpose register to calculate the address
816                 // internal register must not clobber input index
817                 // TODO-Cleanup: An internal register will never clobber a source; this code actually
818                 // ensures that the index (op2) doesn't interfere with the target.
819                 buildInternalIntRegisterDefForNode(simdTree);
820                 setOp2DelayFree = true;
821             }
822             srcCount += BuildOperandUses(op1);
823             if (!op2->isContained())
824             {
825                 RefPosition* op2Use = BuildUse(op2);
826                 if (setOp2DelayFree)
827                 {
828                     setDelayFree(op2Use);
829                 }
830                 srcCount++;
831             }
832
833             if (!op2->IsCnsIntOrI() && (!op1->isContained()))
834             {
835                 // If vector is not already in memory (contained) and the index is not a constant,
836                 // we will use the SIMD temp location to store the vector.
837                 compiler->getSIMDInitTempVarNum();
838             }
839             buildUses = false;
840         }
841         break;
842
843         case SIMDIntrinsicAdd:
844         case SIMDIntrinsicSub:
845         case SIMDIntrinsicMul:
846         case SIMDIntrinsicDiv:
847         case SIMDIntrinsicBitwiseAnd:
848         case SIMDIntrinsicBitwiseAndNot:
849         case SIMDIntrinsicBitwiseOr:
850         case SIMDIntrinsicBitwiseXor:
851         case SIMDIntrinsicMin:
852         case SIMDIntrinsicMax:
853         case SIMDIntrinsicEqual:
854         case SIMDIntrinsicLessThan:
855         case SIMDIntrinsicGreaterThan:
856         case SIMDIntrinsicLessThanOrEqual:
857         case SIMDIntrinsicGreaterThanOrEqual:
858             // No special handling required.
859             break;
860
861         case SIMDIntrinsicSetX:
862         case SIMDIntrinsicSetY:
863         case SIMDIntrinsicSetZ:
864         case SIMDIntrinsicSetW:
865         case SIMDIntrinsicNarrow:
866         {
867             // Op1 will write to dst before Op2 is free
868             BuildUse(op1);
869             RefPosition* op2Use = BuildUse(op2);
870             setDelayFree(op2Use);
871             srcCount  = 2;
872             buildUses = false;
873             break;
874         }
875
876         case SIMDIntrinsicInitN:
877         {
878             var_types baseType = simdTree->gtSIMDBaseType;
879             srcCount           = (short)(simdTree->gtSIMDSize / genTypeSize(baseType));
880             if (varTypeIsFloating(simdTree->gtSIMDBaseType))
881             {
882                 // Need an internal register to stitch together all the values into a single vector in a SIMD reg.
883                 buildInternalFloatRegisterDefForNode(simdTree);
884             }
885
886             int initCount = 0;
887             for (GenTree* list = op1; list != nullptr; list = list->gtGetOp2())
888             {
889                 assert(list->OperGet() == GT_LIST);
890                 GenTree* listItem = list->gtGetOp1();
891                 assert(listItem->TypeGet() == baseType);
892                 assert(!listItem->isContained());
893                 BuildUse(listItem);
894                 initCount++;
895             }
896             assert(initCount == srcCount);
897             buildUses = false;
898
899             break;
900         }
901
902         case SIMDIntrinsicInitArray:
903             // We have an array and an index, which may be contained.
904             break;
905
906         case SIMDIntrinsicOpEquality:
907         case SIMDIntrinsicOpInEquality:
908             buildInternalFloatRegisterDefForNode(simdTree);
909             break;
910
911         case SIMDIntrinsicDotProduct:
912             buildInternalFloatRegisterDefForNode(simdTree);
913             break;
914
915         case SIMDIntrinsicSelect:
916             // TODO-ARM64-CQ Allow lowering to see SIMDIntrinsicSelect so we can generate BSL VC, VA, VB
917             // bsl target register must be VC.  Reserve a temp in case we need to shuffle things.
918             // This will require a different approach, as GenTreeSIMD has only two operands.
919             assert(!"SIMDIntrinsicSelect not yet supported");
920             buildInternalFloatRegisterDefForNode(simdTree);
921             break;
922
923         case SIMDIntrinsicInitArrayX:
924         case SIMDIntrinsicInitFixed:
925         case SIMDIntrinsicCopyToArray:
926         case SIMDIntrinsicCopyToArrayX:
927         case SIMDIntrinsicNone:
928         case SIMDIntrinsicGetCount:
929         case SIMDIntrinsicGetOne:
930         case SIMDIntrinsicGetZero:
931         case SIMDIntrinsicGetAllOnes:
932         case SIMDIntrinsicGetX:
933         case SIMDIntrinsicGetY:
934         case SIMDIntrinsicGetZ:
935         case SIMDIntrinsicGetW:
936         case SIMDIntrinsicInstEquals:
937         case SIMDIntrinsicHWAccel:
938         case SIMDIntrinsicWiden:
939         case SIMDIntrinsicInvalid:
940             assert(!"These intrinsics should not be seen during register allocation");
941             __fallthrough;
942
943         default:
944             noway_assert(!"Unimplemented SIMD node type.");
945             unreached();
946     }
947     if (buildUses)
948     {
949         assert(!op1->OperIs(GT_LIST));
950         assert(srcCount == 0);
951         srcCount = BuildOperandUses(op1);
952         if ((op2 != nullptr) && !op2->isContained())
953         {
954             srcCount += BuildOperandUses(op2);
955         }
956     }
957     assert(internalCount <= MaxInternalCount);
958     buildInternalRegisterUses();
959     if (dstCount == 1)
960     {
961         BuildDef(simdTree);
962     }
963     else
964     {
965         assert(dstCount == 0);
966     }
967     return srcCount;
968 }
969 #endif // FEATURE_SIMD
970
971 #ifdef FEATURE_HW_INTRINSICS
972 #include "hwintrinsic.h"
973 //------------------------------------------------------------------------
974 // BuildHWIntrinsic: Set the NodeInfo for a GT_HWIntrinsic tree.
975 //
976 // Arguments:
977 //    tree       - The GT_HWIntrinsic node of interest
978 //
979 // Return Value:
980 //    The number of sources consumed by this node.
981 //
982 int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
983 {
984     NamedIntrinsic intrinsicID = intrinsicTree->gtHWIntrinsicId;
985     int            numArgs     = HWIntrinsicInfo::lookupNumArgs(intrinsicTree);
986
987     GenTree* op1      = intrinsicTree->gtGetOp1();
988     GenTree* op2      = intrinsicTree->gtGetOp2();
989     GenTree* op3      = nullptr;
990     int      srcCount = 0;
991
992     if ((op1 != nullptr) && op1->OperIsList())
993     {
994         // op2 must be null, and there must be at least two more arguments.
995         assert(op2 == nullptr);
996         noway_assert(op1->AsArgList()->Rest() != nullptr);
997         noway_assert(op1->AsArgList()->Rest()->Rest() != nullptr);
998         assert(op1->AsArgList()->Rest()->Rest()->Rest() == nullptr);
999         op2 = op1->AsArgList()->Rest()->Current();
1000         op3 = op1->AsArgList()->Rest()->Rest()->Current();
1001         op1 = op1->AsArgList()->Current();
1002     }
1003
1004     bool op2IsDelayFree = false;
1005     bool op3IsDelayFree = false;
1006
1007     // Create internal temps, and handle any other special requirements.
1008     switch (HWIntrinsicInfo::lookup(intrinsicID).form)
1009     {
1010         case HWIntrinsicInfo::Sha1HashOp:
1011             assert((numArgs == 3) && (op2 != nullptr) && (op3 != nullptr));
1012             if (!op2->isContained())
1013             {
1014                 assert(!op3->isContained());
1015                 op2IsDelayFree           = true;
1016                 op3IsDelayFree           = true;
1017                 setInternalRegsDelayFree = true;
1018             }
1019             buildInternalFloatRegisterDefForNode(intrinsicTree);
1020             break;
1021         case HWIntrinsicInfo::SimdTernaryRMWOp:
1022             assert((numArgs == 3) && (op2 != nullptr) && (op3 != nullptr));
1023             if (!op2->isContained())
1024             {
1025                 assert(!op3->isContained());
1026                 op2IsDelayFree = true;
1027                 op3IsDelayFree = true;
1028             }
1029             break;
1030         case HWIntrinsicInfo::Sha1RotateOp:
1031             buildInternalFloatRegisterDefForNode(intrinsicTree);
1032             break;
1033
1034         case HWIntrinsicInfo::SimdExtractOp:
1035         case HWIntrinsicInfo::SimdInsertOp:
1036             if (!op2->isContained())
1037             {
1038                 // We need a temp to create a switch table
1039                 buildInternalIntRegisterDefForNode(intrinsicTree);
1040             }
1041             break;
1042
1043         default:
1044             break;
1045     }
1046
1047     // Next, build uses
1048     if (numArgs > 3)
1049     {
1050         srcCount = 0;
1051         assert(!op2IsDelayFree && !op3IsDelayFree);
1052         assert(op1->OperIs(GT_LIST));
1053         {
1054             for (GenTreeArgList* list = op1->AsArgList(); list != nullptr; list = list->Rest())
1055             {
1056                 srcCount += BuildOperandUses(list->Current());
1057             }
1058         }
1059         assert(srcCount == numArgs);
1060     }
1061     else
1062     {
1063         if (op1 != nullptr)
1064         {
1065             srcCount += BuildOperandUses(op1);
1066             if (op2 != nullptr)
1067             {
1068                 srcCount += (op2IsDelayFree) ? BuildDelayFreeUses(op2) : BuildOperandUses(op2);
1069                 if (op3 != nullptr)
1070                 {
1071                     srcCount += (op3IsDelayFree) ? BuildDelayFreeUses(op3) : BuildOperandUses(op3);
1072                 }
1073             }
1074         }
1075     }
1076     buildInternalRegisterUses();
1077
1078     // Now defs
1079     if (intrinsicTree->IsValue())
1080     {
1081         BuildDef(intrinsicTree);
1082     }
1083
1084     return srcCount;
1085 }
1086 #endif
1087
1088 #endif // _TARGET_ARM64_