1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
5 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
8 XX Register Requirements for ARM and ARM64 common code XX
10 XX This encapsulates common logic for setting register requirements for XX
11 XX the ARM and ARM64 architectures. XX
13 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
14 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
22 #ifdef _TARGET_ARMARCH_ // This file is ONLY used for ARM and ARM64 architectures
25 #include "sideeffects.h"
29 //------------------------------------------------------------------------
30 // BuildIndir: Specify register requirements for address expression
31 // of an indirection operation.
34 // indirTree - GT_IND, GT_STOREIND or block gentree node
37 // The number of sources consumed by this node.
int LinearScan::BuildIndir(GenTreeIndir* indirTree)
// NOTE(review): this excerpt is elided — braces, the 'srcCount' declaration and the
// final return are not visible here.
// If this is the rhs of a block copy (i.e. non-enregisterable struct),
// it has no register requirements.
if (indirTree->TypeGet() == TYP_STRUCT)
GenTree* addr = indirTree->Addr();
GenTree* index = nullptr;
// Unaligned loads/stores for floating point values must first be loaded into integer register(s)
if (indirTree->gtFlags & GTF_IND_UNALIGNED)
// Determine the FP type involved: for a store it is the type of the data operand,
// for a load it is the type of the indirection itself.
var_types type = TYP_UNDEF;
if (indirTree->OperGet() == GT_STOREIND)
type = indirTree->AsStoreInd()->Data()->TypeGet();
else if (indirTree->OperGet() == GT_IND)
type = indirTree->TypeGet();
// One internal int register for a 4-byte float; two for an 8-byte double.
if (type == TYP_FLOAT)
buildInternalIntRegisterDefForNode(indirTree);
else if (type == TYP_DOUBLE)
buildInternalIntRegisterDefForNode(indirTree);
buildInternalIntRegisterDefForNode(indirTree);
// A contained address must be an address mode (GT_LEA).
if (addr->isContained())
assert(addr->OperGet() == GT_LEA);
GenTreeAddrMode* lea = addr->AsAddrMode();
// NOTE(review): the declaration of 'cns' and the assignment of 'index' are not in this
// excerpt — presumably cns = lea->Offset() and index = lea->Index(); confirm in full file.
// On ARM we may need a single internal register
// (when both conditions are true then we still only need a single internal register)
if ((index != nullptr) && (cns != 0))
// ARM does not support both Index and offset so we need an internal register
buildInternalIntRegisterDefForNode(indirTree);
else if (!emitter::emitIns_valid_imm_for_ldst_offset(cns, emitTypeSize(indirTree)))
// This offset can't be contained in the ldr/str instruction, so we need an internal register
buildInternalIntRegisterDefForNode(indirTree);
if (indirTree->TypeGet() == TYP_SIMD12)
// If indirTree is of TYP_SIMD12, addr is not contained. See comment in LowerIndir().
assert(!addr->isContained());
// Vector3 is read/written as two reads/writes: 8 byte and 4 byte.
// To assemble the vector properly we would need an additional int register
buildInternalIntRegisterDefForNode(indirTree);
#endif // FEATURE_SIMD
// Build uses for the address operand(s), then finalize the internal defs requested above.
srcCount = BuildIndirUses(indirTree);
buildInternalRegisterUses();
// Loads define a destination register; stores do not.
if (indirTree->gtOper != GT_STOREIND)
122 //------------------------------------------------------------------------
123 // BuildCall: Set the NodeInfo for a call.
126 // call - The call node of interest
129 // The number of sources consumed by this node.
131 int LinearScan::BuildCall(GenTreeCall* call)
133 bool hasMultiRegRetVal = false;
134 ReturnTypeDesc* retTypeDesc = nullptr;
135 regMaskTP dstCandidates = RBM_NONE;
139 if (call->TypeGet() != TYP_VOID)
141 hasMultiRegRetVal = call->HasMultiRegRetVal();
142 if (hasMultiRegRetVal)
144 // dst count = number of registers in which the value is returned by call
145 retTypeDesc = call->GetReturnTypeDesc();
146 dstCount = retTypeDesc->GetReturnRegCount();
154 GenTree* ctrlExpr = call->gtControlExpr;
155 regMaskTP ctrlExprCandidates = RBM_NONE;
156 if (call->gtCallType == CT_INDIRECT)
158 // either gtControlExpr != null or gtCallAddr != null.
159 // Both cannot be non-null at the same time.
160 assert(ctrlExpr == nullptr);
161 assert(call->gtCallAddr != nullptr);
162 ctrlExpr = call->gtCallAddr;
165 // set reg requirements on call target represented as control sequence.
166 if (ctrlExpr != nullptr)
168 // we should never see a gtControlExpr whose type is void.
169 assert(ctrlExpr->TypeGet() != TYP_VOID);
171 // In case of fast tail implemented as jmp, make sure that gtControlExpr is
172 // computed into a register.
173 if (call->IsFastTailCall())
175 // Fast tail call - make sure that call target is always computed in R12(ARM32)/IP0(ARM64)
176 // so that epilog sequence can generate "br xip0/r12" to achieve fast tail call.
177 ctrlExprCandidates = RBM_FASTTAILCALL_TARGET;
183 buildInternalIntRegisterDefForNode(call);
186 if (call->NeedsNullCheck())
188 buildInternalIntRegisterDefForNode(call);
191 #endif // _TARGET_ARM_
193 RegisterType registerType = call->TypeGet();
195 // Set destination candidates for return value of the call.
198 if (call->IsHelperCall(compiler, CORINFO_HELP_INIT_PINVOKE_FRAME))
200 // The ARM CORINFO_HELP_INIT_PINVOKE_FRAME helper uses a custom calling convention that returns with
201 // TCB in REG_PINVOKE_TCB. fgMorphCall() sets the correct argument registers.
202 dstCandidates = RBM_PINVOKE_TCB;
205 #endif // _TARGET_ARM_
206 if (hasMultiRegRetVal)
208 assert(retTypeDesc != nullptr);
209 dstCandidates = retTypeDesc->GetABIReturnRegs();
211 else if (varTypeIsFloating(registerType))
213 dstCandidates = RBM_FLOATRET;
215 else if (registerType == TYP_LONG)
217 dstCandidates = RBM_LNGRET;
221 dstCandidates = RBM_INTRET;
224 // First, count reg args
225 // Each register argument corresponds to one source.
226 bool callHasFloatRegArgs = false;
228 for (GenTree* list = call->gtCallLateArgs; list; list = list->MoveNext())
230 assert(list->OperIsList());
232 GenTree* argNode = list->Current();
235 // During Build, we only use the ArgTabEntry for validation,
236 // as getting it is rather expensive.
237 fgArgTabEntry* curArgTabEntry = compiler->gtArgEntryByNode(call, argNode);
238 regNumber argReg = curArgTabEntry->regNum;
239 assert(curArgTabEntry);
242 if (argNode->gtOper == GT_PUTARG_STK)
244 // late arg that is not passed in a register
245 assert(curArgTabEntry->regNum == REG_STK);
246 // These should never be contained.
247 assert(!argNode->isContained());
251 // A GT_FIELD_LIST has a TYP_VOID, but is used to represent a multireg struct
252 if (argNode->OperGet() == GT_FIELD_LIST)
254 assert(argNode->isContained());
256 // There could be up to 2-4 PUTARG_REGs in the list (3 or 4 can only occur for HFAs)
257 for (GenTreeFieldList* entry = argNode->AsFieldList(); entry != nullptr; entry = entry->Rest())
260 assert(entry->Current()->OperIs(GT_PUTARG_REG));
261 assert(entry->Current()->gtRegNum == argReg);
262 // Update argReg for the next putarg_reg (if any)
263 argReg = genRegArgNext(argReg);
265 #if defined(_TARGET_ARM_)
266 // A double register is modelled as an even-numbered single one
267 if (entry->Current()->TypeGet() == TYP_DOUBLE)
269 argReg = genRegArgNext(argReg);
271 #endif // _TARGET_ARM_
273 BuildUse(entry->Current(), genRegMask(entry->Current()->gtRegNum));
277 #if FEATURE_ARG_SPLIT
278 else if (argNode->OperGet() == GT_PUTARG_SPLIT)
280 unsigned regCount = argNode->AsPutArgSplit()->gtNumRegs;
281 assert(regCount == curArgTabEntry->numRegs);
282 for (unsigned int i = 0; i < regCount; i++)
284 BuildUse(argNode, genRegMask(argNode->AsPutArgSplit()->GetRegNumByIdx(i)), i);
286 srcCount += regCount;
288 #endif // FEATURE_ARG_SPLIT
291 assert(argNode->OperIs(GT_PUTARG_REG));
292 assert(argNode->gtRegNum == argReg);
293 HandleFloatVarArgs(call, argNode, &callHasFloatRegArgs);
295 // The `double` types have been transformed to `long` on armel,
296 // while the actual long types have been decomposed.
297 // On ARM we may have bitcasts from DOUBLE to LONG.
298 if (argNode->TypeGet() == TYP_LONG)
300 assert(argNode->IsMultiRegNode());
301 BuildUse(argNode, genRegMask(argNode->gtRegNum), 0);
302 BuildUse(argNode, genRegMask(genRegArgNext(argNode->gtRegNum)), 1);
306 #endif // _TARGET_ARM_
308 BuildUse(argNode, genRegMask(argNode->gtRegNum));
314 // Now, count stack args
315 // Note that these need to be computed into a register, but then
316 // they're just stored to the stack - so the reg doesn't
317 // need to remain live until the call. In fact, it must not
318 // because the code generator doesn't actually consider it live,
319 // so it can't be spilled.
321 GenTree* args = call->gtCallArgs;
324 GenTree* arg = args->gtGetOp1();
326 // Skip arguments that have been moved to the Late Arg list
327 if (!(args->gtFlags & GTF_LATE_ARG))
330 fgArgTabEntry* curArgTabEntry = compiler->gtArgEntryByNode(call, arg);
331 assert(curArgTabEntry);
333 #if FEATURE_ARG_SPLIT
334 // PUTARG_SPLIT nodes must be in the gtCallLateArgs list, since they
335 // define registers used by the call.
336 assert(arg->OperGet() != GT_PUTARG_SPLIT);
337 #endif // FEATURE_ARG_SPLIT
338 if (arg->gtOper == GT_PUTARG_STK)
340 assert(curArgTabEntry->regNum == REG_STK);
344 assert(!arg->IsValue() || arg->IsUnusedValue());
347 args = args->gtGetOp2();
350 // If it is a fast tail call, it is already preferenced to use IP0.
351 // Therefore, no need set src candidates on call tgt again.
352 if (call->IsVarargs() && callHasFloatRegArgs && !call->IsFastTailCall() && (ctrlExpr != nullptr))
354 NYI_ARM("float reg varargs");
356 // Don't assign the call target to any of the argument registers because
357 // we will use them to also pass floating point arguments as required
359 ctrlExprCandidates = allRegs(TYP_INT) & ~(RBM_ARG_REGS);
362 if (ctrlExpr != nullptr)
364 BuildUse(ctrlExpr, ctrlExprCandidates);
368 buildInternalRegisterUses();
370 // Now generate defs and kills.
371 regMaskTP killMask = getKillSetForCall(call);
372 BuildDefsWithKills(call, dstCount, dstCandidates, killMask);
376 //------------------------------------------------------------------------
377 // BuildPutArgStk: Set the NodeInfo for a GT_PUTARG_STK node
380 // argNode - a GT_PUTARG_STK node
383 // The number of sources consumed by this node.
386 // Set the child node(s) to be contained when we have a multireg arg
388 int LinearScan::BuildPutArgStk(GenTreePutArgStk* argNode)
390 assert(argNode->gtOper == GT_PUTARG_STK);
392 GenTree* putArgChild = argNode->gtGetOp1();
396 // Do we have a TYP_STRUCT argument (or a GT_FIELD_LIST), if so it must be a multireg pass-by-value struct
397 if ((putArgChild->TypeGet() == TYP_STRUCT) || (putArgChild->OperGet() == GT_FIELD_LIST))
399 // We will use store instructions that each write a register sized value
401 if (putArgChild->OperGet() == GT_FIELD_LIST)
403 assert(putArgChild->isContained());
404 // We consume all of the items in the GT_FIELD_LIST
405 for (GenTreeFieldList* current = putArgChild->AsFieldList(); current != nullptr; current = current->Rest())
407 BuildUse(current->Current());
413 // We can use a ldp/stp sequence so we need two internal registers for ARM64; one for ARM.
414 buildInternalIntRegisterDefForNode(argNode);
415 #ifdef _TARGET_ARM64_
416 buildInternalIntRegisterDefForNode(argNode);
417 #endif // _TARGET_ARM64_
419 if (putArgChild->OperGet() == GT_OBJ)
421 assert(putArgChild->isContained());
422 GenTree* objChild = putArgChild->gtGetOp1();
423 if (objChild->OperGet() == GT_LCL_VAR_ADDR)
425 // We will generate all of the code for the GT_PUTARG_STK, the GT_OBJ and the GT_LCL_VAR_ADDR
426 // as one contained operation, and there are no source registers.
428 assert(objChild->isContained());
432 // We will generate all of the code for the GT_PUTARG_STK and its child node
433 // as one contained operation
435 srcCount = BuildOperandUses(objChild);
440 // No source registers.
441 putArgChild->OperIs(GT_LCL_VAR);
447 assert(!putArgChild->isContained());
448 srcCount = BuildOperandUses(putArgChild);
450 buildInternalRegisterUses();
454 #if FEATURE_ARG_SPLIT
455 //------------------------------------------------------------------------
456 // BuildPutArgSplit: Set the NodeInfo for a GT_PUTARG_SPLIT node
459 // argNode - a GT_PUTARG_SPLIT node
462 // The number of sources consumed by this node.
465 // Set the child node(s) to be contained
int LinearScan::BuildPutArgSplit(GenTreePutArgSplit* argNode)
// NOTE(review): this excerpt is elided — braces, the 'srcCount' declaration and the
// final return are not visible here.
assert(argNode->gtOper == GT_PUTARG_SPLIT);
GenTree* putArgChild = argNode->gtGetOp1();
// Registers for split argument corresponds to source
int dstCount = argNode->gtNumRegs;
// Record the consecutive argument registers this split arg occupies, starting
// at the node's assigned register, and accumulate them into a candidate mask.
regNumber argReg = argNode->gtRegNum;
regMaskTP argMask = RBM_NONE;
for (unsigned i = 0; i < argNode->gtNumRegs; i++)
regNumber thisArgReg = (regNumber)((unsigned)argReg + i);
argMask |= genRegMask(thisArgReg);
argNode->SetRegNumByIdx(thisArgReg, i);
if (putArgChild->OperGet() == GT_FIELD_LIST)
// 1. Consume all of the items in the GT_FIELD_LIST (source)
// 2. Store to target slot and move to target registers (destination) from source
unsigned sourceRegCount = 0;
// To avoid redundant moves, have the argument operand computed in the
// register in which the argument is passed to the call.
for (GenTreeFieldList* fieldListPtr = putArgChild->AsFieldList(); fieldListPtr != nullptr;
fieldListPtr = fieldListPtr->Rest())
GenTree* node = fieldListPtr->gtGetOp1();
assert(!node->isContained());
// The only multi-reg nodes we should see are OperIsMultiRegOp()
unsigned currentRegCount;
if (node->OperIsMultiRegOp())
currentRegCount = node->AsMultiRegOp()->GetRegCount();
#endif // _TARGET_ARM
assert(!node->IsMultiRegNode());
// NOTE(review): the single-reg assignment of 'currentRegCount' and the increment of
// 'sourceRegCount' are not visible in this excerpt.
// Consume all the registers, setting the appropriate register mask for the ones that
// go into registers.
for (unsigned regIndex = 0; regIndex < currentRegCount; regIndex++)
regMaskTP sourceMask = RBM_NONE;
// Fields beyond the register count go to the stack; they get no fixed candidate.
if (sourceRegCount < argNode->gtNumRegs)
sourceMask = genRegMask((regNumber)((unsigned)argReg + sourceRegCount));
BuildUse(node, sourceMask, regIndex);
srcCount += sourceRegCount;
assert(putArgChild->isContained());
// Non-FIELD_LIST case: the source must be a struct-typed GT_OBJ.
assert(putArgChild->TypeGet() == TYP_STRUCT);
assert(putArgChild->OperGet() == GT_OBJ);
// We can use a ldr/str sequence so we need an internal register
buildInternalIntRegisterDefForNode(argNode, allRegs(TYP_INT) & ~argMask);
GenTree* objChild = putArgChild->gtGetOp1();
if (objChild->OperGet() == GT_LCL_VAR_ADDR)
// We will generate all of the code for the GT_PUTARG_SPLIT, the GT_OBJ and the GT_LCL_VAR_ADDR
// as one contained operation
assert(objChild->isContained());
srcCount = BuildIndirUses(putArgChild->AsIndir());
assert(putArgChild->isContained());
// Finalize internal defs, then define the destination registers of the split arg.
buildInternalRegisterUses();
BuildDefs(argNode, dstCount, argMask);
557 #endif // FEATURE_ARG_SPLIT
559 //------------------------------------------------------------------------
560 // BuildBlockStore: Set the NodeInfo for a block store.
563 // blkNode - The block store node of interest
566 // The number of sources consumed by this node.
int LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
// NOTE(review): this excerpt is elided — braces, the 'srcCount' declaration and the
// final return are not visible here; the init-blk vs copy-blk branch structure is
// inferred from the comments below.
GenTree* dstAddr = blkNode->Addr();
unsigned size = blkNode->gtBlkSize;
GenTree* source = blkNode->Data();
GenTree* srcAddrOrFill = nullptr;
bool isInitBlk = blkNode->OperIsInitBlkOp();
// Fixed-register requirements for the destination address, the source
// (address or fill value), and the block size, filled in per-kind below.
regMaskTP dstAddrRegMask = RBM_NONE;
regMaskTP sourceRegMask = RBM_NONE;
regMaskTP blkSizeRegMask = RBM_NONE;
regMaskTP internalIntCandidates = RBM_NONE;
// InitBlk: unwrap a contained GT_INIT_VAL to find the fill value.
GenTree* initVal = source;
if (initVal->OperIsInitVal())
assert(initVal->isContained());
initVal = initVal->gtGetOp1();
srcAddrOrFill = initVal;
if (blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindUnroll)
// TODO-ARM-CQ: Currently we generate a helper call for every
// initblk we encounter. Later on we should implement loop unrolling
// code sequences to improve CQ.
// For reference see the code in lsraxarch.cpp.
NYI_ARM("initblk loop unrolling is currently not implemented.");
assert(blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindHelper);
assert(!initVal->isContained());
// The helper follows the regular ABI.
dstAddrRegMask = RBM_ARG_0;
sourceRegMask = RBM_ARG_1;
blkSizeRegMask = RBM_ARG_2;
// CopyObj or CopyBlk
// Sources are src and dest and size if not constant.
if (source->gtOper == GT_IND)
assert(source->isContained());
srcAddrOrFill = source->gtGetOp1();
assert(!srcAddrOrFill->isContained());
if (blkNode->OperGet() == GT_STORE_OBJ)
// CopyObj (GC-aware copy):
// We don't need to materialize the struct size but we still need
// a temporary register to perform the sequence of loads and stores.
// We can't use the special Write Barrier registers, so exclude them from the mask
internalIntCandidates = allRegs(TYP_INT) & ~(RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF);
buildInternalIntRegisterDefForNode(blkNode, internalIntCandidates);
if (size >= 2 * REGSIZE_BYTES)
// We will use ldp/stp to reduce code size and improve performance
// so we need to reserve an extra internal register
buildInternalIntRegisterDefForNode(blkNode, internalIntCandidates);
// If we have a dest address we want it in RBM_WRITE_BARRIER_DST_BYREF.
dstAddrRegMask = RBM_WRITE_BARRIER_DST_BYREF;
// If we have a source address we want it in REG_WRITE_BARRIER_SRC_BYREF.
// Otherwise, if it is a local, codegen will put its address in REG_WRITE_BARRIER_SRC_BYREF,
// which is killed by a StoreObj (and thus needn't be reserved).
if (srcAddrOrFill != nullptr)
sourceRegMask = RBM_WRITE_BARRIER_SRC_BYREF;
if (blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindUnroll)
// In case of a CpBlk with a constant size and less than CPBLK_UNROLL_LIMIT size
// we should unroll the loop to improve CQ.
// For reference see the code in lsraxarch.cpp.
buildInternalIntRegisterDefForNode(blkNode);
#ifdef _TARGET_ARM64_
if (size >= 2 * REGSIZE_BYTES)
// We will use ldp/stp to reduce code size and improve performance
// so we need to reserve an extra internal register
buildInternalIntRegisterDefForNode(blkNode);
#endif // _TARGET_ARM64_
// CopyBlk via helper call: regular ABI registers.
assert(blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindHelper);
dstAddrRegMask = RBM_ARG_0;
// The srcAddr goes in arg1.
if (srcAddrOrFill != nullptr)
sourceRegMask = RBM_ARG_1;
blkSizeRegMask = RBM_ARG_2;
// A known (non-zero) size that must end up in a specific register needs a temp.
if ((size != 0) && (blkSizeRegMask != RBM_NONE))
// Reserve a temp register for the block size argument.
buildInternalIntRegisterDefForNode(blkNode, blkSizeRegMask);
// Build uses in evaluation order; IsReverseOp() means the source is evaluated
// before the destination address.
if (!dstAddr->isContained() && !blkNode->IsReverseOp())
BuildUse(dstAddr, dstAddrRegMask);
if ((srcAddrOrFill != nullptr) && !srcAddrOrFill->isContained())
BuildUse(srcAddrOrFill, sourceRegMask);
if (!dstAddr->isContained() && blkNode->IsReverseOp())
BuildUse(dstAddr, dstAddrRegMask);
// Non-constant size: only GT_STORE_DYN_BLK carries a dynamic size operand.
assert(blkNode->OperIs(GT_STORE_DYN_BLK));
// The block size argument is a third argument to GT_STORE_DYN_BLK
GenTree* blockSize = blkNode->AsDynBlk()->gtDynamicSize;
BuildUse(blockSize, blkSizeRegMask);
// Finalize internal defs; a block store defines no registers but kills per the helper/barrier.
buildInternalRegisterUses();
regMaskTP killMask = getKillSetForBlockStore(blkNode);
BuildDefsWithKills(blkNode, 0, RBM_NONE, killMask);
719 //------------------------------------------------------------------------
720 // BuildCast: Set the NodeInfo for a GT_CAST.
723 // cast - The GT_CAST node
726 // The number of sources consumed by this node.
int LinearScan::BuildCast(GenTreeCast* cast)
// NOTE(review): this excerpt is elided — braces and the final return are not visible here.
GenTree* src = cast->gtGetOp1();
const var_types srcType = genActualType(src->TypeGet());
const var_types castType = cast->gtCastType;
// A long source must be a contained GT_LONG pair (longs are decomposed on 32-bit targets).
assert(!varTypeIsLong(srcType) || (src->OperIs(GT_LONG) && src->isContained()));
// Floating point to integer casts requires a temporary register.
if (varTypeIsFloating(srcType) && !varTypeIsFloating(castType))
buildInternalFloatRegisterDefForNode(cast, RBM_ALLFLOAT);
// The temp must stay live past the source use, so delay-free it.
setInternalRegsDelayFree = true;
// Overflow checking cast from TYP_LONG to TYP_INT requires a temporary register to
// store the min and max immediate values that cannot be encoded in the CMP instruction.
if (cast->gtOverflow() && varTypeIsLong(srcType) && !cast->IsUnsigned() && (castType == TYP_INT))
buildInternalIntRegisterDefForNode(cast);
// Consume the source operand(s), then finalize the internal defs requested above.
int srcCount = BuildOperandUses(src);
buildInternalRegisterUses();
759 #endif // _TARGET_ARMARCH_