1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
5 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
8 XX ARM/ARM64 Code Generator Common Code XX
10 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
11 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
18 #ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator
20 #ifdef _TARGET_ARMARCH_ // This file is ONLY used for ARM and ARM64 architectures
27 //------------------------------------------------------------------------
28 // genCodeForTreeNode Generate code for a single node in the tree.
//
// Dispatches on treeNode->gtOper (see the switch below) and forwards the node to the
// matching gen*/genCodeFor* helper. Nodes marked as reusing a register value (ARM64)
// and contained nodes are tested for before the dispatch.
//
// Arguments:
//    treeNode - the node to generate code for
//
// Preconditions:
31 // All operands have been evaluated.
33 void CodeGen::genCodeForTreeNode(GenTree* treeNode)
// Cache the node's assigned register, its type and the emitter for use by the cases below.
35 regNumber targetReg = treeNode->gtRegNum;
36 var_types targetType = treeNode->TypeGet();
37 emitter* emit = getEmitter();
40 // Validate that all the operands for the current node are consumed in order.
41 // This is important because LSRA ensures that any necessary copies will be
// Start this node with no operand recorded as consumed.
43 lastConsumedNode = nullptr;
// In verbose mode, display the node being generated.
44 if (compiler->verbose)
46 unsigned seqNum = treeNode->gtSeqNum; // Useful for setting a conditional break in Visual Studio
47 compiler->gtDispLIRNode(treeNode, "Generating: ");
51 #ifdef _TARGET_ARM64_ // TODO-ARM: is this applicable to ARM32?
52 // Is this a node whose value is already in a register? LSRA denotes this by
53 // setting the GTF_REUSE_REG_VAL flag.
54 if (treeNode->IsReuseRegVal())
56 // For now, this is only used for constant nodes.
57 assert((treeNode->OperGet() == GT_CNS_INT) || (treeNode->OperGet() == GT_CNS_DBL));
58 JITDUMP(" TreeNode is marked ReuseReg\n");
61 #endif // _TARGET_ARM64_
63 // contained nodes are part of their parents for codegen purposes
64 // ex : immediates, most LEAs
65 if (treeNode->isContained())
// Per-operator dispatch.
70 switch (treeNode->gtOper)
73 getEmitter()->emitDisableGC();
77 // We should be seeing this only if profiler hook is needed
78 noway_assert(compiler->compIsProfilerHookNeeded());
80 #ifdef PROFILING_SUPPORTED
81 // Right now this node is used only for tail calls. In future if
82 // we intend to use it for Enter or Leave hooks, add a data member
83 // to this node indicating the kind of profiler hook. For example,
84 // helper number can be used.
85 genProfilingLeaveCallback(CORINFO_HELP_PROF_FCN_TAILCALL);
86 #endif // PROFILING_SUPPORTED
// Constant node: set targetReg from the constant, then report the register as produced.
95 genSetRegToConst(targetReg, targetType, treeNode);
96 genProduceReg(treeNode);
101 genCodeForNegNot(treeNode);
108 genCodeForDivMod(treeNode->AsOp());
114 assert(varTypeIsIntegralOrI(treeNode));
118 #if !defined(_TARGET_64BIT_)
123 #endif // !defined(_TARGET_64BIT_)
// Binary operators: consume both operands before emitting the instruction.
128 genConsumeOperands(treeNode->AsOp());
129 genCodeForBinary(treeNode);
135 // case GT_ROL: // No ROL instruction on ARM; it has been lowered to ROR.
137 genCodeForShift(treeNode);
140 #if !defined(_TARGET_64BIT_)
144 genCodeForShiftLong(treeNode);
147 #endif // !defined(_TARGET_64BIT_)
150 genCodeForCast(treeNode->AsOp());
// Bitcast: when exactly one of the node and its operand is floating-point, the value has
// to cross between the floating-point and integer register files.
155 GenTree* op1 = treeNode->gtOp.gtOp1;
156 if (varTypeIsFloating(treeNode) != varTypeIsFloating(op1))
158 #ifdef _TARGET_ARM64_
159 inst_RV_RV(INS_fmov, targetReg, genConsumeReg(op1), targetType);
160 #else // !_TARGET_ARM64_
161 if (varTypeIsFloating(treeNode))
163 // GT_BITCAST on ARM is only used to cast floating-point arguments to integer
164 // registers. Nobody generates GT_BITCAST from int to float currently.
165 NYI_ARM("GT_BITCAST from 'int' to 'float'");
169 assert(varTypeIsFloating(op1));
171 if (op1->TypeGet() == TYP_FLOAT)
173 inst_RV_RV(INS_vmov_f2i, targetReg, genConsumeReg(op1), targetType);
// A double is moved into two integer registers: targetReg and the node's gtOtherReg.
177 assert(op1->TypeGet() == TYP_DOUBLE);
178 regNumber otherReg = treeNode->AsMultiRegOp()->gtOtherReg;
179 assert(otherReg != REG_NA);
180 inst_RV_RV_RV(INS_vmov_d2i, targetReg, otherReg, genConsumeReg(op1), EA_8BYTE);
183 #endif // !_TARGET_ARM64_
// Plain copy using the copy instruction for targetType (presumably the case where the
// source and destination are of the same register kind).
187 inst_RV_RV(ins_Copy(targetType), targetReg, genConsumeReg(op1), targetType);
192 case GT_LCL_FLD_ADDR:
193 case GT_LCL_VAR_ADDR:
194 genCodeForLclAddr(treeNode);
198 genCodeForLclFld(treeNode->AsLclFld());
202 genCodeForLclVar(treeNode->AsLclVar());
205 case GT_STORE_LCL_FLD:
206 genCodeForStoreLclFld(treeNode->AsLclFld());
209 case GT_STORE_LCL_VAR:
210 genCodeForStoreLclVar(treeNode->AsLclVar());
219 // If we are here, it is the case where there is an LEA that cannot be folded into a parent instruction.
220 genLeaInstruction(treeNode->AsAddrMode());
224 genCodeForIndexAddr(treeNode->AsIndexAddr());
228 genCodeForIndir(treeNode->AsIndir());
233 genCodeForMulLong(treeNode->AsMultiRegOp());
235 #endif // _TARGET_ARM_
237 #ifdef _TARGET_ARM64_
240 genCodeForMulHi(treeNode->AsOp());
244 genCodeForSwap(treeNode->AsOp());
246 #endif // _TARGET_ARM64_
249 genJmpMethod(treeNode);
253 genCkfinite(treeNode);
257 genIntrinsic(treeNode);
262 genSIMDIntrinsic(treeNode->AsSIMD());
264 #endif // FEATURE_SIMD
266 #ifdef FEATURE_HW_INTRINSICS
268 genHWIntrinsic(treeNode->AsHWIntrinsic());
270 #endif // FEATURE_HW_INTRINSICS
279 #ifdef _TARGET_ARM64_
282 #endif // _TARGET_ARM64_
283 genCodeForCompare(treeNode->AsOp());
287 genCodeForJumpTrue(treeNode);
290 #ifdef _TARGET_ARM64_
292 genCodeForJumpCompare(treeNode->AsOp());
294 #endif // _TARGET_ARM64_
297 genCodeForJcc(treeNode->AsCC());
301 genCodeForSetcc(treeNode->AsCC());
305 genCodeForReturnTrap(treeNode->AsOp());
309 genCodeForStoreInd(treeNode->AsStoreInd());
313 // This is handled at the time we call genConsumeReg() on the GT_COPY
318 // Should always be marked contained.
319 assert(!"LIST, FIELD_LIST nodes should always be marked contained.");
323 genPutArgStk(treeNode->AsPutArgStk());
327 genPutArgReg(treeNode->AsOp());
331 case GT_PUTARG_SPLIT:
332 genPutArgSplit(treeNode->AsPutArgSplit());
334 #endif // _TARGET_ARM_
337 genCallInstruction(treeNode->AsCall());
340 case GT_MEMORYBARRIER:
341 instGen_MemoryBarrier();
344 #ifdef _TARGET_ARM64_
348 genLockedInstructions(treeNode->AsOp());
352 genCodeForCmpXchg(treeNode->AsCmpXchg());
354 #endif // _TARGET_ARM64_
357 // do nothing - reload is just a marker.
358 // The parent node will call genConsumeReg on this which will trigger the unspill of this node's child
359 // into the register specified in this node.
369 case GT_ARR_BOUNDS_CHECK:
372 #endif // FEATURE_SIMD
373 genRangeCheck(treeNode);
377 genCodeForPhysReg(treeNode->AsPhysReg());
381 genCodeForNullCheck(treeNode->AsOp());
// Catch argument: only valid in a block whose handler receives the exception object.
386 noway_assert(handlerGetsXcptnObj(compiler->compCurBB->bbCatchTyp));
388 /* Catch arguments get passed in a register. genCodeForBBlist()
389 would have marked it as holding a GC object, but not used. */
391 noway_assert(gcInfo.gcRegGCrefSetCur & RBM_EXCEPTION_OBJECT);
392 genConsumeReg(treeNode);
395 case GT_PINVOKE_PROLOG:
// No GC ref or byref may be live in a register outside fullIntArgRegMask() at this point.
396 noway_assert(((gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & ~fullIntArgRegMask()) == 0);
398 // the runtime side requires the codegen here to be consistent
399 emit->emitDisableRandomNops();
// Create a temp label, record it on the node and in genPendingCallLabel, and load the
// label's address into targetReg with adr.
403 genPendingCallLabel = genCreateTempLabel();
404 treeNode->gtLabel.gtLabBB = genPendingCallLabel;
405 emit->emitIns_R_L(INS_adr, EA_PTRSIZE, genPendingCallLabel, targetReg);
409 case GT_STORE_DYN_BLK:
411 genCodeForStoreBlk(treeNode->AsBlk());
415 genJumpTable(treeNode);
418 case GT_SWITCH_TABLE:
419 genTableBasedSwitch(treeNode);
423 genCodeForArrIndex(treeNode->AsArrIndex());
427 genCodeForArrOffset(treeNode->AsArrOffs());
432 case GT_CLS_VAR_ADDR:
433 emit->emitIns_R_C(INS_lea, EA_PTRSIZE, targetReg, treeNode->gtClsVar.gtClsVarHnd, 0);
434 genProduceReg(treeNode);
438 assert(treeNode->isUsedFromReg());
439 genConsumeRegs(treeNode);
442 #endif // _TARGET_ARM_
445 // Do nothing; these nodes are simply markers for debug info.
// Unhandled operator: format a message naming the operator, then report NYI.
452 _snprintf_s(message, _countof(message), _TRUNCATE, "NYI: Unimplemented node type %s",
453 GenTree::OpName(treeNode->OperGet()));
456 NYI("unimplemented node");
463 //------------------------------------------------------------------------
464 // genSetRegToIcon: Generate code that will set the given register to the integer constant.
//
// Arguments:
//    reg   - destination register; asserted not to be a floating-point register
//    val   - the integer constant to load
//    type  - type of the constant; its actual-type size is used as the instruction size
//    flags - passed through unchanged to instGen_Set_Reg_To_Imm
//
466 void CodeGen::genSetRegToIcon(regNumber reg, ssize_t val, var_types type, insFlags flags)
468 // Reg cannot be a FP reg
469 assert(!genIsValidFloatReg(reg));
471 // The only TYP_REF constant that can come this path is a managed 'null' since it is not
472 // relocatable. Other ref type constants (e.g. string objects) go through a different
// i.e. a TYP_REF constant must have the value 0 here.
474 noway_assert(type != TYP_REF || val == 0);
476 instGen_Set_Reg_To_Imm(emitActualTypeSize(type), reg, val, flags);
479 //---------------------------------------------------------------------
480 // genIntrinsic - generate code for a given intrinsic
//
// Arguments:
483 // treeNode - the GT_INTRINSIC node
//
// Notes:
//    Every handled intrinsic consumes the node's operands and emits one instruction with
//    treeNode as the destination and its first operand as the source. Ceiling, Floor and
//    Round are only handled when _TARGET_ARM64_ is defined. The result register is
//    reported via genProduceReg at the end.
//
488 void CodeGen::genIntrinsic(GenTree* treeNode)
490 assert(treeNode->OperIs(GT_INTRINSIC));
492 // Both operand and its result must be of the same floating point type.
493 GenTree* srcNode = treeNode->gtOp.gtOp1;
494 assert(varTypeIsFloating(srcNode));
495 assert(srcNode->TypeGet() == treeNode->TypeGet());
497 // Right now only Abs/Ceiling/Floor/Round/Sqrt are treated as math intrinsics.
499 switch (treeNode->gtIntrinsic.gtIntrinsicId)
501 case CORINFO_INTRINSIC_Abs:
502 genConsumeOperands(treeNode->AsOp());
503 getEmitter()->emitInsBinary(INS_ABS, emitActualTypeSize(treeNode), treeNode, srcNode);
506 #ifdef _TARGET_ARM64_
// Ceiling uses frintp (ARM64 FRINTP: round to integral, toward plus infinity).
507 case CORINFO_INTRINSIC_Ceiling:
508 genConsumeOperands(treeNode->AsOp());
509 getEmitter()->emitInsBinary(INS_frintp, emitActualTypeSize(treeNode), treeNode, srcNode);
// Floor uses frintm (ARM64 FRINTM: round to integral, toward minus infinity).
512 case CORINFO_INTRINSIC_Floor:
513 genConsumeOperands(treeNode->AsOp());
514 getEmitter()->emitInsBinary(INS_frintm, emitActualTypeSize(treeNode), treeNode, srcNode);
// Round uses frintn (ARM64 FRINTN: round to integral, to nearest with ties to even).
517 case CORINFO_INTRINSIC_Round:
518 genConsumeOperands(treeNode->AsOp());
519 getEmitter()->emitInsBinary(INS_frintn, emitActualTypeSize(treeNode), treeNode, srcNode);
521 #endif // _TARGET_ARM64_
523 case CORINFO_INTRINSIC_Sqrt:
524 genConsumeOperands(treeNode->AsOp());
525 getEmitter()->emitInsBinary(INS_SQRT, emitActualTypeSize(treeNode), treeNode, srcNode);
// Any other intrinsic id is unexpected here.
529 assert(!"genIntrinsic: Unsupported intrinsic");
533 genProduceReg(treeNode);
536 //---------------------------------------------------------------------
537 // genPutArgStk - generate code for a GT_PUTARG_STK node
//
// Arguments:
540 // treeNode - the GT_PUTARG_STK node
//
// Notes:
//    The argument is stored at offset gtSlotNum * TARGET_POINTER_SIZE from 'varNumOut',
//    which is either the first incoming stack argument (when putInIncomingArgArea() is
//    true) or the outgoing argument space variable. The source is handled as one of:
//      - a SIMD value held in a floating-point/vector register,
//      - a non-struct value (a contained zero constant on ARM64, or a register),
//      - a GT_FIELD_LIST, whose items are stored one after another,
//      - a GT_OBJ or GT_LCL_VAR struct, copied through temporary registers.
//
545 void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode)
547 assert(treeNode->OperIs(GT_PUTARG_STK));
548 GenTree* source = treeNode->gtOp1;
549 var_types targetType = genActualType(source->TypeGet());
550 emitter* emit = getEmitter();
552 // This is the varNum for our store operations,
553 // typically this is the varNum for the Outgoing arg space
554 // When we are generating a tail call it will be the varNum for arg0
555 unsigned varNumOut = (unsigned)-1;
556 unsigned argOffsetMax = (unsigned)-1; // Records the maximum size of this area for assert checks
558 // Get argument offset to use with 'varNumOut'
559 // Here we cross check that argument offset hasn't changed from lowering to codegen since
560 // we are storing arg slot number in GT_PUTARG_STK node in lowering phase.
561 unsigned argOffsetOut = treeNode->gtSlotNum * TARGET_POINTER_SIZE;
// The slot number on the node must agree with the call's argument table entry.
564 fgArgTabEntry* curArgTabEntry = compiler->gtArgEntryByNode(treeNode->gtCall, treeNode);
565 assert(curArgTabEntry);
566 assert(argOffsetOut == (curArgTabEntry->slotNum * TARGET_POINTER_SIZE));
569 // Whether to setup stk arg in incoming or out-going arg area?
570 // Fast tail calls implemented as epilog+jmp = stk arg is setup in incoming arg area.
571 // All other calls - stk arg is setup in out-going arg area.
572 if (treeNode->putInIncomingArgArea())
574 varNumOut = getFirstArgWithStackSlot();
575 argOffsetMax = compiler->compArgSize;
576 #if FEATURE_FASTTAILCALL
577 // This must be a fast tail call.
578 assert(treeNode->gtCall->IsFastTailCall());
580 // Since it is a fast tail call, the existence of first incoming arg is guaranteed
581 // because fast tail call requires that in-coming arg area of caller is >= out-going
582 // arg area required for tail call.
583 LclVarDsc* varDsc = &(compiler->lvaTable[varNumOut]);
584 assert(varDsc != nullptr);
585 #endif // FEATURE_FASTTAILCALL
589 varNumOut = compiler->lvaOutgoingArgSpaceVar;
590 argOffsetMax = compiler->lvaOutgoingArgSpaceSize;
// A GT_FIELD_LIST source takes the struct path even when targetType is not TYP_STRUCT.
593 bool isStruct = (targetType == TYP_STRUCT) || (source->OperGet() == GT_FIELD_LIST);
// SIMD values are stored with a single str from a floating-point/vector register.
595 if (varTypeIsSIMD(targetType))
597 assert(!source->isContained());
599 regNumber srcReg = genConsumeReg(source);
601 emitAttr storeAttr = emitTypeSize(targetType);
603 assert((srcReg != REG_NA) && (genIsValidFloatReg(srcReg)));
604 emit->emitIns_S_R(INS_str, storeAttr, srcReg, varNumOut, argOffsetOut);
606 argOffsetOut += EA_SIZE_IN_BYTES(storeAttr);
607 assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area
611 if (!isStruct) // a normal non-Struct argument
613 instruction storeIns = ins_Store(targetType);
614 emitAttr storeAttr = emitTypeSize(targetType);
616 // If it is contained then source must be the integer constant zero
617 if (source->isContained())
619 #ifdef _TARGET_ARM64_
620 assert(source->OperGet() == GT_CNS_INT);
621 assert(source->AsIntConCommon()->IconValue() == 0);
// Store the zero register directly; no register is consumed for the constant.
623 emit->emitIns_S_R(storeIns, storeAttr, REG_ZR, varNumOut, argOffsetOut);
624 #else // !_TARGET_ARM64_
625 // There is no zero register on ARM32
627 #endif // !_TARGET_ARM64_
631 genConsumeReg(source);
632 emit->emitIns_S_R(storeIns, storeAttr, source->gtRegNum, varNumOut, argOffsetOut);
634 if (targetType == TYP_LONG)
636 // This case currently only occurs for double types that are passed as TYP_LONG;
637 // actual long types would have been decomposed by now.
638 assert(source->IsCopyOrReload());
639 regNumber otherReg = (regNumber)source->AsCopyOrReload()->GetRegNumByIdx(1);
640 assert(otherReg != REG_NA);
641 argOffsetOut += EA_4BYTE;
642 emit->emitIns_S_R(storeIns, storeAttr, otherReg, varNumOut, argOffsetOut);
644 #endif // _TARGET_ARM_
646 argOffsetOut += EA_SIZE_IN_BYTES(storeAttr);
647 assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area
649 else // We have some kind of a struct argument
651 assert(source->isContained()); // We expect that this node was marked as contained in Lower
653 if (source->OperGet() == GT_FIELD_LIST)
655 // Deal with the multi register passed struct args.
656 GenTreeFieldList* fieldListPtr = source->AsFieldList();
658 // Evaluate each of the GT_FIELD_LIST items into their register
659 // and store their register into the outgoing argument area
660 for (; fieldListPtr != nullptr; fieldListPtr = fieldListPtr->Rest())
662 GenTree* nextArgNode = fieldListPtr->gtOp.gtOp1;
663 genConsumeReg(nextArgNode);
665 regNumber reg = nextArgNode->gtRegNum;
666 var_types type = nextArgNode->TypeGet();
667 emitAttr attr = emitTypeSize(type);
669 // Emit store instructions to store the registers produced by the GT_FIELD_LIST into the outgoing
671 emit->emitIns_S_R(ins_Store(type), attr, reg, varNumOut, argOffsetOut);
672 argOffsetOut += EA_SIZE_IN_BYTES(attr);
673 assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area
676 else // We must have a GT_OBJ or a GT_LCL_VAR
678 noway_assert((source->OperGet() == GT_LCL_VAR) || (source->OperGet() == GT_OBJ));
// Note: this declaration reuses the name 'targetType' declared at the top of the function;
// here it holds the source's own type rather than its genActualType.
680 var_types targetType = source->TypeGet();
681 noway_assert(varTypeIsStruct(targetType));
683 // We will copy this struct to the stack, possibly using a ldp/ldr instruction
685 // Setup loReg (and hiReg) from the internal registers that we reserved in lower.
687 regNumber loReg = treeNode->ExtractTempReg();
688 #ifdef _TARGET_ARM64_
689 regNumber hiReg = treeNode->GetSingleTempReg();
690 #endif // _TARGET_ARM64_
691 regNumber addrReg = REG_NA;
693 GenTreeLclVarCommon* varNode = nullptr;
694 GenTree* addrNode = nullptr;
696 if (source->OperGet() == GT_LCL_VAR)
698 varNode = source->AsLclVarCommon();
700 else // we must have a GT_OBJ
702 assert(source->OperGet() == GT_OBJ);
704 addrNode = source->gtOp.gtOp1;
706 // addrNode can either be a GT_LCL_VAR_ADDR or an address expression
708 if (addrNode->OperGet() == GT_LCL_VAR_ADDR)
710 // We have a GT_OBJ(GT_LCL_VAR_ADDR)
712 // We will treat this case the same as above
713 // (i.e if we just had this GT_LCL_VAR directly as the source)
714 // so point 'varNode' at this GT_LCL_VAR_ADDR node
715 // and continue to the codegen for the LCL_VAR node below
717 varNode = addrNode->AsLclVarCommon();
722 // Either varNode or addrNode must have been setup above,
723 // the xor ensures that only one of the two is setup, not both
724 assert((varNode != nullptr) ^ (addrNode != nullptr));
726 BYTE gcPtrArray[MAX_ARG_REG_COUNT] = {}; // TYPE_GC_NONE = 0
727 BYTE* gcPtrs = gcPtrArray;
729 unsigned gcPtrCount; // The count of GC pointers in the struct
733 // This is the varNum for our load operations,
734 // only used when we have a multireg struct with a LclVar source
735 unsigned varNumInp = BAD_VAR_NUM;
738 // On ARM32, size of reference map can be larger than MAX_ARG_REG_COUNT
739 gcPtrs = treeNode->gtGcPtrs;
740 gcPtrCount = treeNode->gtNumberReferenceSlots;
742 // Setup the structSize, isHfa, and gcPtrCount
743 if (varNode != nullptr)
745 varNumInp = varNode->gtLclNum;
746 assert(varNumInp < compiler->lvaCount);
747 LclVarDsc* varDsc = &compiler->lvaTable[varNumInp];
749 // This struct also must live in the stack frame
750 // And it can't live in a register (SIMD)
751 assert(varDsc->lvType == TYP_STRUCT);
752 assert(varDsc->lvOnFrame && !varDsc->lvRegister);
754 structSize = varDsc->lvSize(); // This yields the roundUp size, but that is fine
755 // as that is how much stack is allocated for this LclVar
756 isHfa = varDsc->lvIsHfa();
757 #ifdef _TARGET_ARM64_
// Take the GC layout from the local's descriptor.
758 gcPtrCount = varDsc->lvStructGcCount;
759 for (unsigned i = 0; i < gcPtrCount; ++i)
760 gcPtrs[i] = varDsc->lvGcLayout[i];
761 #endif // _TARGET_ARM64_
763 else // addrNode is used
765 assert(addrNode != nullptr);
767 // Generate code to load the address that we need into a register
768 genConsumeAddress(addrNode);
769 addrReg = addrNode->gtRegNum;
771 #ifdef _TARGET_ARM64_
772 // If addrReg equal to loReg, swap(loReg, hiReg)
773 // This reduces code complexity by only supporting one addrReg overwrite case
774 if (loReg == addrReg)
779 #endif // _TARGET_ARM64_
781 CORINFO_CLASS_HANDLE objClass = source->gtObj.gtClass;
783 structSize = compiler->info.compCompHnd->getClassSize(objClass);
784 isHfa = compiler->IsHfa(objClass);
785 #ifdef _TARGET_ARM64_
// Take the GC layout from the class handle.
786 gcPtrCount = compiler->info.compCompHnd->getClassGClayout(objClass, &gcPtrs[0]);
790 // If we have an HFA we can't have any GC pointers,
791 // if not then the max size for the struct is 16 bytes
794 noway_assert(gcPtrCount == 0);
796 #ifdef _TARGET_ARM64_
799 noway_assert(structSize <= 2 * TARGET_POINTER_SIZE);
802 noway_assert(structSize <= MAX_PASS_MULTIREG_BYTES);
803 #endif // _TARGET_ARM64_
// Copy state: bytes still to copy, current offset into the source struct, and the
// index into gcPtrs of the slot being copied.
805 int remainingSize = structSize;
806 unsigned structOffset = 0;
807 unsigned nextIndex = 0;
809 #ifdef _TARGET_ARM64_
810 // For a >= 16-byte structSize we will generate a ldp and stp instruction each loop
812 // stp x2, x3, [sp, #16]
814 while (remainingSize >= 2 * TARGET_POINTER_SIZE)
// The GC-ness of each of the two slots determines the size attribute used for its register.
816 var_types type0 = compiler->getJitGCType(gcPtrs[nextIndex + 0]);
817 var_types type1 = compiler->getJitGCType(gcPtrs[nextIndex + 1]);
819 if (varNode != nullptr)
821 // Load from our varNumInp source
822 emit->emitIns_R_R_S_S(INS_ldp, emitTypeSize(type0), emitTypeSize(type1), loReg, hiReg, varNumInp,
827 // check for case of destroying the addrRegister while we still need it
// hiReg may alias addrReg only when exactly 16 bytes remain, i.e. when no further load
// through addrReg follows this one.
828 assert(loReg != addrReg);
829 noway_assert((remainingSize == 2 * TARGET_POINTER_SIZE) || (hiReg != addrReg));
831 // Load from our address expression source
// NOTE(review): the trailing size argument is emitTypeSize(type0), whereas the other
// ldp/stp calls in this loop pass emitTypeSize(type1) for hiReg - confirm this is intended.
832 emit->emitIns_R_R_R_I(INS_ldp, emitTypeSize(type0), loReg, hiReg, addrReg, structOffset,
833 INS_OPTS_NONE, emitTypeSize(type0));
836 // Emit stp instruction to store the two registers into the outgoing argument area
837 emit->emitIns_S_S_R_R(INS_stp, emitTypeSize(type0), emitTypeSize(type1), loReg, hiReg, varNumOut,
839 argOffsetOut += (2 * TARGET_POINTER_SIZE); // We stored 16-bytes of the struct
840 assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area
842 remainingSize -= (2 * TARGET_POINTER_SIZE); // We loaded 16-bytes of the struct
843 structOffset += (2 * TARGET_POINTER_SIZE);
846 #else // _TARGET_ARM_
847 // For a >= 4 byte structSize we will generate a ldr and str instruction each loop
850 while (remainingSize >= TARGET_POINTER_SIZE)
852 var_types type = compiler->getJitGCType(gcPtrs[nextIndex]);
854 if (varNode != nullptr)
856 // Load from our varNumInp source
857 emit->emitIns_R_S(INS_ldr, emitTypeSize(type), loReg, varNumInp, structOffset);
861 // check for case of destroying the addrRegister while we still need it
// loReg may alias addrReg only for the last pointer-sized load.
862 assert(loReg != addrReg || remainingSize == TARGET_POINTER_SIZE);
864 // Load from our address expression source
865 emit->emitIns_R_R_I(INS_ldr, emitTypeSize(type), loReg, addrReg, structOffset);
868 // Emit str instruction to store the register into the outgoing argument area
869 emit->emitIns_S_R(INS_str, emitTypeSize(type), loReg, varNumOut, argOffsetOut);
870 argOffsetOut += TARGET_POINTER_SIZE; // We stored 4-bytes of the struct
871 assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area
873 remainingSize -= TARGET_POINTER_SIZE; // We loaded 4-bytes of the struct
874 structOffset += TARGET_POINTER_SIZE;
877 #endif // _TARGET_ARM_
879 // For a 12-byte structSize we will generate two load instructions
// Copy whatever is left: pointer-sized pieces first, then one smaller piece.
885 while (remainingSize > 0)
887 if (remainingSize >= TARGET_POINTER_SIZE)
889 var_types nextType = compiler->getJitGCType(gcPtrs[nextIndex]);
890 emitAttr nextAttr = emitTypeSize(nextType);
891 remainingSize -= TARGET_POINTER_SIZE;
893 if (varNode != nullptr)
895 // Load from our varNumInp source
896 emit->emitIns_R_S(ins_Load(nextType), nextAttr, loReg, varNumInp, structOffset);
900 assert(loReg != addrReg);
902 // Load from our address expression source
903 emit->emitIns_R_R_I(ins_Load(nextType), nextAttr, loReg, addrReg, structOffset);
905 // Emit a store instruction to store the register into the outgoing argument area
906 emit->emitIns_S_R(ins_Store(nextType), nextAttr, loReg, varNumOut, argOffsetOut);
907 argOffsetOut += EA_SIZE_IN_BYTES(nextAttr);
908 assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area
910 structOffset += TARGET_POINTER_SIZE;
913 else // (remainingSize < TARGET_POINTER_SIZE)
915 int loadSize = remainingSize;
918 // We should never have to do a non-pointer sized load when we have a LclVar source
919 assert(varNode == nullptr);
921 // the left over size is smaller than a pointer and thus can never be a GC type
922 assert(varTypeIsGC(compiler->getJitGCType(gcPtrs[nextIndex])) == false);
// Choose an unsigned load type matching loadSize; TYP_UINT is the default.
924 var_types loadType = TYP_UINT;
927 loadType = TYP_UBYTE;
929 else if (loadSize == 2)
931 loadType = TYP_USHORT;
935 // Need to handle additional loadSize cases here
936 noway_assert(loadSize == 4);
939 instruction loadIns = ins_Load(loadType);
940 emitAttr loadAttr = emitAttr(loadSize);
942 assert(loReg != addrReg);
944 emit->emitIns_R_R_I(loadIns, loadAttr, loReg, addrReg, structOffset);
946 // Emit a store instruction to store the register into the outgoing argument area
947 emit->emitIns_S_R(ins_Store(loadType), loadAttr, loReg, varNumOut, argOffsetOut);
948 argOffsetOut += EA_SIZE_IN_BYTES(loadAttr);
949 assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area
956 //---------------------------------------------------------------------
957 // genPutArgReg - generate code for a GT_PUTARG_REG node
//
// Arguments:
960 // tree - the GT_PUTARG_REG node
//
// Notes:
//    The node's type must not be TYP_STRUCT. When the operand's register differs from
//    the register assigned to this node, a copy of the node's type is emitted.
//
965 void CodeGen::genPutArgReg(GenTreeOp* tree)
967 assert(tree->OperIs(GT_PUTARG_REG));
969 var_types targetType = tree->TypeGet();
970 regNumber targetReg = tree->gtRegNum;
972 assert(targetType != TYP_STRUCT);
974 GenTree* op1 = tree->gtOp1;
977 // If child node is not already in the register we need, move it
978 if (targetReg != op1->gtRegNum)
980 inst_RV_RV(ins_Copy(targetType), targetReg, op1->gtRegNum, targetType);
987 //---------------------------------------------------------------------
988 // genPutArgSplit - generate code for a GT_PUTARG_SPLIT node
//
// Arguments:
991 // treeNode - the GT_PUTARG_SPLIT node
//
// Notes:
//    The first gtNumRegs pieces of the argument go to the registers recorded on the node;
//    the remaining pieces are stored to the outgoing argument area starting at
//    gtSlotNum * TARGET_POINTER_SIZE. The source is either a GT_FIELD_LIST or a GT_OBJ.
//
996 void CodeGen::genPutArgSplit(GenTreePutArgSplit* treeNode)
998 assert(treeNode->OperIs(GT_PUTARG_SPLIT));
1000 GenTree* source = treeNode->gtOp1;
1001 emitter* emit = getEmitter();
1002 unsigned varNumOut = compiler->lvaOutgoingArgSpaceVar;
1003 unsigned argOffsetMax = compiler->lvaOutgoingArgSpaceSize;
1004 unsigned argOffsetOut = treeNode->gtSlotNum * TARGET_POINTER_SIZE;
1006 if (source->OperGet() == GT_FIELD_LIST)
1008 // Evaluate each of the GT_FIELD_LIST items into their register
1009 // and store their register into the outgoing argument area
1010 unsigned regIndex = 0;
1011 for (GenTreeFieldList* fieldListPtr = source->AsFieldList(); fieldListPtr != nullptr;
1012 fieldListPtr = fieldListPtr->Rest())
1014 GenTree* nextArgNode = fieldListPtr->gtGetOp1();
1015 regNumber fieldReg = nextArgNode->gtRegNum;
1016 genConsumeReg(nextArgNode);
// Fields at or beyond index gtNumRegs are stored to the outgoing argument area.
1018 if (regIndex >= treeNode->gtNumRegs)
1020 var_types type = nextArgNode->TypeGet();
1021 emitAttr attr = emitTypeSize(type);
1023 // Emit store instructions to store the registers produced by the GT_FIELD_LIST into the outgoing
1025 emit->emitIns_S_R(ins_Store(type), attr, fieldReg, varNumOut, argOffsetOut);
1026 argOffsetOut += EA_SIZE_IN_BYTES(attr);
1027 assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area
// Register part: use the type and register recorded on the node for this index.
1031 var_types type = treeNode->GetRegType(regIndex);
1032 regNumber argReg = treeNode->GetRegNumByIdx(regIndex);
1033 if (type == TYP_LONG)
1035 // We should only see long fields for DOUBLEs passed in 2 integer registers, via bitcast.
1036 // All other LONGs should have been decomposed.
1037 // Handle the first INT, and then handle the 2nd below.
1038 assert(nextArgNode->OperIs(GT_BITCAST));
1040 if (argReg != fieldReg)
1042 inst_RV_RV(ins_Copy(type), argReg, fieldReg, type);
1044 // Now set up the next register for the 2nd INT
1045 argReg = REG_NEXT(argReg);
1047 assert(argReg == treeNode->GetRegNumByIdx(regIndex));
1048 fieldReg = nextArgNode->AsMultiRegOp()->GetRegNumByIdx(1);
1051 // If child node is not already in the register we need, move it
1052 if (argReg != fieldReg)
1054 inst_RV_RV(ins_Copy(type), argReg, fieldReg, type);
// Source is a GT_OBJ of struct type.
1062 var_types targetType = source->TypeGet();
1063 assert(source->OperGet() == GT_OBJ);
1064 assert(varTypeIsStruct(targetType));
1066 regNumber baseReg = treeNode->ExtractTempReg();
1067 regNumber addrReg = REG_NA;
1069 GenTreeLclVarCommon* varNode = nullptr;
1070 GenTree* addrNode = nullptr;
1072 addrNode = source->gtOp.gtOp1;
1074 // addrNode can either be a GT_LCL_VAR_ADDR or an address expression
1076 if (addrNode->OperGet() == GT_LCL_VAR_ADDR)
1078 // We have a GT_OBJ(GT_LCL_VAR_ADDR)
1080 // We will treat this case the same as above
1081 // (i.e if we just had this GT_LCL_VAR directly as the source)
1082 // so point 'varNode' at this GT_LCL_VAR_ADDR node
1083 // and continue to the codegen for the LCL_VAR node below
1085 varNode = addrNode->AsLclVarCommon();
1089 // Either varNode or addrNode must have been setup above,
1090 // the xor ensures that only one of the two is setup, not both
1091 assert((varNode != nullptr) ^ (addrNode != nullptr));
1093 // Setup the structSize, isHfa, and gcPtrCount
1094 BYTE* gcPtrs = treeNode->gtGcPtrs;
1095 unsigned gcPtrCount = treeNode->gtNumberReferenceSlots; // The count of GC pointers in the struct
1096 int structSize = treeNode->getArgSize();
1098 // This is the varNum for our load operations,
1099 // only used when we have a struct with a LclVar source
1100 unsigned srcVarNum = BAD_VAR_NUM;
1102 if (varNode != nullptr)
1104 srcVarNum = varNode->gtLclNum;
1105 assert(srcVarNum < compiler->lvaCount);
1107 // handle promote situation
1108 LclVarDsc* varDsc = compiler->lvaTable + srcVarNum;
1110 // This struct also must live in the stack frame
1111 // And it can't live in a register (SIMD)
1112 assert(varDsc->lvType == TYP_STRUCT);
1113 assert(varDsc->lvOnFrame && !varDsc->lvRegister);
1115 // We don't split HFA struct
1116 assert(!varDsc->lvIsHfa());
1118 else // addrNode is used
1120 assert(addrNode != nullptr);
1122 // Generate code to load the address that we need into a register
1123 genConsumeAddress(addrNode);
1124 addrReg = addrNode->gtRegNum;
1126 // If addrReg equal to baseReg, we use the last target register as alternative baseReg.
1127 // Because the candidate mask for the internal baseReg does not include any of the target register,
1128 // we can ensure that baseReg, addrReg, and the last target register are not all same.
1129 assert(baseReg != addrReg);
1131 // We don't split HFA struct
1132 assert(!compiler->IsHfa(source->gtObj.gtClass));
1135 // Put on stack first
// The stack portion starts at the slot following the gtNumRegs register slots.
1136 unsigned nextIndex = treeNode->gtNumRegs;
1137 unsigned structOffset = nextIndex * TARGET_POINTER_SIZE;
1138 int remainingSize = structSize - structOffset;
1140 // remainingSize is always multiple of TARGET_POINTER_SIZE
1141 assert(remainingSize % TARGET_POINTER_SIZE == 0);
1142 while (remainingSize > 0)
1144 var_types type = compiler->getJitGCType(gcPtrs[nextIndex]);
1146 if (varNode != nullptr)
1148 // Load from our srcVarNum source
1149 emit->emitIns_R_S(INS_ldr, emitTypeSize(type), baseReg, srcVarNum, structOffset);
1153 // check for case of destroying the addrRegister while we still need it
1154 assert(baseReg != addrReg);
1156 // Load from our address expression source
1157 emit->emitIns_R_R_I(INS_ldr, emitTypeSize(type), baseReg, addrReg, structOffset);
1160 // Emit str instruction to store the register into the outgoing argument area
1161 emit->emitIns_S_R(INS_str, emitTypeSize(type), baseReg, varNumOut, argOffsetOut);
1162 argOffsetOut += TARGET_POINTER_SIZE; // We stored 4-bytes of the struct
1163 assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area
1164 remainingSize -= TARGET_POINTER_SIZE; // We loaded 4-bytes of the struct
1165 structOffset += TARGET_POINTER_SIZE;
1169 // We set up the registers in order, so that by the time we assign the last target register `baseReg` is no
1170 // longer in use, in case we had to reuse the last target register for it.
// Now load each register piece of the struct into its target register.
1172 for (unsigned idx = 0; idx < treeNode->gtNumRegs; idx++)
1174 regNumber targetReg = treeNode->GetRegNumByIdx(idx);
1175 var_types type = treeNode->GetRegType(idx);
1177 if (varNode != nullptr)
1179 // Load from our srcVarNum source
1180 emit->emitIns_R_S(INS_ldr, emitTypeSize(type), targetReg, srcVarNum, structOffset);
1184 // check for case of destroying the addrRegister while we still need it
// If this load would overwrite addrReg and it is not the last load, copy the address
// into baseReg first.
1185 if (targetReg == addrReg && idx != treeNode->gtNumRegs - 1)
1187 assert(targetReg != baseReg);
1188 emit->emitIns_R_R(INS_mov, emitActualTypeSize(type), baseReg, addrReg);
1192 // Load from our address expression source
1193 emit->emitIns_R_R_I(INS_ldr, emitTypeSize(type), targetReg, addrReg, structOffset);
1195 structOffset += TARGET_POINTER_SIZE;
1198 genProduceReg(treeNode);
1200 #endif // _TARGET_ARM_
1202 //----------------------------------------------------------------------------------
1203 // genMultiRegCallStoreToLocal: store multi-reg return value of a call node to a local
//
// Arguments:
1206 //    treeNode - GenTree of GT_STORE_LCL_VAR
//
// Assumptions:
1212 //    The child of store is a multi-reg call node.
1213 //    genProduceReg() on treeNode is made by caller of this routine.
//    NOTE(review): a genProduceReg(treeNode) call also appears in this routine, right after the
//    register-insertion loop; confirm which of the two statements reflects the intended contract.
//
1215 void CodeGen::genMultiRegCallStoreToLocal(GenTree* treeNode)
1217 assert(treeNode->OperGet() == GT_STORE_LCL_VAR);
1219 #if defined(_TARGET_ARM_)
1220 // Longs are returned in two return registers on Arm32.
1221 // Structs are returned in four registers on ARM32 and HFAs.
1222 assert(varTypeIsLong(treeNode) || varTypeIsStruct(treeNode));
1223 #elif defined(_TARGET_ARM64_)
1224 // Structs of size >=9 and <=16 are returned in two return registers on ARM64 and HFAs.
1225 assert(varTypeIsStruct(treeNode));
1228 // Assumption: current implementation requires that a multi-reg
1229 // var in 'var = call' is flagged as lvIsMultiRegRet to prevent it from
1231 unsigned lclNum = treeNode->AsLclVarCommon()->gtLclNum;
1232 LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
1233 noway_assert(varDsc->lvIsMultiRegRet);
// The store's operand may be wrapped in GT_COPY/GT_RELOAD; look through the wrapper to reach the call.
1235 GenTree* op1 = treeNode->gtGetOp1();
1236 GenTree* actualOp1 = op1->gtSkipReloadOrCopy();
1237 GenTreeCall* call = actualOp1->AsCall();
1238 assert(call->HasMultiRegRetVal());
1240 genConsumeRegs(op1);
// The return type descriptor supplies the number of return registers and the type of each one.
1242 ReturnTypeDesc* pRetTypeDesc = call->GetReturnTypeDesc();
1243 unsigned regCount = pRetTypeDesc->GetReturnRegCount();
// A destination register was assigned to the store: insert each returned piece into that register.
1245 if (treeNode->gtRegNum != REG_NA)
1247 // Right now the only enregistrable multi-reg return types supported are SIMD types.
1248 assert(varTypeIsSIMD(treeNode));
1249 assert(regCount != 0);
1251 regNumber dst = treeNode->gtRegNum;
1253 // Treat dst register as a homogenous vector with element size equal to the src size
1254 // Insert pieces in reverse order
1255 for (int i = regCount - 1; i >= 0; --i)
1257 var_types type = pRetTypeDesc->GetReturnRegType(i);
1258 regNumber reg = call->GetRegNumByIdx(i);
1259 if (op1->IsCopyOrReload())
1261 // GT_COPY/GT_RELOAD will have valid reg for those positions
1262 // that need to be copied or reloaded.
1263 regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(i);
1264 if (reloadReg != REG_NA)
1270 assert(reg != REG_NA);
1271 if (varTypeIsFloating(type))
1273 // If the register piece was passed in a floating point register
1274 // Use a vector mov element instruction
1275 // src is not a vector, so it is in the first element reg[0]
1276 // mov dst[i], reg[0]
1277 // This effectively moves from `reg[0]` to `dst[i]`, leaving other dst bits unchanged till further
1279 // For the case where reg == dst, if we iterate so that we write dst[0] last, we eliminate the need for
1281 getEmitter()->emitIns_R_R_I_I(INS_mov, emitTypeSize(type), dst, reg, i, 0);
1285 // If the register piece was passed in an integer register
1286 // Use a vector mov from general purpose register instruction
1288 // This effectively moves from `reg` to `dst[i]`
1289 getEmitter()->emitIns_R_R_I(INS_mov, emitTypeSize(type), dst, reg, i);
1293 genProduceReg(treeNode);
// Stack destination: store each return register to local 'lclNum', advancing 'offset' by the size
// of the piece just stored.
1299 for (unsigned i = 0; i < regCount; ++i)
1301 var_types type = pRetTypeDesc->GetReturnRegType(i);
1302 regNumber reg = call->GetRegNumByIdx(i);
1303 if (op1->IsCopyOrReload())
1305 // GT_COPY/GT_RELOAD will have valid reg for those positions
1306 // that need to be copied or reloaded.
1307 regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(i);
1308 if (reloadReg != REG_NA)
1314 assert(reg != REG_NA);
1315 getEmitter()->emitIns_S_R(ins_Store(type), emitTypeSize(type), reg, lclNum, offset);
1316 offset += genTypeSize(type);
// Record that the local's current location is its stack home.
1319 varDsc->lvRegNum = REG_STK;
1323 //------------------------------------------------------------------------
1324 // genRangeCheck: generate code for GT_ARR_BOUNDS_CHECK node.
//
// Arguments:
//    oper - the bounds check node (GT_ARR_BOUNDS_CHECK, or GT_SIMD_CHK in the FEATURE_SIMD variant
//           of the operator check below)
//
// Notes:
//    Emits an unsigned compare of the index against the length followed by a conditional jump
//    to the throw helper block identified by the node's gtThrowKind / gtIndRngFailBB.
//
1326 void CodeGen::genRangeCheck(GenTree* oper)
1329 noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK || oper->OperGet() == GT_SIMD_CHK);
1330 #else // !FEATURE_SIMD
1331 noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK);
1332 #endif // !FEATURE_SIMD
1334 GenTreeBoundsChk* bndsChk = oper->AsBoundsChk();
1336 GenTree* arrLen = bndsChk->gtArrLen;
1337 GenTree* arrIndex = bndsChk->gtIndex;
// NOTE(review): arrRef is not referenced by any other visible line of this function.
1338 GenTree* arrRef = NULL;
1343 emitJumpKind jmpKind;
1345 genConsumeRegs(arrIndex);
1346 genConsumeRegs(arrLen);
// The two compare operands (src1/src2) and the jump kind are chosen together: when the index is a
// contained immediate the jump kind is unsigned "<=", otherwise unsigned ">=".
// NOTE(review): the assignments to src1/src2 are not visible in this extract; the comment below
// indicates the constant operand goes in the second position.
1348 if (arrIndex->isContainedIntOrIImmed())
1350 // To encode using a cmp immediate, we place the
1351 // constant operand in the second position
1354 jmpKind = genJumpKindForOper(GT_LE, CK_UNSIGNED);
1360 jmpKind = genJumpKindForOper(GT_GE, CK_UNSIGNED);
// The compare is sized by the actual type of the second operand.
1363 var_types bndsChkType = genActualType(src2->TypeGet());
1365 // Bounds checks can only be 32 or 64 bit sized comparisons.
1366 assert(bndsChkType == TYP_INT || bndsChkType == TYP_LONG);
1368 // The type of the bounds check should always be wide enough to compare against the index.
1369 assert(emitTypeSize(bndsChkType) >= emitActualTypeSize(src1->TypeGet()));
// cmp src1, src2 ; jump to the range-check failure block when the condition 'jmpKind' holds.
1372 getEmitter()->emitInsBinary(INS_cmp, emitActualTypeSize(bndsChkType), src1, src2);
1373 genJumpToThrowHlpBlk(jmpKind, bndsChk->gtThrowKind, bndsChk->gtIndRngFailBB);
1376 //---------------------------------------------------------------------
1377 // genCodeForPhysReg - generate code for a GT_PHYSREG node
1380 // tree - the GT_PHYSREG node
1385 void CodeGen::genCodeForPhysReg(GenTreePhysReg* tree)
1387 assert(tree->OperIs(GT_PHYSREG));
1389 var_types targetType = tree->TypeGet();
1390 regNumber targetReg = tree->gtRegNum;
1392 if (targetReg != tree->gtSrcReg)
1394 inst_RV_RV(ins_Copy(targetType), targetReg, tree->gtSrcReg, targetType);
1395 genTransferRegGCState(targetReg, tree->gtSrcReg);
1398 genProduceReg(tree);
1401 //---------------------------------------------------------------------
1402 // genCodeForNullCheck - generate code for a GT_NULLCHECK node
1405 // tree - the GT_NULLCHECK node
1410 void CodeGen::genCodeForNullCheck(GenTreeOp* tree)
1412 assert(tree->OperIs(GT_NULLCHECK));
1413 assert(!tree->gtOp1->isContained());
1414 regNumber addrReg = genConsumeReg(tree->gtOp1);
1416 #ifdef _TARGET_ARM64_
1417 regNumber targetReg = REG_ZR;
1419 regNumber targetReg = tree->GetSingleTempReg();
1422 getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, targetReg, addrReg, 0);
1425 //------------------------------------------------------------------------
1426 // genOffsetOfMDArrayLowerBound: Returns the offset from the Array object to the
1427 // lower bound for the given dimension.
1430 // elemType - the element type of the array
1431 // rank - the rank of the array
1432 // dimension - the dimension for which the lower bound offset will be returned.
1436 // TODO-Cleanup: move to CodeGenCommon.cpp
1439 unsigned CodeGen::genOffsetOfMDArrayLowerBound(var_types elemType, unsigned rank, unsigned dimension)
1441 // Note that the lower bound and length fields of the Array object are always TYP_INT
1442 return compiler->eeGetArrayDataOffset(elemType) + genTypeSize(TYP_INT) * (dimension + rank);
1445 //------------------------------------------------------------------------
1446 // genOffsetOfMDArrayLength: Returns the offset from the Array object to the
1447 // size for the given dimension.
1450 // elemType - the element type of the array
1451 // rank - the rank of the array
1452 // dimension - the dimension for which the lower bound offset will be returned.
1456 // TODO-Cleanup: move to CodeGenCommon.cpp
1459 unsigned CodeGen::genOffsetOfMDArrayDimensionSize(var_types elemType, unsigned rank, unsigned dimension)
1461 // Note that the lower bound and length fields of the Array object are always TYP_INT
1462 return compiler->eeGetArrayDataOffset(elemType) + genTypeSize(TYP_INT) * dimension;
1465 //------------------------------------------------------------------------
1466 // genCodeForArrIndex: Generates code to bounds check the index for one dimension of an array reference,
1467 // producing the effective index by subtracting the lower bound.
1470 // arrIndex - the node for which we're generating code
1475 void CodeGen::genCodeForArrIndex(GenTreeArrIndex* arrIndex)
1477 emitter* emit = getEmitter();
1478 GenTree* arrObj = arrIndex->ArrObj();
1479 GenTree* indexNode = arrIndex->IndexExpr();
1480 regNumber arrReg = genConsumeReg(arrObj);
1481 regNumber indexReg = genConsumeReg(indexNode);
1482 regNumber tgtReg = arrIndex->gtRegNum;
1483 noway_assert(tgtReg != REG_NA);
1485 // We will use a temp register to load the lower bound and dimension size values.
1487 regNumber tmpReg = arrIndex->GetSingleTempReg();
1488 assert(tgtReg != tmpReg);
1490 unsigned dim = arrIndex->gtCurrDim;
1491 unsigned rank = arrIndex->gtArrRank;
1492 var_types elemType = arrIndex->gtArrElemType;
1495 offset = genOffsetOfMDArrayLowerBound(elemType, rank, dim);
1496 emit->emitIns_R_R_I(ins_Load(TYP_INT), EA_PTRSIZE, tmpReg, arrReg, offset); // a 4 BYTE sign extending load
1497 emit->emitIns_R_R_R(INS_sub, EA_4BYTE, tgtReg, indexReg, tmpReg);
1499 offset = genOffsetOfMDArrayDimensionSize(elemType, rank, dim);
1500 emit->emitIns_R_R_I(ins_Load(TYP_INT), EA_PTRSIZE, tmpReg, arrReg, offset); // a 4 BYTE sign extending load
1501 emit->emitIns_R_R(INS_cmp, EA_4BYTE, tgtReg, tmpReg);
1503 emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
1504 genJumpToThrowHlpBlk(jmpGEU, SCK_RNGCHK_FAIL);
1506 genProduceReg(arrIndex);
1509 //------------------------------------------------------------------------
1510 // genCodeForArrOffset: Generates code to compute the flattened array offset for
1511 // one dimension of an array reference:
1512 // result = (prevDimOffset * dimSize) + effectiveIndex
1513 // where dimSize is obtained from the arrObj operand
1516 // arrOffset - the node for which we're generating code
1522 // dimSize and effectiveIndex are always non-negative, the former by design,
1523 // and the latter because it has been normalized to be zero-based.
1525 void CodeGen::genCodeForArrOffset(GenTreeArrOffs* arrOffset)
1527 GenTree* offsetNode = arrOffset->gtOffset;
1528 GenTree* indexNode = arrOffset->gtIndex;
1529 regNumber tgtReg = arrOffset->gtRegNum;
1531 noway_assert(tgtReg != REG_NA);
1533 if (!offsetNode->IsIntegralConst(0))
1535 emitter* emit = getEmitter();
1536 regNumber offsetReg = genConsumeReg(offsetNode);
1537 regNumber indexReg = genConsumeReg(indexNode);
1538 regNumber arrReg = genConsumeReg(arrOffset->gtArrObj);
1539 noway_assert(offsetReg != REG_NA);
1540 noway_assert(indexReg != REG_NA);
1541 noway_assert(arrReg != REG_NA);
1543 regNumber tmpReg = arrOffset->GetSingleTempReg();
1545 unsigned dim = arrOffset->gtCurrDim;
1546 unsigned rank = arrOffset->gtArrRank;
1547 var_types elemType = arrOffset->gtArrElemType;
1548 unsigned offset = genOffsetOfMDArrayDimensionSize(elemType, rank, dim);
1550 // Load tmpReg with the dimension size and evaluate
1551 // tgtReg = offsetReg*tmpReg + indexReg.
1552 emit->emitIns_R_R_I(ins_Load(TYP_INT), EA_PTRSIZE, tmpReg, arrReg, offset);
1553 emit->emitIns_R_R_R_R(INS_MULADD, EA_PTRSIZE, tgtReg, tmpReg, offsetReg, indexReg);
1557 regNumber indexReg = genConsumeReg(indexNode);
1558 if (indexReg != tgtReg)
1560 inst_RV_RV(INS_mov, tgtReg, indexReg, TYP_INT);
1563 genProduceReg(arrOffset);
1566 //------------------------------------------------------------------------
1567 // genCodeForShift: Generates the code sequence for a GenTree node that
1568 // represents a bit shift or rotate operation (<<, >>, >>>, rol, ror).
1571 // tree - the bit shift node (that specifies the type of bit shift to perform).
1574 // a) All GenTrees are register allocated.
1576 void CodeGen::genCodeForShift(GenTree* tree)
1578 var_types targetType = tree->TypeGet();
1579 genTreeOps oper = tree->OperGet();
1580 instruction ins = genGetInsForOper(oper, targetType);
1581 emitAttr size = emitActualTypeSize(tree);
1583 assert(tree->gtRegNum != REG_NA);
1585 genConsumeOperands(tree->AsOp());
1587 GenTree* operand = tree->gtGetOp1();
1588 GenTree* shiftBy = tree->gtGetOp2();
1589 if (!shiftBy->IsCnsIntOrI())
1591 getEmitter()->emitIns_R_R_R(ins, size, tree->gtRegNum, operand->gtRegNum, shiftBy->gtRegNum);
1595 unsigned immWidth = emitter::getBitWidth(size); // For ARM64, immWidth will be set to 32 or 64
1596 ssize_t shiftByImm = shiftBy->gtIntCon.gtIconVal & (immWidth - 1);
1598 getEmitter()->emitIns_R_R_I(ins, size, tree->gtRegNum, operand->gtRegNum, shiftByImm);
1601 genProduceReg(tree);
1604 //------------------------------------------------------------------------
1605 // genCodeForLclAddr: Generates the code for GT_LCL_FLD_ADDR/GT_LCL_VAR_ADDR.
1610 void CodeGen::genCodeForLclAddr(GenTree* tree)
1612 assert(tree->OperIs(GT_LCL_FLD_ADDR, GT_LCL_VAR_ADDR));
1614 var_types targetType = tree->TypeGet();
1615 regNumber targetReg = tree->gtRegNum;
1617 // Address of a local var.
1618 noway_assert(targetType == TYP_BYREF);
1620 inst_RV_TT(INS_lea, targetReg, tree, 0, EA_BYREF);
1621 genProduceReg(tree);
1624 //------------------------------------------------------------------------
1625 // genCodeForLclFld: Produce code for a GT_LCL_FLD node.
1628 // tree - the GT_LCL_FLD node
1630 void CodeGen::genCodeForLclFld(GenTreeLclFld* tree)
1632 assert(tree->OperIs(GT_LCL_FLD));
1634 var_types targetType = tree->TypeGet();
1635 regNumber targetReg = tree->gtRegNum;
1636 emitter* emit = getEmitter();
1638 NYI_IF(targetType == TYP_STRUCT, "GT_LCL_FLD: struct load local field not supported");
1639 assert(targetReg != REG_NA);
1641 emitAttr size = emitTypeSize(targetType);
1642 unsigned offs = tree->gtLclOffs;
1643 unsigned varNum = tree->gtLclNum;
1644 assert(varNum < compiler->lvaCount);
1646 if (varTypeIsFloating(targetType) || varTypeIsSIMD(targetType))
1648 emit->emitIns_R_S(ins_Load(targetType), size, targetReg, varNum, offs);
1652 #ifdef _TARGET_ARM64_
1653 size = EA_SET_SIZE(size, EA_8BYTE);
1654 #endif // _TARGET_ARM64_
1655 emit->emitIns_R_S(ins_Move_Extend(targetType, false), size, targetReg, varNum, offs);
1658 genProduceReg(tree);
1661 //------------------------------------------------------------------------
1662 // genCodeForIndexAddr: Produce code for a GT_INDEX_ADDR node.
//
// Arguments:
1665 //    node - the GT_INDEX_ADDR node
//
// Notes:
//    Optionally emits a range check of the index against the array length (when
//    GTF_INX_RNGCHK is set), then computes base + index * gtElemSize + gtElemOffset
//    into the node's register.
//
1667 void CodeGen::genCodeForIndexAddr(GenTreeIndexAddr* node)
1669 GenTree* const base = node->Arr();
1670 GenTree* const index = node->Index();
1672 genConsumeReg(base);
1673 genConsumeReg(index);
1675 // NOTE: `genConsumeReg` marks the consumed register as not a GC pointer, as it assumes that the input registers
1676 // die at the first instruction generated by the node. This is not the case for `INDEX_ADDR`, however, as the
1677 // base register is multiply-used. As such, we need to mark the base register as containing a GC pointer until
1678 // we are finished generating the code for this node.
1680 gcInfo.gcMarkRegPtrVal(base->gtRegNum, base->TypeGet());
1681 assert(!varTypeIsGC(index->TypeGet()));
// The temp register holds the loaded array length for the range check and, for element sizes
// handled by the multiply-add form, the element size constant.
1683 const regNumber tmpReg = node->GetSingleTempReg();
1685 // Generate the bounds check if necessary.
1686 if ((node->gtFlags & GTF_INX_RNGCHK) != 0)
1688 // Create a GT_IND(GT_LEA) tree for the array length access and load the length into a register.
// These nodes are stack-allocated temporaries used only to drive the load/compare emitters.
1689 GenTreeAddrMode arrLenAddr(base->TypeGet(), base, nullptr, 0, static_cast<unsigned>(node->gtLenOffset));
1690 arrLenAddr.gtRegNum = REG_NA;
1691 arrLenAddr.SetContained();
1693 GenTreeIndir arrLen = indirForm(TYP_INT, &arrLenAddr);
1694 arrLen.gtRegNum = tmpReg;
1695 arrLen.ClearContained();
1697 getEmitter()->emitInsLoadStoreOp(ins_Load(TYP_INT), emitTypeSize(TYP_INT), arrLen.gtRegNum, &arrLen);
1699 #ifdef _TARGET_64BIT_
1700 // The CLI Spec allows an array to be indexed by either an int32 or a native int. In the case that the index
1701 // is a native int on a 64-bit platform, we will need to widen the array length and the compare.
1702 if (index->TypeGet() == TYP_I_IMPL)
1704 // Extend the array length as needed.
1705 getEmitter()->emitIns_R_R(ins_Move_Extend(TYP_INT, true), EA_8BYTE, arrLen.gtRegNum, arrLen.gtRegNum);
1709 // Generate the range check.
// Unsigned compare of index against length; jump to the failure block when index >= length.
1710 getEmitter()->emitInsBinary(INS_cmp, emitActualTypeSize(TYP_I_IMPL), index, &arrLen);
1711 genJumpToThrowHlpBlk(genJumpKindForOper(GT_GE, CK_UNSIGNED), SCK_RNGCHK_FAIL, node->gtIndRngFailBB);
1714 // Compute the address of the array element.
// NOTE(review): the case labels of this switch are not visible in this extract; the three forms
// below are a plain add, a scaled add using the bit index of the element size, and a multiply-add.
1715 switch (node->gtElemSize)
1718 // dest = base + index
1719 getEmitter()->emitIns_R_R_R(INS_add, emitActualTypeSize(node), node->gtRegNum, base->gtRegNum,
// BitScanForward yields the shift amount corresponding to the element size.
1729 BitScanForward(&lsl, node->gtElemSize);
1731 // dest = base + index * scale
1732 genScaledAdd(emitActualTypeSize(node), node->gtRegNum, base->gtRegNum, index->gtRegNum, lsl);
// Materialize the element size in the temp register for the multiply-add below.
1739 CodeGen::genSetRegToIcon(tmpReg, (ssize_t)node->gtElemSize, TYP_INT);
1741 // dest = index * tmp + base
1742 getEmitter()->emitIns_R_R_R_R(INS_MULADD, emitActualTypeSize(node), node->gtRegNum, index->gtRegNum, tmpReg,
1748 // dest = dest + elemOffs
1749 getEmitter()->emitIns_R_R_I(INS_add, emitActualTypeSize(node), node->gtRegNum, node->gtRegNum, node->gtElemOffset);
// Code for this node is complete, so stop reporting the base register as a GC pointer (see NOTE above).
1751 gcInfo.gcMarkRegSetNpt(base->gtGetRegMask());
1753 genProduceReg(node);
1756 //------------------------------------------------------------------------
1757 // genCodeForIndir: Produce code for a GT_IND node.
//
// Arguments:
1760 //    tree - the GT_IND node
//
// Notes:
//    TYP_SIMD12 loads are delegated to genLoadIndTypeSIMD12. Volatile loads
//    (GTF_IND_VOLATILE) are followed by a memory barrier, except on ARM64 when the
//    'useLoadAcquire' conditions below hold.
//
1762 void CodeGen::genCodeForIndir(GenTreeIndir* tree)
1764 assert(tree->OperIs(GT_IND));
1766 var_types targetType = tree->TypeGet();
1767 regNumber targetReg = tree->gtRegNum;
1768 emitter* emit = getEmitter();
1769 emitAttr attr = emitTypeSize(tree);
1770 instruction ins = ins_Load(targetType);
1773 // Handling of Vector3 type values loaded through indirection.
1774 if (tree->TypeGet() == TYP_SIMD12)
1776 genLoadIndTypeSIMD12(tree);
1779 #endif // FEATURE_SIMD
1781 genConsumeAddress(tree->Addr());
1782 if ((tree->gtFlags & GTF_IND_VOLATILE) != 0)
1784 bool isAligned = ((tree->gtFlags & GTF_IND_UNALIGNED) == 0);
// A volatile load is only allowed to be unaligned when it is wider than one byte.
1786 assert((attr != EA_1BYTE) || isAligned);
1788 #ifdef _TARGET_ARM64_
1789 GenTree* addr = tree->Addr();
// useLoadAcquire requires: an integer target register, a non-contained address, an unsigned or
// pointer-sized-integer target type, and an aligned access.
1790 bool useLoadAcquire = genIsValidIntReg(targetReg) && !addr->isContained() &&
1791 (varTypeIsUnsigned(targetType) || varTypeIsI(targetType)) &&
1792 !(tree->gtFlags & GTF_IND_UNALIGNED);
// NOTE(review): the case labels and the statements that replace 'ins' are not visible in this
// extract; each case asserts the plain load instruction expected for that access size.
1796 switch (EA_SIZE(attr))
1799 assert(ins == INS_ldrb);
1803 assert(ins == INS_ldrh);
1808 assert(ins == INS_ldr);
1812 assert(false); // We should not get here
1816 emit->emitInsLoadStoreOp(ins, attr, targetReg, tree);
1818 if (!useLoadAcquire) // issue a INS_BARRIER_OSHLD after a volatile LdInd operation
1819 instGen_MemoryBarrier(INS_BARRIER_OSHLD);
// Non-ARM64 variant of the volatile path: plain load followed by a full barrier.
1821 emit->emitInsLoadStoreOp(ins, attr, targetReg, tree);
1823 // issue a full memory barrier after a volatile LdInd operation
1824 instGen_MemoryBarrier();
1825 #endif // _TARGET_ARM64_
// Load emitted without any barrier.
1829 emit->emitInsLoadStoreOp(ins, attr, targetReg, tree);
1832 genProduceReg(tree);
1835 // Generate code for a CpBlk node by the means of the VM memcpy helper call
// Preconditions:
1837 // a) The size argument of the CpBlk is not an integer constant
1838 // b) The size argument is a constant but is larger than CPBLK_MOVS_LIMIT bytes.
// NOTE(review): the ARM64 assert below checks against CPBLK_UNROLL_LIMIT rather than
// CPBLK_MOVS_LIMIT; confirm which limit condition (b) should name.
//
// Arguments:
//    cpBlkNode - the copy block node
//
// Notes:
//    The block operands are placed in REG_ARG_0, REG_ARG_1 and REG_ARG_2 before calling
//    CORINFO_HELP_MEMCPY. For volatile copies (GTF_BLK_VOLATILE) a barrier is issued both
//    before and after the helper call.
1839 void CodeGen::genCodeForCpBlk(GenTreeBlk* cpBlkNode)
1841 // Make sure we got the arguments of the cpblk operation in the right registers
1842 unsigned blockSize = cpBlkNode->Size();
1843 GenTree* dstAddr = cpBlkNode->Addr();
1844 assert(!dstAddr->isContained());
1846 genConsumeBlockOp(cpBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2);
1848 #ifdef _TARGET_ARM64_
1851 assert(blockSize > CPBLK_UNROLL_LIMIT);
1853 #endif // _TARGET_ARM64_
1855 if (cpBlkNode->gtFlags & GTF_BLK_VOLATILE)
1857 // issue a full memory barrier before a volatile CpBlk operation
1858 instGen_MemoryBarrier();
1861 genEmitHelperCall(CORINFO_HELP_MEMCPY, 0, EA_UNKNOWN);
1863 if (cpBlkNode->gtFlags & GTF_BLK_VOLATILE)
1865 #ifdef _TARGET_ARM64_
1866 // issue a INS_BARRIER_ISHLD after a volatile CpBlk operation
1867 instGen_MemoryBarrier(INS_BARRIER_ISHLD);
1869 // issue a full memory barrier after a volatile CpBlk operation
1870 instGen_MemoryBarrier();
1871 #endif // _TARGET_ARM64_
1875 //----------------------------------------------------------------------------------
1876 // genCodeForCpBlkUnroll: Generates CpBlk code by performing a loop unroll
//
// Arguments:
1879 //    cpBlkNode - Copy block node
//
// Preconditions:
1885 //    The size argument of the CpBlk node is a constant and <= CPBLK_UNROLL_LIMIT bytes.
//
// Notes:
//    The copy is emitted as a sequence of load/store pairs through integer temp registers:
//    register-pair sized chunks on ARM64 (register sized on ARM32) followed by progressively
//    smaller accesses for the remainder. Volatile copies are bracketed by memory barriers.
//
1887 void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode)
1889 // Make sure we got the arguments of the cpblk operation in the right registers
1890 unsigned size = cpBlkNode->Size();
1891 GenTree* dstAddr = cpBlkNode->Addr();
1892 GenTree* source = cpBlkNode->Data();
1893 GenTree* srcAddr = nullptr;
1895 assert((size != 0) && (size <= CPBLK_UNROLL_LIMIT));
1897 emitter* emit = getEmitter();
1899 if (dstAddr->isUsedFromReg())
1901 genConsumeReg(dstAddr);
1904 if (cpBlkNode->gtFlags & GTF_BLK_VOLATILE)
1906 // issue a full memory barrier before a volatile CpBlkUnroll operation
1907 instGen_MemoryBarrier();
// Determine the source address: either the address operand of a GT_IND, or the local itself
// re-tagged in place as an address node.
1910 if (source->gtOper == GT_IND)
1912 srcAddr = source->gtGetOp1();
1913 if (srcAddr->isUsedFromReg())
1915 genConsumeReg(srcAddr);
1920 noway_assert(source->IsLocal());
1921 // TODO-Cleanup: Consider making the addrForm() method in Rationalize public, e.g. in GenTree.
1922 // OR: transform source to GT_IND(GT_LCL_VAR_ADDR)
1923 if (source->OperGet() == GT_LCL_VAR)
1925 source->SetOper(GT_LCL_VAR_ADDR);
1929 assert(source->OperGet() == GT_LCL_FLD);
1930 source->SetOper(GT_LCL_FLD_ADDR);
// Running byte offset into both source and destination.
1935 unsigned offset = 0;
1937 // Grab the integer temp register to emit the loads and stores.
1938 regNumber tmpReg = cpBlkNode->ExtractTempReg(RBM_ALLINT);
1940 #ifdef _TARGET_ARM64_
// ARM64: copy 2 * REGSIZE_BYTES at a time with load-pair/store-pair, using a second temp register.
1941 if (size >= 2 * REGSIZE_BYTES)
1943 regNumber tmp2Reg = cpBlkNode->ExtractTempReg(RBM_ALLINT);
1945 size_t slots = size / (2 * REGSIZE_BYTES);
1950 genCodeForLoadPairOffset(tmpReg, tmp2Reg, srcAddr, offset);
1952 genCodeForStorePairOffset(tmpReg, tmp2Reg, dstAddr, offset);
1953 offset += 2 * REGSIZE_BYTES;
1957 // Fill the remainder (15 bytes or less) if there's one.
// Each set bit of the remaining size selects one 8/4/2/1-byte load/store pair.
// NOTE(review): the offset updates between these steps are not visible in this extract.
1958 if ((size & 0xf) != 0)
1960 if ((size & 8) != 0)
1962 genCodeForLoadOffset(INS_ldr, EA_8BYTE, tmpReg, srcAddr, offset);
1963 genCodeForStoreOffset(INS_str, EA_8BYTE, tmpReg, dstAddr, offset);
1966 if ((size & 4) != 0)
1968 genCodeForLoadOffset(INS_ldr, EA_4BYTE, tmpReg, srcAddr, offset);
1969 genCodeForStoreOffset(INS_str, EA_4BYTE, tmpReg, dstAddr, offset);
1972 if ((size & 2) != 0)
1974 genCodeForLoadOffset(INS_ldrh, EA_2BYTE, tmpReg, srcAddr, offset);
1975 genCodeForStoreOffset(INS_strh, EA_2BYTE, tmpReg, dstAddr, offset);
1978 if ((size & 1) != 0)
1980 genCodeForLoadOffset(INS_ldrb, EA_1BYTE, tmpReg, srcAddr, offset);
1981 genCodeForStoreOffset(INS_strb, EA_1BYTE, tmpReg, dstAddr, offset);
1984 #else // !_TARGET_ARM64_
// ARM32: copy REGSIZE_BYTES at a time with 4-byte load/store.
1985 size_t slots = size / REGSIZE_BYTES;
1988 genCodeForLoadOffset(INS_ldr, EA_4BYTE, tmpReg, srcAddr, offset);
1989 genCodeForStoreOffset(INS_str, EA_4BYTE, tmpReg, dstAddr, offset);
1990 offset += REGSIZE_BYTES;
1993 // Fill the remainder (3 bytes or less) if there's one.
1994 if ((size & 0x03) != 0)
1996 if ((size & 2) != 0)
1998 genCodeForLoadOffset(INS_ldrh, EA_2BYTE, tmpReg, srcAddr, offset);
1999 genCodeForStoreOffset(INS_strh, EA_2BYTE, tmpReg, dstAddr, offset);
2002 if ((size & 1) != 0)
2004 genCodeForLoadOffset(INS_ldrb, EA_1BYTE, tmpReg, srcAddr, offset);
2005 genCodeForStoreOffset(INS_strb, EA_1BYTE, tmpReg, dstAddr, offset);
2008 #endif // !_TARGET_ARM64_
2010 if (cpBlkNode->gtFlags & GTF_BLK_VOLATILE)
2012 #ifdef _TARGET_ARM64_
2013 // issue a INS_BARRIER_ISHLD after a volatile CpBlkUnroll operation
2014 instGen_MemoryBarrier(INS_BARRIER_ISHLD);
2016 // issue a full memory barrier after a volatile CpBlk operation
2017 instGen_MemoryBarrier();
2018 #endif // !_TARGET_ARM64_
2022 // Generates code for InitBlk by calling the VM memset helper function.
// Preconditions:
2024 // a) The size argument of the InitBlk is not an integer constant.
2025 // b) The size argument of the InitBlk is >= INITBLK_STOS_LIMIT bytes.
// NOTE(review): the ARM64 assert below checks against INITBLK_UNROLL_LIMIT rather than
// INITBLK_STOS_LIMIT; confirm which limit condition (b) should name.
//
// Arguments:
//    initBlkNode - the init block node
//
// Notes:
//    The block operands are placed in REG_ARG_0, REG_ARG_1 and REG_ARG_2 before calling
//    CORINFO_HELP_MEMSET. For volatile operations (GTF_BLK_VOLATILE) a full memory barrier is
//    issued before the helper call.
2026 void CodeGen::genCodeForInitBlk(GenTreeBlk* initBlkNode)
2028 unsigned size = initBlkNode->Size();
2029 GenTree* dstAddr = initBlkNode->Addr();
2030 GenTree* initVal = initBlkNode->Data();
// Look through an init-value wrapper node to reach the actual fill value.
2031 if (initVal->OperIsInitVal())
2033 initVal = initVal->gtGetOp1();
2036 assert(!dstAddr->isContained());
2037 assert(!initVal->isContained());
2039 #ifdef _TARGET_ARM64_
2042 assert((size > INITBLK_UNROLL_LIMIT) || !initVal->IsCnsIntOrI());
2044 #endif // _TARGET_ARM64_
2046 genConsumeBlockOp(initBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2);
2048 if (initBlkNode->gtFlags & GTF_BLK_VOLATILE)
2050 // issue a full memory barrier before a volatile initBlock Operation
2051 instGen_MemoryBarrier();
2054 genEmitHelperCall(CORINFO_HELP_MEMSET, 0, EA_UNKNOWN);
2057 // Generate code for a load from some address + offset
2058 // base: tree node which can be either a local address or arbitrary node
2059 // offset: distance from the base from which to load
2060 void CodeGen::genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst, GenTree* base, unsigned offset)
2062 emitter* emit = getEmitter();
2064 if (base->OperIsLocalAddr())
2066 if (base->gtOper == GT_LCL_FLD_ADDR)
2067 offset += base->gtLclFld.gtLclOffs;
2068 emit->emitIns_R_S(ins, size, dst, base->gtLclVarCommon.gtLclNum, offset);
2072 emit->emitIns_R_R_I(ins, size, dst, base->gtRegNum, offset);
2076 // Generate code for a store to some address + offset
2077 // base: tree node which can be either a local address or arbitrary node
2078 // offset: distance from the base from which to load
2079 void CodeGen::genCodeForStoreOffset(instruction ins, emitAttr size, regNumber src, GenTree* base, unsigned offset)
2081 emitter* emit = getEmitter();
2083 if (base->OperIsLocalAddr())
2085 if (base->gtOper == GT_LCL_FLD_ADDR)
2086 offset += base->gtLclFld.gtLclOffs;
2087 emit->emitIns_S_R(ins, size, src, base->gtLclVarCommon.gtLclNum, offset);
2091 emit->emitIns_R_R_I(ins, size, src, base->gtRegNum, offset);
2095 //------------------------------------------------------------------------
2096 // genRegCopy: Generate a register copy.
//
// Arguments:
//    treeNode - the GT_COPY node; its gtRegNum is the destination register and its
//               first operand supplies the source register.
//
2098 void CodeGen::genRegCopy(GenTree* treeNode)
2100 assert(treeNode->OperGet() == GT_COPY);
2102 var_types targetType = treeNode->TypeGet();
2103 regNumber targetReg = treeNode->gtRegNum;
2104 assert(targetReg != REG_NA);
2106 GenTree* op1 = treeNode->gtOp.gtOp1;
2108 // Check whether this node and the node from which we're copying the value have the same
2110 // This can happen if (currently iff) we have a SIMD vector type that fits in an integer
2111 // register, in which case it is passed as an argument, or returned from a call,
2112 // in an integer register and must be copied if it's in an xmm register.
// The copy crosses between floating-point and integer register files when exactly one of the
// two nodes has a floating-point type.
2114 if (varTypeIsFloating(treeNode) != varTypeIsFloating(op1))
2116 #ifdef _TARGET_ARM64_
2117 inst_RV_RV(INS_fmov, targetReg, genConsumeReg(op1), targetType);
2118 #else // !_TARGET_ARM64_
2119 if (varTypeIsFloating(treeNode))
2121 // GT_COPY from 'int' to 'float' currently can't happen. Maybe if ARM SIMD is implemented
2122 // it will happen, according to the comment above?
2123 NYI_ARM("genRegCopy from 'int' to 'float'");
2127 assert(varTypeIsFloating(op1));
// ARM32 float -> int: a TYP_FLOAT source uses one integer register; any other floating
// source is moved into two integer registers (targetReg and gtOtherRegs[0]).
2129 if (op1->TypeGet() == TYP_FLOAT)
2131 inst_RV_RV(INS_vmov_f2i, targetReg, genConsumeReg(op1), targetType);
2135 regNumber otherReg = (regNumber)treeNode->AsCopyOrReload()->gtOtherRegs[0];
2136 assert(otherReg != REG_NA);
2137 inst_RV_RV_RV(INS_vmov_d2i, targetReg, otherReg, genConsumeReg(op1), EA_8BYTE);
2140 #endif // !_TARGET_ARM64_
// Same register file: use the ordinary copy instruction for the type.
2144 inst_RV_RV(ins_Copy(targetType), targetReg, genConsumeReg(op1), targetType);
// NOTE(review): the condition guarding the lclVar handling below is not visible in this extract.
2149 // The lclVar will never be a def.
2150 // If it is a last use, the lclVar will be killed by genConsumeReg(), as usual, and genProduceReg will
2151 // appropriately set the gcInfo for the copied value.
2152 // If not, there are two cases we need to handle:
2153 // - If this is a TEMPORARY copy (indicated by the GTF_VAR_DEATH flag) the variable
2154 // will remain live in its original register.
2155 // genProduceReg() will appropriately set the gcInfo for the copied value,
2156 // and genConsumeReg will reset it.
2157 // - Otherwise, we need to update register info for the lclVar.
2159 GenTreeLclVarCommon* lcl = op1->AsLclVarCommon();
2160 assert((lcl->gtFlags & GTF_VAR_DEF) == 0);
// Neither the lclVar use nor the copy is flagged GTF_VAR_DEATH: the variable moves to the new register.
2162 if ((lcl->gtFlags & GTF_VAR_DEATH) == 0 && (treeNode->gtFlags & GTF_VAR_DEATH) == 0)
2164 LclVarDsc* varDsc = &compiler->lvaTable[lcl->gtLclNum];
2166 // If we didn't just spill it (in genConsumeReg, above), then update the register info
2167 if (varDsc->lvRegNum != REG_STK)
2169 // The old location is dying
2170 genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(op1));
// The source register no longer holds a GC pointer for this variable.
2172 gcInfo.gcMarkRegSetNpt(genRegMask(op1->gtRegNum));
2174 genUpdateVarReg(varDsc, treeNode);
2176 // The new location is going live
2177 genUpdateRegLife(varDsc, /*isBorn*/ true, /*isDying*/ false DEBUGARG(treeNode));
2182 genProduceReg(treeNode);
2185 //------------------------------------------------------------------------
2186 // genCallInstruction: Produce code for a GT_CALL node
2188 void CodeGen::genCallInstruction(GenTreeCall* call)
2190 gtCallTypes callType = (gtCallTypes)call->gtCallType;
2192 IL_OFFSETX ilOffset = BAD_IL_OFFSET;
2194 // all virtuals should have been expanded into a control expression
2195 assert(!call->IsVirtual() || call->gtControlExpr || call->gtCallAddr);
2197 // Consume all the arg regs
2198 for (GenTree* list = call->gtCallLateArgs; list; list = list->MoveNext())
2200 assert(list->OperIsList());
2202 GenTree* argNode = list->Current();
2204 fgArgTabEntry* curArgTabEntry = compiler->gtArgEntryByNode(call, argNode);
2205 assert(curArgTabEntry);
2207 // GT_RELOAD/GT_COPY use the child node
2208 argNode = argNode->gtSkipReloadOrCopy();
2210 if (curArgTabEntry->regNum == REG_STK)
2213 // Deal with multi register passed struct args.
2214 if (argNode->OperGet() == GT_FIELD_LIST)
2216 GenTreeArgList* argListPtr = argNode->AsArgList();
2217 unsigned iterationNum = 0;
2218 regNumber argReg = curArgTabEntry->regNum;
2219 for (; argListPtr != nullptr; argListPtr = argListPtr->Rest(), iterationNum++)
2221 GenTree* putArgRegNode = argListPtr->gtOp.gtOp1;
2222 assert(putArgRegNode->gtOper == GT_PUTARG_REG);
2224 genConsumeReg(putArgRegNode);
2226 if (putArgRegNode->gtRegNum != argReg)
2228 inst_RV_RV(ins_Move_Extend(putArgRegNode->TypeGet(), true), argReg, putArgRegNode->gtRegNum);
2231 argReg = genRegArgNext(argReg);
2233 #if defined(_TARGET_ARM_)
2234 // A double register is modelled as an even-numbered single one
2235 if (putArgRegNode->TypeGet() == TYP_DOUBLE)
2237 argReg = genRegArgNext(argReg);
2239 #endif // _TARGET_ARM_
2243 else if (curArgTabEntry->isSplit)
2245 assert(curArgTabEntry->numRegs >= 1);
2246 genConsumeArgSplitStruct(argNode->AsPutArgSplit());
2247 for (unsigned idx = 0; idx < curArgTabEntry->numRegs; idx++)
2249 regNumber argReg = (regNumber)((unsigned)curArgTabEntry->regNum + idx);
2250 regNumber allocReg = argNode->AsPutArgSplit()->GetRegNumByIdx(idx);
2251 if (argReg != allocReg)
2253 inst_RV_RV(ins_Move_Extend(argNode->TypeGet(), true), argReg, allocReg);
2260 regNumber argReg = curArgTabEntry->regNum;
2261 genConsumeReg(argNode);
2262 if (argNode->gtRegNum != argReg)
2264 inst_RV_RV(ins_Move_Extend(argNode->TypeGet(), true), argReg, argNode->gtRegNum);
2269 // Insert a null check on "this" pointer if asked.
2270 if (call->NeedsNullCheck())
2272 const regNumber regThis = genGetThisArgReg(call);
2274 #if defined(_TARGET_ARM_)
2275 const regNumber tmpReg = call->ExtractTempReg();
2276 getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, tmpReg, regThis, 0);
2277 #elif defined(_TARGET_ARM64_)
2278 getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_ZR, regThis, 0);
2282 // Either gtControlExpr != null or gtCallAddr != null or it is a direct non-virtual call to a user or helper method.
2283 CORINFO_METHOD_HANDLE methHnd;
2284 GenTree* target = call->gtControlExpr;
2285 if (callType == CT_INDIRECT)
2287 assert(target == nullptr);
2288 target = call->gtCallAddr;
2293 methHnd = call->gtCallMethHnd;
2296 CORINFO_SIG_INFO* sigInfo = nullptr;
2298 // Pass the call signature information down into the emitter so the emitter can associate
2299 // native call sites with the signatures they were generated from.
2300 if (callType != CT_HELPER)
2302 sigInfo = call->callSig;
2306 // If fast tail call, then we are done. In this case we setup the args (both reg args
2307 // and stack args in incoming arg area) and call target. Epilog sequence would
2308 // generate "br <reg>".
2309 if (call->IsFastTailCall())
2311 // Don't support fast tail calling JIT helpers
2312 assert(callType != CT_HELPER);
2314 // Fast tail calls materialize call target either in gtControlExpr or in gtCallAddr.
2315 assert(target != nullptr);
2317 genConsumeReg(target);
2319 // Use IP0 on ARM64 and R12 on ARM32 as the call target register.
2320 if (target->gtRegNum != REG_FASTTAILCALL_TARGET)
2322 inst_RV_RV(INS_mov, REG_FASTTAILCALL_TARGET, target->gtRegNum);
2328 // For a pinvoke to unmanaged code we emit a label to clear
2329 // the GC pointer state before the callsite.
2330 // We can't utilize the typical lazy killing of GC pointers
2331 // at (or inside) the callsite.
2332 if (compiler->killGCRefs(call))
2334 genDefineTempLabel(genCreateTempLabel());
2337 // Determine return value size(s).
2338 ReturnTypeDesc* pRetTypeDesc = call->GetReturnTypeDesc();
2339 emitAttr retSize = EA_PTRSIZE;
2340 emitAttr secondRetSize = EA_UNKNOWN;
2342 if (call->HasMultiRegRetVal())
2344 retSize = emitTypeSize(pRetTypeDesc->GetReturnRegType(0));
2345 secondRetSize = emitTypeSize(pRetTypeDesc->GetReturnRegType(1));
2349 assert(!varTypeIsStruct(call));
2351 if (call->gtType == TYP_REF)
2355 else if (call->gtType == TYP_BYREF)
2361 // We need to propagate the IL offset information to the call instruction, so we can emit
2362 // an IL to native mapping record for the call, to support managed return value debugging.
2363 // We don't want tail call helper calls that were converted from normal calls to get a record,
2364 // so we skip this hash table lookup logic in that case.
2365 if (compiler->opts.compDbgInfo && compiler->genCallSite2ILOffsetMap != nullptr && !call->IsTailCall())
2367 (void)compiler->genCallSite2ILOffsetMap->Lookup(call, &ilOffset);
2370 if (target != nullptr)
2372 // A call target can not be a contained indirection
2373 assert(!target->isContainedIndir());
2375 genConsumeReg(target);
2377 // We have already generated code for gtControlExpr evaluating it into a register.
2378 // We just need to emit "call reg" in this case.
2380 assert(genIsValidIntReg(target->gtRegNum));
2382 genEmitCall(emitter::EC_INDIR_R, methHnd,
2383 INDEBUG_LDISASM_COMMA(sigInfo) nullptr, // addr
2384 retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), ilOffset, target->gtRegNum);
2388 // Generate a direct call to a non-virtual user defined or helper method
2389 assert(callType == CT_HELPER || callType == CT_USER_FUNC);
2391 void* addr = nullptr;
2392 #ifdef FEATURE_READYTORUN_COMPILER
2393 if (call->gtEntryPoint.addr != NULL)
2395 assert(call->gtEntryPoint.accessType == IAT_VALUE);
2396 addr = call->gtEntryPoint.addr;
2399 #endif // FEATURE_READYTORUN_COMPILER
2400 if (callType == CT_HELPER)
2402 CorInfoHelpFunc helperNum = compiler->eeGetHelperNum(methHnd);
2403 noway_assert(helperNum != CORINFO_HELP_UNDEF);
2405 void* pAddr = nullptr;
2406 addr = compiler->compGetHelperFtn(helperNum, (void**)&pAddr);
2407 assert(pAddr == nullptr);
2411 // Direct call to a non-virtual user function.
2412 addr = call->gtDirectCallAddress;
2415 assert(addr != nullptr);
2417 // Non-virtual direct call to known addresses
2419 if (!arm_Valid_Imm_For_BL((ssize_t)addr))
2421 regNumber tmpReg = call->GetSingleTempReg();
2422 instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, tmpReg, (ssize_t)addr);
2423 genEmitCall(emitter::EC_INDIR_R, methHnd, INDEBUG_LDISASM_COMMA(sigInfo) NULL, retSize, ilOffset, tmpReg);
2426 #endif // _TARGET_ARM_
2428 genEmitCall(emitter::EC_FUNC_TOKEN, methHnd, INDEBUG_LDISASM_COMMA(sigInfo) addr,
2429 retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), ilOffset);
2432 #if 0 && defined(_TARGET_ARM64_)
2433 // Use this path if you want to load an absolute call target using
2434 // a sequence of movs followed by an indirect call (blr instruction)
2436 // Load the call target address in x16
2437 instGen_Set_Reg_To_Imm(EA_8BYTE, REG_IP0, (ssize_t) addr);
2439 // indirect call to constant address in IP0
2440 genEmitCall(emitter::EC_INDIR_R,
2442 INDEBUG_LDISASM_COMMA(sigInfo)
2451 // if it was a pinvoke we may have needed to get the address of a label
2452 if (genPendingCallLabel)
2454 assert(call->IsUnmanaged());
2455 genDefineTempLabel(genPendingCallLabel);
2456 genPendingCallLabel = nullptr;
2460 // All Callee arg registers are trashed and no longer contain any GC pointers.
2461 // TODO-Bug?: As a matter of fact shouldn't we be killing all of callee trashed regs here?
2462 // For now we will assert that other than arg regs gc ref/byref set doesn't contain any other
2463 // registers from RBM_CALLEE_TRASH
2464 assert((gcInfo.gcRegGCrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0);
2465 assert((gcInfo.gcRegByrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0);
2466 gcInfo.gcRegGCrefSetCur &= ~RBM_ARG_REGS;
2467 gcInfo.gcRegByrefSetCur &= ~RBM_ARG_REGS;
2469 var_types returnType = call->TypeGet();
2470 if (returnType != TYP_VOID)
2472 regNumber returnReg;
2474 if (call->HasMultiRegRetVal())
2476 assert(pRetTypeDesc != nullptr);
2477 unsigned regCount = pRetTypeDesc->GetReturnRegCount();
2479 // If regs allocated to call node are different from ABI return
2480 // regs in which the call has returned its result, move the result
2481 // to regs allocated to call node.
2482 for (unsigned i = 0; i < regCount; ++i)
2484 var_types regType = pRetTypeDesc->GetReturnRegType(i);
2485 returnReg = pRetTypeDesc->GetABIReturnReg(i);
2486 regNumber allocatedReg = call->GetRegNumByIdx(i);
2487 if (returnReg != allocatedReg)
2489 inst_RV_RV(ins_Copy(regType), allocatedReg, returnReg, regType);
2496 if (call->IsHelperCall(compiler, CORINFO_HELP_INIT_PINVOKE_FRAME))
2498 // The CORINFO_HELP_INIT_PINVOKE_FRAME helper uses a custom calling convention that returns with
2499 // TCB in REG_PINVOKE_TCB. fgMorphCall() sets the correct argument registers.
2500 returnReg = REG_PINVOKE_TCB;
2503 #endif // _TARGET_ARM_
2504 if (varTypeIsFloating(returnType) && !compiler->opts.compUseSoftFP)
2506 returnReg = REG_FLOATRET;
2510 returnReg = REG_INTRET;
2513 if (call->gtRegNum != returnReg)
2516 if (compiler->opts.compUseSoftFP && returnType == TYP_DOUBLE)
2518 inst_RV_RV_RV(INS_vmov_i2d, call->gtRegNum, returnReg, genRegArgNext(returnReg), EA_8BYTE);
2520 else if (compiler->opts.compUseSoftFP && returnType == TYP_FLOAT)
2522 inst_RV_RV(INS_vmov_i2f, call->gtRegNum, returnReg, returnType);
2527 inst_RV_RV(ins_Copy(returnType), call->gtRegNum, returnReg, returnType);
2532 genProduceReg(call);
2535 // If there is nothing next, that means the result is thrown away, so this value is not live.
2536 // However, for minopts or debuggable code, we keep it live to support managed return value debugging.
2537 if ((call->gtNext == nullptr) && !compiler->opts.MinOpts() && !compiler->opts.compDbgCode)
2539 gcInfo.gcMarkRegSetNpt(RBM_INTRET);
2543 // Produce code for a GT_JMP node.
2544 // The arguments of the caller need to be transferred to the callee before exiting the caller.
2545 // The actual jump to callee is generated as part of caller epilog sequence.
2546 // Therefore the codegen of GT_JMP is to ensure that the callee arguments are correctly setup.
//
// Arguments:
//    jmp - the GT_JMP node
//
// Notes:
//    Works in two passes over the first compArgsCount locals. The first pass stores
//    arguments that currently live in a register (other than their incoming argument
//    register) to their stack locations. The second pass loads register arguments from
//    the stack into their incoming argument registers when they are not already there.
//    For a vararg method, the integer argument registers not occupied by fixed arguments
//    are then loaded from the stack with GC disabled (emitDisableGC/emitEnableGC).
2547 void CodeGen::genJmpMethod(GenTree* jmp)
2549 assert(jmp->OperGet() == GT_JMP);
2550 assert(compiler->compJmpOpUsed);
2552 // If no arguments, nothing to do
2553 if (compiler->info.compArgsCount == 0)
2558 // Make sure register arguments are in their initial registers
2559 // and stack arguments are put back as well.
2563 // First move any en-registered stack arguments back to the stack.
2564 // At the same time any reg arg not in correct reg is moved back to its stack location.
2566 // We are not strictly required to spill reg args that are not in the desired reg for a jmp call
2567 // But that would require us to deal with circularity while moving values around. Spilling
2568 // to stack makes the implementation simple, which is not a bad trade off given Jmp calls
2569 // are not frequent.
2570 for (varNum = 0; (varNum < compiler->info.compArgsCount); varNum++)
2572 varDsc = compiler->lvaTable + varNum;
2574 if (varDsc->lvPromoted)
2576 noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here
2578 unsigned fieldVarNum = varDsc->lvFieldLclStart;
2579 varDsc = compiler->lvaTable + fieldVarNum;
2581 noway_assert(varDsc->lvIsParam);
2583 if (varDsc->lvIsRegArg && (varDsc->lvRegNum != REG_STK))
2585 // Skip reg args which are already in their right register for the jmp call.
2586 // If not, we will spill such args to their stack locations.
2588 // If we need to generate a tail call profiler hook, then spill all
2589 // arg regs to free them up for the callback.
2590 if (!compiler->compIsProfilerHookNeeded() && (varDsc->lvRegNum == varDsc->lvArgReg))
2593 else if (varDsc->lvRegNum == REG_STK)
2595 // Skip args which are currently living in stack.
2599 // If we came here it means either a reg argument not in the right register or
2600 // a stack argument currently living in a register. In either case the following
2601 // assert should hold.
2602 assert(varDsc->lvRegNum != REG_STK);
2603 assert(varDsc->TypeGet() != TYP_STRUCT);
2604 var_types storeType = genActualType(varDsc->TypeGet());
2605 emitAttr storeSize = emitActualTypeSize(storeType);
2608 if (varDsc->TypeGet() == TYP_LONG)
2610 // long - at least the low half must be enregistered
2611 getEmitter()->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, varDsc->lvRegNum, varNum, 0);
2613 // Is the upper half also enregistered?
2614 if (varDsc->lvOtherReg != REG_STK)
2616 getEmitter()->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, varDsc->lvOtherReg, varNum, sizeof(int));
2620 #endif // _TARGET_ARM_
2622 getEmitter()->emitIns_S_R(ins_Store(storeType), storeSize, varDsc->lvRegNum, varNum, 0);
2624 // Update lvRegNum life and GC info to indicate lvRegNum is dead and varDsc stack slot is going live.
2625 // Note that we cannot modify varDsc->lvRegNum here because another basic block may not be expecting it.
2626 // Therefore manually update life of varDsc->lvRegNum.
2627 regMaskTP tempMask = genRegMask(varDsc->lvRegNum);
2628 regSet.RemoveMaskVars(tempMask);
2629 gcInfo.gcMarkRegSetNpt(tempMask);
2630 if (compiler->lvaIsGCTracked(varDsc))
2632 VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varNum);
2636 #ifdef PROFILING_SUPPORTED
2637 // At this point all arg regs are free.
2638 // Emit tail call profiler callback.
2639 genProfilingLeaveCallback(CORINFO_HELP_PROF_FCN_TAILCALL);
2642 // Next move any un-enregistered register arguments back to their register.
2643 regMaskTP fixedIntArgMask = RBM_NONE; // tracks the int arg regs occupying fixed args in case of a vararg method.
2644 unsigned firstArgVarNum = BAD_VAR_NUM; // varNum of the first argument in case of a vararg method.
2645 for (varNum = 0; (varNum < compiler->info.compArgsCount); varNum++)
2647 varDsc = compiler->lvaTable + varNum;
2648 if (varDsc->lvPromoted)
2650 noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here
2652 unsigned fieldVarNum = varDsc->lvFieldLclStart;
2653 varDsc = compiler->lvaTable + fieldVarNum;
2655 noway_assert(varDsc->lvIsParam);
2657 // Skip if arg not passed in a register.
2658 if (!varDsc->lvIsRegArg)
2661 // Register argument
2662 noway_assert(isRegParamType(genActualType(varDsc->TypeGet())));
2664 // Is register argument already in the right register?
2665 // If not load it from its stack location.
2666 regNumber argReg = varDsc->lvArgReg; // incoming arg register
2667 regNumber argRegNext = REG_NA;
2669 #ifdef _TARGET_ARM64_
2670 if (varDsc->lvRegNum != argReg)
2672 var_types loadType = TYP_UNDEF;
2673 if (varTypeIsStruct(varDsc))
2675 // Must be <= 16 bytes or else it wouldn't be passed in registers
2676 noway_assert(EA_SIZE_IN_BYTES(varDsc->lvSize()) <= MAX_PASS_MULTIREG_BYTES);
2677 loadType = compiler->getJitGCType(varDsc->lvGcLayout[0]);
2681 loadType = compiler->mangleVarArgsType(genActualType(varDsc->TypeGet()));
2683 emitAttr loadSize = emitActualTypeSize(loadType);
2684 getEmitter()->emitIns_R_S(ins_Load(loadType), loadSize, argReg, varNum, 0);
2686 // Update argReg life and GC Info to indicate varDsc stack slot is dead and argReg is going live.
2687 // Note that we cannot modify varDsc->lvRegNum here because another basic block may not be expecting it.
2688 // Therefore manually update life of argReg. Note that GT_JMP marks the end of the basic block
2689 // and after which reg life and gc info will be recomputed for the new block in genCodeForBBList().
2690 regSet.AddMaskVars(genRegMask(argReg));
2691 gcInfo.gcMarkRegPtrVal(argReg, loadType);
2693 if (compiler->lvaIsMultiregStruct(varDsc))
2695 if (varDsc->lvIsHfa())
2697 NYI_ARM64("CodeGen::genJmpMethod with multireg HFA arg");
2700 // Restore the second register.
2701 argRegNext = genRegArgNext(argReg);
2703 loadType = compiler->getJitGCType(varDsc->lvGcLayout[1]);
2704 loadSize = emitActualTypeSize(loadType);
2705 getEmitter()->emitIns_R_S(ins_Load(loadType), loadSize, argRegNext, varNum, TARGET_POINTER_SIZE);
2707 regSet.AddMaskVars(genRegMask(argRegNext));
2708 gcInfo.gcMarkRegPtrVal(argRegNext, loadType);
2711 if (compiler->lvaIsGCTracked(varDsc))
2713 VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varNum);
2717 // In case of a jmp call to a vararg method ensure only integer registers are passed.
2718 if (compiler->info.compIsVarArgs)
2720 assert((genRegMask(argReg) & RBM_ARG_REGS) != RBM_NONE);
2722 fixedIntArgMask |= genRegMask(argReg);
2724 if (compiler->lvaIsMultiregStruct(varDsc))
2726 assert(argRegNext != REG_NA);
2727 fixedIntArgMask |= genRegMask(argRegNext);
2730 if (argReg == REG_ARG_0)
2732 assert(firstArgVarNum == BAD_VAR_NUM);
2733 firstArgVarNum = varNum;
// NOTE(review): the statements from here to the end of the loop look like the ARM32
// counterpart of the ARM64 path above (two-register long/double, soft-FP, HFA and
// struct slot handling) - confirm against the preprocessor structure.
2737 bool twoParts = false;
2738 var_types loadType = TYP_UNDEF;
2739 if (varDsc->TypeGet() == TYP_LONG)
2743 else if (varDsc->TypeGet() == TYP_DOUBLE)
2745 if (compiler->info.compIsVarArgs || compiler->opts.compUseSoftFP)
2753 argRegNext = genRegArgNext(argReg);
2755 if (varDsc->lvRegNum != argReg)
2757 getEmitter()->emitIns_R_S(INS_ldr, EA_PTRSIZE, argReg, varNum, 0);
2758 getEmitter()->emitIns_R_S(INS_ldr, EA_PTRSIZE, argRegNext, varNum, REGSIZE_BYTES);
2761 if (compiler->info.compIsVarArgs)
2763 fixedIntArgMask |= genRegMask(argReg);
2764 fixedIntArgMask |= genRegMask(argRegNext);
2767 else if (varDsc->lvIsHfaRegArg())
2769 loadType = varDsc->GetHfaType();
2770 regNumber fieldReg = argReg;
2771 emitAttr loadSize = emitActualTypeSize(loadType);
2772 unsigned maxSize = min(varDsc->lvSize(), (LAST_FP_ARGREG + 1 - argReg) * REGSIZE_BYTES);
2774 for (unsigned ofs = 0; ofs < maxSize; ofs += (unsigned)loadSize)
2776 if (varDsc->lvRegNum != argReg)
2778 getEmitter()->emitIns_R_S(ins_Load(loadType), loadSize, fieldReg, varNum, ofs);
2780 assert(genIsValidFloatReg(fieldReg)); // we don't use register tracking for FP
2781 fieldReg = regNextOfType(fieldReg, loadType);
2784 else if (varTypeIsStruct(varDsc))
2786 regNumber slotReg = argReg;
2787 unsigned maxSize = min(varDsc->lvSize(), (REG_ARG_LAST + 1 - argReg) * REGSIZE_BYTES);
2789 for (unsigned ofs = 0; ofs < maxSize; ofs += REGSIZE_BYTES)
2791 unsigned idx = ofs / REGSIZE_BYTES;
2792 loadType = compiler->getJitGCType(varDsc->lvGcLayout[idx]);
2794 if (varDsc->lvRegNum != argReg)
2796 emitAttr loadSize = emitActualTypeSize(loadType);
2798 getEmitter()->emitIns_R_S(ins_Load(loadType), loadSize, slotReg, varNum, ofs);
2801 regSet.AddMaskVars(genRegMask(slotReg));
2802 gcInfo.gcMarkRegPtrVal(slotReg, loadType);
2803 if (genIsValidIntReg(slotReg) && compiler->info.compIsVarArgs)
2805 fixedIntArgMask |= genRegMask(slotReg);
2808 slotReg = genRegArgNext(slotReg);
2813 loadType = compiler->mangleVarArgsType(genActualType(varDsc->TypeGet()));
2815 if (varDsc->lvRegNum != argReg)
2817 getEmitter()->emitIns_R_S(ins_Load(loadType), emitTypeSize(loadType), argReg, varNum, 0);
2820 regSet.AddMaskVars(genRegMask(argReg));
2821 gcInfo.gcMarkRegPtrVal(argReg, loadType);
2823 if (genIsValidIntReg(argReg) && compiler->info.compIsVarArgs)
2825 fixedIntArgMask |= genRegMask(argReg);
2829 if (compiler->lvaIsGCTracked(varDsc))
2831 VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varNum);
2836 // Jmp call to a vararg method - if the method has fewer than fixed arguments that can be max size of reg,
2837 // load the remaining integer arg registers from the corresponding
2838 // shadow stack slots. This is for the reason that we don't know the number and type
2839 // of non-fixed params passed by the caller, therefore we have to assume the worst case
2840 // of caller passing all integer arg regs that can be max size of reg.
2842 // The caller could have passed gc-ref/byref type var args. Since these are var args
2843 // the callee has no way of knowing their gc-ness. Therefore, mark the region that loads
2844 // remaining arg registers from shadow stack slots as non-gc interruptible.
2845 if (fixedIntArgMask != RBM_NONE)
2847 assert(compiler->info.compIsVarArgs);
2848 assert(firstArgVarNum != BAD_VAR_NUM);
2850 regMaskTP remainingIntArgMask = RBM_ARG_REGS & ~fixedIntArgMask;
2851 if (remainingIntArgMask != RBM_NONE)
2853 getEmitter()->emitDisableGC();
// Walk the integer argument registers in order; argOffset is the stack offset,
// relative to the first argument's slot, that corresponds to each register.
2854 for (int argNum = 0, argOffset = 0; argNum < MAX_REG_ARG; ++argNum)
2856 regNumber argReg = intArgRegs[argNum];
2857 regMaskTP argRegMask = genRegMask(argReg);
2859 if ((remainingIntArgMask & argRegMask) != 0)
2861 remainingIntArgMask &= ~argRegMask;
2862 getEmitter()->emitIns_R_S(INS_ldr, EA_PTRSIZE, argReg, firstArgVarNum, argOffset);
2865 argOffset += REGSIZE_BYTES;
2867 getEmitter()->emitEnableGC();
2872 //------------------------------------------------------------------------
2873 // genIntToIntCast: Generate code for an integer cast
//
// Arguments:
2876 //    treeNode - The GT_CAST node
//
// Assumptions:
2882 //    The treeNode must have an assigned register.
2883 //    For a signed convert from byte, the source must be in a byte-addressable register.
2884 //    Neither the source nor target type can be a floating point type.
//
2886 // TODO-ARM64-CQ: Allow castOp to be a contained node without an assigned register.
//
2888 void CodeGen::genIntToIntCast(GenTree* treeNode)
2890 assert(treeNode->OperGet() == GT_CAST);
2892 GenTree* castOp = treeNode->gtCast.CastOp();
2893 emitter* emit = getEmitter();
2895 var_types dstType = treeNode->CastToType();
2896 var_types srcType = genActualType(castOp->TypeGet());
2897 emitAttr movSize = emitActualTypeSize(dstType);
2898 bool movRequired = false;
2900 assert(genTypeSize(srcType) <= genTypeSize(TYP_I_IMPL));
2902 regNumber targetReg = treeNode->gtRegNum;
2903 regNumber sourceReg = castOp->gtRegNum;
2905 // For Long to Int conversion we will have a reserved integer register to hold the immediate mask
// (tmpReg stays REG_NA when the node has no temp register available).
2906 regNumber tmpReg = (treeNode->AvailableTempRegCount() == 0) ? REG_NA : treeNode->GetSingleTempReg();
2908 assert(genIsValidIntReg(targetReg));
2909 assert(genIsValidIntReg(sourceReg));
2911 instruction ins = INS_invalid;
2913 genConsumeReg(castOp);
2914 Lowering::CastInfo castInfo;
2916 // Get information about the cast.
2917 Lowering::getCastDescription(treeNode, &castInfo);
// Overflow-checked cast: compare/test the source value and jump to the
// SCK_OVERFLOW throw helper block when the value does not fit.
2919 if (castInfo.requiresOverflowCheck)
2921 emitAttr cmpSize = EA_ATTR(genTypeSize(srcType));
2923 if (castInfo.signCheckOnly)
2925 // We only need to check for a negative value in sourceReg
2926 emit->emitIns_R_I(INS_cmp, cmpSize, sourceReg, 0);
2927 emitJumpKind jmpLT = genJumpKindForOper(GT_LT, CK_SIGNED);
2928 genJumpToThrowHlpBlk(jmpLT, SCK_OVERFLOW);
2929 noway_assert(genTypeSize(srcType) == 4 || genTypeSize(srcType) == 8);
2930 // This is the only interesting case to ensure zero-upper bits.
2931 if ((srcType == TYP_INT) && (dstType == TYP_ULONG))
2933 // cast to TYP_ULONG:
2934 // We use a mov with size=EA_4BYTE
2935 // which will zero out the upper bits
2940 else if (castInfo.unsignedSource || castInfo.unsignedDest)
2942 // When we are converting from/to unsigned,
2943 // we only have to check for any bits set in 'typeMask'
2945 noway_assert(castInfo.typeMask != 0);
2946 #if defined(_TARGET_ARM_)
// On ARM32 the mask is tested as an immediate only when it is encodable for tst;
// otherwise it is first materialized in tmpReg.
2947 if (arm_Valid_Imm_For_Instr(INS_tst, castInfo.typeMask, INS_FLAGS_DONT_CARE))
2949 emit->emitIns_R_I(INS_tst, cmpSize, sourceReg, castInfo.typeMask);
2953 noway_assert(tmpReg != REG_NA);
2954 instGen_Set_Reg_To_Imm(cmpSize, tmpReg, castInfo.typeMask);
2955 emit->emitIns_R_R(INS_tst, cmpSize, sourceReg, tmpReg);
2957 #elif defined(_TARGET_ARM64_)
2958 emit->emitIns_R_I(INS_tst, cmpSize, sourceReg, castInfo.typeMask);
2959 #endif // _TARGET_ARM*
2960 emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
2961 genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
2965 // For a narrowing signed cast
2967 // We must check the value is in a signed range.
2969 // Compare with the MAX
2971 noway_assert((castInfo.typeMin != 0) && (castInfo.typeMax != 0));
2973 #if defined(_TARGET_ARM_)
2974 if (emitter::emitIns_valid_imm_for_cmp(castInfo.typeMax, INS_FLAGS_DONT_CARE))
2975 #elif defined(_TARGET_ARM64_)
2976 if (emitter::emitIns_valid_imm_for_cmp(castInfo.typeMax, cmpSize))
2979 emit->emitIns_R_I(INS_cmp, cmpSize, sourceReg, castInfo.typeMax);
2983 noway_assert(tmpReg != REG_NA);
2984 instGen_Set_Reg_To_Imm(cmpSize, tmpReg, castInfo.typeMax);
2985 emit->emitIns_R_R(INS_cmp, cmpSize, sourceReg, tmpReg);
2988 emitJumpKind jmpGT = genJumpKindForOper(GT_GT, CK_SIGNED);
2989 genJumpToThrowHlpBlk(jmpGT, SCK_OVERFLOW);
2991 // Compare with the MIN
2993 #if defined(_TARGET_ARM_)
2994 if (emitter::emitIns_valid_imm_for_cmp(castInfo.typeMin, INS_FLAGS_DONT_CARE))
2995 #elif defined(_TARGET_ARM64_)
2996 if (emitter::emitIns_valid_imm_for_cmp(castInfo.typeMin, cmpSize))
2999 emit->emitIns_R_I(INS_cmp, cmpSize, sourceReg, castInfo.typeMin);
3003 noway_assert(tmpReg != REG_NA);
3004 instGen_Set_Reg_To_Imm(cmpSize, tmpReg, castInfo.typeMin);
3005 emit->emitIns_R_R(INS_cmp, cmpSize, sourceReg, tmpReg);
3008 emitJumpKind jmpLT = genJumpKindForOper(GT_LT, CK_SIGNED);
3009 genJumpToThrowHlpBlk(jmpLT, SCK_OVERFLOW);
3013 else // Non-overflow checking cast.
3015 if (genTypeSize(srcType) == genTypeSize(dstType))
3021 var_types extendType = TYP_UNKNOWN;
3023 if (genTypeSize(srcType) < genTypeSize(dstType))
3025 // If we need to treat a signed type as unsigned
3026 if ((treeNode->gtFlags & GTF_UNSIGNED) != 0)
3028 extendType = genUnsignedType(srcType);
3031 extendType = srcType;
3033 movSize = emitTypeSize(extendType);
3034 #endif // _TARGET_ARM_
3035 if (extendType == TYP_UINT)
3037 #ifdef _TARGET_ARM64_
3038 // If we are casting from a smaller type to
3039 // a larger type, then we need to make sure the
3040 // higher 4 bytes are zero to guarantee the correct value.
3041 // Therefore using a mov with EA_4BYTE in place of EA_8BYTE
3042 // will zero the upper bits
3044 #endif // _TARGET_ARM64_
3048 else // (genTypeSize(srcType) > genTypeSize(dstType))
3050 // If we need to treat a signed type as unsigned
3051 if ((treeNode->gtFlags & GTF_UNSIGNED) != 0)
3053 extendType = genUnsignedType(dstType);
3056 extendType = dstType;
3057 #if defined(_TARGET_ARM_)
3058 movSize = emitTypeSize(extendType);
3059 #elif defined(_TARGET_ARM64_)
3060 if (extendType == TYP_INT)
3062 movSize = EA_8BYTE; // a sxtw instruction requires EA_8BYTE
3067 ins = ins_Move_Extend(extendType, true);
3071 // We should never be generating a load from memory instruction here!
3072 assert(!emit->emitInsIsLoad(ins));
// Emit the instruction unless it would be a plain mov of a register onto itself
// that is not explicitly required.
3074 if ((ins != INS_mov) || movRequired || (targetReg != sourceReg))
3076 emit->emitIns_R_R(ins, movSize, targetReg, sourceReg);
3079 genProduceReg(treeNode);
3082 //------------------------------------------------------------------------
3083 // genFloatToFloatCast: Generate code for a cast between float and double
//
// Arguments:
3086 //    treeNode - The GT_CAST node
//
// Assumptions:
3092 //    Cast is a non-overflow conversion.
3093 //    The treeNode must have an assigned register.
3094 //    The cast is between float and double.
//
3096 void CodeGen::genFloatToFloatCast(GenTree* treeNode)
3098 // float <--> double conversions are always non-overflow ones
3099 assert(treeNode->OperGet() == GT_CAST);
3100 assert(!treeNode->gtOverflow());
3102 regNumber targetReg = treeNode->gtRegNum;
3103 assert(genIsValidFloatReg(targetReg));
3105 GenTree* op1 = treeNode->gtOp.gtOp1;
3106 assert(!op1->isContained()); // Cannot be contained
3107 assert(genIsValidFloatReg(op1->gtRegNum)); // Must be a valid float reg.
3109 var_types dstType = treeNode->CastToType();
3110 var_types srcType = op1->TypeGet();
3111 assert(varTypeIsFloating(srcType) && varTypeIsFloating(dstType));
3113 genConsumeOperands(treeNode->AsOp());
3115 // treeNode must be a reg
3116 assert(!treeNode->isContained());
3118 #if defined(_TARGET_ARM_)
// ARM32: use vcvt when the types differ; for a same-type cast only a vmov is
// needed, and only when source and target registers differ.
3120 if (srcType != dstType)
3122 instruction insVcvt = (srcType == TYP_FLOAT) ? INS_vcvt_f2d // convert Float to Double
3123 : INS_vcvt_d2f; // convert Double to Float
3125 getEmitter()->emitIns_R_R(insVcvt, emitTypeSize(treeNode), treeNode->gtRegNum, op1->gtRegNum);
3127 else if (treeNode->gtRegNum != op1->gtRegNum)
3129 getEmitter()->emitIns_R_R(INS_vmov, emitTypeSize(treeNode), treeNode->gtRegNum, op1->gtRegNum);
3132 #elif defined(_TARGET_ARM64_)
// ARM64: use fcvt with the matching single/double option when the types differ.
3134 if (srcType != dstType)
3136 insOpts cvtOption = (srcType == TYP_FLOAT) ? INS_OPTS_S_TO_D // convert Single to Double
3137 : INS_OPTS_D_TO_S; // convert Double to Single
3139 getEmitter()->emitIns_R_R(INS_fcvt, emitActualTypeSize(treeNode), treeNode->gtRegNum, op1->gtRegNum, cvtOption);
3141 else if (treeNode->gtRegNum != op1->gtRegNum)
3143 // If double to double cast or float to float cast. Emit a move instruction.
3144 getEmitter()->emitIns_R_R(INS_mov, emitActualTypeSize(treeNode), treeNode->gtRegNum, op1->gtRegNum);
3149 genProduceReg(treeNode);
3152 //------------------------------------------------------------------------
3153 // genCreateAndStoreGCInfo: Create and record GC Info for the function.
//
// Arguments:
//    codeSize   - forwarded to gcInfoBlockHdrSave() and gcMakeRegPtrTable()
//    prologSize - forwarded to gcInfoBlockHdrSave() and gcMakeRegPtrTable()
//    epilogSize - presumably the epilog size; appears unused in this function - TODO confirm
//    codePtr    - (debug only) appears unused in this function - TODO confirm
//
// Notes:
//    gcMakeRegPtrTable() is called twice: first in MAKE_REG_PTR_MODE_ASSIGN_SLOTS mode,
//    then (after FinalizeSlotIds) in MAKE_REG_PTR_MODE_DO_WORK mode. The encoder is then
//    built and emitted, and the result is stored in compiler->compInfoBlkAddr.
//
3155 void CodeGen::genCreateAndStoreGCInfo(unsigned codeSize,
3156 unsigned prologSize,
3157 unsigned epilogSize DEBUGARG(void* codePtr))
3159 IAllocator* allowZeroAlloc = new (compiler, CMK_GC) CompIAllocator(compiler->getAllocatorGC());
3160 GcInfoEncoder* gcInfoEncoder = new (compiler, CMK_GC)
3161 GcInfoEncoder(compiler->info.compCompHnd, compiler->info.compMethodInfo, allowZeroAlloc, NOMEM);
3162 assert(gcInfoEncoder != nullptr);
3164 // Follow the code pattern of the x86 gc info encoder (genCreateAndStoreGCInfoJIT32).
3165 gcInfo.gcInfoBlockHdrSave(gcInfoEncoder, codeSize, prologSize);
3167 // We keep the call count for the second call to gcMakeRegPtrTable() below.
3168 unsigned callCnt = 0;
3170 // First we figure out the encoder ID's for the stack slots and registers.
3171 gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_ASSIGN_SLOTS, &callCnt);
3173 // Now we've requested all the slots we'll need; "finalize" these (make more compact data structures for them).
3174 gcInfoEncoder->FinalizeSlotIds();
3176 // Now we can actually use those slot ID's to declare live ranges.
3177 gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_DO_WORK, &callCnt);
3179 #ifdef _TARGET_ARM64_
3181 if (compiler->opts.compDbgEnC)
3183 // what we have to preserve is called the "frame header" (see comments in VM\eetwain.cpp)
3187 // -saved 'this' pointer and bool for synchronized methods
3189 // 4 slots for RBP + return address + RSI + RDI
// NOTE(review): the register names above are x64 names although this code is under
// _TARGET_ARM64_ - confirm which four ARM64 slots are intended.
3190 int preservedAreaSize = 4 * REGSIZE_BYTES;
3192 if (compiler->info.compFlags & CORINFO_FLG_SYNCH)
3194 if (!(compiler->info.compFlags & CORINFO_FLG_STATIC))
3195 preservedAreaSize += REGSIZE_BYTES;
3197 preservedAreaSize += 1; // bool for synchronized methods
3200 // Used to signal both that the method is compiled for EnC, and also the size of the block at the top of the
// frame that has to be preserved.
3202 gcInfoEncoder->SetSizeOfEditAndContinuePreservedArea(preservedAreaSize);
3205 #endif // _TARGET_ARM64_
3207 gcInfoEncoder->Build();
3209 // GC Encoder automatically puts the GC info in the right spot using ICorJitInfo::allocGCInfo(size_t)
3210 // let's save the values anyway for debugging purposes
3211 compiler->compInfoBlkAddr = gcInfoEncoder->Emit();
3212 compiler->compInfoBlkSize = 0; // not exposed by the GCEncoder interface
3215 //-------------------------------------------------------------------------------------------
3216 // genJumpKindsForTree: Determine the number and kinds of conditional branches
3217 // necessary to implement the given comparison node
//
// Arguments:
3220 //    cmpTree - (input) The GenTree relop node that was used to set the condition codes;
3221 //              its operand type and its GTF_UNSIGNED / GTF_RELOP_NAN_UN flags select the branch kinds
3222 //    jmpKind[2] - (output) One or two conditional branch instructions
3223 //    jmpToTrueLabel[2] - (output) Always set to true: on ARM and ARM64 both branches branch to the true label
//
// Return Value:
3226 //    Sets the proper values into the array elements of jmpKind[] and jmpToTrueLabel[]
//
// Assumptions:
3229 //    At least one conditional branch instruction will be returned.
3230 //    Typically only one conditional branch is needed
3231 //    and the second jmpKind[] value is set to EJ_NONE
//
3233 void CodeGen::genJumpKindsForTree(GenTree* cmpTree, emitJumpKind jmpKind[2], bool jmpToTrueLabel[2])
3235 // On ARM both branches will always branch to the true label
3236 jmpToTrueLabel[0] = true;
3237 jmpToTrueLabel[1] = true;
3239 // For integer comparisons just use genJumpKindForOper
3240 if (!varTypeIsFloating(cmpTree->gtOp.gtOp1))
3242 CompareKind compareKind = ((cmpTree->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED;
3243 jmpKind[0] = genJumpKindForOper(cmpTree->gtOper, compareKind);
3244 jmpKind[1] = EJ_NONE;
3246 else // We have a Floating Point Compare operation
3248 assert(cmpTree->OperIsCompare());
3250 // For details on this mapping, see the ARM Condition Code table
3251 // at section A8.3 in the ARMv7 architecture manual or
3252 // at section C1.2.3 in the ARMV8 architecture manual.
3254 // We must check the GTF_RELOP_NAN_UN to find out
3255 // if we need to branch when we have a NaN operand.
3257 if ((cmpTree->gtFlags & GTF_RELOP_NAN_UN) != 0)
3259 // Must branch if we have an NaN, unordered
3260 switch (cmpTree->gtOper)
3263 jmpKind[0] = EJ_eq; // branch or set when equal (and no NaN's)
3264 jmpKind[1] = EJ_vs; // branch or set when we have a NaN
3268 jmpKind[0] = EJ_ne; // branch or set when not equal (or have NaN's)
3269 jmpKind[1] = EJ_NONE;
3273 jmpKind[0] = EJ_lt; // branch or set when less than (or have NaN's)
3274 jmpKind[1] = EJ_NONE;
3278 jmpKind[0] = EJ_le; // branch or set when less than or equal (or have NaN's)
3279 jmpKind[1] = EJ_NONE;
3283 jmpKind[0] = EJ_hi; // branch or set when greater than (or have NaN's)
3284 jmpKind[1] = EJ_NONE;
3288 jmpKind[0] = EJ_hs; // branch or set when greater than or equal (or have NaN's)
3289 jmpKind[1] = EJ_NONE;
3296 else // ((cmpTree->gtFlags & GTF_RELOP_NAN_UN) == 0)
3298 // Do not branch if we have an NaN, unordered
3299 switch (cmpTree->gtOper)
3302 jmpKind[0] = EJ_eq; // branch or set when equal (and no NaN's)
3303 jmpKind[1] = EJ_NONE;
// This is the only ordered case that needs two branches.
3307 jmpKind[0] = EJ_gt; // branch or set when greater than (and no NaN's)
3308 jmpKind[1] = EJ_lo; // branch or set when less than (and no NaN's)
3312 jmpKind[0] = EJ_lo; // branch or set when less than (and no NaN's)
3313 jmpKind[1] = EJ_NONE;
3317 jmpKind[0] = EJ_ls; // branch or set when less than or equal (and no NaN's)
3318 jmpKind[1] = EJ_NONE;
3322 jmpKind[0] = EJ_gt; // branch or set when greater than (and no NaN's)
3323 jmpKind[1] = EJ_NONE;
3327 jmpKind[0] = EJ_ge; // branch or set when greater than or equal (and no NaN's)
3328 jmpKind[1] = EJ_NONE;
3338 //------------------------------------------------------------------------
3339 // genCodeForJumpTrue: Generates code for jmpTrue statement.
//
// Arguments:
3342 //    tree - The GT_JTRUE tree node; its first operand must be a compare node.
//
// Notes:
//    Emits one or two conditional jumps (as chosen by genJumpKindsForTree) to the
//    current block's bbJumpDest.
//
3347 void CodeGen::genCodeForJumpTrue(GenTree* tree)
3349 GenTree* cmp = tree->gtOp.gtOp1;
3350 assert(cmp->OperIsCompare());
3351 assert(compiler->compCurBB->bbJumpKind == BBJ_COND);
3353 // Get the "kind" and type of the comparison. Note that whether it is an unsigned cmp
3354 // is governed by a flag NOT by the inherent type of the node
3355 emitJumpKind jumpKind[2];
3356 bool branchToTrueLabel[2];
3357 genJumpKindsForTree(cmp, jumpKind, branchToTrueLabel);
3358 assert(jumpKind[0] != EJ_NONE);
3360 // On ARM the branches will always branch to the true label
3361 assert(branchToTrueLabel[0]);
3362 inst_JMP(jumpKind[0], compiler->compCurBB->bbJumpDest);
3364 if (jumpKind[1] != EJ_NONE)
3366 // the second conditional branch always has to be to the true label
3367 assert(branchToTrueLabel[1]);
3368 inst_JMP(jumpKind[1], compiler->compCurBB->bbJumpDest);
3372 //------------------------------------------------------------------------
3373 // genCodeForJcc: Produce code for a GT_JCC node.
//
// Emits a single conditional branch to the current block's bbJumpDest. The branch
// condition comes from the node's gtCondition; the node has no comparison operand here,
// so signedness is taken from the GTF_UNSIGNED flag on the node itself.
//
3378 void CodeGen::genCodeForJcc(GenTreeCC* tree)
// A conditional jump is only expected at the end of a BBJ_COND block.
3380 assert(compiler->compCurBB->bbJumpKind == BBJ_COND);
// Map (condition, signedness) to the emitter's jump kind.
3382 CompareKind compareKind = ((tree->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED;
3383 emitJumpKind jumpKind = genJumpKindForOper(tree->gtCondition, compareKind);
3385 inst_JMP(jumpKind, compiler->compCurBB->bbJumpDest);
3388 //------------------------------------------------------------------------
3389 // genCodeForSetcc: Generates code for a GT_SETCC node.
//
// Materializes the node's condition as 0 or 1 in the node's register and then
// reports the register as produced via genProduceReg.
//
3392 // setcc - the GT_SETCC node
3395 // The condition represents an integer comparison. This code doesn't
3396 // have the necessary logic to deal with floating point comparisons,
3397 // in fact it doesn't even know if the comparison is integer or floating
3398 // point because SETCC nodes do not have any operands.
3401 void CodeGen::genCodeForSetcc(GenTreeCC* setcc)
// Destination register and the jump kind derived from the node's condition and signedness.
3403 regNumber dstReg = setcc->gtRegNum;
3404 CompareKind compareKind = setcc->IsUnsigned() ? CK_UNSIGNED : CK_SIGNED;
3405 emitJumpKind jumpKind = genJumpKindForOper(setcc->gtCondition, compareKind);
// The result is an integer 0/1, so it must land in an integer register.
3407 assert(genIsValidIntReg(dstReg));
3408 // Make sure nobody is setting GTF_RELOP_NAN_UN on this node as it is ignored.
3409 assert((setcc->gtFlags & GTF_RELOP_NAN_UN) == 0);
// ARM64: a single inst_SET call produces the value directly from the condition.
3411 #ifdef _TARGET_ARM64_
3412 inst_SET(jumpKind, dstReg);
// NOTE(review): the sequence below looks like the alternative (non-ARM64) path; confirm the
// placement of the preprocessor branch that separates it from the inst_SET call above.
3414 // Emit code like this:
// Branch to labelTrue when the condition holds.
3424 BasicBlock* labelTrue = genCreateTempLabel();
3425 getEmitter()->emitIns_J(emitter::emitJumpKindToIns(jumpKind), labelTrue);
// Condition false: dstReg = 0, then skip over the "true" assignment.
3427 getEmitter()->emitIns_R_I(INS_mov, emitActualTypeSize(setcc->TypeGet()), dstReg, 0);
3429 BasicBlock* labelNext = genCreateTempLabel();
3430 getEmitter()->emitIns_J(INS_b, labelNext);
// Condition true: dstReg = 1.
3432 genDefineTempLabel(labelTrue);
3433 getEmitter()->emitIns_R_I(INS_mov, emitActualTypeSize(setcc->TypeGet()), dstReg, 1);
3434 genDefineTempLabel(labelNext);
3437 genProduceReg(setcc);
3440 //------------------------------------------------------------------------
3441 // genCodeForStoreBlk: Produce code for a GT_STORE_OBJ/GT_STORE_DYN_BLK/GT_STORE_BLK node.
//
// Dispatches to the copy/init block generators according to the node's gtBlkOpKind.
// When the node is marked gtBlkOpGcUnsafe the generated code is bracketed by
// emitDisableGC / emitEnableGC.
//
3446 void CodeGen::genCodeForStoreBlk(GenTreeBlk* blkOp)
3448 assert(blkOp->OperIs(GT_STORE_OBJ, GT_STORE_DYN_BLK, GT_STORE_BLK));
// A GT_STORE_OBJ that is a copy is handed to genCodeForCpObj; the assert below expects
// such an object to contain at least one GC pointer.
// NOTE(review): presumably this path finishes right after genCodeForCpObj so that the code
// further down only runs for the remaining cases - confirm.
3450 if (blkOp->OperIs(GT_STORE_OBJ) && blkOp->OperIsCopyBlkOp())
3452 assert(blkOp->AsObj()->gtGcPtrCount != 0);
3453 genCodeForCpObj(blkOp->AsObj());
// Disable GC around the block operation when the node asks for it.
3457 if (blkOp->gtBlkOpGcUnsafe)
3459 getEmitter()->emitDisableGC();
// Copy vs. init: presumably used to pick between the CpBlk and InitBlk generator in each case below.
3461 bool isCopyBlk = blkOp->OperIsCopyBlkOp();
3463 switch (blkOp->gtBlkOpKind)
// Helper kind: handled by genCodeForCpBlk / genCodeForInitBlk.
3465 case GenTreeBlk::BlkOpKindHelper:
3468 genCodeForCpBlk(blkOp);
3472 genCodeForInitBlk(blkOp);
// Unroll kind: handled by genCodeForCpBlkUnroll / genCodeForInitBlkUnroll.
3476 case GenTreeBlk::BlkOpKindUnroll:
3479 genCodeForCpBlkUnroll(blkOp);
3483 genCodeForInitBlkUnroll(blkOp);
// Re-enable GC, mirroring the emitDisableGC call made before the switch.
3491 if (blkOp->gtBlkOpGcUnsafe)
3493 getEmitter()->emitEnableGC();
3497 //------------------------------------------------------------------------
3498 // genScaledAdd: A helper for genLeaInstruction.
//
// Emits one INS_add of 'baseReg' and 'indexReg' into 'targetReg', passing 'scale' as the
// immediate together with INS_OPTS_LSL. The callers in genLeaInstruction pass a shift
// count (the bit index of the LEA scale) as 'scale', not the multiplier itself.
//
//    attr      - emit size attribute for the add
//    targetReg - register receiving the result
//    baseReg   - first (unshifted) source register
//    indexReg  - second source register, the one INS_OPTS_LSL applies to
//    scale     - immediate passed with INS_OPTS_LSL
//
3500 void CodeGen::genScaledAdd(emitAttr attr, regNumber targetReg, regNumber baseReg, regNumber indexReg, int scale)
3502 emitter* emit = getEmitter();
// The two targets differ only in the emitter signature: ARM additionally takes a flags argument.
3503 #if defined(_TARGET_ARM_)
3504 emit->emitIns_R_R_R_I(INS_add, attr, targetReg, baseReg, indexReg, scale, INS_FLAGS_DONT_CARE, INS_OPTS_LSL);
3505 #elif defined(_TARGET_ARM64_)
3506 emit->emitIns_R_R_R_I(INS_add, attr, targetReg, baseReg, indexReg, scale, INS_OPTS_LSL);
3510 //------------------------------------------------------------------------
3511 // genLeaInstruction: Produce code for a GT_LEA node.
//
// Computes Base + Index*Scale + Offset into the node's register using one or more add/mov
// instructions. Three shapes are distinguished: base and index, base only, and index only
// (the last one is not expected and asserts).
//
3516 void CodeGen::genLeaInstruction(GenTreeAddrMode* lea)
3518 genConsumeOperands(lea);
3519 emitter* emit = getEmitter();
3520 emitAttr size = emitTypeSize(lea);
3521 int offset = lea->Offset();
3523 // In ARM we can only load addresses of the form:
3525 // [Base + index*scale]
3527 // [Literal] (PC-Relative)
3529 // So for the case of a LEA node of the form [Base + Index*Scale + Offset] we will generate:
3530 // destReg = baseReg + indexReg * scale;
3531 // destReg = destReg + offset;
3533 // TODO-ARM64-CQ: The purpose of the GT_LEA node is to directly reflect a single target architecture
3534 // addressing mode instruction. Currently we're 'cheating' by producing one or more
3535 // instructions to generate the addressing mode so we need to modify lowering to
3536 // produce LEAs that are a 1:1 relationship to the ARM64 architecture.
// Case 1: both a base and an index are present.
3537 if (lea->Base() && lea->Index())
3539 GenTree* memBase = lea->Base();
3540 GenTree* index = lea->Index();
// The scale must be a power of two so it can be applied as a left shift; 'lsl' receives
// the bit index of the scale from BitScanForward.
3544 assert(isPow2(lea->gtScale));
3545 BitScanForward(&lsl, lea->gtScale);
// A temp register holds the intermediate sum.
// NOTE(review): presumably this temp-register path is taken only for a non-zero offset,
// with the plain [base + index*scale] forms further down covering offset == 0 - confirm.
3551 regNumber tmpReg = lea->GetSingleTempReg();
// Small offset: the offset can be encoded directly as an add immediate.
3553 if (emitter::emitIns_valid_imm_for_add(offset))
3557 // Generate code to set tmpReg = base + index*scale
3558 genScaledAdd(size, tmpReg, memBase->gtRegNum, index->gtRegNum, lsl);
3562 // Generate code to set tmpReg = base + index
3563 emit->emitIns_R_R_R(INS_add, size, tmpReg, memBase->gtRegNum, index->gtRegNum);
3566 // Then compute target reg from [tmpReg + offset]
3567 emit->emitIns_R_R_I(INS_add, size, lea->gtRegNum, tmpReg, offset);
3569 else // large offset
3571 // First load/store tmpReg with the large offset constant
3572 instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, offset);
3573 // Then add the base register
3575 emit->emitIns_R_R_R(INS_add, size, tmpReg, tmpReg, memBase->gtRegNum);
// tmpReg now holds base + offset and is about to be combined with the index register,
// so the two must not be the same register.
3577 noway_assert(tmpReg != index->gtRegNum);
3579 // Then compute target reg from [tmpReg + index*scale]
3580 genScaledAdd(size, lea->gtRegNum, tmpReg, index->gtRegNum, lsl);
3587 // Then compute target reg from [base + index*scale]
3588 genScaledAdd(size, lea->gtRegNum, memBase->gtRegNum, index->gtRegNum, lsl);
3592 // Then compute target reg from [base + index]
3593 emit->emitIns_R_R_R(INS_add, size, lea->gtRegNum, memBase->gtRegNum, index->gtRegNum);
// Case 2: base only.
3597 else if (lea->Base())
3599 GenTree* memBase = lea->Base();
// Offset encodable as an add immediate: either add it, or just copy the base when it is zero.
3601 if (emitter::emitIns_valid_imm_for_add(offset))
3605 // Then compute target reg from [memBase + offset]
3606 emit->emitIns_R_R_I(INS_add, size, lea->gtRegNum, memBase->gtRegNum, offset);
3608 else // offset is zero
// No instruction is needed when the base already lives in the target register.
3610 if (lea->gtRegNum != memBase->gtRegNum)
3612 emit->emitIns_R_R(INS_mov, size, lea->gtRegNum, memBase->gtRegNum);
3618 // We require a tmpReg to hold the offset
3619 regNumber tmpReg = lea->GetSingleTempReg();
3621 // First load tmpReg with the large offset constant
3622 instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, offset);
3624 // Then compute target reg from [memBase + tmpReg]
3625 emit->emitIns_R_R_R(INS_add, size, lea->gtRegNum, memBase->gtRegNum, tmpReg);
// Case 3: index without a base - not expected here.
3628 else if (lea->Index())
3630 // If we encounter a GT_LEA node without a base it means it came out
3631 // when attempting to optimize an arbitrary arithmetic expression during lower.
3632 // This is currently disabled in ARM64 since we need to adjust lower to account
3633 // for the simpler instructions ARM64 supports.
3634 // TODO-ARM64-CQ: Fix this and let LEA optimize arithmetic trees too.
3635 assert(!"We shouldn't see a baseless address computation during CodeGen for ARM64");
3641 //------------------------------------------------------------------------
3642 // isStructReturn: Returns whether the 'treeNode' is returning a struct.
3645 // treeNode - The tree node to evaluate whether is a struct return.
3648 // Returns true if the 'treeNode' is a GT_RETURN node of type struct.
3649 // Otherwise returns false.
3651 bool CodeGen::isStructReturn(GenTree* treeNode)
3653 // This method could be called for 'treeNode' of GT_RETFILT or GT_RETURN.
3654 // For the GT_RETFILT, the return is always
3655 // a bool or a void, for the end of a finally block.
3656 noway_assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT);
// The answer is decided purely by the node's type.
3658 return varTypeIsStruct(treeNode);
3661 //------------------------------------------------------------------------
3662 // genStructReturn: Generates code for returning a struct.
//
// Places the pieces of a multi-register struct return value into the ABI return registers.
// Two operand shapes are handled:
//   - a GT_LCL_VAR: each piece is loaded from the local's stack home when the operand is
//     contained, or extracted from a SIMD register otherwise (ARM64 only; NYI on ARM);
//   - a multi-reg GT_CALL (possibly under GT_COPY/GT_RELOAD): the registers the call's
//     results were allocated to are shuffled into the ABI return registers.
//
3665 // treeNode - The GT_RETURN tree node.
3671 // op1 of GT_RETURN node is either GT_LCL_VAR or multi-reg GT_CALL
3672 void CodeGen::genStructReturn(GenTree* treeNode)
3674 assert(treeNode->OperGet() == GT_RETURN);
3675 assert(isStructReturn(treeNode));
3676 GenTree* op1 = treeNode->gtGetOp1();
// Case 1: the returned value is a local variable.
3678 if (op1->OperGet() == GT_LCL_VAR)
3680 GenTreeLclVarCommon* lclVar = op1->AsLclVarCommon();
3681 LclVarDsc* varDsc = &(compiler->lvaTable[lclVar->gtLclNum]);
3682 var_types lclType = genActualType(varDsc->TypeGet());
3684 assert(varTypeIsStruct(lclType));
3685 assert(varDsc->lvIsMultiRegRet);
// Describe how the struct is split across return registers, based on its class handle.
3687 ReturnTypeDesc retTypeDesc;
3690 retTypeDesc.InitializeStructReturnType(compiler, varDsc->lvVerTypeInfo.GetClassHandle());
3691 regCount = retTypeDesc.GetReturnRegCount();
// This path only deals with values returned in two or more registers.
3693 assert(regCount >= 2);
// Only a SIMD-typed local may be non-contained here; everything else is read from the stack.
3695 assert(varTypeIsSIMD(lclType) || op1->isContained());
3697 if (op1->isContained())
3699 // Copy var on stack into ABI return registers
3700 // TODO: It could be optimized by reducing two float loading to one double
// Load piece i from the local at the running 'offset', then advance by that piece's size.
3702 for (unsigned i = 0; i < regCount; ++i)
3704 var_types type = retTypeDesc.GetReturnRegType(i);
3705 regNumber reg = retTypeDesc.GetABIReturnReg(i);
3706 getEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), reg, lclVar->gtLclNum, offset);
3707 offset += genTypeSize(type);
3712 // Handle SIMD genStructReturn case
3713 NYI_ARM("SIMD genStructReturn");
3715 #ifdef _TARGET_ARM64_
// The SIMD value lives in a single register; consume it and split it into the return registers.
3716 genConsumeRegs(op1);
3717 regNumber src = op1->gtRegNum;
3719 // Treat src register as a homogeneous vector with element size equal to the reg size
3720 // Insert pieces in order
3721 for (unsigned i = 0; i < regCount; ++i)
3723 var_types type = retTypeDesc.GetReturnRegType(i);
3724 regNumber reg = retTypeDesc.GetABIReturnReg(i);
3725 if (varTypeIsFloating(type))
3727 // If the register piece is to be passed in a floating point register
3728 // Use a vector mov element instruction
3729 // reg is not a vector, so it is in the first element reg[0]
3730 // mov reg[0], src[i]
3731 // This effectively moves from `src[i]` to `reg[0]`, upper bits of reg remain unchanged
3732 // For the case where src == reg, since we are only writing reg[0], as long as we iterate
3733 // so that src[0] is consumed before writing reg[0], we do not need a temporary.
3734 getEmitter()->emitIns_R_R_I_I(INS_mov, emitTypeSize(type), reg, src, 0, i);
3738 // If the register piece is to be passed in an integer register
3739 // Use a vector mov to general purpose register instruction
3741 // This effectively moves from `src[i]` to `reg`
3742 getEmitter()->emitIns_R_R_I(INS_mov, emitTypeSize(type), reg, src, i);
3745 #endif // _TARGET_ARM64_
// Case 2: the returned value is the result of a multi-register call.
3748 else // op1 must be multi-reg GT_CALL
3750 assert(op1->IsMultiRegCall() || op1->IsCopyOrReloadOfMultiRegCall());
3752 genConsumeRegs(op1);
// Look through any GT_COPY/GT_RELOAD to reach the call itself.
3754 GenTree* actualOp1 = op1->gtSkipReloadOrCopy();
3755 GenTreeCall* call = actualOp1->AsCall();
3757 ReturnTypeDesc* pRetTypeDesc;
// Counts the positions whose value already sits in its ABI return register.
3759 unsigned matchingCount = 0;
3761 pRetTypeDesc = call->GetReturnTypeDesc();
3762 regCount = pRetTypeDesc->GetReturnRegCount();
// Per-position bookkeeping: piece type, required ABI register, and register currently holding the piece.
3764 var_types regType[MAX_RET_REG_COUNT];
3765 regNumber returnReg[MAX_RET_REG_COUNT];
3766 regNumber allocatedReg[MAX_RET_REG_COUNT];
// Masks of registers that still have to be moved from (src) and moved into (dst).
3767 regMaskTP srcRegsMask = 0;
3768 regMaskTP dstRegsMask = 0;
3769 bool needToShuffleRegs = false; // Set to true if we have to move any registers
// First pass: record where every piece is and where it has to go.
3771 for (unsigned i = 0; i < regCount; ++i)
3773 regType[i] = pRetTypeDesc->GetReturnRegType(i);
3774 returnReg[i] = pRetTypeDesc->GetABIReturnReg(i);
3776 regNumber reloadReg = REG_NA;
3777 if (op1->IsCopyOrReload())
3779 // GT_COPY/GT_RELOAD will have valid reg for those positions
3780 // that need to be copied or reloaded.
3781 reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(i);
// Prefer the copy/reload register when one was assigned; otherwise use the call's own register.
3784 if (reloadReg != REG_NA)
3786 allocatedReg[i] = reloadReg;
3790 allocatedReg[i] = call->GetRegNumByIdx(i);
// Already in place: nothing to move for this position.
3793 if (returnReg[i] == allocatedReg[i])
3797 else // We need to move this value
3799 // We want to move the value from allocatedReg[i] into returnReg[i]
3800 // so record these two registers in the src and dst masks
3802 srcRegsMask |= genRegMask(allocatedReg[i]);
3803 dstRegsMask |= genRegMask(returnReg[i]);
3805 needToShuffleRegs = true;
// Second pass: perform the moves, one register per iteration, without clobbering a
// register that still holds a value waiting to be moved.
3809 if (needToShuffleRegs)
3811 assert(matchingCount < regCount);
3813 unsigned remainingRegCount = regCount - matchingCount;
// Registers reserved on the GT_RETURN node; used as scratch to break circular dependencies.
3814 regMaskTP extraRegMask = treeNode->gtRsvdRegs;
3816 while (remainingRegCount > 0)
3818 // set 'available' to the 'dst' registers that are not currently holding 'src' registers
3820 regMaskTP availableMask = dstRegsMask & ~srcRegsMask;
3825 var_types curType = TYP_UNKNOWN;
3826 regNumber freeUpReg = REG_NA;
// No destination is free: every pending destination still holds a pending source.
3828 if (availableMask == 0)
3830 // Circular register dependencies
3831 // So just free up the lowest register in dstRegsMask by moving it to the 'extra' register
3833 assert(dstRegsMask == srcRegsMask); // this has to be true for us to reach here
3834 assert(extraRegMask != 0); // we require an 'extra' register
3835 assert((extraRegMask & ~dstRegsMask) != 0); // it can't be part of dstRegsMask
3837 availableMask = extraRegMask & ~dstRegsMask;
3839 regMaskTP srcMask = genFindLowestBit(srcRegsMask);
3840 freeUpReg = genRegNumFromMask(srcMask);
// Pick the lowest-numbered register from the available set as the destination of this move.
3843 dstMask = genFindLowestBit(availableMask);
3844 dstReg = genRegNumFromMask(dstMask);
3847 if (freeUpReg != REG_NA)
3849 // We will free up the srcReg by moving it to dstReg which is an extra register
3853 // Find the 'srcReg' and set 'curType', change allocatedReg[] to dstReg
3854 // and add the new register mask bit to srcRegsMask
3856 for (unsigned i = 0; i < regCount; ++i)
3858 if (allocatedReg[i] == srcReg)
3860 curType = regType[i];
3861 allocatedReg[i] = dstReg;
3862 srcRegsMask |= genRegMask(dstReg);
3866 else // The normal case
3868 // Find the 'srcReg' and set 'curType'
3870 for (unsigned i = 0; i < regCount; ++i)
3872 if (returnReg[i] == dstReg)
3874 srcReg = allocatedReg[i];
3875 curType = regType[i];
3878 // After we perform this move we will have one fewer register to set up
3879 remainingRegCount--;
3881 assert(curType != TYP_UNKNOWN);
// Emit the actual register-to-register copy for this step.
3883 inst_RV_RV(ins_Copy(curType), dstReg, srcReg, curType);
3885 // Clear the appropriate bits in srcRegsMask and dstRegsMask
3886 srcRegsMask &= ~genRegMask(srcReg);
3887 dstRegsMask &= ~genRegMask(dstReg);
3889 } // while (remainingRegCount > 0)
3891 } // (needToShuffleRegs)
3893 } // op1 must be multi-reg GT_CALL
3895 #endif // _TARGET_ARMARCH_
3897 #endif // !LEGACY_BACKEND