// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XX                  ARM/ARM64 Code Generator Common Code                      XX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
*/
#ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator

#ifdef _TARGET_ARMARCH_ // This file is ONLY used for ARM and ARM64 architectures
//------------------------------------------------------------------------
// genCodeForTreeNode: Generate code for a single node in the tree.
//
// Preconditions:
//    All operands have been evaluated.
//
void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
    regNumber targetReg  = treeNode->gtRegNum;
    var_types targetType = treeNode->TypeGet();
    emitter*  emit       = getEmitter();

    // Validate that all the operands for the current node are consumed in order.
    // This is important because LSRA ensures that any necessary copies will be handled correctly.
    lastConsumedNode = nullptr;
    if (compiler->verbose)
        unsigned seqNum = treeNode->gtSeqNum; // Useful for setting a conditional break in Visual Studio
        compiler->gtDispLIRNode(treeNode, "Generating: ");
#ifdef _TARGET_ARM64_ // TODO-ARM: is this applicable to ARM32?
    // Is this a node whose value is already in a register? LSRA denotes this by
    // setting the GTF_REUSE_REG_VAL flag.
    if (treeNode->IsReuseRegVal())
        // For now, this is only used for constant nodes.
        assert((treeNode->OperGet() == GT_CNS_INT) || (treeNode->OperGet() == GT_CNS_DBL));
        JITDUMP(" TreeNode is marked ReuseReg\n");
#endif // _TARGET_ARM64_

    // Contained nodes are part of their parents for codegen purposes,
    // e.g. immediates, most LEAs.
    if (treeNode->isContained())

    switch (treeNode->gtOper)
            getEmitter()->emitDisableGC();

            // We should be seeing this only if the profiler hook is needed
            noway_assert(compiler->compIsProfilerHookNeeded());

#ifdef PROFILING_SUPPORTED
            // Right now this node is used only for tail calls. In future if
            // we intend to use it for Enter or Leave hooks, add a data member
            // to this node indicating the kind of profiler hook. For example,
            // helper number can be used.
            genProfilingLeaveCallback(CORINFO_HELP_PROF_FCN_TAILCALL);
#endif // PROFILING_SUPPORTED
#endif // _TARGET_ARM64_
            genSetRegToConst(targetReg, targetType, treeNode);
            genProduceReg(treeNode);

            genCodeForNegNot(treeNode);

            genCodeForDivMod(treeNode->AsOp());

            assert(varTypeIsIntegralOrI(treeNode));

#if !defined(_TARGET_64BIT_)
#endif // !defined(_TARGET_64BIT_)

            genConsumeOperands(treeNode->AsOp());
            genCodeForBinary(treeNode);

        // case GT_ROL: // No ROL instruction on ARM; it has been lowered to ROR.
            genCodeForShift(treeNode);

#if !defined(_TARGET_64BIT_)
            genCodeForShiftLong(treeNode);
#endif // !defined(_TARGET_64BIT_)

            genCodeForCast(treeNode->AsOp());
        case GT_LCL_FLD_ADDR:
        case GT_LCL_VAR_ADDR:
            genCodeForLclAddr(treeNode);

            genCodeForLclFld(treeNode->AsLclFld());

            genCodeForLclVar(treeNode->AsLclVar());

        case GT_STORE_LCL_FLD:
            genCodeForStoreLclFld(treeNode->AsLclFld());

        case GT_STORE_LCL_VAR:
            genCodeForStoreLclVar(treeNode->AsLclVar());

            // If we are here, it is the case where there is an LEA that cannot be folded into a parent instruction.
            genLeaInstruction(treeNode->AsAddrMode());

            genCodeForIndir(treeNode->AsIndir());

            genCodeForMulLong(treeNode->AsMultiRegOp());
#endif // _TARGET_ARM_
#ifdef _TARGET_ARM64_
            genCodeForMulHi(treeNode->AsOp());

            genCodeForSwap(treeNode->AsOp());
#endif // _TARGET_ARM64_

            genJmpMethod(treeNode);

            genCkfinite(treeNode);

            genIntrinsic(treeNode);

            genSIMDIntrinsic(treeNode->AsSIMD());
#endif // FEATURE_SIMD

            genCodeForCompare(treeNode->AsOp());

            genCodeForJumpTrue(treeNode);

            genCodeForJcc(treeNode->AsCC());

            genCodeForSetcc(treeNode->AsCC());
#endif // _TARGET_ARM_
            genCodeForReturnTrap(treeNode->AsOp());

            genCodeForStoreInd(treeNode->AsStoreInd());

            // This is handled at the time we call genConsumeReg() on the GT_COPY

            genPutArgStk(treeNode->AsPutArgStk());

            genPutArgReg(treeNode->AsOp());

        case GT_PUTARG_SPLIT:
            genPutArgSplit(treeNode->AsPutArgSplit());

            genCallInstruction(treeNode->AsCall());

            genLockedInstructions(treeNode->AsOp());

        case GT_MEMORYBARRIER:
            instGen_MemoryBarrier();
            // do nothing - reload is just a marker.
            // The parent node will call genConsumeReg on this which will trigger the unspill of this node's child
            // into the register specified in this node.

        case GT_ARR_BOUNDS_CHECK:
#ifdef FEATURE_SIMD
        case GT_SIMD_CHK:
#endif // FEATURE_SIMD
            genRangeCheck(treeNode);

            genCodeForPhysReg(treeNode->AsPhysReg());

            genCodeForNullCheck(treeNode->AsOp());

            noway_assert(handlerGetsXcptnObj(compiler->compCurBB->bbCatchTyp));

            /* Catch arguments get passed in a register. genCodeForBBlist()
               would have marked it as holding a GC object, but not used. */
            noway_assert(gcInfo.gcRegGCrefSetCur & RBM_EXCEPTION_OBJECT);
            genConsumeReg(treeNode);
        case GT_PINVOKE_PROLOG:
            noway_assert(((gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & ~fullIntArgRegMask()) == 0);

            // the runtime side requires the codegen here to be consistent
            emit->emitDisableRandomNops();

            genPendingCallLabel       = genCreateTempLabel();
            treeNode->gtLabel.gtLabBB = genPendingCallLabel;
            emit->emitIns_R_L(INS_adr, EA_PTRSIZE, genPendingCallLabel, targetReg);
        case GT_STORE_DYN_BLK:
            genCodeForStoreBlk(treeNode->AsBlk());

            genJumpTable(treeNode);

        case GT_SWITCH_TABLE:
            genTableBasedSwitch(treeNode);

            genCodeForArrIndex(treeNode->AsArrIndex());

            genCodeForArrOffset(treeNode->AsArrOffs());

        case GT_CLS_VAR_ADDR:
            emit->emitIns_R_C(INS_lea, EA_PTRSIZE, targetReg, treeNode->gtClsVar.gtClsVarHnd, 0);
            genProduceReg(treeNode);

            assert(treeNode->isUsedFromReg());
            genConsumeRegs(treeNode);
#endif // _TARGET_ARM_

            // Do nothing; these nodes are simply markers for debug info.

            _snprintf_s(message, _countof(message), _TRUNCATE, "NYI: Unimplemented node type %s",
                        GenTree::OpName(treeNode->OperGet()));
            NYI("unimplemented node");
//------------------------------------------------------------------------
// genSetRegToIcon: Generate code that will set the given register to the integer constant.
//
void CodeGen::genSetRegToIcon(regNumber reg, ssize_t val, var_types type, insFlags flags)
    // Reg cannot be a FP reg
    assert(!genIsValidFloatReg(reg));

    // The only TYP_REF constant that can come down this path is a managed 'null' since it is not
    // relocatable. Other ref type constants (e.g. string objects) go through a different code path.
    noway_assert(type != TYP_REF || val == 0);
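
    // A minimal illustrative sketch (assuming ARM64; the exact encoding depends on the immediate):
    //   genSetRegToIcon(REG_R0, 0x12345, TYP_INT) could emit
    //       mov  w0, #0x2345
    //       movk w0, #0x1, lsl #16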
    instGen_Set_Reg_To_Imm(emitActualTypeSize(type), reg, val, flags);
//---------------------------------------------------------------------
// genIntrinsic - generate code for a given intrinsic
//
// Arguments:
//    treeNode - the GT_INTRINSIC node
//
void CodeGen::genIntrinsic(GenTreePtr treeNode)
    assert(treeNode->OperIs(GT_INTRINSIC));

    // Both the operand and its result must be of the same floating point type.
    GenTreePtr srcNode = treeNode->gtOp.gtOp1;
    assert(varTypeIsFloating(srcNode));
    assert(srcNode->TypeGet() == treeNode->TypeGet());

    // Right now only Abs/Round/Sqrt are treated as math intrinsics.
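    // Illustrative mapping (not exhaustive): the INS_ABS and INS_SQRT pseudo-instructions below
    // typically encode as fabs/fsqrt on ARM64 and vabs/vsqrt on ARM32.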
    switch (treeNode->gtIntrinsic.gtIntrinsicId)
        case CORINFO_INTRINSIC_Abs:
            genConsumeOperands(treeNode->AsOp());
            getEmitter()->emitInsBinary(INS_ABS, emitTypeSize(treeNode), treeNode, srcNode);

        case CORINFO_INTRINSIC_Round:
            NYI_ARM("genIntrinsic for round - not implemented yet");
            genConsumeOperands(treeNode->AsOp());
            getEmitter()->emitInsBinary(INS_ROUND, emitTypeSize(treeNode), treeNode, srcNode);

        case CORINFO_INTRINSIC_Sqrt:
            genConsumeOperands(treeNode->AsOp());
            getEmitter()->emitInsBinary(INS_SQRT, emitTypeSize(treeNode), treeNode, srcNode);

            assert(!"genIntrinsic: Unsupported intrinsic");

    genProduceReg(treeNode);
//---------------------------------------------------------------------
// genPutArgStk - generate code for a GT_PUTARG_STK node
//
// Arguments:
//    treeNode - the GT_PUTARG_STK node
//
void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode)
    assert(treeNode->OperIs(GT_PUTARG_STK));
    var_types  targetType = treeNode->TypeGet();
    GenTreePtr source     = treeNode->gtOp1;
    emitter*   emit       = getEmitter();

    // This is the varNum for our store operations;
    // typically this is the varNum for the Outgoing arg space.
    // When we are generating a tail call it will be the varNum for arg0.
    unsigned varNumOut    = (unsigned)-1;
    unsigned argOffsetMax = (unsigned)-1; // Records the maximum size of this area for assert checks

    // Get the argument offset to use with 'varNumOut'.
    // Here we cross-check that the argument offset hasn't changed from lowering to codegen, since
    // we store the arg slot number in the GT_PUTARG_STK node during the lowering phase.
    unsigned argOffsetOut = treeNode->gtSlotNum * TARGET_POINTER_SIZE;
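
    // For example, gtSlotNum == 2 yields argOffsetOut == 16 on ARM64 (8-byte slots)
    // and argOffsetOut == 8 on ARM32 (4-byte slots).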
    fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(treeNode->gtCall, treeNode);
    assert(curArgTabEntry);
    assert(argOffsetOut == (curArgTabEntry->slotNum * TARGET_POINTER_SIZE));

    // Do we set up the stack arg in the incoming or the outgoing arg area?
    // Fast tail calls are implemented as epilog+jmp, so their stack args are set up in the incoming arg area.
    // For all other calls the stack arg is set up in the outgoing arg area.
    if (treeNode->putInIncomingArgArea())
        NYI_ARM("genPutArgStk: fast tail call");

#ifdef _TARGET_ARM64_
        varNumOut    = getFirstArgWithStackSlot();
        argOffsetMax = compiler->compArgSize;
#if FEATURE_FASTTAILCALL
        // This must be a fast tail call.
        assert(treeNode->gtCall->IsFastTailCall());

        // Since it is a fast tail call, the existence of the first incoming arg is guaranteed
        // because a fast tail call requires that the incoming arg area of the caller is >= the outgoing
        // arg area required for the tail call.
        LclVarDsc* varDsc = &(compiler->lvaTable[varNumOut]);
        assert(varDsc != nullptr);
#endif // FEATURE_FASTTAILCALL
#endif // _TARGET_ARM64_

        varNumOut    = compiler->lvaOutgoingArgSpaceVar;
        argOffsetMax = compiler->lvaOutgoingArgSpaceSize;
    bool isStruct = (targetType == TYP_STRUCT) || (source->OperGet() == GT_FIELD_LIST);

    if (!isStruct) // a normal non-Struct argument
        instruction storeIns  = ins_Store(targetType);
        emitAttr    storeAttr = emitTypeSize(targetType);

        // If it is contained then the source must be the integer constant zero.
        if (source->isContained())
#ifdef _TARGET_ARM64_
            assert(source->OperGet() == GT_CNS_INT);
            assert(source->AsIntConCommon()->IconValue() == 0);

            emit->emitIns_S_R(storeIns, storeAttr, REG_ZR, varNumOut, argOffsetOut);
#else  // !_TARGET_ARM64_
            // There is no zero register on ARM32.
#endif // !_TARGET_ARM64_
        genConsumeReg(source);
        emit->emitIns_S_R(storeIns, storeAttr, source->gtRegNum, varNumOut, argOffsetOut);
        if (compiler->opts.compUseSoftFP && targetType == TYP_LONG)
            // This case currently only occurs for double types that are passed as TYP_LONG;
            // actual long types would have been decomposed by now.
            assert(source->IsCopyOrReload());
            regNumber otherReg = (regNumber)source->AsCopyOrReload()->GetRegNumByIdx(1);
            assert(otherReg != REG_NA);
            argOffsetOut += EA_4BYTE;
            emit->emitIns_S_R(storeIns, storeAttr, otherReg, varNumOut, argOffsetOut);

        argOffsetOut += EA_SIZE_IN_BYTES(storeAttr);
        assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area
    else // We have some kind of a struct argument
        assert(source->isContained()); // We expect that this node was marked as contained in Lower

        if (source->OperGet() == GT_FIELD_LIST)
            // Deal with the multi register passed struct args.
            GenTreeFieldList* fieldListPtr = source->AsFieldList();

            // Evaluate each of the GT_FIELD_LIST items into their register
            // and store their register into the outgoing argument area.
            for (; fieldListPtr != nullptr; fieldListPtr = fieldListPtr->Rest())
                GenTreePtr nextArgNode = fieldListPtr->gtOp.gtOp1;
                genConsumeReg(nextArgNode);

                regNumber reg  = nextArgNode->gtRegNum;
                var_types type = nextArgNode->TypeGet();
                emitAttr  attr = emitTypeSize(type);

                // Emit store instructions to store the registers produced by the GT_FIELD_LIST into the outgoing
                // argument area.
                emit->emitIns_S_R(ins_Store(type), attr, reg, varNumOut, argOffsetOut);
                argOffsetOut += EA_SIZE_IN_BYTES(attr);
                assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area
        else // We must have a GT_OBJ or a GT_LCL_VAR
            noway_assert((source->OperGet() == GT_LCL_VAR) || (source->OperGet() == GT_OBJ));

            var_types targetType = source->TypeGet();
            noway_assert(varTypeIsStruct(targetType));

            // We will copy this struct to the stack, possibly using an ldp/ldr instruction.

            // Set up loReg (and hiReg) from the internal registers that we reserved in Lower.
            regNumber loReg = treeNode->ExtractTempReg();
#ifdef _TARGET_ARM64_
            regNumber hiReg = treeNode->GetSingleTempReg();
#endif // _TARGET_ARM64_
            regNumber addrReg = REG_NA;
            GenTreeLclVarCommon* varNode  = nullptr;
            GenTreePtr           addrNode = nullptr;

            if (source->OperGet() == GT_LCL_VAR)
                varNode = source->AsLclVarCommon();
            else // we must have a GT_OBJ
                assert(source->OperGet() == GT_OBJ);

                addrNode = source->gtOp.gtOp1;

                // addrNode can either be a GT_LCL_VAR_ADDR or an address expression.
                if (addrNode->OperGet() == GT_LCL_VAR_ADDR)
                    // We have a GT_OBJ(GT_LCL_VAR_ADDR)
                    //
                    // We will treat this case the same as above
                    // (i.e. if we just had this GT_LCL_VAR directly as the source),
                    // so update 'source' to point to this GT_LCL_VAR_ADDR node
                    // and continue to the codegen for the LCL_VAR node below.
                    varNode = addrNode->AsLclVarCommon();

            // Either varNode or addrNode must have been set up above;
            // the xor ensures that only one of the two is set up, not both.
            assert((varNode != nullptr) ^ (addrNode != nullptr));
            BYTE  gcPtrArray[MAX_ARG_REG_COUNT] = {}; // TYPE_GC_NONE = 0
            BYTE* gcPtrs                        = gcPtrArray;

            unsigned gcPtrCount; // The count of GC pointers in the struct

            // This is the varNum for our load operations,
            // only used when we have a multireg struct with a LclVar source.
            unsigned varNumInp = BAD_VAR_NUM;

            // On ARM32, the size of the reference map can be larger than MAX_ARG_REG_COUNT.
            gcPtrs     = treeNode->gtGcPtrs;
            gcPtrCount = treeNode->gtNumberReferenceSlots;

            // Set up the structSize, isHfa, and gcPtrCount.
            if (varNode != nullptr)
                varNumInp = varNode->gtLclNum;
                assert(varNumInp < compiler->lvaCount);
                LclVarDsc* varDsc = &compiler->lvaTable[varNumInp];

                assert(varDsc->lvType == TYP_STRUCT);

                if (varDsc->lvPromoted)
                    NYI_ARM("CodeGen::genPutArgStk - promoted struct");
#endif // _TARGET_ARM_
                // This struct also must live in the stack frame,
                // and it can't live in a register (SIMD).
                assert(varDsc->lvOnFrame && !varDsc->lvRegister);

                structSize = varDsc->lvSize(); // This yields the rounded-up size, but that is fine
                                               // as that is how much stack is allocated for this LclVar
                isHfa = varDsc->lvIsHfa();
#ifdef _TARGET_ARM64_
                gcPtrCount = varDsc->lvStructGcCount;
                for (unsigned i = 0; i < gcPtrCount; ++i)
                    gcPtrs[i] = varDsc->lvGcLayout[i];
#endif // _TARGET_ARM64_
            else // addrNode is used
                assert(addrNode != nullptr);

                // Generate code to load the address that we need into a register.
                genConsumeAddress(addrNode);
                addrReg = addrNode->gtRegNum;

#ifdef _TARGET_ARM64_
                // If addrReg is equal to loReg, swap(loReg, hiReg).
                // This reduces code complexity by supporting only one addrReg-overwrite case.
                if (loReg == addrReg)
#endif // _TARGET_ARM64_
                CORINFO_CLASS_HANDLE objClass = source->gtObj.gtClass;

                structSize = compiler->info.compCompHnd->getClassSize(objClass);
                isHfa      = compiler->IsHfa(objClass);
#ifdef _TARGET_ARM64_
                gcPtrCount = compiler->info.compCompHnd->getClassGClayout(objClass, &gcPtrs[0]);
#endif // _TARGET_ARM64_

            // If we have an HFA we can't have any GC pointers;
            // if not, then the max size for the struct is 16 bytes.
                noway_assert(gcPtrCount == 0);

#ifdef _TARGET_ARM64_
                noway_assert(structSize <= 2 * TARGET_POINTER_SIZE);

            noway_assert(structSize <= MAX_PASS_MULTIREG_BYTES);
#endif // _TARGET_ARM64_
            int      remainingSize = structSize;
            unsigned structOffset  = 0;
            unsigned nextIndex     = 0;

#ifdef _TARGET_ARM64_
            // For a >= 16-byte structSize we will generate an ldp and an stp instruction each loop, e.g.:
            //             ldp     x2, x3, [x0]
            //             stp     x2, x3, [sp, #16]
            while (remainingSize >= 2 * TARGET_POINTER_SIZE)
                var_types type0 = compiler->getJitGCType(gcPtrs[nextIndex + 0]);
                var_types type1 = compiler->getJitGCType(gcPtrs[nextIndex + 1]);

                if (varNode != nullptr)
                    // Load from our varNumInp source
                    emit->emitIns_R_R_S_S(INS_ldp, emitTypeSize(type0), emitTypeSize(type1), loReg, hiReg, varNumInp,
                                          structOffset);

                    // Check for the case of destroying addrReg while we still need it.
                    assert(loReg != addrReg);
                    noway_assert((remainingSize == 2 * TARGET_POINTER_SIZE) || (hiReg != addrReg));

                    // Load from our address expression source
                    emit->emitIns_R_R_R_I(INS_ldp, emitTypeSize(type0), loReg, hiReg, addrReg, structOffset,
                                          INS_OPTS_NONE, emitTypeSize(type0));

                // Emit an stp instruction to store the two registers into the outgoing argument area.
                emit->emitIns_S_S_R_R(INS_stp, emitTypeSize(type0), emitTypeSize(type1), loReg, hiReg, varNumOut,
                                      argOffsetOut);
                argOffsetOut += (2 * TARGET_POINTER_SIZE); // We stored 16 bytes of the struct
                assert(argOffsetOut <= argOffsetMax);      // We can't write beyond the outgoing arg area

                remainingSize -= (2 * TARGET_POINTER_SIZE); // We loaded 16 bytes of the struct
                structOffset += (2 * TARGET_POINTER_SIZE);
#else  // _TARGET_ARM_
            // For a >= 4-byte structSize we will generate an ldr and an str instruction each loop iteration.
            while (remainingSize >= TARGET_POINTER_SIZE)
                var_types type = compiler->getJitGCType(gcPtrs[nextIndex]);

                if (varNode != nullptr)
                    // Load from our varNumInp source
                    emit->emitIns_R_S(INS_ldr, emitTypeSize(type), loReg, varNumInp, structOffset);

                    // Check for the case of destroying addrReg while we still need it.
                    assert(loReg != addrReg || remainingSize == TARGET_POINTER_SIZE);

                    // Load from our address expression source
                    emit->emitIns_R_R_I(INS_ldr, emitTypeSize(type), loReg, addrReg, structOffset);

                // Emit an str instruction to store the register into the outgoing argument area.
                emit->emitIns_S_R(INS_str, emitTypeSize(type), loReg, varNumOut, argOffsetOut);
                argOffsetOut += TARGET_POINTER_SIZE;  // We stored 4 bytes of the struct
                assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area

                remainingSize -= TARGET_POINTER_SIZE; // We loaded 4 bytes of the struct
                structOffset += TARGET_POINTER_SIZE;
#endif // _TARGET_ARM_
            // For a 12-byte structSize we will generate two load instructions:
            //             ldr     x2, [x0]
            //             ldr     w3, [x0, #8]
            //             str     x2, [sp, #16]
            //             str     w3, [sp, #24]
            while (remainingSize > 0)
                if (remainingSize >= TARGET_POINTER_SIZE)
                    var_types nextType = compiler->getJitGCType(gcPtrs[nextIndex]);
                    emitAttr  nextAttr = emitTypeSize(nextType);
                    remainingSize -= TARGET_POINTER_SIZE;

                    if (varNode != nullptr)
                        // Load from our varNumInp source
                        emit->emitIns_R_S(ins_Load(nextType), nextAttr, loReg, varNumInp, structOffset);

                        assert(loReg != addrReg);

                        // Load from our address expression source
                        emit->emitIns_R_R_I(ins_Load(nextType), nextAttr, loReg, addrReg, structOffset);

                    // Emit a store instruction to store the register into the outgoing argument area.
                    emit->emitIns_S_R(ins_Store(nextType), nextAttr, loReg, varNumOut, argOffsetOut);
                    argOffsetOut += EA_SIZE_IN_BYTES(nextAttr);
                    assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area

                    structOffset += TARGET_POINTER_SIZE;
                else // (remainingSize < TARGET_POINTER_SIZE)
                    int loadSize = remainingSize;

                    // We should never have to do a non-pointer sized load when we have a LclVar source.
                    assert(varNode == nullptr);

                    // The left-over size is smaller than a pointer and thus can never be a GC type.
                    assert(varTypeIsGC(compiler->getJitGCType(gcPtrs[nextIndex])) == false);

                    var_types loadType = TYP_UINT;
                        loadType = TYP_UBYTE;
                    else if (loadSize == 2)
                        loadType = TYP_USHORT;

                        // Need to handle additional loadSize cases here.
                        noway_assert(loadSize == 4);

                    instruction loadIns  = ins_Load(loadType);
                    emitAttr    loadAttr = emitAttr(loadSize);

                    assert(loReg != addrReg);

                    emit->emitIns_R_R_I(loadIns, loadAttr, loReg, addrReg, structOffset);

                    // Emit a store instruction to store the register into the outgoing argument area.
                    emit->emitIns_S_R(ins_Store(loadType), loadAttr, loReg, varNumOut, argOffsetOut);
                    argOffsetOut += EA_SIZE_IN_BYTES(loadAttr);
                    assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area
//---------------------------------------------------------------------
// genPutArgReg - generate code for a GT_PUTARG_REG node
//
// Arguments:
//    tree - the GT_PUTARG_REG node
//
void CodeGen::genPutArgReg(GenTreeOp* tree)
    assert(tree->OperIs(GT_PUTARG_REG));

    var_types targetType = tree->TypeGet();
    regNumber targetReg  = tree->gtRegNum;

    assert(targetType != TYP_STRUCT);

    GenTree* op1 = tree->gtOp1;

    // If the child node is not already in the register we need, move it.
    if (targetReg != op1->gtRegNum)
        inst_RV_RV(ins_Copy(targetType), targetReg, op1->gtRegNum, targetType);
//---------------------------------------------------------------------
// genPutArgSplit - generate code for a GT_PUTARG_SPLIT node
//
// Arguments:
//    treeNode - the GT_PUTARG_SPLIT node
//
void CodeGen::genPutArgSplit(GenTreePutArgSplit* treeNode)
    assert(treeNode->OperIs(GT_PUTARG_SPLIT));

    GenTreePtr source       = treeNode->gtOp1;
    emitter*   emit         = getEmitter();
    unsigned   varNumOut    = compiler->lvaOutgoingArgSpaceVar;
    unsigned   argOffsetMax = compiler->lvaOutgoingArgSpaceSize;
    unsigned   argOffsetOut = treeNode->gtSlotNum * TARGET_POINTER_SIZE;
    if (source->OperGet() == GT_FIELD_LIST)
        GenTreeFieldList* fieldListPtr = source->AsFieldList();

        // Evaluate each of the GT_FIELD_LIST items into their register
        // and store their register into the outgoing argument area.
        for (unsigned idx = 0; fieldListPtr != nullptr; fieldListPtr = fieldListPtr->Rest(), idx++)
            GenTreePtr nextArgNode = fieldListPtr->gtGetOp1();
            regNumber  fieldReg    = nextArgNode->gtRegNum;
            genConsumeReg(nextArgNode);

            if (idx >= treeNode->gtNumRegs)
                var_types type = nextArgNode->TypeGet();
                emitAttr  attr = emitTypeSize(type);

                // Emit store instructions to store the registers produced by the GT_FIELD_LIST into the outgoing
                // argument area.
                emit->emitIns_S_R(ins_Store(type), attr, fieldReg, varNumOut, argOffsetOut);
                argOffsetOut += EA_SIZE_IN_BYTES(attr);
                assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area

                var_types type   = treeNode->GetRegType(idx);
                regNumber argReg = treeNode->GetRegNumByIdx(idx);

                // If the child node is not already in the register we need, move it.
                if (argReg != fieldReg)
                    inst_RV_RV(ins_Copy(type), argReg, fieldReg, type);
        var_types targetType = source->TypeGet();
        assert(source->OperGet() == GT_OBJ);
        assert(varTypeIsStruct(targetType));

        regNumber baseReg = treeNode->ExtractTempReg();
        regNumber addrReg = REG_NA;

        GenTreeLclVarCommon* varNode  = nullptr;
        GenTreePtr           addrNode = nullptr;

        addrNode = source->gtOp.gtOp1;

        // addrNode can either be a GT_LCL_VAR_ADDR or an address expression.
        if (addrNode->OperGet() == GT_LCL_VAR_ADDR)
            // We have a GT_OBJ(GT_LCL_VAR_ADDR)
            //
            // We will treat this case the same as above
            // (i.e. if we just had this GT_LCL_VAR directly as the source),
            // so update 'source' to point to this GT_LCL_VAR_ADDR node
            // and continue to the codegen for the LCL_VAR node below.
            varNode = addrNode->AsLclVarCommon();

        // Either varNode or addrNode must have been set up above;
        // the xor ensures that only one of the two is set up, not both.
        assert((varNode != nullptr) ^ (addrNode != nullptr));
        // Set up the structSize, isHfa, and gcPtrCount.
        BYTE*    gcPtrs     = treeNode->gtGcPtrs;
        unsigned gcPtrCount = treeNode->gtNumberReferenceSlots; // The count of GC pointers in the struct
        int      structSize = treeNode->getArgSize();

        // This is the varNum for our load operations,
        // only used when we have a struct with a LclVar source.
        unsigned srcVarNum = BAD_VAR_NUM;

        if (varNode != nullptr)
            srcVarNum = varNode->gtLclNum;
            assert(srcVarNum < compiler->lvaCount);

            // Handle the promoted-struct case.
            LclVarDsc* varDsc = compiler->lvaTable + srcVarNum;
            if (varDsc->lvPromoted)
                NYI_ARM("CodeGen::genPutArgSplit - promoted struct");

            // We don't split HFA structs.
            assert(!varDsc->lvIsHfa());
        else // addrNode is used
            assert(addrNode != nullptr);

            // Generate code to load the address that we need into a register.
            genConsumeAddress(addrNode);
            addrReg = addrNode->gtRegNum;

            // If addrReg is equal to baseReg, we use the last target register as an alternative baseReg.
            // Because the candidate mask for the internal baseReg does not include any of the target registers,
            // we can ensure that baseReg, addrReg, and the last target register are not all the same.
            assert(baseReg != addrReg);

            // We don't split HFA structs.
            assert(!compiler->IsHfa(source->gtObj.gtClass));
        // Put the stack portion of the struct on the stack first.
        unsigned nextIndex    = treeNode->gtNumRegs;
        unsigned structOffset = nextIndex * TARGET_POINTER_SIZE;
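        // For example, with gtNumRegs == 2 the stack portion of the struct starts at byte offset 8 on ARM32.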
        int remainingSize = structSize - structOffset;

        // remainingSize is always a multiple of TARGET_POINTER_SIZE.
        assert(remainingSize % TARGET_POINTER_SIZE == 0);
        while (remainingSize > 0)
            var_types type = compiler->getJitGCType(gcPtrs[nextIndex]);

            if (varNode != nullptr)
                // Load from our srcVarNum source
                emit->emitIns_R_S(INS_ldr, emitTypeSize(type), baseReg, srcVarNum, structOffset);

                // Check for the case of destroying addrReg while we still need it.
                assert(baseReg != addrReg);

                // Load from our address expression source
                emit->emitIns_R_R_I(INS_ldr, emitTypeSize(type), baseReg, addrReg, structOffset);

            // Emit an str instruction to store the register into the outgoing argument area.
            emit->emitIns_S_R(INS_str, emitTypeSize(type), baseReg, varNumOut, argOffsetOut);
            argOffsetOut += TARGET_POINTER_SIZE;  // We stored 4 bytes of the struct
            assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area
            remainingSize -= TARGET_POINTER_SIZE; // We loaded 4 bytes of the struct
            structOffset += TARGET_POINTER_SIZE;
        // We set up the registers in order, so that by the time we assign the last target register,
        // `baseReg` is no longer in use, in case we had to reuse the last target register for it.
        for (unsigned idx = 0; idx < treeNode->gtNumRegs; idx++)
            regNumber targetReg = treeNode->GetRegNumByIdx(idx);
            var_types type      = treeNode->GetRegType(idx);

            if (varNode != nullptr)
                // Load from our srcVarNum source
                emit->emitIns_R_S(INS_ldr, emitTypeSize(type), targetReg, srcVarNum, structOffset);

                // Check for the case of destroying addrReg while we still need it.
                if (targetReg == addrReg && idx != treeNode->gtNumRegs - 1)
                    assert(targetReg != baseReg);
                    emit->emitIns_R_R(INS_mov, emitTypeSize(type), baseReg, addrReg);

                // Load from our address expression source
                emit->emitIns_R_R_I(INS_ldr, emitTypeSize(type), targetReg, addrReg, structOffset);

            structOffset += TARGET_POINTER_SIZE;

    genProduceReg(treeNode);
#endif // _TARGET_ARM_

//----------------------------------------------------------------------------------
// genMultiRegCallStoreToLocal: store multi-reg return value of a call node to a local
//
// Arguments:
//    treeNode - GenTree of GT_STORE_LCL_VAR
//
// Assumptions:
//    The child of store is a multi-reg call node.
//    genProduceReg() on treeNode is made by caller of this routine.
//
void CodeGen::genMultiRegCallStoreToLocal(GenTreePtr treeNode)
    assert(treeNode->OperGet() == GT_STORE_LCL_VAR);

#if defined(_TARGET_ARM_)
    // Longs are returned in two return registers on Arm32.
    // Structs are returned in four registers on ARM32, as are HFAs.
    assert(varTypeIsLong(treeNode) || varTypeIsStruct(treeNode));
#elif defined(_TARGET_ARM64_)
    // Structs of size >= 9 and <= 16 bytes are returned in two return registers on ARM64, as are HFAs.
    assert(varTypeIsStruct(treeNode));
#endif
    // Assumption: the current implementation requires that a multi-reg
    // var in 'var = call' is flagged as lvIsMultiRegRet to prevent it from being promoted.
    unsigned   lclNum = treeNode->AsLclVarCommon()->gtLclNum;
    LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
    noway_assert(varDsc->lvIsMultiRegRet);

    GenTree*     op1       = treeNode->gtGetOp1();
    GenTree*     actualOp1 = op1->gtSkipReloadOrCopy();
    GenTreeCall* call      = actualOp1->AsCall();
    assert(call->HasMultiRegRetVal());

    genConsumeRegs(op1);

    ReturnTypeDesc* pRetTypeDesc = call->GetReturnTypeDesc();
    unsigned        regCount     = pRetTypeDesc->GetReturnRegCount();
    if (treeNode->gtRegNum != REG_NA)
        // Right now the only enregistrable multi-reg return types supported are SIMD types.
        assert(varTypeIsSIMD(treeNode));
        NYI("GT_STORE_LCL_VAR of a SIMD enregisterable struct");

        for (unsigned i = 0; i < regCount; ++i)
            var_types type = pRetTypeDesc->GetReturnRegType(i);
            regNumber reg  = call->GetRegNumByIdx(i);
            if (op1->IsCopyOrReload())
                // GT_COPY/GT_RELOAD will have a valid reg for those positions
                // that need to be copied or reloaded.
                regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(i);
                if (reloadReg != REG_NA)

            assert(reg != REG_NA);
            getEmitter()->emitIns_S_R(ins_Store(type), emitTypeSize(type), reg, lclNum, offset);
            offset += genTypeSize(type);

    varDsc->lvRegNum = REG_STK;
//------------------------------------------------------------------------
// genRangeCheck: generate code for GT_ARR_BOUNDS_CHECK node.
//
void CodeGen::genRangeCheck(GenTreePtr oper)
#ifdef FEATURE_SIMD
    noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK || oper->OperGet() == GT_SIMD_CHK);
#else  // !FEATURE_SIMD
    noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK);
#endif // !FEATURE_SIMD

    GenTreeBoundsChk* bndsChk = oper->AsBoundsChk();

    GenTreePtr arrLen   = bndsChk->gtArrLen;
    GenTreePtr arrIndex = bndsChk->gtIndex;
    GenTreePtr arrRef   = nullptr;

    emitJumpKind jmpKind;

    genConsumeRegs(arrIndex);
    genConsumeRegs(arrLen);

    if (arrIndex->isContainedIntOrIImmed())
        // To encode using a cmp immediate, we place the
        // constant operand in the second position.
        jmpKind = genJumpKindForOper(GT_LE, CK_UNSIGNED);

        jmpKind = genJumpKindForOper(GT_GE, CK_UNSIGNED);

    getEmitter()->emitInsBinary(INS_cmp, EA_4BYTE, src1, src2);
    genJumpToThrowHlpBlk(jmpKind, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
//---------------------------------------------------------------------
// genCodeForPhysReg - generate code for a GT_PHYSREG node
//
// Arguments:
//    tree - the GT_PHYSREG node
//
void CodeGen::genCodeForPhysReg(GenTreePhysReg* tree)
    assert(tree->OperIs(GT_PHYSREG));

    var_types targetType = tree->TypeGet();
    regNumber targetReg  = tree->gtRegNum;

    if (targetReg != tree->gtSrcReg)
        inst_RV_RV(ins_Copy(targetType), targetReg, tree->gtSrcReg, targetType);
        genTransferRegGCState(targetReg, tree->gtSrcReg);

    genProduceReg(tree);
//---------------------------------------------------------------------
// genCodeForNullCheck - generate code for a GT_NULLCHECK node
//
// Arguments:
//    tree - the GT_NULLCHECK node
//
void CodeGen::genCodeForNullCheck(GenTreeOp* tree)
    assert(tree->OperIs(GT_NULLCHECK));
    assert(!tree->gtOp1->isContained());
    regNumber addrReg = genConsumeReg(tree->gtOp1);
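
    // The null check is simply a load from [addrReg] whose result is discarded; a null
    // address faults. On ARM64 we load into the zero register, so no target register is tied up.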
#ifdef _TARGET_ARM64_
    regNumber targetReg = REG_ZR;
#else
    regNumber targetReg = tree->gtRegNum;
#endif

    getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, targetReg, addrReg, 0);
//------------------------------------------------------------------------
// genOffsetOfMDArrayLowerBound: Returns the offset from the Array object to the
//   lower bound for the given dimension.
//
// Arguments:
//    elemType  - the element type of the array
//    rank      - the rank of the array
//    dimension - the dimension for which the lower bound offset will be returned.
//
// TODO-Cleanup: move to CodeGenCommon.cpp
unsigned CodeGen::genOffsetOfMDArrayLowerBound(var_types elemType, unsigned rank, unsigned dimension)
    // Note that the lower bound and length fields of the Array object are always TYP_INT
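    // The per-dimension INT fields are laid out with all dimension sizes first, then all lower
    // bounds; hence the '(dimension + rank)' scaling here versus just 'dimension' in
    // genOffsetOfMDArrayDimensionSize below.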
    return compiler->eeGetArrayDataOffset(elemType) + genTypeSize(TYP_INT) * (dimension + rank);
//------------------------------------------------------------------------
// genOffsetOfMDArrayDimensionSize: Returns the offset from the Array object to the
//   size for the given dimension.
//
// Arguments:
//    elemType  - the element type of the array
//    rank      - the rank of the array
//    dimension - the dimension for which the size offset will be returned.
//
// TODO-Cleanup: move to CodeGenCommon.cpp
unsigned CodeGen::genOffsetOfMDArrayDimensionSize(var_types elemType, unsigned rank, unsigned dimension)
    // Note that the lower bound and length fields of the Array object are always TYP_INT
    return compiler->eeGetArrayDataOffset(elemType) + genTypeSize(TYP_INT) * dimension;
//------------------------------------------------------------------------
// genCodeForArrIndex: Generates code to bounds check the index for one dimension of an array reference,
//                     producing the effective index by subtracting the lower bound.
//
// Arguments:
//    arrIndex - the node for which we're generating code
//
void CodeGen::genCodeForArrIndex(GenTreeArrIndex* arrIndex)
    emitter*   emit      = getEmitter();
    GenTreePtr arrObj    = arrIndex->ArrObj();
    GenTreePtr indexNode = arrIndex->IndexExpr();
    regNumber  arrReg    = genConsumeReg(arrObj);
    regNumber  indexReg  = genConsumeReg(indexNode);
    regNumber  tgtReg    = arrIndex->gtRegNum;
    noway_assert(tgtReg != REG_NA);

    // We will use a temp register to load the lower bound and dimension size values.
    regNumber tmpReg = arrIndex->GetSingleTempReg();
    assert(tgtReg != tmpReg);

    unsigned  dim      = arrIndex->gtCurrDim;
    unsigned  rank     = arrIndex->gtArrRank;
    var_types elemType = arrIndex->gtArrElemType;
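
    // The emitted sequence is roughly the following (illustrative):
    //     ldr  tmpReg, [arrReg, #lowerBoundOffset]
    //     sub  tgtReg, indexReg, tmpReg
    //     ldr  tmpReg, [arrReg, #dimensionSizeOffset]
    //     cmp  tgtReg, tmpReg
    //     bhs  rangeCheckFailBlk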
    unsigned offset = genOffsetOfMDArrayLowerBound(elemType, rank, dim);
    emit->emitIns_R_R_I(ins_Load(TYP_INT), EA_PTRSIZE, tmpReg, arrReg, offset); // a 4 BYTE sign extending load
    emit->emitIns_R_R_R(INS_sub, EA_4BYTE, tgtReg, indexReg, tmpReg);

    offset = genOffsetOfMDArrayDimensionSize(elemType, rank, dim);
    emit->emitIns_R_R_I(ins_Load(TYP_INT), EA_PTRSIZE, tmpReg, arrReg, offset); // a 4 BYTE sign extending load
    emit->emitIns_R_R(INS_cmp, EA_4BYTE, tgtReg, tmpReg);

    emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
    genJumpToThrowHlpBlk(jmpGEU, SCK_RNGCHK_FAIL);

    genProduceReg(arrIndex);
//------------------------------------------------------------------------
// genCodeForArrOffset: Generates code to compute the flattened array offset for
//                      one dimension of an array reference:
//                          result = (prevDimOffset * dimSize) + effectiveIndex
//                      where dimSize is obtained from the arrObj operand
//
// Arguments:
//    arrOffset - the node for which we're generating code
//
// Notes:
//    dimSize and effectiveIndex are always non-negative, the former by design,
//    and the latter because it has been normalized to be zero-based.
//
void CodeGen::genCodeForArrOffset(GenTreeArrOffs* arrOffset)
    GenTreePtr offsetNode = arrOffset->gtOffset;
    GenTreePtr indexNode  = arrOffset->gtIndex;
    regNumber  tgtReg     = arrOffset->gtRegNum;

    noway_assert(tgtReg != REG_NA);
    if (!offsetNode->IsIntegralConst(0))
        emitter*  emit      = getEmitter();
        regNumber offsetReg = genConsumeReg(offsetNode);
        regNumber indexReg  = genConsumeReg(indexNode);
        regNumber arrReg    = genConsumeReg(arrOffset->gtArrObj);
        noway_assert(offsetReg != REG_NA);
        noway_assert(indexReg != REG_NA);
        noway_assert(arrReg != REG_NA);

        regNumber tmpReg = arrOffset->GetSingleTempReg();

        unsigned  dim      = arrOffset->gtCurrDim;
        unsigned  rank     = arrOffset->gtArrRank;
        var_types elemType = arrOffset->gtArrElemType;
        unsigned  offset   = genOffsetOfMDArrayDimensionSize(elemType, rank, dim);

        // Load tmpReg with the dimension size and evaluate
        // tgtReg = offsetReg*tmpReg + indexReg.
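        // INS_MULADD typically encodes as 'madd' on ARM64 and 'mla' on ARM32 (illustrative).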
        emit->emitIns_R_R_I(ins_Load(TYP_INT), EA_PTRSIZE, tmpReg, arrReg, offset);
        emit->emitIns_R_R_R_R(INS_MULADD, EA_PTRSIZE, tgtReg, tmpReg, offsetReg, indexReg);

        regNumber indexReg = genConsumeReg(indexNode);
        if (indexReg != tgtReg)
            inst_RV_RV(INS_mov, tgtReg, indexReg, TYP_INT);

    genProduceReg(arrOffset);
//------------------------------------------------------------------------
// indirForm: Make a temporary indir we can feed to pattern matching routines
//    in cases where we don't want to instantiate all the indirs that happen.
//
GenTreeIndir CodeGen::indirForm(var_types type, GenTree* base)
    GenTreeIndir i(GT_IND, type, base, nullptr);
    i.gtRegNum = REG_NA;
    // gtNext has to be non-null (because contained nodes can't be the last node in a block),
    // but we don't want it to be a valid pointer.
    i.gtNext = (GenTree*)(-1);

//------------------------------------------------------------------------
// intForm: Make a temporary int we can feed to pattern matching routines
//    in cases where we don't want to instantiate.
//
GenTreeIntCon CodeGen::intForm(var_types type, ssize_t value)
    GenTreeIntCon i(type, value);
    i.gtRegNum = REG_NA;
    // gtNext has to be non-null (because contained nodes can't be the last node in a block),
    // but we don't want it to be a valid pointer.
    i.gtNext = (GenTree*)(-1);
//------------------------------------------------------------------------
// genCodeForShift: Generates the code sequence for a GenTree node that
//    represents a bit shift or rotate operation (<<, >>, >>>, rol, ror).
//
// Arguments:
//    tree - the bit shift node (that specifies the type of bit shift to perform).
//
// Assumptions:
//    a) All GenTrees are register allocated.
//
void CodeGen::genCodeForShift(GenTreePtr tree)
    var_types   targetType = tree->TypeGet();
    genTreeOps  oper       = tree->OperGet();
    instruction ins        = genGetInsForOper(oper, targetType);
    emitAttr    size       = emitTypeSize(tree);

    assert(tree->gtRegNum != REG_NA);

    genConsumeOperands(tree->AsOp());

    GenTreePtr operand = tree->gtGetOp1();
    GenTreePtr shiftBy = tree->gtGetOp2();
    if (!shiftBy->IsCnsIntOrI())
        getEmitter()->emitIns_R_R_R(ins, size, tree->gtRegNum, operand->gtRegNum, shiftBy->gtRegNum);

        unsigned immWidth   = emitter::getBitWidth(size); // For ARM64, immWidth will be set to 32 or 64
        ssize_t  shiftByImm = shiftBy->gtIntCon.gtIconVal & (immWidth - 1);
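
        // For example, with a 32-bit operation (immWidth == 32) a shift-by value of 33 is masked to 1.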
        getEmitter()->emitIns_R_R_I(ins, size, tree->gtRegNum, operand->gtRegNum, shiftByImm);

    genProduceReg(tree);
//------------------------------------------------------------------------
// genCodeForLclAddr: Generates the code for GT_LCL_FLD_ADDR/GT_LCL_VAR_ADDR.
//
void CodeGen::genCodeForLclAddr(GenTree* tree)
    assert(tree->OperIs(GT_LCL_FLD_ADDR, GT_LCL_VAR_ADDR));

    var_types targetType = tree->TypeGet();
    regNumber targetReg  = tree->gtRegNum;

    // Address of a local var.
    noway_assert(targetType == TYP_BYREF);

    inst_RV_TT(INS_lea, targetReg, tree, 0, EA_BYREF);
    genProduceReg(tree);
//------------------------------------------------------------------------
// genCodeForLclFld: Produce code for a GT_LCL_FLD node.
//
// Arguments:
//    tree - the GT_LCL_FLD node
//
void CodeGen::genCodeForLclFld(GenTreeLclFld* tree)
    assert(tree->OperIs(GT_LCL_FLD));

    var_types targetType = tree->TypeGet();
    regNumber targetReg  = tree->gtRegNum;
    emitter*  emit       = getEmitter();

    NYI_IF(targetType == TYP_STRUCT, "GT_LCL_FLD: struct load local field not supported");
    assert(targetReg != REG_NA);

    emitAttr size   = emitTypeSize(targetType);
    unsigned offs   = tree->gtLclOffs;
    unsigned varNum = tree->gtLclNum;
    assert(varNum < compiler->lvaCount);

    if (varTypeIsFloating(targetType))
        emit->emitIns_R_S(ins_Load(targetType), size, targetReg, varNum, offs);

#ifdef _TARGET_ARM64_
        size = EA_SET_SIZE(size, EA_8BYTE);
#endif // _TARGET_ARM64_
        emit->emitIns_R_S(ins_Move_Extend(targetType, false), size, targetReg, varNum, offs);

    genProduceReg(tree);
//------------------------------------------------------------------------
// genCodeForIndir: Produce code for a GT_IND node.
//
// Arguments:
//    tree - the GT_IND node
//
void CodeGen::genCodeForIndir(GenTreeIndir* tree)
    assert(tree->OperIs(GT_IND));

    var_types   targetType = tree->TypeGet();
    regNumber   targetReg  = tree->gtRegNum;
    emitter*    emit       = getEmitter();
    emitAttr    attr       = emitTypeSize(tree);
    instruction ins        = ins_Load(targetType);

    assert((attr != EA_1BYTE) || !(tree->gtFlags & GTF_IND_UNALIGNED));

    genConsumeAddress(tree->Addr());
    if (tree->gtFlags & GTF_IND_VOLATILE)
#ifdef _TARGET_ARM64_
        GenTree* addr           = tree->Addr();
        bool     useLoadAcquire = genIsValidIntReg(targetReg) && !addr->isContained() &&
                                  (varTypeIsUnsigned(targetType) || varTypeIsI(targetType)) &&
                                  !(tree->gtFlags & GTF_IND_UNALIGNED);
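
        // When useLoadAcquire holds, the plain ldrb/ldrh/ldr below is swapped for its
        // load-acquire form (ldarb/ldarh/ldar), which makes the trailing barrier unnecessary.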
        switch (EA_SIZE(attr))
                assert(ins == INS_ldrb);

                assert(ins == INS_ldrh);

                assert(ins == INS_ldr);

                assert(false); // We should not get here

        emit->emitInsLoadStoreOp(ins, attr, targetReg, tree);

        if (!useLoadAcquire) // issue an INS_BARRIER_OSHLD after a volatile LdInd operation
            instGen_MemoryBarrier(INS_BARRIER_OSHLD);

        emit->emitInsLoadStoreOp(ins, attr, targetReg, tree);

        // issue a full memory barrier after a volatile LdInd operation
        instGen_MemoryBarrier();
#endif // _TARGET_ARM64_

    emit->emitInsLoadStoreOp(ins, attr, targetReg, tree);

    genProduceReg(tree);
// Generate code for a CpBlk node by means of the VM memcpy helper call.
// Preconditions:
// a) The size argument of the CpBlk is not an integer constant, or
// b) The size argument is a constant but is larger than CPBLK_MOVS_LIMIT bytes.
void CodeGen::genCodeForCpBlk(GenTreeBlk* cpBlkNode)
    // Make sure we got the arguments of the cpblk operation in the right registers.
    unsigned   blockSize = cpBlkNode->Size();
    GenTreePtr dstAddr   = cpBlkNode->Addr();
    assert(!dstAddr->isContained());
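
    // CORINFO_HELP_MEMCPY follows the memcpy convention: dst in REG_ARG_0,
    // src in REG_ARG_1, size in REG_ARG_2.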
    genConsumeBlockOp(cpBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2);

#ifdef _TARGET_ARM64_
    assert(blockSize > CPBLK_UNROLL_LIMIT);
#endif // _TARGET_ARM64_

    if (cpBlkNode->gtFlags & GTF_BLK_VOLATILE)
        // issue a full memory barrier before a volatile CpBlk operation
        instGen_MemoryBarrier();

    genEmitHelperCall(CORINFO_HELP_MEMCPY, 0, EA_UNKNOWN);

    if (cpBlkNode->gtFlags & GTF_BLK_VOLATILE)
#ifdef _TARGET_ARM64_
        // issue an INS_BARRIER_ISHLD after a volatile CpBlk operation
        instGen_MemoryBarrier(INS_BARRIER_ISHLD);

        // issue a full memory barrier after a volatile CpBlk operation
        instGen_MemoryBarrier();
#endif // _TARGET_ARM64_
// Generates code for InitBlk by calling the VM memset helper function.
// Preconditions:
// a) The size argument of the InitBlk is not an integer constant.
// b) The size argument of the InitBlk is >= INITBLK_STOS_LIMIT bytes.
void CodeGen::genCodeForInitBlk(GenTreeBlk* initBlkNode)
    // Make sure we got the arguments of the initblk operation in the right registers.
    unsigned   size    = initBlkNode->Size();
    GenTreePtr dstAddr = initBlkNode->Addr();
    GenTreePtr initVal = initBlkNode->Data();
    if (initVal->OperIsInitVal())
        initVal = initVal->gtGetOp1();

    assert(!dstAddr->isContained());
    assert(!initVal->isContained());
    if (initBlkNode->gtOper == GT_STORE_DYN_BLK)
        assert(initBlkNode->AsDynBlk()->gtDynamicSize->gtRegNum == REG_ARG_2);

        assert(initBlkNode->gtRsvdRegs == RBM_ARG_2);

#ifdef _TARGET_ARM64_
    assert(size > INITBLK_UNROLL_LIMIT);
#endif // _TARGET_ARM64_
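
    // CORINFO_HELP_MEMSET follows the memset convention: dst in REG_ARG_0,
    // fill value in REG_ARG_1, size in REG_ARG_2.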
    genConsumeBlockOp(initBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2);

    if (initBlkNode->gtFlags & GTF_BLK_VOLATILE)
        // issue a full memory barrier before a volatile initBlock operation
        instGen_MemoryBarrier();

    genEmitHelperCall(CORINFO_HELP_MEMSET, 0, EA_UNKNOWN);
//------------------------------------------------------------------------
// genRegCopy: Generate a register copy.
//
void CodeGen::genRegCopy(GenTree* treeNode)
    assert(treeNode->OperGet() == GT_COPY);

    var_types targetType = treeNode->TypeGet();
    regNumber targetReg  = treeNode->gtRegNum;
    assert(targetReg != REG_NA);

    GenTree* op1 = treeNode->gtOp.gtOp1;

    // Check whether this node and the node from which we're copying the value have the same
    // register type.
    // This can happen if (currently iff) we have a SIMD vector type that fits in an integer
    // register, in which case it is passed as an argument, or returned from a call,
    // in an integer register and must be copied if it's in a floating point register.

    if (varTypeIsFloating(treeNode) != varTypeIsFloating(op1))
#ifdef _TARGET_ARM64_
        inst_RV_RV(INS_fmov, targetReg, genConsumeReg(op1), targetType);
#else  // !_TARGET_ARM64_
        if (varTypeIsFloating(treeNode))
            NYI_ARM("genRegCopy from 'int' to 'float'");

            assert(varTypeIsFloating(op1));

            if (op1->TypeGet() == TYP_FLOAT)
                inst_RV_RV(INS_vmov_f2i, targetReg, genConsumeReg(op1), targetType);

                regNumber otherReg = (regNumber)treeNode->AsCopyOrReload()->gtOtherRegs[0];
                assert(otherReg != REG_NA);
                inst_RV_RV_RV(INS_vmov_d2i, targetReg, otherReg, genConsumeReg(op1), EA_8BYTE);
#endif // !_TARGET_ARM64_

        inst_RV_RV(ins_Copy(targetType), targetReg, genConsumeReg(op1), targetType);
        // The lclVar will never be a def.
        // If it is a last use, the lclVar will be killed by genConsumeReg(), as usual, and genProduceReg will
        // appropriately set the gcInfo for the copied value.
        // If not, there are two cases we need to handle:
        // - If this is a TEMPORARY copy (indicated by the GTF_VAR_DEATH flag) the variable
        //   will remain live in its original register.
        //   genProduceReg() will appropriately set the gcInfo for the copied value,
        //   and genConsumeReg will reset it.
        // - Otherwise, we need to update register info for the lclVar.

        GenTreeLclVarCommon* lcl = op1->AsLclVarCommon();
        assert((lcl->gtFlags & GTF_VAR_DEF) == 0);

        if ((lcl->gtFlags & GTF_VAR_DEATH) == 0 && (treeNode->gtFlags & GTF_VAR_DEATH) == 0)
            LclVarDsc* varDsc = &compiler->lvaTable[lcl->gtLclNum];

            // If we didn't just spill it (in genConsumeReg, above), then update the register info
            if (varDsc->lvRegNum != REG_STK)
                // The old location is dying
                genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(op1));

                gcInfo.gcMarkRegSetNpt(genRegMask(op1->gtRegNum));

                genUpdateVarReg(varDsc, treeNode);

                // The new location is going live
                genUpdateRegLife(varDsc, /*isBorn*/ true, /*isDying*/ false DEBUGARG(treeNode));

    genProduceReg(treeNode);
//------------------------------------------------------------------------
// genCallInstruction: Produce code for a GT_CALL node
//
void CodeGen::genCallInstruction(GenTreeCall* call)
    gtCallTypes callType = (gtCallTypes)call->gtCallType;

    IL_OFFSETX ilOffset = BAD_IL_OFFSET;

    // All virtuals should have been expanded into a control expression.
    assert(!call->IsVirtual() || call->gtControlExpr || call->gtCallAddr);

    // Consume all the arg regs.
    for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
        assert(list->OperIsList());

        GenTreePtr argNode = list->Current();

        fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode->gtSkipReloadOrCopy());
        assert(curArgTabEntry);

        if (curArgTabEntry->regNum == REG_STK)

        // Deal with multi register passed struct args.
        if (argNode->OperGet() == GT_FIELD_LIST)
            GenTreeArgList* argListPtr   = argNode->AsArgList();
            unsigned        iterationNum = 0;
            regNumber       argReg       = curArgTabEntry->regNum;
            for (; argListPtr != nullptr; argListPtr = argListPtr->Rest(), iterationNum++)
                GenTreePtr putArgRegNode = argListPtr->gtOp.gtOp1;
                assert(putArgRegNode->gtOper == GT_PUTARG_REG);

                genConsumeReg(putArgRegNode);

                if (putArgRegNode->gtRegNum != argReg)
                    inst_RV_RV(ins_Move_Extend(putArgRegNode->TypeGet(), true), argReg, putArgRegNode->gtRegNum);

                argReg = genRegArgNext(argReg);

#if defined(_TARGET_ARM_)
                // A double register is modelled as an even-numbered single one
                if (putArgRegNode->TypeGet() == TYP_DOUBLE)
                    argReg = genRegArgNext(argReg);
#endif // _TARGET_ARM_
        else if (curArgTabEntry->isSplit)
            assert(curArgTabEntry->numRegs >= 1);
            genConsumeArgSplitStruct(argNode->AsPutArgSplit());
            for (unsigned idx = 0; idx < curArgTabEntry->numRegs; idx++)
                regNumber argReg   = (regNumber)((unsigned)curArgTabEntry->regNum + idx);
                regNumber allocReg = argNode->AsPutArgSplit()->GetRegNumByIdx(idx);
                if (argReg != allocReg)
                    inst_RV_RV(ins_Move_Extend(argNode->TypeGet(), true), argReg, allocReg);

            regNumber argReg = curArgTabEntry->regNum;
            genConsumeReg(argNode);
            if (argNode->gtRegNum != argReg)
                inst_RV_RV(ins_Move_Extend(argNode->TypeGet(), true), argReg, argNode->gtRegNum);

        // In the case of a varargs call,
        // the ABI dictates that if we have floating point args,
        // we must pass the enregistered arguments in both the
        // integer and floating point registers, so let's do that.
        if (call->IsVarargs() && varTypeIsFloating(argNode))
            NYI_ARM("CodeGen - IsVarargs");
            NYI_ARM64("CodeGen - IsVarargs");
    // Insert a null check on "this" pointer if asked.
    if (call->NeedsNullCheck())
        const regNumber regThis = genGetThisArgReg(call);

#if defined(_TARGET_ARM_)
        const regNumber tmpReg = call->ExtractTempReg();
        getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, tmpReg, regThis, 0);
#elif defined(_TARGET_ARM64_)
        getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_ZR, regThis, 0);
#endif
    // Either gtControlExpr != null or gtCallAddr != null or it is a direct non-virtual call to a user or helper
    // method.
    CORINFO_METHOD_HANDLE methHnd;
    GenTree*              target = call->gtControlExpr;
    if (callType == CT_INDIRECT)
        assert(target == nullptr);
        target = call->gtCallAddr;

        methHnd = call->gtCallMethHnd;

    CORINFO_SIG_INFO* sigInfo = nullptr;

    // Pass the call signature information down into the emitter so the emitter can associate
    // native call sites with the signatures they were generated from.
    if (callType != CT_HELPER)
        sigInfo = call->callSig;
    // If this is a fast tail call, then we are done. In this case we set up the args (both reg args
    // and stack args in the incoming arg area) and the call target. The epilog sequence will
    // generate "br <reg>".
    if (call->IsFastTailCall())
        // Don't support fast tail calling JIT helpers.
        assert(callType != CT_HELPER);

        // Fast tail calls materialize the call target either in gtControlExpr or in gtCallAddr.
        assert(target != nullptr);

        genConsumeReg(target);

        NYI_ARM("fast tail call");

#ifdef _TARGET_ARM64_
        // Use IP0 as the call target register.
        if (target->gtRegNum != REG_IP0)
            inst_RV_RV(INS_mov, REG_IP0, target->gtRegNum);
#endif // _TARGET_ARM64_
    // For a pinvoke to unmanaged code we emit a label to clear
    // the GC pointer state before the callsite.
    // We can't utilize the typical lazy killing of GC pointers
    // at (or inside) the callsite.
    if (call->IsUnmanaged())
        genDefineTempLabel(genCreateTempLabel());

    // Determine return value size(s).
    ReturnTypeDesc* pRetTypeDesc  = call->GetReturnTypeDesc();
    emitAttr        retSize       = EA_PTRSIZE;
    emitAttr        secondRetSize = EA_UNKNOWN;

    if (call->HasMultiRegRetVal())
        retSize       = emitTypeSize(pRetTypeDesc->GetReturnRegType(0));
        secondRetSize = emitTypeSize(pRetTypeDesc->GetReturnRegType(1));

        assert(!varTypeIsStruct(call));

        if (call->gtType == TYP_REF || call->gtType == TYP_ARRAY)

        else if (call->gtType == TYP_BYREF)

    // We need to propagate the IL offset information to the call instruction, so we can emit
    // an IL to native mapping record for the call, to support managed return value debugging.
    // We don't want tail call helper calls that were converted from normal calls to get a record,
    // so we skip this hash table lookup logic in that case.
    if (compiler->opts.compDbgInfo && compiler->genCallSite2ILOffsetMap != nullptr && !call->IsTailCall())
        (void)compiler->genCallSite2ILOffsetMap->Lookup(call, &ilOffset);
    if (target != nullptr)
        // A call target cannot be a contained indirection.
        assert(!target->isContainedIndir());

        genConsumeReg(target);

        // We have already generated code for gtControlExpr evaluating it into a register.
        // We just need to emit "call reg" in this case.
        assert(genIsValidIntReg(target->gtRegNum));

        genEmitCall(emitter::EC_INDIR_R, methHnd,
                    INDEBUG_LDISASM_COMMA(sigInfo) nullptr, // addr
                    retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), ilOffset, target->gtRegNum);
        // Generate a direct call to a non-virtual user defined or helper method.
        assert(callType == CT_HELPER || callType == CT_USER_FUNC);

        void* addr = nullptr;
        if (callType == CT_HELPER)
            // Direct call to a helper method.
            CorInfoHelpFunc helperNum = compiler->eeGetHelperNum(methHnd);
            noway_assert(helperNum != CORINFO_HELP_UNDEF);

            void* pAddr = nullptr;
            addr        = compiler->compGetHelperFtn(helperNum, (void**)&pAddr);

            if (addr == nullptr)

            // Direct call to a non-virtual user function.
            CORINFO_ACCESS_FLAGS aflags = CORINFO_ACCESS_ANY;
            if (call->IsSameThis())
                aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_THIS);

            if ((call->NeedsNullCheck()) == 0)
                aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_NONNULL);

            CORINFO_CONST_LOOKUP addrInfo;
            compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo, aflags);

            addr = addrInfo.addr;

        assert(addr != nullptr);

        // Non-virtual direct call to known addresses.
#ifdef _TARGET_ARM_
        if (!arm_Valid_Imm_For_BL((ssize_t)addr))
            regNumber tmpReg = call->GetSingleTempReg();
            instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, tmpReg, (ssize_t)addr);
            genEmitCall(emitter::EC_INDIR_R, methHnd, INDEBUG_LDISASM_COMMA(sigInfo) NULL, retSize, ilOffset, tmpReg);
#endif // _TARGET_ARM_

        genEmitCall(emitter::EC_FUNC_TOKEN, methHnd, INDEBUG_LDISASM_COMMA(sigInfo) addr,
                    retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), ilOffset);
#if 0 && defined(_TARGET_ARM64_)
        // Use this path if you want to load an absolute call target using
        // a sequence of movs followed by an indirect call (blr instruction).

        // Load the call target address into x16 (IP0).
        instGen_Set_Reg_To_Imm(EA_8BYTE, REG_IP0, (ssize_t) addr);

        // Indirect call to the constant address in IP0.
        genEmitCall(emitter::EC_INDIR_R,
                    INDEBUG_LDISASM_COMMA(sigInfo)
2075 // if it was a pinvoke we may have needed to get the address of a label
2076 if (genPendingCallLabel)
2078 assert(call->IsUnmanaged());
2079 genDefineTempLabel(genPendingCallLabel);
2080 genPendingCallLabel = nullptr;
2084 // All Callee arg registers are trashed and no longer contain any GC pointers.
2085 // TODO-Bug?: As a matter of fact, shouldn't we be killing all of the callee-trashed regs here?
2086 // For now we will assert that, other than the arg regs, the gc ref/byref sets don't contain any
2087 // other registers from RBM_CALLEE_TRASH.
2088 assert((gcInfo.gcRegGCrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0);
2089 assert((gcInfo.gcRegByrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0);
2090 gcInfo.gcRegGCrefSetCur &= ~RBM_ARG_REGS;
2091 gcInfo.gcRegByrefSetCur &= ~RBM_ARG_REGS;
2093 var_types returnType = call->TypeGet();
2094 if (returnType != TYP_VOID)
2096 regNumber returnReg;
2098 if (call->HasMultiRegRetVal())
2100 assert(pRetTypeDesc != nullptr);
2101 unsigned regCount = pRetTypeDesc->GetReturnRegCount();
2103 // If the registers allocated to the call node differ from the ABI return
2104 // registers in which the call has returned its result, move the result
2105 // into the registers allocated to the call node.
2106 for (unsigned i = 0; i < regCount; ++i)
2108 var_types regType = pRetTypeDesc->GetReturnRegType(i);
2109 returnReg = pRetTypeDesc->GetABIReturnReg(i);
2110 regNumber allocatedReg = call->GetRegNumByIdx(i);
2111 if (returnReg != allocatedReg)
2113 inst_RV_RV(ins_Copy(regType), allocatedReg, returnReg, regType);
2119 #ifdef _TARGET_ARM_
2120 if (call->IsHelperCall(compiler, CORINFO_HELP_INIT_PINVOKE_FRAME))
2122 // The CORINFO_HELP_INIT_PINVOKE_FRAME helper uses a custom calling convention that returns with
2123 // TCB in REG_PINVOKE_TCB. fgMorphCall() sets the correct argument registers.
2124 returnReg = REG_PINVOKE_TCB;
2127 #endif // _TARGET_ARM_
2128 if (varTypeIsFloating(returnType) && !compiler->opts.compUseSoftFP)
2130 returnReg = REG_FLOATRET;
2134 returnReg = REG_INTRET;
2137 if (call->gtRegNum != returnReg)
2140 if (compiler->opts.compUseSoftFP && returnType == TYP_DOUBLE)
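// Pair the two integer return registers into the double register;
// roughly 'vmov <dstDblReg>, r0, r1', assuming the int return pair is r0:r1.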
2142 inst_RV_RV_RV(INS_vmov_i2d, call->gtRegNum, returnReg, genRegArgNext(returnReg), EA_8BYTE);
2144 else if (compiler->opts.compUseSoftFP && returnType == TYP_FLOAT)
2146 inst_RV_RV(INS_vmov_i2f, call->gtRegNum, returnReg, returnType);
2151 inst_RV_RV(ins_Copy(returnType), call->gtRegNum, returnReg, returnType);
2156 genProduceReg(call);
2159 // If there is nothing next, that means the result is thrown away, so this value is not live.
2160 // However, for minopts or debuggable code, we keep it live to support managed return value debugging.
2161 if ((call->gtNext == nullptr) && !compiler->opts.MinOpts() && !compiler->opts.compDbgCode)
2163 gcInfo.gcMarkRegSetNpt(RBM_INTRET);
2167 // Produce code for a GT_JMP node.
2168 // The caller's arguments need to be transferred to the callee before exiting the caller.
2169 // The actual jump to the callee is generated as part of the caller's epilog sequence.
2170 // Therefore the codegen for GT_JMP only needs to ensure that the callee's arguments are set up correctly.
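// For example, for the IL 'jmp' opcode the target method shares the caller's signature,
// so each incoming argument must be back in its original register or stack slot by the
// time the epilog branches to the target.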
2171 void CodeGen::genJmpMethod(GenTreePtr jmp)
2173 assert(jmp->OperGet() == GT_JMP);
2174 assert(compiler->compJmpOpUsed);
2176 // If no arguments, nothing to do
2177 if (compiler->info.compArgsCount == 0)
2179 return;
2182 // Make sure register arguments are in their initial registers
2183 // and stack arguments are put back as well.
2187 // First, move any enregistered stack arguments back to the stack.
2188 // At the same time, any reg arg not in its correct register is moved back to its stack location.
2190 // We are not strictly required to spill reg args that are not in the desired registers for a jmp call,
2191 // but that would require us to deal with circularity while moving values around. Spilling
2192 // to the stack keeps the implementation simple, which is not a bad trade-off given that jmp calls
2193 // are not frequent.
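// Each such spill below is roughly 'str <argReg>, [<frame base> + <arg's frame offset>]',
// emitted via emitIns_S_R against the arg's own stack slot.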
2194 for (varNum = 0; (varNum < compiler->info.compArgsCount); varNum++)
2196 varDsc = compiler->lvaTable + varNum;
2198 if (varDsc->lvPromoted)
2200 noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here
2202 unsigned fieldVarNum = varDsc->lvFieldLclStart;
2203 varDsc = compiler->lvaTable + fieldVarNum;
2205 noway_assert(varDsc->lvIsParam);
2207 if (varDsc->lvIsRegArg && (varDsc->lvRegNum != REG_STK))
2209 // Skip reg args that are already in the right register for the jmp call.
2210 // If not, we will spill such args to their stack locations.
2212 // If we need to generate a tail call profiler hook, then spill all
2213 // arg regs to free them up for the callback.
2214 if (!compiler->compIsProfilerHookNeeded() && (varDsc->lvRegNum == varDsc->lvArgReg))
2215 continue;
2217 else if (varDsc->lvRegNum == REG_STK)
2219 // Skip args that are currently living on the stack.
2220 continue;
2223 // If we reach here, it means either a reg argument is not in the right register, or
2224 // a stack argument is currently living in a register. In either case the following
2225 // asserts should hold.
2226 assert(varDsc->lvRegNum != REG_STK);
2227 assert(varDsc->TypeGet() != TYP_STRUCT);
2228 var_types storeType = genActualType(varDsc->TypeGet());
2229 emitAttr storeSize = emitActualTypeSize(storeType);
2231 getEmitter()->emitIns_S_R(ins_Store(storeType), storeSize, varDsc->lvRegNum, varNum, 0);
2232 // Update lvRegNum life and GC info to indicate lvRegNum is dead and varDsc stack slot is going live.
2233 // Note that we cannot modify varDsc->lvRegNum here because another basic block may not be expecting it.
2234 // Therefore manually update life of varDsc->lvRegNum.
2235 regMaskTP tempMask = genRegMask(varDsc->lvRegNum);
2236 regSet.RemoveMaskVars(tempMask);
2237 gcInfo.gcMarkRegSetNpt(tempMask);
2238 if (compiler->lvaIsGCTracked(varDsc))
2240 VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varNum);
2244 #ifdef PROFILING_SUPPORTED
2245 // At this point all arg regs are free.
2246 // Emit tail call profiler callback.
2247 genProfilingLeaveCallback(CORINFO_HELP_PROF_FCN_TAILCALL);
2248 #endif // PROFILING_SUPPORTED
2250 // Next, load any register argument that is not currently in its argument register back from its stack location.
2251 regMaskTP fixedIntArgMask = RBM_NONE; // tracks the int arg regs occupying fixed args in case of a vararg method.
2252 unsigned firstArgVarNum = BAD_VAR_NUM; // varNum of the first argument in case of a vararg method.
2253 for (varNum = 0; (varNum < compiler->info.compArgsCount); varNum++)
2255 varDsc = compiler->lvaTable + varNum;
2256 if (varDsc->lvPromoted)
2258 noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here
2260 unsigned fieldVarNum = varDsc->lvFieldLclStart;
2261 varDsc = compiler->lvaTable + fieldVarNum;
2263 noway_assert(varDsc->lvIsParam);
2265 // Skip if arg not passed in a register.
2266 if (!varDsc->lvIsRegArg)
2267 continue;
2269 // Register argument
2270 noway_assert(isRegParamType(genActualType(varDsc->TypeGet())));
2272 // Is the register argument already in the right register?
2273 // If not, load it from its stack location.
2274 regNumber argReg = varDsc->lvArgReg; // incoming arg register
2275 regNumber argRegNext = REG_NA;
2277 if (varDsc->lvRegNum != argReg)
2279 var_types loadType = TYP_UNDEF;
2280 if (varTypeIsStruct(varDsc))
2282 // Must be <= 16 bytes or else it wouldn't be passed in registers
2283 noway_assert(EA_SIZE_IN_BYTES(varDsc->lvSize()) <= MAX_PASS_MULTIREG_BYTES);
2284 loadType = compiler->getJitGCType(varDsc->lvGcLayout[0]);
2288 loadType = compiler->mangleVarArgsType(genActualType(varDsc->TypeGet()));
2290 emitAttr loadSize = emitActualTypeSize(loadType);
2291 getEmitter()->emitIns_R_S(ins_Load(loadType), loadSize, argReg, varNum, 0);
2293 // Update argReg life and GC Info to indicate varDsc stack slot is dead and argReg is going live.
2294 // Note that we cannot modify varDsc->lvRegNum here because another basic block may not be expecting it.
2295 // Therefore manually update life of argReg. Note that GT_JMP marks the end of the basic block
2296 // and after which reg life and gc info will be recomputed for the new block in genCodeForBBList().
2297 regSet.AddMaskVars(genRegMask(argReg));
2298 gcInfo.gcMarkRegPtrVal(argReg, loadType);
2300 if (compiler->lvaIsMultiregStruct(varDsc))
2302 if (varDsc->lvIsHfa())
2304 NYI_ARM("CodeGen::genJmpMethod with multireg HFA arg");
2305 NYI_ARM64("CodeGen::genJmpMethod with multireg HFA arg");
2308 // Restore the second register.
2309 argRegNext = genRegArgNext(argReg);
2311 loadType = compiler->getJitGCType(varDsc->lvGcLayout[1]);
2312 loadSize = emitActualTypeSize(loadType);
2313 getEmitter()->emitIns_R_S(ins_Load(loadType), loadSize, argRegNext, varNum, TARGET_POINTER_SIZE);
2315 regSet.AddMaskVars(genRegMask(argRegNext));
2316 gcInfo.gcMarkRegPtrVal(argRegNext, loadType);
2319 if (compiler->lvaIsGCTracked(varDsc))
2321 VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varNum);
2325 // In case of a jmp call to a vararg method ensure only integer registers are passed.
2326 if (compiler->info.compIsVarArgs)
2328 assert((genRegMask(argReg) & RBM_ARG_REGS) != RBM_NONE);
2330 fixedIntArgMask |= genRegMask(argReg);
2332 if (compiler->lvaIsMultiregStruct(varDsc))
2334 assert(argRegNext != REG_NA);
2335 fixedIntArgMask |= genRegMask(argRegNext);
2338 if (argReg == REG_ARG_0)
2340 assert(firstArgVarNum == BAD_VAR_NUM);
2341 firstArgVarNum = varNum;
2346 // Jmp call to a vararg method - if the method has fewer fixed arguments than there are
2347 // integer argument registers, load the remaining integer arg registers from the
2348 // corresponding shadow stack slots. We must do this because we don't know the number
2349 // and type of the non-fixed params passed by the caller, so we have to assume the worst
2350 // case: the caller passed values in all of the integer arg regs.
2352 // The caller could have passed gc-ref/byref type var args. Since these are var args,
2353 // the callee has no way of knowing their GC-ness. Therefore, mark the region that loads the
2354 // remaining arg registers from shadow stack slots as non-GC interruptible.
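// For example (a sketch, assuming an ARM32 method whose fixed args occupy r0 and r1):
//     ldr r2, [<firstArgVarNum's stack home> + 8]
//     ldr r3, [<firstArgVarNum's stack home> + 12]
// with the actual offsets produced by argOffset in the loop below.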
2355 if (fixedIntArgMask != RBM_NONE)
2357 assert(compiler->info.compIsVarArgs);
2358 assert(firstArgVarNum != BAD_VAR_NUM);
2360 regMaskTP remainingIntArgMask = RBM_ARG_REGS & ~fixedIntArgMask;
2361 if (remainingIntArgMask != RBM_NONE)
2363 getEmitter()->emitDisableGC();
2364 for (int argNum = 0, argOffset = 0; argNum < MAX_REG_ARG; ++argNum)
2366 regNumber argReg = intArgRegs[argNum];
2367 regMaskTP argRegMask = genRegMask(argReg);
2369 if ((remainingIntArgMask & argRegMask) != 0)
2371 remainingIntArgMask &= ~argRegMask;
2372 getEmitter()->emitIns_R_S(INS_ldr, EA_PTRSIZE, argReg, firstArgVarNum, argOffset);
2375 argOffset += REGSIZE_BYTES;
2377 getEmitter()->emitEnableGC();
2382 //------------------------------------------------------------------------
2383 // genIntToIntCast: Generate code for an integer cast
2386 // treeNode - The GT_CAST node
2392 // The treeNode must have an assigned register.
2393 // For a signed convert from byte, the source must be in a byte-addressable register.
2394 // Neither the source nor target type can be a floating point type.
2396 // TODO-ARM64-CQ: Allow castOp to be a contained node without an assigned register.
2398 void CodeGen::genIntToIntCast(GenTreePtr treeNode)
2400 assert(treeNode->OperGet() == GT_CAST);
2402 GenTreePtr castOp = treeNode->gtCast.CastOp();
2403 emitter* emit = getEmitter();
2405 var_types dstType = treeNode->CastToType();
2406 var_types srcType = genActualType(castOp->TypeGet());
2407 emitAttr movSize = emitActualTypeSize(dstType);
2408 bool movRequired = false;
2410 #ifdef _TARGET_ARM_
2411 if (varTypeIsLong(srcType))
2413 genLongToIntCast(treeNode);
2414 return;
2416 #endif // _TARGET_ARM_
2418 regNumber targetReg = treeNode->gtRegNum;
2419 regNumber sourceReg = castOp->gtRegNum;
2421 // For Long to Int conversion we will have a reserved integer register to hold the immediate mask
2422 regNumber tmpReg = (treeNode->AvailableTempRegCount() == 0) ? REG_NA : treeNode->GetSingleTempReg();
2424 assert(genIsValidIntReg(targetReg));
2425 assert(genIsValidIntReg(sourceReg));
2427 instruction ins = INS_invalid;
2429 genConsumeReg(castOp);
2430 Lowering::CastInfo castInfo;
2432 // Get information about the cast.
2433 Lowering::getCastDescription(treeNode, &castInfo);
2435 if (castInfo.requiresOverflowCheck)
2437 emitAttr cmpSize = EA_ATTR(genTypeSize(srcType));
2439 if (castInfo.signCheckOnly)
2441 // We only need to check for a negative value in sourceReg
2442 emit->emitIns_R_I(INS_cmp, cmpSize, sourceReg, 0);
2443 emitJumpKind jmpLT = genJumpKindForOper(GT_LT, CK_SIGNED);
2444 genJumpToThrowHlpBlk(jmpLT, SCK_OVERFLOW);
2445 noway_assert(genTypeSize(srcType) == 4 || genTypeSize(srcType) == 8);
2446 // This is the only interesting case, where we need to ensure the upper bits are zero.
2447 if ((srcType == TYP_INT) && (dstType == TYP_ULONG))
2449 // Cast to TYP_ULONG:
2450 // We use a mov with size=EA_4BYTE,
2451 // which will zero out the upper bits.
2456 else if (castInfo.unsignedSource || castInfo.unsignedDest)
2458 // When we are converting from/to unsigned,
2459 // we only have to check for any bits set in 'typeMask'
2461 noway_assert(castInfo.typeMask != 0);
2462 #if defined(_TARGET_ARM_)
2463 if (arm_Valid_Imm_For_Instr(INS_tst, castInfo.typeMask, INS_FLAGS_DONT_CARE))
2465 emit->emitIns_R_I(INS_tst, cmpSize, sourceReg, castInfo.typeMask);
2469 noway_assert(tmpReg != REG_NA);
2470 instGen_Set_Reg_To_Imm(cmpSize, tmpReg, castInfo.typeMask);
2471 emit->emitIns_R_R(INS_tst, cmpSize, sourceReg, tmpReg);
2473 #elif defined(_TARGET_ARM64_)
2474 emit->emitIns_R_I(INS_tst, cmpSize, sourceReg, castInfo.typeMask);
2475 #endif // _TARGET_ARM*
2476 emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
2477 genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
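// Overall, the unsigned range check emitted above is roughly:
//     tst <sourceReg>, #typeMask   (or 'tst <sourceReg>, <tmpReg>' when the mask is not encodable)
//     bne <overflow throw block>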
2481 // For a narrowing signed cast
2483 // we must check that the value is in the signed range.
2485 // Compare with the MAX
2487 noway_assert((castInfo.typeMin != 0) && (castInfo.typeMax != 0));
2489 #if defined(_TARGET_ARM_)
2490 if (emitter::emitIns_valid_imm_for_cmp(castInfo.typeMax, INS_FLAGS_DONT_CARE))
2491 #elif defined(_TARGET_ARM64_)
2492 if (emitter::emitIns_valid_imm_for_cmp(castInfo.typeMax, cmpSize))
2493 #endif
2495 emit->emitIns_R_I(INS_cmp, cmpSize, sourceReg, castInfo.typeMax);
2499 noway_assert(tmpReg != REG_NA);
2500 instGen_Set_Reg_To_Imm(cmpSize, tmpReg, castInfo.typeMax);
2501 emit->emitIns_R_R(INS_cmp, cmpSize, sourceReg, tmpReg);
2504 emitJumpKind jmpGT = genJumpKindForOper(GT_GT, CK_SIGNED);
2505 genJumpToThrowHlpBlk(jmpGT, SCK_OVERFLOW);
2507 // Compare with the MIN
2509 #if defined(_TARGET_ARM_)
2510 if (emitter::emitIns_valid_imm_for_cmp(castInfo.typeMin, INS_FLAGS_DONT_CARE))
2511 #elif defined(_TARGET_ARM64_)
2512 if (emitter::emitIns_valid_imm_for_cmp(castInfo.typeMin, cmpSize))
2513 #endif
2515 emit->emitIns_R_I(INS_cmp, cmpSize, sourceReg, castInfo.typeMin);
2519 noway_assert(tmpReg != REG_NA);
2520 instGen_Set_Reg_To_Imm(cmpSize, tmpReg, castInfo.typeMin);
2521 emit->emitIns_R_R(INS_cmp, cmpSize, sourceReg, tmpReg);
2524 emitJumpKind jmpLT = genJumpKindForOper(GT_LT, CK_SIGNED);
2525 genJumpToThrowHlpBlk(jmpLT, SCK_OVERFLOW);
2529 else // Non-overflow checking cast.
2531 if (genTypeSize(srcType) == genTypeSize(dstType))
2533 ins = INS_mov;
2537 var_types extendType = TYP_UNKNOWN;
2539 if (genTypeSize(srcType) < genTypeSize(dstType))
2541 // If we need to treat a signed type as unsigned
2542 if ((treeNode->gtFlags & GTF_UNSIGNED) != 0)
2544 extendType = genUnsignedType(srcType);
2547 extendType = srcType;
2548 #ifdef _TARGET_ARM_
2549 movSize = emitTypeSize(extendType);
2550 #endif // _TARGET_ARM_
2551 if (extendType == TYP_UINT)
2553 #ifdef _TARGET_ARM64_
2554 // If we are casting from a smaller type to
2555 // a larger type, then we need to make sure the
2556 // upper 4 bytes are zero to guarantee the correct value.
2557 // Therefore using a mov with EA_4BYTE in place of EA_8BYTE
2558 // will zero the upper bits.
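// (On AArch64, a 32-bit register write such as 'mov w0, w1' implicitly zeroes
// bits 63:32 of the destination X register.)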
2560 #endif // _TARGET_ARM64_
2564 else // (genTypeSize(srcType) > genTypeSize(dstType))
2566 // If we need to treat a signed type as unsigned
2567 if ((treeNode->gtFlags & GTF_UNSIGNED) != 0)
2569 extendType = genUnsignedType(dstType);
2572 extendType = dstType;
2573 #if defined(_TARGET_ARM_)
2574 movSize = emitTypeSize(extendType);
2575 #elif defined(_TARGET_ARM64_)
2576 if (extendType == TYP_INT)
2578 movSize = EA_8BYTE; // a sxtw instruction requires EA_8BYTE
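// e.g. 'sxtw x0, w1' sign-extends the 32-bit source into all 64 bits of the destination.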
2580 #endif
2583 ins = ins_Move_Extend(extendType, true);
2587 // We should never be generating a load from memory instruction here!
2588 assert(!emit->emitInsIsLoad(ins));
2590 if ((ins != INS_mov) || movRequired || (targetReg != sourceReg))
2592 emit->emitIns_R_R(ins, movSize, targetReg, sourceReg);
2595 genProduceReg(treeNode);
2598 //------------------------------------------------------------------------
2599 // genFloatToFloatCast: Generate code for a cast between float and double
2602 // treeNode - The GT_CAST node
2608 // Cast is a non-overflow conversion.
2609 // The treeNode must have an assigned register.
2610 // The cast is between float and double.
2612 void CodeGen::genFloatToFloatCast(GenTreePtr treeNode)
2614 // float <--> double conversions are always non-overflow ones
2615 assert(treeNode->OperGet() == GT_CAST);
2616 assert(!treeNode->gtOverflow());
2618 regNumber targetReg = treeNode->gtRegNum;
2619 assert(genIsValidFloatReg(targetReg));
2621 GenTreePtr op1 = treeNode->gtOp.gtOp1;
2622 assert(!op1->isContained()); // Cannot be contained
2623 assert(genIsValidFloatReg(op1->gtRegNum)); // Must be a valid float reg.
2625 var_types dstType = treeNode->CastToType();
2626 var_types srcType = op1->TypeGet();
2627 assert(varTypeIsFloating(srcType) && varTypeIsFloating(dstType));
2629 genConsumeOperands(treeNode->AsOp());
2631 // treeNode must be a reg
2632 assert(!treeNode->isContained());
2634 #if defined(_TARGET_ARM_)
2636 if (srcType != dstType)
2638 instruction insVcvt = (srcType == TYP_FLOAT) ? INS_vcvt_f2d // convert Float to Double
2639 : INS_vcvt_d2f; // convert Double to Float
2641 getEmitter()->emitIns_R_R(insVcvt, emitTypeSize(treeNode), treeNode->gtRegNum, op1->gtRegNum);
2643 else if (treeNode->gtRegNum != op1->gtRegNum)
2645 getEmitter()->emitIns_R_R(INS_vmov, emitTypeSize(treeNode), treeNode->gtRegNum, op1->gtRegNum);
2648 #elif defined(_TARGET_ARM64_)
2650 if (srcType != dstType)
2652 insOpts cvtOption = (srcType == TYP_FLOAT) ? INS_OPTS_S_TO_D // convert Single to Double
2653 : INS_OPTS_D_TO_S; // convert Double to Single
2655 getEmitter()->emitIns_R_R(INS_fcvt, emitTypeSize(treeNode), treeNode->gtRegNum, op1->gtRegNum, cvtOption);
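// e.g. 'fcvt d0, s0' converts single to double; 'fcvt s0, d0' converts double to single.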
2657 else if (treeNode->gtRegNum != op1->gtRegNum)
2659 // Double-to-double or float-to-float cast: emit a move instruction.
2660 getEmitter()->emitIns_R_R(INS_mov, emitTypeSize(treeNode), treeNode->gtRegNum, op1->gtRegNum);
2663 #endif
2665 genProduceReg(treeNode);
2668 //------------------------------------------------------------------------
2669 // genCreateAndStoreGCInfo: Create and record GC Info for the function.
2671 void CodeGen::genCreateAndStoreGCInfo(unsigned codeSize,
2672 unsigned prologSize,
2673 unsigned epilogSize DEBUGARG(void* codePtr))
2675 IAllocator* allowZeroAlloc = new (compiler, CMK_GC) AllowZeroAllocator(compiler->getAllocatorGC());
2676 GcInfoEncoder* gcInfoEncoder = new (compiler, CMK_GC)
2677 GcInfoEncoder(compiler->info.compCompHnd, compiler->info.compMethodInfo, allowZeroAlloc, NOMEM);
2678 assert(gcInfoEncoder != nullptr);
2680 // Follow the code pattern of the x86 gc info encoder (genCreateAndStoreGCInfoJIT32).
2681 gcInfo.gcInfoBlockHdrSave(gcInfoEncoder, codeSize, prologSize);
2683 // We keep the call count for the second call to gcMakeRegPtrTable() below.
2684 unsigned callCnt = 0;
2686 // First we figure out the encoder ID's for the stack slots and registers.
2687 gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_ASSIGN_SLOTS, &callCnt);
2689 // Now we've requested all the slots we'll need; "finalize" these (make more compact data structures for them).
2690 gcInfoEncoder->FinalizeSlotIds();
2692 // Now we can actually use those slot ID's to declare live ranges.
2693 gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_DO_WORK, &callCnt);
2695 #ifdef _TARGET_ARM64_
2697 if (compiler->opts.compDbgEnC)
2699 // what we have to preserve is called the "frame header" (see comments in VM\eetwain.cpp)
2703 // -saved 'this' pointer and bool for synchronized methods
2705 // 4 slots for RBP + return address + RSI + RDI
2706 int preservedAreaSize = 4 * REGSIZE_BYTES;
2708 if (compiler->info.compFlags & CORINFO_FLG_SYNCH)
2710 if (!(compiler->info.compFlags & CORINFO_FLG_STATIC))
2711 preservedAreaSize += REGSIZE_BYTES;
2713 preservedAreaSize += 1; // bool for synchronized methods
2716 // Used to signal both that the method is compiled for EnC, and also the size of the block at the top of the
2717 // frame.
2718 gcInfoEncoder->SetSizeOfEditAndContinuePreservedArea(preservedAreaSize);
2721 #endif // _TARGET_ARM64_
2723 gcInfoEncoder->Build();
2725 // GC Encoder automatically puts the GC info in the right spot using ICorJitInfo::allocGCInfo(size_t)
2726 // let's save the values anyway for debugging purposes
2727 compiler->compInfoBlkAddr = gcInfoEncoder->Emit();
2728 compiler->compInfoBlkSize = 0; // not exposed by the GCEncoder interface
2731 //-------------------------------------------------------------------------------------------
2732 // genJumpKindsForTree: Determine the number and kinds of conditional branches
2733 // necessary to implement the given GT_CMP node
2736 // cmpTree - (input) The GenTree relop node that is used to set the condition codes
2738 // jmpKind[2] - (output) One or two conditional branch instructions
2739 // jmpToTrueLabel[2] - (output) On Arm64 both branches will always branch to the true label
2742 // Sets the proper values into the array elements of jmpKind[] and jmpToTrueLabel[]
2745 // At least one conditional branch instruction will be returned.
2746 // Typically only one conditional branch is needed
2747 // and the second jmpKind[] value is set to EJ_NONE
2749 void CodeGen::genJumpKindsForTree(GenTreePtr cmpTree, emitJumpKind jmpKind[2], bool jmpToTrueLabel[2])
2751 // On ARM both branches will always branch to the true label
2752 jmpToTrueLabel[0] = true;
2753 jmpToTrueLabel[1] = true;
2755 // For integer comparisons just use genJumpKindForOper
2756 if (!varTypeIsFloating(cmpTree->gtOp.gtOp1))
2758 CompareKind compareKind = ((cmpTree->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED;
2759 jmpKind[0] = genJumpKindForOper(cmpTree->gtOper, compareKind);
2760 jmpKind[1] = EJ_NONE;
2762 else // We have a Floating Point Compare operation
2764 assert(cmpTree->OperIsCompare());
2766 // For details on this mapping, see the ARM Condition Code table
2767 // at section A8.3 in the ARMv7 architecture manual or
2768 // at section C1.2.3 in the ARMV8 architecture manual.
2770 // We must check the GTF_RELOP_NAN_UN to find out
2771 // if we need to branch when we have a NaN operand.
2773 if ((cmpTree->gtFlags & GTF_RELOP_NAN_UN) != 0)
2775 // Must branch if we have a NaN (unordered)
2776 switch (cmpTree->gtOper)
2779 jmpKind[0] = EJ_eq; // branch or set when equal (and no NaN's)
2780 jmpKind[1] = EJ_vs; // branch or set when we have a NaN
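// This is the one case that needs two branches, e.g. 'beq <trueLabel>' followed by 'bvs <trueLabel>'.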
2784 jmpKind[0] = EJ_ne; // branch or set when not equal (or have NaN's)
2785 jmpKind[1] = EJ_NONE;
2789 jmpKind[0] = EJ_lt; // branch or set when less than (or have NaN's)
2790 jmpKind[1] = EJ_NONE;
2794 jmpKind[0] = EJ_le; // branch or set when less than or equal (or have NaN's)
2795 jmpKind[1] = EJ_NONE;
2799 jmpKind[0] = EJ_hi; // branch or set when greater than (or have NaN's)
2800 jmpKind[1] = EJ_NONE;
2804 jmpKind[0] = EJ_hs; // branch or set when greater than or equal (or have NaN's)
2805 jmpKind[1] = EJ_NONE;
2812 else // ((cmpTree->gtFlags & GTF_RELOP_NAN_UN) == 0)
2814 // Do not branch if we have a NaN (unordered)
2815 switch (cmpTree->gtOper)
2818 jmpKind[0] = EJ_eq; // branch or set when equal (and no NaN's)
2819 jmpKind[1] = EJ_NONE;
2823 jmpKind[0] = EJ_gt; // branch or set when greater than (and no NaN's)
2824 jmpKind[1] = EJ_lo; // branch or set when less than (and no NaN's)
2828 jmpKind[0] = EJ_lo; // branch or set when less than (and no NaN's)
2829 jmpKind[1] = EJ_NONE;
2833 jmpKind[0] = EJ_ls; // branch or set when less than or equal (and no NaN's)
2834 jmpKind[1] = EJ_NONE;
2838 jmpKind[0] = EJ_gt; // branch or set when greater than (and no NaN's)
2839 jmpKind[1] = EJ_NONE;
2843 jmpKind[0] = EJ_ge; // branch or set when greater than or equal (and no NaN's)
2844 jmpKind[1] = EJ_NONE;
2854 //------------------------------------------------------------------------
2855 // genCodeForJumpTrue: Generates code for jmpTrue statement.
2858 // tree - The GT_JTRUE tree node.
2863 void CodeGen::genCodeForJumpTrue(GenTreePtr tree)
2865 GenTree* cmp = tree->gtOp.gtOp1;
2866 assert(cmp->OperIsCompare());
2867 assert(compiler->compCurBB->bbJumpKind == BBJ_COND);
2869 // Get the "kind" and type of the comparison. Note that whether it is an unsigned cmp
2870 // is governed by a flag, NOT by the inherent type of the node.
2871 emitJumpKind jumpKind[2];
2872 bool branchToTrueLabel[2];
2873 genJumpKindsForTree(cmp, jumpKind, branchToTrueLabel);
2874 assert(jumpKind[0] != EJ_NONE);
2876 // On ARM the branches will always branch to the true label
2877 assert(branchToTrueLabel[0]);
2878 inst_JMP(jumpKind[0], compiler->compCurBB->bbJumpDest);
2880 if (jumpKind[1] != EJ_NONE)
2882 // the second conditional branch always has to be to the true label
2883 assert(branchToTrueLabel[1]);
2884 inst_JMP(jumpKind[1], compiler->compCurBB->bbJumpDest);
2888 #if defined(_TARGET_ARM_)
2890 //------------------------------------------------------------------------
2891 // genCodeForJcc: Produce code for a GT_JCC node.
2896 void CodeGen::genCodeForJcc(GenTreeCC* tree)
2898 assert(compiler->compCurBB->bbJumpKind == BBJ_COND);
2900 CompareKind compareKind = ((tree->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED;
2901 emitJumpKind jumpKind = genJumpKindForOper(tree->gtCondition, compareKind);
2903 inst_JMP(jumpKind, compiler->compCurBB->bbJumpDest);
2906 //------------------------------------------------------------------------
2907 // genCodeForSetcc: Generates code for a GT_SETCC node.
2910 // setcc - the GT_SETCC node
2913 // The condition represents an integer comparison. This code doesn't
2914 // have the necessary logic to deal with floating point comparisons,
2915 // in fact it doesn't even know if the comparison is integer or floating
2916 // point because SETCC nodes do not have any operands.
2919 void CodeGen::genCodeForSetcc(GenTreeCC* setcc)
2921 regNumber dstReg = setcc->gtRegNum;
2922 CompareKind compareKind = setcc->IsUnsigned() ? CK_UNSIGNED : CK_SIGNED;
2923 emitJumpKind jumpKind = genJumpKindForOper(setcc->gtCondition, compareKind);
2925 assert(genIsValidIntReg(dstReg));
2926 // Make sure nobody is setting GTF_RELOP_NAN_UN on this node as it is ignored.
2927 assert((setcc->gtFlags & GTF_RELOP_NAN_UN) == 0);
2929 // Emit code like this (roughly):
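//     (flags already set by the compare)
//     b<cond>  labelTrue
//     mov      dstReg, #0
//     b        labelNext
//   labelTrue:
//     mov      dstReg, #1
//   labelNext: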
2939 BasicBlock* labelTrue = genCreateTempLabel();
2940 getEmitter()->emitIns_J(emitter::emitJumpKindToIns(jumpKind), labelTrue);
2942 getEmitter()->emitIns_R_I(INS_mov, emitActualTypeSize(setcc->TypeGet()), dstReg, 0);
2944 BasicBlock* labelNext = genCreateTempLabel();
2945 getEmitter()->emitIns_J(INS_b, labelNext);
2947 genDefineTempLabel(labelTrue);
2948 getEmitter()->emitIns_R_I(INS_mov, emitActualTypeSize(setcc->TypeGet()), dstReg, 1);
2949 genDefineTempLabel(labelNext);
2951 genProduceReg(setcc);
2954 #endif // defined(_TARGET_ARM_)
2956 //------------------------------------------------------------------------
2957 // genCodeForStoreBlk: Produce code for a GT_STORE_OBJ/GT_STORE_DYN_BLK/GT_STORE_BLK node.
2962 void CodeGen::genCodeForStoreBlk(GenTreeBlk* blkOp)
2964 assert(blkOp->OperIs(GT_STORE_OBJ, GT_STORE_DYN_BLK, GT_STORE_BLK));
2966 if (blkOp->OperIs(GT_STORE_OBJ) && blkOp->OperIsCopyBlkOp())
2968 assert(blkOp->AsObj()->gtGcPtrCount != 0);
2969 genCodeForCpObj(blkOp->AsObj());
2970 return;
2973 if (blkOp->gtBlkOpGcUnsafe)
2975 getEmitter()->emitDisableGC();
2977 bool isCopyBlk = blkOp->OperIsCopyBlkOp();
2979 switch (blkOp->gtBlkOpKind)
2981 case GenTreeBlk::BlkOpKindHelper:
2984 genCodeForCpBlk(blkOp);
2988 genCodeForInitBlk(blkOp);
2992 case GenTreeBlk::BlkOpKindUnroll:
2995 genCodeForCpBlkUnroll(blkOp);
2999 genCodeForInitBlkUnroll(blkOp);
3007 if (blkOp->gtBlkOpGcUnsafe)
3009 getEmitter()->emitEnableGC();
3013 //------------------------------------------------------------------------
3014 // genScaledAdd: A helper for genLeaInstruction.
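// Computes targetReg = baseReg + (indexReg << scale); e.g. with scale == 3 this emits
// 'add x0, x1, x2, LSL #3' on ARM64, or the equivalent 'add r0, r1, r2, LSL #3' on ARM32.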
3016 void CodeGen::genScaledAdd(emitAttr attr, regNumber targetReg, regNumber baseReg, regNumber indexReg, int scale)
3018 emitter* emit = getEmitter();
3019 #if defined(_TARGET_ARM_)
3020 emit->emitIns_R_R_R_I(INS_add, attr, targetReg, baseReg, indexReg, scale, INS_FLAGS_DONT_CARE, INS_OPTS_LSL);
3021 #elif defined(_TARGET_ARM64_)
3022 emit->emitIns_R_R_R_I(INS_add, attr, targetReg, baseReg, indexReg, scale, INS_OPTS_LSL);
3023 #endif
3026 //------------------------------------------------------------------------
3027 // genLeaInstruction: Produce code for a GT_LEA node.
3032 void CodeGen::genLeaInstruction(GenTreeAddrMode* lea)
3034 genConsumeOperands(lea);
3035 emitter* emit = getEmitter();
3036 emitAttr size = emitTypeSize(lea);
3037 unsigned offset = lea->gtOffset;
3039 // On ARM we can only load addresses of the form:
3041 // [Base + index*scale]
3042 // [Base + Offset]
3043 // [Literal] (PC-Relative)
3045 // So for the case of a LEA node of the form [Base + Index*Scale + Offset] we will generate:
3046 // destReg = baseReg + indexReg * scale;
3047 // destReg = destReg + offset;
3049 // TODO-ARM64-CQ: The purpose of the GT_LEA node is to directly reflect a single target architecture
3050 // addressing mode instruction. Currently we're 'cheating' by producing one or more
3051 // instructions to generate the addressing mode so we need to modify lowering to
3052 // produce LEAs that are a 1:1 relationship to the ARM64 architecture.
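// For example (a sketch), a LEA of the form [x1 + x2*8 + 0x20] with a small offset becomes:
//     add <tmpReg>, x1, x2, LSL #3
//     add <dstReg>, <tmpReg>, #0x20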
3053 if (lea->Base() && lea->Index())
3055 GenTree* memBase = lea->Base();
3056 GenTree* index = lea->Index();
3057 unsigned offset = lea->gtOffset;
3061 assert(isPow2(lea->gtScale));
3062 BitScanForward(&lsl, lea->gtScale);
3068 regNumber tmpReg = lea->GetSingleTempReg();
3070 if (emitter::emitIns_valid_imm_for_add(offset))
3074 // Generate code to set tmpReg = base + index*scale
3075 genScaledAdd(size, tmpReg, memBase->gtRegNum, index->gtRegNum, lsl);
3079 // Generate code to set tmpReg = base + index
3080 emit->emitIns_R_R_R(INS_add, size, tmpReg, memBase->gtRegNum, index->gtRegNum);
3083 // Then compute target reg from [tmpReg + offset]
3084 emit->emitIns_R_R_I(INS_add, size, lea->gtRegNum, tmpReg, offset);
3086 else // large offset
3088 // First load/store tmpReg with the large offset constant
3089 instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, offset);
3090 // Then add the base register
3092 emit->emitIns_R_R_R(INS_add, size, tmpReg, tmpReg, memBase->gtRegNum);
3094 noway_assert(tmpReg != index->gtRegNum);
3096 // Then compute target reg from [tmpReg + index*scale]
3097 genScaledAdd(size, lea->gtRegNum, tmpReg, index->gtRegNum, lsl);
3104 // Then compute target reg from [base + index*scale]
3105 genScaledAdd(size, lea->gtRegNum, memBase->gtRegNum, index->gtRegNum, lsl);
3109 // Then compute target reg from [base + index]
3110 emit->emitIns_R_R_R(INS_add, size, lea->gtRegNum, memBase->gtRegNum, index->gtRegNum);
3114 else if (lea->Base())
3116 GenTree* memBase = lea->Base();
3118 if (emitter::emitIns_valid_imm_for_add(offset))
3120 if (offset != 0)
3122 // Then compute target reg from [memBase + offset]
3123 emit->emitIns_R_R_I(INS_add, size, lea->gtRegNum, memBase->gtRegNum, offset);
3125 else // offset is zero
3127 emit->emitIns_R_R(INS_mov, size, lea->gtRegNum, memBase->gtRegNum);
3132 // We require a tmpReg to hold the offset
3133 regNumber tmpReg = lea->GetSingleTempReg();
3135 // First load tmpReg with the large offset constant
3136 instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, offset);
3138 // Then compute target reg from [memBase + tmpReg]
3139 emit->emitIns_R_R_R(INS_add, size, lea->gtRegNum, memBase->gtRegNum, tmpReg);
3142 else if (lea->Index())
3144 // If we encounter a GT_LEA node without a base, it means it was produced
3145 // while attempting to optimize an arbitrary arithmetic expression during lowering.
3146 // This is currently disabled on ARM64 since we need to adjust lowering to account
3147 // for the simpler instructions ARM64 supports.
3148 // TODO-ARM64-CQ: Fix this and let LEA optimize arithmetic trees too.
3149 assert(!"We shouldn't see a baseless address computation during CodeGen for ARM64");
3155 //------------------------------------------------------------------------
3156 // isStructReturn: Returns whether the 'treeNode' is returning a struct.
3159 // treeNode - The tree node to evaluate whether is a struct return.
3162 // Returns true if the 'treeNode' is a GT_RETURN node of type struct.
3163 // Otherwise returns false.
3165 bool CodeGen::isStructReturn(GenTreePtr treeNode)
3167 // This method could be called for a 'treeNode' of GT_RETFILT or GT_RETURN.
3168 // For GT_RETFILT, the return is always
3169 // a bool or a void, for the end of a finally block.
3170 noway_assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT);
3172 return varTypeIsStruct(treeNode);
3175 //------------------------------------------------------------------------
3176 // genStructReturn: Generates code for returning a struct.
3179 // treeNode - The GT_RETURN tree node.
3185 // op1 of GT_RETURN node is either GT_LCL_VAR or multi-reg GT_CALL
3186 void CodeGen::genStructReturn(GenTreePtr treeNode)
3188 assert(treeNode->OperGet() == GT_RETURN);
3189 assert(isStructReturn(treeNode));
3190 GenTreePtr op1 = treeNode->gtGetOp1();
3192 if (op1->OperGet() == GT_LCL_VAR)
3194 GenTreeLclVarCommon* lclVar = op1->AsLclVarCommon();
3195 LclVarDsc* varDsc = &(compiler->lvaTable[lclVar->gtLclNum]);
3196 var_types lclType = genActualType(varDsc->TypeGet());
3198 // Currently only multireg TYP_STRUCT types such as HFAs (ARM32, ARM64) and 16-byte structs (ARM64) are supported.
3199 // In the future we could have FEATURE_SIMD types like TYP_SIMD16.
3200 assert(lclType == TYP_STRUCT);
3201 assert(varDsc->lvIsMultiRegRet);
3203 ReturnTypeDesc retTypeDesc;
3206 retTypeDesc.InitializeStructReturnType(compiler, varDsc->lvVerTypeInfo.GetClassHandle());
3207 regCount = retTypeDesc.GetReturnRegCount();
3209 assert(regCount >= 2);
3210 assert(op1->isContained());
3212 // Copy var on stack into ABI return registers
3213 // TODO: This could be optimized by combining two float loads into one double load.
3215 for (unsigned i = 0; i < regCount; ++i)
3217 var_types type = retTypeDesc.GetReturnRegType(i);
3218 regNumber reg = retTypeDesc.GetABIReturnReg(i);
3219 getEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), reg, lclVar->gtLclNum, offset);
3220 offset += genTypeSize(type);
3223 else // op1 must be multi-reg GT_CALL
3226 NYI_ARM("struct return from multi-reg GT_CALL");
3228 assert(op1->IsMultiRegCall() || op1->IsCopyOrReloadOfMultiRegCall());
3230 genConsumeRegs(op1);
3232 GenTree* actualOp1 = op1->gtSkipReloadOrCopy();
3233 GenTreeCall* call = actualOp1->AsCall();
3235 ReturnTypeDesc* pRetTypeDesc;
3237 unsigned matchingCount = 0;
3239 pRetTypeDesc = call->GetReturnTypeDesc();
3240 regCount = pRetTypeDesc->GetReturnRegCount();
3242 var_types regType[MAX_RET_REG_COUNT];
3243 regNumber returnReg[MAX_RET_REG_COUNT];
3244 regNumber allocatedReg[MAX_RET_REG_COUNT];
3245 regMaskTP srcRegsMask = 0;
3246 regMaskTP dstRegsMask = 0;
3247 bool needToShuffleRegs = false; // Set to true if we have to move any registers
3249 for (unsigned i = 0; i < regCount; ++i)
3251 regType[i] = pRetTypeDesc->GetReturnRegType(i);
3252 returnReg[i] = pRetTypeDesc->GetABIReturnReg(i);
3254 regNumber reloadReg = REG_NA;
3255 if (op1->IsCopyOrReload())
3257 // GT_COPY/GT_RELOAD will have valid reg for those positions
3258 // that need to be copied or reloaded.
3259 reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(i);
3262 if (reloadReg != REG_NA)
3264 allocatedReg[i] = reloadReg;
3268 allocatedReg[i] = call->GetRegNumByIdx(i);
3271 if (returnReg[i] == allocatedReg[i])
3275 else // We need to move this value
3277 // We want to move the value from allocatedReg[i] into returnReg[i]
3278 // so record these two registers in the src and dst masks
3280 srcRegsMask |= genRegMask(allocatedReg[i]);
3281 dstRegsMask |= genRegMask(returnReg[i]);
3283 needToShuffleRegs = true;
3287 if (needToShuffleRegs)
3289 assert(matchingCount < regCount);
3291 unsigned remainingRegCount = regCount - matchingCount;
3292 regMaskTP extraRegMask = treeNode->gtRsvdRegs;
3294 while (remainingRegCount > 0)
3296 // set 'available' to the 'dst' registers that are not currently holding 'src' registers
3298 regMaskTP availableMask = dstRegsMask & ~srcRegsMask;
3303 var_types curType = TYP_UNKNOWN;
3304 regNumber freeUpReg = REG_NA;
3306 if (availableMask == 0)
3308 // Circular register dependencies,
3309 // so just free up the lowest register in dstRegsMask by moving it to the 'extra' register.
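// e.g. for a two-register swap where the ABI regs are {r0, r1} but the allocated regs are
// {r1, r0}: first move r0 into the extra register, then r1 -> r0, and finally extra -> r1.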
3311 assert(dstRegsMask == srcRegsMask); // this has to be true for us to reach here
3312 assert(extraRegMask != 0); // we require an 'extra' register
3313 assert((extraRegMask & ~dstRegsMask) != 0); // it can't be part of dstRegsMask
3315 availableMask = extraRegMask & ~dstRegsMask;
3317 regMaskTP srcMask = genFindLowestBit(srcRegsMask);
3318 freeUpReg = genRegNumFromMask(srcMask);
3321 dstMask = genFindLowestBit(availableMask);
3322 dstReg = genRegNumFromMask(dstMask);
3325 if (freeUpReg != REG_NA)
3327 // We will free up the srcReg by moving it to dstReg which is an extra register
3331 // Find the 'srcReg' and set 'curType', change allocatedReg[] to dstReg
3332 // and add the new register mask bit to srcRegsMask
3334 for (unsigned i = 0; i < regCount; ++i)
3336 if (allocatedReg[i] == srcReg)
3338 curType = regType[i];
3339 allocatedReg[i] = dstReg;
3340 srcRegsMask |= genRegMask(dstReg);
3344 else // The normal case
3346 // Find the 'srcReg' and set 'curType'
3348 for (unsigned i = 0; i < regCount; ++i)
3350 if (returnReg[i] == dstReg)
3352 srcReg = allocatedReg[i];
3353 curType = regType[i];
3356 // After we perform this move we will have one less register to set up.
3357 remainingRegCount--;
3359 assert(curType != TYP_UNKNOWN);
3361 inst_RV_RV(ins_Copy(curType), dstReg, srcReg, curType);
3363 // Clear the appropriate bits in srcRegsMask and dstRegsMask
3364 srcRegsMask &= ~genRegMask(srcReg);
3365 dstRegsMask &= ~genRegMask(dstReg);
3367 } // while (remainingRegCount > 0)
3369 } // (needToShuffleRegs)
3371 } // op1 must be multi-reg GT_CALL
3373 #endif // _TARGET_ARMARCH_
3375 #endif // !LEGACY_BACKEND