From 6b9734a46cbd9c2ada2ab9ae7c3d61c3c1200ec4 Mon Sep 17 00:00:00 2001 From: Bruce Forstall Date: Mon, 10 Apr 2017 19:01:34 -0700 Subject: [PATCH] Create common files for ARM32 and ARM64 common code Create codegenarmarch.cpp, lowerarmarch.cpp, lsraarmarch.cpp. For codegenarmarch.cpp, I copied over a bunch of functions that are almost identical between arm and arm64. I left the functions intact (didn't change anything), so created one for ARM32 and one for ARM64, unless they were truly identical. A follow-up will be to actually merge the functions. For lowerarmarch.cpp and lsraarmarch.cpp, I did the function merge for nearly (or completely) identical functions, since there were so many that were so similar. --- src/jit/CMakeLists.txt | 6 + src/jit/codegenarm.cpp | 1132 ------------------ src/jit/codegenarm64.cpp | 1610 +------------------------- src/jit/codegenarmarch.cpp | 2705 ++++++++++++++++++++++++++++++++++++++++++++ src/jit/lowerarm.cpp | 267 ----- src/jit/lowerarm64.cpp | 284 ----- src/jit/lowerarmarch.cpp | 346 ++++++ src/jit/lsraarm.cpp | 752 ------------ src/jit/lsraarm64.cpp | 721 +----------- src/jit/lsraarmarch.cpp | 876 ++++++++++++++ 10 files changed, 3964 insertions(+), 4735 deletions(-) create mode 100644 src/jit/codegenarmarch.cpp create mode 100644 src/jit/lowerarmarch.cpp create mode 100644 src/jit/lsraarmarch.cpp diff --git a/src/jit/CMakeLists.txt b/src/jit/CMakeLists.txt index e8920a2..e2a9ca6 100644 --- a/src/jit/CMakeLists.txt +++ b/src/jit/CMakeLists.txt @@ -104,10 +104,13 @@ set( JIT_AMD64_SOURCES set( JIT_ARM_SOURCES ${JIT_ARM_LEGACY_SOURCES} + codegenarmarch.cpp codegenarm.cpp decomposelongs.cpp emitarm.cpp + lowerarmarch.cpp lowerarm.cpp + lsraarmarch.cpp lsraarm.cpp targetarm.cpp unwindarm.cpp @@ -127,9 +130,12 @@ set( JIT_I386_SOURCES ) set( JIT_ARM64_SOURCES + codegenarmarch.cpp codegenarm64.cpp emitarm64.cpp + lowerarmarch.cpp lowerarm64.cpp + lsraarmarch.cpp lsraarm64.cpp targetarm64.cpp unwindarm.cpp diff --git a/src/jit/codegenarm.cpp b/src/jit/codegenarm.cpp index 34c86e9..41bd804 100644 --- a/src/jit/codegenarm.cpp +++ b/src/jit/codegenarm.cpp @@ -24,22 +24,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #include "emit.h" //------------------------------------------------------------------------ -// genSetRegToIcon: Generate code that will set the given register to the integer constant. -// -void CodeGen::genSetRegToIcon(regNumber reg, ssize_t val, var_types type, insFlags flags) -{ - // Reg cannot be a FP reg - assert(!genIsValidFloatReg(reg)); - - // The only TYP_REF constant that can come this path is a managed 'null' since it is not - // relocatable. Other ref type constants (e.g. string objects) go through a different - // code path. - noway_assert(type != TYP_REF || val == 0); - - instGen_Set_Reg_To_Imm(emitActualTypeSize(type), reg, val, flags); -} - -//------------------------------------------------------------------------ // genCallFinally: Generate a call to the finally block. 
// BasicBlock* CodeGen::genCallFinally(BasicBlock* block) @@ -83,140 +67,6 @@ void CodeGen::genEHCatchRet(BasicBlock* block) getEmitter()->emitIns_R_L(INS_movt, EA_4BYTE_DSP_RELOC, block->bbJumpDest, REG_INTRET); } -//--------------------------------------------------------------------- -// genIntrinsic - generate code for a given intrinsic -// -// Arguments -// treeNode - the GT_INTRINSIC node -// -// Return value: -// None -// -void CodeGen::genIntrinsic(GenTreePtr treeNode) -{ - // Both operand and its result must be of the same floating point type. - GenTreePtr srcNode = treeNode->gtOp.gtOp1; - assert(varTypeIsFloating(srcNode)); - assert(srcNode->TypeGet() == treeNode->TypeGet()); - - // Right now only Abs/Round/Sqrt are treated as math intrinsics. - // - switch (treeNode->gtIntrinsic.gtIntrinsicId) - { - case CORINFO_INTRINSIC_Abs: - genConsumeOperands(treeNode->AsOp()); - getEmitter()->emitInsBinary(INS_vabs, emitTypeSize(treeNode), treeNode, srcNode); - break; - - case CORINFO_INTRINSIC_Round: - NYI_ARM("genIntrinsic for round - not implemented yet"); - break; - - case CORINFO_INTRINSIC_Sqrt: - genConsumeOperands(treeNode->AsOp()); - getEmitter()->emitInsBinary(INS_vsqrt, emitTypeSize(treeNode), treeNode, srcNode); - break; - - default: - assert(!"genIntrinsic: Unsupported intrinsic"); - unreached(); - } - - genProduceReg(treeNode); -} - -//--------------------------------------------------------------------- -// genPutArgStk - generate code for a GT_PUTARG_STK node -// -// Arguments -// treeNode - the GT_PUTARG_STK node -// -// Return value: -// None -// -void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode) -{ - assert(treeNode->OperGet() == GT_PUTARG_STK); - var_types targetType = treeNode->TypeGet(); - GenTreePtr source = treeNode->gtOp1; - emitter* emit = getEmitter(); - - // This is the varNum for our store operations, - // typically this is the varNum for the Outgoing arg space - // When we are generating a tail call it will be the varNum for arg0 - unsigned varNumOut; - unsigned argOffsetMax; // Records the maximum size of this area for assert checks - - // Get argument offset to use with 'varNumOut' - // Here we cross check that argument offset hasn't changed from lowering to codegen since - // we are storing arg slot number in GT_PUTARG_STK node in lowering phase. 
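//------------------------------------------------------------------------
// [Sketch] A minimal standalone model of the slot-number/offset cross-check
// described above and performed just below, assuming ARM32's 4-byte
// TARGET_POINTER_SIZE; the names here are illustrative, not the JIT's own.

#include <cassert>

const unsigned kPointerSize = 4; // stands in for TARGET_POINTER_SIZE on ARM32

// Lowering records a slot number on the GT_PUTARG_STK node; codegen
// rederives the byte offset and asserts it still matches the arg table.
unsigned OutgoingArgOffset(unsigned slotNum)
{
    return slotNum * kPointerSize;
}

void CrossCheckArgOffset(unsigned slotNumOnNode, unsigned slotNumInArgTable)
{
    assert(OutgoingArgOffset(slotNumOnNode) == OutgoingArgOffset(slotNumInArgTable));
}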
- unsigned argOffsetOut = treeNode->gtSlotNum * TARGET_POINTER_SIZE; - -#ifdef DEBUG - fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(treeNode->gtCall, treeNode); - assert(curArgTabEntry); - assert(argOffsetOut == (curArgTabEntry->slotNum * TARGET_POINTER_SIZE)); -#endif // DEBUG - - varNumOut = compiler->lvaOutgoingArgSpaceVar; - argOffsetMax = compiler->lvaOutgoingArgSpaceSize; - - bool isStruct = (targetType == TYP_STRUCT) || (source->OperGet() == GT_FIELD_LIST); - - if (!isStruct) // a normal non-Struct argument - { - instruction storeIns = ins_Store(targetType); - emitAttr storeAttr = emitTypeSize(targetType); - - // If it is contained then source must be the integer constant zero - if (source->isContained()) - { - assert(source->OperGet() == GT_CNS_INT); - assert(source->AsIntConCommon()->IconValue() == 0); - NYI("genPutArgStk: contained zero source"); - } - else - { - genConsumeReg(source); - emit->emitIns_S_R(storeIns, storeAttr, source->gtRegNum, varNumOut, argOffsetOut); - } - argOffsetOut += EA_SIZE_IN_BYTES(storeAttr); - assert(argOffsetOut <= argOffsetMax); // We can't write beyound the outgoing area area - } - else // We have some kind of a struct argument - { - assert(source->isContained()); // We expect that this node was marked as contained in LowerArm - - if (source->OperGet() == GT_FIELD_LIST) - { - // Deal with the multi register passed struct args. - GenTreeFieldList* fieldListPtr = source->AsFieldList(); - - // Evaluate each of the GT_FIELD_LIST items into their register - // and store their register into the outgoing argument area - for (; fieldListPtr != nullptr; fieldListPtr = fieldListPtr->Rest()) - { - GenTreePtr nextArgNode = fieldListPtr->gtOp.gtOp1; - genConsumeReg(nextArgNode); - - regNumber reg = nextArgNode->gtRegNum; - var_types type = nextArgNode->TypeGet(); - emitAttr attr = emitTypeSize(type); - - // Emit store instructions to store the registers produced by the GT_FIELD_LIST into the outgoing - // argument area - emit->emitIns_S_R(ins_Store(type), attr, reg, varNumOut, argOffsetOut); - argOffsetOut += EA_SIZE_IN_BYTES(attr); - assert(argOffsetOut <= argOffsetMax); // We can't write beyound the outgoing area area - } - } - else // We must have a GT_OBJ or a GT_LCL_VAR - { - NYI("genPutArgStk: GT_OBJ or GT_LCL_VAR source of struct type"); - } - } -} - //------------------------------------------------------------------------ // instGen_Set_Reg_To_Imm: Move an immediate value into an integer register. // @@ -1248,69 +1098,6 @@ void CodeGen::genLockedInstructions(GenTreeOp* treeNode) NYI("genLockedInstructions"); } -//---------------------------------------------------------------------------------- -// genMultiRegCallStoreToLocal: store multi-reg return value of a call node to a local -// -// Arguments: -// treeNode - Gentree of GT_STORE_LCL_VAR -// -// Return Value: -// None -// -// Assumption: -// The child of store is a multi-reg call node. -// genProduceReg() on treeNode is made by caller of this routine. -// -void CodeGen::genMultiRegCallStoreToLocal(GenTreePtr treeNode) -{ - assert(treeNode->OperGet() == GT_STORE_LCL_VAR); - - // Longs are returned in two return registers on Arm32. - assert(varTypeIsLong(treeNode)); - - // Assumption: current Arm32 implementation requires that a multi-reg long - // var in 'var = call' is flagged as lvIsMultiRegRet to prevent it from - // being promoted. 
- unsigned lclNum = treeNode->AsLclVarCommon()->gtLclNum; - LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]); - noway_assert(varDsc->lvIsMultiRegRet); - - GenTree* op1 = treeNode->gtGetOp1(); - GenTree* actualOp1 = op1->gtSkipReloadOrCopy(); - GenTreeCall* call = actualOp1->AsCall(); - assert(call->HasMultiRegRetVal()); - - genConsumeRegs(op1); - - ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc(); - unsigned regCount = retTypeDesc->GetReturnRegCount(); - assert(regCount <= MAX_RET_REG_COUNT); - - // Stack store - int offset = 0; - for (unsigned i = 0; i < regCount; ++i) - { - var_types type = retTypeDesc->GetReturnRegType(i); - regNumber reg = call->GetRegNumByIdx(i); - if (op1->IsCopyOrReload()) - { - // GT_COPY/GT_RELOAD will have valid reg for those positions - // that need to be copied or reloaded. - regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(i); - if (reloadReg != REG_NA) - { - reg = reloadReg; - } - } - - assert(reg != REG_NA); - getEmitter()->emitIns_S_R(ins_Store(type), emitTypeSize(type), reg, lclNum, offset); - offset += genTypeSize(type); - } - - varDsc->lvRegNum = REG_STK; -} - //-------------------------------------------------------------------------------------- // genLclHeap: Generate code for localloc // @@ -1680,223 +1467,6 @@ void CodeGen::genJumpTable(GenTree* treeNode) } //------------------------------------------------------------------------ -// genRangeCheck: generate code for GT_ARR_BOUNDS_CHECK node. -// -void CodeGen::genRangeCheck(GenTreePtr oper) -{ - noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK); - GenTreeBoundsChk* bndsChk = oper->AsBoundsChk(); - - GenTreePtr arrIdx = bndsChk->gtIndex->gtEffectiveVal(); - GenTreePtr arrLen = bndsChk->gtArrLen->gtEffectiveVal(); - GenTreePtr arrRef = NULL; - int lenOffset = 0; - - genConsumeIfReg(arrIdx); - genConsumeIfReg(arrLen); - - GenTree * src1, *src2; - emitJumpKind jmpKind; - - if (arrIdx->isContainedIntOrIImmed()) - { - // To encode using a cmp immediate, we place the - // constant operand in the second position - src1 = arrLen; - src2 = arrIdx; - jmpKind = genJumpKindForOper(GT_LE, CK_UNSIGNED); - } - else - { - src1 = arrIdx; - src2 = arrLen; - jmpKind = genJumpKindForOper(GT_GE, CK_UNSIGNED); - } - - getEmitter()->emitInsBinary(INS_cmp, emitAttr(TYP_INT), src1, src2); - genJumpToThrowHlpBlk(jmpKind, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB); -} - -//------------------------------------------------------------------------ -// genOffsetOfMDArrayLowerBound: Returns the offset from the Array object to the -// lower bound for the given dimension. -// -// Arguments: -// elemType - the element type of the array -// rank - the rank of the array -// dimension - the dimension for which the lower bound offset will be returned. -// -// Return Value: -// The offset. -// TODO-Cleanup: move to CodeGenCommon.cpp - -// static -unsigned CodeGen::genOffsetOfMDArrayLowerBound(var_types elemType, unsigned rank, unsigned dimension) -{ - // Note that the lower bound and length fields of the Array object are always TYP_INT - return compiler->eeGetArrayDataOffset(elemType) + genTypeSize(TYP_INT) * (dimension + rank); -} - -//------------------------------------------------------------------------ -// genOffsetOfMDArrayLength: Returns the offset from the Array object to the -// size for the given dimension. -// -// Arguments: -// elemType - the element type of the array -// rank - the rank of the array -// dimension - the dimension for which the lower bound offset will be returned. 
-// -// Return Value: -// The offset. -// TODO-Cleanup: move to CodeGenCommon.cpp - -// static -unsigned CodeGen::genOffsetOfMDArrayDimensionSize(var_types elemType, unsigned rank, unsigned dimension) -{ - // Note that the lower bound and length fields of the Array object are always TYP_INT - return compiler->eeGetArrayDataOffset(elemType) + genTypeSize(TYP_INT) * dimension; -} - -//------------------------------------------------------------------------ -// genCodeForArrIndex: Generates code to bounds check the index for one dimension of an array reference, -// producing the effective index by subtracting the lower bound. -// -// Arguments: -// arrIndex - the node for which we're generating code -// -// Return Value: -// None. -// - -void CodeGen::genCodeForArrIndex(GenTreeArrIndex* arrIndex) -{ - emitter* emit = getEmitter(); - GenTreePtr arrObj = arrIndex->ArrObj(); - GenTreePtr indexNode = arrIndex->IndexExpr(); - regNumber arrReg = genConsumeReg(arrObj); - regNumber indexReg = genConsumeReg(indexNode); - regNumber tgtReg = arrIndex->gtRegNum; - noway_assert(tgtReg != REG_NA); - - // We will use a temp register to load the lower bound and dimension size values - // - regMaskTP tmpRegsMask = arrIndex->gtRsvdRegs; // there will be two bits set - tmpRegsMask &= ~genRegMask(tgtReg); // remove the bit for 'tgtReg' from 'tmpRegsMask' - - regMaskTP tmpRegMask = genFindLowestBit(tmpRegsMask); // set tmpRegMsk to a one-bit mask - regNumber tmpReg = genRegNumFromMask(tmpRegMask); // set tmpReg from that mask - noway_assert(tmpReg != REG_NA); - - assert(tgtReg != tmpReg); - - unsigned dim = arrIndex->gtCurrDim; - unsigned rank = arrIndex->gtArrRank; - var_types elemType = arrIndex->gtArrElemType; - unsigned offset; - - offset = genOffsetOfMDArrayLowerBound(elemType, rank, dim); - emit->emitIns_R_R_I(ins_Load(TYP_INT), EA_4BYTE, tmpReg, arrReg, offset); // a 4 BYTE sign extending load - emit->emitIns_R_R_R(INS_sub, EA_4BYTE, tgtReg, indexReg, tmpReg); - - offset = genOffsetOfMDArrayDimensionSize(elemType, rank, dim); - emit->emitIns_R_R_I(ins_Load(TYP_INT), EA_4BYTE, tmpReg, arrReg, offset); // a 4 BYTE sign extending load - emit->emitIns_R_R(INS_cmp, EA_4BYTE, tgtReg, tmpReg); - - emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED); - genJumpToThrowHlpBlk(jmpGEU, SCK_RNGCHK_FAIL); - - genProduceReg(arrIndex); -} - -//------------------------------------------------------------------------ -// genCodeForArrOffset: Generates code to compute the flattened array offset for -// one dimension of an array reference: -// result = (prevDimOffset * dimSize) + effectiveIndex -// where dimSize is obtained from the arrObj operand -// -// Arguments: -// arrOffset - the node for which we're generating code -// -// Return Value: -// None. -// -// Notes: -// dimSize and effectiveIndex are always non-negative, the former by design, -// and the latter because it has been normalized to be zero-based. 
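//------------------------------------------------------------------------
// [Sketch] A standalone restatement of the arithmetic in the two helpers
// above and the function below, assuming the MD-array header lays out 'rank'
// INT dimension sizes followed by 'rank' INT lower bounds after the
// element-type-dependent data offset. Names are invented for illustration.

#include <cstddef>

const size_t kIntSize = 4; // lower bounds and lengths are always TYP_INT

size_t DimensionSizeOffset(size_t dataOffset, unsigned dim)
{
    return dataOffset + kIntSize * dim;
}

size_t LowerBoundOffset(size_t dataOffset, unsigned rank, unsigned dim)
{
    return dataOffset + kIntSize * (rank + dim);
}

// genCodeForArrOffset's formula: fold one dimension into the running
// flattened offset; effectiveIndex has already had the lower bound
// subtracted by genCodeForArrIndex.
int FlattenedOffset(int prevDimOffset, int dimSize, int effectiveIndex)
{
    return prevDimOffset * dimSize + effectiveIndex;
}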
- -void CodeGen::genCodeForArrOffset(GenTreeArrOffs* arrOffset) -{ - GenTreePtr offsetNode = arrOffset->gtOffset; - GenTreePtr indexNode = arrOffset->gtIndex; - regNumber tgtReg = arrOffset->gtRegNum; - - noway_assert(tgtReg != REG_NA); - - if (!offsetNode->IsIntegralConst(0)) - { - emitter* emit = getEmitter(); - regNumber offsetReg = genConsumeReg(offsetNode); - noway_assert(offsetReg != REG_NA); - regNumber indexReg = genConsumeReg(indexNode); - noway_assert(indexReg != REG_NA); - GenTreePtr arrObj = arrOffset->gtArrObj; - regNumber arrReg = genConsumeReg(arrObj); - noway_assert(arrReg != REG_NA); - regMaskTP tmpRegMask = arrOffset->gtRsvdRegs; - regNumber tmpReg = genRegNumFromMask(tmpRegMask); - noway_assert(tmpReg != REG_NA); - unsigned dim = arrOffset->gtCurrDim; - unsigned rank = arrOffset->gtArrRank; - var_types elemType = arrOffset->gtArrElemType; - unsigned offset = genOffsetOfMDArrayDimensionSize(elemType, rank, dim); - - // Load tmpReg with the dimension size - emit->emitIns_R_R_I(ins_Load(TYP_INT), EA_4BYTE, tmpReg, arrReg, offset); // a 4 BYTE sign extending load - - // Evaluate tgtReg = offsetReg*dim_size + indexReg. - emit->emitIns_R_R_R(INS_MUL, EA_4BYTE, tgtReg, tmpReg, offsetReg); - emit->emitIns_R_R_R(INS_add, EA_4BYTE, tgtReg, tgtReg, indexReg); - } - else - { - regNumber indexReg = genConsumeReg(indexNode); - if (indexReg != tgtReg) - { - inst_RV_RV(INS_mov, tgtReg, indexReg, TYP_INT); - } - } - genProduceReg(arrOffset); -} - -//------------------------------------------------------------------------ -// indirForm: Make a temporary indir we can feed to pattern matching routines -// in cases where we don't want to instantiate all the indirs that happen. -// -GenTreeIndir CodeGen::indirForm(var_types type, GenTree* base) -{ - GenTreeIndir i(GT_IND, type, base, nullptr); - i.gtRegNum = REG_NA; - // has to be nonnull (because contained nodes can't be the last in block) - // but don't want it to be a valid pointer - i.gtNext = (GenTree*)(-1); - return i; -} - -//------------------------------------------------------------------------ -// intForm: Make a temporary int we can feed to pattern matching routines -// in cases where we don't want to instantiate. -// -GenTreeIntCon CodeGen::intForm(var_types type, ssize_t value) -{ - GenTreeIntCon i(type, value); - i.gtRegNum = REG_NA; - // has to be nonnull (because contained nodes can't be the last in block) - // but don't want it to be a valid pointer - i.gtNext = (GenTree*)(-1); - return i; -} - -//------------------------------------------------------------------------ // genGetInsForOper: Return instruction encoding of the operation tree. // instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type) @@ -1972,59 +1542,6 @@ instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type) return ins; } -//------------------------------------------------------------------------ -// genCodeForShift: Generates the code sequence for a GenTree node that -// represents a bit shift or rotate operation (<<, >>, >>>, rol, ror). -// -// Arguments: -// tree - the bit shift node (that specifies the type of bit shift to perform). -// -// Assumptions: -// a) All GenTrees are register allocated. 
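//------------------------------------------------------------------------
// [Sketch] The immediate masking performed below, standalone: a shift of a
// W-bit operand only honors the low bits of the count, so a constant shift
// amount is masked to the operand width before emission. Illustrative code,
// not the JIT's.

unsigned MaskShiftCount(unsigned long long count, unsigned operandBits)
{
    // operandBits is 32 or 64, so the mask is 0x1f or 0x3f.
    return (unsigned)(count & (operandBits - 1));
}

// Example: MaskShiftCount(33, 32) == 1, i.e. a 32-bit shift by 33 is
// emitted as a shift by 1.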
-// -void CodeGen::genCodeForShift(GenTreePtr tree) -{ - var_types targetType = tree->TypeGet(); - genTreeOps oper = tree->OperGet(); - instruction ins = genGetInsForOper(oper, targetType); - emitAttr size = emitTypeSize(tree); - - assert(tree->gtRegNum != REG_NA); - - genConsumeOperands(tree->AsOp()); - - GenTreePtr operand = tree->gtGetOp1(); - GenTreePtr shiftBy = tree->gtGetOp2(); - if (!shiftBy->IsCnsIntOrI()) - { - getEmitter()->emitIns_R_R_R(ins, size, tree->gtRegNum, operand->gtRegNum, shiftBy->gtRegNum); - } - else - { - unsigned immWidth = size * BITS_PER_BYTE; - ssize_t shiftByImm = shiftBy->gtIntCon.gtIconVal & (immWidth - 1); - - getEmitter()->emitIns_R_R_I(ins, size, tree->gtRegNum, operand->gtRegNum, shiftByImm); - } - - genProduceReg(tree); -} - -// Generate code for a CpBlk node by the means of the VM memcpy helper call -// Preconditions: -// a) The size argument of the CpBlk is not an integer constant -// b) The size argument is a constant but is larger than CPBLK_MOVS_LIMIT bytes. -void CodeGen::genCodeForCpBlk(GenTreeBlk* cpBlkNode) -{ - // Make sure we got the arguments of the cpblk operation in the right registers - unsigned blockSize = cpBlkNode->Size(); - GenTreePtr dstAddr = cpBlkNode->Addr(); - assert(!dstAddr->isContained()); - - genConsumeBlockOp(cpBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2); - genEmitHelperCall(CORINFO_HELP_MEMCPY, 0, EA_UNKNOWN); -} - // Generates CpBlk code by performing a loop unroll // Preconditions: // The size argument of the CpBlk node is a constant and <= 64 bytes. @@ -2082,36 +1599,6 @@ void CodeGen::genCodeForStoreBlk(GenTreeBlk* blkOp) } } -// Generates code for InitBlk by calling the VM memset helper function. -// Preconditions: -// a) The size argument of the InitBlk is not an integer constant. -// b) The size argument of the InitBlk is >= INITBLK_STOS_LIMIT bytes. -void CodeGen::genCodeForInitBlk(GenTreeBlk* initBlkNode) -{ - // Make sure we got the arguments of the initblk operation in the right registers - unsigned size = initBlkNode->Size(); - GenTreePtr dstAddr = initBlkNode->Addr(); - GenTreePtr initVal = initBlkNode->Data(); - if (initVal->OperIsInitVal()) - { - initVal = initVal->gtGetOp1(); - } - - assert(!dstAddr->isContained()); - assert(!initVal->isContained()); - if (initBlkNode->gtOper == GT_STORE_DYN_BLK) - { - assert(initBlkNode->AsDynBlk()->gtDynamicSize->gtRegNum == REG_ARG_2); - } - else - { - assert(initBlkNode->gtRsvdRegs == RBM_ARG_2); - } - - genConsumeBlockOp(initBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2); - genEmitHelperCall(CORINFO_HELP_MEMSET, 0, EA_UNKNOWN); -} - //------------------------------------------------------------------------ // genCodeForShiftLong: Generates the code sequence for a GenTree node that // represents a three operand bit shift or rotate operation (<>Lo). @@ -2176,370 +1663,6 @@ void CodeGen::genCodeForShiftLong(GenTreePtr tree) } //------------------------------------------------------------------------ -// genRegCopy: Generate a register copy. -// -void CodeGen::genRegCopy(GenTree* treeNode) -{ - assert(treeNode->OperGet() == GT_COPY); - - var_types targetType = treeNode->TypeGet(); - regNumber targetReg = treeNode->gtRegNum; - assert(targetReg != REG_NA); - - GenTree* op1 = treeNode->gtOp.gtOp1; - - // Check whether this node and the node from which we're copying the value have the same - // register type. 
- // This can happen if (currently iff) we have a SIMD vector type that fits in an integer - // register, in which case it is passed as an argument, or returned from a call, - // in an integer register and must be copied if it's in an xmm register. - - if (varTypeIsFloating(treeNode) != varTypeIsFloating(op1)) - { - NYI("genRegCopy floating point"); - } - else - { - inst_RV_RV(ins_Copy(targetType), targetReg, genConsumeReg(op1), targetType); - } - - if (op1->IsLocal()) - { - // The lclVar will never be a def. - // If it is a last use, the lclVar will be killed by genConsumeReg(), as usual, and genProduceReg will - // appropriately set the gcInfo for the copied value. - // If not, there are two cases we need to handle: - // - If this is a TEMPORARY copy (indicated by the GTF_VAR_DEATH flag) the variable - // will remain live in its original register. - // genProduceReg() will appropriately set the gcInfo for the copied value, - // and genConsumeReg will reset it. - // - Otherwise, we need to update register info for the lclVar. - - GenTreeLclVarCommon* lcl = op1->AsLclVarCommon(); - assert((lcl->gtFlags & GTF_VAR_DEF) == 0); - - if ((lcl->gtFlags & GTF_VAR_DEATH) == 0 && (treeNode->gtFlags & GTF_VAR_DEATH) == 0) - { - LclVarDsc* varDsc = &compiler->lvaTable[lcl->gtLclNum]; - - // If we didn't just spill it (in genConsumeReg, above), then update the register info - if (varDsc->lvRegNum != REG_STK) - { - // The old location is dying - genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(op1)); - - gcInfo.gcMarkRegSetNpt(genRegMask(op1->gtRegNum)); - - genUpdateVarReg(varDsc, treeNode); - - // The new location is going live - genUpdateRegLife(varDsc, /*isBorn*/ true, /*isDying*/ false DEBUGARG(treeNode)); - } - } - } - - genProduceReg(treeNode); -} - -//------------------------------------------------------------------------ -// genCallInstruction: Produce code for a GT_CALL node -// -void CodeGen::genCallInstruction(GenTreeCall* call) -{ - gtCallTypes callType = (gtCallTypes)call->gtCallType; - - IL_OFFSETX ilOffset = BAD_IL_OFFSET; - - // all virtuals should have been expanded into a control expression - assert(!call->IsVirtual() || call->gtControlExpr || call->gtCallAddr); - - // Consume all the arg regs - for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext()) - { - assert(list->OperIsList()); - - GenTreePtr argNode = list->Current(); - - fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode->gtSkipReloadOrCopy()); - assert(curArgTabEntry); - - if (curArgTabEntry->regNum == REG_STK) - continue; - - // Deal with multi register passed struct args. 
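//------------------------------------------------------------------------
// [Sketch] A standalone model of the loop that follows: each GT_FIELD_LIST
// item has already been evaluated into some register, and is moved, when
// necessary, into the next sequential argument register (genRegArgNext).
// Register numbers and names are invented for illustration.

// Advance to the next integer argument register, as genRegArgNext does
// (r0 -> r1 -> r2 -> r3 on ARM32).
int NextArgReg(int argReg)
{
    return argReg + 1;
}

// Ensure the i-th field ends up in firstArgReg + i, emitting a move only
// when the producing register differs from the target argument register.
void PlaceFieldListArgs(const int* producedReg, unsigned fieldCount, int firstArgReg)
{
    int argReg = firstArgReg;
    for (unsigned i = 0; i < fieldCount; ++i)
    {
        if (producedReg[i] != argReg)
        {
            // The real code emits: inst_RV_RV(ins_Move_Extend(...), argReg, producedReg[i])
        }
        argReg = NextArgReg(argReg);
    }
}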
- if (argNode->OperGet() == GT_FIELD_LIST) - { - GenTreeArgList* argListPtr = argNode->AsArgList(); - unsigned iterationNum = 0; - regNumber argReg = curArgTabEntry->regNum; - for (; argListPtr != nullptr; argListPtr = argListPtr->Rest(), iterationNum++) - { - GenTreePtr putArgRegNode = argListPtr->gtOp.gtOp1; - assert(putArgRegNode->gtOper == GT_PUTARG_REG); - - genConsumeReg(putArgRegNode); - - if (putArgRegNode->gtRegNum != argReg) - { - inst_RV_RV(ins_Move_Extend(putArgRegNode->TypeGet(), putArgRegNode->InReg()), argReg, - putArgRegNode->gtRegNum); - } - - argReg = genRegArgNext(argReg); - } - } - else - { - regNumber argReg = curArgTabEntry->regNum; - genConsumeReg(argNode); - if (argNode->gtRegNum != argReg) - { - inst_RV_RV(ins_Move_Extend(argNode->TypeGet(), argNode->InReg()), argReg, argNode->gtRegNum); - } - } - - // In the case of a varargs call, - // the ABI dictates that if we have floating point args, - // we must pass the enregistered arguments in both the - // integer and floating point registers so, let's do that. - if (call->IsVarargs() && varTypeIsFloating(argNode)) - { - NYI_ARM("CodeGen - IsVarargs"); - } - } - - // Insert a null check on "this" pointer if asked. - if (call->NeedsNullCheck()) - { - const regNumber regThis = genGetThisArgReg(call); - regMaskTP tempMask = genFindLowestBit(call->gtRsvdRegs); - const regNumber tmpReg = genRegNumFromMask(tempMask); - if (genCountBits(call->gtRsvdRegs) > 1) - { - call->gtRsvdRegs &= ~tempMask; - } - getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, tmpReg, regThis, 0); - } - - // Either gtControlExpr != null or gtCallAddr != null or it is a direct non-virtual call to a user or helper method. - CORINFO_METHOD_HANDLE methHnd; - GenTree* target = call->gtControlExpr; - if (callType == CT_INDIRECT) - { - assert(target == nullptr); - target = call->gtCallAddr; - methHnd = nullptr; - } - else - { - methHnd = call->gtCallMethHnd; - } - - CORINFO_SIG_INFO* sigInfo = nullptr; -#ifdef DEBUG - // Pass the call signature information down into the emitter so the emitter can associate - // native call sites with the signatures they were generated from. - if (callType != CT_HELPER) - { - sigInfo = call->callSig; - } -#endif // DEBUG - - // If fast tail call, then we are done. - if (call->IsFastTailCall()) - { - NYI_ARM("fast tail call"); - } - - // For a pinvoke to unmanaged code we emit a label to clear - // the GC pointer state before the callsite. - // We can't utilize the typical lazy killing of GC pointers - // at (or inside) the callsite. - if (call->IsUnmanaged()) - { - genDefineTempLabel(genCreateTempLabel()); - } - - // Determine return value size(s). - ReturnTypeDesc* pRetTypeDesc = call->GetReturnTypeDesc(); - emitAttr retSize = EA_PTRSIZE; - emitAttr secondRetSize = EA_UNKNOWN; - - if (call->HasMultiRegRetVal()) - { - retSize = emitTypeSize(pRetTypeDesc->GetReturnRegType(0)); - secondRetSize = emitTypeSize(pRetTypeDesc->GetReturnRegType(1)); - } - else - { - assert(!varTypeIsStruct(call)); - - if (call->gtType == TYP_REF || call->gtType == TYP_ARRAY) - { - retSize = EA_GCREF; - } - else if (call->gtType == TYP_BYREF) - { - retSize = EA_BYREF; - } - } - - // We need to propagate the IL offset information to the call instruction, so we can emit - // an IL to native mapping record for the call, to support managed return value debugging. - // We don't want tail call helper calls that were converted from normal calls to get a record, - // so we skip this hash table lookup logic in that case. 
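//------------------------------------------------------------------------
// [Sketch] A standalone restatement of the return-size classification a few
// lines above: the emitter is told whether the result register holds a GC
// reference, a byref, or a plain pointer-sized value, so GC info stays
// precise across the call. Enum names are invented (trailing underscores)
// to avoid implying the JIT's own identifiers.

enum RetAttr
{
    RET_PTRSIZE, // EA_PTRSIZE: no GC tracking needed
    RET_GCREF,   // EA_GCREF:   object reference
    RET_BYREF    // EA_BYREF:   managed interior pointer
};

enum RetType
{
    TYP_INT_,
    TYP_REF_,
    TYP_ARRAY_,
    TYP_BYREF_
};

RetAttr ClassifyReturn(RetType t)
{
    if ((t == TYP_REF_) || (t == TYP_ARRAY_))
    {
        return RET_GCREF;
    }
    if (t == TYP_BYREF_)
    {
        return RET_BYREF;
    }
    return RET_PTRSIZE;
}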
- if (compiler->opts.compDbgInfo && compiler->genCallSite2ILOffsetMap != nullptr && !call->IsTailCall()) - { - (void)compiler->genCallSite2ILOffsetMap->Lookup(call, &ilOffset); - } - - if (target != nullptr) - { - // For ARM a call target can not be a contained indirection - assert(!target->isContainedIndir()); - - genConsumeReg(target); - - // We have already generated code for gtControlExpr evaluating it into a register. - // We just need to emit "call reg" in this case. - // - assert(genIsValidIntReg(target->gtRegNum)); - - genEmitCall(emitter::EC_INDIR_R, methHnd, - INDEBUG_LDISASM_COMMA(sigInfo) nullptr, // addr - retSize, ilOffset, target->gtRegNum); - } - else - { - // Generate a direct call to a non-virtual user defined or helper method - assert(callType == CT_HELPER || callType == CT_USER_FUNC); - - void* addr = nullptr; - if (callType == CT_HELPER) - { - // Direct call to a helper method. - CorInfoHelpFunc helperNum = compiler->eeGetHelperNum(methHnd); - noway_assert(helperNum != CORINFO_HELP_UNDEF); - - void* pAddr = nullptr; - addr = compiler->compGetHelperFtn(helperNum, (void**)&pAddr); - - if (addr == nullptr) - { - addr = pAddr; - } - } - else - { - // Direct call to a non-virtual user function. - CORINFO_ACCESS_FLAGS aflags = CORINFO_ACCESS_ANY; - if (call->IsSameThis()) - { - aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_THIS); - } - - if ((call->NeedsNullCheck()) == 0) - { - aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_NONNULL); - } - - CORINFO_CONST_LOOKUP addrInfo; - compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo, aflags); - - addr = addrInfo.addr; - } - - assert(addr); - // Non-virtual direct call to known addresses - if (!arm_Valid_Imm_For_BL((ssize_t)addr)) - { - regNumber tmpReg = genRegNumFromMask(call->gtRsvdRegs); - instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, tmpReg, (ssize_t)addr); - genEmitCall(emitter::EC_INDIR_R, methHnd, INDEBUG_LDISASM_COMMA(sigInfo) NULL, retSize, ilOffset, tmpReg); - } - else - { - genEmitCall(emitter::EC_FUNC_TOKEN, methHnd, INDEBUG_LDISASM_COMMA(sigInfo) addr, retSize, ilOffset); - } - } - - // if it was a pinvoke we may have needed to get the address of a label - if (genPendingCallLabel) - { - assert(call->IsUnmanaged()); - genDefineTempLabel(genPendingCallLabel); - genPendingCallLabel = nullptr; - } - - // Update GC info: - // All Callee arg registers are trashed and no longer contain any GC pointers. - // TODO-ARM-Bug?: As a matter of fact shouldn't we be killing all of callee trashed regs here? - // For now we will assert that other than arg regs gc ref/byref set doesn't contain any other - // registers from RBM_CALLEE_TRASH - assert((gcInfo.gcRegGCrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0); - assert((gcInfo.gcRegByrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0); - gcInfo.gcRegGCrefSetCur &= ~RBM_ARG_REGS; - gcInfo.gcRegByrefSetCur &= ~RBM_ARG_REGS; - - var_types returnType = call->TypeGet(); - if (returnType != TYP_VOID) - { - regNumber returnReg; - - if (call->HasMultiRegRetVal()) - { - assert(pRetTypeDesc != nullptr); - unsigned regCount = pRetTypeDesc->GetReturnRegCount(); - - // If regs allocated to call node are different from ABI return - // regs in which the call has returned its result, move the result - // to regs allocated to call node. 
- for (unsigned i = 0; i < regCount; ++i) - { - var_types regType = pRetTypeDesc->GetReturnRegType(i); - returnReg = pRetTypeDesc->GetABIReturnReg(i); - regNumber allocatedReg = call->GetRegNumByIdx(i); - if (returnReg != allocatedReg) - { - inst_RV_RV(ins_Copy(regType), allocatedReg, returnReg, regType); - } - } - } - else - { - if (call->IsHelperCall(compiler, CORINFO_HELP_INIT_PINVOKE_FRAME)) - { - // The CORINFO_HELP_INIT_PINVOKE_FRAME helper uses a custom calling convention that returns with - // TCB in REG_PINVOKE_TCB. fgMorphCall() sets the correct argument registers. - returnReg = REG_PINVOKE_TCB; - } - else if (varTypeIsFloating(returnType)) - { - returnReg = REG_FLOATRET; - } - else - { - returnReg = REG_INTRET; - } - - if (call->gtRegNum != returnReg) - { - inst_RV_RV(ins_Copy(returnType), call->gtRegNum, returnReg, returnType); - } - } - - genProduceReg(call); - } - - // If there is nothing next, that means the result is thrown away, so this value is not live. - // However, for minopts or debuggable code, we keep it live to support managed return value debugging. - if ((call->gtNext == nullptr) && !compiler->opts.MinOpts() && !compiler->opts.compDbgCode) - { - gcInfo.gcMarkRegSetNpt(RBM_INTRET); - } -} - -//------------------------------------------------------------------------ // genLeaInstruction: Produce code for a GT_LEA subnode. // void CodeGen::genLeaInstruction(GenTreeAddrMode* lea) @@ -2848,229 +1971,6 @@ void CodeGen::genLongToIntCast(GenTree* cast) } //------------------------------------------------------------------------ -// genIntToIntCast: Generate code for an integer cast -// -// Arguments: -// treeNode - The GT_CAST node -// -// Return Value: -// None. -// -// Assumptions: -// The treeNode must have an assigned register. -// For a signed convert from byte, the source must be in a byte-addressable register. -// Neither the source nor target type can be a floating point type. -// -void CodeGen::genIntToIntCast(GenTreePtr treeNode) -{ - assert(treeNode->OperGet() == GT_CAST); - - GenTreePtr castOp = treeNode->gtCast.CastOp(); - emitter* emit = getEmitter(); - - var_types dstType = treeNode->CastToType(); - var_types srcType = genActualType(castOp->TypeGet()); - emitAttr movSize = emitActualTypeSize(dstType); - bool movRequired = false; - - if (varTypeIsLong(srcType)) - { - genLongToIntCast(treeNode); - return; - } - - regNumber targetReg = treeNode->gtRegNum; - regNumber sourceReg = castOp->gtRegNum; - - // For Long to Int conversion we will have a reserved integer register to hold the immediate mask - regNumber tmpReg = (treeNode->gtRsvdRegs == RBM_NONE) ? REG_NA : genRegNumFromMask(treeNode->gtRsvdRegs); - - assert(genIsValidIntReg(targetReg)); - assert(genIsValidIntReg(sourceReg)); - - instruction ins = INS_invalid; - - genConsumeReg(castOp); - Lowering::CastInfo castInfo; - - // Get information about the cast. - Lowering::getCastDescription(treeNode, &castInfo); - - if (castInfo.requiresOverflowCheck) - { - emitAttr cmpSize = EA_ATTR(genTypeSize(srcType)); - - if (castInfo.signCheckOnly) - { - // We only need to check for a negative value in sourceReg - emit->emitIns_R_I(INS_cmp, cmpSize, sourceReg, 0); - emitJumpKind jmpLT = genJumpKindForOper(GT_LT, CK_SIGNED); - genJumpToThrowHlpBlk(jmpLT, SCK_OVERFLOW); - noway_assert(genTypeSize(srcType) == 4 || genTypeSize(srcType) == 8); - // This is only interesting case to ensure zero-upper bits. 
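//------------------------------------------------------------------------
// [Sketch] A standalone model of the overflow checks this cast code performs
// (continuing below): an unsigned-involved cast only tests that the bits
// outside the destination type are clear, while a narrowing signed cast
// compares against the destination's min and max. Illustrative, not JIT code.

#include <cstdint>

// 'tst' path: typeMask holds the bits that must be clear for the value to fit.
bool FitsUnsignedMask(uint64_t value, uint64_t typeMask)
{
    return (value & typeMask) == 0;
}

// cmp-min/cmp-max path: the value must lie within the destination's signed range.
bool FitsSignedRange(int64_t value, int64_t typeMin, int64_t typeMax)
{
    return (value >= typeMin) && (value <= typeMax);
}

// Example: casting int 300 to int8: FitsSignedRange(300, -128, 127) is false,
// so the generated code jumps to the overflow throw helper.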
- if ((srcType == TYP_INT) && (dstType == TYP_ULONG)) - { - // cast to TYP_ULONG: - // We use a mov with size=EA_4BYTE - // which will zero out the upper bits - movSize = EA_4BYTE; - movRequired = true; - } - } - else if (castInfo.unsignedSource || castInfo.unsignedDest) - { - // When we are converting from/to unsigned, - // we only have to check for any bits set in 'typeMask' - - noway_assert(castInfo.typeMask != 0); - emit->emitIns_R_I(INS_tst, cmpSize, sourceReg, castInfo.typeMask); - emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED); - genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW); - } - else - { - // For a narrowing signed cast - // - // We must check the value is in a signed range. - - // Compare with the MAX - - noway_assert((castInfo.typeMin != 0) && (castInfo.typeMax != 0)); - - if (emitter::emitIns_valid_imm_for_cmp(castInfo.typeMax, INS_FLAGS_DONT_CARE)) - { - emit->emitIns_R_I(INS_cmp, cmpSize, sourceReg, castInfo.typeMax); - } - else - { - noway_assert(tmpReg != REG_NA); - instGen_Set_Reg_To_Imm(cmpSize, tmpReg, castInfo.typeMax); - emit->emitIns_R_R(INS_cmp, cmpSize, sourceReg, tmpReg); - } - - emitJumpKind jmpGT = genJumpKindForOper(GT_GT, CK_SIGNED); - genJumpToThrowHlpBlk(jmpGT, SCK_OVERFLOW); - - // Compare with the MIN - - if (emitter::emitIns_valid_imm_for_cmp(castInfo.typeMin, INS_FLAGS_DONT_CARE)) - { - emit->emitIns_R_I(INS_cmp, cmpSize, sourceReg, castInfo.typeMin); - } - else - { - noway_assert(tmpReg != REG_NA); - instGen_Set_Reg_To_Imm(cmpSize, tmpReg, castInfo.typeMin); - emit->emitIns_R_R(INS_cmp, cmpSize, sourceReg, tmpReg); - } - - emitJumpKind jmpLT = genJumpKindForOper(GT_LT, CK_SIGNED); - genJumpToThrowHlpBlk(jmpLT, SCK_OVERFLOW); - } - ins = INS_mov; - } - else // Non-overflow checking cast. - { - if (genTypeSize(srcType) == genTypeSize(dstType)) - { - ins = INS_mov; - } - else - { - var_types extendType = TYP_UNKNOWN; - - // If we need to treat a signed type as unsigned - if ((treeNode->gtFlags & GTF_UNSIGNED) != 0) - { - extendType = genUnsignedType(srcType); - movSize = emitTypeSize(extendType); - movRequired = true; - } - else - { - if (genTypeSize(srcType) < genTypeSize(dstType)) - { - extendType = srcType; - movSize = emitTypeSize(srcType); - if (srcType == TYP_UINT) - { - movRequired = true; - } - } - else // (genTypeSize(srcType) > genTypeSize(dstType)) - { - extendType = dstType; - movSize = emitTypeSize(dstType); - } - } - - ins = ins_Move_Extend(extendType, castOp->InReg()); - } - } - - // We should never be generating a load from memory instruction here! - assert(!emit->emitInsIsLoad(ins)); - - if ((ins != INS_mov) || movRequired || (targetReg != sourceReg)) - { - emit->emitIns_R_R(ins, movSize, targetReg, sourceReg); - } - - genProduceReg(treeNode); -} - -//------------------------------------------------------------------------ -// genFloatToFloatCast: Generate code for a cast between float and double -// -// Arguments: -// treeNode - The GT_CAST node -// -// Return Value: -// None. -// -// Assumptions: -// Cast is a non-overflow conversion. -// The treeNode must have an assigned register. -// The cast is between float and double. 
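//------------------------------------------------------------------------
// [Sketch] The instruction selection performed by the function below,
// standalone: when source and destination types differ, float widens via
// vcvt.f64.f32 and double narrows via vcvt.f32.f64; when they match, at most
// a register-to-register vmov is needed. Enum names are invented.

enum FpType
{
    FP_FLOAT,
    FP_DOUBLE
};

enum FpConversion
{
    CVT_NONE, // same type: vmov only if source and target regs differ
    CVT_F2D,  // INS_vcvt_f2d: float -> double
    CVT_D2F   // INS_vcvt_d2f: double -> float
};

FpConversion SelectFpConversion(FpType src, FpType dst)
{
    if (src == dst)
    {
        return CVT_NONE;
    }
    return (src == FP_FLOAT) ? CVT_F2D : CVT_D2F;
}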
-// -void CodeGen::genFloatToFloatCast(GenTreePtr treeNode) -{ - // float <--> double conversions are always non-overflow ones - assert(treeNode->OperGet() == GT_CAST); - assert(!treeNode->gtOverflow()); - - regNumber targetReg = treeNode->gtRegNum; - assert(genIsValidFloatReg(targetReg)); - - GenTreePtr op1 = treeNode->gtOp.gtOp1; - assert(!op1->isContained()); // Cannot be contained - assert(genIsValidFloatReg(op1->gtRegNum)); // Must be a valid float reg. - - var_types dstType = treeNode->CastToType(); - var_types srcType = op1->TypeGet(); - assert(varTypeIsFloating(srcType) && varTypeIsFloating(dstType)); - - genConsumeOperands(treeNode->AsOp()); - - // treeNode must be a reg - assert(!treeNode->isContained()); - - if (srcType != dstType) - { - instruction insVcvt = (srcType == TYP_FLOAT) ? INS_vcvt_f2d // convert Float to Double - : INS_vcvt_d2f; // convert Double to Float - - getEmitter()->emitIns_R_R(insVcvt, emitTypeSize(treeNode), treeNode->gtRegNum, op1->gtRegNum); - } - else if (treeNode->gtRegNum != op1->gtRegNum) - { - getEmitter()->emitIns_R_R(INS_vmov, emitTypeSize(treeNode), treeNode->gtRegNum, op1->gtRegNum); - } - - genProduceReg(treeNode); -} - -//------------------------------------------------------------------------ // genIntToFloatCast: Generate code to cast an int/long to float/double // // Arguments: @@ -3228,38 +2128,6 @@ void CodeGen::genFloatToIntCast(GenTreePtr treeNode) } //------------------------------------------------------------------------ -// genCreateAndStoreGCInfo: Create and record GC Info for the function. -// -void CodeGen::genCreateAndStoreGCInfo(unsigned codeSize, - unsigned prologSize, - unsigned epilogSize DEBUGARG(void* codePtr)) -{ - IAllocator* allowZeroAlloc = new (compiler, CMK_GC) AllowZeroAllocator(compiler->getAllocatorGC()); - GcInfoEncoder* gcInfoEncoder = new (compiler, CMK_GC) - GcInfoEncoder(compiler->info.compCompHnd, compiler->info.compMethodInfo, allowZeroAlloc, NOMEM); - assert(gcInfoEncoder); - - // Follow the code pattern of the x86 gc info encoder (genCreateAndStoreGCInfoJIT32). - gcInfo.gcInfoBlockHdrSave(gcInfoEncoder, codeSize, prologSize); - - // We keep the call count for the second call to gcMakeRegPtrTable() below. - unsigned callCnt = 0; - // First we figure out the encoder ID's for the stack slots and registers. - gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_ASSIGN_SLOTS, &callCnt); - // Now we've requested all the slots we'll need; "finalize" these (make more compact data structures for them). - gcInfoEncoder->FinalizeSlotIds(); - // Now we can actually use those slot ID's to declare live ranges. - gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_DO_WORK, &callCnt); - - gcInfoEncoder->Build(); - - // GC Encoder automatically puts the GC info in the right spot using ICorJitInfo::allocGCInfo(size_t) - // let's save the values anyway for debugging purposes - compiler->compInfoBlkAddr = gcInfoEncoder->Emit(); - compiler->compInfoBlkSize = 0; // not exposed by the GCEncoder interface -} - -//------------------------------------------------------------------------ // genEmitHelperCall: Emit a call to a helper function. 
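//------------------------------------------------------------------------
// [Sketch] The GC info construction above is a two-pass protocol: the first
// gcMakeRegPtrTable pass only assigns slot IDs, the encoder then freezes its
// slot numbering (FinalizeSlotIds), and the second pass replays the same walk
// to report liveness against those IDs. A minimal standalone model of that
// pattern, with an invented encoder type:

#include <cstddef>
#include <vector>

enum PassMode
{
    ASSIGN_SLOTS, // pass 1: build the slot table only
    DO_WORK       // pass 2: slot table is frozen, report against it
};

struct SlotEncoder
{
    std::vector<int> slots; // one entry per stack offset seen in pass 1

    int GetSlotId(int stackOffset, PassMode mode)
    {
        if (mode == ASSIGN_SLOTS)
        {
            slots.push_back(stackOffset);
            return (int)slots.size() - 1;
        }
        for (size_t i = 0; i < slots.size(); ++i)
        {
            if (slots[i] == stackOffset)
            {
                return (int)i;
            }
        }
        return -1;
    }
};

void BuildGcInfoModel(SlotEncoder& enc, const std::vector<int>& liveSlots)
{
    for (size_t i = 0; i < liveSlots.size(); ++i)
    {
        enc.GetSlotId(liveSlots[i], ASSIGN_SLOTS); // pass 1
    }
    // FinalizeSlotIds() would compact the slot table here.
    for (size_t i = 0; i < liveSlots.size(); ++i)
    {
        enc.GetSlotId(liveSlots[i], DO_WORK); // pass 2
    }
}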
// void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, regNumber callTargetReg /*= REG_NA */) diff --git a/src/jit/codegenarm64.cpp b/src/jit/codegenarm64.cpp index c17e033..7f98221 100644 --- a/src/jit/codegenarm64.cpp +++ b/src/jit/codegenarm64.cpp @@ -1265,24 +1265,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX */ -/***************************************************************************** - * - * Generate code that will set the given register to the integer constant. - */ - -void CodeGen::genSetRegToIcon(regNumber reg, ssize_t val, var_types type, insFlags flags) -{ - // Reg cannot be a FP reg - assert(!genIsValidFloatReg(reg)); - - // The only TYP_REF constant that can come this path is a managed 'null' since it is not - // relocatable. Other ref type constants (e.g. string objects) go through a different - // code path. - noway_assert(type != TYP_REF || val == 0); - - instGen_Set_Reg_To_Imm(emitActualTypeSize(type), reg, val, flags); -} - BasicBlock* CodeGen::genCallFinally(BasicBlock* block) { // Generate a call to the finally, like this: @@ -2861,77 +2843,6 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode) } } -//---------------------------------------------------------------------------------- -// genMultiRegCallStoreToLocal: store multi-reg return value of a call node to a local -// -// Arguments: -// treeNode - Gentree of GT_STORE_LCL_VAR -// -// Return Value: -// None -// -// Assumption: -// The child of store is a multi-reg call node. -// genProduceReg() on treeNode is made by caller of this routine. -// -void CodeGen::genMultiRegCallStoreToLocal(GenTreePtr treeNode) -{ - assert(treeNode->OperGet() == GT_STORE_LCL_VAR); - - // Structs of size >=9 and <=16 are returned in two return registers on ARM64 and HFAs. - assert(varTypeIsStruct(treeNode)); - - // Assumption: current ARM64 implementation requires that a multi-reg struct - // var in 'var = call' is flagged as lvIsMultiRegRet to prevent it from - // being struct promoted. - unsigned lclNum = treeNode->AsLclVarCommon()->gtLclNum; - LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]); - noway_assert(varDsc->lvIsMultiRegRet); - - GenTree* op1 = treeNode->gtGetOp1(); - GenTree* actualOp1 = op1->gtSkipReloadOrCopy(); - GenTreeCall* call = actualOp1->AsCall(); - assert(call->HasMultiRegRetVal()); - - genConsumeRegs(op1); - - ReturnTypeDesc* pRetTypeDesc = call->GetReturnTypeDesc(); - unsigned regCount = pRetTypeDesc->GetReturnRegCount(); - - if (treeNode->gtRegNum != REG_NA) - { - // Right now the only enregistrable structs supported are SIMD types. - assert(varTypeIsSIMD(treeNode)); - NYI("GT_STORE_LCL_VAR of a SIMD enregisterable struct"); - } - else - { - // Stack store - int offset = 0; - for (unsigned i = 0; i < regCount; ++i) - { - var_types type = pRetTypeDesc->GetReturnRegType(i); - regNumber reg = call->GetRegNumByIdx(i); - if (op1->IsCopyOrReload()) - { - // GT_COPY/GT_RELOAD will have valid reg for those positions - // that need to be copied or reloaded. 
- regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(i); - if (reloadReg != REG_NA) - { - reg = reloadReg; - } - } - - assert(reg != REG_NA); - getEmitter()->emitIns_S_R(ins_Store(type), emitTypeSize(type), reg, lclNum, offset); - offset += genTypeSize(type); - } - - varDsc->lvRegNum = REG_STK; - } -} - /*********************************************************************************************** * Generate code for localloc */ @@ -3349,42 +3260,6 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* initBlkNode) } } -// Generates code for InitBlk by calling the VM memset helper function. -// Preconditions: -// a) The size argument of the InitBlk is not an integer constant. -// b) The size argument of the InitBlk is >= INITBLK_STOS_LIMIT bytes. -void CodeGen::genCodeForInitBlk(GenTreeBlk* initBlkNode) -{ - // Make sure we got the arguments of the initblk operation in the right registers - unsigned size = initBlkNode->Size(); - GenTreePtr dstAddr = initBlkNode->Addr(); - GenTreePtr initVal = initBlkNode->Data(); - if (initVal->OperIsInitVal()) - { - initVal = initVal->gtGetOp1(); - } - - assert(!dstAddr->isContained()); - assert(!initVal->isContained()); - if (initBlkNode->gtOper == GT_STORE_DYN_BLK) - { - assert(initBlkNode->AsDynBlk()->gtDynamicSize->gtRegNum == REG_ARG_2); - } - else - { - assert(initBlkNode->gtRsvdRegs == RBM_ARG_2); - } - - if (size != 0) - { - assert(size > INITBLK_UNROLL_LIMIT); - } - - genConsumeBlockOp(initBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2); - - genEmitHelperCall(CORINFO_HELP_MEMSET, 0, EA_UNKNOWN); -} - // Generate code for a load from some address + offset // base: tree node which can be either a local address or arbitrary node // offset: distance from the base from which to load @@ -3689,27 +3564,6 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode) gcInfo.gcMarkRegSetNpt(RBM_WRITE_BARRIER_SRC_BYREF | RBM_WRITE_BARRIER_DST_BYREF); } -// Generate code for a CpBlk node by the means of the VM memcpy helper call -// Preconditions: -// a) The size argument of the CpBlk is not an integer constant -// b) The size argument is a constant but is larger than CPBLK_MOVS_LIMIT bytes. 
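//------------------------------------------------------------------------
// [Sketch] The size-based strategy split stated above, standalone: small
// constant-size copies are unrolled inline, while unknown or large sizes go
// to the CORINFO_HELP_MEMCPY helper. The 64-byte threshold mirrors the
// unroll precondition quoted earlier; the other names are invented.

const unsigned kCpBlkUnrollLimit = 64; // stands in for CPBLK_UNROLL_LIMIT

enum CopyStrategy
{
    COPY_UNROLL,
    COPY_HELPER
};

// constantSize == 0 means the size is not a compile-time constant.
CopyStrategy SelectCopyStrategy(unsigned constantSize)
{
    if ((constantSize != 0) && (constantSize <= kCpBlkUnrollLimit))
    {
        return COPY_UNROLL;
    }
    return COPY_HELPER;
}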
-void CodeGen::genCodeForCpBlk(GenTreeBlk* cpBlkNode) -{ - // Make sure we got the arguments of the cpblk operation in the right registers - unsigned blockSize = cpBlkNode->Size(); - GenTreePtr dstAddr = cpBlkNode->Addr(); - assert(!dstAddr->isContained()); - - genConsumeBlockOp(cpBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2); - - if (blockSize != 0) - { - assert(blockSize > CPBLK_UNROLL_LIMIT); - } - - genEmitHelperCall(CORINFO_HELP_MEMCPY, 0, EA_UNKNOWN); -} - // generate code do a switch statement based on a table of ip-relative offsets void CodeGen::genTableBasedSwitch(GenTree* treeNode) { @@ -3830,239 +3684,6 @@ void CodeGen::genLockedInstructions(GenTreeOp* treeNode) #endif // !0 } -// generate code for BoundsCheck nodes -void CodeGen::genRangeCheck(GenTreePtr oper) -{ -#ifdef FEATURE_SIMD - noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK || oper->OperGet() == GT_SIMD_CHK); -#else // !FEATURE_SIMD - noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK); -#endif // !FEATURE_SIMD - - GenTreeBoundsChk* bndsChk = oper->AsBoundsChk(); - - GenTreePtr arrLen = bndsChk->gtArrLen; - GenTreePtr arrIndex = bndsChk->gtIndex; - GenTreePtr arrRef = NULL; - int lenOffset = 0; - - GenTree * src1, *src2; - emitJumpKind jmpKind; - - genConsumeRegs(arrIndex); - genConsumeRegs(arrLen); - - if (arrIndex->isContainedIntOrIImmed()) - { - // To encode using a cmp immediate, we place the - // constant operand in the second position - src1 = arrLen; - src2 = arrIndex; - jmpKind = genJumpKindForOper(GT_LE, CK_UNSIGNED); - } - else - { - src1 = arrIndex; - src2 = arrLen; - jmpKind = genJumpKindForOper(GT_GE, CK_UNSIGNED); - } - - GenTreeIntConCommon* intConst = nullptr; - if (src2->isContainedIntOrIImmed()) - { - intConst = src2->AsIntConCommon(); - } - - if (intConst != nullptr) - { - getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, src1->gtRegNum, intConst->IconValue()); - } - else - { - getEmitter()->emitIns_R_R(INS_cmp, EA_4BYTE, src1->gtRegNum, src2->gtRegNum); - } - - genJumpToThrowHlpBlk(jmpKind, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB); -} - -//------------------------------------------------------------------------ -// genOffsetOfMDArrayLowerBound: Returns the offset from the Array object to the -// lower bound for the given dimension. -// -// Arguments: -// elemType - the element type of the array -// rank - the rank of the array -// dimension - the dimension for which the lower bound offset will be returned. -// -// Return Value: -// The offset. -// TODO-Cleanup: move to CodeGenCommon.cpp - -// static -unsigned CodeGen::genOffsetOfMDArrayLowerBound(var_types elemType, unsigned rank, unsigned dimension) -{ - // Note that the lower bound and length fields of the Array object are always TYP_INT, even on 64-bit targets. - return compiler->eeGetArrayDataOffset(elemType) + genTypeSize(TYP_INT) * (dimension + rank); -} - -//------------------------------------------------------------------------ -// genOffsetOfMDArrayLength: Returns the offset from the Array object to the -// size for the given dimension. -// -// Arguments: -// elemType - the element type of the array -// rank - the rank of the array -// dimension - the dimension for which the lower bound offset will be returned. -// -// Return Value: -// The offset. -// TODO-Cleanup: move to CodeGenCommon.cpp - -// static -unsigned CodeGen::genOffsetOfMDArrayDimensionSize(var_types elemType, unsigned rank, unsigned dimension) -{ - // Note that the lower bound and length fields of the Array object are always TYP_INT, even on 64-bit targets. 
- return compiler->eeGetArrayDataOffset(elemType) + genTypeSize(TYP_INT) * dimension; -} - -//------------------------------------------------------------------------ -// genCodeForArrIndex: Generates code to bounds check the index for one dimension of an array reference, -// producing the effective index by subtracting the lower bound. -// -// Arguments: -// arrIndex - the node for which we're generating code -// -// Return Value: -// None. -// - -void CodeGen::genCodeForArrIndex(GenTreeArrIndex* arrIndex) -{ - emitter* emit = getEmitter(); - GenTreePtr arrObj = arrIndex->ArrObj(); - GenTreePtr indexNode = arrIndex->IndexExpr(); - regNumber arrReg = genConsumeReg(arrObj); - regNumber indexReg = genConsumeReg(indexNode); - regNumber tgtReg = arrIndex->gtRegNum; - noway_assert(tgtReg != REG_NA); - - // We will use a temp register to load the lower bound and dimension size values - // - regMaskTP tmpRegsMask = arrIndex->gtRsvdRegs; // there will be two bits set - tmpRegsMask &= ~genRegMask(tgtReg); // remove the bit for 'tgtReg' from 'tmpRegsMask' - - regMaskTP tmpRegMask = genFindLowestBit(tmpRegsMask); // set tmpRegMsk to a one-bit mask - regNumber tmpReg = genRegNumFromMask(tmpRegMask); // set tmpReg from that mask - noway_assert(tmpReg != REG_NA); - - assert(tgtReg != tmpReg); - - unsigned dim = arrIndex->gtCurrDim; - unsigned rank = arrIndex->gtArrRank; - var_types elemType = arrIndex->gtArrElemType; - unsigned offset; - - offset = genOffsetOfMDArrayLowerBound(elemType, rank, dim); - emit->emitIns_R_R_I(ins_Load(TYP_INT), EA_8BYTE, tmpReg, arrReg, offset); // a 4 BYTE sign extending load - emit->emitIns_R_R_R(INS_sub, EA_4BYTE, tgtReg, indexReg, tmpReg); - - offset = genOffsetOfMDArrayDimensionSize(elemType, rank, dim); - emit->emitIns_R_R_I(ins_Load(TYP_INT), EA_8BYTE, tmpReg, arrReg, offset); // a 4 BYTE sign extending load - emit->emitIns_R_R(INS_cmp, EA_4BYTE, tgtReg, tmpReg); - - emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED); - genJumpToThrowHlpBlk(jmpGEU, SCK_RNGCHK_FAIL); - - genProduceReg(arrIndex); -} - -//------------------------------------------------------------------------ -// genCodeForArrOffset: Generates code to compute the flattened array offset for -// one dimension of an array reference: -// result = (prevDimOffset * dimSize) + effectiveIndex -// where dimSize is obtained from the arrObj operand -// -// Arguments: -// arrOffset - the node for which we're generating code -// -// Return Value: -// None. -// -// Notes: -// dimSize and effectiveIndex are always non-negative, the former by design, -// and the latter because it has been normalized to be zero-based. 
- -void CodeGen::genCodeForArrOffset(GenTreeArrOffs* arrOffset) -{ - GenTreePtr offsetNode = arrOffset->gtOffset; - GenTreePtr indexNode = arrOffset->gtIndex; - regNumber tgtReg = arrOffset->gtRegNum; - - noway_assert(tgtReg != REG_NA); - - if (!offsetNode->IsIntegralConst(0)) - { - emitter* emit = getEmitter(); - regNumber offsetReg = genConsumeReg(offsetNode); - noway_assert(offsetReg != REG_NA); - regNumber indexReg = genConsumeReg(indexNode); - noway_assert(indexReg != REG_NA); - GenTreePtr arrObj = arrOffset->gtArrObj; - regNumber arrReg = genConsumeReg(arrObj); - noway_assert(arrReg != REG_NA); - regMaskTP tmpRegMask = arrOffset->gtRsvdRegs; - regNumber tmpReg = genRegNumFromMask(tmpRegMask); - noway_assert(tmpReg != REG_NA); - unsigned dim = arrOffset->gtCurrDim; - unsigned rank = arrOffset->gtArrRank; - var_types elemType = arrOffset->gtArrElemType; - unsigned offset = genOffsetOfMDArrayDimensionSize(elemType, rank, dim); - - // Load tmpReg with the dimension size - emit->emitIns_R_R_I(ins_Load(TYP_INT), EA_8BYTE, tmpReg, arrReg, offset); // a 4 BYTE sign extending load - - // Evaluate tgtReg = offsetReg*dim_size + indexReg. - emit->emitIns_R_R_R_R(INS_madd, EA_4BYTE, tgtReg, tmpReg, offsetReg, indexReg); - } - else - { - regNumber indexReg = genConsumeReg(indexNode); - if (indexReg != tgtReg) - { - inst_RV_RV(INS_mov, tgtReg, indexReg, TYP_INT); - } - } - genProduceReg(arrOffset); -} - -// make a temporary indir we can feed to pattern matching routines -// in cases where we don't want to instantiate all the indirs that happen -// -// TODO-Cleanup: move to CodeGenCommon.cpp -GenTreeIndir CodeGen::indirForm(var_types type, GenTree* base) -{ - GenTreeIndir i(GT_IND, type, base, nullptr); - i.gtRegNum = REG_NA; - // has to be nonnull (because contained nodes can't be the last in block) - // but don't want it to be a valid pointer - i.gtNext = (GenTree*)(-1); - return i; -} - -// make a temporary int we can feed to pattern matching routines -// in cases where we don't want to instantiate -// -// TODO-Cleanup: move to CodeGenCommon.cpp -GenTreeIntCon CodeGen::intForm(var_types type, ssize_t value) -{ - GenTreeIntCon i(type, value); - i.gtRegNum = REG_NA; - // has to be nonnull (because contained nodes can't be the last in block) - // but don't want it to be a valid pointer - i.gtNext = (GenTree*)(-1); - return i; -} - instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type) { instruction ins = INS_brk; @@ -4149,451 +3770,47 @@ instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type) return ins; } -//------------------------------------------------------------------------ -// genCodeForShift: Generates the code sequence for a GenTree node that -// represents a bit shift or rotate operation (<<, >>, >>>, rol, ror). -// -// Arguments: -// tree - the bit shift node (that specifies the type of bit shift to perform). -// -// Assumptions: -// a) All GenTrees are register allocated. -// -void CodeGen::genCodeForShift(GenTreePtr tree) +// Produce code for a GT_JMP node. +// The arguments of the caller needs to be transferred to the callee before exiting caller. +// The actual jump to callee is generated as part of caller epilog sequence. +// Therefore the codegen of GT_JMP is to ensure that the callee arguments are correctly setup. 
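//------------------------------------------------------------------------
// [Sketch] A standalone model of the first phase of genJmpMethod below:
// before the epilog branches to the callee, any incoming argument that is
// not sitting in its initial location is spilled back to its home stack
// slot, which sidesteps register-shuffle cycles at the cost of a few stores.
// The struct and names are invented for illustration.

const int kStack = -1; // "lives on the stack" marker

struct ParamState
{
    int homeReg;    // initial ABI register, or kStack for a stack-passed arg
    int currentReg; // register it occupies now, or kStack if already spilled
};

bool NeedsSpillToStack(const ParamState& p)
{
    // En-registered stack args go back to the stack; reg args not in their
    // initial register are also spilled rather than shuffled in place.
    return (p.currentReg != kStack) && (p.currentReg != p.homeReg);
}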
+void CodeGen::genJmpMethod(GenTreePtr jmp) { - var_types targetType = tree->TypeGet(); - genTreeOps oper = tree->OperGet(); - instruction ins = genGetInsForOper(oper, targetType); - emitAttr size = emitTypeSize(tree); - - assert(tree->gtRegNum != REG_NA); - - GenTreePtr operand = tree->gtGetOp1(); - genConsumeOperands(tree->AsOp()); + assert(jmp->OperGet() == GT_JMP); + assert(compiler->compJmpOpUsed); - GenTreePtr shiftBy = tree->gtGetOp2(); - if (!shiftBy->IsCnsIntOrI()) - { - getEmitter()->emitIns_R_R_R(ins, size, tree->gtRegNum, operand->gtRegNum, shiftBy->gtRegNum); - } - else + // If no arguments, nothing to do + if (compiler->info.compArgsCount == 0) { - unsigned immWidth = emitter::getBitWidth(size); // immWidth will be set to 32 or 64 - ssize_t shiftByImm = shiftBy->gtIntCon.gtIconVal & (immWidth - 1); - - getEmitter()->emitIns_R_R_I(ins, size, tree->gtRegNum, operand->gtRegNum, shiftByImm); + return; } - genProduceReg(tree); -} - -void CodeGen::genRegCopy(GenTree* treeNode) -{ - assert(treeNode->OperGet() == GT_COPY); - - var_types targetType = treeNode->TypeGet(); - regNumber targetReg = treeNode->gtRegNum; - assert(targetReg != REG_NA); - - GenTree* op1 = treeNode->gtOp.gtOp1; - - // Check whether this node and the node from which we're copying the value have the same - // register type. - // This can happen if (currently iff) we have a SIMD vector type that fits in an integer - // register, in which case it is passed as an argument, or returned from a call, - // in an integer register and must be copied if it's in an xmm register. + // Make sure register arguments are in their initial registers + // and stack arguments are put back as well. + unsigned varNum; + LclVarDsc* varDsc; - if (varTypeIsFloating(treeNode) != varTypeIsFloating(op1)) - { - inst_RV_RV(INS_fmov, targetReg, genConsumeReg(op1), targetType); - } - else + // First move any en-registered stack arguments back to the stack. + // At the same time any reg arg not in correct reg is moved back to its stack location. + // + // We are not strictly required to spill reg args that are not in the desired reg for a jmp call + // But that would require us to deal with circularity while moving values around. Spilling + // to stack makes the implementation simple, which is not a bad trade off given Jmp calls + // are not frequent. + for (varNum = 0; (varNum < compiler->info.compArgsCount); varNum++) { - inst_RV_RV(ins_Copy(targetType), targetReg, genConsumeReg(op1), targetType); - } + varDsc = compiler->lvaTable + varNum; - if (op1->IsLocal()) - { - // The lclVar will never be a def. - // If it is a last use, the lclVar will be killed by genConsumeReg(), as usual, and genProduceReg will - // appropriately set the gcInfo for the copied value. - // If not, there are two cases we need to handle: - // - If this is a TEMPORARY copy (indicated by the GTF_VAR_DEATH flag) the variable - // will remain live in its original register. - // genProduceReg() will appropriately set the gcInfo for the copied value, - // and genConsumeReg will reset it. - // - Otherwise, we need to update register info for the lclVar. 
- - GenTreeLclVarCommon* lcl = op1->AsLclVarCommon(); - assert((lcl->gtFlags & GTF_VAR_DEF) == 0); - - if ((lcl->gtFlags & GTF_VAR_DEATH) == 0 && (treeNode->gtFlags & GTF_VAR_DEATH) == 0) + if (varDsc->lvPromoted) { - LclVarDsc* varDsc = &compiler->lvaTable[lcl->gtLclNum]; - - // If we didn't just spill it (in genConsumeReg, above), then update the register info - if (varDsc->lvRegNum != REG_STK) - { - // The old location is dying - genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(op1)); - - gcInfo.gcMarkRegSetNpt(genRegMask(op1->gtRegNum)); - - genUpdateVarReg(varDsc, treeNode); + noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here - // The new location is going live - genUpdateRegLife(varDsc, /*isBorn*/ true, /*isDying*/ false DEBUGARG(treeNode)); - } + unsigned fieldVarNum = varDsc->lvFieldLclStart; + varDsc = compiler->lvaTable + fieldVarNum; } - } - genProduceReg(treeNode); -} - -// Produce code for a GT_CALL node -void CodeGen::genCallInstruction(GenTreeCall* call) -{ - gtCallTypes callType = (gtCallTypes)call->gtCallType; - - IL_OFFSETX ilOffset = BAD_IL_OFFSET; - - // all virtuals should have been expanded into a control expression - assert(!call->IsVirtual() || call->gtControlExpr || call->gtCallAddr); - - // Consume all the arg regs - for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext()) - { - assert(list->OperIsList()); - - GenTreePtr argNode = list->Current(); - - fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode->gtSkipReloadOrCopy()); - assert(curArgTabEntry); - - if (curArgTabEntry->regNum == REG_STK) - continue; - - // Deal with multi register passed struct args. - if (argNode->OperGet() == GT_FIELD_LIST) - { - GenTreeArgList* argListPtr = argNode->AsArgList(); - unsigned iterationNum = 0; - regNumber argReg = curArgTabEntry->regNum; - for (; argListPtr != nullptr; argListPtr = argListPtr->Rest(), iterationNum++) - { - GenTreePtr putArgRegNode = argListPtr->gtOp.gtOp1; - assert(putArgRegNode->gtOper == GT_PUTARG_REG); - - genConsumeReg(putArgRegNode); - - if (putArgRegNode->gtRegNum != argReg) - { - inst_RV_RV(ins_Move_Extend(putArgRegNode->TypeGet(), putArgRegNode->InReg()), argReg, - putArgRegNode->gtRegNum); - } + noway_assert(varDsc->lvIsParam); - argReg = genRegArgNext(argReg); - } - } - else - { - regNumber argReg = curArgTabEntry->regNum; - genConsumeReg(argNode); - if (argNode->gtRegNum != argReg) - { - inst_RV_RV(ins_Move_Extend(argNode->TypeGet(), argNode->InReg()), argReg, argNode->gtRegNum); - } - } - - // In the case of a varargs call, - // the ABI dictates that if we have floating point args, - // we must pass the enregistered arguments in both the - // integer and floating point registers so, let's do that. - if (call->IsVarargs() && varTypeIsFloating(argNode)) - { - NYI_ARM64("CodeGen - IsVarargs"); - } - } - - // Insert a null check on "this" pointer if asked. - if (call->NeedsNullCheck()) - { - const regNumber regThis = genGetThisArgReg(call); - getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_ZR, regThis, 0); - } - - // Either gtControlExpr != null or gtCallAddr != null or it is a direct non-virtual call to a user or helper method. 
- CORINFO_METHOD_HANDLE methHnd; - GenTree* target = call->gtControlExpr; - if (callType == CT_INDIRECT) - { - assert(target == nullptr); - target = call->gtCallAddr; - methHnd = nullptr; - } - else - { - methHnd = call->gtCallMethHnd; - } - - CORINFO_SIG_INFO* sigInfo = nullptr; -#ifdef DEBUG - // Pass the call signature information down into the emitter so the emitter can associate - // native call sites with the signatures they were generated from. - if (callType != CT_HELPER) - { - sigInfo = call->callSig; - } -#endif // DEBUG - - // If fast tail call, then we are done. In this case we setup the args (both reg args - // and stack args in incoming arg area) and call target in IP0. Epilog sequence would - // generate "br IP0". - if (call->IsFastTailCall()) - { - // Don't support fast tail calling JIT helpers - assert(callType != CT_HELPER); - - // Fast tail calls materialize call target either in gtControlExpr or in gtCallAddr. - assert(target != nullptr); - - genConsumeReg(target); - - if (target->gtRegNum != REG_IP0) - { - inst_RV_RV(INS_mov, REG_IP0, target->gtRegNum); - } - return; - } - - // For a pinvoke to unmanged code we emit a label to clear - // the GC pointer state before the callsite. - // We can't utilize the typical lazy killing of GC pointers - // at (or inside) the callsite. - if (call->IsUnmanaged()) - { - genDefineTempLabel(genCreateTempLabel()); - } - - // Determine return value size(s). - ReturnTypeDesc* pRetTypeDesc = call->GetReturnTypeDesc(); - emitAttr retSize = EA_PTRSIZE; - emitAttr secondRetSize = EA_UNKNOWN; - - if (call->HasMultiRegRetVal()) - { - retSize = emitTypeSize(pRetTypeDesc->GetReturnRegType(0)); - secondRetSize = emitTypeSize(pRetTypeDesc->GetReturnRegType(1)); - } - else - { - assert(!varTypeIsStruct(call)); - - if (call->gtType == TYP_REF || call->gtType == TYP_ARRAY) - { - retSize = EA_GCREF; - } - else if (call->gtType == TYP_BYREF) - { - retSize = EA_BYREF; - } - } - - // We need to propagate the IL offset information to the call instruction, so we can emit - // an IL to native mapping record for the call, to support managed return value debugging. - // We don't want tail call helper calls that were converted from normal calls to get a record, - // so we skip this hash table lookup logic in that case. - if (compiler->opts.compDbgInfo && compiler->genCallSite2ILOffsetMap != nullptr && !call->IsTailCall()) - { - (void)compiler->genCallSite2ILOffsetMap->Lookup(call, &ilOffset); - } - - if (target != nullptr) - { - // For Arm64 a call target can not be a contained indirection - assert(!target->isContainedIndir()); - - // We have already generated code for gtControlExpr evaluating it into a register. - // We just need to emit "call reg" in this case. - // - assert(genIsValidIntReg(target->gtRegNum)); - - genEmitCall(emitter::EC_INDIR_R, methHnd, - INDEBUG_LDISASM_COMMA(sigInfo) nullptr, // addr - retSize, secondRetSize, ilOffset, genConsumeReg(target)); - } - else - { - // Generate a direct call to a non-virtual user defined or helper method - assert(callType == CT_HELPER || callType == CT_USER_FUNC); - - void* addr = nullptr; - if (callType == CT_HELPER) - { - // Direct call to a helper method. - CorInfoHelpFunc helperNum = compiler->eeGetHelperNum(methHnd); - noway_assert(helperNum != CORINFO_HELP_UNDEF); - - void* pAddr = nullptr; - addr = compiler->compGetHelperFtn(helperNum, (void**)&pAddr); - - if (addr == nullptr) - { - addr = pAddr; - } - } - else - { - // Direct call to a non-virtual user function. 
- CORINFO_ACCESS_FLAGS aflags = CORINFO_ACCESS_ANY; - if (call->IsSameThis()) - { - aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_THIS); - } - - if ((call->NeedsNullCheck()) == 0) - { - aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_NONNULL); - } - - CORINFO_CONST_LOOKUP addrInfo; - compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo, aflags); - - addr = addrInfo.addr; - } -#if 0 - // Use this path if you want to load an absolute call target using - // a sequence of movs followed by an indirect call (blr instruction) - - // Load the call target address in x16 - instGen_Set_Reg_To_Imm(EA_8BYTE, REG_IP0, (ssize_t) addr); - - // indirect call to constant address in IP0 - genEmitCall(emitter::EC_INDIR_R, - methHnd, - INDEBUG_LDISASM_COMMA(sigInfo) - nullptr, //addr - retSize, - secondRetSize, - ilOffset, - REG_IP0); -#else - // Non-virtual direct call to known addresses - genEmitCall(emitter::EC_FUNC_TOKEN, methHnd, INDEBUG_LDISASM_COMMA(sigInfo) addr, retSize, secondRetSize, - ilOffset); -#endif - } - - // if it was a pinvoke we may have needed to get the address of a label - if (genPendingCallLabel) - { - assert(call->IsUnmanaged()); - genDefineTempLabel(genPendingCallLabel); - genPendingCallLabel = nullptr; - } - - // Update GC info: - // All Callee arg registers are trashed and no longer contain any GC pointers. - // TODO-ARM64-Bug?: As a matter of fact shouldn't we be killing all of callee trashed regs here? - // For now we will assert that other than arg regs gc ref/byref set doesn't contain any other - // registers from RBM_CALLEE_TRASH - assert((gcInfo.gcRegGCrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0); - assert((gcInfo.gcRegByrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0); - gcInfo.gcRegGCrefSetCur &= ~RBM_ARG_REGS; - gcInfo.gcRegByrefSetCur &= ~RBM_ARG_REGS; - - var_types returnType = call->TypeGet(); - if (returnType != TYP_VOID) - { - regNumber returnReg; - - if (call->HasMultiRegRetVal()) - { - assert(pRetTypeDesc != nullptr); - unsigned regCount = pRetTypeDesc->GetReturnRegCount(); - - // If regs allocated to call node are different from ABI return - // regs in which the call has returned its result, move the result - // to regs allocated to call node. - for (unsigned i = 0; i < regCount; ++i) - { - var_types regType = pRetTypeDesc->GetReturnRegType(i); - returnReg = pRetTypeDesc->GetABIReturnReg(i); - regNumber allocatedReg = call->GetRegNumByIdx(i); - if (returnReg != allocatedReg) - { - inst_RV_RV(ins_Copy(regType), allocatedReg, returnReg, regType); - } - } - } - else - { - if (varTypeIsFloating(returnType)) - { - returnReg = REG_FLOATRET; - } - else - { - returnReg = REG_INTRET; - } - - if (call->gtRegNum != returnReg) - { - inst_RV_RV(ins_Copy(returnType), call->gtRegNum, returnReg, returnType); - } - } - - genProduceReg(call); - } - - // If there is nothing next, that means the result is thrown away, so this value is not live. - // However, for minopts or debuggable code, we keep it live to support managed return value debugging. - if ((call->gtNext == nullptr) && !compiler->opts.MinOpts() && !compiler->opts.compDbgCode) - { - gcInfo.gcMarkRegSetNpt(RBM_INTRET); - } -} - -// Produce code for a GT_JMP node. -// The arguments of the caller needs to be transferred to the callee before exiting caller. -// The actual jump to callee is generated as part of caller epilog sequence. -// Therefore the codegen of GT_JMP is to ensure that the callee arguments are correctly setup. 
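The genJmpMethod bodies (the merged ARM32 version earlier in this file and the ARM64 version removed below) spill every register argument back to its stack home before reloading, precisely to avoid the move-cycle problem the comments mention. A standalone sketch of that idea, with plain arrays standing in for registers and stack homes (hypothetical names, not JIT code):

#include <cassert>

// Why genJmpMethod spills arguments first: moving values directly between
// registers can form cycles (e.g. r0 <-> r1) that need a scratch register or
// careful ordering. Going through the stack homes needs neither.
int main()
{
    int regs[2] = {10, 20};
    int home[2];

    // Phase 1: spill each arg to its stack home.
    for (int i = 0; i < 2; i++)
        home[i] = regs[i];

    // Phase 2: reload in any order; here the shuffle happens to be a swap.
    regs[0] = home[1];
    regs[1] = home[0];

    assert(regs[0] == 20 && regs[1] == 10);
    return 0;
}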
-void CodeGen::genJmpMethod(GenTreePtr jmp) -{ - assert(jmp->OperGet() == GT_JMP); - assert(compiler->compJmpOpUsed); - - // If no arguments, nothing to do - if (compiler->info.compArgsCount == 0) - { - return; - } - - // Make sure register arguments are in their initial registers - // and stack arguments are put back as well. - unsigned varNum; - LclVarDsc* varDsc; - - // First move any en-registered stack arguments back to the stack. - // At the same time any reg arg not in correct reg is moved back to its stack location. - // - // We are not strictly required to spill reg args that are not in the desired reg for a jmp call - // But that would require us to deal with circularity while moving values around. Spilling - // to stack makes the implementation simple, which is not a bad trade off given Jmp calls - // are not frequent. - for (varNum = 0; (varNum < compiler->info.compArgsCount); varNum++) - { - varDsc = compiler->lvaTable + varNum; - - if (varDsc->lvPromoted) - { - noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here - - unsigned fieldVarNum = varDsc->lvFieldLclStart; - varDsc = compiler->lvaTable + fieldVarNum; - } - noway_assert(varDsc->lvIsParam); - - if (varDsc->lvIsRegArg && (varDsc->lvRegNum != REG_STK)) + if (varDsc->lvIsRegArg && (varDsc->lvRegNum != REG_STK)) { // Skip reg args which are already in its right register for jmp call. // If not, we will spill such args to their stack locations. @@ -5081,237 +4298,6 @@ void CodeGen::genSetRegToCond(regNumber dstReg, GenTreePtr tree) } //------------------------------------------------------------------------ -// genIntToIntCast: Generate code for an integer cast -// This method handles integer overflow checking casts -// as well as ordinary integer casts. -// -// Arguments: -// treeNode - The GT_CAST node -// -// Return Value: -// None. -// -// Assumptions: -// The treeNode is not a contained node and must have an assigned register. -// For a signed convert from byte, the source must be in a byte-addressable register. -// Neither the source nor target type can be a floating point type. -// -// TODO-ARM64-CQ: Allow castOp to be a contained node without an assigned register. -// -void CodeGen::genIntToIntCast(GenTreePtr treeNode) -{ - assert(treeNode->OperGet() == GT_CAST); - - GenTreePtr castOp = treeNode->gtCast.CastOp(); - emitter* emit = getEmitter(); - - var_types dstType = treeNode->CastToType(); - var_types srcType = genActualType(castOp->TypeGet()); - emitAttr movSize = emitActualTypeSize(dstType); - bool movRequired = false; - - regNumber targetReg = treeNode->gtRegNum; - regNumber sourceReg = castOp->gtRegNum; - - // For Long to Int conversion we will have a reserved integer register to hold the immediate mask - regNumber tmpReg = (treeNode->gtRsvdRegs == RBM_NONE) ? REG_NA : genRegNumFromMask(treeNode->gtRsvdRegs); - - assert(genIsValidIntReg(targetReg)); - assert(genIsValidIntReg(sourceReg)); - - instruction ins = INS_invalid; - - genConsumeReg(castOp); - Lowering::CastInfo castInfo; - - // Get information about the cast. 
- Lowering::getCastDescription(treeNode, &castInfo); - - if (castInfo.requiresOverflowCheck) - { - - emitAttr cmpSize = EA_ATTR(genTypeSize(srcType)); - - if (castInfo.signCheckOnly) - { - // We only need to check for a negative value in sourceReg - emit->emitIns_R_I(INS_cmp, cmpSize, sourceReg, 0); - emitJumpKind jmpLT = genJumpKindForOper(GT_LT, CK_SIGNED); - genJumpToThrowHlpBlk(jmpLT, SCK_OVERFLOW); - noway_assert(genTypeSize(srcType) == 4 || genTypeSize(srcType) == 8); - // This is only interesting case to ensure zero-upper bits. - if ((srcType == TYP_INT) && (dstType == TYP_ULONG)) - { - // cast to TYP_ULONG: - // We use a mov with size=EA_4BYTE - // which will zero out the upper bits - movSize = EA_4BYTE; - movRequired = true; - } - } - else if (castInfo.unsignedSource || castInfo.unsignedDest) - { - // When we are converting from/to unsigned, - // we only have to check for any bits set in 'typeMask' - - noway_assert(castInfo.typeMask != 0); - emit->emitIns_R_I(INS_tst, cmpSize, sourceReg, castInfo.typeMask); - emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED); - genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW); - } - else - { - // For a narrowing signed cast - // - // We must check the value is in a signed range. - - // Compare with the MAX - - noway_assert((castInfo.typeMin != 0) && (castInfo.typeMax != 0)); - - if (emitter::emitIns_valid_imm_for_cmp(castInfo.typeMax, cmpSize)) - { - emit->emitIns_R_I(INS_cmp, cmpSize, sourceReg, castInfo.typeMax); - } - else - { - noway_assert(tmpReg != REG_NA); - instGen_Set_Reg_To_Imm(cmpSize, tmpReg, castInfo.typeMax); - emit->emitIns_R_R(INS_cmp, cmpSize, sourceReg, tmpReg); - } - - emitJumpKind jmpGT = genJumpKindForOper(GT_GT, CK_SIGNED); - genJumpToThrowHlpBlk(jmpGT, SCK_OVERFLOW); - - // Compare with the MIN - - if (emitter::emitIns_valid_imm_for_cmp(castInfo.typeMin, cmpSize)) - { - emit->emitIns_R_I(INS_cmp, cmpSize, sourceReg, castInfo.typeMin); - } - else - { - noway_assert(tmpReg != REG_NA); - instGen_Set_Reg_To_Imm(cmpSize, tmpReg, castInfo.typeMin); - emit->emitIns_R_R(INS_cmp, cmpSize, sourceReg, tmpReg); - } - - emitJumpKind jmpLT = genJumpKindForOper(GT_LT, CK_SIGNED); - genJumpToThrowHlpBlk(jmpLT, SCK_OVERFLOW); - } - ins = INS_mov; - } - else // Non-overflow checking cast. - { - if (genTypeSize(srcType) == genTypeSize(dstType)) - { - ins = INS_mov; - } - else - { - var_types extendType = TYP_UNKNOWN; - - // If we need to treat a signed type as unsigned - if ((treeNode->gtFlags & GTF_UNSIGNED) != 0) - { - extendType = genUnsignedType(srcType); - movSize = emitTypeSize(extendType); - movRequired = true; - } - else - { - if (genTypeSize(srcType) < genTypeSize(dstType)) - { - extendType = srcType; - if (srcType == TYP_UINT) - { - // If we are casting from a smaller type to - // a larger type, then we need to make sure the - // higher 4 bytes are zero to gaurentee the correct value. - // Therefore using a mov with EA_4BYTE in place of EA_8BYTE - // will zero the upper bits - movSize = EA_4BYTE; - movRequired = true; - } - } - else // (genTypeSize(srcType) > genTypeSize(dstType)) - { - extendType = dstType; - if (dstType == TYP_INT) - { - movSize = EA_8BYTE; // a sxtw instruction requires EA_8BYTE - } - } - } - - ins = ins_Move_Extend(extendType, castOp->InReg()); - } - } - - // We should never be generating a load from memory instruction here! 
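For a signed narrowing cast, the overflow-checking path of the genIntToIntCast removed above reduces to a min/max range check before the move. A minimal standalone restatement (CheckedNarrow is a made-up name, and the JIT branches to a throw helper block rather than throwing directly):

#include <cstdint>
#include <limits>
#include <stdexcept>

// Compare against the destination type's range (the typeMin/typeMax cmps),
// fail on overflow, then perform the now-safe truncating move.
static int32_t CheckedNarrow(int64_t value)
{
    if (value > std::numeric_limits<int32_t>::max() ||
        value < std::numeric_limits<int32_t>::min())
    {
        throw std::overflow_error("cast overflow"); // stands in for genJumpToThrowHlpBlk(SCK_OVERFLOW)
    }
    return static_cast<int32_t>(value);
}

int main()
{
    return CheckedNarrow(123456) == 123456 ? 0 : 1;
}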
- assert(!emit->emitInsIsLoad(ins)); - - if ((ins != INS_mov) || movRequired || (targetReg != sourceReg)) - { - emit->emitIns_R_R(ins, movSize, targetReg, sourceReg); - } - - genProduceReg(treeNode); -} - -//------------------------------------------------------------------------ -// genFloatToFloatCast: Generate code for a cast between float and double -// -// Arguments: -// treeNode - The GT_CAST node -// -// Return Value: -// None. -// -// Assumptions: -// Cast is a non-overflow conversion. -// The treeNode must have an assigned register. -// The cast is between float and double or vice versa. -// -void CodeGen::genFloatToFloatCast(GenTreePtr treeNode) -{ - // float <--> double conversions are always non-overflow ones - assert(treeNode->OperGet() == GT_CAST); - assert(!treeNode->gtOverflow()); - - regNumber targetReg = treeNode->gtRegNum; - assert(genIsValidFloatReg(targetReg)); - - GenTreePtr op1 = treeNode->gtOp.gtOp1; - assert(!op1->isContained()); // Cannot be contained - assert(genIsValidFloatReg(op1->gtRegNum)); // Must be a valid float reg. - - var_types dstType = treeNode->CastToType(); - var_types srcType = op1->TypeGet(); - assert(varTypeIsFloating(srcType) && varTypeIsFloating(dstType)); - - genConsumeOperands(treeNode->AsOp()); - - // treeNode must be a reg - assert(!treeNode->isContained()); - - if (srcType != dstType) - { - insOpts cvtOption = (srcType == TYP_FLOAT) ? INS_OPTS_S_TO_D // convert Single to Double - : INS_OPTS_D_TO_S; // convert Double to Single - - getEmitter()->emitIns_R_R(INS_fcvt, emitTypeSize(treeNode), treeNode->gtRegNum, op1->gtRegNum, cvtOption); - } - else if (treeNode->gtRegNum != op1->gtRegNum) - { - // If double to double cast or float to float cast. Emit a move instruction. - getEmitter()->emitIns_R_R(INS_mov, emitTypeSize(treeNode), treeNode->gtRegNum, op1->gtRegNum); - } - - genProduceReg(treeNode); -} - -//------------------------------------------------------------------------ // genIntToFloatCast: Generate code to cast an int/long to float/double // // Arguments: @@ -5589,544 +4575,6 @@ int CodeGenInterface::genCallerSPtoInitialSPdelta() return callerSPtoSPdelta; } -//--------------------------------------------------------------------- -// genIntrinsic - generate code for a given intrinsic -// -// Arguments -// treeNode - the GT_INTRINSIC node -// -// Return value: -// None -// -void CodeGen::genIntrinsic(GenTreePtr treeNode) -{ - // Both operand and its result must be of the same floating point type. - GenTreePtr srcNode = treeNode->gtOp.gtOp1; - assert(varTypeIsFloating(srcNode)); - assert(srcNode->TypeGet() == treeNode->TypeGet()); - - // Right now only Abs/Round/Sqrt are treated as math intrinsics. 
- // - switch (treeNode->gtIntrinsic.gtIntrinsicId) - { - case CORINFO_INTRINSIC_Abs: - genConsumeOperands(treeNode->AsOp()); - getEmitter()->emitInsBinary(INS_fabs, emitTypeSize(treeNode), treeNode, srcNode); - break; - - case CORINFO_INTRINSIC_Round: - genConsumeOperands(treeNode->AsOp()); - getEmitter()->emitInsBinary(INS_frintn, emitTypeSize(treeNode), treeNode, srcNode); - break; - - case CORINFO_INTRINSIC_Sqrt: - genConsumeOperands(treeNode->AsOp()); - getEmitter()->emitInsBinary(INS_fsqrt, emitTypeSize(treeNode), treeNode, srcNode); - break; - - default: - assert(!"genIntrinsic: Unsupported intrinsic"); - unreached(); - } - - genProduceReg(treeNode); -} - -//--------------------------------------------------------------------- -// genPutArgStk - generate code for a GT_PUTARG_STK node -// -// Arguments -// treeNode - the GT_PUTARG_STK node -// -// Return value: -// None -// -void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode) -{ - assert(treeNode->OperGet() == GT_PUTARG_STK); - var_types targetType = treeNode->TypeGet(); - GenTreePtr source = treeNode->gtOp1; - emitter* emit = getEmitter(); - - // This is the varNum for our store operations, - // typically this is the varNum for the Outgoing arg space - // When we are generating a tail call it will be the varNum for arg0 - unsigned varNumOut; - unsigned argOffsetMax; // Records the maximum size of this area for assert checks - - // This is the varNum for our load operations, - // only used when we have a multireg struct with a LclVar source - unsigned varNumInp = BAD_VAR_NUM; - - // Get argument offset to use with 'varNumOut' - // Here we cross check that argument offset hasn't changed from lowering to codegen since - // we are storing arg slot number in GT_PUTARG_STK node in lowering phase. - unsigned argOffsetOut = treeNode->gtSlotNum * TARGET_POINTER_SIZE; - -#ifdef DEBUG - fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(treeNode->gtCall, treeNode); - assert(curArgTabEntry); - assert(argOffsetOut == (curArgTabEntry->slotNum * TARGET_POINTER_SIZE)); -#endif // DEBUG - - // Whether to setup stk arg in incoming or out-going arg area? - // Fast tail calls implemented as epilog+jmp = stk arg is setup in incoming arg area. - // All other calls - stk arg is setup in out-going arg area. - if (treeNode->putInIncomingArgArea()) - { - varNumOut = getFirstArgWithStackSlot(); - argOffsetMax = compiler->compArgSize; -#if FEATURE_FASTTAILCALL - // This must be a fast tail call. - assert(treeNode->gtCall->IsFastTailCall()); - - // Since it is a fast tail call, the existence of first incoming arg is guaranteed - // because fast tail call requires that in-coming arg area of caller is >= out-going - // arg area required for tail call. 
- LclVarDsc* varDsc = &(compiler->lvaTable[varNumOut]); - assert(varDsc != nullptr); -#endif // FEATURE_FASTTAILCALL - } - else - { - varNumOut = compiler->lvaOutgoingArgSpaceVar; - argOffsetMax = compiler->lvaOutgoingArgSpaceSize; - } - bool isStruct = (targetType == TYP_STRUCT) || (source->OperGet() == GT_FIELD_LIST); - - if (!isStruct) // a normal non-Struct argument - { - instruction storeIns = ins_Store(targetType); - emitAttr storeAttr = emitTypeSize(targetType); - - // If it is contained then source must be the integer constant zero - if (source->isContained()) - { - assert(source->OperGet() == GT_CNS_INT); - assert(source->AsIntConCommon()->IconValue() == 0); - emit->emitIns_S_R(storeIns, storeAttr, REG_ZR, varNumOut, argOffsetOut); - } - else - { - genConsumeReg(source); - emit->emitIns_S_R(storeIns, storeAttr, source->gtRegNum, varNumOut, argOffsetOut); - } - argOffsetOut += EA_SIZE_IN_BYTES(storeAttr); - assert(argOffsetOut <= argOffsetMax); // We can't write beyound the outgoing area area - } - else // We have some kind of a struct argument - { - assert(source->isContained()); // We expect that this node was marked as contained in LowerArm64 - - if (source->OperGet() == GT_FIELD_LIST) - { - // Deal with the multi register passed struct args. - GenTreeFieldList* fieldListPtr = source->AsFieldList(); - - // Evaluate each of the GT_FIELD_LIST items into their register - // and store their register into the outgoing argument area - for (; fieldListPtr != nullptr; fieldListPtr = fieldListPtr->Rest()) - { - GenTreePtr nextArgNode = fieldListPtr->gtOp.gtOp1; - genConsumeReg(nextArgNode); - - regNumber reg = nextArgNode->gtRegNum; - var_types type = nextArgNode->TypeGet(); - emitAttr attr = emitTypeSize(type); - - // Emit store instructions to store the registers produced by the GT_FIELD_LIST into the outgoing - // argument area - emit->emitIns_S_R(ins_Store(type), attr, reg, varNumOut, argOffsetOut); - argOffsetOut += EA_SIZE_IN_BYTES(attr); - assert(argOffsetOut <= argOffsetMax); // We can't write beyound the outgoing area area - } - } - else // We must have a GT_OBJ or a GT_LCL_VAR - { - noway_assert((source->OperGet() == GT_LCL_VAR) || (source->OperGet() == GT_OBJ)); - - var_types targetType = source->TypeGet(); - noway_assert(varTypeIsStruct(targetType)); - - // We will copy this struct to the stack, possibly using a ldp instruction - // Setup loReg and hiReg from the internal registers that we reserved in lower. 
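The struct-copy loop that follows (and its twin in the new codegenarmarch.cpp below) consumes two pointer-sized slots per iteration and then a shrinking 4/2/1-byte tail. Its shape, as plain C++ (a sketch only; the real code also tracks per-slot GC-ness and defers a load when the scratch register aliases the address register):

#include <cstring>
#include <cstdint>
#include <cassert>

// Copy two pointer-sized chunks per iteration (the ldp/str pairs), then
// progressively smaller tail loads (ldr w/h/b + str).
static void CopyStructArg(uint8_t* dst, const uint8_t* src, int size)
{
    int offset = 0;
    while (size - offset >= 2 * (int)sizeof(void*))
    {
        memcpy(dst + offset, src + offset, 2 * sizeof(void*));
        offset += 2 * (int)sizeof(void*);
    }
    while (offset < size)
    {
        int tail = size - offset;
        int step = (tail >= (int)sizeof(void*)) ? (int)sizeof(void*) : (tail >= 4) ? 4 : (tail >= 2) ? 2 : 1;
        memcpy(dst + offset, src + offset, step);
        offset += step;
    }
}

int main()
{
    uint8_t src[12] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, dst[12] = {};
    CopyStructArg(dst, src, 12);
    assert(memcmp(dst, src, 12) == 0);
    return 0;
}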
- // - regNumber loReg = REG_NA; - regNumber hiReg = REG_NA; - regNumber addrReg = REG_NA; - - // In lowerArm64/TreeNodeInfoInitPutArgStk we have reserved two internal integer registers - genGetRegPairFromMask(treeNode->gtRsvdRegs, &loReg, &hiReg); - - GenTreeLclVarCommon* varNode = nullptr; - GenTreePtr addrNode = nullptr; - - if (source->OperGet() == GT_LCL_VAR) - { - varNode = source->AsLclVarCommon(); - } - else // we must have a GT_OBJ - { - assert(source->OperGet() == GT_OBJ); - - addrNode = source->gtOp.gtOp1; - - // addrNode can either be a GT_LCL_VAR_ADDR or an address expression - // - if (addrNode->OperGet() == GT_LCL_VAR_ADDR) - { - // We have a GT_OBJ(GT_LCL_VAR_ADDR) - // - // We will treat this case the same as above - // (i.e if we just had this GT_LCL_VAR directly as the source) - // so update 'source' to point this GT_LCL_VAR_ADDR node - // and continue to the codegen for the LCL_VAR node below - // - varNode = addrNode->AsLclVarCommon(); - addrNode = nullptr; - } - } - - // Either varNode or addrNOde must have been setup above, - // the xor ensures that only one of the two is setup, not both - assert((varNode != nullptr) ^ (addrNode != nullptr)); - - BYTE gcPtrs[MAX_ARG_REG_COUNT] = {}; // TYPE_GC_NONE = 0 - unsigned gcPtrCount; // The count of GC pointers in the struct - int structSize; - bool isHfa; - - // Setup the structSize, isHFa, and gcPtrCount - if (varNode != nullptr) - { - varNumInp = varNode->gtLclNum; - assert(varNumInp < compiler->lvaCount); - LclVarDsc* varDsc = &compiler->lvaTable[varNumInp]; - - assert(varDsc->lvType == TYP_STRUCT); - assert(varDsc->lvOnFrame); // This struct also must live in the stack frame - assert(!varDsc->lvRegister); // And it can't live in a register (SIMD) - - structSize = varDsc->lvSize(); // This yields the roundUp size, but that is fine - // as that is how much stack is allocated for this LclVar - isHfa = varDsc->lvIsHfa(); - gcPtrCount = varDsc->lvStructGcCount; - for (unsigned i = 0; i < gcPtrCount; ++i) - gcPtrs[i] = varDsc->lvGcLayout[i]; - } - else // addrNode is used - { - assert(addrNode != nullptr); - - // Generate code to load the address that we need into a register - genConsumeAddress(addrNode); - addrReg = addrNode->gtRegNum; - - CORINFO_CLASS_HANDLE objClass = source->gtObj.gtClass; - - structSize = compiler->info.compCompHnd->getClassSize(objClass); - isHfa = compiler->IsHfa(objClass); - gcPtrCount = compiler->info.compCompHnd->getClassGClayout(objClass, &gcPtrs[0]); - } - - bool hasGCpointers = (gcPtrCount > 0); // true if there are any GC pointers in the struct - - // If we have an HFA we can't have any GC pointers, - // if not then the max size for the the struct is 16 bytes - if (isHfa) - { - noway_assert(gcPtrCount == 0); - } - else - { - noway_assert(structSize <= 2 * TARGET_POINTER_SIZE); - } - - noway_assert(structSize <= MAX_PASS_MULTIREG_BYTES); - - // For a 16-byte structSize with GC pointers we will use two ldr and two str instructions - // ldr x2, [x0] - // ldr x3, [x0, #8] - // str x2, [sp, #16] - // str x3, [sp, #24] - // - // For a 16-byte structSize with no GC pointers we will use a ldp and two str instructions - // ldp x2, x3, [x0] - // str x2, [sp, #16] - // str x3, [sp, #24] - // - // For a 32-byte structSize with no GC pointers we will use two ldp and four str instructions - // ldp x2, x3, [x0] - // str x2, [sp, #16] - // str x3, [sp, #24] - // ldp x2, x3, [x0] - // str x2, [sp, #32] - // str x3, [sp, #40] - // - // Note that when loading from a varNode we currently can't use the ldp 
instruction - // TODO-ARM64-CQ: Implement support for using a ldp instruction with a varNum (see emitIns_R_S) - // - - int remainingSize = structSize; - unsigned structOffset = 0; - unsigned nextIndex = 0; - - while (remainingSize >= 2 * TARGET_POINTER_SIZE) - { - var_types type0 = compiler->getJitGCType(gcPtrs[nextIndex + 0]); - var_types type1 = compiler->getJitGCType(gcPtrs[nextIndex + 1]); - - if (hasGCpointers) - { - // We have GC pointers, so use two ldr instructions - // - // We must do it this way because we can't currently pass or track - // two different emitAttr values for a ldp instruction. - - // Make sure that the first load instruction does not overwrite the addrReg. - // - if (loReg != addrReg) - { - if (varNode != nullptr) - { - // Load from our varNumImp source - emit->emitIns_R_S(ins_Load(type0), emitTypeSize(type0), loReg, varNumInp, 0); - emit->emitIns_R_S(ins_Load(type1), emitTypeSize(type1), hiReg, varNumInp, - TARGET_POINTER_SIZE); - } - else - { - // Load from our address expression source - emit->emitIns_R_R_I(ins_Load(type0), emitTypeSize(type0), loReg, addrReg, structOffset); - emit->emitIns_R_R_I(ins_Load(type1), emitTypeSize(type1), hiReg, addrReg, - structOffset + TARGET_POINTER_SIZE); - } - } - else // loReg == addrReg - { - assert(varNode == nullptr); // because addrReg is REG_NA when varNode is non-null - assert(hiReg != addrReg); - // Load from our address expression source - emit->emitIns_R_R_I(ins_Load(type1), emitTypeSize(type1), hiReg, addrReg, - structOffset + TARGET_POINTER_SIZE); - emit->emitIns_R_R_I(ins_Load(type0), emitTypeSize(type0), loReg, addrReg, structOffset); - } - } - else // our struct has no GC pointers - { - if (varNode != nullptr) - { - // Load from our varNumImp source, currently we can't use a ldp instruction to do this - emit->emitIns_R_S(ins_Load(type0), emitTypeSize(type0), loReg, varNumInp, 0); - emit->emitIns_R_S(ins_Load(type1), emitTypeSize(type1), hiReg, varNumInp, TARGET_POINTER_SIZE); - } - else - { - // Use a ldp instruction - - // Load from our address expression source - emit->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, loReg, hiReg, addrReg, structOffset); - } - } - - // Emit two store instructions to store the two registers into the outgoing argument area - emit->emitIns_S_R(ins_Store(type0), emitTypeSize(type0), loReg, varNumOut, argOffsetOut); - emit->emitIns_S_R(ins_Store(type1), emitTypeSize(type1), hiReg, varNumOut, - argOffsetOut + TARGET_POINTER_SIZE); - argOffsetOut += (2 * TARGET_POINTER_SIZE); // We stored 16-bytes of the struct - assert(argOffsetOut <= argOffsetMax); // We can't write beyound the outgoing area area - - remainingSize -= (2 * TARGET_POINTER_SIZE); // We loaded 16-bytes of the struct - structOffset += (2 * TARGET_POINTER_SIZE); - nextIndex += 2; - } - - // For a 12-byte structSize we will we will generate two load instructions - // ldr x2, [x0] - // ldr w3, [x0, #8] - // str x2, [sp, #16] - // str w3, [sp, #24] - // - // When the first instruction has a loReg that is the same register as the addrReg, - // we set deferLoad to true and issue the intructions in the reverse order - // ldr x3, [x2, #8] - // ldr x2, [x2] - // str x2, [sp, #16] - // str x3, [sp, #24] - // - - var_types nextType = compiler->getJitGCType(gcPtrs[nextIndex]); - emitAttr nextAttr = emitTypeSize(nextType); - regNumber curReg = loReg; - - bool deferLoad = false; - var_types deferType = TYP_UNKNOWN; - emitAttr deferAttr = EA_PTRSIZE; - int deferOffset = 0; - - while (remainingSize > 0) - { - if (remainingSize >= 
TARGET_POINTER_SIZE) - { - remainingSize -= TARGET_POINTER_SIZE; - - if ((curReg == addrReg) && (remainingSize != 0)) - { - deferLoad = true; - deferType = nextType; - deferAttr = emitTypeSize(nextType); - deferOffset = structOffset; - } - else // the typical case - { - if (varNode != nullptr) - { - // Load from our varNumImp source - emit->emitIns_R_S(ins_Load(nextType), nextAttr, curReg, varNumInp, structOffset); - } - else - { - // Load from our address expression source - emit->emitIns_R_R_I(ins_Load(nextType), nextAttr, curReg, addrReg, structOffset); - } - // Emit a store instruction to store the register into the outgoing argument area - emit->emitIns_S_R(ins_Store(nextType), nextAttr, curReg, varNumOut, argOffsetOut); - argOffsetOut += EA_SIZE_IN_BYTES(nextAttr); - assert(argOffsetOut <= argOffsetMax); // We can't write beyound the outgoing area area - } - curReg = hiReg; - structOffset += TARGET_POINTER_SIZE; - nextIndex++; - nextType = compiler->getJitGCType(gcPtrs[nextIndex]); - nextAttr = emitTypeSize(nextType); - } - else // (remainingSize < TARGET_POINTER_SIZE) - { - int loadSize = remainingSize; - remainingSize = 0; - - // We should never have to do a non-pointer sized load when we have a LclVar source - assert(varNode == nullptr); - - // the left over size is smaller than a pointer and thus can never be a GC type - assert(varTypeIsGC(nextType) == false); - - var_types loadType = TYP_UINT; - if (loadSize == 1) - { - loadType = TYP_UBYTE; - } - else if (loadSize == 2) - { - loadType = TYP_USHORT; - } - else - { - // Need to handle additional loadSize cases here - noway_assert(loadSize == 4); - } - - instruction loadIns = ins_Load(loadType); - emitAttr loadAttr = emitAttr(loadSize); - - // When deferLoad is false, curReg can be the same as addrReg - // because the last instruction is allowed to overwrite addrReg. - // - noway_assert(!deferLoad || (curReg != addrReg)); - - emit->emitIns_R_R_I(loadIns, loadAttr, curReg, addrReg, structOffset); - - // Emit a store instruction to store the register into the outgoing argument area - emit->emitIns_S_R(ins_Store(loadType), loadAttr, curReg, varNumOut, argOffsetOut); - argOffsetOut += EA_SIZE_IN_BYTES(loadAttr); - assert(argOffsetOut <= argOffsetMax); // We can't write beyound the outgoing area area - } - } - - if (deferLoad) - { - // We should never have to do a deferred load when we have a LclVar source - assert(varNode == nullptr); - - curReg = addrReg; - - // Load from our address expression source - emit->emitIns_R_R_I(ins_Load(deferType), deferAttr, curReg, addrReg, deferOffset); - - // Emit a store instruction to store the register into the outgoing argument area - emit->emitIns_S_R(ins_Store(nextType), nextAttr, curReg, varNumOut, argOffsetOut); - argOffsetOut += EA_SIZE_IN_BYTES(nextAttr); - assert(argOffsetOut <= argOffsetMax); // We can't write beyound the outgoing area area - } - } - } -} - -/***************************************************************************** - * - * Create and record GC Info for the function. 
- */ -void CodeGen::genCreateAndStoreGCInfo(unsigned codeSize, - unsigned prologSize, - unsigned epilogSize DEBUGARG(void* codePtr)) -{ - genCreateAndStoreGCInfoX64(codeSize, prologSize DEBUGARG(codePtr)); -} - -void CodeGen::genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize DEBUGARG(void* codePtr)) -{ - IAllocator* allowZeroAlloc = new (compiler, CMK_GC) AllowZeroAllocator(compiler->getAllocatorGC()); - GcInfoEncoder* gcInfoEncoder = new (compiler, CMK_GC) - GcInfoEncoder(compiler->info.compCompHnd, compiler->info.compMethodInfo, allowZeroAlloc, NOMEM); - assert(gcInfoEncoder != nullptr); - - // Follow the code pattern of the x86 gc info encoder (genCreateAndStoreGCInfoJIT32). - gcInfo.gcInfoBlockHdrSave(gcInfoEncoder, codeSize, prologSize); - - // We keep the call count for the second call to gcMakeRegPtrTable() below. - unsigned callCnt = 0; - - // First we figure out the encoder ID's for the stack slots and registers. - gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_ASSIGN_SLOTS, &callCnt); - - // Now we've requested all the slots we'll need; "finalize" these (make more compact data structures for them). - gcInfoEncoder->FinalizeSlotIds(); - - // Now we can actually use those slot ID's to declare live ranges. - gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_DO_WORK, &callCnt); - - if (compiler->opts.compDbgEnC) - { - // what we have to preserve is called the "frame header" (see comments in VM\eetwain.cpp) - // which is: - // -return address - // -saved off RBP - // -saved 'this' pointer and bool for synchronized methods - - // 4 slots for RBP + return address + RSI + RDI - int preservedAreaSize = 4 * REGSIZE_BYTES; - - if (compiler->info.compFlags & CORINFO_FLG_SYNCH) - { - if (!(compiler->info.compFlags & CORINFO_FLG_STATIC)) - preservedAreaSize += REGSIZE_BYTES; - - preservedAreaSize += 1; // bool for synchronized methods - } - - // Used to signal both that the method is compiled for EnC, and also the size of the block at the top of the - // frame - gcInfoEncoder->SetSizeOfEditAndContinuePreservedArea(preservedAreaSize); - } - - gcInfoEncoder->Build(); - - // GC Encoder automatically puts the GC info in the right spot using ICorJitInfo::allocGCInfo(size_t) - // let's save the values anyway for debugging purposes - compiler->compInfoBlkAddr = gcInfoEncoder->Emit(); - compiler->compInfoBlkSize = 0; // not exposed by the GCEncoder interface -} - /***************************************************************************** * Emit a call to a helper function. * diff --git a/src/jit/codegenarmarch.cpp b/src/jit/codegenarmarch.cpp new file mode 100644 index 0000000..1b3a520 --- /dev/null +++ b/src/jit/codegenarmarch.cpp @@ -0,0 +1,2705 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX ARM/ARM64 Code Generator Common Code XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ +#include "jitpch.h" +#ifdef _MSC_VER +#pragma hdrstop +#endif + +#ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator + +#ifdef _TARGET_ARMARCH_ // This file is ONLY used for ARM and ARM64 architectures + +#include "codegen.h" +#include "lower.h" +#include "gcinfo.h" +#include "emit.h" + +#ifdef _TARGET_ARM_ + +//------------------------------------------------------------------------ +// genSetRegToIcon: Generate code that will set the given register to the integer constant. +// +void CodeGen::genSetRegToIcon(regNumber reg, ssize_t val, var_types type, insFlags flags) +{ + // Reg cannot be a FP reg + assert(!genIsValidFloatReg(reg)); + + // The only TYP_REF constant that can come this path is a managed 'null' since it is not + // relocatable. Other ref type constants (e.g. string objects) go through a different + // code path. + noway_assert(type != TYP_REF || val == 0); + + instGen_Set_Reg_To_Imm(emitActualTypeSize(type), reg, val, flags); +} + +#endif // _TARGET_ARM_ + +#ifdef _TARGET_ARM64_ + +/***************************************************************************** + * + * Generate code that will set the given register to the integer constant. + */ + +void CodeGen::genSetRegToIcon(regNumber reg, ssize_t val, var_types type, insFlags flags) +{ + // Reg cannot be a FP reg + assert(!genIsValidFloatReg(reg)); + + // The only TYP_REF constant that can come this path is a managed 'null' since it is not + // relocatable. Other ref type constants (e.g. string objects) go through a different + // code path. + noway_assert(type != TYP_REF || val == 0); + + instGen_Set_Reg_To_Imm(emitActualTypeSize(type), reg, val, flags); +} + +#endif // _TARGET_ARM64_ + +#ifdef _TARGET_ARM_ + +//--------------------------------------------------------------------- +// genIntrinsic - generate code for a given intrinsic +// +// Arguments +// treeNode - the GT_INTRINSIC node +// +// Return value: +// None +// +void CodeGen::genIntrinsic(GenTreePtr treeNode) +{ + // Both operand and its result must be of the same floating point type. + GenTreePtr srcNode = treeNode->gtOp.gtOp1; + assert(varTypeIsFloating(srcNode)); + assert(srcNode->TypeGet() == treeNode->TypeGet()); + + // Right now only Abs/Round/Sqrt are treated as math intrinsics. 
+ // + switch (treeNode->gtIntrinsic.gtIntrinsicId) + { + case CORINFO_INTRINSIC_Abs: + genConsumeOperands(treeNode->AsOp()); + getEmitter()->emitInsBinary(INS_vabs, emitTypeSize(treeNode), treeNode, srcNode); + break; + + case CORINFO_INTRINSIC_Round: + NYI_ARM("genIntrinsic for round - not implemented yet"); + break; + + case CORINFO_INTRINSIC_Sqrt: + genConsumeOperands(treeNode->AsOp()); + getEmitter()->emitInsBinary(INS_vsqrt, emitTypeSize(treeNode), treeNode, srcNode); + break; + + default: + assert(!"genIntrinsic: Unsupported intrinsic"); + unreached(); + } + + genProduceReg(treeNode); +} + +#endif // _TARGET_ARM_ + +#ifdef _TARGET_ARM64_ + +//--------------------------------------------------------------------- +// genIntrinsic - generate code for a given intrinsic +// +// Arguments +// treeNode - the GT_INTRINSIC node +// +// Return value: +// None +// +void CodeGen::genIntrinsic(GenTreePtr treeNode) +{ + // Both operand and its result must be of the same floating point type. + GenTreePtr srcNode = treeNode->gtOp.gtOp1; + assert(varTypeIsFloating(srcNode)); + assert(srcNode->TypeGet() == treeNode->TypeGet()); + + // Right now only Abs/Round/Sqrt are treated as math intrinsics. + // + switch (treeNode->gtIntrinsic.gtIntrinsicId) + { + case CORINFO_INTRINSIC_Abs: + genConsumeOperands(treeNode->AsOp()); + getEmitter()->emitInsBinary(INS_fabs, emitTypeSize(treeNode), treeNode, srcNode); + break; + + case CORINFO_INTRINSIC_Round: + genConsumeOperands(treeNode->AsOp()); + getEmitter()->emitInsBinary(INS_frintn, emitTypeSize(treeNode), treeNode, srcNode); + break; + + case CORINFO_INTRINSIC_Sqrt: + genConsumeOperands(treeNode->AsOp()); + getEmitter()->emitInsBinary(INS_fsqrt, emitTypeSize(treeNode), treeNode, srcNode); + break; + + default: + assert(!"genIntrinsic: Unsupported intrinsic"); + unreached(); + } + + genProduceReg(treeNode); +} + +#endif // _TARGET_ARM64_ + +#ifdef _TARGET_ARM_ + +//--------------------------------------------------------------------- +// genPutArgStk - generate code for a GT_PUTARG_STK node +// +// Arguments +// treeNode - the GT_PUTARG_STK node +// +// Return value: +// None +// +void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode) +{ + assert(treeNode->OperGet() == GT_PUTARG_STK); + var_types targetType = treeNode->TypeGet(); + GenTreePtr source = treeNode->gtOp1; + emitter* emit = getEmitter(); + + // This is the varNum for our store operations, + // typically this is the varNum for the Outgoing arg space + // When we are generating a tail call it will be the varNum for arg0 + unsigned varNumOut; + unsigned argOffsetMax; // Records the maximum size of this area for assert checks + + // Get argument offset to use with 'varNumOut' + // Here we cross check that argument offset hasn't changed from lowering to codegen since + // we are storing arg slot number in GT_PUTARG_STK node in lowering phase. 
+ unsigned argOffsetOut = treeNode->gtSlotNum * TARGET_POINTER_SIZE; + +#ifdef DEBUG + fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(treeNode->gtCall, treeNode); + assert(curArgTabEntry); + assert(argOffsetOut == (curArgTabEntry->slotNum * TARGET_POINTER_SIZE)); +#endif // DEBUG + + varNumOut = compiler->lvaOutgoingArgSpaceVar; + argOffsetMax = compiler->lvaOutgoingArgSpaceSize; + + bool isStruct = (targetType == TYP_STRUCT) || (source->OperGet() == GT_FIELD_LIST); + + if (!isStruct) // a normal non-Struct argument + { + instruction storeIns = ins_Store(targetType); + emitAttr storeAttr = emitTypeSize(targetType); + + // If it is contained then source must be the integer constant zero + if (source->isContained()) + { + assert(source->OperGet() == GT_CNS_INT); + assert(source->AsIntConCommon()->IconValue() == 0); + NYI("genPutArgStk: contained zero source"); + } + else + { + genConsumeReg(source); + emit->emitIns_S_R(storeIns, storeAttr, source->gtRegNum, varNumOut, argOffsetOut); + } + argOffsetOut += EA_SIZE_IN_BYTES(storeAttr); + assert(argOffsetOut <= argOffsetMax); // We can't write beyound the outgoing area area + } + else // We have some kind of a struct argument + { + assert(source->isContained()); // We expect that this node was marked as contained in LowerArm + + if (source->OperGet() == GT_FIELD_LIST) + { + // Deal with the multi register passed struct args. + GenTreeFieldList* fieldListPtr = source->AsFieldList(); + + // Evaluate each of the GT_FIELD_LIST items into their register + // and store their register into the outgoing argument area + for (; fieldListPtr != nullptr; fieldListPtr = fieldListPtr->Rest()) + { + GenTreePtr nextArgNode = fieldListPtr->gtOp.gtOp1; + genConsumeReg(nextArgNode); + + regNumber reg = nextArgNode->gtRegNum; + var_types type = nextArgNode->TypeGet(); + emitAttr attr = emitTypeSize(type); + + // Emit store instructions to store the registers produced by the GT_FIELD_LIST into the outgoing + // argument area + emit->emitIns_S_R(ins_Store(type), attr, reg, varNumOut, argOffsetOut); + argOffsetOut += EA_SIZE_IN_BYTES(attr); + assert(argOffsetOut <= argOffsetMax); // We can't write beyound the outgoing area area + } + } + else // We must have a GT_OBJ or a GT_LCL_VAR + { + NYI("genPutArgStk: GT_OBJ or GT_LCL_VAR source of struct type"); + } + } +} + +#endif // _TARGET_ARM_ + +#ifdef _TARGET_ARM64_ + +//--------------------------------------------------------------------- +// genPutArgStk - generate code for a GT_PUTARG_STK node +// +// Arguments +// treeNode - the GT_PUTARG_STK node +// +// Return value: +// None +// +void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode) +{ + assert(treeNode->OperGet() == GT_PUTARG_STK); + var_types targetType = treeNode->TypeGet(); + GenTreePtr source = treeNode->gtOp1; + emitter* emit = getEmitter(); + + // This is the varNum for our store operations, + // typically this is the varNum for the Outgoing arg space + // When we are generating a tail call it will be the varNum for arg0 + unsigned varNumOut; + unsigned argOffsetMax; // Records the maximum size of this area for assert checks + + // This is the varNum for our load operations, + // only used when we have a multireg struct with a LclVar source + unsigned varNumInp = BAD_VAR_NUM; + + // Get argument offset to use with 'varNumOut' + // Here we cross check that argument offset hasn't changed from lowering to codegen since + // we are storing arg slot number in GT_PUTARG_STK node in lowering phase. 
+ unsigned argOffsetOut = treeNode->gtSlotNum * TARGET_POINTER_SIZE; + +#ifdef DEBUG + fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(treeNode->gtCall, treeNode); + assert(curArgTabEntry); + assert(argOffsetOut == (curArgTabEntry->slotNum * TARGET_POINTER_SIZE)); +#endif // DEBUG + + // Whether to setup stk arg in incoming or out-going arg area? + // Fast tail calls implemented as epilog+jmp = stk arg is setup in incoming arg area. + // All other calls - stk arg is setup in out-going arg area. + if (treeNode->putInIncomingArgArea()) + { + varNumOut = getFirstArgWithStackSlot(); + argOffsetMax = compiler->compArgSize; +#if FEATURE_FASTTAILCALL + // This must be a fast tail call. + assert(treeNode->gtCall->IsFastTailCall()); + + // Since it is a fast tail call, the existence of first incoming arg is guaranteed + // because fast tail call requires that in-coming arg area of caller is >= out-going + // arg area required for tail call. + LclVarDsc* varDsc = &(compiler->lvaTable[varNumOut]); + assert(varDsc != nullptr); +#endif // FEATURE_FASTTAILCALL + } + else + { + varNumOut = compiler->lvaOutgoingArgSpaceVar; + argOffsetMax = compiler->lvaOutgoingArgSpaceSize; + } + bool isStruct = (targetType == TYP_STRUCT) || (source->OperGet() == GT_FIELD_LIST); + + if (!isStruct) // a normal non-Struct argument + { + instruction storeIns = ins_Store(targetType); + emitAttr storeAttr = emitTypeSize(targetType); + + // If it is contained then source must be the integer constant zero + if (source->isContained()) + { + assert(source->OperGet() == GT_CNS_INT); + assert(source->AsIntConCommon()->IconValue() == 0); + emit->emitIns_S_R(storeIns, storeAttr, REG_ZR, varNumOut, argOffsetOut); + } + else + { + genConsumeReg(source); + emit->emitIns_S_R(storeIns, storeAttr, source->gtRegNum, varNumOut, argOffsetOut); + } + argOffsetOut += EA_SIZE_IN_BYTES(storeAttr); + assert(argOffsetOut <= argOffsetMax); // We can't write beyound the outgoing area area + } + else // We have some kind of a struct argument + { + assert(source->isContained()); // We expect that this node was marked as contained in LowerArm64 + + if (source->OperGet() == GT_FIELD_LIST) + { + // Deal with the multi register passed struct args. + GenTreeFieldList* fieldListPtr = source->AsFieldList(); + + // Evaluate each of the GT_FIELD_LIST items into their register + // and store their register into the outgoing argument area + for (; fieldListPtr != nullptr; fieldListPtr = fieldListPtr->Rest()) + { + GenTreePtr nextArgNode = fieldListPtr->gtOp.gtOp1; + genConsumeReg(nextArgNode); + + regNumber reg = nextArgNode->gtRegNum; + var_types type = nextArgNode->TypeGet(); + emitAttr attr = emitTypeSize(type); + + // Emit store instructions to store the registers produced by the GT_FIELD_LIST into the outgoing + // argument area + emit->emitIns_S_R(ins_Store(type), attr, reg, varNumOut, argOffsetOut); + argOffsetOut += EA_SIZE_IN_BYTES(attr); + assert(argOffsetOut <= argOffsetMax); // We can't write beyound the outgoing area area + } + } + else // We must have a GT_OBJ or a GT_LCL_VAR + { + noway_assert((source->OperGet() == GT_LCL_VAR) || (source->OperGet() == GT_OBJ)); + + var_types targetType = source->TypeGet(); + noway_assert(varTypeIsStruct(targetType)); + + // We will copy this struct to the stack, possibly using a ldp instruction + // Setup loReg and hiReg from the internal registers that we reserved in lower. 
+ // + regNumber loReg = REG_NA; + regNumber hiReg = REG_NA; + regNumber addrReg = REG_NA; + + // In lowerArm64/TreeNodeInfoInitPutArgStk we have reserved two internal integer registers + genGetRegPairFromMask(treeNode->gtRsvdRegs, &loReg, &hiReg); + + GenTreeLclVarCommon* varNode = nullptr; + GenTreePtr addrNode = nullptr; + + if (source->OperGet() == GT_LCL_VAR) + { + varNode = source->AsLclVarCommon(); + } + else // we must have a GT_OBJ + { + assert(source->OperGet() == GT_OBJ); + + addrNode = source->gtOp.gtOp1; + + // addrNode can either be a GT_LCL_VAR_ADDR or an address expression + // + if (addrNode->OperGet() == GT_LCL_VAR_ADDR) + { + // We have a GT_OBJ(GT_LCL_VAR_ADDR) + // + // We will treat this case the same as above + // (i.e if we just had this GT_LCL_VAR directly as the source) + // so update 'source' to point this GT_LCL_VAR_ADDR node + // and continue to the codegen for the LCL_VAR node below + // + varNode = addrNode->AsLclVarCommon(); + addrNode = nullptr; + } + } + + // Either varNode or addrNOde must have been setup above, + // the xor ensures that only one of the two is setup, not both + assert((varNode != nullptr) ^ (addrNode != nullptr)); + + BYTE gcPtrs[MAX_ARG_REG_COUNT] = {}; // TYPE_GC_NONE = 0 + unsigned gcPtrCount; // The count of GC pointers in the struct + int structSize; + bool isHfa; + + // Setup the structSize, isHFa, and gcPtrCount + if (varNode != nullptr) + { + varNumInp = varNode->gtLclNum; + assert(varNumInp < compiler->lvaCount); + LclVarDsc* varDsc = &compiler->lvaTable[varNumInp]; + + assert(varDsc->lvType == TYP_STRUCT); + assert(varDsc->lvOnFrame); // This struct also must live in the stack frame + assert(!varDsc->lvRegister); // And it can't live in a register (SIMD) + + structSize = varDsc->lvSize(); // This yields the roundUp size, but that is fine + // as that is how much stack is allocated for this LclVar + isHfa = varDsc->lvIsHfa(); + gcPtrCount = varDsc->lvStructGcCount; + for (unsigned i = 0; i < gcPtrCount; ++i) + gcPtrs[i] = varDsc->lvGcLayout[i]; + } + else // addrNode is used + { + assert(addrNode != nullptr); + + // Generate code to load the address that we need into a register + genConsumeAddress(addrNode); + addrReg = addrNode->gtRegNum; + + CORINFO_CLASS_HANDLE objClass = source->gtObj.gtClass; + + structSize = compiler->info.compCompHnd->getClassSize(objClass); + isHfa = compiler->IsHfa(objClass); + gcPtrCount = compiler->info.compCompHnd->getClassGClayout(objClass, &gcPtrs[0]); + } + + bool hasGCpointers = (gcPtrCount > 0); // true if there are any GC pointers in the struct + + // If we have an HFA we can't have any GC pointers, + // if not then the max size for the the struct is 16 bytes + if (isHfa) + { + noway_assert(gcPtrCount == 0); + } + else + { + noway_assert(structSize <= 2 * TARGET_POINTER_SIZE); + } + + noway_assert(structSize <= MAX_PASS_MULTIREG_BYTES); + + // For a 16-byte structSize with GC pointers we will use two ldr and two str instructions + // ldr x2, [x0] + // ldr x3, [x0, #8] + // str x2, [sp, #16] + // str x3, [sp, #24] + // + // For a 16-byte structSize with no GC pointers we will use a ldp and two str instructions + // ldp x2, x3, [x0] + // str x2, [sp, #16] + // str x3, [sp, #24] + // + // For a 32-byte structSize with no GC pointers we will use two ldp and four str instructions + // ldp x2, x3, [x0] + // str x2, [sp, #16] + // str x3, [sp, #24] + // ldp x2, x3, [x0] + // str x2, [sp, #32] + // str x3, [sp, #40] + // + // Note that when loading from a varNode we currently can't use the ldp 
instruction + // TODO-ARM64-CQ: Implement support for using a ldp instruction with a varNum (see emitIns_R_S) + // + + int remainingSize = structSize; + unsigned structOffset = 0; + unsigned nextIndex = 0; + + while (remainingSize >= 2 * TARGET_POINTER_SIZE) + { + var_types type0 = compiler->getJitGCType(gcPtrs[nextIndex + 0]); + var_types type1 = compiler->getJitGCType(gcPtrs[nextIndex + 1]); + + if (hasGCpointers) + { + // We have GC pointers, so use two ldr instructions + // + // We must do it this way because we can't currently pass or track + // two different emitAttr values for a ldp instruction. + + // Make sure that the first load instruction does not overwrite the addrReg. + // + if (loReg != addrReg) + { + if (varNode != nullptr) + { + // Load from our varNumImp source + emit->emitIns_R_S(ins_Load(type0), emitTypeSize(type0), loReg, varNumInp, 0); + emit->emitIns_R_S(ins_Load(type1), emitTypeSize(type1), hiReg, varNumInp, + TARGET_POINTER_SIZE); + } + else + { + // Load from our address expression source + emit->emitIns_R_R_I(ins_Load(type0), emitTypeSize(type0), loReg, addrReg, structOffset); + emit->emitIns_R_R_I(ins_Load(type1), emitTypeSize(type1), hiReg, addrReg, + structOffset + TARGET_POINTER_SIZE); + } + } + else // loReg == addrReg + { + assert(varNode == nullptr); // because addrReg is REG_NA when varNode is non-null + assert(hiReg != addrReg); + // Load from our address expression source + emit->emitIns_R_R_I(ins_Load(type1), emitTypeSize(type1), hiReg, addrReg, + structOffset + TARGET_POINTER_SIZE); + emit->emitIns_R_R_I(ins_Load(type0), emitTypeSize(type0), loReg, addrReg, structOffset); + } + } + else // our struct has no GC pointers + { + if (varNode != nullptr) + { + // Load from our varNumImp source, currently we can't use a ldp instruction to do this + emit->emitIns_R_S(ins_Load(type0), emitTypeSize(type0), loReg, varNumInp, 0); + emit->emitIns_R_S(ins_Load(type1), emitTypeSize(type1), hiReg, varNumInp, TARGET_POINTER_SIZE); + } + else + { + // Use a ldp instruction + + // Load from our address expression source + emit->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, loReg, hiReg, addrReg, structOffset); + } + } + + // Emit two store instructions to store the two registers into the outgoing argument area + emit->emitIns_S_R(ins_Store(type0), emitTypeSize(type0), loReg, varNumOut, argOffsetOut); + emit->emitIns_S_R(ins_Store(type1), emitTypeSize(type1), hiReg, varNumOut, + argOffsetOut + TARGET_POINTER_SIZE); + argOffsetOut += (2 * TARGET_POINTER_SIZE); // We stored 16-bytes of the struct + assert(argOffsetOut <= argOffsetMax); // We can't write beyound the outgoing area area + + remainingSize -= (2 * TARGET_POINTER_SIZE); // We loaded 16-bytes of the struct + structOffset += (2 * TARGET_POINTER_SIZE); + nextIndex += 2; + } + + // For a 12-byte structSize we will we will generate two load instructions + // ldr x2, [x0] + // ldr w3, [x0, #8] + // str x2, [sp, #16] + // str w3, [sp, #24] + // + // When the first instruction has a loReg that is the same register as the addrReg, + // we set deferLoad to true and issue the intructions in the reverse order + // ldr x3, [x2, #8] + // ldr x2, [x2] + // str x2, [sp, #16] + // str x3, [sp, #24] + // + + var_types nextType = compiler->getJitGCType(gcPtrs[nextIndex]); + emitAttr nextAttr = emitTypeSize(nextType); + regNumber curReg = loReg; + + bool deferLoad = false; + var_types deferType = TYP_UNKNOWN; + emitAttr deferAttr = EA_PTRSIZE; + int deferOffset = 0; + + while (remainingSize > 0) + { + if (remainingSize >= 
+            {
+                remainingSize -= TARGET_POINTER_SIZE;
+
+                if ((curReg == addrReg) && (remainingSize != 0))
+                {
+                    deferLoad   = true;
+                    deferType   = nextType;
+                    deferAttr   = emitTypeSize(nextType);
+                    deferOffset = structOffset;
+                }
+                else // the typical case
+                {
+                    if (varNode != nullptr)
+                    {
+                        // Load from our varNumInp source
+                        emit->emitIns_R_S(ins_Load(nextType), nextAttr, curReg, varNumInp, structOffset);
+                    }
+                    else
+                    {
+                        // Load from our address expression source
+                        emit->emitIns_R_R_I(ins_Load(nextType), nextAttr, curReg, addrReg, structOffset);
+                    }
+                    // Emit a store instruction to store the register into the outgoing argument area
+                    emit->emitIns_S_R(ins_Store(nextType), nextAttr, curReg, varNumOut, argOffsetOut);
+                    argOffsetOut += EA_SIZE_IN_BYTES(nextAttr);
+                    assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing argument area
+                }
+                curReg = hiReg;
+                structOffset += TARGET_POINTER_SIZE;
+                nextIndex++;
+                nextType = compiler->getJitGCType(gcPtrs[nextIndex]);
+                nextAttr = emitTypeSize(nextType);
+            }
+            else // (remainingSize < TARGET_POINTER_SIZE)
+            {
+                int loadSize  = remainingSize;
+                remainingSize = 0;
+
+                // We should never have to do a non-pointer sized load when we have a LclVar source
+                assert(varNode == nullptr);
+
+                // the leftover size is smaller than a pointer and thus can never be a GC type
+                assert(varTypeIsGC(nextType) == false);
+
+                var_types loadType = TYP_UINT;
+                if (loadSize == 1)
+                {
+                    loadType = TYP_UBYTE;
+                }
+                else if (loadSize == 2)
+                {
+                    loadType = TYP_USHORT;
+                }
+                else
+                {
+                    // Need to handle additional loadSize cases here
+                    noway_assert(loadSize == 4);
+                }
+
+                instruction loadIns  = ins_Load(loadType);
+                emitAttr    loadAttr = emitAttr(loadSize);
+
+                // When deferLoad is false, curReg can be the same as addrReg
+                // because the last instruction is allowed to overwrite addrReg.
+                //
+                noway_assert(!deferLoad || (curReg != addrReg));
+
+                emit->emitIns_R_R_I(loadIns, loadAttr, curReg, addrReg, structOffset);
+
+                // Emit a store instruction to store the register into the outgoing argument area
+                emit->emitIns_S_R(ins_Store(loadType), loadAttr, curReg, varNumOut, argOffsetOut);
+                argOffsetOut += EA_SIZE_IN_BYTES(loadAttr);
+                assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing argument area
+            }
+        }
+
+        if (deferLoad)
+        {
+            // We should never have to do a deferred load when we have a LclVar source
+            assert(varNode == nullptr);
+
+            curReg = addrReg;
+
+            // Load from our address expression source
+            emit->emitIns_R_R_I(ins_Load(deferType), deferAttr, curReg, addrReg, deferOffset);
+
+            // Emit a store instruction to store the register into the outgoing argument area
+            emit->emitIns_S_R(ins_Store(nextType), nextAttr, curReg, varNumOut, argOffsetOut);
+            argOffsetOut += EA_SIZE_IN_BYTES(nextAttr);
+            assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing argument area
+        }
+    }
+}
+
+#endif // _TARGET_ARM64_
+
+#ifdef _TARGET_ARM_
+
+//----------------------------------------------------------------------------------
+// genMultiRegCallStoreToLocal: store multi-reg return value of a call node to a local
+//
+// Arguments:
+//    treeNode  -  Gentree of GT_STORE_LCL_VAR
+//
+// Return Value:
+//    None
+//
+// Assumption:
+//    The child of store is a multi-reg call node.
+//    genProduceReg() on treeNode is made by caller of this routine.
+//
+void CodeGen::genMultiRegCallStoreToLocal(GenTreePtr treeNode)
+{
+    assert(treeNode->OperGet() == GT_STORE_LCL_VAR);
+
+    // Longs are returned in two return registers on Arm32.
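+    // Under the ARM32 ABI the low 32 bits of a long come back in r0 and the
+    // high 32 bits in r1; the stack-store loop below writes each return
+    // register to the local's home at successive 4-byte offsets, e.g.
+    // (illustrative offsets):
+    //             str     r0, [sp, #8]
+    //             str     r1, [sp, #12]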
+    assert(varTypeIsLong(treeNode));
+
+    // Assumption: current Arm32 implementation requires that a multi-reg long
+    // var in 'var = call' is flagged as lvIsMultiRegRet to prevent it from
+    // being promoted.
+    unsigned   lclNum = treeNode->AsLclVarCommon()->gtLclNum;
+    LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
+    noway_assert(varDsc->lvIsMultiRegRet);
+
+    GenTree*     op1       = treeNode->gtGetOp1();
+    GenTree*     actualOp1 = op1->gtSkipReloadOrCopy();
+    GenTreeCall* call      = actualOp1->AsCall();
+    assert(call->HasMultiRegRetVal());
+
+    genConsumeRegs(op1);
+
+    ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
+    unsigned        regCount    = retTypeDesc->GetReturnRegCount();
+    assert(regCount <= MAX_RET_REG_COUNT);
+
+    // Stack store
+    int offset = 0;
+    for (unsigned i = 0; i < regCount; ++i)
+    {
+        var_types type = retTypeDesc->GetReturnRegType(i);
+        regNumber reg  = call->GetRegNumByIdx(i);
+        if (op1->IsCopyOrReload())
+        {
+            // GT_COPY/GT_RELOAD will have valid reg for those positions
+            // that need to be copied or reloaded.
+            regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(i);
+            if (reloadReg != REG_NA)
+            {
+                reg = reloadReg;
+            }
+        }
+
+        assert(reg != REG_NA);
+        getEmitter()->emitIns_S_R(ins_Store(type), emitTypeSize(type), reg, lclNum, offset);
+        offset += genTypeSize(type);
+    }
+
+    varDsc->lvRegNum = REG_STK;
+}
+
+#endif // _TARGET_ARM_
+
+#ifdef _TARGET_ARM64_
+
+//----------------------------------------------------------------------------------
+// genMultiRegCallStoreToLocal: store multi-reg return value of a call node to a local
+//
+// Arguments:
+//    treeNode  -  Gentree of GT_STORE_LCL_VAR
+//
+// Return Value:
+//    None
+//
+// Assumption:
+//    The child of store is a multi-reg call node.
+//    genProduceReg() on treeNode is made by caller of this routine.
+//
+void CodeGen::genMultiRegCallStoreToLocal(GenTreePtr treeNode)
+{
+    assert(treeNode->OperGet() == GT_STORE_LCL_VAR);
+
+    // Structs of size >= 9 and <= 16 bytes, as well as HFAs, are returned in
+    // multiple return registers on ARM64.
+    assert(varTypeIsStruct(treeNode));
+
+    // Assumption: current ARM64 implementation requires that a multi-reg struct
+    // var in 'var = call' is flagged as lvIsMultiRegRet to prevent it from
+    // being struct promoted.
+    unsigned   lclNum = treeNode->AsLclVarCommon()->gtLclNum;
+    LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
+    noway_assert(varDsc->lvIsMultiRegRet);
+
+    GenTree*     op1       = treeNode->gtGetOp1();
+    GenTree*     actualOp1 = op1->gtSkipReloadOrCopy();
+    GenTreeCall* call      = actualOp1->AsCall();
+    assert(call->HasMultiRegRetVal());
+
+    genConsumeRegs(op1);
+
+    ReturnTypeDesc* pRetTypeDesc = call->GetReturnTypeDesc();
+    unsigned        regCount     = pRetTypeDesc->GetReturnRegCount();
+
+    if (treeNode->gtRegNum != REG_NA)
+    {
+        // Right now the only enregistrable structs supported are SIMD types.
+        assert(varTypeIsSIMD(treeNode));
+        NYI("GT_STORE_LCL_VAR of a SIMD enregisterable struct");
+    }
+    else
+    {
+        // Stack store
+        int offset = 0;
+        for (unsigned i = 0; i < regCount; ++i)
+        {
+            var_types type = pRetTypeDesc->GetReturnRegType(i);
+            regNumber reg  = call->GetRegNumByIdx(i);
+            if (op1->IsCopyOrReload())
+            {
+                // GT_COPY/GT_RELOAD will have valid reg for those positions
+                // that need to be copied or reloaded.
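+                // (Positions that did not need a copy or reload report REG_NA
+                // here, in which case we keep the ABI return register chosen
+                // above.)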
+ regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(i); + if (reloadReg != REG_NA) + { + reg = reloadReg; + } + } + + assert(reg != REG_NA); + getEmitter()->emitIns_S_R(ins_Store(type), emitTypeSize(type), reg, lclNum, offset); + offset += genTypeSize(type); + } + + varDsc->lvRegNum = REG_STK; + } +} + +#endif // _TARGET_ARM64_ + +#ifdef _TARGET_ARM_ + +//------------------------------------------------------------------------ +// genRangeCheck: generate code for GT_ARR_BOUNDS_CHECK node. +// +void CodeGen::genRangeCheck(GenTreePtr oper) +{ + noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK); + GenTreeBoundsChk* bndsChk = oper->AsBoundsChk(); + + GenTreePtr arrIdx = bndsChk->gtIndex->gtEffectiveVal(); + GenTreePtr arrLen = bndsChk->gtArrLen->gtEffectiveVal(); + GenTreePtr arrRef = NULL; + int lenOffset = 0; + + genConsumeIfReg(arrIdx); + genConsumeIfReg(arrLen); + + GenTree * src1, *src2; + emitJumpKind jmpKind; + + if (arrIdx->isContainedIntOrIImmed()) + { + // To encode using a cmp immediate, we place the + // constant operand in the second position + src1 = arrLen; + src2 = arrIdx; + jmpKind = genJumpKindForOper(GT_LE, CK_UNSIGNED); + } + else + { + src1 = arrIdx; + src2 = arrLen; + jmpKind = genJumpKindForOper(GT_GE, CK_UNSIGNED); + } + + getEmitter()->emitInsBinary(INS_cmp, emitAttr(TYP_INT), src1, src2); + genJumpToThrowHlpBlk(jmpKind, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB); +} + +#endif // _TARGET_ARM_ + +#ifdef _TARGET_ARM64_ + +// generate code for BoundsCheck nodes +void CodeGen::genRangeCheck(GenTreePtr oper) +{ +#ifdef FEATURE_SIMD + noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK || oper->OperGet() == GT_SIMD_CHK); +#else // !FEATURE_SIMD + noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK); +#endif // !FEATURE_SIMD + + GenTreeBoundsChk* bndsChk = oper->AsBoundsChk(); + + GenTreePtr arrLen = bndsChk->gtArrLen; + GenTreePtr arrIndex = bndsChk->gtIndex; + GenTreePtr arrRef = NULL; + int lenOffset = 0; + + GenTree * src1, *src2; + emitJumpKind jmpKind; + + genConsumeRegs(arrIndex); + genConsumeRegs(arrLen); + + if (arrIndex->isContainedIntOrIImmed()) + { + // To encode using a cmp immediate, we place the + // constant operand in the second position + src1 = arrLen; + src2 = arrIndex; + jmpKind = genJumpKindForOper(GT_LE, CK_UNSIGNED); + } + else + { + src1 = arrIndex; + src2 = arrLen; + jmpKind = genJumpKindForOper(GT_GE, CK_UNSIGNED); + } + + GenTreeIntConCommon* intConst = nullptr; + if (src2->isContainedIntOrIImmed()) + { + intConst = src2->AsIntConCommon(); + } + + if (intConst != nullptr) + { + getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, src1->gtRegNum, intConst->IconValue()); + } + else + { + getEmitter()->emitIns_R_R(INS_cmp, EA_4BYTE, src1->gtRegNum, src2->gtRegNum); + } + + genJumpToThrowHlpBlk(jmpKind, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB); +} + +#endif // _TARGET_ARM64_ + +//------------------------------------------------------------------------ +// genOffsetOfMDArrayLowerBound: Returns the offset from the Array object to the +// lower bound for the given dimension. +// +// Arguments: +// elemType - the element type of the array +// rank - the rank of the array +// dimension - the dimension for which the lower bound offset will be returned. +// +// Return Value: +// The offset. 
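+//
+// Notes:
+//    As the two offset helpers encode it, an MD array stores 'rank' TYP_INT
+//    dimension sizes starting at eeGetArrayDataOffset(), followed by 'rank'
+//    TYP_INT lower bounds; hence the '(dimension + rank)' scaling below.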
+// TODO-Cleanup: move to CodeGenCommon.cpp
+
+// static
+unsigned CodeGen::genOffsetOfMDArrayLowerBound(var_types elemType, unsigned rank, unsigned dimension)
+{
+    // Note that the lower bound and length fields of the Array object are always TYP_INT
+    return compiler->eeGetArrayDataOffset(elemType) + genTypeSize(TYP_INT) * (dimension + rank);
+}
+
+//------------------------------------------------------------------------
+// genOffsetOfMDArrayDimensionSize: Returns the offset from the Array object to the
+//   size for the given dimension.
+//
+// Arguments:
+//    elemType  - the element type of the array
+//    rank      - the rank of the array
+//    dimension - the dimension for which the size offset will be returned.
+//
+// Return Value:
+//    The offset.
+// TODO-Cleanup: move to CodeGenCommon.cpp
+
+// static
+unsigned CodeGen::genOffsetOfMDArrayDimensionSize(var_types elemType, unsigned rank, unsigned dimension)
+{
+    // Note that the lower bound and length fields of the Array object are always TYP_INT
+    return compiler->eeGetArrayDataOffset(elemType) + genTypeSize(TYP_INT) * dimension;
+}
+
+//------------------------------------------------------------------------
+// genCodeForArrIndex: Generates code to bounds check the index for one dimension of an array reference,
+//                     producing the effective index by subtracting the lower bound.
+//
+// Arguments:
+//    arrIndex - the node for which we're generating code
+//
+// Return Value:
+//    None.
+//
+void CodeGen::genCodeForArrIndex(GenTreeArrIndex* arrIndex)
+{
+    emitter*   emit      = getEmitter();
+    GenTreePtr arrObj    = arrIndex->ArrObj();
+    GenTreePtr indexNode = arrIndex->IndexExpr();
+    regNumber  arrReg    = genConsumeReg(arrObj);
+    regNumber  indexReg  = genConsumeReg(indexNode);
+    regNumber  tgtReg    = arrIndex->gtRegNum;
+    noway_assert(tgtReg != REG_NA);
+
+    // We will use a temp register to load the lower bound and dimension size values
+    //
+    regMaskTP tmpRegsMask = arrIndex->gtRsvdRegs; // there will be two bits set
+    tmpRegsMask &= ~genRegMask(tgtReg);           // remove the bit for 'tgtReg' from 'tmpRegsMask'
+
+    regMaskTP tmpRegMask = genFindLowestBit(tmpRegsMask); // set tmpRegMask to a one-bit mask
+    regNumber tmpReg     = genRegNumFromMask(tmpRegMask); // set tmpReg from that mask
+    noway_assert(tmpReg != REG_NA);
+
+    assert(tgtReg != tmpReg);
+
+    unsigned  dim      = arrIndex->gtCurrDim;
+    unsigned  rank     = arrIndex->gtArrRank;
+    var_types elemType = arrIndex->gtArrElemType;
+    unsigned  offset;
+
+    offset = genOffsetOfMDArrayLowerBound(elemType, rank, dim);
+    emit->emitIns_R_R_I(ins_Load(TYP_INT), EA_PTRSIZE, tmpReg, arrReg, offset); // a 4 BYTE sign extending load
+    emit->emitIns_R_R_R(INS_sub, EA_4BYTE, tgtReg, indexReg, tmpReg);
+
+    offset = genOffsetOfMDArrayDimensionSize(elemType, rank, dim);
+    emit->emitIns_R_R_I(ins_Load(TYP_INT), EA_PTRSIZE, tmpReg, arrReg, offset); // a 4 BYTE sign extending load
+    emit->emitIns_R_R(INS_cmp, EA_4BYTE, tgtReg, tmpReg);
+
+    emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
+    genJumpToThrowHlpBlk(jmpGEU, SCK_RNGCHK_FAIL);
+
+    genProduceReg(arrIndex);
+}
+
+#ifdef _TARGET_ARM_
+
+//------------------------------------------------------------------------
+// genCodeForArrOffset: Generates code to compute the flattened array offset for
+//    one dimension of an array reference:
+//        result = (prevDimOffset * dimSize) + effectiveIndex
+//    where dimSize is obtained from the arrObj operand
+//
+// Arguments:
+//    arrOffset - the node for which we're generating code
+//
+// Return Value:
+//    None.
+// +// Notes: +// dimSize and effectiveIndex are always non-negative, the former by design, +// and the latter because it has been normalized to be zero-based. + +void CodeGen::genCodeForArrOffset(GenTreeArrOffs* arrOffset) +{ + GenTreePtr offsetNode = arrOffset->gtOffset; + GenTreePtr indexNode = arrOffset->gtIndex; + regNumber tgtReg = arrOffset->gtRegNum; + + noway_assert(tgtReg != REG_NA); + + if (!offsetNode->IsIntegralConst(0)) + { + emitter* emit = getEmitter(); + regNumber offsetReg = genConsumeReg(offsetNode); + noway_assert(offsetReg != REG_NA); + regNumber indexReg = genConsumeReg(indexNode); + noway_assert(indexReg != REG_NA); + GenTreePtr arrObj = arrOffset->gtArrObj; + regNumber arrReg = genConsumeReg(arrObj); + noway_assert(arrReg != REG_NA); + regMaskTP tmpRegMask = arrOffset->gtRsvdRegs; + regNumber tmpReg = genRegNumFromMask(tmpRegMask); + noway_assert(tmpReg != REG_NA); + unsigned dim = arrOffset->gtCurrDim; + unsigned rank = arrOffset->gtArrRank; + var_types elemType = arrOffset->gtArrElemType; + unsigned offset = genOffsetOfMDArrayDimensionSize(elemType, rank, dim); + + // Load tmpReg with the dimension size + emit->emitIns_R_R_I(ins_Load(TYP_INT), EA_4BYTE, tmpReg, arrReg, offset); // a 4 BYTE sign extending load + + // Evaluate tgtReg = offsetReg*dim_size + indexReg. + emit->emitIns_R_R_R(INS_MUL, EA_4BYTE, tgtReg, tmpReg, offsetReg); + emit->emitIns_R_R_R(INS_add, EA_4BYTE, tgtReg, tgtReg, indexReg); + } + else + { + regNumber indexReg = genConsumeReg(indexNode); + if (indexReg != tgtReg) + { + inst_RV_RV(INS_mov, tgtReg, indexReg, TYP_INT); + } + } + genProduceReg(arrOffset); +} + +#endif // _TARGET_ARM_ + +#ifdef _TARGET_ARM64_ + +//------------------------------------------------------------------------ +// genCodeForArrOffset: Generates code to compute the flattened array offset for +// one dimension of an array reference: +// result = (prevDimOffset * dimSize) + effectiveIndex +// where dimSize is obtained from the arrObj operand +// +// Arguments: +// arrOffset - the node for which we're generating code +// +// Return Value: +// None. +// +// Notes: +// dimSize and effectiveIndex are always non-negative, the former by design, +// and the latter because it has been normalized to be zero-based. + +void CodeGen::genCodeForArrOffset(GenTreeArrOffs* arrOffset) +{ + GenTreePtr offsetNode = arrOffset->gtOffset; + GenTreePtr indexNode = arrOffset->gtIndex; + regNumber tgtReg = arrOffset->gtRegNum; + + noway_assert(tgtReg != REG_NA); + + if (!offsetNode->IsIntegralConst(0)) + { + emitter* emit = getEmitter(); + regNumber offsetReg = genConsumeReg(offsetNode); + noway_assert(offsetReg != REG_NA); + regNumber indexReg = genConsumeReg(indexNode); + noway_assert(indexReg != REG_NA); + GenTreePtr arrObj = arrOffset->gtArrObj; + regNumber arrReg = genConsumeReg(arrObj); + noway_assert(arrReg != REG_NA); + regMaskTP tmpRegMask = arrOffset->gtRsvdRegs; + regNumber tmpReg = genRegNumFromMask(tmpRegMask); + noway_assert(tmpReg != REG_NA); + unsigned dim = arrOffset->gtCurrDim; + unsigned rank = arrOffset->gtArrRank; + var_types elemType = arrOffset->gtArrElemType; + unsigned offset = genOffsetOfMDArrayDimensionSize(elemType, rank, dim); + + // Load tmpReg with the dimension size + emit->emitIns_R_R_I(ins_Load(TYP_INT), EA_8BYTE, tmpReg, arrReg, offset); // a 4 BYTE sign extending load + + // Evaluate tgtReg = offsetReg*dim_size + indexReg. 
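+            // On ARM64 a single madd performs the multiply-accumulate in one
+            // instruction, e.g. (illustrative registers):
+            //             madd    w0, w1, w2, w3    ; w0 = w1 * w2 + w3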
+ emit->emitIns_R_R_R_R(INS_madd, EA_4BYTE, tgtReg, tmpReg, offsetReg, indexReg); + } + else + { + regNumber indexReg = genConsumeReg(indexNode); + if (indexReg != tgtReg) + { + inst_RV_RV(INS_mov, tgtReg, indexReg, TYP_INT); + } + } + genProduceReg(arrOffset); +} + +#endif // _TARGET_ARM64_ + +//------------------------------------------------------------------------ +// indirForm: Make a temporary indir we can feed to pattern matching routines +// in cases where we don't want to instantiate all the indirs that happen. +// +GenTreeIndir CodeGen::indirForm(var_types type, GenTree* base) +{ + GenTreeIndir i(GT_IND, type, base, nullptr); + i.gtRegNum = REG_NA; + // has to be nonnull (because contained nodes can't be the last in block) + // but don't want it to be a valid pointer + i.gtNext = (GenTree*)(-1); + return i; +} + +//------------------------------------------------------------------------ +// intForm: Make a temporary int we can feed to pattern matching routines +// in cases where we don't want to instantiate. +// +GenTreeIntCon CodeGen::intForm(var_types type, ssize_t value) +{ + GenTreeIntCon i(type, value); + i.gtRegNum = REG_NA; + // has to be nonnull (because contained nodes can't be the last in block) + // but don't want it to be a valid pointer + i.gtNext = (GenTree*)(-1); + return i; +} + +#ifdef _TARGET_ARM_ + +//------------------------------------------------------------------------ +// genCodeForShift: Generates the code sequence for a GenTree node that +// represents a bit shift or rotate operation (<<, >>, >>>, rol, ror). +// +// Arguments: +// tree - the bit shift node (that specifies the type of bit shift to perform). +// +// Assumptions: +// a) All GenTrees are register allocated. +// +void CodeGen::genCodeForShift(GenTreePtr tree) +{ + var_types targetType = tree->TypeGet(); + genTreeOps oper = tree->OperGet(); + instruction ins = genGetInsForOper(oper, targetType); + emitAttr size = emitTypeSize(tree); + + assert(tree->gtRegNum != REG_NA); + + genConsumeOperands(tree->AsOp()); + + GenTreePtr operand = tree->gtGetOp1(); + GenTreePtr shiftBy = tree->gtGetOp2(); + if (!shiftBy->IsCnsIntOrI()) + { + getEmitter()->emitIns_R_R_R(ins, size, tree->gtRegNum, operand->gtRegNum, shiftBy->gtRegNum); + } + else + { + unsigned immWidth = size * BITS_PER_BYTE; + ssize_t shiftByImm = shiftBy->gtIntCon.gtIconVal & (immWidth - 1); + + getEmitter()->emitIns_R_R_I(ins, size, tree->gtRegNum, operand->gtRegNum, shiftByImm); + } + + genProduceReg(tree); +} + +#endif // _TARGET_ARM_ + +#ifdef _TARGET_ARM64_ + +//------------------------------------------------------------------------ +// genCodeForShift: Generates the code sequence for a GenTree node that +// represents a bit shift or rotate operation (<<, >>, >>>, rol, ror). +// +// Arguments: +// tree - the bit shift node (that specifies the type of bit shift to perform). +// +// Assumptions: +// a) All GenTrees are register allocated. 
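+//
+// Notes:
+//    An immediate shift count is masked below to the operand bit width
+//    (e.g. a 32-bit shift by 33 becomes a shift by 1), so an out-of-range
+//    count never reaches the instruction encoder.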
+//
+void CodeGen::genCodeForShift(GenTreePtr tree)
+{
+    var_types   targetType = tree->TypeGet();
+    genTreeOps  oper       = tree->OperGet();
+    instruction ins        = genGetInsForOper(oper, targetType);
+    emitAttr    size       = emitTypeSize(tree);
+
+    assert(tree->gtRegNum != REG_NA);
+
+    GenTreePtr operand = tree->gtGetOp1();
+    genConsumeOperands(tree->AsOp());
+
+    GenTreePtr shiftBy = tree->gtGetOp2();
+    if (!shiftBy->IsCnsIntOrI())
+    {
+        getEmitter()->emitIns_R_R_R(ins, size, tree->gtRegNum, operand->gtRegNum, shiftBy->gtRegNum);
+    }
+    else
+    {
+        unsigned immWidth   = emitter::getBitWidth(size); // immWidth will be set to 32 or 64
+        ssize_t  shiftByImm = shiftBy->gtIntCon.gtIconVal & (immWidth - 1);
+
+        getEmitter()->emitIns_R_R_I(ins, size, tree->gtRegNum, operand->gtRegNum, shiftByImm);
+    }
+
+    genProduceReg(tree);
+}
+
+#endif // _TARGET_ARM64_
+
+#ifdef _TARGET_ARM_
+
+// Generate code for a CpBlk node by the means of the VM memcpy helper call
+// Preconditions:
+// a) The size argument of the CpBlk is not an integer constant, or
+// b) The size argument is a constant but is larger than CPBLK_UNROLL_LIMIT bytes.
+void CodeGen::genCodeForCpBlk(GenTreeBlk* cpBlkNode)
+{
+    // Make sure we got the arguments of the cpblk operation in the right registers
+    unsigned   blockSize = cpBlkNode->Size();
+    GenTreePtr dstAddr   = cpBlkNode->Addr();
+    assert(!dstAddr->isContained());
+
+    genConsumeBlockOp(cpBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2);
+    genEmitHelperCall(CORINFO_HELP_MEMCPY, 0, EA_UNKNOWN);
+}
+
+#endif // _TARGET_ARM_
+
+#ifdef _TARGET_ARM64_
+
+// Generate code for a CpBlk node by the means of the VM memcpy helper call
+// Preconditions:
+// a) The size argument of the CpBlk is not an integer constant, or
+// b) The size argument is a constant but is larger than CPBLK_UNROLL_LIMIT bytes.
+void CodeGen::genCodeForCpBlk(GenTreeBlk* cpBlkNode)
+{
+    // Make sure we got the arguments of the cpblk operation in the right registers
+    unsigned   blockSize = cpBlkNode->Size();
+    GenTreePtr dstAddr   = cpBlkNode->Addr();
+    assert(!dstAddr->isContained());
+
+    genConsumeBlockOp(cpBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2);
+
+    if (blockSize != 0)
+    {
+        assert(blockSize > CPBLK_UNROLL_LIMIT);
+    }
+
+    genEmitHelperCall(CORINFO_HELP_MEMCPY, 0, EA_UNKNOWN);
+}
+
+#endif // _TARGET_ARM64_
+
+#ifdef _TARGET_ARM_
+
+// Generates code for InitBlk by calling the VM memset helper function.
+// Preconditions:
+// a) The size argument of the InitBlk is not an integer constant, or
+// b) The size argument of the InitBlk is larger than INITBLK_UNROLL_LIMIT bytes.
+void CodeGen::genCodeForInitBlk(GenTreeBlk* initBlkNode)
+{
+    // Make sure we got the arguments of the initblk operation in the right registers
+    unsigned   size    = initBlkNode->Size();
+    GenTreePtr dstAddr = initBlkNode->Addr();
+    GenTreePtr initVal = initBlkNode->Data();
+    if (initVal->OperIsInitVal())
+    {
+        initVal = initVal->gtGetOp1();
+    }
+
+    assert(!dstAddr->isContained());
+    assert(!initVal->isContained());
+    if (initBlkNode->gtOper == GT_STORE_DYN_BLK)
+    {
+        assert(initBlkNode->AsDynBlk()->gtDynamicSize->gtRegNum == REG_ARG_2);
+    }
+    else
+    {
+        assert(initBlkNode->gtRsvdRegs == RBM_ARG_2);
+    }
+
+    genConsumeBlockOp(initBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2);
+    genEmitHelperCall(CORINFO_HELP_MEMSET, 0, EA_UNKNOWN);
+}
+
+#endif // _TARGET_ARM_
+
+#ifdef _TARGET_ARM64_
+
+// Generates code for InitBlk by calling the VM memset helper function.
+// Preconditions:
+// a) The size argument of the InitBlk is not an integer constant, or
+// b) The size argument of the InitBlk is larger than INITBLK_UNROLL_LIMIT bytes.
+void CodeGen::genCodeForInitBlk(GenTreeBlk* initBlkNode)
+{
+    // Make sure we got the arguments of the initblk operation in the right registers
+    unsigned   size    = initBlkNode->Size();
+    GenTreePtr dstAddr = initBlkNode->Addr();
+    GenTreePtr initVal = initBlkNode->Data();
+    if (initVal->OperIsInitVal())
+    {
+        initVal = initVal->gtGetOp1();
+    }
+
+    assert(!dstAddr->isContained());
+    assert(!initVal->isContained());
+    if (initBlkNode->gtOper == GT_STORE_DYN_BLK)
+    {
+        assert(initBlkNode->AsDynBlk()->gtDynamicSize->gtRegNum == REG_ARG_2);
+    }
+    else
+    {
+        assert(initBlkNode->gtRsvdRegs == RBM_ARG_2);
+    }
+
+    if (size != 0)
+    {
+        assert(size > INITBLK_UNROLL_LIMIT);
+    }
+
+    genConsumeBlockOp(initBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2);
+
+    genEmitHelperCall(CORINFO_HELP_MEMSET, 0, EA_UNKNOWN);
+}
+
+#endif // _TARGET_ARM64_
+
+#ifdef _TARGET_ARM_
+
+//------------------------------------------------------------------------
+// genRegCopy: Generate a register copy.
+//
+void CodeGen::genRegCopy(GenTree* treeNode)
+{
+    assert(treeNode->OperGet() == GT_COPY);
+
+    var_types targetType = treeNode->TypeGet();
+    regNumber targetReg  = treeNode->gtRegNum;
+    assert(targetReg != REG_NA);
+
+    GenTree* op1 = treeNode->gtOp.gtOp1;
+
+    // Check whether this node and the node from which we're copying the value have the same
+    // register type.
+    // This can happen if (currently iff) we have a SIMD vector type that fits in an integer
+    // register, in which case it is passed as an argument, or returned from a call,
+    // in an integer register and must be copied if it's in a floating point register.
+
+    if (varTypeIsFloating(treeNode) != varTypeIsFloating(op1))
+    {
+        NYI("genRegCopy floating point");
+    }
+    else
+    {
+        inst_RV_RV(ins_Copy(targetType), targetReg, genConsumeReg(op1), targetType);
+    }
+
+    if (op1->IsLocal())
+    {
+        // The lclVar will never be a def.
+        // If it is a last use, the lclVar will be killed by genConsumeReg(), as usual, and genProduceReg will
+        // appropriately set the gcInfo for the copied value.
+        // If not, there are two cases we need to handle:
+        // - If this is a TEMPORARY copy (indicated by the GTF_VAR_DEATH flag) the variable
+        //   will remain live in its original register.
+        //   genProduceReg() will appropriately set the gcInfo for the copied value,
+        //   and genConsumeReg will reset it.
+        // - Otherwise, we need to update register info for the lclVar.
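+        //   The GTF_VAR_DEATH checks below distinguish the temporary-copy case
+        //   from a real move of the variable's home register, which must also
+        //   update the GC register sets.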
+
+        GenTreeLclVarCommon* lcl = op1->AsLclVarCommon();
+        assert((lcl->gtFlags & GTF_VAR_DEF) == 0);
+
+        if ((lcl->gtFlags & GTF_VAR_DEATH) == 0 && (treeNode->gtFlags & GTF_VAR_DEATH) == 0)
+        {
+            LclVarDsc* varDsc = &compiler->lvaTable[lcl->gtLclNum];
+
+            // If we didn't just spill it (in genConsumeReg, above), then update the register info
+            if (varDsc->lvRegNum != REG_STK)
+            {
+                // The old location is dying
+                genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(op1));
+
+                gcInfo.gcMarkRegSetNpt(genRegMask(op1->gtRegNum));
+
+                genUpdateVarReg(varDsc, treeNode);
+
+                // The new location is going live
+                genUpdateRegLife(varDsc, /*isBorn*/ true, /*isDying*/ false DEBUGARG(treeNode));
+            }
+        }
+    }
+
+    genProduceReg(treeNode);
+}
+
+#endif // _TARGET_ARM_
+
+#ifdef _TARGET_ARM64_
+
+void CodeGen::genRegCopy(GenTree* treeNode)
+{
+    assert(treeNode->OperGet() == GT_COPY);
+
+    var_types targetType = treeNode->TypeGet();
+    regNumber targetReg  = treeNode->gtRegNum;
+    assert(targetReg != REG_NA);
+
+    GenTree* op1 = treeNode->gtOp.gtOp1;
+
+    // Check whether this node and the node from which we're copying the value have the same
+    // register type.
+    // This can happen if (currently iff) we have a SIMD vector type that fits in an integer
+    // register, in which case it is passed as an argument, or returned from a call,
+    // in an integer register and must be copied if it's in a floating point register.
+
+    if (varTypeIsFloating(treeNode) != varTypeIsFloating(op1))
+    {
+        inst_RV_RV(INS_fmov, targetReg, genConsumeReg(op1), targetType);
+    }
+    else
+    {
+        inst_RV_RV(ins_Copy(targetType), targetReg, genConsumeReg(op1), targetType);
+    }
+
+    if (op1->IsLocal())
+    {
+        // The lclVar will never be a def.
+        // If it is a last use, the lclVar will be killed by genConsumeReg(), as usual, and genProduceReg will
+        // appropriately set the gcInfo for the copied value.
+        // If not, there are two cases we need to handle:
+        // - If this is a TEMPORARY copy (indicated by the GTF_VAR_DEATH flag) the variable
+        //   will remain live in its original register.
+        //   genProduceReg() will appropriately set the gcInfo for the copied value,
+        //   and genConsumeReg will reset it.
+        // - Otherwise, we need to update register info for the lclVar.
+ + GenTreeLclVarCommon* lcl = op1->AsLclVarCommon(); + assert((lcl->gtFlags & GTF_VAR_DEF) == 0); + + if ((lcl->gtFlags & GTF_VAR_DEATH) == 0 && (treeNode->gtFlags & GTF_VAR_DEATH) == 0) + { + LclVarDsc* varDsc = &compiler->lvaTable[lcl->gtLclNum]; + + // If we didn't just spill it (in genConsumeReg, above), then update the register info + if (varDsc->lvRegNum != REG_STK) + { + // The old location is dying + genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(op1)); + + gcInfo.gcMarkRegSetNpt(genRegMask(op1->gtRegNum)); + + genUpdateVarReg(varDsc, treeNode); + + // The new location is going live + genUpdateRegLife(varDsc, /*isBorn*/ true, /*isDying*/ false DEBUGARG(treeNode)); + } + } + } + genProduceReg(treeNode); +} + +#endif // _TARGET_ARM64_ + +#ifdef _TARGET_ARM_ + +//------------------------------------------------------------------------ +// genCallInstruction: Produce code for a GT_CALL node +// +void CodeGen::genCallInstruction(GenTreeCall* call) +{ + gtCallTypes callType = (gtCallTypes)call->gtCallType; + + IL_OFFSETX ilOffset = BAD_IL_OFFSET; + + // all virtuals should have been expanded into a control expression + assert(!call->IsVirtual() || call->gtControlExpr || call->gtCallAddr); + + // Consume all the arg regs + for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext()) + { + assert(list->OperIsList()); + + GenTreePtr argNode = list->Current(); + + fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode->gtSkipReloadOrCopy()); + assert(curArgTabEntry); + + if (curArgTabEntry->regNum == REG_STK) + continue; + + // Deal with multi register passed struct args. + if (argNode->OperGet() == GT_FIELD_LIST) + { + GenTreeArgList* argListPtr = argNode->AsArgList(); + unsigned iterationNum = 0; + regNumber argReg = curArgTabEntry->regNum; + for (; argListPtr != nullptr; argListPtr = argListPtr->Rest(), iterationNum++) + { + GenTreePtr putArgRegNode = argListPtr->gtOp.gtOp1; + assert(putArgRegNode->gtOper == GT_PUTARG_REG); + + genConsumeReg(putArgRegNode); + + if (putArgRegNode->gtRegNum != argReg) + { + inst_RV_RV(ins_Move_Extend(putArgRegNode->TypeGet(), putArgRegNode->InReg()), argReg, + putArgRegNode->gtRegNum); + } + + argReg = genRegArgNext(argReg); + } + } + else + { + regNumber argReg = curArgTabEntry->regNum; + genConsumeReg(argNode); + if (argNode->gtRegNum != argReg) + { + inst_RV_RV(ins_Move_Extend(argNode->TypeGet(), argNode->InReg()), argReg, argNode->gtRegNum); + } + } + + // In the case of a varargs call, + // the ABI dictates that if we have floating point args, + // we must pass the enregistered arguments in both the + // integer and floating point registers so, let's do that. + if (call->IsVarargs() && varTypeIsFloating(argNode)) + { + NYI_ARM("CodeGen - IsVarargs"); + } + } + + // Insert a null check on "this" pointer if asked. + if (call->NeedsNullCheck()) + { + const regNumber regThis = genGetThisArgReg(call); + regMaskTP tempMask = genFindLowestBit(call->gtRsvdRegs); + const regNumber tmpReg = genRegNumFromMask(tempMask); + if (genCountBits(call->gtRsvdRegs) > 1) + { + call->gtRsvdRegs &= ~tempMask; + } + getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, tmpReg, regThis, 0); + } + + // Either gtControlExpr != null or gtCallAddr != null or it is a direct non-virtual call to a user or helper method. 
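+    // For CT_INDIRECT calls the target address comes from gtCallAddr; for the
+    // other call kinds any control expression (e.g. a vtable slot load) has
+    // already been evaluated into a register by this point.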
+ CORINFO_METHOD_HANDLE methHnd; + GenTree* target = call->gtControlExpr; + if (callType == CT_INDIRECT) + { + assert(target == nullptr); + target = call->gtCallAddr; + methHnd = nullptr; + } + else + { + methHnd = call->gtCallMethHnd; + } + + CORINFO_SIG_INFO* sigInfo = nullptr; +#ifdef DEBUG + // Pass the call signature information down into the emitter so the emitter can associate + // native call sites with the signatures they were generated from. + if (callType != CT_HELPER) + { + sigInfo = call->callSig; + } +#endif // DEBUG + + // If fast tail call, then we are done. + if (call->IsFastTailCall()) + { + NYI_ARM("fast tail call"); + } + + // For a pinvoke to unmanaged code we emit a label to clear + // the GC pointer state before the callsite. + // We can't utilize the typical lazy killing of GC pointers + // at (or inside) the callsite. + if (call->IsUnmanaged()) + { + genDefineTempLabel(genCreateTempLabel()); + } + + // Determine return value size(s). + ReturnTypeDesc* pRetTypeDesc = call->GetReturnTypeDesc(); + emitAttr retSize = EA_PTRSIZE; + emitAttr secondRetSize = EA_UNKNOWN; + + if (call->HasMultiRegRetVal()) + { + retSize = emitTypeSize(pRetTypeDesc->GetReturnRegType(0)); + secondRetSize = emitTypeSize(pRetTypeDesc->GetReturnRegType(1)); + } + else + { + assert(!varTypeIsStruct(call)); + + if (call->gtType == TYP_REF || call->gtType == TYP_ARRAY) + { + retSize = EA_GCREF; + } + else if (call->gtType == TYP_BYREF) + { + retSize = EA_BYREF; + } + } + + // We need to propagate the IL offset information to the call instruction, so we can emit + // an IL to native mapping record for the call, to support managed return value debugging. + // We don't want tail call helper calls that were converted from normal calls to get a record, + // so we skip this hash table lookup logic in that case. + if (compiler->opts.compDbgInfo && compiler->genCallSite2ILOffsetMap != nullptr && !call->IsTailCall()) + { + (void)compiler->genCallSite2ILOffsetMap->Lookup(call, &ilOffset); + } + + if (target != nullptr) + { + // For ARM a call target can not be a contained indirection + assert(!target->isContainedIndir()); + + genConsumeReg(target); + + // We have already generated code for gtControlExpr evaluating it into a register. + // We just need to emit "call reg" in this case. + // + assert(genIsValidIntReg(target->gtRegNum)); + + genEmitCall(emitter::EC_INDIR_R, methHnd, + INDEBUG_LDISASM_COMMA(sigInfo) nullptr, // addr + retSize, ilOffset, target->gtRegNum); + } + else + { + // Generate a direct call to a non-virtual user defined or helper method + assert(callType == CT_HELPER || callType == CT_USER_FUNC); + + void* addr = nullptr; + if (callType == CT_HELPER) + { + // Direct call to a helper method. + CorInfoHelpFunc helperNum = compiler->eeGetHelperNum(methHnd); + noway_assert(helperNum != CORINFO_HELP_UNDEF); + + void* pAddr = nullptr; + addr = compiler->compGetHelperFtn(helperNum, (void**)&pAddr); + + if (addr == nullptr) + { + addr = pAddr; + } + } + else + { + // Direct call to a non-virtual user function. 
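+            // call->IsSameThis() maps to CORINFO_ACCESS_THIS, and an elided
+            // null check maps to CORINFO_ACCESS_NONNULL; both are hints the VM
+            // can use when choosing the entry point it hands back.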
+ CORINFO_ACCESS_FLAGS aflags = CORINFO_ACCESS_ANY; + if (call->IsSameThis()) + { + aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_THIS); + } + + if ((call->NeedsNullCheck()) == 0) + { + aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_NONNULL); + } + + CORINFO_CONST_LOOKUP addrInfo; + compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo, aflags); + + addr = addrInfo.addr; + } + + assert(addr); + // Non-virtual direct call to known addresses + if (!arm_Valid_Imm_For_BL((ssize_t)addr)) + { + regNumber tmpReg = genRegNumFromMask(call->gtRsvdRegs); + instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, tmpReg, (ssize_t)addr); + genEmitCall(emitter::EC_INDIR_R, methHnd, INDEBUG_LDISASM_COMMA(sigInfo) NULL, retSize, ilOffset, tmpReg); + } + else + { + genEmitCall(emitter::EC_FUNC_TOKEN, methHnd, INDEBUG_LDISASM_COMMA(sigInfo) addr, retSize, ilOffset); + } + } + + // if it was a pinvoke we may have needed to get the address of a label + if (genPendingCallLabel) + { + assert(call->IsUnmanaged()); + genDefineTempLabel(genPendingCallLabel); + genPendingCallLabel = nullptr; + } + + // Update GC info: + // All Callee arg registers are trashed and no longer contain any GC pointers. + // TODO-ARM-Bug?: As a matter of fact shouldn't we be killing all of callee trashed regs here? + // For now we will assert that other than arg regs gc ref/byref set doesn't contain any other + // registers from RBM_CALLEE_TRASH + assert((gcInfo.gcRegGCrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0); + assert((gcInfo.gcRegByrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0); + gcInfo.gcRegGCrefSetCur &= ~RBM_ARG_REGS; + gcInfo.gcRegByrefSetCur &= ~RBM_ARG_REGS; + + var_types returnType = call->TypeGet(); + if (returnType != TYP_VOID) + { + regNumber returnReg; + + if (call->HasMultiRegRetVal()) + { + assert(pRetTypeDesc != nullptr); + unsigned regCount = pRetTypeDesc->GetReturnRegCount(); + + // If regs allocated to call node are different from ABI return + // regs in which the call has returned its result, move the result + // to regs allocated to call node. + for (unsigned i = 0; i < regCount; ++i) + { + var_types regType = pRetTypeDesc->GetReturnRegType(i); + returnReg = pRetTypeDesc->GetABIReturnReg(i); + regNumber allocatedReg = call->GetRegNumByIdx(i); + if (returnReg != allocatedReg) + { + inst_RV_RV(ins_Copy(regType), allocatedReg, returnReg, regType); + } + } + } + else + { + if (call->IsHelperCall(compiler, CORINFO_HELP_INIT_PINVOKE_FRAME)) + { + // The CORINFO_HELP_INIT_PINVOKE_FRAME helper uses a custom calling convention that returns with + // TCB in REG_PINVOKE_TCB. fgMorphCall() sets the correct argument registers. + returnReg = REG_PINVOKE_TCB; + } + else if (varTypeIsFloating(returnType)) + { + returnReg = REG_FLOATRET; + } + else + { + returnReg = REG_INTRET; + } + + if (call->gtRegNum != returnReg) + { + inst_RV_RV(ins_Copy(returnType), call->gtRegNum, returnReg, returnType); + } + } + + genProduceReg(call); + } + + // If there is nothing next, that means the result is thrown away, so this value is not live. + // However, for minopts or debuggable code, we keep it live to support managed return value debugging. 
+ if ((call->gtNext == nullptr) && !compiler->opts.MinOpts() && !compiler->opts.compDbgCode) + { + gcInfo.gcMarkRegSetNpt(RBM_INTRET); + } +} + +#endif // _TARGET_ARM_ + +#ifdef _TARGET_ARM64_ + +// Produce code for a GT_CALL node +void CodeGen::genCallInstruction(GenTreeCall* call) +{ + gtCallTypes callType = (gtCallTypes)call->gtCallType; + + IL_OFFSETX ilOffset = BAD_IL_OFFSET; + + // all virtuals should have been expanded into a control expression + assert(!call->IsVirtual() || call->gtControlExpr || call->gtCallAddr); + + // Consume all the arg regs + for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext()) + { + assert(list->OperIsList()); + + GenTreePtr argNode = list->Current(); + + fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode->gtSkipReloadOrCopy()); + assert(curArgTabEntry); + + if (curArgTabEntry->regNum == REG_STK) + continue; + + // Deal with multi register passed struct args. + if (argNode->OperGet() == GT_FIELD_LIST) + { + GenTreeArgList* argListPtr = argNode->AsArgList(); + unsigned iterationNum = 0; + regNumber argReg = curArgTabEntry->regNum; + for (; argListPtr != nullptr; argListPtr = argListPtr->Rest(), iterationNum++) + { + GenTreePtr putArgRegNode = argListPtr->gtOp.gtOp1; + assert(putArgRegNode->gtOper == GT_PUTARG_REG); + + genConsumeReg(putArgRegNode); + + if (putArgRegNode->gtRegNum != argReg) + { + inst_RV_RV(ins_Move_Extend(putArgRegNode->TypeGet(), putArgRegNode->InReg()), argReg, + putArgRegNode->gtRegNum); + } + + argReg = genRegArgNext(argReg); + } + } + else + { + regNumber argReg = curArgTabEntry->regNum; + genConsumeReg(argNode); + if (argNode->gtRegNum != argReg) + { + inst_RV_RV(ins_Move_Extend(argNode->TypeGet(), argNode->InReg()), argReg, argNode->gtRegNum); + } + } + + // In the case of a varargs call, + // the ABI dictates that if we have floating point args, + // we must pass the enregistered arguments in both the + // integer and floating point registers so, let's do that. + if (call->IsVarargs() && varTypeIsFloating(argNode)) + { + NYI_ARM64("CodeGen - IsVarargs"); + } + } + + // Insert a null check on "this" pointer if asked. + if (call->NeedsNullCheck()) + { + const regNumber regThis = genGetThisArgReg(call); + getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_ZR, regThis, 0); + } + + // Either gtControlExpr != null or gtCallAddr != null or it is a direct non-virtual call to a user or helper method. + CORINFO_METHOD_HANDLE methHnd; + GenTree* target = call->gtControlExpr; + if (callType == CT_INDIRECT) + { + assert(target == nullptr); + target = call->gtCallAddr; + methHnd = nullptr; + } + else + { + methHnd = call->gtCallMethHnd; + } + + CORINFO_SIG_INFO* sigInfo = nullptr; +#ifdef DEBUG + // Pass the call signature information down into the emitter so the emitter can associate + // native call sites with the signatures they were generated from. + if (callType != CT_HELPER) + { + sigInfo = call->callSig; + } +#endif // DEBUG + + // If fast tail call, then we are done. In this case we setup the args (both reg args + // and stack args in incoming arg area) and call target in IP0. Epilog sequence would + // generate "br IP0". + if (call->IsFastTailCall()) + { + // Don't support fast tail calling JIT helpers + assert(callType != CT_HELPER); + + // Fast tail calls materialize call target either in gtControlExpr or in gtCallAddr. 
+        assert(target != nullptr);
+
+        genConsumeReg(target);
+
+        if (target->gtRegNum != REG_IP0)
+        {
+            inst_RV_RV(INS_mov, REG_IP0, target->gtRegNum);
+        }
+        return;
+    }
+
+    // For a pinvoke to unmanaged code we emit a label to clear
+    // the GC pointer state before the callsite.
+    // We can't utilize the typical lazy killing of GC pointers
+    // at (or inside) the callsite.
+    if (call->IsUnmanaged())
+    {
+        genDefineTempLabel(genCreateTempLabel());
+    }
+
+    // Determine return value size(s).
+    ReturnTypeDesc* pRetTypeDesc  = call->GetReturnTypeDesc();
+    emitAttr        retSize       = EA_PTRSIZE;
+    emitAttr        secondRetSize = EA_UNKNOWN;
+
+    if (call->HasMultiRegRetVal())
+    {
+        retSize       = emitTypeSize(pRetTypeDesc->GetReturnRegType(0));
+        secondRetSize = emitTypeSize(pRetTypeDesc->GetReturnRegType(1));
+    }
+    else
+    {
+        assert(!varTypeIsStruct(call));
+
+        if (call->gtType == TYP_REF || call->gtType == TYP_ARRAY)
+        {
+            retSize = EA_GCREF;
+        }
+        else if (call->gtType == TYP_BYREF)
+        {
+            retSize = EA_BYREF;
+        }
+    }
+
+    // We need to propagate the IL offset information to the call instruction, so we can emit
+    // an IL to native mapping record for the call, to support managed return value debugging.
+    // We don't want tail call helper calls that were converted from normal calls to get a record,
+    // so we skip this hash table lookup logic in that case.
+    if (compiler->opts.compDbgInfo && compiler->genCallSite2ILOffsetMap != nullptr && !call->IsTailCall())
+    {
+        (void)compiler->genCallSite2ILOffsetMap->Lookup(call, &ilOffset);
+    }
+
+    if (target != nullptr)
+    {
+        // For Arm64 a call target cannot be a contained indirection
+        assert(!target->isContainedIndir());
+
+        // We have already generated code for gtControlExpr evaluating it into a register.
+        // We just need to emit "call reg" in this case.
+        //
+        assert(genIsValidIntReg(target->gtRegNum));
+
+        genEmitCall(emitter::EC_INDIR_R, methHnd,
+                    INDEBUG_LDISASM_COMMA(sigInfo) nullptr, // addr
+                    retSize, secondRetSize, ilOffset, genConsumeReg(target));
+    }
+    else
+    {
+        // Generate a direct call to a non-virtual user defined or helper method
+        assert(callType == CT_HELPER || callType == CT_USER_FUNC);
+
+        void* addr = nullptr;
+        if (callType == CT_HELPER)
+        {
+            // Direct call to a helper method.
+            CorInfoHelpFunc helperNum = compiler->eeGetHelperNum(methHnd);
+            noway_assert(helperNum != CORINFO_HELP_UNDEF);
+
+            void* pAddr = nullptr;
+            addr        = compiler->compGetHelperFtn(helperNum, (void**)&pAddr);
+
+            if (addr == nullptr)
+            {
+                addr = pAddr;
+            }
+        }
+        else
+        {
+            // Direct call to a non-virtual user function.
+ CORINFO_ACCESS_FLAGS aflags = CORINFO_ACCESS_ANY; + if (call->IsSameThis()) + { + aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_THIS); + } + + if ((call->NeedsNullCheck()) == 0) + { + aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_NONNULL); + } + + CORINFO_CONST_LOOKUP addrInfo; + compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo, aflags); + + addr = addrInfo.addr; + } +#if 0 + // Use this path if you want to load an absolute call target using + // a sequence of movs followed by an indirect call (blr instruction) + + // Load the call target address in x16 + instGen_Set_Reg_To_Imm(EA_8BYTE, REG_IP0, (ssize_t) addr); + + // indirect call to constant address in IP0 + genEmitCall(emitter::EC_INDIR_R, + methHnd, + INDEBUG_LDISASM_COMMA(sigInfo) + nullptr, //addr + retSize, + secondRetSize, + ilOffset, + REG_IP0); +#else + // Non-virtual direct call to known addresses + genEmitCall(emitter::EC_FUNC_TOKEN, methHnd, INDEBUG_LDISASM_COMMA(sigInfo) addr, retSize, secondRetSize, + ilOffset); +#endif + } + + // if it was a pinvoke we may have needed to get the address of a label + if (genPendingCallLabel) + { + assert(call->IsUnmanaged()); + genDefineTempLabel(genPendingCallLabel); + genPendingCallLabel = nullptr; + } + + // Update GC info: + // All Callee arg registers are trashed and no longer contain any GC pointers. + // TODO-ARM64-Bug?: As a matter of fact shouldn't we be killing all of callee trashed regs here? + // For now we will assert that other than arg regs gc ref/byref set doesn't contain any other + // registers from RBM_CALLEE_TRASH + assert((gcInfo.gcRegGCrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0); + assert((gcInfo.gcRegByrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0); + gcInfo.gcRegGCrefSetCur &= ~RBM_ARG_REGS; + gcInfo.gcRegByrefSetCur &= ~RBM_ARG_REGS; + + var_types returnType = call->TypeGet(); + if (returnType != TYP_VOID) + { + regNumber returnReg; + + if (call->HasMultiRegRetVal()) + { + assert(pRetTypeDesc != nullptr); + unsigned regCount = pRetTypeDesc->GetReturnRegCount(); + + // If regs allocated to call node are different from ABI return + // regs in which the call has returned its result, move the result + // to regs allocated to call node. + for (unsigned i = 0; i < regCount; ++i) + { + var_types regType = pRetTypeDesc->GetReturnRegType(i); + returnReg = pRetTypeDesc->GetABIReturnReg(i); + regNumber allocatedReg = call->GetRegNumByIdx(i); + if (returnReg != allocatedReg) + { + inst_RV_RV(ins_Copy(regType), allocatedReg, returnReg, regType); + } + } + } + else + { + if (varTypeIsFloating(returnType)) + { + returnReg = REG_FLOATRET; + } + else + { + returnReg = REG_INTRET; + } + + if (call->gtRegNum != returnReg) + { + inst_RV_RV(ins_Copy(returnType), call->gtRegNum, returnReg, returnType); + } + } + + genProduceReg(call); + } + + // If there is nothing next, that means the result is thrown away, so this value is not live. + // However, for minopts or debuggable code, we keep it live to support managed return value debugging. + if ((call->gtNext == nullptr) && !compiler->opts.MinOpts() && !compiler->opts.compDbgCode) + { + gcInfo.gcMarkRegSetNpt(RBM_INTRET); + } +} + +#endif // _TARGET_ARM64_ + +#ifdef _TARGET_ARM_ + +//------------------------------------------------------------------------ +// genIntToIntCast: Generate code for an integer cast +// +// Arguments: +// treeNode - The GT_CAST node +// +// Return Value: +// None. +// +// Assumptions: +// The treeNode must have an assigned register. 
+// For a signed convert from byte, the source must be in a byte-addressable register.
+// Neither the source nor target type can be a floating point type.
+//
+void CodeGen::genIntToIntCast(GenTreePtr treeNode)
+{
+    assert(treeNode->OperGet() == GT_CAST);
+
+    GenTreePtr castOp = treeNode->gtCast.CastOp();
+    emitter*   emit   = getEmitter();
+
+    var_types dstType     = treeNode->CastToType();
+    var_types srcType     = genActualType(castOp->TypeGet());
+    emitAttr  movSize     = emitActualTypeSize(dstType);
+    bool      movRequired = false;
+
+    if (varTypeIsLong(srcType))
+    {
+        genLongToIntCast(treeNode);
+        return;
+    }
+
+    regNumber targetReg = treeNode->gtRegNum;
+    regNumber sourceReg = castOp->gtRegNum;
+
+    // For Long to Int conversion we will have a reserved integer register to hold the immediate mask
+    regNumber tmpReg = (treeNode->gtRsvdRegs == RBM_NONE) ? REG_NA : genRegNumFromMask(treeNode->gtRsvdRegs);
+
+    assert(genIsValidIntReg(targetReg));
+    assert(genIsValidIntReg(sourceReg));
+
+    instruction ins = INS_invalid;
+
+    genConsumeReg(castOp);
+    Lowering::CastInfo castInfo;
+
+    // Get information about the cast.
+    Lowering::getCastDescription(treeNode, &castInfo);
+
+    if (castInfo.requiresOverflowCheck)
+    {
+        emitAttr cmpSize = EA_ATTR(genTypeSize(srcType));
+
+        if (castInfo.signCheckOnly)
+        {
+            // We only need to check for a negative value in sourceReg
+            emit->emitIns_R_I(INS_cmp, cmpSize, sourceReg, 0);
+            emitJumpKind jmpLT = genJumpKindForOper(GT_LT, CK_SIGNED);
+            genJumpToThrowHlpBlk(jmpLT, SCK_OVERFLOW);
+            noway_assert(genTypeSize(srcType) == 4 || genTypeSize(srcType) == 8);
+            // This is the only interesting case for ensuring zero upper bits.
+            if ((srcType == TYP_INT) && (dstType == TYP_ULONG))
+            {
+                // cast to TYP_ULONG:
+                // We use a mov with size=EA_4BYTE
+                // which will zero out the upper bits
+                movSize     = EA_4BYTE;
+                movRequired = true;
+            }
+        }
+        else if (castInfo.unsignedSource || castInfo.unsignedDest)
+        {
+            // When we are converting from/to unsigned,
+            // we only have to check for any bits set in 'typeMask'
+
+            noway_assert(castInfo.typeMask != 0);
+            emit->emitIns_R_I(INS_tst, cmpSize, sourceReg, castInfo.typeMask);
+            emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
+            genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
+        }
+        else
+        {
+            // For a narrowing signed cast
+            //
+            // We must check the value is in a signed range.
+
+            // Compare with the MAX
+
+            noway_assert((castInfo.typeMin != 0) && (castInfo.typeMax != 0));
+
+            if (emitter::emitIns_valid_imm_for_cmp(castInfo.typeMax, INS_FLAGS_DONT_CARE))
+            {
+                emit->emitIns_R_I(INS_cmp, cmpSize, sourceReg, castInfo.typeMax);
+            }
+            else
+            {
+                noway_assert(tmpReg != REG_NA);
+                instGen_Set_Reg_To_Imm(cmpSize, tmpReg, castInfo.typeMax);
+                emit->emitIns_R_R(INS_cmp, cmpSize, sourceReg, tmpReg);
+            }
+
+            emitJumpKind jmpGT = genJumpKindForOper(GT_GT, CK_SIGNED);
+            genJumpToThrowHlpBlk(jmpGT, SCK_OVERFLOW);
+
+            // Compare with the MIN
+
+            if (emitter::emitIns_valid_imm_for_cmp(castInfo.typeMin, INS_FLAGS_DONT_CARE))
+            {
+                emit->emitIns_R_I(INS_cmp, cmpSize, sourceReg, castInfo.typeMin);
+            }
+            else
+            {
+                noway_assert(tmpReg != REG_NA);
+                instGen_Set_Reg_To_Imm(cmpSize, tmpReg, castInfo.typeMin);
+                emit->emitIns_R_R(INS_cmp, cmpSize, sourceReg, tmpReg);
+            }
+
+            emitJumpKind jmpLT = genJumpKindForOper(GT_LT, CK_SIGNED);
+            genJumpToThrowHlpBlk(jmpLT, SCK_OVERFLOW);
+        }
+        ins = INS_mov;
+    }
+    else // Non-overflow checking cast.
+    {
+        if (genTypeSize(srcType) == genTypeSize(dstType))
+        {
+            ins = INS_mov;
+        }
+        else
+        {
+            var_types extendType = TYP_UNKNOWN;
+
+            // If we need to treat a signed type as unsigned
+            if ((treeNode->gtFlags & GTF_UNSIGNED) != 0)
+            {
+                extendType  = genUnsignedType(srcType);
+                movSize     = emitTypeSize(extendType);
+                movRequired = true;
+            }
+            else
+            {
+                if (genTypeSize(srcType) < genTypeSize(dstType))
+                {
+                    extendType = srcType;
+                    movSize    = emitTypeSize(srcType);
+                    if (srcType == TYP_UINT)
+                    {
+                        movRequired = true;
+                    }
+                }
+                else // (genTypeSize(srcType) > genTypeSize(dstType))
+                {
+                    extendType = dstType;
+                    movSize    = emitTypeSize(dstType);
+                }
+            }
+
+            ins = ins_Move_Extend(extendType, castOp->InReg());
+        }
+    }
+
+    // We should never be generating a load from memory instruction here!
+    assert(!emit->emitInsIsLoad(ins));
+
+    if ((ins != INS_mov) || movRequired || (targetReg != sourceReg))
+    {
+        emit->emitIns_R_R(ins, movSize, targetReg, sourceReg);
+    }
+
+    genProduceReg(treeNode);
+}
+
+#endif // _TARGET_ARM_
+
+#ifdef _TARGET_ARM64_
+
+//------------------------------------------------------------------------
+// genIntToIntCast: Generate code for an integer cast
+//    This method handles integer overflow checking casts
+//    as well as ordinary integer casts.
+//
+// Arguments:
+//    treeNode - The GT_CAST node
+//
+// Return Value:
+//    None.
+//
+// Assumptions:
+//    The treeNode is not a contained node and must have an assigned register.
+//    For a signed convert from byte, the source must be in a byte-addressable register.
+//    Neither the source nor target type can be a floating point type.
+//
+// TODO-ARM64-CQ: Allow castOp to be a contained node without an assigned register.
+//
+void CodeGen::genIntToIntCast(GenTreePtr treeNode)
+{
+    assert(treeNode->OperGet() == GT_CAST);
+
+    GenTreePtr castOp = treeNode->gtCast.CastOp();
+    emitter*   emit   = getEmitter();
+
+    var_types dstType     = treeNode->CastToType();
+    var_types srcType     = genActualType(castOp->TypeGet());
+    emitAttr  movSize     = emitActualTypeSize(dstType);
+    bool      movRequired = false;
+
+    regNumber targetReg = treeNode->gtRegNum;
+    regNumber sourceReg = castOp->gtRegNum;
+
+    // For Long to Int conversion we will have a reserved integer register to hold the immediate mask
+    regNumber tmpReg = (treeNode->gtRsvdRegs == RBM_NONE) ? REG_NA : genRegNumFromMask(treeNode->gtRsvdRegs);
+
+    assert(genIsValidIntReg(targetReg));
+    assert(genIsValidIntReg(sourceReg));
+
+    instruction ins = INS_invalid;
+
+    genConsumeReg(castOp);
+    Lowering::CastInfo castInfo;
+
+    // Get information about the cast.
+    Lowering::getCastDescription(treeNode, &castInfo);
+
+    if (castInfo.requiresOverflowCheck)
+    {
+
+        emitAttr cmpSize = EA_ATTR(genTypeSize(srcType));
+
+        if (castInfo.signCheckOnly)
+        {
+            // We only need to check for a negative value in sourceReg
+            emit->emitIns_R_I(INS_cmp, cmpSize, sourceReg, 0);
+            emitJumpKind jmpLT = genJumpKindForOper(GT_LT, CK_SIGNED);
+            genJumpToThrowHlpBlk(jmpLT, SCK_OVERFLOW);
+            noway_assert(genTypeSize(srcType) == 4 || genTypeSize(srcType) == 8);
+            // This is the only interesting case for ensuring zero upper bits.
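+            // e.g. for '(ulong)someInt' a 'mov w0, w0' is emitted; on ARM64 a
+            // 32-bit register write implicitly zeroes bits 63:32.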
+            if ((srcType == TYP_INT) && (dstType == TYP_ULONG))
+            {
+                // cast to TYP_ULONG:
+                // We use a mov with size=EA_4BYTE
+                // which will zero out the upper bits
+                movSize     = EA_4BYTE;
+                movRequired = true;
+            }
+        }
+        else if (castInfo.unsignedSource || castInfo.unsignedDest)
+        {
+            // When we are converting from/to unsigned,
+            // we only have to check for any bits set in 'typeMask'
+
+            noway_assert(castInfo.typeMask != 0);
+            emit->emitIns_R_I(INS_tst, cmpSize, sourceReg, castInfo.typeMask);
+            emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
+            genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
+        }
+        else
+        {
+            // For a narrowing signed cast
+            //
+            // We must check the value is in a signed range.
+
+            // Compare with the MAX
+
+            noway_assert((castInfo.typeMin != 0) && (castInfo.typeMax != 0));
+
+            if (emitter::emitIns_valid_imm_for_cmp(castInfo.typeMax, cmpSize))
+            {
+                emit->emitIns_R_I(INS_cmp, cmpSize, sourceReg, castInfo.typeMax);
+            }
+            else
+            {
+                noway_assert(tmpReg != REG_NA);
+                instGen_Set_Reg_To_Imm(cmpSize, tmpReg, castInfo.typeMax);
+                emit->emitIns_R_R(INS_cmp, cmpSize, sourceReg, tmpReg);
+            }
+
+            emitJumpKind jmpGT = genJumpKindForOper(GT_GT, CK_SIGNED);
+            genJumpToThrowHlpBlk(jmpGT, SCK_OVERFLOW);
+
+            // Compare with the MIN
+
+            if (emitter::emitIns_valid_imm_for_cmp(castInfo.typeMin, cmpSize))
+            {
+                emit->emitIns_R_I(INS_cmp, cmpSize, sourceReg, castInfo.typeMin);
+            }
+            else
+            {
+                noway_assert(tmpReg != REG_NA);
+                instGen_Set_Reg_To_Imm(cmpSize, tmpReg, castInfo.typeMin);
+                emit->emitIns_R_R(INS_cmp, cmpSize, sourceReg, tmpReg);
+            }
+
+            emitJumpKind jmpLT = genJumpKindForOper(GT_LT, CK_SIGNED);
+            genJumpToThrowHlpBlk(jmpLT, SCK_OVERFLOW);
+        }
+        ins = INS_mov;
+    }
+    else // Non-overflow checking cast.
+    {
+        if (genTypeSize(srcType) == genTypeSize(dstType))
+        {
+            ins = INS_mov;
+        }
+        else
+        {
+            var_types extendType = TYP_UNKNOWN;
+
+            // If we need to treat a signed type as unsigned
+            if ((treeNode->gtFlags & GTF_UNSIGNED) != 0)
+            {
+                extendType  = genUnsignedType(srcType);
+                movSize     = emitTypeSize(extendType);
+                movRequired = true;
+            }
+            else
+            {
+                if (genTypeSize(srcType) < genTypeSize(dstType))
+                {
+                    extendType = srcType;
+                    if (srcType == TYP_UINT)
+                    {
+                        // If we are casting from a smaller type to
+                        // a larger type, then we need to make sure the
+                        // higher 4 bytes are zero to guarantee the correct value.
+                        // Therefore using a mov with EA_4BYTE in place of EA_8BYTE
+                        // will zero the upper bits
+                        movSize     = EA_4BYTE;
+                        movRequired = true;
+                    }
+                }
+                else // (genTypeSize(srcType) > genTypeSize(dstType))
+                {
+                    extendType = dstType;
+                    if (dstType == TYP_INT)
+                    {
+                        movSize = EA_8BYTE; // a sxtw instruction requires EA_8BYTE
+                    }
+                }
+            }
+
+            ins = ins_Move_Extend(extendType, castOp->InReg());
+        }
+    }
+
+    // We should never be generating a load from memory instruction here!
+    assert(!emit->emitInsIsLoad(ins));
+
+    if ((ins != INS_mov) || movRequired || (targetReg != sourceReg))
+    {
+        emit->emitIns_R_R(ins, movSize, targetReg, sourceReg);
+    }
+
+    genProduceReg(treeNode);
+}
+
+#endif // _TARGET_ARM64_
+
+#ifdef _TARGET_ARM_
+
+//------------------------------------------------------------------------
+// genFloatToFloatCast: Generate code for a cast between float and double
+//
+// Arguments:
+//    treeNode - The GT_CAST node
+//
+// Return Value:
+//    None.
+//
+// Assumptions:
+//    Cast is a non-overflow conversion.
+//    The treeNode must have an assigned register.
+//    The cast is between float and double.
+//------------------------------------------------------------------------
+// genFloatToFloatCast: Generate code for a cast between float and double
+//
+// Arguments:
+//    treeNode - The GT_CAST node
+//
+// Return Value:
+//    None.
+//
+// Assumptions:
+//    Cast is a non-overflow conversion.
+//    The treeNode must have an assigned register.
+//    The cast is between float and double.
+//
+void CodeGen::genFloatToFloatCast(GenTreePtr treeNode)
+{
+    // float <--> double conversions are always non-overflow ones
+    assert(treeNode->OperGet() == GT_CAST);
+    assert(!treeNode->gtOverflow());
+
+    regNumber targetReg = treeNode->gtRegNum;
+    assert(genIsValidFloatReg(targetReg));
+
+    GenTreePtr op1 = treeNode->gtOp.gtOp1;
+    assert(!op1->isContained());               // Cannot be contained
+    assert(genIsValidFloatReg(op1->gtRegNum)); // Must be a valid float reg.
+
+    var_types dstType = treeNode->CastToType();
+    var_types srcType = op1->TypeGet();
+    assert(varTypeIsFloating(srcType) && varTypeIsFloating(dstType));
+
+    genConsumeOperands(treeNode->AsOp());
+
+    // treeNode must be a reg
+    assert(!treeNode->isContained());
+
+    if (srcType != dstType)
+    {
+        instruction insVcvt = (srcType == TYP_FLOAT) ? INS_vcvt_f2d  // convert Float to Double
+                                                     : INS_vcvt_d2f; // convert Double to Float
+
+        getEmitter()->emitIns_R_R(insVcvt, emitTypeSize(treeNode), treeNode->gtRegNum, op1->gtRegNum);
+    }
+    else if (treeNode->gtRegNum != op1->gtRegNum)
+    {
+        getEmitter()->emitIns_R_R(INS_vmov, emitTypeSize(treeNode), treeNode->gtRegNum, op1->gtRegNum);
+    }
+
+    genProduceReg(treeNode);
+}
+
+#endif // _TARGET_ARM_
+
+#ifdef _TARGET_ARM64_
+
+//------------------------------------------------------------------------
+// genFloatToFloatCast: Generate code for a cast between float and double
+//
+// Arguments:
+//    treeNode - The GT_CAST node
+//
+// Return Value:
+//    None.
+//
+// Assumptions:
+//    Cast is a non-overflow conversion.
+//    The treeNode must have an assigned register.
+//    The cast is between float and double or vice versa.
+//
+void CodeGen::genFloatToFloatCast(GenTreePtr treeNode)
+{
+    // float <--> double conversions are always non-overflow ones
+    assert(treeNode->OperGet() == GT_CAST);
+    assert(!treeNode->gtOverflow());
+
+    regNumber targetReg = treeNode->gtRegNum;
+    assert(genIsValidFloatReg(targetReg));
+
+    GenTreePtr op1 = treeNode->gtOp.gtOp1;
+    assert(!op1->isContained());               // Cannot be contained
+    assert(genIsValidFloatReg(op1->gtRegNum)); // Must be a valid float reg.
+
+    var_types dstType = treeNode->CastToType();
+    var_types srcType = op1->TypeGet();
+    assert(varTypeIsFloating(srcType) && varTypeIsFloating(dstType));
+
+    genConsumeOperands(treeNode->AsOp());
+
+    // treeNode must be a reg
+    assert(!treeNode->isContained());
+
+    if (srcType != dstType)
+    {
+        insOpts cvtOption = (srcType == TYP_FLOAT) ? INS_OPTS_S_TO_D  // convert Single to Double
+                                                   : INS_OPTS_D_TO_S; // convert Double to Single
+
+        getEmitter()->emitIns_R_R(INS_fcvt, emitTypeSize(treeNode), treeNode->gtRegNum, op1->gtRegNum, cvtOption);
+    }
+    else if (treeNode->gtRegNum != op1->gtRegNum)
+    {
+        // A double-to-double or float-to-float cast: emit a move instruction.
+        getEmitter()->emitIns_R_R(INS_mov, emitTypeSize(treeNode), treeNode->gtRegNum, op1->gtRegNum);
+    }
+
+    genProduceReg(treeNode);
+}
+
+#endif // _TARGET_ARM64_
+
+#ifdef _TARGET_ARM_
+
+//------------------------------------------------------------------------
+// genCreateAndStoreGCInfo: Create and record GC Info for the function.
+// +void CodeGen::genCreateAndStoreGCInfo(unsigned codeSize, + unsigned prologSize, + unsigned epilogSize DEBUGARG(void* codePtr)) +{ + IAllocator* allowZeroAlloc = new (compiler, CMK_GC) AllowZeroAllocator(compiler->getAllocatorGC()); + GcInfoEncoder* gcInfoEncoder = new (compiler, CMK_GC) + GcInfoEncoder(compiler->info.compCompHnd, compiler->info.compMethodInfo, allowZeroAlloc, NOMEM); + assert(gcInfoEncoder); + + // Follow the code pattern of the x86 gc info encoder (genCreateAndStoreGCInfoJIT32). + gcInfo.gcInfoBlockHdrSave(gcInfoEncoder, codeSize, prologSize); + + // We keep the call count for the second call to gcMakeRegPtrTable() below. + unsigned callCnt = 0; + // First we figure out the encoder ID's for the stack slots and registers. + gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_ASSIGN_SLOTS, &callCnt); + // Now we've requested all the slots we'll need; "finalize" these (make more compact data structures for them). + gcInfoEncoder->FinalizeSlotIds(); + // Now we can actually use those slot ID's to declare live ranges. + gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_DO_WORK, &callCnt); + + gcInfoEncoder->Build(); + + // GC Encoder automatically puts the GC info in the right spot using ICorJitInfo::allocGCInfo(size_t) + // let's save the values anyway for debugging purposes + compiler->compInfoBlkAddr = gcInfoEncoder->Emit(); + compiler->compInfoBlkSize = 0; // not exposed by the GCEncoder interface +} + +#endif // _TARGET_ARM_ + +#ifdef _TARGET_ARM64_ + +/***************************************************************************** + * + * Create and record GC Info for the function. + */ +void CodeGen::genCreateAndStoreGCInfo(unsigned codeSize, + unsigned prologSize, + unsigned epilogSize DEBUGARG(void* codePtr)) +{ + genCreateAndStoreGCInfoX64(codeSize, prologSize DEBUGARG(codePtr)); +} + +void CodeGen::genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize DEBUGARG(void* codePtr)) +{ + IAllocator* allowZeroAlloc = new (compiler, CMK_GC) AllowZeroAllocator(compiler->getAllocatorGC()); + GcInfoEncoder* gcInfoEncoder = new (compiler, CMK_GC) + GcInfoEncoder(compiler->info.compCompHnd, compiler->info.compMethodInfo, allowZeroAlloc, NOMEM); + assert(gcInfoEncoder != nullptr); + + // Follow the code pattern of the x86 gc info encoder (genCreateAndStoreGCInfoJIT32). + gcInfo.gcInfoBlockHdrSave(gcInfoEncoder, codeSize, prologSize); + + // We keep the call count for the second call to gcMakeRegPtrTable() below. + unsigned callCnt = 0; + + // First we figure out the encoder ID's for the stack slots and registers. + gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_ASSIGN_SLOTS, &callCnt); + + // Now we've requested all the slots we'll need; "finalize" these (make more compact data structures for them). + gcInfoEncoder->FinalizeSlotIds(); + + // Now we can actually use those slot ID's to declare live ranges. 
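+    // Editor's note: a sketch of the two-pass encoder protocol used here; both
+    // passes must visit the same call sites, which is why 'callCnt' from the
+    // first pass is threaded into the second:
+    //     gcMakeRegPtrTable(MAKE_REG_PTR_MODE_ASSIGN_SLOTS) -> allocate slot ids
+    //     FinalizeSlotIds()                                 -> freeze the slot set
+    //     gcMakeRegPtrTable(MAKE_REG_PTR_MODE_DO_WORK)      -> report live ranges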
+ gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_DO_WORK, &callCnt); + + if (compiler->opts.compDbgEnC) + { + // what we have to preserve is called the "frame header" (see comments in VM\eetwain.cpp) + // which is: + // -return address + // -saved off RBP + // -saved 'this' pointer and bool for synchronized methods + + // 4 slots for RBP + return address + RSI + RDI + int preservedAreaSize = 4 * REGSIZE_BYTES; + + if (compiler->info.compFlags & CORINFO_FLG_SYNCH) + { + if (!(compiler->info.compFlags & CORINFO_FLG_STATIC)) + preservedAreaSize += REGSIZE_BYTES; + + preservedAreaSize += 1; // bool for synchronized methods + } + + // Used to signal both that the method is compiled for EnC, and also the size of the block at the top of the + // frame + gcInfoEncoder->SetSizeOfEditAndContinuePreservedArea(preservedAreaSize); + } + + gcInfoEncoder->Build(); + + // GC Encoder automatically puts the GC info in the right spot using ICorJitInfo::allocGCInfo(size_t) + // let's save the values anyway for debugging purposes + compiler->compInfoBlkAddr = gcInfoEncoder->Emit(); + compiler->compInfoBlkSize = 0; // not exposed by the GCEncoder interface +} + +#endif // _TARGET_ARM64_ + +#endif // _TARGET_ARMARCH_ + +#endif // !LEGACY_BACKEND diff --git a/src/jit/lowerarm.cpp b/src/jit/lowerarm.cpp index 851ec45..0701520 100644 --- a/src/jit/lowerarm.cpp +++ b/src/jit/lowerarm.cpp @@ -31,273 +31,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #include "lsra.h" //------------------------------------------------------------------------ -// LowerStoreLoc: Lower a store of a lclVar -// -// Arguments: -// storeLoc - the local store (GT_STORE_LCL_FLD or GT_STORE_LCL_VAR) -// -// Notes: -// This involves: -// - Widening operations of unsigneds. -// -void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc) -{ - // Try to widen the ops if they are going into a local var. - GenTree* op1 = storeLoc->gtGetOp1(); - if ((storeLoc->gtOper == GT_STORE_LCL_VAR) && (op1->gtOper == GT_CNS_INT)) - { - GenTreeIntCon* con = op1->AsIntCon(); - ssize_t ival = con->gtIconVal; - unsigned varNum = storeLoc->gtLclNum; - LclVarDsc* varDsc = comp->lvaTable + varNum; - - if (varDsc->lvIsSIMDType()) - { - noway_assert(storeLoc->gtType != TYP_STRUCT); - } - unsigned size = genTypeSize(storeLoc); - // If we are storing a constant into a local variable - // we extend the size of the store here - if ((size < 4) && !varTypeIsStruct(varDsc)) - { - if (!varTypeIsUnsigned(varDsc)) - { - if (genTypeSize(storeLoc) == 1) - { - if ((ival & 0x7f) != ival) - { - ival = ival | 0xffffff00; - } - } - else - { - assert(genTypeSize(storeLoc) == 2); - if ((ival & 0x7fff) != ival) - { - ival = ival | 0xffff0000; - } - } - } - - // A local stack slot is at least 4 bytes in size, regardless of - // what the local var is typed as, so auto-promote it here - // unless it is a field of a promoted struct - // TODO-ARM-CQ: if the field is promoted shouldn't we also be able to do this? - if (!varDsc->lvIsStructField) - { - storeLoc->gtType = TYP_INT; - con->SetIconValue(ival); - } - } - } -} - -void Lowering::LowerBlockStore(GenTreeBlk* blkNode) -{ - GenTree* dstAddr = blkNode->Addr(); - unsigned size = blkNode->gtBlkSize; - GenTree* source = blkNode->Data(); - Compiler* compiler = comp; - - // Sources are dest address and initVal or source. 
- GenTreePtr srcAddrOrFill = nullptr; - bool isInitBlk = blkNode->OperIsInitBlkOp(); - - if (!isInitBlk) - { - // CopyObj or CopyBlk - if ((blkNode->OperGet() == GT_STORE_OBJ) && ((blkNode->AsObj()->gtGcPtrCount == 0) || blkNode->gtBlkOpGcUnsafe)) - { - blkNode->SetOper(GT_STORE_BLK); - } - if (source->gtOper == GT_IND) - { - srcAddrOrFill = blkNode->Data()->gtGetOp1(); - } - } - - if (isInitBlk) - { - GenTreePtr initVal = source; - if (initVal->OperIsInitVal()) - { - initVal = initVal->gtGetOp1(); - } - srcAddrOrFill = initVal; - -#if 0 - if ((size != 0) && (size <= INITBLK_UNROLL_LIMIT) && initVal->IsCnsIntOrI()) - { - // TODO-ARM-CQ: Currently we generate a helper call for every - // initblk we encounter. Later on we should implement loop unrolling - // code sequences to improve CQ. - // For reference see the code in LowerXArch.cpp. - NYI_ARM("initblk loop unrolling is currently not implemented."); - } - else -#endif // 0 - { - blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper; - } - } - else - { - if (blkNode->OperGet() == GT_STORE_OBJ) - { - // CopyObj - - NYI_ARM("Lowering for GT_STORE_OBJ isn't implemented"); - } - else - { - // CopyBlk - short internalIntCount = 0; - regMaskTP internalIntCandidates = RBM_NONE; - -#if 0 - // In case of a CpBlk with a constant size and less than CPBLK_UNROLL_LIMIT size - // we should unroll the loop to improve CQ. - // For reference see the code in lowerxarch.cpp. - - // TODO-ARM-CQ: cpblk loop unrolling is currently not implemented. - if ((size != 0) && (size <= INITBLK_UNROLL_LIMIT)) - { - blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll; - } - else -#endif // 0 - { - // In case we have a constant integer this means we went beyond - // CPBLK_UNROLL_LIMIT bytes of size, still we should never have the case of - // any GC-Pointers in the src struct. - blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper; - } - } - } -} - -//------------------------------------------------------------------------ -// LowerCast: Lower GT_CAST(srcType, DstType) nodes. -// -// Arguments: -// tree - GT_CAST node to be lowered -// -// Return Value: -// None. -// -// Notes: -// Casts from small int type to float/double are transformed as follows: -// GT_CAST(byte, float/double) = GT_CAST(GT_CAST(byte, int32), float/double) -// GT_CAST(sbyte, float/double) = GT_CAST(GT_CAST(sbyte, int32), float/double) -// GT_CAST(int16, float/double) = GT_CAST(GT_CAST(int16, int32), float/double) -// GT_CAST(uint16, float/double) = GT_CAST(GT_CAST(uint16, int32), float/double) -// -// Similarly casts from float/double to a smaller int type are transformed as follows: -// GT_CAST(float/double, byte) = GT_CAST(GT_CAST(float/double, int32), byte) -// GT_CAST(float/double, sbyte) = GT_CAST(GT_CAST(float/double, int32), sbyte) -// GT_CAST(float/double, int16) = GT_CAST(GT_CAST(double/double, int32), int16) -// GT_CAST(float/double, uint16) = GT_CAST(GT_CAST(double/double, int32), uint16) -// -// Note that for the overflow conversions we still depend on helper calls and -// don't expect to see them here. 
-// i) GT_CAST(float/double, int type with overflow detection) -// -void Lowering::LowerCast(GenTree* tree) -{ - assert(tree->OperGet() == GT_CAST); - - JITDUMP("LowerCast for: "); - DISPNODE(tree); - JITDUMP("\n"); - - GenTreePtr op1 = tree->gtOp.gtOp1; - var_types dstType = tree->CastToType(); - var_types srcType = op1->TypeGet(); - var_types tmpType = TYP_UNDEF; - - if (varTypeIsFloating(srcType)) - { - noway_assert(!tree->gtOverflow()); - } - - // Case of src is a small type and dst is a floating point type. - if (varTypeIsSmall(srcType) && varTypeIsFloating(dstType)) - { - NYI_ARM("Lowering for cast from small type to float"); // Not tested yet. - // These conversions can never be overflow detecting ones. - noway_assert(!tree->gtOverflow()); - tmpType = TYP_INT; - } - // case of src is a floating point type and dst is a small type. - else if (varTypeIsFloating(srcType) && varTypeIsSmall(dstType)) - { - NYI_ARM("Lowering for cast from float to small type"); // Not tested yet. - tmpType = TYP_INT; - } - - if (tmpType != TYP_UNDEF) - { - GenTreePtr tmp = comp->gtNewCastNode(tmpType, op1, tmpType); - tmp->gtFlags |= (tree->gtFlags & (GTF_UNSIGNED | GTF_OVERFLOW | GTF_EXCEPT)); - - tree->gtFlags &= ~GTF_UNSIGNED; - tree->gtOp.gtOp1 = tmp; - BlockRange().InsertAfter(op1, tmp); - } -} - -//------------------------------------------------------------------------ -// LowerRotate: Lower GT_ROL and GT_ROL nodes. -// -// Arguments: -// tree - the node to lower -// -// Return Value: -// None. -// -void Lowering::LowerRotate(GenTreePtr tree) -{ - if (tree->OperGet() == GT_ROL) - { - // There is no ROL instruction on ARM. Convert ROL into ROR. - GenTreePtr rotatedValue = tree->gtOp.gtOp1; - unsigned rotatedValueBitSize = genTypeSize(rotatedValue->gtType) * 8; - GenTreePtr rotateLeftIndexNode = tree->gtOp.gtOp2; - - if (rotateLeftIndexNode->IsCnsIntOrI()) - { - ssize_t rotateLeftIndex = rotateLeftIndexNode->gtIntCon.gtIconVal; - ssize_t rotateRightIndex = rotatedValueBitSize - rotateLeftIndex; - rotateLeftIndexNode->gtIntCon.gtIconVal = rotateRightIndex; - } - else - { - GenTreePtr tmp = - comp->gtNewOperNode(GT_NEG, genActualType(rotateLeftIndexNode->gtType), rotateLeftIndexNode); - BlockRange().InsertAfter(rotateLeftIndexNode, tmp); - tree->gtOp.gtOp2 = tmp; - } - tree->ChangeOper(GT_ROR); - } -} - -//------------------------------------------------------------------------ -// LowerPutArgStk: Lower a GT_PUTARG_STK node -// -// Arguments: -// argNode - a GT_PUTARG_STK node -// -// Return Value: -// None. -// -// Notes: -// There is currently no Lowering required for this on ARM. -// -void Lowering::LowerPutArgStk(GenTreePutArgStk* argNode, fgArgTabEntryPtr info) -{ -} - -//------------------------------------------------------------------------ // IsCallTargetInRange: Can a call target address be encoded in-place? // // Return Value: diff --git a/src/jit/lowerarm64.cpp b/src/jit/lowerarm64.cpp index 7a564ef..b24ed82 100644 --- a/src/jit/lowerarm64.cpp +++ b/src/jit/lowerarm64.cpp @@ -29,290 +29,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #include "sideeffects.h" #include "lower.h" -//------------------------------------------------------------------------ -// LowerStoreLoc: Lower a store of a lclVar -// -// Arguments: -// storeLoc - the local store (GT_STORE_LCL_FLD or GT_STORE_LCL_VAR) -// -// Notes: -// This involves: -// - Widening operations of unsigneds. 
- -void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc) -{ - // Try to widen the ops if they are going into a local var. - GenTree* op1 = storeLoc->gtGetOp1(); - if ((storeLoc->gtOper == GT_STORE_LCL_VAR) && (op1->gtOper == GT_CNS_INT)) - { - GenTreeIntCon* con = op1->AsIntCon(); - ssize_t ival = con->gtIconVal; - unsigned varNum = storeLoc->gtLclNum; - LclVarDsc* varDsc = comp->lvaTable + varNum; - - if (varDsc->lvIsSIMDType()) - { - noway_assert(storeLoc->gtType != TYP_STRUCT); - } - unsigned size = genTypeSize(storeLoc); - // If we are storing a constant into a local variable - // we extend the size of the store here - if ((size < 4) && !varTypeIsStruct(varDsc)) - { - if (!varTypeIsUnsigned(varDsc)) - { - if (genTypeSize(storeLoc) == 1) - { - if ((ival & 0x7f) != ival) - { - ival = ival | 0xffffff00; - } - } - else - { - assert(genTypeSize(storeLoc) == 2); - if ((ival & 0x7fff) != ival) - { - ival = ival | 0xffff0000; - } - } - } - - // A local stack slot is at least 4 bytes in size, regardless of - // what the local var is typed as, so auto-promote it here - // unless it is a field of a promoted struct - // TODO-ARM64-CQ: if the field is promoted shouldn't we also be able to do this? - if (!varDsc->lvIsStructField) - { - storeLoc->gtType = TYP_INT; - con->SetIconValue(ival); - } - } - } -} - -//------------------------------------------------------------------------ -// LowerBlockStore: Set block store type -// -// Arguments: -// blkNode - The block store node of interest -// -// Return Value: -// None. -// - -void Lowering::LowerBlockStore(GenTreeBlk* blkNode) -{ - GenTree* dstAddr = blkNode->Addr(); - unsigned size = blkNode->gtBlkSize; - GenTree* source = blkNode->Data(); - Compiler* compiler = comp; - - // Sources are dest address and initVal or source. - GenTreePtr srcAddrOrFill = nullptr; - bool isInitBlk = blkNode->OperIsInitBlkOp(); - - if (!isInitBlk) - { - // CopyObj or CopyBlk - if ((blkNode->OperGet() == GT_STORE_OBJ) && ((blkNode->AsObj()->gtGcPtrCount == 0) || blkNode->gtBlkOpGcUnsafe)) - { - blkNode->SetOper(GT_STORE_BLK); - } - if (source->gtOper == GT_IND) - { - srcAddrOrFill = blkNode->Data()->gtGetOp1(); - } - } - - if (isInitBlk) - { - GenTreePtr initVal = source; - if (initVal->OperIsInitVal()) - { - initVal = initVal->gtGetOp1(); - } - srcAddrOrFill = initVal; - - if ((size != 0) && (size <= INITBLK_UNROLL_LIMIT) && initVal->IsCnsIntOrI()) - { - // The fill value of an initblk is interpreted to hold a - // value of (unsigned int8) however a constant of any size - // may practically reside on the evaluation stack. So extract - // the lower byte out of the initVal constant and replicate - // it to a larger constant whose size is sufficient to support - // the largest width store of the desired inline expansion. - - ssize_t fill = initVal->gtIntCon.gtIconVal & 0xFF; - if (size < REGSIZE_BYTES) - { - initVal->gtIntCon.gtIconVal = 0x01010101 * fill; - } - else - { - initVal->gtIntCon.gtIconVal = 0x0101010101010101LL * fill; - initVal->gtType = TYP_LONG; - } - blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll; - } - else - { - blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper; - } - } - else - { - // CopyObj or CopyBlk - // Sources are src and dest and size if not constant. - - if (blkNode->OperGet() == GT_STORE_OBJ) - { - // CopyObj - - GenTreeObj* objNode = blkNode->AsObj(); - - unsigned slots = objNode->gtSlots; - -#ifdef DEBUG - // CpObj must always have at least one GC-Pointer as a member. 
- assert(objNode->gtGcPtrCount > 0); - - assert(dstAddr->gtType == TYP_BYREF || dstAddr->gtType == TYP_I_IMPL); - - CORINFO_CLASS_HANDLE clsHnd = objNode->gtClass; - size_t classSize = compiler->info.compCompHnd->getClassSize(clsHnd); - size_t blkSize = roundUp(classSize, TARGET_POINTER_SIZE); - - // Currently, the EE always round up a class data structure so - // we are not handling the case where we have a non multiple of pointer sized - // struct. This behavior may change in the future so in order to keeps things correct - // let's assert it just to be safe. Going forward we should simply - // handle this case. - assert(classSize == blkSize); - assert((blkSize / TARGET_POINTER_SIZE) == slots); - assert(objNode->HasGCPtr()); -#endif - - blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll; - } - else - { - // CopyBlk - short internalIntCount = 0; - regMaskTP internalIntCandidates = RBM_NONE; - - if ((size != 0) && (size <= INITBLK_UNROLL_LIMIT)) - { - blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll; - } - else - { - // In case we have a constant integer this means we went beyond - // CPBLK_UNROLL_LIMIT bytes of size, still we should never have the case of - // any GC-Pointers in the src struct. - blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper; - } - } - } -} - -/* Lower GT_CAST(srcType, DstType) nodes. - * - * Casts from small int type to float/double are transformed as follows: - * GT_CAST(byte, float/double) = GT_CAST(GT_CAST(byte, int32), float/double) - * GT_CAST(sbyte, float/double) = GT_CAST(GT_CAST(sbyte, int32), float/double) - * GT_CAST(int16, float/double) = GT_CAST(GT_CAST(int16, int32), float/double) - * GT_CAST(uint16, float/double) = GT_CAST(GT_CAST(uint16, int32), float/double) - * - * SSE2 conversion instructions operate on signed integers. casts from Uint32/Uint64 - * are morphed as follows by front-end and hence should not be seen here. - * GT_CAST(uint32, float/double) = GT_CAST(GT_CAST(uint32, long), float/double) - * GT_CAST(uint64, float) = GT_CAST(GT_CAST(uint64, double), float) - * - * - * Similarly casts from float/double to a smaller int type are transformed as follows: - * GT_CAST(float/double, byte) = GT_CAST(GT_CAST(float/double, int32), byte) - * GT_CAST(float/double, sbyte) = GT_CAST(GT_CAST(float/double, int32), sbyte) - * GT_CAST(float/double, int16) = GT_CAST(GT_CAST(double/double, int32), int16) - * GT_CAST(float/double, uint16) = GT_CAST(GT_CAST(double/double, int32), uint16) - * - * SSE2 has instructions to convert a float/double vlaue into a signed 32/64-bit - * integer. The above transformations help us to leverage those instructions. - * - * Note that for the overflow conversions we still depend on helper calls and - * don't expect to see them here. - * i) GT_CAST(float/double, int type with overflow detection) - * - */ -void Lowering::LowerCast(GenTree* tree) -{ - assert(tree->OperGet() == GT_CAST); - - GenTreePtr op1 = tree->gtOp.gtOp1; - var_types dstType = tree->CastToType(); - var_types srcType = op1->TypeGet(); - var_types tmpType = TYP_UNDEF; - - // We should never see the following casts as they are expected to be lowered - // apropriately or converted into helper calls by front-end. - // srcType = float/double dstType = * and overflow detecting cast - // Reason: must be converted to a helper call - // - if (varTypeIsFloating(srcType)) - { - noway_assert(!tree->gtOverflow()); - } - - // Case of src is a small type and dst is a floating point type. 
- if (varTypeIsSmall(srcType) && varTypeIsFloating(dstType)) - { - // These conversions can never be overflow detecting ones. - noway_assert(!tree->gtOverflow()); - tmpType = TYP_INT; - } - // case of src is a floating point type and dst is a small type. - else if (varTypeIsFloating(srcType) && varTypeIsSmall(dstType)) - { - tmpType = TYP_INT; - } - - if (tmpType != TYP_UNDEF) - { - GenTreePtr tmp = comp->gtNewCastNode(tmpType, op1, tmpType); - tmp->gtFlags |= (tree->gtFlags & (GTF_UNSIGNED | GTF_OVERFLOW | GTF_EXCEPT)); - - tree->gtFlags &= ~GTF_UNSIGNED; - tree->gtOp.gtOp1 = tmp; - BlockRange().InsertAfter(op1, tmp); - } -} - -void Lowering::LowerRotate(GenTreePtr tree) -{ - if (tree->OperGet() == GT_ROL) - { - // There is no ROL instruction on ARM. Convert ROL into ROR. - GenTreePtr rotatedValue = tree->gtOp.gtOp1; - unsigned rotatedValueBitSize = genTypeSize(rotatedValue->gtType) * 8; - GenTreePtr rotateLeftIndexNode = tree->gtOp.gtOp2; - - if (rotateLeftIndexNode->IsCnsIntOrI()) - { - ssize_t rotateLeftIndex = rotateLeftIndexNode->gtIntCon.gtIconVal; - ssize_t rotateRightIndex = rotatedValueBitSize - rotateLeftIndex; - rotateLeftIndexNode->gtIntCon.gtIconVal = rotateRightIndex; - } - else - { - GenTreePtr tmp = - comp->gtNewOperNode(GT_NEG, genActualType(rotateLeftIndexNode->gtType), rotateLeftIndexNode); - BlockRange().InsertAfter(rotateLeftIndexNode, tmp); - tree->gtOp.gtOp2 = tmp; - } - tree->ChangeOper(GT_ROR); - } -} - // returns true if the tree can use the read-modify-write memory instruction form bool Lowering::isRMWRegOper(GenTreePtr tree) { diff --git a/src/jit/lowerarmarch.cpp b/src/jit/lowerarmarch.cpp new file mode 100644 index 0000000..4ff3552 --- /dev/null +++ b/src/jit/lowerarmarch.cpp @@ -0,0 +1,346 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX Lowering for ARM and ARM64 common code XX +XX XX +XX This encapsulates common logic for lowering trees for the ARM and ARM64 XX +XX architectures. For a more detailed view of what is lowering, please XX +XX take a look at Lower.cpp XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ + +#include "jitpch.h" +#ifdef _MSC_VER +#pragma hdrstop +#endif + +#ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator + +#ifdef _TARGET_ARMARCH_ // This file is ONLY used for ARM and ARM64 architectures + +#include "jit.h" +#include "sideeffects.h" +#include "lower.h" +#include "lsra.h" + +//------------------------------------------------------------------------ +// LowerStoreLoc: Lower a store of a lclVar +// +// Arguments: +// storeLoc - the local store (GT_STORE_LCL_FLD or GT_STORE_LCL_VAR) +// +// Notes: +// This involves: +// - Widening operations of unsigneds. +// +void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc) +{ + // Try to widen the ops if they are going into a local var. 
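+    // Editor's note: an illustrative example of the widening performed below,
+    // assuming a signed byte local (types and values are illustrative only):
+    //     GT_STORE_LCL_VAR<byte>(GT_CNS_INT 0xff)
+    // has its constant sign-extended and the store retyped, becoming
+    //     GT_STORE_LCL_VAR<int>(GT_CNS_INT 0xffffffff)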
+ GenTree* op1 = storeLoc->gtGetOp1(); + if ((storeLoc->gtOper == GT_STORE_LCL_VAR) && (op1->gtOper == GT_CNS_INT)) + { + GenTreeIntCon* con = op1->AsIntCon(); + ssize_t ival = con->gtIconVal; + unsigned varNum = storeLoc->gtLclNum; + LclVarDsc* varDsc = comp->lvaTable + varNum; + + if (varDsc->lvIsSIMDType()) + { + noway_assert(storeLoc->gtType != TYP_STRUCT); + } + unsigned size = genTypeSize(storeLoc); + // If we are storing a constant into a local variable + // we extend the size of the store here + if ((size < 4) && !varTypeIsStruct(varDsc)) + { + if (!varTypeIsUnsigned(varDsc)) + { + if (genTypeSize(storeLoc) == 1) + { + if ((ival & 0x7f) != ival) + { + ival = ival | 0xffffff00; + } + } + else + { + assert(genTypeSize(storeLoc) == 2); + if ((ival & 0x7fff) != ival) + { + ival = ival | 0xffff0000; + } + } + } + + // A local stack slot is at least 4 bytes in size, regardless of + // what the local var is typed as, so auto-promote it here + // unless it is a field of a promoted struct + // TODO-CQ: if the field is promoted shouldn't we also be able to do this? + if (!varDsc->lvIsStructField) + { + storeLoc->gtType = TYP_INT; + con->SetIconValue(ival); + } + } + } +} + +//------------------------------------------------------------------------ +// LowerBlockStore: Set block store type +// +// Arguments: +// blkNode - The block store node of interest +// +// Return Value: +// None. +// +void Lowering::LowerBlockStore(GenTreeBlk* blkNode) +{ + GenTree* dstAddr = blkNode->Addr(); + unsigned size = blkNode->gtBlkSize; + GenTree* source = blkNode->Data(); + Compiler* compiler = comp; + + // Sources are dest address and initVal or source. + GenTreePtr srcAddrOrFill = nullptr; + bool isInitBlk = blkNode->OperIsInitBlkOp(); + + if (!isInitBlk) + { + // CopyObj or CopyBlk + if ((blkNode->OperGet() == GT_STORE_OBJ) && ((blkNode->AsObj()->gtGcPtrCount == 0) || blkNode->gtBlkOpGcUnsafe)) + { + blkNode->SetOper(GT_STORE_BLK); + } + if (source->gtOper == GT_IND) + { + srcAddrOrFill = blkNode->Data()->gtGetOp1(); + } + } + + if (isInitBlk) + { + GenTreePtr initVal = source; + if (initVal->OperIsInitVal()) + { + initVal = initVal->gtGetOp1(); + } + srcAddrOrFill = initVal; + +#ifdef _TARGET_ARM64_ + if ((size != 0) && (size <= INITBLK_UNROLL_LIMIT) && initVal->IsCnsIntOrI()) + { + // TODO-ARM-CQ: Currently we generate a helper call for every + // initblk we encounter. Later on we should implement loop unrolling + // code sequences to improve CQ. + // For reference see the code in LowerXArch.cpp. + NYI_ARM("initblk loop unrolling is currently not implemented."); + + // The fill value of an initblk is interpreted to hold a + // value of (unsigned int8) however a constant of any size + // may practically reside on the evaluation stack. So extract + // the lower byte out of the initVal constant and replicate + // it to a larger constant whose size is sufficient to support + // the largest width store of the desired inline expansion. + + ssize_t fill = initVal->gtIntCon.gtIconVal & 0xFF; + if (size < REGSIZE_BYTES) + { + initVal->gtIntCon.gtIconVal = 0x01010101 * fill; + } + else + { + initVal->gtIntCon.gtIconVal = 0x0101010101010101LL * fill; + initVal->gtType = TYP_LONG; + } + blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll; + } + else +#endif // _TARGET_ARM64_ + { + blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper; + } + } + else + { + // CopyObj or CopyBlk + // Sources are src and dest and size if not constant. 
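+        // Editor's note: a summary of the block-op kind selection implemented in
+        // this merged function (as of this change ARM32 uses the helper for
+        // initblk/cpblk and still NYIs on cpobj):
+        //     initblk, constant fill, size <= INITBLK_UNROLL_LIMIT -> Unroll (ARM64 only)
+        //     cpobj (GC pointers present)                          -> Unroll (ARM64 only)
+        //     cpblk, size <= INITBLK_UNROLL_LIMIT                  -> Unroll (ARM64 only)
+        //     everything else                                      -> Helper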
+
+        if (blkNode->OperGet() == GT_STORE_OBJ)
+        {
+            // CopyObj
+
+            NYI_ARM("Lowering for GT_STORE_OBJ isn't implemented");
+
+#ifdef _TARGET_ARM64_
+
+            GenTreeObj* objNode = blkNode->AsObj();
+
+            unsigned slots = objNode->gtSlots;
+
+#ifdef DEBUG
+            // CpObj must always have at least one GC-Pointer as a member.
+            assert(objNode->gtGcPtrCount > 0);
+
+            assert(dstAddr->gtType == TYP_BYREF || dstAddr->gtType == TYP_I_IMPL);
+
+            CORINFO_CLASS_HANDLE clsHnd    = objNode->gtClass;
+            size_t               classSize = compiler->info.compCompHnd->getClassSize(clsHnd);
+            size_t               blkSize   = roundUp(classSize, TARGET_POINTER_SIZE);
+
+            // Currently the EE always rounds a class data structure up to a multiple
+            // of the pointer size, so we are not handling the case of a struct whose
+            // size is not such a multiple. That behavior may change in the future, so
+            // to keep things correct let's assert it just to be safe. Going forward
+            // we should simply handle this case.
+            assert(classSize == blkSize);
+            assert((blkSize / TARGET_POINTER_SIZE) == slots);
+            assert(objNode->HasGCPtr());
+#endif
+
+            blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll;
+
+#endif // _TARGET_ARM64_
+        }
+        else
+        {
+            // CopyBlk
+            short     internalIntCount      = 0;
+            regMaskTP internalIntCandidates = RBM_NONE;
+
+#ifdef _TARGET_ARM64_
+            // In case of a CpBlk with a constant size and less than CPBLK_UNROLL_LIMIT size
+            // we should unroll the loop to improve CQ.
+            // For reference see the code in lowerxarch.cpp.
+            // TODO-ARM-CQ: cpblk loop unrolling is currently not implemented.
+
+            if ((size != 0) && (size <= INITBLK_UNROLL_LIMIT))
+            {
+                blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll;
+            }
+            else
+#endif // _TARGET_ARM64_
+            {
+                // If we have a constant size it means we went beyond CPBLK_UNROLL_LIMIT
+                // bytes; either way we should never have any GC-Pointers in the src struct here.
+                blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper;
+            }
+        }
+    }
+}
+
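+// Editor's note: an illustrative example of the cast splitting described in the
+// Notes below. A cast that reaches lowering as
+//     GT_CAST<float>(GT_LCL_VAR<short>)
+// is rewritten so the backend only ever sees int <-> floating casts:
+//     GT_CAST<float>(GT_CAST<int>(GT_LCL_VAR<short>))
+// with the inner cast inserted into the block range right after 'op1'.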
+//------------------------------------------------------------------------
+// LowerCast: Lower GT_CAST(srcType, DstType) nodes.
+//
+// Arguments:
+//    tree - GT_CAST node to be lowered
+//
+// Return Value:
+//    None.
+//
+// Notes:
+//    Casts from small int type to float/double are transformed as follows:
+//    GT_CAST(byte, float/double)     =   GT_CAST(GT_CAST(byte, int32), float/double)
+//    GT_CAST(sbyte, float/double)    =   GT_CAST(GT_CAST(sbyte, int32), float/double)
+//    GT_CAST(int16, float/double)    =   GT_CAST(GT_CAST(int16, int32), float/double)
+//    GT_CAST(uint16, float/double)   =   GT_CAST(GT_CAST(uint16, int32), float/double)
+//
+//    Similarly casts from float/double to a smaller int type are transformed as follows:
+//    GT_CAST(float/double, byte)     =   GT_CAST(GT_CAST(float/double, int32), byte)
+//    GT_CAST(float/double, sbyte)    =   GT_CAST(GT_CAST(float/double, int32), sbyte)
+//    GT_CAST(float/double, int16)    =   GT_CAST(GT_CAST(float/double, int32), int16)
+//    GT_CAST(float/double, uint16)   =   GT_CAST(GT_CAST(float/double, int32), uint16)
+//
+//    Note that for the overflow conversions we still depend on helper calls and
+//    don't expect to see them here.
+//    i) GT_CAST(float/double, int type with overflow detection)
+//
+void Lowering::LowerCast(GenTree* tree)
+{
+    assert(tree->OperGet() == GT_CAST);
+
+    JITDUMP("LowerCast for: ");
+    DISPNODE(tree);
+    JITDUMP("\n");
+
+    GenTreePtr op1     = tree->gtOp.gtOp1;
+    var_types  dstType = tree->CastToType();
+    var_types  srcType = op1->TypeGet();
+    var_types  tmpType = TYP_UNDEF;
+
+    if (varTypeIsFloating(srcType))
+    {
+        noway_assert(!tree->gtOverflow());
+    }
+
+    // Case of src is a small type and dst is a floating point type.
+    if (varTypeIsSmall(srcType) && varTypeIsFloating(dstType))
+    {
+        NYI_ARM("Lowering for cast from small type to float"); // Not tested yet.
+        // These conversions can never be overflow detecting ones.
+        noway_assert(!tree->gtOverflow());
+        tmpType = TYP_INT;
+    }
+    // Case of src is a floating point type and dst is a small type.
+    else if (varTypeIsFloating(srcType) && varTypeIsSmall(dstType))
+    {
+        NYI_ARM("Lowering for cast from float to small type"); // Not tested yet.
+        tmpType = TYP_INT;
+    }
+
+    if (tmpType != TYP_UNDEF)
+    {
+        GenTreePtr tmp = comp->gtNewCastNode(tmpType, op1, tmpType);
+        tmp->gtFlags |= (tree->gtFlags & (GTF_UNSIGNED | GTF_OVERFLOW | GTF_EXCEPT));
+
+        tree->gtFlags &= ~GTF_UNSIGNED;
+        tree->gtOp.gtOp1 = tmp;
+        BlockRange().InsertAfter(op1, tmp);
+    }
+}
+
+//------------------------------------------------------------------------
+// LowerRotate: Lower GT_ROL and GT_ROR nodes.
+//
+// Arguments:
+//    tree - the node to lower
+//
+// Return Value:
+//    None.
+//
+void Lowering::LowerRotate(GenTreePtr tree)
+{
+    if (tree->OperGet() == GT_ROL)
+    {
+        // There is no ROL instruction on ARM. Convert ROL into ROR.
+        GenTreePtr rotatedValue        = tree->gtOp.gtOp1;
+        unsigned   rotatedValueBitSize = genTypeSize(rotatedValue->gtType) * 8;
+        GenTreePtr rotateLeftIndexNode = tree->gtOp.gtOp2;
+
+        if (rotateLeftIndexNode->IsCnsIntOrI())
+        {
+            ssize_t rotateLeftIndex                 = rotateLeftIndexNode->gtIntCon.gtIconVal;
+            ssize_t rotateRightIndex                = rotatedValueBitSize - rotateLeftIndex;
+            rotateLeftIndexNode->gtIntCon.gtIconVal = rotateRightIndex;
+        }
+        else
+        {
+            GenTreePtr tmp =
+                comp->gtNewOperNode(GT_NEG, genActualType(rotateLeftIndexNode->gtType), rotateLeftIndexNode);
+            BlockRange().InsertAfter(rotateLeftIndexNode, tmp);
+            tree->gtOp.gtOp2 = tmp;
+        }
+        tree->ChangeOper(GT_ROR);
+    }
+}
+
+#endif // _TARGET_ARMARCH_
+
+#endif // !LEGACY_BACKEND
diff --git a/src/jit/lsraarm.cpp b/src/jit/lsraarm.cpp
index 912fd36..e35e579 100644
--- a/src/jit/lsraarm.cpp
+++ b/src/jit/lsraarm.cpp
@@ -30,270 +30,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 #include "lsra.h"
 
 //------------------------------------------------------------------------
-// TreeNodeInfoInitStoreLoc: Lower a store of a lclVar
-//
-// Arguments:
-//    storeLoc - the local store (GT_STORE_LCL_FLD or GT_STORE_LCL_VAR)
-//
-// Notes:
-//    This involves:
-//    - Setting the appropriate candidates for a store of a multi-reg call return value.
-//    - Handling of contained immediates and widening operations of unsigneds.
-//
-void Lowering::TreeNodeInfoInitStoreLoc(GenTreeLclVarCommon* storeLoc)
-{
-    TreeNodeInfo* info = &(storeLoc->gtLsraInfo);
-
-    // Is this the case of var = call where call is returning
-    // a value in multiple return registers?
-    GenTree* op1 = storeLoc->gtGetOp1();
-    if (op1->IsMultiRegCall())
-    {
-        // backend expects to see this case only for store lclvar.
- assert(storeLoc->OperGet() == GT_STORE_LCL_VAR); - - // srcCount = number of registers in which the value is returned by call - GenTreeCall* call = op1->AsCall(); - ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc(); - info->srcCount = retTypeDesc->GetReturnRegCount(); - - // Call node srcCandidates = Bitwise-OR(allregs(GetReturnRegType(i))) for all i=0..RetRegCount-1 - regMaskTP srcCandidates = m_lsra->allMultiRegCallNodeRegs(call); - op1->gtLsraInfo.setSrcCandidates(m_lsra, srcCandidates); - return; - } - - CheckImmedAndMakeContained(storeLoc, op1); -} - -//------------------------------------------------------------------------ -// TreeNodeInfoInitCmp: Lower a GT comparison node. -// -// Arguments: -// tree - the node to lower -// -// Return Value: -// None. -// -void Lowering::TreeNodeInfoInitCmp(GenTreePtr tree) -{ - TreeNodeInfo* info = &(tree->gtLsraInfo); - - info->srcCount = 2; - info->dstCount = 1; - - GenTreePtr op1 = tree->gtOp.gtOp1; - GenTreePtr op2 = tree->gtOp.gtOp2; - var_types op1Type = op1->TypeGet(); - var_types op2Type = op2->TypeGet(); - - // Long compares will consume GT_LONG nodes, each of which produces two results. - // Thus for each long operand there will be an additional source. - // TODO-ARM-CQ: Mark hiOp2 and loOp2 as contained if it is a constant. - if (varTypeIsLong(op1Type)) - { - info->srcCount++; - } - if (varTypeIsLong(op2Type)) - { - info->srcCount++; - } - - CheckImmedAndMakeContained(tree, tree->gtOp.gtOp2); -} - -//------------------------------------------------------------------------ -// TreeNodeInfoInitGCWriteBarrier: GC lowering helper. -// -// Arguments: -// tree - the node to lower -// -// Return Value: -// None. -// -void Lowering::TreeNodeInfoInitGCWriteBarrier(GenTree* tree) -{ - GenTreePtr dst = tree; - GenTreePtr addr = tree->gtOp.gtOp1; - GenTreePtr src = tree->gtOp.gtOp2; - - if (addr->OperGet() == GT_LEA) - { - // In the case where we are doing a helper assignment, if the dst - // is an indir through an lea, we need to actually instantiate the - // lea in a register - GenTreeAddrMode* lea = addr->AsAddrMode(); - - short leaSrcCount = 0; - if (lea->Base() != nullptr) - { - leaSrcCount++; - } - if (lea->Index() != nullptr) - { - leaSrcCount++; - } - lea->gtLsraInfo.srcCount = leaSrcCount; - lea->gtLsraInfo.dstCount = 1; - } - -#if NOGC_WRITE_BARRIERS - NYI_ARM("NOGC_WRITE_BARRIERS"); -#else - // For the standard JIT Helper calls - // op1 goes into REG_ARG_0 and - // op2 goes into REG_ARG_1 - // - addr->gtLsraInfo.setSrcCandidates(m_lsra, RBM_ARG_0); - src->gtLsraInfo.setSrcCandidates(m_lsra, RBM_ARG_1); -#endif // NOGC_WRITE_BARRIERS - - // Both src and dst must reside in a register, which they should since we haven't set - // either of them as contained. - assert(addr->gtLsraInfo.dstCount == 1); - assert(src->gtLsraInfo.dstCount == 1); -} - -//------------------------------------------------------------------------ -// TreeNodeInfoInitIndir: Specify register requirements for address expression -// of an indirection operation. -// -// Arguments: -// indirTree - GT_IND, GT_STOREIND, block node or GT_NULLCHECK gentree node -// -void Lowering::TreeNodeInfoInitIndir(GenTreePtr indirTree) -{ - assert(indirTree->OperIsIndir()); - // If this is the rhs of a block copy (i.e. non-enregisterable struct), - // it has no register requirements. 
- if (indirTree->TypeGet() == TYP_STRUCT) - { - return; - } - - GenTreePtr addr = indirTree->gtGetOp1(); - TreeNodeInfo* info = &(indirTree->gtLsraInfo); - - GenTreePtr base = nullptr; - GenTreePtr index = nullptr; - unsigned cns = 0; - unsigned mul; - bool rev; - bool modifiedSources = false; - - if ((addr->OperGet() == GT_LEA) && IsSafeToContainMem(indirTree, addr)) - { - GenTreeAddrMode* lea = addr->AsAddrMode(); - base = lea->Base(); - index = lea->Index(); - cns = lea->gtOffset; - - m_lsra->clearOperandCounts(addr); - // The srcCount is decremented because addr is now "contained", - // then we account for the base and index below, if they are non-null. - info->srcCount--; - } - else if (comp->codeGen->genCreateAddrMode(addr, -1, true, 0, &rev, &base, &index, &mul, &cns, true /*nogen*/) && - !(modifiedSources = AreSourcesPossiblyModifiedLocals(indirTree, base, index))) - { - // An addressing mode will be constructed that may cause some - // nodes to not need a register, and cause others' lifetimes to be extended - // to the GT_IND or even its parent if it's an assignment - - assert(base != addr); - m_lsra->clearOperandCounts(addr); - - GenTreePtr arrLength = nullptr; - - // Traverse the computation below GT_IND to find the operands - // for the addressing mode, marking the various constants and - // intermediate results as not consuming/producing. - // If the traversal were more complex, we might consider using - // a traversal function, but the addressing mode is only made - // up of simple arithmetic operators, and the code generator - // only traverses one leg of each node. - - bool foundBase = (base == nullptr); - bool foundIndex = (index == nullptr); - GenTreePtr nextChild = nullptr; - for (GenTreePtr child = addr; child != nullptr && !child->OperIsLeaf(); child = nextChild) - { - nextChild = nullptr; - GenTreePtr op1 = child->gtOp.gtOp1; - GenTreePtr op2 = (child->OperIsBinary()) ? child->gtOp.gtOp2 : nullptr; - - if (op1 == base) - { - foundBase = true; - } - else if (op1 == index) - { - foundIndex = true; - } - else - { - m_lsra->clearOperandCounts(op1); - if (!op1->OperIsLeaf()) - { - nextChild = op1; - } - } - - if (op2 != nullptr) - { - if (op2 == base) - { - foundBase = true; - } - else if (op2 == index) - { - foundIndex = true; - } - else - { - m_lsra->clearOperandCounts(op2); - if (!op2->OperIsLeaf()) - { - assert(nextChild == nullptr); - nextChild = op2; - } - } - } - } - assert(foundBase && foundIndex); - info->srcCount--; // it gets incremented below. - } - else if (addr->gtOper == GT_ARR_ELEM) - { - // The GT_ARR_ELEM consumes all the indices and produces the offset. - // The array object lives until the mem access. - // We also consume the target register to which the address is - // computed - - info->srcCount++; - assert(addr->gtLsraInfo.srcCount >= 2); - addr->gtLsraInfo.srcCount -= 1; - } - else - { - // it is nothing but a plain indir - info->srcCount--; // base gets added in below - base = addr; - } - - if (base != nullptr) - { - info->srcCount++; - } - - if (index != nullptr && !modifiedSources) - { - info->srcCount++; - info->internalIntCount++; - } -} - -//------------------------------------------------------------------------ // TreeNodeInfoInitReturn: Set the NodeInfo for a GT_RETURN. // // Arguments: @@ -381,353 +117,6 @@ void Lowering::TreeNodeInfoInitReturn(GenTree* tree) } } -//------------------------------------------------------------------------ -// TreeNodeInfoInitShiftRotate: Set the NodeInfo for a shift or rotate. 
-// -// Arguments: -// tree - The node of interest -// -// Return Value: -// None. -// -void Lowering::TreeNodeInfoInitShiftRotate(GenTree* tree) -{ - TreeNodeInfo* info = &(tree->gtLsraInfo); - LinearScan* l = m_lsra; - - info->srcCount = 2; - info->dstCount = 1; - - GenTreePtr shiftBy = tree->gtOp.gtOp2; - GenTreePtr source = tree->gtOp.gtOp1; - if (shiftBy->IsCnsIntOrI()) - { - l->clearDstCount(shiftBy); - info->srcCount--; - } - - // The first operand of a GT_LSH_HI and GT_RSH_LO oper is a GT_LONG so that - // we can have a three operand form. Increment the srcCount. - if (tree->OperGet() == GT_LSH_HI || tree->OperGet() == GT_RSH_LO) - { - assert(source->OperGet() == GT_LONG); - - info->srcCount++; - - if (tree->OperGet() == GT_LSH_HI) - { - GenTreePtr sourceLo = source->gtOp.gtOp1; - sourceLo->gtLsraInfo.isDelayFree = true; - } - else - { - GenTreePtr sourceHi = source->gtOp.gtOp2; - sourceHi->gtLsraInfo.isDelayFree = true; - } - - source->gtLsraInfo.hasDelayFreeSrc = true; - info->hasDelayFreeSrc = true; - } -} - -//------------------------------------------------------------------------ -// TreeNodeInfoInitPutArgReg: Set the NodeInfo for a PUTARG_REG. -// -// Arguments: -// node - The PUTARG_REG node. -// argReg - The register in which to pass the argument. -// info - The info for the node's using call. -// isVarArgs - True if the call uses a varargs calling convention. -// callHasFloatRegArgs - Set to true if this PUTARG_REG uses an FP register. -// -// Return Value: -// None. -// -void Lowering::TreeNodeInfoInitPutArgReg( - GenTreeUnOp* node, regNumber argReg, TreeNodeInfo& info, bool isVarArgs, bool* callHasFloatRegArgs) -{ - assert(node != nullptr); - assert(node->OperIsPutArgReg()); - assert(argReg != REG_NA); - - // Each register argument corresponds to one source. - info.srcCount++; - - // Set the register requirements for the node. - const regMaskTP argMask = genRegMask(argReg); - node->gtLsraInfo.setDstCandidates(m_lsra, argMask); - node->gtLsraInfo.setSrcCandidates(m_lsra, argMask); - - // To avoid redundant moves, have the argument operand computed in the - // register in which the argument is passed to the call. - node->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(m_lsra, m_lsra->getUseCandidates(node)); - - *callHasFloatRegArgs |= varTypeIsFloating(node->TypeGet()); -} - -//------------------------------------------------------------------------ -// TreeNodeInfoInitCall: Set the NodeInfo for a call. -// -// Arguments: -// call - The call node of interest -// -// Return Value: -// None. -// -void Lowering::TreeNodeInfoInitCall(GenTreeCall* call) -{ - TreeNodeInfo* info = &(call->gtLsraInfo); - LinearScan* l = m_lsra; - Compiler* compiler = comp; - bool hasMultiRegRetVal = false; - ReturnTypeDesc* retTypeDesc = nullptr; - - info->srcCount = 0; - if (call->TypeGet() != TYP_VOID) - { - hasMultiRegRetVal = call->HasMultiRegRetVal(); - if (hasMultiRegRetVal) - { - // dst count = number of registers in which the value is returned by call - retTypeDesc = call->GetReturnTypeDesc(); - info->dstCount = retTypeDesc->GetReturnRegCount(); - } - else - { - info->dstCount = 1; - } - } - else - { - info->dstCount = 0; - } - - GenTree* ctrlExpr = call->gtControlExpr; - if (call->gtCallType == CT_INDIRECT) - { - // either gtControlExpr != null or gtCallAddr != null. - // Both cannot be non-null at the same time. - assert(ctrlExpr == nullptr); - assert(call->gtCallAddr != nullptr); - ctrlExpr = call->gtCallAddr; - } - - // set reg requirements on call target represented as control sequence. 
- if (ctrlExpr != nullptr) - { - // we should never see a gtControlExpr whose type is void. - assert(ctrlExpr->TypeGet() != TYP_VOID); - - info->srcCount++; - // In case of fast tail implemented as jmp, make sure that gtControlExpr is - // computed into a register. - if (call->IsFastTailCall()) - { - NYI_ARM("tail call"); - } - } - else - { - info->internalIntCount = 1; - } - - RegisterType registerType = call->TypeGet(); - - // Set destination candidates for return value of the call. - if (call->IsHelperCall(compiler, CORINFO_HELP_INIT_PINVOKE_FRAME)) - { - // The ARM CORINFO_HELP_INIT_PINVOKE_FRAME helper uses a custom calling convention that returns with - // TCB in REG_PINVOKE_TCB. fgMorphCall() sets the correct argument registers. - info->setDstCandidates(l, RBM_PINVOKE_TCB); - } - else if (hasMultiRegRetVal) - { - assert(retTypeDesc != nullptr); - info->setDstCandidates(l, retTypeDesc->GetABIReturnRegs()); - } - else if (varTypeIsFloating(registerType)) - { - info->setDstCandidates(l, RBM_FLOATRET); - } - else if (registerType == TYP_LONG) - { - info->setDstCandidates(l, RBM_LNGRET); - } - else - { - info->setDstCandidates(l, RBM_INTRET); - } - - // If there is an explicit this pointer, we don't want that node to produce anything - // as it is redundant - if (call->gtCallObjp != nullptr) - { - GenTreePtr thisPtrNode = call->gtCallObjp; - - if (thisPtrNode->gtOper == GT_PUTARG_REG) - { - l->clearOperandCounts(thisPtrNode); - l->clearDstCount(thisPtrNode->gtOp.gtOp1); - } - else - { - l->clearDstCount(thisPtrNode); - } - } - - // First, count reg args - bool callHasFloatRegArgs = false; - - for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext()) - { - assert(list->OperIsList()); - - GenTreePtr argNode = list->Current(); - - fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode); - assert(curArgTabEntry); - - if (curArgTabEntry->regNum == REG_STK) - { - // late arg that is not passed in a register - assert(argNode->gtOper == GT_PUTARG_STK); - - TreeNodeInfoInitPutArgStk(argNode->AsPutArgStk(), curArgTabEntry); - continue; - } - - // A GT_FIELD_LIST has a TYP_VOID, but is used to represent a multireg struct - if (argNode->OperGet() == GT_FIELD_LIST) - { - // There could be up to 2-4 PUTARG_REGs in the list (3 or 4 can only occur for HFAs) - regNumber argReg = curArgTabEntry->regNum; - for (GenTreeFieldList* entry = argNode->AsFieldList(); entry != nullptr; entry = entry->Rest()) - { - TreeNodeInfoInitPutArgReg(entry->Current()->AsUnOp(), argReg, *info, false, &callHasFloatRegArgs); - - // Update argReg for the next putarg_reg (if any) - argReg = genRegArgNext(argReg); - } - } - else - { - TreeNodeInfoInitPutArgReg(argNode->AsUnOp(), curArgTabEntry->regNum, *info, false, &callHasFloatRegArgs); - } - } - - // Now, count stack args - // Note that these need to be computed into a register, but then - // they're just stored to the stack - so the reg doesn't - // need to remain live until the call. In fact, it must not - // because the code generator doesn't actually consider it live, - // so it can't be spilled. 
- - GenTreePtr args = call->gtCallArgs; - while (args) - { - GenTreePtr arg = args->gtOp.gtOp1; - - // Skip arguments that have been moved to the Late Arg list - if (!(args->gtFlags & GTF_LATE_ARG)) - { - if (arg->gtOper == GT_PUTARG_STK) - { - fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, arg); - assert(curArgTabEntry); - - assert(curArgTabEntry->regNum == REG_STK); - - TreeNodeInfoInitPutArgStk(arg->AsPutArgStk(), curArgTabEntry); - } - else - { - TreeNodeInfo* argInfo = &(arg->gtLsraInfo); - if (argInfo->dstCount != 0) - { - argInfo->isLocalDefUse = true; - } - - argInfo->dstCount = 0; - } - } - args = args->gtOp.gtOp2; - } - - if (call->IsVarargs() && callHasFloatRegArgs && !call->IsFastTailCall() && (ctrlExpr != nullptr)) - { - NYI_ARM("float reg varargs"); - } - - if (call->NeedsNullCheck()) - { - info->internalIntCount++; - } -} - -//------------------------------------------------------------------------ -// TreeNodeInfoInitPutArgStk: Set the NodeInfo for a GT_PUTARG_STK node -// -// Arguments: -// argNode - a GT_PUTARG_STK node -// -// Return Value: -// None. -// -// Notes: -// Set the child node(s) to be contained when we have a multireg arg -// -void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode, fgArgTabEntryPtr info) -{ - assert(argNode->gtOper == GT_PUTARG_STK); - - GenTreePtr putArgChild = argNode->gtOp.gtOp1; - - // Initialize 'argNode' as not contained, as this is both the default case - // and how MakeSrcContained expects to find things setup. - // - argNode->gtLsraInfo.srcCount = 1; - argNode->gtLsraInfo.dstCount = 0; - - // Do we have a TYP_STRUCT argument (or a GT_FIELD_LIST), if so it must be a multireg pass-by-value struct - if ((putArgChild->TypeGet() == TYP_STRUCT) || (putArgChild->OperGet() == GT_FIELD_LIST)) - { - // We will use store instructions that each write a register sized value - - if (putArgChild->OperGet() == GT_FIELD_LIST) - { - // We consume all of the items in the GT_FIELD_LIST - argNode->gtLsraInfo.srcCount = info->numSlots; - } - else - { - // We could use a ldp/stp sequence so we need two internal registers - argNode->gtLsraInfo.internalIntCount = 2; - - if (putArgChild->OperGet() == GT_OBJ) - { - GenTreePtr objChild = putArgChild->gtOp.gtOp1; - if (objChild->OperGet() == GT_LCL_VAR_ADDR) - { - // We will generate all of the code for the GT_PUTARG_STK, the GT_OBJ and the GT_LCL_VAR_ADDR - // as one contained operation - // - MakeSrcContained(putArgChild, objChild); - } - } - - // We will generate all of the code for the GT_PUTARG_STK and it's child node - // as one contained operation - // - MakeSrcContained(argNode, putArgChild); - } - } - else - { - // We must not have a multi-reg struct - assert(info->numSlots == 1); - } -} - void Lowering::TreeNodeInfoInitLclHeap(GenTree* tree) { TreeNodeInfo* info = &(tree->gtLsraInfo); @@ -811,147 +200,6 @@ void Lowering::TreeNodeInfoInitLclHeap(GenTree* tree) } //------------------------------------------------------------------------ -// TreeNodeInfoInitBlockStore: Set the NodeInfo for a block store. -// -// Arguments: -// blkNode - The block store node of interest -// -// Return Value: -// None. -void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode) -{ - GenTree* dstAddr = blkNode->Addr(); - unsigned size = blkNode->gtBlkSize; - GenTree* source = blkNode->Data(); - LinearScan* l = m_lsra; - Compiler* compiler = comp; - - // Sources are dest address and initVal or source. - // We may require an additional source or temp register for the size. 
- blkNode->gtLsraInfo.srcCount = 2; - blkNode->gtLsraInfo.dstCount = 0; - GenTreePtr srcAddrOrFill = nullptr; - bool isInitBlk = blkNode->OperIsInitBlkOp(); - - if (!isInitBlk) - { - // CopyObj or CopyBlk - if (source->gtOper == GT_IND) - { - srcAddrOrFill = blkNode->Data()->gtGetOp1(); - // We're effectively setting source as contained, but can't call MakeSrcContained, because the - // "inheritance" of the srcCount is to a child not a parent - it would "just work" but could be misleading. - // If srcAddr is already non-contained, we don't need to change it. - if (srcAddrOrFill->gtLsraInfo.getDstCount() == 0) - { - srcAddrOrFill->gtLsraInfo.setDstCount(1); - srcAddrOrFill->gtLsraInfo.setSrcCount(source->gtLsraInfo.srcCount); - } - m_lsra->clearOperandCounts(source); - } - else if (!source->IsMultiRegCall() && !source->OperIsSIMD()) - { - assert(source->IsLocal()); - MakeSrcContained(blkNode, source); - } - } - - if (isInitBlk) - { - GenTreePtr initVal = source; - if (initVal->OperIsInitVal()) - { - initVal = initVal->gtGetOp1(); - } - srcAddrOrFill = initVal; - - if (blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindUnroll) - { - // TODO-ARM-CQ: Currently we generate a helper call for every - // initblk we encounter. Later on we should implement loop unrolling - // code sequences to improve CQ. - // For reference see the code in lsraxarch.cpp. - NYI_ARM("initblk loop unrolling is currently not implemented."); - } - else - { - assert(blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindHelper); - // The helper follows the regular ABI. - dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_ARG_0); - initVal->gtLsraInfo.setSrcCandidates(l, RBM_ARG_1); - if (size != 0) - { - // Reserve a temp register for the block size argument. - blkNode->gtLsraInfo.setInternalCandidates(l, RBM_ARG_2); - blkNode->gtLsraInfo.internalIntCount = 1; - } - else - { - // The block size argument is a third argument to GT_STORE_DYN_BLK - noway_assert(blkNode->gtOper == GT_STORE_DYN_BLK); - blkNode->gtLsraInfo.setSrcCount(3); - GenTree* sizeNode = blkNode->AsDynBlk()->gtDynamicSize; - sizeNode->gtLsraInfo.setSrcCandidates(l, RBM_ARG_2); - } - } - } - else - { - // CopyObj or CopyBlk - // Sources are src and dest and size if not constant. - if (blkNode->OperGet() == GT_STORE_OBJ) - { - // CopyObj - NYI_ARM("GT_STORE_OBJ is needed of write barriers implementation"); - } - else - { - // CopyBlk - short internalIntCount = 0; - regMaskTP internalIntCandidates = RBM_NONE; - - if (blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindUnroll) - { - // TODO-ARM-CQ: cpblk loop unrolling is currently not implemented. - // In case of a CpBlk with a constant size and less than CPBLK_UNROLL_LIMIT size - // we should unroll the loop to improve CQ. - // For reference see the code in lsraxarch.cpp. - NYI_ARM("cpblk loop unrolling is currently not implemented."); - } - else - { - assert(blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindHelper); - dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_ARG_0); - // The srcAddr goes in arg1. - if (srcAddrOrFill != nullptr) - { - srcAddrOrFill->gtLsraInfo.setSrcCandidates(l, RBM_ARG_1); - } - if (size != 0) - { - // Reserve a temp register for the block size argument. 
- internalIntCandidates |= RBM_ARG_2; - internalIntCount++; - } - else - { - // The block size argument is a third argument to GT_STORE_DYN_BLK - noway_assert(blkNode->gtOper == GT_STORE_DYN_BLK); - blkNode->gtLsraInfo.setSrcCount(3); - GenTree* blockSize = blkNode->AsDynBlk()->gtDynamicSize; - blockSize->gtLsraInfo.setSrcCandidates(l, RBM_ARG_2); - } - } - if (internalIntCount != 0) - { - blkNode->gtLsraInfo.internalIntCount = internalIntCount; - blkNode->gtLsraInfo.setInternalCandidates(l, internalIntCandidates); - } - } - } -} - -//------------------------------------------------------------------------ // TreeNodeInfoInit: Set the register requirements for RA. // // Notes: diff --git a/src/jit/lsraarm64.cpp b/src/jit/lsraarm64.cpp index e2159cb..3b2d465 100644 --- a/src/jit/lsraarm64.cpp +++ b/src/jit/lsraarm64.cpp @@ -29,43 +29,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #include "lower.h" //------------------------------------------------------------------------ -// TreeNodeInfoInitStoreLoc: Set register requirements for a store of a lclVar -// -// Arguments: -// storeLoc - the local store (GT_STORE_LCL_FLD or GT_STORE_LCL_VAR) -// -// Notes: -// This involves: -// - Setting the appropriate candidates for a store of a multi-reg call return value. -// - Handling of contained immediates. - -void Lowering::TreeNodeInfoInitStoreLoc(GenTreeLclVarCommon* storeLoc) -{ - TreeNodeInfo* info = &(storeLoc->gtLsraInfo); - - // Is this the case of var = call where call is returning - // a value in multiple return registers? - GenTree* op1 = storeLoc->gtGetOp1(); - if (op1->IsMultiRegCall()) - { - // backend expects to see this case only for store lclvar. - assert(storeLoc->OperGet() == GT_STORE_LCL_VAR); - - // srcCount = number of registers in which the value is returned by call - GenTreeCall* call = op1->AsCall(); - ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc(); - info->srcCount = retTypeDesc->GetReturnRegCount(); - - // Call node srcCandidates = Bitwise-OR(allregs(GetReturnRegType(i))) for all i=0..RetRegCount-1 - regMaskTP srcCandidates = m_lsra->allMultiRegCallNodeRegs(call); - op1->gtLsraInfo.setSrcCandidates(m_lsra, srcCandidates); - return; - } - - CheckImmedAndMakeContained(storeLoc, op1); -} - -//------------------------------------------------------------------------ // TreeNodeInfoInit: Set the register requirements for RA. // // Notes: @@ -435,19 +398,8 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) case GT_RSH: case GT_RSZ: case GT_ROR: - { - info->srcCount = 2; - info->dstCount = 1; - - GenTreePtr shiftBy = tree->gtOp.gtOp2; - GenTreePtr source = tree->gtOp.gtOp1; - if (shiftBy->IsCnsIntOrI()) - { - l->clearDstCount(shiftBy); - info->srcCount--; - } - } - break; + TreeNodeInfoInitShiftRotate(tree); + break; case GT_EQ: case GT_NE: @@ -847,458 +799,6 @@ void Lowering::TreeNodeInfoInitReturn(GenTree* tree) } } -//------------------------------------------------------------------------ -// TreeNodeInfoInitPutArgReg: Set the NodeInfo for a PUTARG_REG. -// -// Arguments: -// node - The PUTARG_REG node. -// argReg - The register in which to pass the argument. -// info - The info for the node's using call. -// isVarArgs - True if the call uses a varargs calling convention. -// callHasFloatRegArgs - Set to true if this PUTARG_REG uses an FP register. -// -// Return Value: -// None. 
-// -void Lowering::TreeNodeInfoInitPutArgReg( - GenTreeUnOp* node, regNumber argReg, TreeNodeInfo& info, bool isVarArgs, bool* callHasFloatRegArgs) -{ - assert(node != nullptr); - assert(node->OperIsPutArgReg()); - assert(argReg != REG_NA); - - // Each register argument corresponds to one source. - info.srcCount++; - - // Set the register requirements for the node. - const regMaskTP argMask = genRegMask(argReg); - node->gtLsraInfo.setDstCandidates(m_lsra, argMask); - node->gtLsraInfo.setSrcCandidates(m_lsra, argMask); - - // To avoid redundant moves, have the argument operand computed in the - // register in which the argument is passed to the call. - node->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(m_lsra, m_lsra->getUseCandidates(node)); - - *callHasFloatRegArgs |= varTypeIsFloating(node->TypeGet()); -} - -//------------------------------------------------------------------------ -// TreeNodeInfoInitCall: Set the NodeInfo for a call. -// -// Arguments: -// call - The call node of interest -// -// Return Value: -// None. -// -void Lowering::TreeNodeInfoInitCall(GenTreeCall* call) -{ - TreeNodeInfo* info = &(call->gtLsraInfo); - LinearScan* l = m_lsra; - Compiler* compiler = comp; - bool hasMultiRegRetVal = false; - ReturnTypeDesc* retTypeDesc = nullptr; - - info->srcCount = 0; - if (call->TypeGet() != TYP_VOID) - { - hasMultiRegRetVal = call->HasMultiRegRetVal(); - if (hasMultiRegRetVal) - { - // dst count = number of registers in which the value is returned by call - retTypeDesc = call->GetReturnTypeDesc(); - info->dstCount = retTypeDesc->GetReturnRegCount(); - } - else - { - info->dstCount = 1; - } - } - else - { - info->dstCount = 0; - } - - GenTree* ctrlExpr = call->gtControlExpr; - if (call->gtCallType == CT_INDIRECT) - { - // either gtControlExpr != null or gtCallAddr != null. - // Both cannot be non-null at the same time. - assert(ctrlExpr == nullptr); - assert(call->gtCallAddr != nullptr); - ctrlExpr = call->gtCallAddr; - } - - // set reg requirements on call target represented as control sequence. - if (ctrlExpr != nullptr) - { - // we should never see a gtControlExpr whose type is void. - assert(ctrlExpr->TypeGet() != TYP_VOID); - - info->srcCount++; - - // In case of fast tail implemented as jmp, make sure that gtControlExpr is - // computed into a register. - if (call->IsFastTailCall()) - { - // Fast tail call - make sure that call target is always computed in IP0 - // so that epilog sequence can generate "br xip0" to achieve fast tail call. - ctrlExpr->gtLsraInfo.setSrcCandidates(l, genRegMask(REG_IP0)); - } - } - - RegisterType registerType = call->TypeGet(); - - // Set destination candidates for return value of the call. 
- if (hasMultiRegRetVal) - { - assert(retTypeDesc != nullptr); - info->setDstCandidates(l, retTypeDesc->GetABIReturnRegs()); - } - else if (varTypeIsFloating(registerType)) - { - info->setDstCandidates(l, RBM_FLOATRET); - } - else if (registerType == TYP_LONG) - { - info->setDstCandidates(l, RBM_LNGRET); - } - else - { - info->setDstCandidates(l, RBM_INTRET); - } - - // If there is an explicit this pointer, we don't want that node to produce anything - // as it is redundant - if (call->gtCallObjp != nullptr) - { - GenTreePtr thisPtrNode = call->gtCallObjp; - - if (thisPtrNode->gtOper == GT_PUTARG_REG) - { - l->clearOperandCounts(thisPtrNode); - l->clearDstCount(thisPtrNode->gtOp.gtOp1); - } - else - { - l->clearDstCount(thisPtrNode); - } - } - - // First, count reg args - bool callHasFloatRegArgs = false; - - for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext()) - { - assert(list->OperIsList()); - - GenTreePtr argNode = list->Current(); - - fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode); - assert(curArgTabEntry); - - if (curArgTabEntry->regNum == REG_STK) - { - // late arg that is not passed in a register - assert(argNode->gtOper == GT_PUTARG_STK); - - TreeNodeInfoInitPutArgStk(argNode->AsPutArgStk(), curArgTabEntry); - continue; - } - - // A GT_FIELD_LIST has a TYP_VOID, but is used to represent a multireg struct - if (argNode->OperGet() == GT_FIELD_LIST) - { - // There could be up to 2-4 PUTARG_REGs in the list (3 or 4 can only occur for HFAs) - regNumber argReg = curArgTabEntry->regNum; - for (GenTreeFieldList* entry = argNode->AsFieldList(); entry != nullptr; entry = entry->Rest()) - { - TreeNodeInfoInitPutArgReg(entry->Current()->AsUnOp(), argReg, *info, false, &callHasFloatRegArgs); - - // Update argReg for the next putarg_reg (if any) - argReg = genRegArgNext(argReg); - } - } - else - { - TreeNodeInfoInitPutArgReg(argNode->AsUnOp(), curArgTabEntry->regNum, *info, false, &callHasFloatRegArgs); - } - } - - // Now, count stack args - // Note that these need to be computed into a register, but then - // they're just stored to the stack - so the reg doesn't - // need to remain live until the call. In fact, it must not - // because the code generator doesn't actually consider it live, - // so it can't be spilled. - - GenTreePtr args = call->gtCallArgs; - while (args) - { - GenTreePtr arg = args->gtOp.gtOp1; - - // Skip arguments that have been moved to the Late Arg list - if (!(args->gtFlags & GTF_LATE_ARG)) - { - if (arg->gtOper == GT_PUTARG_STK) - { - fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, arg); - assert(curArgTabEntry); - - assert(curArgTabEntry->regNum == REG_STK); - - TreeNodeInfoInitPutArgStk(arg->AsPutArgStk(), curArgTabEntry); - } - else - { - TreeNodeInfo* argInfo = &(arg->gtLsraInfo); - if (argInfo->dstCount != 0) - { - argInfo->isLocalDefUse = true; - } - - argInfo->dstCount = 0; - } - } - args = args->gtOp.gtOp2; - } - - // If it is a fast tail call, it is already preferenced to use IP0. - // Therefore, no need set src candidates on call tgt again. - if (call->IsVarargs() && callHasFloatRegArgs && !call->IsFastTailCall() && (ctrlExpr != nullptr)) - { - // Don't assign the call target to any of the argument registers because - // we will use them to also pass floating point arguments as required - // by Arm64 ABI. 
- ctrlExpr->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~(RBM_ARG_REGS)); - } -} - -//------------------------------------------------------------------------ -// TreeNodeInfoInitPutArgStk: Set the NodeInfo for a GT_PUTARG_STK node -// -// Arguments: -// argNode - a GT_PUTARG_STK node -// -// Return Value: -// None. -// -// Notes: -// Set the child node(s) to be contained when we have a multireg arg -// -void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode, fgArgTabEntryPtr info) -{ - assert(argNode->gtOper == GT_PUTARG_STK); - - GenTreePtr putArgChild = argNode->gtOp.gtOp1; - - // Initialize 'argNode' as not contained, as this is both the default case - // and how MakeSrcContained expects to find things setup. - // - argNode->gtLsraInfo.srcCount = 1; - argNode->gtLsraInfo.dstCount = 0; - - // Do we have a TYP_STRUCT argument (or a GT_FIELD_LIST), if so it must be a multireg pass-by-value struct - if ((putArgChild->TypeGet() == TYP_STRUCT) || (putArgChild->OperGet() == GT_FIELD_LIST)) - { - // We will use store instructions that each write a register sized value - - if (putArgChild->OperGet() == GT_FIELD_LIST) - { - // We consume all of the items in the GT_FIELD_LIST - argNode->gtLsraInfo.srcCount = info->numSlots; - } - else - { - // We could use a ldp/stp sequence so we need two internal registers - argNode->gtLsraInfo.internalIntCount = 2; - - if (putArgChild->OperGet() == GT_OBJ) - { - GenTreePtr objChild = putArgChild->gtOp.gtOp1; - if (objChild->OperGet() == GT_LCL_VAR_ADDR) - { - // We will generate all of the code for the GT_PUTARG_STK, the GT_OBJ and the GT_LCL_VAR_ADDR - // as one contained operation - // - MakeSrcContained(putArgChild, objChild); - } - } - - // We will generate all of the code for the GT_PUTARG_STK and it's child node - // as one contained operation - // - MakeSrcContained(argNode, putArgChild); - } - } - else - { - // We must not have a multi-reg struct - assert(info->numSlots == 1); - } -} - -//------------------------------------------------------------------------ -// TreeNodeInfoInitBlockStore: Set the NodeInfo for a block store. -// -// Arguments: -// blkNode - The block store node of interest -// -// Return Value: -// None. -// -// Notes: - -void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode) -{ - GenTree* dstAddr = blkNode->Addr(); - unsigned size = blkNode->gtBlkSize; - GenTree* source = blkNode->Data(); - LinearScan* l = m_lsra; - Compiler* compiler = comp; - - // Sources are dest address and initVal or source. - // We may require an additional source or temp register for the size. - blkNode->gtLsraInfo.srcCount = 2; - blkNode->gtLsraInfo.dstCount = 0; - GenTreePtr srcAddrOrFill = nullptr; - bool isInitBlk = blkNode->OperIsInitBlkOp(); - - if (!isInitBlk) - { - // CopyObj or CopyBlk - if (source->gtOper == GT_IND) - { - srcAddrOrFill = blkNode->Data()->gtGetOp1(); - // We're effectively setting source as contained, but can't call MakeSrcContained, because the - // "inheritance" of the srcCount is to a child not a parent - it would "just work" but could be misleading. - // If srcAddr is already non-contained, we don't need to change it. 
- if (srcAddrOrFill->gtLsraInfo.getDstCount() == 0) - { - srcAddrOrFill->gtLsraInfo.setDstCount(1); - srcAddrOrFill->gtLsraInfo.setSrcCount(source->gtLsraInfo.srcCount); - } - m_lsra->clearOperandCounts(source); - } - else if (!source->IsMultiRegCall() && !source->OperIsSIMD()) - { - assert(source->IsLocal()); - MakeSrcContained(blkNode, source); - } - } - - if (isInitBlk) - { - GenTreePtr initVal = source; - if (initVal->OperIsInitVal()) - { - initVal = initVal->gtGetOp1(); - } - srcAddrOrFill = initVal; - - if (blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindUnroll) - { - // No additional temporaries required - ssize_t fill = initVal->gtIntCon.gtIconVal & 0xFF; - if (fill == 0) - { - MakeSrcContained(blkNode, source); - } - } - else - { - assert(blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindHelper); - // The helper follows the regular ABI. - dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_ARG_0); - initVal->gtLsraInfo.setSrcCandidates(l, RBM_ARG_1); - if (size != 0) - { - // Reserve a temp register for the block size argument. - blkNode->gtLsraInfo.setInternalCandidates(l, RBM_ARG_2); - blkNode->gtLsraInfo.internalIntCount = 1; - } - else - { - // The block size argument is a third argument to GT_STORE_DYN_BLK - noway_assert(blkNode->gtOper == GT_STORE_DYN_BLK); - blkNode->gtLsraInfo.setSrcCount(3); - GenTree* sizeNode = blkNode->AsDynBlk()->gtDynamicSize; - sizeNode->gtLsraInfo.setSrcCandidates(l, RBM_ARG_2); - } - } - } - else - { - // CopyObj or CopyBlk - // Sources are src and dest and size if not constant. - - if (blkNode->OperGet() == GT_STORE_OBJ) - { - // CopyObj - - // We don't need to materialize the struct size but we still need - // a temporary register to perform the sequence of loads and stores. - blkNode->gtLsraInfo.internalIntCount = 1; - - dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_WRITE_BARRIER_DST_BYREF); - // If we have a source address we want it in REG_WRITE_BARRIER_SRC_BYREF. - // Otherwise, if it is a local, codegen will put its address in REG_WRITE_BARRIER_SRC_BYREF, - // which is killed by a StoreObj (and thus needn't be reserved). - if (srcAddrOrFill != nullptr) - { - srcAddrOrFill->gtLsraInfo.setSrcCandidates(l, RBM_WRITE_BARRIER_SRC_BYREF); - } - } - else - { - // CopyBlk - short internalIntCount = 0; - regMaskTP internalIntCandidates = RBM_NONE; - - if (blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindUnroll) - { - internalIntCount = 1; - internalIntCandidates = RBM_ALLINT; - - if (size >= 2 * REGSIZE_BYTES) - { - // Use ldp/stp to reduce code size and improve performance - internalIntCount++; - } - } - else - { - assert(blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindHelper); - dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_ARG_0); - // The srcAddr goes in arg1. - if (srcAddrOrFill != nullptr) - { - srcAddrOrFill->gtLsraInfo.setSrcCandidates(l, RBM_ARG_1); - } - if (size != 0) - { - // Reserve a temp register for the block size argument. 
- internalIntCandidates |= RBM_ARG_2; - internalIntCount++; - } - else - { - // The block size argument is a third argument to GT_STORE_DYN_BLK - noway_assert(blkNode->gtOper == GT_STORE_DYN_BLK); - blkNode->gtLsraInfo.setSrcCount(3); - GenTree* blockSize = blkNode->AsDynBlk()->gtDynamicSize; - blockSize->gtLsraInfo.setSrcCandidates(l, RBM_ARG_2); - } - } - if (internalIntCount != 0) - { - blkNode->gtLsraInfo.internalIntCount = internalIntCount; - blkNode->gtLsraInfo.setInternalCandidates(l, internalIntCandidates); - } - } - } -} - #ifdef FEATURE_SIMD //------------------------------------------------------------------------ // TreeNodeInfoInitSIMD: Set the NodeInfo for a GT_SIMD tree. @@ -1500,223 +1000,6 @@ void Lowering::TreeNodeInfoInitSIMD(GenTree* tree) } #endif // FEATURE_SIMD -void Lowering::TreeNodeInfoInitGCWriteBarrier(GenTree* tree) -{ - GenTreePtr dst = tree; - GenTreePtr addr = tree->gtOp.gtOp1; - GenTreePtr src = tree->gtOp.gtOp2; - - if (addr->OperGet() == GT_LEA) - { - // In the case where we are doing a helper assignment, if the dst - // is an indir through an lea, we need to actually instantiate the - // lea in a register - GenTreeAddrMode* lea = addr->AsAddrMode(); - - short leaSrcCount = 0; - if (lea->Base() != nullptr) - { - leaSrcCount++; - } - if (lea->Index() != nullptr) - { - leaSrcCount++; - } - lea->gtLsraInfo.srcCount = leaSrcCount; - lea->gtLsraInfo.dstCount = 1; - } - -#if NOGC_WRITE_BARRIERS - // For the NOGC JIT Helper calls - // - // the 'addr' goes into x14 (REG_WRITE_BARRIER_DST_BYREF) - // the 'src' goes into x15 (REG_WRITE_BARRIER) - // - addr->gtLsraInfo.setSrcCandidates(m_lsra, RBM_WRITE_BARRIER_DST_BYREF); - src->gtLsraInfo.setSrcCandidates(m_lsra, RBM_WRITE_BARRIER); -#else - // For the standard JIT Helper calls - // op1 goes into REG_ARG_0 and - // op2 goes into REG_ARG_1 - // - addr->gtLsraInfo.setSrcCandidates(m_lsra, RBM_ARG_0); - src->gtLsraInfo.setSrcCandidates(m_lsra, RBM_ARG_1); -#endif // NOGC_WRITE_BARRIERS - - // Both src and dst must reside in a register, which they should since we haven't set - // either of them as contained. - assert(addr->gtLsraInfo.dstCount == 1); - assert(src->gtLsraInfo.dstCount == 1); -} - -//----------------------------------------------------------------------------------------- -// TreeNodeInfoInitIndir: Specify register requirements for address expression of an indirection operation. -// -// Arguments: -// indirTree - GT_IND or GT_STOREIND gentree node -// -void Lowering::TreeNodeInfoInitIndir(GenTreePtr indirTree) -{ - assert(indirTree->OperIsIndir()); - // If this is the rhs of a block copy (i.e. non-enregisterable struct), - // it has no register requirements. - if (indirTree->TypeGet() == TYP_STRUCT) - { - return; - } - - GenTreePtr addr = indirTree->gtGetOp1(); - TreeNodeInfo* info = &(indirTree->gtLsraInfo); - - GenTreePtr base = nullptr; - GenTreePtr index = nullptr; - unsigned cns = 0; - unsigned mul; - bool rev; - bool modifiedSources = false; - - if ((addr->OperGet() == GT_LEA) && IsSafeToContainMem(indirTree, addr)) - { - GenTreeAddrMode* lea = addr->AsAddrMode(); - base = lea->Base(); - index = lea->Index(); - cns = lea->gtOffset; - - m_lsra->clearOperandCounts(addr); - // The srcCount is decremented because addr is now "contained", - // then we account for the base and index below, if they are non-null. 
- info->srcCount--; - } - else if (comp->codeGen->genCreateAddrMode(addr, -1, true, 0, &rev, &base, &index, &mul, &cns, true /*nogen*/) && - !(modifiedSources = AreSourcesPossiblyModifiedLocals(indirTree, base, index))) - { - // An addressing mode will be constructed that may cause some - // nodes to not need a register, and cause others' lifetimes to be extended - // to the GT_IND or even its parent if it's an assignment - - assert(base != addr); - m_lsra->clearOperandCounts(addr); - - GenTreePtr arrLength = nullptr; - - // Traverse the computation below GT_IND to find the operands - // for the addressing mode, marking the various constants and - // intermediate results as not consuming/producing. - // If the traversal were more complex, we might consider using - // a traversal function, but the addressing mode is only made - // up of simple arithmetic operators, and the code generator - // only traverses one leg of each node. - - bool foundBase = (base == nullptr); - bool foundIndex = (index == nullptr); - GenTreePtr nextChild = nullptr; - for (GenTreePtr child = addr; child != nullptr && !child->OperIsLeaf(); child = nextChild) - { - nextChild = nullptr; - GenTreePtr op1 = child->gtOp.gtOp1; - GenTreePtr op2 = (child->OperIsBinary()) ? child->gtOp.gtOp2 : nullptr; - - if (op1 == base) - { - foundBase = true; - } - else if (op1 == index) - { - foundIndex = true; - } - else - { - m_lsra->clearOperandCounts(op1); - if (!op1->OperIsLeaf()) - { - nextChild = op1; - } - } - - if (op2 != nullptr) - { - if (op2 == base) - { - foundBase = true; - } - else if (op2 == index) - { - foundIndex = true; - } - else - { - m_lsra->clearOperandCounts(op2); - if (!op2->OperIsLeaf()) - { - assert(nextChild == nullptr); - nextChild = op2; - } - } - } - } - assert(foundBase && foundIndex); - info->srcCount--; // it gets incremented below. - } - else if (addr->gtOper == GT_ARR_ELEM) - { - // The GT_ARR_ELEM consumes all the indices and produces the offset. - // The array object lives until the mem access. - // We also consume the target register to which the address is - // computed - - info->srcCount++; - assert(addr->gtLsraInfo.srcCount >= 2); - addr->gtLsraInfo.srcCount -= 1; - } - else - { - // it is nothing but a plain indir - info->srcCount--; // base gets added in below - base = addr; - } - - if (base != nullptr) - { - info->srcCount++; - } - - if (index != nullptr && !modifiedSources) - { - info->srcCount++; - } - - // On ARM64 we may need a single internal register - // (when both conditions are true then we still only need a single internal register) - if ((index != nullptr) && (cns != 0)) - { - // ARM64 does not support both Index and offset so we need an internal register - info->internalIntCount = 1; - } - else if (!emitter::emitIns_valid_imm_for_ldst_offset(cns, emitTypeSize(indirTree))) - { - // This offset can't be contained in the ldr/str instruction, so we need an internal register - info->internalIntCount = 1; - } -} - -//------------------------------------------------------------------------ -// TreeNodeInfoInitCmp: Set the register requirements for a compare. -// -// Arguments: -// tree - The node of interest -// -// Return Value: -// None. 
-//
-void Lowering::TreeNodeInfoInitCmp(GenTreePtr tree)
-{
-    TreeNodeInfo* info = &(tree->gtLsraInfo);
-
-    info->srcCount = 2;
-    info->dstCount = 1;
-    CheckImmedAndMakeContained(tree, tree->gtOp.gtOp2);
-}
-
 #endif // _TARGET_ARM64_
 
 #endif // !LEGACY_BACKEND
diff --git a/src/jit/lsraarmarch.cpp b/src/jit/lsraarmarch.cpp
new file mode 100644
index 0000000..11fc490
--- /dev/null
+++ b/src/jit/lsraarmarch.cpp
@@ -0,0 +1,876 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX                                                                           XX
+XX           Register Requirements for ARM and ARM64 common code            XX
+XX                                                                           XX
+XX  This encapsulates common logic for setting register requirements for    XX
+XX  the ARM and ARM64 architectures.                                         XX
+XX                                                                           XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator
+
+#ifdef _TARGET_ARMARCH_ // This file is ONLY used for ARM and ARM64 architectures
+
+#include "jit.h"
+#include "sideeffects.h"
+#include "lower.h"
+#include "lsra.h"
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitStoreLoc: Set register requirements for a store of a lclVar
+//
+// Arguments:
+//    storeLoc - the local store (GT_STORE_LCL_FLD or GT_STORE_LCL_VAR)
+//
+// Notes:
+//    This involves:
+//    - Setting the appropriate candidates for a store of a multi-reg call return value.
+//    - Handling of contained immediates.
+//
+void Lowering::TreeNodeInfoInitStoreLoc(GenTreeLclVarCommon* storeLoc)
+{
+    TreeNodeInfo* info = &(storeLoc->gtLsraInfo);
+
+    // Is this the case of var = call where call is returning
+    // a value in multiple return registers?
+    GenTree* op1 = storeLoc->gtGetOp1();
+    if (op1->IsMultiRegCall())
+    {
+        // backend expects to see this case only for store lclvar.
+        assert(storeLoc->OperGet() == GT_STORE_LCL_VAR);
+
+        // srcCount = number of registers in which the value is returned by call
+        GenTreeCall*    call        = op1->AsCall();
+        ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
+        info->srcCount              = retTypeDesc->GetReturnRegCount();
+
+        // Call node srcCandidates = Bitwise-OR(allregs(GetReturnRegType(i))) for all i=0..RetRegCount-1
+        regMaskTP srcCandidates = m_lsra->allMultiRegCallNodeRegs(call);
+        op1->gtLsraInfo.setSrcCandidates(m_lsra, srcCandidates);
+        return;
+    }
+
+    CheckImmedAndMakeContained(storeLoc, op1);
+}
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitCmp: Set the register requirements for a compare.
+//
+// Arguments:
+//    tree - The node of interest
+//
+// Return Value:
+//    None.
+//
+void Lowering::TreeNodeInfoInitCmp(GenTreePtr tree)
+{
+    TreeNodeInfo* info = &(tree->gtLsraInfo);
+
+    info->srcCount = 2;
+    info->dstCount = 1;
+
+#ifdef _TARGET_ARM_
+
+    GenTreePtr op1     = tree->gtOp.gtOp1;
+    GenTreePtr op2     = tree->gtOp.gtOp2;
+    var_types  op1Type = op1->TypeGet();
+    var_types  op2Type = op2->TypeGet();
+
+    // Long compares will consume GT_LONG nodes, each of which produces two results.
+    // Thus for each long operand there will be an additional source.
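+    // For illustration, assuming a hypothetical 32-bit compare "longA == longB":
+    // each GT_LONG operand supplies a lo/hi register pair, so the two increments
+    // below take srcCount from 2 to 4.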
+ // TODO-ARM-CQ: Mark hiOp2 and loOp2 as contained if it is a constant. + if (varTypeIsLong(op1Type)) + { + info->srcCount++; + } + if (varTypeIsLong(op2Type)) + { + info->srcCount++; + } + +#endif // _TARGET_ARM_ + + CheckImmedAndMakeContained(tree, tree->gtOp.gtOp2); +} + +void Lowering::TreeNodeInfoInitGCWriteBarrier(GenTree* tree) +{ + GenTreePtr dst = tree; + GenTreePtr addr = tree->gtOp.gtOp1; + GenTreePtr src = tree->gtOp.gtOp2; + + if (addr->OperGet() == GT_LEA) + { + // In the case where we are doing a helper assignment, if the dst + // is an indir through an lea, we need to actually instantiate the + // lea in a register + GenTreeAddrMode* lea = addr->AsAddrMode(); + + short leaSrcCount = 0; + if (lea->Base() != nullptr) + { + leaSrcCount++; + } + if (lea->Index() != nullptr) + { + leaSrcCount++; + } + lea->gtLsraInfo.srcCount = leaSrcCount; + lea->gtLsraInfo.dstCount = 1; + } + +#if NOGC_WRITE_BARRIERS + NYI_ARM("NOGC_WRITE_BARRIERS"); + + // For the NOGC JIT Helper calls + // + // the 'addr' goes into x14 (REG_WRITE_BARRIER_DST_BYREF) + // the 'src' goes into x15 (REG_WRITE_BARRIER) + // + addr->gtLsraInfo.setSrcCandidates(m_lsra, RBM_WRITE_BARRIER_DST_BYREF); + src->gtLsraInfo.setSrcCandidates(m_lsra, RBM_WRITE_BARRIER); +#else + // For the standard JIT Helper calls + // op1 goes into REG_ARG_0 and + // op2 goes into REG_ARG_1 + // + addr->gtLsraInfo.setSrcCandidates(m_lsra, RBM_ARG_0); + src->gtLsraInfo.setSrcCandidates(m_lsra, RBM_ARG_1); +#endif // NOGC_WRITE_BARRIERS + + // Both src and dst must reside in a register, which they should since we haven't set + // either of them as contained. + assert(addr->gtLsraInfo.dstCount == 1); + assert(src->gtLsraInfo.dstCount == 1); +} + +//------------------------------------------------------------------------ +// TreeNodeInfoInitIndir: Specify register requirements for address expression +// of an indirection operation. +// +// Arguments: +// indirTree - GT_IND, GT_STOREIND, block node or GT_NULLCHECK gentree node +// +void Lowering::TreeNodeInfoInitIndir(GenTreePtr indirTree) +{ + assert(indirTree->OperIsIndir()); + // If this is the rhs of a block copy (i.e. non-enregisterable struct), + // it has no register requirements. + if (indirTree->TypeGet() == TYP_STRUCT) + { + return; + } + + GenTreePtr addr = indirTree->gtGetOp1(); + TreeNodeInfo* info = &(indirTree->gtLsraInfo); + + GenTreePtr base = nullptr; + GenTreePtr index = nullptr; + unsigned cns = 0; + unsigned mul; + bool rev; + bool modifiedSources = false; + + if ((addr->OperGet() == GT_LEA) && IsSafeToContainMem(indirTree, addr)) + { + GenTreeAddrMode* lea = addr->AsAddrMode(); + base = lea->Base(); + index = lea->Index(); + cns = lea->gtOffset; + + m_lsra->clearOperandCounts(addr); + // The srcCount is decremented because addr is now "contained", + // then we account for the base and index below, if they are non-null. 
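+        // As a worked example (a hypothetical tree, for illustration only): for an
+        // indir whose address is GT_LEA(base, index), the contained LEA gives up
+        // its source here, then base and index each add one back further below,
+        // leaving the indir with srcCount == 2 and the LEA needing no register.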
+ info->srcCount--; + } + else if (comp->codeGen->genCreateAddrMode(addr, -1, true, 0, &rev, &base, &index, &mul, &cns, true /*nogen*/) && + !(modifiedSources = AreSourcesPossiblyModifiedLocals(indirTree, base, index))) + { + // An addressing mode will be constructed that may cause some + // nodes to not need a register, and cause others' lifetimes to be extended + // to the GT_IND or even its parent if it's an assignment + + assert(base != addr); + m_lsra->clearOperandCounts(addr); + + GenTreePtr arrLength = nullptr; + + // Traverse the computation below GT_IND to find the operands + // for the addressing mode, marking the various constants and + // intermediate results as not consuming/producing. + // If the traversal were more complex, we might consider using + // a traversal function, but the addressing mode is only made + // up of simple arithmetic operators, and the code generator + // only traverses one leg of each node. + + bool foundBase = (base == nullptr); + bool foundIndex = (index == nullptr); + GenTreePtr nextChild = nullptr; + for (GenTreePtr child = addr; child != nullptr && !child->OperIsLeaf(); child = nextChild) + { + nextChild = nullptr; + GenTreePtr op1 = child->gtOp.gtOp1; + GenTreePtr op2 = (child->OperIsBinary()) ? child->gtOp.gtOp2 : nullptr; + + if (op1 == base) + { + foundBase = true; + } + else if (op1 == index) + { + foundIndex = true; + } + else + { + m_lsra->clearOperandCounts(op1); + if (!op1->OperIsLeaf()) + { + nextChild = op1; + } + } + + if (op2 != nullptr) + { + if (op2 == base) + { + foundBase = true; + } + else if (op2 == index) + { + foundIndex = true; + } + else + { + m_lsra->clearOperandCounts(op2); + if (!op2->OperIsLeaf()) + { + assert(nextChild == nullptr); + nextChild = op2; + } + } + } + } + assert(foundBase && foundIndex); + info->srcCount--; // it gets incremented below. + } + else if (addr->gtOper == GT_ARR_ELEM) + { + // The GT_ARR_ELEM consumes all the indices and produces the offset. + // The array object lives until the mem access. + // We also consume the target register to which the address is + // computed + + info->srcCount++; + assert(addr->gtLsraInfo.srcCount >= 2); + addr->gtLsraInfo.srcCount -= 1; + } + else + { + // it is nothing but a plain indir + info->srcCount--; // base gets added in below + base = addr; + } + + if (base != nullptr) + { + info->srcCount++; + } + + if (index != nullptr && !modifiedSources) + { + info->srcCount++; + +#ifdef _TARGET_ARM_ + info->internalIntCount++; +#endif // _TARGET_ARM_ + } + +#ifdef _TARGET_ARM64_ + + // On ARM64 we may need a single internal register + // (when both conditions are true then we still only need a single internal register) + if ((index != nullptr) && (cns != 0)) + { + // ARM64 does not support both Index and offset so we need an internal register + info->internalIntCount = 1; + } + else if (!emitter::emitIns_valid_imm_for_ldst_offset(cns, emitTypeSize(indirTree))) + { + // This offset can't be contained in the ldr/str instruction, so we need an internal register + info->internalIntCount = 1; + } + +#endif // _TARGET_ARM64_ +} + +//------------------------------------------------------------------------ +// TreeNodeInfoInitShiftRotate: Set the NodeInfo for a shift or rotate. +// +// Arguments: +// tree - The node of interest +// +// Return Value: +// None. 
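+//
+// Notes:
+//    For example, a hypothetical "x << 3" tree has its constant shift amount
+//    contained below (clearDstCount on shiftBy, so srcCount drops from 2 to 1),
+//    while a variable "x << y" keeps both operands as register sources.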
+// +void Lowering::TreeNodeInfoInitShiftRotate(GenTree* tree) +{ + TreeNodeInfo* info = &(tree->gtLsraInfo); + LinearScan* l = m_lsra; + + info->srcCount = 2; + info->dstCount = 1; + + GenTreePtr shiftBy = tree->gtOp.gtOp2; + GenTreePtr source = tree->gtOp.gtOp1; + if (shiftBy->IsCnsIntOrI()) + { + l->clearDstCount(shiftBy); + info->srcCount--; + } + +#ifdef _TARGET_ARM_ + + // The first operand of a GT_LSH_HI and GT_RSH_LO oper is a GT_LONG so that + // we can have a three operand form. Increment the srcCount. + if (tree->OperGet() == GT_LSH_HI || tree->OperGet() == GT_RSH_LO) + { + assert(source->OperGet() == GT_LONG); + + info->srcCount++; + + if (tree->OperGet() == GT_LSH_HI) + { + GenTreePtr sourceLo = source->gtOp.gtOp1; + sourceLo->gtLsraInfo.isDelayFree = true; + } + else + { + GenTreePtr sourceHi = source->gtOp.gtOp2; + sourceHi->gtLsraInfo.isDelayFree = true; + } + + source->gtLsraInfo.hasDelayFreeSrc = true; + info->hasDelayFreeSrc = true; + } + +#endif // _TARGET_ARM_ +} + +//------------------------------------------------------------------------ +// TreeNodeInfoInitPutArgReg: Set the NodeInfo for a PUTARG_REG. +// +// Arguments: +// node - The PUTARG_REG node. +// argReg - The register in which to pass the argument. +// info - The info for the node's using call. +// isVarArgs - True if the call uses a varargs calling convention. +// callHasFloatRegArgs - Set to true if this PUTARG_REG uses an FP register. +// +// Return Value: +// None. +// +void Lowering::TreeNodeInfoInitPutArgReg( + GenTreeUnOp* node, regNumber argReg, TreeNodeInfo& info, bool isVarArgs, bool* callHasFloatRegArgs) +{ + assert(node != nullptr); + assert(node->OperIsPutArgReg()); + assert(argReg != REG_NA); + + // Each register argument corresponds to one source. + info.srcCount++; + + // Set the register requirements for the node. + const regMaskTP argMask = genRegMask(argReg); + node->gtLsraInfo.setDstCandidates(m_lsra, argMask); + node->gtLsraInfo.setSrcCandidates(m_lsra, argMask); + + // To avoid redundant moves, have the argument operand computed in the + // register in which the argument is passed to the call. + node->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(m_lsra, m_lsra->getUseCandidates(node)); + + *callHasFloatRegArgs |= varTypeIsFloating(node->TypeGet()); +} + +//------------------------------------------------------------------------ +// TreeNodeInfoInitCall: Set the NodeInfo for a call. +// +// Arguments: +// call - The call node of interest +// +// Return Value: +// None. +// +void Lowering::TreeNodeInfoInitCall(GenTreeCall* call) +{ + TreeNodeInfo* info = &(call->gtLsraInfo); + LinearScan* l = m_lsra; + Compiler* compiler = comp; + bool hasMultiRegRetVal = false; + ReturnTypeDesc* retTypeDesc = nullptr; + + info->srcCount = 0; + if (call->TypeGet() != TYP_VOID) + { + hasMultiRegRetVal = call->HasMultiRegRetVal(); + if (hasMultiRegRetVal) + { + // dst count = number of registers in which the value is returned by call + retTypeDesc = call->GetReturnTypeDesc(); + info->dstCount = retTypeDesc->GetReturnRegCount(); + } + else + { + info->dstCount = 1; + } + } + else + { + info->dstCount = 0; + } + + GenTree* ctrlExpr = call->gtControlExpr; + if (call->gtCallType == CT_INDIRECT) + { + // either gtControlExpr != null or gtCallAddr != null. + // Both cannot be non-null at the same time. + assert(ctrlExpr == nullptr); + assert(call->gtCallAddr != nullptr); + ctrlExpr = call->gtCallAddr; + } + + // set reg requirements on call target represented as control sequence. 
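+    // For illustration, assuming a hypothetical indirect call "call [pfnTarget]":
+    // ctrlExpr is the target-address tree, which adds one register source below
+    // and, for an ARM64 fast tail call, is constrained to IP0 so the epilog can
+    // emit "br xip0".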
+    if (ctrlExpr != nullptr)
+    {
+        // we should never see a gtControlExpr whose type is void.
+        assert(ctrlExpr->TypeGet() != TYP_VOID);
+
+        info->srcCount++;
+
+        // In case of a fast tail call implemented as a jmp, make sure that gtControlExpr is
+        // computed into a register.
+        if (call->IsFastTailCall())
+        {
+            NYI_ARM("tail call");
+
+#ifdef _TARGET_ARM64_
+            // Fast tail call - make sure that call target is always computed in IP0
+            // so that epilog sequence can generate "br xip0" to achieve fast tail call.
+            ctrlExpr->gtLsraInfo.setSrcCandidates(l, genRegMask(REG_IP0));
+#endif // _TARGET_ARM64_
+        }
+    }
+#ifdef _TARGET_ARM_
+    else
+    {
+        info->internalIntCount = 1;
+    }
+#endif // _TARGET_ARM_
+
+    RegisterType registerType = call->TypeGet();
+
+// Set destination candidates for return value of the call.
+
+#ifdef _TARGET_ARM_
+    if (call->IsHelperCall(compiler, CORINFO_HELP_INIT_PINVOKE_FRAME))
+    {
+        // The ARM CORINFO_HELP_INIT_PINVOKE_FRAME helper uses a custom calling convention that returns with
+        // TCB in REG_PINVOKE_TCB. fgMorphCall() sets the correct argument registers.
+        info->setDstCandidates(l, RBM_PINVOKE_TCB);
+    }
+    else
+#endif // _TARGET_ARM_
+        if (hasMultiRegRetVal)
+    {
+        assert(retTypeDesc != nullptr);
+        info->setDstCandidates(l, retTypeDesc->GetABIReturnRegs());
+    }
+    else if (varTypeIsFloating(registerType))
+    {
+        info->setDstCandidates(l, RBM_FLOATRET);
+    }
+    else if (registerType == TYP_LONG)
+    {
+        info->setDstCandidates(l, RBM_LNGRET);
+    }
+    else
+    {
+        info->setDstCandidates(l, RBM_INTRET);
+    }
+
+    // If there is an explicit this pointer, we don't want that node to produce anything
+    // as it is redundant
+    if (call->gtCallObjp != nullptr)
+    {
+        GenTreePtr thisPtrNode = call->gtCallObjp;
+
+        if (thisPtrNode->gtOper == GT_PUTARG_REG)
+        {
+            l->clearOperandCounts(thisPtrNode);
+            l->clearDstCount(thisPtrNode->gtOp.gtOp1);
+        }
+        else
+        {
+            l->clearDstCount(thisPtrNode);
+        }
+    }
+
+    // First, count reg args
+    bool callHasFloatRegArgs = false;
+
+    for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
+    {
+        assert(list->OperIsList());
+
+        GenTreePtr argNode = list->Current();
+
+        fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode);
+        assert(curArgTabEntry);
+
+        if (curArgTabEntry->regNum == REG_STK)
+        {
+            // late arg that is not passed in a register
+            assert(argNode->gtOper == GT_PUTARG_STK);
+
+            TreeNodeInfoInitPutArgStk(argNode->AsPutArgStk(), curArgTabEntry);
+            continue;
+        }
+
+        // A GT_FIELD_LIST has a TYP_VOID, but is used to represent a multireg struct
+        if (argNode->OperGet() == GT_FIELD_LIST)
+        {
+            // There could be up to 2-4 PUTARG_REGs in the list (3 or 4 can only occur for HFAs)
+            regNumber argReg = curArgTabEntry->regNum;
+            for (GenTreeFieldList* entry = argNode->AsFieldList(); entry != nullptr; entry = entry->Rest())
+            {
+                TreeNodeInfoInitPutArgReg(entry->Current()->AsUnOp(), argReg, *info, false, &callHasFloatRegArgs);
+
+                // Update argReg for the next putarg_reg (if any)
+                argReg = genRegArgNext(argReg);
+            }
+        }
+        else
+        {
+            TreeNodeInfoInitPutArgReg(argNode->AsUnOp(), curArgTabEntry->regNum, *info, false, &callHasFloatRegArgs);
+        }
+    }
+
+    // Now, count stack args
+    // Note that these need to be computed into a register, but then
+    // they're just stored to the stack - so the reg doesn't
+    // need to remain live until the call. In fact, it must not
+    // because the code generator doesn't actually consider it live,
+    // so it can't be spilled.
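+    //
+    // For reference, gtCallArgs below is a GT_LIST chain: gtOp.gtOp1 holds the
+    // argument tree and gtOp.gtOp2 the rest of the list, so the loop advances
+    // through gtOp2 until it reaches null.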
+
+    GenTreePtr args = call->gtCallArgs;
+    while (args)
+    {
+        GenTreePtr arg = args->gtOp.gtOp1;
+
+        // Skip arguments that have been moved to the Late Arg list
+        if (!(args->gtFlags & GTF_LATE_ARG))
+        {
+            if (arg->gtOper == GT_PUTARG_STK)
+            {
+                fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, arg);
+                assert(curArgTabEntry);
+
+                assert(curArgTabEntry->regNum == REG_STK);
+
+                TreeNodeInfoInitPutArgStk(arg->AsPutArgStk(), curArgTabEntry);
+            }
+            else
+            {
+                TreeNodeInfo* argInfo = &(arg->gtLsraInfo);
+                if (argInfo->dstCount != 0)
+                {
+                    argInfo->isLocalDefUse = true;
+                }
+
+                argInfo->dstCount = 0;
+            }
+        }
+        args = args->gtOp.gtOp2;
+    }
+
+    // If it is a fast tail call, it is already preferenced to use IP0.
+    // Therefore, there is no need to set src candidates on the call target again.
+    if (call->IsVarargs() && callHasFloatRegArgs && !call->IsFastTailCall() && (ctrlExpr != nullptr))
+    {
+        NYI_ARM("float reg varargs");
+
+        // Don't assign the call target to any of the argument registers because
+        // we will use them to also pass floating point arguments as required
+        // by Arm64 ABI.
+        ctrlExpr->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~(RBM_ARG_REGS));
+    }
+
+#ifdef _TARGET_ARM_
+
+    if (call->NeedsNullCheck())
+    {
+        info->internalIntCount++;
+    }
+
+#endif // _TARGET_ARM_
+}
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitPutArgStk: Set the NodeInfo for a GT_PUTARG_STK node
+//
+// Arguments:
+//    argNode - a GT_PUTARG_STK node
+//
+// Return Value:
+//    None.
+//
+// Notes:
+//    Set the child node(s) to be contained when we have a multireg arg
+//
+void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode, fgArgTabEntryPtr info)
+{
+    assert(argNode->gtOper == GT_PUTARG_STK);
+
+    GenTreePtr putArgChild = argNode->gtOp.gtOp1;
+
+    // Initialize 'argNode' as not contained, as this is both the default case
+    // and how MakeSrcContained expects to find things set up.
+    //
+    argNode->gtLsraInfo.srcCount = 1;
+    argNode->gtLsraInfo.dstCount = 0;
+
+    // If we have a TYP_STRUCT argument (or a GT_FIELD_LIST), it must be a multireg pass-by-value struct
+    if ((putArgChild->TypeGet() == TYP_STRUCT) || (putArgChild->OperGet() == GT_FIELD_LIST))
+    {
+        // We will use store instructions that each write a register sized value
+
+        if (putArgChild->OperGet() == GT_FIELD_LIST)
+        {
+            // We consume all of the items in the GT_FIELD_LIST
+            argNode->gtLsraInfo.srcCount = info->numSlots;
+        }
+        else
+        {
+            // We could use a ldp/stp sequence so we need two internal registers
+            argNode->gtLsraInfo.internalIntCount = 2;
+
+            if (putArgChild->OperGet() == GT_OBJ)
+            {
+                GenTreePtr objChild = putArgChild->gtOp.gtOp1;
+                if (objChild->OperGet() == GT_LCL_VAR_ADDR)
+                {
+                    // We will generate all of the code for the GT_PUTARG_STK, the GT_OBJ and the GT_LCL_VAR_ADDR
+                    // as one contained operation
+                    //
+                    MakeSrcContained(putArgChild, objChild);
+                }
+            }
+
+            // We will generate all of the code for the GT_PUTARG_STK and its child node
+            // as one contained operation
+            //
+            MakeSrcContained(argNode, putArgChild);
+        }
+    }
+    else
+    {
+        // We must not have a multi-reg struct
+        assert(info->numSlots == 1);
+    }
+}
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitBlockStore: Set the NodeInfo for a block store.
+//
+// Arguments:
+//    blkNode - The block store node of interest
+//
+// Return Value:
+//    None.
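+//
+// Notes:
+//    A rough sketch of the requirements set below, assuming the helper-call
+//    path: dstAddr is consumed in RBM_ARG_0 and the init value or srcAddr in
+//    RBM_ARG_1, while the block size is either loaded from a reserved internal
+//    register (constant size) or consumed as a third source (GT_STORE_DYN_BLK).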
+//
+void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
+{
+    GenTree*    dstAddr  = blkNode->Addr();
+    unsigned    size     = blkNode->gtBlkSize;
+    GenTree*    source   = blkNode->Data();
+    LinearScan* l        = m_lsra;
+    Compiler*   compiler = comp;
+
+    // Sources are dest address and initVal or source.
+    // We may require an additional source or temp register for the size.
+    blkNode->gtLsraInfo.srcCount = 2;
+    blkNode->gtLsraInfo.dstCount = 0;
+    GenTreePtr srcAddrOrFill     = nullptr;
+    bool       isInitBlk         = blkNode->OperIsInitBlkOp();
+
+    if (!isInitBlk)
+    {
+        // CopyObj or CopyBlk
+        if (source->gtOper == GT_IND)
+        {
+            srcAddrOrFill = blkNode->Data()->gtGetOp1();
+            // We're effectively setting source as contained, but can't call MakeSrcContained, because the
+            // "inheritance" of the srcCount is to a child not a parent - it would "just work" but could be misleading.
+            // If srcAddr is already non-contained, we don't need to change it.
+            if (srcAddrOrFill->gtLsraInfo.getDstCount() == 0)
+            {
+                srcAddrOrFill->gtLsraInfo.setDstCount(1);
+                srcAddrOrFill->gtLsraInfo.setSrcCount(source->gtLsraInfo.srcCount);
+            }
+            m_lsra->clearOperandCounts(source);
+        }
+        else if (!source->IsMultiRegCall() && !source->OperIsSIMD())
+        {
+            assert(source->IsLocal());
+            MakeSrcContained(blkNode, source);
+        }
+    }
+
+    if (isInitBlk)
+    {
+        GenTreePtr initVal = source;
+        if (initVal->OperIsInitVal())
+        {
+            initVal = initVal->gtGetOp1();
+        }
+        srcAddrOrFill = initVal;
+
+        if (blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindUnroll)
+        {
+            // TODO-ARM-CQ: Currently we generate a helper call for every
+            // initblk we encounter. Later on we should implement loop unrolling
+            // code sequences to improve CQ.
+            // For reference see the code in lsraxarch.cpp.
+            NYI_ARM("initblk loop unrolling is currently not implemented.");
+
+#ifdef _TARGET_ARM64_
+            // No additional temporaries required
+            ssize_t fill = initVal->gtIntCon.gtIconVal & 0xFF;
+            if (fill == 0)
+            {
+                MakeSrcContained(blkNode, source);
+            }
+#endif // _TARGET_ARM64_
+        }
+        else
+        {
+            assert(blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindHelper);
+            // The helper follows the regular ABI.
+            dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_ARG_0);
+            initVal->gtLsraInfo.setSrcCandidates(l, RBM_ARG_1);
+            if (size != 0)
+            {
+                // Reserve a temp register for the block size argument.
+                blkNode->gtLsraInfo.setInternalCandidates(l, RBM_ARG_2);
+                blkNode->gtLsraInfo.internalIntCount = 1;
+            }
+            else
+            {
+                // The block size argument is a third argument to GT_STORE_DYN_BLK
+                noway_assert(blkNode->gtOper == GT_STORE_DYN_BLK);
+                blkNode->gtLsraInfo.setSrcCount(3);
+                GenTree* sizeNode = blkNode->AsDynBlk()->gtDynamicSize;
+                sizeNode->gtLsraInfo.setSrcCandidates(l, RBM_ARG_2);
+            }
+        }
+    }
+    else
+    {
+        // CopyObj or CopyBlk
+        // Sources are src and dest and size if not constant.
+        if (blkNode->OperGet() == GT_STORE_OBJ)
+        {
+            // CopyObj
+            NYI_ARM("GT_STORE_OBJ needs a write barrier implementation");
+
+#ifdef _TARGET_ARM64_
+
+            // We don't need to materialize the struct size but we still need
+            // a temporary register to perform the sequence of loads and stores.
+            blkNode->gtLsraInfo.internalIntCount = 1;
+
+            dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_WRITE_BARRIER_DST_BYREF);
+            // If we have a source address we want it in REG_WRITE_BARRIER_SRC_BYREF.
+            // Otherwise, if it is a local, codegen will put its address in REG_WRITE_BARRIER_SRC_BYREF,
+            // which is killed by a StoreObj (and thus needn't be reserved).
+ if (srcAddrOrFill != nullptr) + { + srcAddrOrFill->gtLsraInfo.setSrcCandidates(l, RBM_WRITE_BARRIER_SRC_BYREF); + } + +#endif // _TARGET_ARM64_ + } + else + { + // CopyBlk + short internalIntCount = 0; + regMaskTP internalIntCandidates = RBM_NONE; + + if (blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindUnroll) + { + // TODO-ARM-CQ: cpblk loop unrolling is currently not implemented. + // In case of a CpBlk with a constant size and less than CPBLK_UNROLL_LIMIT size + // we should unroll the loop to improve CQ. + // For reference see the code in lsraxarch.cpp. + NYI_ARM("cpblk loop unrolling is currently not implemented."); + +#ifdef _TARGET_ARM64_ + + internalIntCount = 1; + internalIntCandidates = RBM_ALLINT; + + if (size >= 2 * REGSIZE_BYTES) + { + // Use ldp/stp to reduce code size and improve performance + internalIntCount++; + } + +#endif // _TARGET_ARM64_ + } + else + { + assert(blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindHelper); + dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_ARG_0); + // The srcAddr goes in arg1. + if (srcAddrOrFill != nullptr) + { + srcAddrOrFill->gtLsraInfo.setSrcCandidates(l, RBM_ARG_1); + } + if (size != 0) + { + // Reserve a temp register for the block size argument. + internalIntCandidates |= RBM_ARG_2; + internalIntCount++; + } + else + { + // The block size argument is a third argument to GT_STORE_DYN_BLK + noway_assert(blkNode->gtOper == GT_STORE_DYN_BLK); + blkNode->gtLsraInfo.setSrcCount(3); + GenTree* blockSize = blkNode->AsDynBlk()->gtDynamicSize; + blockSize->gtLsraInfo.setSrcCandidates(l, RBM_ARG_2); + } + } + if (internalIntCount != 0) + { + blkNode->gtLsraInfo.internalIntCount = internalIntCount; + blkNode->gtLsraInfo.setInternalCandidates(l, internalIntCandidates); + } + } + } +} + +#endif // _TARGET_ARMARCH_ + +#endif // !LEGACY_BACKEND -- 2.7.4