From 0ce4fab8647ac3fbc200c27adfcf31aaae0f660b Mon Sep 17 00:00:00 2001 From: Hyeongseok Oh Date: Thu, 29 Jun 2017 03:01:18 +0900 Subject: [PATCH] [RyuJIT/ARM32] Enable passing large split struct argument (#12050) * [RyuJIT/ARM32] Enable passing large split struct This enables passing split structs larger than 16 bytes. To support split structs, it defines a new GenTree node type - GenTreePutArgSplit. GenTreePutArgSplit is similar to GenTreePutArgStk, but it is used for split structs only and it has additional fields to save register information. The GenTreePutArgSplit node is generated in the lowering phase. * Apply reviews: split struct argument passing - Fix some comments: genPutArgSplit, GenTreePutArgStk, GenTreePutArgSplit, NewPutArg, ArgComplete - Add assertion check in genPutArgSplit, genCallInstruction - Rename variable: baseReg - Change flag for GenTreePutArgSplit: _TARGET_ARM_ && !LEGACY_BACKEND - Change type of gtOtherRegs in GenTreePutArgSplit - Remove duplicated code: NewPutArg - Implement spill & restore flag for GenTreePutArgSplit * Apply reviews - Rebase - Update managing spillFlag for split struct - Implement spill & restore code generation - Fix typos and rename variables - Fix bug related to printing gentree for split struct * Fix bug and comments - Fix bug in regset.cpp - Add comments in morph.cpp's NYI_ARM - Fix comment typos in lsraarmarch.cpp --- src/jit/codegenarmarch.cpp | 167 +++++++++++++++++++ src/jit/codegenlinear.cpp | 72 ++++++++ src/jit/codegenlinear.h | 6 + src/jit/gentree.cpp | 87 ++++++++++ src/jit/gentree.h | 222 ++++++++++++++++++++++++- src/jit/gtlist.h | 3 + src/jit/gtstructs.h | 3 + src/jit/lower.cpp | 404 ++++++++++++++++++++++++--------------------- src/jit/lower.h | 3 + src/jit/lsra.cpp | 21 +++ src/jit/lsraarm.cpp | 1 + src/jit/lsraarmarch.cpp | 65 ++++++++ src/jit/morph.cpp | 68 +++++--- src/jit/regset.cpp | 41 ++++- 14 files changed, 948 insertions(+), 215 deletions(-) diff --git a/src/jit/codegenarmarch.cpp b/src/jit/codegenarmarch.cpp 
index d70fbb4..86dec5e 100644 --- a/src/jit/codegenarmarch.cpp +++ b/src/jit/codegenarmarch.cpp @@ -276,6 +276,12 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode) genPutArgReg(treeNode->AsOp()); break; +#ifdef _TARGET_ARM_ + case GT_PUTARG_SPLIT: + genPutArgSplit(treeNode->AsPutArgSplit()); + break; +#endif + case GT_CALL: genCallInstruction(treeNode->AsCall()); break; @@ -885,6 +891,151 @@ void CodeGen::genPutArgReg(GenTreeOp* tree) genProduceReg(tree); } +#ifdef _TARGET_ARM_ +//--------------------------------------------------------------------- +// genPutArgSplit - generate code for a GT_PUTARG_SPLIT node +// +// Arguments +// treeNode - the GT_PUTARG_SPLIT node +// +// Return value: +// None +// +void CodeGen::genPutArgSplit(GenTreePutArgSplit* treeNode) +{ + assert(treeNode->OperIs(GT_PUTARG_SPLIT)); + + GenTreePtr source = treeNode->gtOp1; + emitter* emit = getEmitter(); + + noway_assert(source->OperGet() == GT_OBJ); + + var_types targetType = source->TypeGet(); + noway_assert(varTypeIsStruct(targetType)); + + regNumber baseReg = treeNode->ExtractTempReg(); + regNumber addrReg = REG_NA; + + GenTreeLclVarCommon* varNode = nullptr; + GenTreePtr addrNode = nullptr; + + addrNode = source->gtOp.gtOp1; + + // addrNode can either be a GT_LCL_VAR_ADDR or an address expression + // + if (addrNode->OperGet() == GT_LCL_VAR_ADDR) + { + // We have a GT_OBJ(GT_LCL_VAR_ADDR) + // + // We will treat this case as a local variable source + // (i.e. as if we had this GT_LCL_VAR directly as the source) + // so record the GT_LCL_VAR_ADDR node in 'varNode', clear 'addrNode' + // and continue to the codegen for the LCL_VAR node below + // + varNode = addrNode->AsLclVarCommon(); + addrNode = nullptr; + } + + // Either varNode or addrNode must have been set up above, + // the xor ensures that only one of the two is set up, not both + assert((varNode != nullptr) ^ (addrNode != nullptr)); + + // Setup the structSize, isHfa, and gcPtrCount + BYTE* gcPtrs = treeNode->gtGcPtrs; + unsigned gcPtrCount = 
treeNode->gtNumberReferenceSlots; // The count of GC pointers in the struct + int structSize = treeNode->getArgSize(); + bool isHfa = treeNode->gtIsHfa; + + // This is the varNum for our load operations, + // only used when we have a struct with a LclVar source + unsigned srcVarNum = BAD_VAR_NUM; + + if (varNode != nullptr) + { + srcVarNum = varNode->gtLclNum; + assert(srcVarNum < compiler->lvaCount); + } + else // addrNode is used + { + assert(addrNode != nullptr); + + // Generate code to load the address that we need into a register + genConsumeAddress(addrNode); + addrReg = addrNode->gtRegNum; + } + + // If we have an HFA we can't have any GC pointers. + // Note that a split struct may be larger than 16 bytes. + if (isHfa) + { + noway_assert(gcPtrCount == 0); + } + + unsigned varNumOut = compiler->lvaOutgoingArgSpaceVar; + unsigned argOffsetMax = compiler->lvaOutgoingArgSpaceSize; + unsigned argOffsetOut = treeNode->gtSlotNum * TARGET_POINTER_SIZE; + + // Store the stack-passed portion of the struct first + unsigned nextIndex = treeNode->gtNumRegs; + unsigned structOffset = nextIndex * TARGET_POINTER_SIZE; + int remainingSize = structSize - structOffset; + + // remainingSize is always a multiple of TARGET_POINTER_SIZE + assert(remainingSize % TARGET_POINTER_SIZE == 0); + while (remainingSize > 0) + { + var_types type = compiler->getJitGCType(gcPtrs[nextIndex]); + + if (varNode != nullptr) + { + // Load from our srcVarNum source + emit->emitIns_R_S(INS_ldr, emitTypeSize(type), baseReg, srcVarNum, structOffset); + } + else + { + // check for case of destroying the addrRegister while we still need it + assert(baseReg != addrReg); + + // Load from our address expression source + emit->emitIns_R_R_I(INS_ldr, emitTypeSize(type), baseReg, addrReg, structOffset); + } + + // Emit str instruction to store the register into the outgoing argument area + emit->emitIns_S_R(INS_str, emitTypeSize(type), baseReg, varNumOut, argOffsetOut); + argOffsetOut += TARGET_POINTER_SIZE; // We stored 4-bytes of the 
struct + assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area + remainingSize -= TARGET_POINTER_SIZE; // We loaded 4-bytes of the struct + structOffset += TARGET_POINTER_SIZE; + nextIndex += 1; + } + + // Then load the register-passed portion of the struct + structOffset = 0; + for (unsigned idx = 0; idx < treeNode->gtNumRegs; idx++) + { + regNumber targetReg = treeNode->GetRegNumByIdx(idx); + var_types type = treeNode->GetRegType(idx); + + if (varNode != nullptr) + { + // Load from our srcVarNum source + emit->emitIns_R_S(INS_ldr, emitTypeSize(type), targetReg, srcVarNum, structOffset); + } + else + { + // check for case of destroying the addrRegister while we still need it + assert(targetReg != addrReg); + + // Load from our address expression source + emit->emitIns_R_R_I(INS_ldr, emitTypeSize(type), targetReg, addrReg, structOffset); + } + structOffset += TARGET_POINTER_SIZE; + } + + genProduceReg(treeNode); +} +#endif // _TARGET_ARM_ + //---------------------------------------------------------------------------------- // genMultiRegCallStoreToLocal: store multi-reg return value of a call node to a local // @@ -1616,6 +1767,22 @@ void CodeGen::genCallInstruction(GenTreeCall* call) #endif // _TARGET_ARM_ } } +#ifdef _TARGET_ARM_ + else if (curArgTabEntry->isSplit) + { + assert(curArgTabEntry->numRegs >= 1); + genConsumeArgSplitStruct(argNode->AsPutArgSplit()); + for (unsigned idx = 0; idx < curArgTabEntry->numRegs; idx++) + { + regNumber argReg = (regNumber)((unsigned)curArgTabEntry->regNum + idx); + regNumber allocReg = argNode->AsPutArgSplit()->GetRegNumByIdx(idx); + if (argReg != allocReg) + { + inst_RV_RV(ins_Move_Extend(argNode->TypeGet(), true), argReg, allocReg); + } + } + } +#endif else { regNumber argReg = curArgTabEntry->regNum; diff --git a/src/jit/codegenlinear.cpp b/src/jit/codegenlinear.cpp index 29bb61a..b98b6e1 100644 --- a/src/jit/codegenlinear.cpp +++ b/src/jit/codegenlinear.cpp @@ -1004,6 +1004,35 @@ void CodeGen::genUnspillRegIfNeeded(GenTree* 
tree) unspillTree->gtFlags &= ~GTF_SPILLED; } +#ifdef _TARGET_ARM_ + else if (unspillTree->OperIsPutArgSplit()) + { + GenTreePutArgSplit* splitArg = unspillTree->AsPutArgSplit(); + unsigned regCount = splitArg->gtNumRegs; + + // In case of split struct argument node, GTF_SPILLED flag on it indicates that + // one or more of its result regs are spilled. The split arg node needs to be + // queried to know which specific result regs are to be unspilled. + for (unsigned i = 0; i < regCount; ++i) + { + unsigned flags = splitArg->GetRegSpillFlagByIdx(i); + if ((flags & GTF_SPILLED) != 0) + { + BYTE* gcPtrs = splitArg->gtGcPtrs; + var_types dstType = splitArg->GetRegType(i); + regNumber dstReg = splitArg->GetRegNumByIdx(i); + + TempDsc* t = regSet.rsUnspillInPlace(splitArg, dstReg, i); + getEmitter()->emitIns_R_S(ins_Load(dstType), emitActualTypeSize(dstType), dstReg, t->tdTempNum(), + 0); + compiler->tmpRlsTemp(t); + gcInfo.gcMarkRegPtrVal(dstReg, dstType); + } + } + + unspillTree->gtFlags &= ~GTF_SPILLED; + } +#endif else { TempDsc* t = regSet.rsUnspillInPlace(unspillTree, unspillTree->gtRegNum); @@ -1389,6 +1418,31 @@ void CodeGen::genConsumePutStructArgStk(GenTreePutArgStk* putArgNode, } #endif // FEATURE_PUT_STRUCT_ARG_STK +#ifdef _TARGET_ARM_ +//------------------------------------------------------------------------ +// genConsumeArgSplitStruct: Consume register(s) in Call node to set split struct argument. +// Liveness update for the PutArgSplit node is not needed +// +// Arguments: +// putArgNode - the GT_PUTARG_SPLIT tree. +// +// Return Value: +// None. 
+// +void CodeGen::genConsumeArgSplitStruct(GenTreePutArgSplit* putArgNode) +{ + assert(putArgNode->OperGet() == GT_PUTARG_SPLIT); + assert(putArgNode->gtHasReg()); + + genUnspillRegIfNeeded(putArgNode); + + // Skip updating GC info + // GC info for all argument registers will be cleared in caller + + genCheckConsumeNode(putArgNode); +} +#endif + //------------------------------------------------------------------------ // genSetBlockSize: Ensure that the block size is in the given register // @@ -1583,6 +1637,24 @@ void CodeGen::genProduceReg(GenTree* tree) } } } +#ifdef _TARGET_ARM_ + else if (tree->OperIsPutArgSplit()) + { + GenTreePutArgSplit* argSplit = tree->AsPutArgSplit(); + unsigned regCount = argSplit->gtNumRegs; + + for (unsigned i = 0; i < regCount; ++i) + { + unsigned flags = argSplit->GetRegSpillFlagByIdx(i); + if ((flags & GTF_SPILL) != 0) + { + regNumber reg = argSplit->GetRegNumByIdx(i); + regSet.rsSpillTree(reg, argSplit, i); + gcInfo.gcMarkRegSetNpt(genRegMask(reg)); + } + } + } +#endif // _TARGET_ARM_ else { regSet.rsSpillTree(tree->gtRegNum, tree); diff --git a/src/jit/codegenlinear.h b/src/jit/codegenlinear.h index 6a737f6..64d5c4f 100644 --- a/src/jit/codegenlinear.h +++ b/src/jit/codegenlinear.h @@ -45,6 +45,9 @@ void genCodeForCompare(GenTreeOp* tree); void genIntrinsic(GenTreePtr treeNode); void genPutArgStk(GenTreePutArgStk* treeNode); void genPutArgReg(GenTreeOp* tree); +#ifdef _TARGET_ARM_ +void genPutArgSplit(GenTreePutArgSplit* treeNode); +#endif #if defined(_TARGET_XARCH_) unsigned getBaseVarForPutArgStk(GenTreePtr treeNode); @@ -144,6 +147,9 @@ void genConsumeBlockOp(GenTreeBlk* blkNode, regNumber dstReg, regNumber srcReg, #ifdef FEATURE_PUT_STRUCT_ARG_STK void genConsumePutStructArgStk(GenTreePutArgStk* putArgStkNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg); #endif // FEATURE_PUT_STRUCT_ARG_STK +#ifdef _TARGET_ARM_ +void genConsumeArgSplitStruct(GenTreePutArgSplit* putArgNode); +#endif void 
genConsumeRegs(GenTree* tree); void genConsumeOperands(GenTreeOp* tree); diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp index 65922a2..ebef0c1 100644 --- a/src/jit/gentree.cpp +++ b/src/jit/gentree.cpp @@ -328,6 +328,9 @@ void GenTree::InitNodeSize() // TODO-Throughput: This should not need to be a large node. The object info should be // obtained from the child node. GenTree::s_gtNodeSizes[GT_PUTARG_STK] = TREE_NODE_SZ_LARGE; +#if !defined(LEGACY_BACKEND) && defined(_TARGET_ARM_) + GenTree::s_gtNodeSizes[GT_PUTARG_SPLIT] = TREE_NODE_SZ_LARGE; +#endif #endif // FEATURE_PUT_STRUCT_ARG_STK assert(GenTree::s_gtNodeSizes[GT_RETURN] == GenTree::s_gtNodeSizes[GT_ASG]); @@ -390,6 +393,9 @@ void GenTree::InitNodeSize() // TODO-Throughput: This should not need to be a large node. The object info should be // obtained from the child node. static_assert_no_msg(sizeof(GenTreePutArgStk) <= TREE_NODE_SZ_LARGE); +#if !defined(LEGACY_BACKEND) && defined(_TARGET_ARM_) + static_assert_no_msg(sizeof(GenTreePutArgSplit) <= TREE_NODE_SZ_LARGE); +#endif #endif // FEATURE_PUT_STRUCT_ARG_STK #ifdef FEATURE_SIMD @@ -1742,6 +1748,22 @@ regMaskTP GenTree::gtGetRegMask() const } } } +#if !defined(LEGACY_BACKEND) && defined(_TARGET_ARM_) + else if (OperIsPutArgSplit()) + { + GenTree* tree = const_cast<GenTree*>(this); + GenTreePutArgSplit* splitArg = tree->AsPutArgSplit(); + unsigned regCount = splitArg->gtNumRegs; + + resultMask = RBM_NONE; + for (unsigned i = 0; i < regCount; ++i) + { + regNumber reg = splitArg->GetRegNumByIdx(i); + assert(reg != REG_NA); + resultMask |= genRegMask(reg); + } + } +#endif else { resultMask = genRegMask(gtRegNum); @@ -9540,6 +9562,9 @@ GenTreeUseEdgeIterator::GenTreeUseEdgeIterator(GenTree* node) case GT_PHYSREGDST: case GT_PUTARG_REG: case GT_PUTARG_STK: +#if !defined(LEGACY_BACKEND) && defined(_TARGET_ARM_) + case GT_PUTARG_SPLIT: +#endif // !LEGACY_BACKEND && _TARGET_ARM_ case GT_RETURNTRAP: m_edge = &m_node->AsUnOp()->gtOp1; assert(*m_edge != nullptr); @@ 
-12128,6 +12153,68 @@ void Compiler::gtGetArgMsg( } else { +#ifdef _TARGET_ARM_ + if (curArgTabEntry->isSplit) + { + regNumber firstReg = curArgTabEntry->regNum; + if (listCount == -1) + { + if (curArgTabEntry->numRegs == 1) + { + sprintf_s(bufp, bufLength, "arg%d %s out+%02x%c", argNum, compRegVarName(firstReg), + (curArgTabEntry->slotNum) * TARGET_POINTER_SIZE, 0); + } + else + { + regNumber lastReg = REG_STK; + char separator = (curArgTabEntry->numRegs == 2) ? ',' : '-'; + if (curArgTabEntry->isHfaRegArg) + { + unsigned lastRegNum = genMapFloatRegNumToRegArgNum(firstReg) + curArgTabEntry->numRegs - 1; + lastReg = genMapFloatRegArgNumToRegNum(lastRegNum); + } + else + { + unsigned lastRegNum = genMapIntRegNumToRegArgNum(firstReg) + curArgTabEntry->numRegs - 1; + lastReg = genMapIntRegArgNumToRegNum(lastRegNum); + } + sprintf_s(bufp, bufLength, "arg%d %s%c%s out+%02x%c", argNum, compRegVarName(firstReg), + separator, compRegVarName(lastReg), (curArgTabEntry->slotNum) * TARGET_POINTER_SIZE, + 0); + } + } + else + { + unsigned curArgNum = BAD_VAR_NUM; + bool isFloat = curArgTabEntry->isHfaRegArg; + if (isFloat) + { + curArgNum = genMapFloatRegNumToRegArgNum(firstReg) + listCount; + } + else + { + curArgNum = genMapIntRegNumToRegArgNum(firstReg) + listCount; + } + + if (!isFloat && curArgNum < MAX_REG_ARG) + { + regNumber curReg = genMapIntRegArgNumToRegNum(curArgNum); + sprintf_s(bufp, bufLength, "arg%d m%d %s%c", argNum, listCount, compRegVarName(curReg), 0); + } + else if (isFloat && curArgNum < MAX_FLOAT_REG_ARG) + { + regNumber curReg = genMapFloatRegArgNumToRegNum(curArgNum); + sprintf_s(bufp, bufLength, "arg%d m%d %s%c", argNum, listCount, compRegVarName(curReg), 0); + } + else + { + unsigned stackSlot = listCount - curArgTabEntry->numRegs; + sprintf_s(bufp, bufLength, "arg%d m%d out+%02x%c", argNum, listCount, (curArgTabEntry->slotNum + stackSlot) * TARGET_POINTER_SIZE, 0); + } + } + return; + } +#endif // _TARGET_ARM_ #if FEATURE_FIXED_OUT_ARGS if (listCount == -1) { diff --git a/src/jit/gentree.h 
b/src/jit/gentree.h index 9ed8e52..047e740 100644 --- a/src/jit/gentree.h +++ b/src/jit/gentree.h @@ -1293,6 +1293,15 @@ public: return OperIsStoreBlk(OperGet()); } + bool OperIsPutArgSplit() const + { +#if !defined(LEGACY_BACKEND) && defined(_TARGET_ARM_) + return gtOper == GT_PUTARG_SPLIT; +#else + return false; +#endif + } + bool OperIsPutArgStk() const { return gtOper == GT_PUTARG_STK; @@ -1305,7 +1314,7 @@ public: bool OperIsPutArg() const { - return OperIsPutArgStk() || OperIsPutArgReg(); + return OperIsPutArgStk() || OperIsPutArgReg() || OperIsPutArgSplit(); } bool OperIsAddrMode() const @@ -4907,6 +4916,9 @@ struct GenTreePutArgStk : public GenTreeUnOp unsigned gtNumSlots; // Number of slots for the argument to be passed on stack unsigned gtNumberReferenceSlots; // Number of reference slots. BYTE* gtGcPtrs; // gcPointers +#ifdef _TARGET_ARM_ + bool gtIsHfa; +#endif #endif // FEATURE_PUT_STRUCT_ARG_STK @@ -4921,6 +4933,212 @@ struct GenTreePutArgStk : public GenTreeUnOp #endif }; +#if !defined(LEGACY_BACKEND) && defined(_TARGET_ARM_) +// Represent the struct argument: split value in register(s) and stack +struct GenTreePutArgSplit : public GenTreePutArgStk +{ + unsigned gtNumRegs; + + GenTreePutArgSplit(GenTreePtr op1, + unsigned slotNum PUT_STRUCT_ARG_STK_ONLY_ARG(unsigned numSlots), + unsigned numRegs, + bool isHfa, + bool putIncomingArgArea = false, + GenTreeCall* callNode = nullptr) + : GenTreePutArgStk(GT_PUTARG_SPLIT, + TYP_STRUCT, + op1, + slotNum PUT_STRUCT_ARG_STK_ONLY_ARG(numSlots), + putIncomingArgArea, + callNode) + , gtNumRegs(numRegs) + { + gtIsHfa = isHfa; + ClearOtherRegs(); + ClearOtherRegFlags(); + } + + // Type required to support multi-reg struct arg. + var_types m_regType[MAX_REG_ARG]; + + // First reg of struct is always given by gtRegNum. + // gtOtherRegs holds the other reg numbers of struct. + // HFA args are not yet handled. 
+ regNumberSmall gtOtherRegs[MAX_REG_ARG - 1]; + + // GTF_SPILL or GTF_SPILLED flag on a multi-reg struct node indicates that one or + // more of its result regs are in that state. The spill flags of each of the + // argument registers are stored here. We only need 2 bits per register, + // so this is treated as a 2-bit array. + static const unsigned PACKED_GTF_SPILL = 1; + static const unsigned PACKED_GTF_SPILLED = 2; + unsigned char gtSpillFlags; + + //--------------------------------------------------------------------------- + // GetRegNumByIdx: get ith register allocated to this struct argument. + // + // Arguments: + // idx - index of the register + // + // Return Value: + // Return regNumber of ith register of this struct argument + // + regNumber GetRegNumByIdx(unsigned idx) const + { + assert(idx < MAX_REG_ARG); + + if (idx == 0) + { + return gtRegNum; + } + + return (regNumber)gtOtherRegs[idx - 1]; + } + + //---------------------------------------------------------------------- + // SetRegNumByIdx: set ith register of this struct argument + // + // Arguments: + // reg - reg number + // idx - index of the register + // + // Return Value: + // None + // + void SetRegNumByIdx(regNumber reg, unsigned idx) + { + assert(idx < MAX_REG_ARG); + if (idx == 0) + { + gtRegNum = reg; + } + else + { + gtOtherRegs[idx - 1] = reg; + assert(gtOtherRegs[idx - 1] == reg); + } + } + + //---------------------------------------------------------------------------- + // ClearOtherRegs: clear multi-reg state to indicate no regs are allocated + // + // Arguments: + // None + // + // Return Value: + // None + // + void ClearOtherRegs() + { + for (unsigned i = 0; i < MAX_REG_ARG - 1; ++i) + { + gtOtherRegs[i] = REG_NA; + } + } + + //---------------------------------------------------------------------- + // GetRegSpillFlagByIdx: get spill flag associated with the register + // specified by its index. 
+ // + // Arguments: + // idx - Position or index of the register + // + // Return Value: + // Returns GTF_* flags associated with the register. Only GTF_SPILL and GTF_SPILLED are considered. + // + unsigned GetRegSpillFlagByIdx(unsigned idx) const + { + assert(idx < MAX_REG_ARG); + + unsigned bits = gtSpillFlags >> (idx * 2); // It doesn't matter that we possibly leave other high bits here. + unsigned spillFlags = 0; + if (bits & PACKED_GTF_SPILL) + { + spillFlags |= GTF_SPILL; + } + if (bits & PACKED_GTF_SPILLED) + { + spillFlags |= GTF_SPILLED; + } + + return spillFlags; + } + + //---------------------------------------------------------------------- + // SetRegSpillFlagByIdx: set spill flags for the register + // specified by its index. + // + // Arguments: + // flags - GTF_* flags. Only GTF_SPILL and GTF_SPILLED are allowed. + // idx - Position or index of the register + // + // Return Value: + // None + // + void SetRegSpillFlagByIdx(unsigned flags, unsigned idx) + { + assert(idx < MAX_REG_ARG); + + unsigned bits = 0; + if (flags & GTF_SPILL) + { + bits |= PACKED_GTF_SPILL; + } + if (flags & GTF_SPILLED) + { + bits |= PACKED_GTF_SPILLED; + } + + // Clear only this register's two bits (the other registers' flags must be preserved) before 'or'ing in what we want there. + gtSpillFlags = (gtSpillFlags & ~((PACKED_GTF_SPILL | PACKED_GTF_SPILLED) << (idx * 2))) | (bits << (idx * 2)); + } + + //-------------------------------------------------------------------------- + // GetRegType: Get var_type of the register specified by index. + // + // Arguments: + // index - Index of the register. + // First register will have an index 0 and so on. + // + // Return Value: + // var_type of the register specified by its index. 
+ + var_types GetRegType(unsigned index) const + { + assert(index < gtNumRegs); + var_types result = m_regType[index]; + return result; + } + + //------------------------------------------------------------------- + // ClearOtherRegFlags: clear the packed GTF_* spill flags (gtSpillFlags) of all registers + // + // Arguments: + // None + // + // Return Value: + // None + // + void ClearOtherRegFlags() + { + gtSpillFlags = 0; + } + +#ifdef FEATURE_PUT_STRUCT_ARG_STK + unsigned getArgSize() + { + return (gtNumSlots + gtNumRegs) * TARGET_POINTER_SIZE; + } +#endif // FEATURE_PUT_STRUCT_ARG_STK + +#if DEBUGGABLE_GENTREE + GenTreePutArgSplit() : GenTreePutArgStk() + { + } +#endif +}; +#endif // !LEGACY_BACKEND && _TARGET_ARM_ + // Represents GT_COPY or GT_RELOAD node struct GenTreeCopyOrReload : public GenTreeUnOp { @@ -5476,7 +5694,7 @@ inline bool GenTree::IsMultiRegNode() const } #if !defined(LEGACY_BACKEND) && defined(_TARGET_ARM_) - if (gtOper == GT_MUL_LONG) + if (gtOper == GT_MUL_LONG || OperIsPutArgSplit()) { return true; } diff --git a/src/jit/gtlist.h b/src/jit/gtlist.h index b15d2a4..35c25b9 100644 --- a/src/jit/gtlist.h +++ b/src/jit/gtlist.h @@ -295,6 +295,9 @@ GTNODE(PINVOKE_PROLOG , GenTree ,0,GTK_LEAF|GTK_NOVALUE) // pinvo GTNODE(PINVOKE_EPILOG , GenTree ,0,GTK_LEAF|GTK_NOVALUE) // pinvoke epilog seq GTNODE(PUTARG_REG , GenTreeOp ,0,GTK_UNOP) // operator that places outgoing arg in register GTNODE(PUTARG_STK , GenTreePutArgStk ,0,GTK_UNOP|GTK_NOVALUE) // operator that places outgoing arg in stack +#if !defined(LEGACY_BACKEND) && defined(_TARGET_ARM_) +GTNODE(PUTARG_SPLIT , GenTreePutArgSplit ,0,GTK_UNOP) // operator that places outgoing arg in registers with stack (split struct in ARM32) +#endif // !LEGACY_BACKEND && _TARGET_ARM_ GTNODE(RETURNTRAP , GenTreeOp ,0,GTK_UNOP|GTK_NOVALUE) // a conditional call to wait on gc GTNODE(SWAP , GenTreeOp ,0,GTK_BINOP|GTK_NOVALUE) // op1 and op2 swap (registers) GTNODE(IL_OFFSET , GenTreeStmt ,0,GTK_LEAF|GTK_NOVALUE) // marks an IL offset for 
debugging purposes diff --git a/src/jit/gtstructs.h b/src/jit/gtstructs.h index 898aeec..fa462ad 100644 --- a/src/jit/gtstructs.h +++ b/src/jit/gtstructs.h @@ -96,6 +96,9 @@ GTSTRUCT_1(PhiArg , GT_PHI_ARG) GTSTRUCT_1(StoreInd , GT_STOREIND) GTSTRUCT_N(Indir , GT_STOREIND, GT_IND, GT_NULLCHECK, GT_BLK, GT_STORE_BLK, GT_OBJ, GT_STORE_OBJ, GT_DYN_BLK, GT_STORE_DYN_BLK) GTSTRUCT_1(PutArgStk , GT_PUTARG_STK) +#if !defined(LEGACY_BACKEND) && defined(_TARGET_ARM_) +GTSTRUCT_1(PutArgSplit , GT_PUTARG_SPLIT) +#endif GTSTRUCT_1(PhysReg , GT_PHYSREG) #ifdef FEATURE_SIMD GTSTRUCT_1(SIMD , GT_SIMD) diff --git a/src/jit/lower.cpp b/src/jit/lower.cpp index fe6c10c..0479b78 100644 --- a/src/jit/lower.cpp +++ b/src/jit/lower.cpp @@ -766,8 +766,7 @@ void Lowering::ReplaceArgWithPutArgOrCopy(GenTree** argSlot, GenTree* putArgOrCo { assert(argSlot != nullptr); assert(*argSlot != nullptr); - assert(putArgOrCopy->OperGet() == GT_PUTARG_REG || putArgOrCopy->OperGet() == GT_PUTARG_STK || - putArgOrCopy->OperGet() == GT_COPY); + assert(putArgOrCopy->OperIsPutArg() || putArgOrCopy->OperIs(GT_COPY)); GenTree* arg = *argSlot; @@ -828,99 +827,162 @@ GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryP isOnStack = info->regNum == REG_STK; #endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING - if (!isOnStack) +#ifdef _TARGET_ARM_ + // Struct can be split into register(s) and stack on ARM + if (info->isSplit) { -#ifdef FEATURE_SIMD - // TYP_SIMD8 is passed in an integer register. We need the putArg node to be of the int type. 
- if (type == TYP_SIMD8 && genIsValidIntReg(info->regNum)) + if (arg->OperGet() != GT_OBJ) + { + NYI_ARM("Lowering: Oper for struct argument is not GT_OBJ"); + } + + putArg = new (comp, GT_PUTARG_SPLIT) + GenTreePutArgSplit(arg, info->slotNum PUT_STRUCT_ARG_STK_ONLY_ARG(info->numSlots), info->numRegs, + info->isHfaRegArg, call->IsFastTailCall(), call); + + // Set GC Pointer info + GenTreePutArgSplit* argSplit = putArg->AsPutArgSplit(); + BYTE* gcLayout = new (comp, CMK_Codegen) BYTE[info->numSlots + info->numRegs]; + unsigned numRefs = comp->info.compCompHnd->getClassGClayout(arg->gtObj.gtClass, gcLayout); + argSplit->setGcPointers(numRefs, gcLayout); + + // Set type of registers + for (unsigned index = 0; index < info->numRegs; index++) { - type = TYP_LONG; + var_types regType = comp->getJitGCType(gcLayout[index]); + argSplit->m_regType[index] = regType; } + } + else +#endif // _TARGET_ARM_ + { + if (!isOnStack) + { +#ifdef FEATURE_SIMD + // TYP_SIMD8 is passed in an integer register. We need the putArg node to be of the int type. + if (type == TYP_SIMD8 && genIsValidIntReg(info->regNum)) + { + type = TYP_LONG; + } #endif // FEATURE_SIMD #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) - if (info->isStruct) - { - // The following code makes sure a register passed struct arg is moved to - // the register before the call is made. - // There are two cases (comments added in the code below.) - // 1. The struct is of size one eightbyte: - // In this case a new tree is created that is GT_PUTARG_REG - // with a op1 the original argument. - // 2. The struct is contained in 2 eightbytes: - // in this case the arg comes as a GT_FIELD_LIST of two GT_LCL_FLDs - the two eightbytes of the struct. - // The code creates a GT_PUTARG_REG node for each GT_LCL_FLD in the GT_FIELD_LIST - // and splices it in the list with the corresponding original GT_LCL_FLD tree as op1. 
- - assert(info->structDesc.eightByteCount != 0); - - if (info->structDesc.eightByteCount == 1) + if (info->isStruct) { - // clang-format off - // Case 1 above: Create a GT_PUTARG_REG node with op1 of the original tree. - // - // Here the IR for this operation: - // lowering call : - // N001(3, 2)[000017] ------ - N---- / --* &lclVar byref V00 loc0 - // N003(6, 5)[000052] * --XG------ - / --* indir int - // N004(3, 2)[000046] ------ - N---- + --* &lclVar byref V02 tmp0 - // (13, 11)[000070] -- - XG-- - R-- - arg0 in out + 00 / --* storeIndir int - // N009(3, 4)[000054] ------ - N----arg0 in rdi + --* lclFld int V02 tmp0[+0](last use) - // N011(33, 21)[000018] --CXG------ - *call void Test.Foo.test1 - // - // args : - // lowering arg : (13, 11)[000070] -- - XG-- - R-- - *storeIndir int - // - // late : - // lowering arg : N009(3, 4)[000054] ------ - N---- * lclFld int V02 tmp0[+0](last use) - // new node is : (3, 4)[000071] ------------ * putarg_reg int RV - // - // after : - // N001(3, 2)[000017] ------ - N---- / --* &lclVar byref V00 loc0 - // N003(6, 5)[000052] * --XG------ - / --* indir int - // N004(3, 2)[000046] ------ - N---- + --* &lclVar byref V02 tmp0 - // (13, 11)[000070] -- - XG-- - R-- - arg0 in out + 00 / --* storeIndir int - // N009(3, 4)[000054] ------ - N---- | / --* lclFld int V02 tmp0[+0](last use) - // (3, 4)[000071] ------------arg0 in rdi + --* putarg_reg int RV - // N011(33, 21)[000018] --CXG------ - *call void Test.Foo.test1 - // - // clang-format on + // The following code makes sure a register passed struct arg is moved to + // the register before the call is made. + // There are two cases (comments added in the code below.) + // 1. The struct is of size one eightbyte: + // In this case a new tree is created that is GT_PUTARG_REG + // with a op1 the original argument. + // 2. The struct is contained in 2 eightbytes: + // in this case the arg comes as a GT_FIELD_LIST of two GT_LCL_FLDs + // - the two eightbytes of the struct. 
+ // The code creates a GT_PUTARG_REG node for each GT_LCL_FLD in the GT_FIELD_LIST + // and splices it in the list with the corresponding original GT_LCL_FLD tree as op1. + + assert(info->structDesc.eightByteCount != 0); + + if (info->structDesc.eightByteCount == 1) + { + // clang-format off + // Case 1 above: Create a GT_PUTARG_REG node with op1 of the original tree. + // + // Here the IR for this operation: + // lowering call : + // N001(3, 2)[000017] ------ - N---- / --* &lclVar byref V00 loc0 + // N003(6, 5)[000052] * --XG------ - / --* indir int + // N004(3, 2)[000046] ------ - N---- + --* &lclVar byref V02 tmp0 + // (13, 11)[000070] -- - XG-- - R-- - arg0 in out + 00 / --* storeIndir int + // N009(3, 4)[000054] ------ - N----arg0 in rdi + --* lclFld int V02 tmp0[+0](last use) + // N011(33, 21)[000018] --CXG------ - *call void Test.Foo.test1 + // + // args : + // lowering arg : (13, 11)[000070] -- - XG-- - R-- - *storeIndir int + // + // late : + // lowering arg : N009(3, 4)[000054] ------ - N---- * lclFld int V02 tmp0[+0](last use) + // new node is : (3, 4)[000071] ------------ * putarg_reg int RV + // + // after : + // N001(3, 2)[000017] ------ - N---- / --* &lclVar byref V00 loc0 + // N003(6, 5)[000052] * --XG------ - / --* indir int + // N004(3, 2)[000046] ------ - N---- + --* &lclVar byref V02 tmp0 + // (13, 11)[000070] -- - XG-- - R-- - arg0 in out + 00 / --* storeIndir int + // N009(3, 4)[000054] ------ - N---- | / --* lclFld int V02 tmp0[+0](last use) + // (3, 4)[000071] ------------arg0 in rdi + --* putarg_reg int RV + // N011(33, 21)[000018] --CXG------ - *call void Test.Foo.test1 + // + // clang-format on - putArg = comp->gtNewOperNode(GT_PUTARG_REG, type, arg); + putArg = comp->gtNewOperNode(GT_PUTARG_REG, type, arg); + } + else if (info->structDesc.eightByteCount == 2) + { + // clang-format off + // Case 2 above: Convert the LCL_FLDs to PUTARG_REG + // + // lowering call : + // N001(3, 2) [000025] ------ - N----Source / --* &lclVar byref V01 loc1 
+ // N003(3, 2) [000056] ------ - N----Destination + --* &lclVar byref V03 tmp1 + // N006(1, 1) [000058] ------------ + --* const int 16 + // N007(12, 12)[000059] - A--G---- - L - arg0 SETUP / --* copyBlk void + // N009(3, 4) [000061] ------ - N----arg0 in rdi + --* lclFld long V03 tmp1[+0] + // N010(3, 4) [000063] ------------arg0 in rsi + --* lclFld long V03 tmp1[+8](last use) + // N014(40, 31)[000026] --CXG------ - *call void Test.Foo.test2 + // + // args : + // lowering arg : N007(12, 12)[000059] - A--G---- - L - *copyBlk void + // + // late : + // lowering arg : N012(11, 13)[000065] ------------ * struct + // + // after : + // N001(3, 2)[000025] ------ - N----Source / --* &lclVar byref V01 loc1 + // N003(3, 2)[000056] ------ - N----Destination + --* &lclVar byref V03 tmp1 + // N006(1, 1)[000058] ------------ + --* const int 16 + // N007(12, 12)[000059] - A--G---- - L - arg0 SETUP / --* copyBlk void + // N009(3, 4)[000061] ------ - N---- | / --* lclFld long V03 tmp1[+0] + // (3, 4)[000072] ------------arg0 in rdi + --* putarg_reg long + // N010(3, 4)[000063] ------------ | / --* lclFld long V03 tmp1[+8](last use) + // (3, 4)[000073] ------------arg0 in rsi + --* putarg_reg long + // N014(40, 31)[000026] --CXG------ - *call void Test.Foo.test2 + // + // clang-format on + + assert(arg->OperGet() == GT_FIELD_LIST); + + GenTreeFieldList* fieldListPtr = arg->AsFieldList(); + assert(fieldListPtr->IsFieldListHead()); + + for (unsigned ctr = 0; fieldListPtr != nullptr; fieldListPtr = fieldListPtr->Rest(), ctr++) + { + // Create a new GT_PUTARG_REG node with op1 the original GT_LCL_FLD. + GenTreePtr newOper = comp->gtNewOperNode( + GT_PUTARG_REG, + comp->GetTypeFromClassificationAndSizes(info->structDesc.eightByteClassifications[ctr], + info->structDesc.eightByteSizes[ctr]), + fieldListPtr->gtOp.gtOp1); + + // Splice in the new GT_PUTARG_REG node in the GT_FIELD_LIST + ReplaceArgWithPutArgOrCopy(&fieldListPtr->gtOp.gtOp1, newOper); + } + + // Just return arg. 
The GT_FIELD_LIST is not replaced. + // Nothing more to do. + return arg; + } + else + { + assert(false && "Illegal count of eightbytes for the CLR type system"); // No more than 2 eightbytes + // for the CLR. + } } - else if (info->structDesc.eightByteCount == 2) + else +#else // not defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) +#if FEATURE_MULTIREG_ARGS + if ((info->numRegs > 1) && (arg->OperGet() == GT_FIELD_LIST)) { - // clang-format off - // Case 2 above: Convert the LCL_FLDs to PUTARG_REG - // - // lowering call : - // N001(3, 2) [000025] ------ - N----Source / --* &lclVar byref V01 loc1 - // N003(3, 2) [000056] ------ - N----Destination + --* &lclVar byref V03 tmp1 - // N006(1, 1) [000058] ------------ + --* const int 16 - // N007(12, 12)[000059] - A--G---- - L - arg0 SETUP / --* copyBlk void - // N009(3, 4) [000061] ------ - N----arg0 in rdi + --* lclFld long V03 tmp1[+0] - // N010(3, 4) [000063] ------------arg0 in rsi + --* lclFld long V03 tmp1[+8](last use) - // N014(40, 31)[000026] --CXG------ - *call void Test.Foo.test2 - // - // args : - // lowering arg : N007(12, 12)[000059] - A--G---- - L - *copyBlk void - // - // late : - // lowering arg : N012(11, 13)[000065] ------------ * struct - // - // after : - // N001(3, 2)[000025] ------ - N----Source / --* &lclVar byref V01 loc1 - // N003(3, 2)[000056] ------ - N----Destination + --* &lclVar byref V03 tmp1 - // N006(1, 1)[000058] ------------ + --* const int 16 - // N007(12, 12)[000059] - A--G---- - L - arg0 SETUP / --* copyBlk void - // N009(3, 4)[000061] ------ - N---- | / --* lclFld long V03 tmp1[+0] - // (3, 4)[000072] ------------arg0 in rdi + --* putarg_reg long - // N010(3, 4)[000063] ------------ | / --* lclFld long V03 tmp1[+8](last use) - // (3, 4)[000073] ------------arg0 in rsi + --* putarg_reg long - // N014(40, 31)[000026] --CXG------ - *call void Test.Foo.test2 - // - // clang-format on - assert(arg->OperGet() == GT_FIELD_LIST); GenTreeFieldList* fieldListPtr = arg->AsFieldList(); @@ -928,12 
+990,11 @@ GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryP for (unsigned ctr = 0; fieldListPtr != nullptr; fieldListPtr = fieldListPtr->Rest(), ctr++) { - // Create a new GT_PUTARG_REG node with op1 the original GT_LCL_FLD. - GenTreePtr newOper = comp->gtNewOperNode( - GT_PUTARG_REG, - comp->GetTypeFromClassificationAndSizes(info->structDesc.eightByteClassifications[ctr], - info->structDesc.eightByteSizes[ctr]), - fieldListPtr->gtOp.gtOp1); + GenTreePtr curOp = fieldListPtr->gtOp.gtOp1; + var_types curTyp = curOp->TypeGet(); + + // Create a new GT_PUTARG_REG node with op1 + GenTreePtr newOper = comp->gtNewOperNode(GT_PUTARG_REG, curTyp, curOp); // Splice in the new GT_PUTARG_REG node in the GT_FIELD_LIST ReplaceArgWithPutArgOrCopy(&fieldListPtr->gtOp.gtOp1, newOper); @@ -944,119 +1005,88 @@ GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryP return arg; } else +#endif // FEATURE_MULTIREG_ARGS +#endif // not defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) { - assert(false && - "Illegal count of eightbytes for the CLR type system"); // No more than 2 eightbytes for the CLR. + putArg = comp->gtNewOperNode(GT_PUTARG_REG, type, arg); } } else -#else // not defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) -#if FEATURE_MULTIREG_ARGS - if ((info->numRegs > 1) && (arg->OperGet() == GT_FIELD_LIST)) { - assert(arg->OperGet() == GT_FIELD_LIST); + // Mark this one as tail call arg if it is a fast tail call. + // This provides the info to put this argument in in-coming arg area slot + // instead of in out-going arg area slot. 
- GenTreeFieldList* fieldListPtr = arg->AsFieldList(); - assert(fieldListPtr->IsFieldListHead()); + PUT_STRUCT_ARG_STK_ONLY(assert(info->isStruct == varTypeIsStruct(type))); // Make sure state is correct - for (unsigned ctr = 0; fieldListPtr != nullptr; fieldListPtr = fieldListPtr->Rest(), ctr++) - { - GenTreePtr curOp = fieldListPtr->gtOp.gtOp1; - var_types curTyp = curOp->TypeGet(); - - // Create a new GT_PUTARG_REG node with op1 - GenTreePtr newOper = comp->gtNewOperNode(GT_PUTARG_REG, curTyp, curOp); - - // Splice in the new GT_PUTARG_REG node in the GT_FIELD_LIST - ReplaceArgWithPutArgOrCopy(&fieldListPtr->gtOp.gtOp1, newOper); - } - - // Just return arg. The GT_FIELD_LIST is not replaced. - // Nothing more to do. - return arg; - } - else -#endif // FEATURE_MULTIREG_ARGS -#endif // not defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) - { - putArg = comp->gtNewOperNode(GT_PUTARG_REG, type, arg); - } - } - else - { - // Mark this one as tail call arg if it is a fast tail call. - // This provides the info to put this argument in in-coming arg area slot - // instead of in out-going arg area slot. - - PUT_STRUCT_ARG_STK_ONLY(assert(info->isStruct == varTypeIsStruct(type))); // Make sure state is correct - - putArg = new (comp, GT_PUTARG_STK) - GenTreePutArgStk(GT_PUTARG_STK, type, arg, info->slotNum PUT_STRUCT_ARG_STK_ONLY_ARG(info->numSlots), - call->IsFastTailCall(), call); + putArg = new (comp, GT_PUTARG_STK) + GenTreePutArgStk(GT_PUTARG_STK, type, arg, info->slotNum PUT_STRUCT_ARG_STK_ONLY_ARG(info->numSlots), + call->IsFastTailCall(), call); #ifdef FEATURE_PUT_STRUCT_ARG_STK - // If the ArgTabEntry indicates that this arg is a struct - // get and store the number of slots that are references. - // This is later used in the codegen for PUT_ARG_STK implementation - // for struct to decide whether and how many single eight-byte copies - // to be done (only for reference slots), so gcinfo is emitted. 
- // For non-reference slots faster/smaller size instructions are used - - // pair copying using XMM registers or rep mov instructions. - if (info->isStruct) - { - // We use GT_OBJ for non-SIMD struct arguments. However, for - // SIMD arguments the GT_OBJ has already been transformed. - if (arg->gtOper != GT_OBJ) - { - assert(varTypeIsSIMD(arg)); - } - else + // If the ArgTabEntry indicates that this arg is a struct + // get and store the number of slots that are references. + // This is later used in the codegen for PUT_ARG_STK implementation + // for struct to decide whether and how many single eight-byte copies + // to be done (only for reference slots), so gcinfo is emitted. + // For non-reference slots faster/smaller size instructions are used - + // pair copying using XMM registers or rep mov instructions. + if (info->isStruct) { - unsigned numRefs = 0; - BYTE* gcLayout = new (comp, CMK_Codegen) BYTE[info->numSlots]; - assert(!varTypeIsSIMD(arg)); - numRefs = comp->info.compCompHnd->getClassGClayout(arg->gtObj.gtClass, gcLayout); - putArg->AsPutArgStk()->setGcPointers(numRefs, gcLayout); + // We use GT_OBJ for non-SIMD struct arguments. However, for + // SIMD arguments the GT_OBJ has already been transformed. + if (arg->gtOper != GT_OBJ) + { + assert(varTypeIsSIMD(arg)); + } + else + { + unsigned numRefs = 0; + BYTE* gcLayout = new (comp, CMK_Codegen) BYTE[info->numSlots]; + assert(!varTypeIsSIMD(arg)); + numRefs = comp->info.compCompHnd->getClassGClayout(arg->gtObj.gtClass, gcLayout); + putArg->AsPutArgStk()->setGcPointers(numRefs, gcLayout); #ifdef _TARGET_X86_ - // On x86 VM lies about the type of a struct containing a pointer sized - // integer field by returning the type of its field as the type of struct. - // Such struct can be passed in a register depending its position in - // parameter list. VM does this unwrapping only one level and therefore - // a type like Struct Foo { Struct Bar { int f}} awlays needs to be - // passed on stack. 
Also, VM doesn't lie about type of such a struct - // when it is a field of another struct. That is VM doesn't lie about - // the type of Foo.Bar - // - // We now support the promotion of fields that are of type struct. - // However we only support a limited case where the struct field has a - // single field and that single field must be a scalar type. Say Foo.Bar - // field is getting passed as a parameter to a call, Since it is a TYP_STRUCT, - // as per x86 ABI it should always be passed on stack. Therefore GenTree - // node under a PUTARG_STK could be GT_OBJ(GT_LCL_VAR_ADDR(v1)), where - // local v1 could be a promoted field standing for Foo.Bar. Note that - // the type of v1 will be the type of field of Foo.Bar.f when Foo is - // promoted. That is v1 will be a scalar type. In this case we need to - // pass v1 on stack instead of in a register. - // - // TODO-PERF: replace GT_OBJ(GT_LCL_VAR_ADDR(v1)) with v1 if v1 is - // a scalar type and the width of GT_OBJ matches the type size of v1. - // Note that this cannot be done till call node arguments are morphed - // because we should not lose the fact that the type of argument is - // a struct so that the arg gets correctly marked to be passed on stack. - GenTree* objOp1 = arg->gtGetOp1(); - if (objOp1->OperGet() == GT_LCL_VAR_ADDR) - { - unsigned lclNum = objOp1->AsLclVarCommon()->GetLclNum(); - if (comp->lvaTable[lclNum].lvType != TYP_STRUCT) + // On x86 VM lies about the type of a struct containing a pointer sized + // integer field by returning the type of its field as the type of struct. + // Such struct can be passed in a register depending on its position in + // parameter list. VM does this unwrapping only one level and therefore + // a type like Struct Foo { Struct Bar { int f}} always needs to be + // passed on stack. Also, VM doesn't lie about type of such a struct + // when it is a field of another struct.
That is VM doesn't lie about + // the type of Foo.Bar + // + // We now support the promotion of fields that are of type struct. + // However we only support a limited case where the struct field has a + // single field and that single field must be a scalar type. Say Foo.Bar + // field is getting passed as a parameter to a call, Since it is a TYP_STRUCT, + // as per x86 ABI it should always be passed on stack. Therefore GenTree + // node under a PUTARG_STK could be GT_OBJ(GT_LCL_VAR_ADDR(v1)), where + // local v1 could be a promoted field standing for Foo.Bar. Note that + // the type of v1 will be the type of field of Foo.Bar.f when Foo is + // promoted. That is v1 will be a scalar type. In this case we need to + // pass v1 on stack instead of in a register. + // + // TODO-PERF: replace GT_OBJ(GT_LCL_VAR_ADDR(v1)) with v1 if v1 is + // a scalar type and the width of GT_OBJ matches the type size of v1. + // Note that this cannot be done till call node arguments are morphed + // because we should not lose the fact that the type of argument is + // a struct so that the arg gets correctly marked to be passed on stack. 
+ GenTree* objOp1 = arg->gtGetOp1(); + if (objOp1->OperGet() == GT_LCL_VAR_ADDR) { - comp->lvaSetVarDoNotEnregister(lclNum DEBUGARG(Compiler::DNER_VMNeedsStackAddr)); + unsigned lclNum = objOp1->AsLclVarCommon()->GetLclNum(); + if (comp->lvaTable[lclNum].lvType != TYP_STRUCT) + { + comp->lvaSetVarDoNotEnregister(lclNum DEBUGARG(Compiler::DNER_VMNeedsStackAddr)); + } } - } #endif // _TARGET_X86_ + } } - } #endif // FEATURE_PUT_STRUCT_ARG_STK + } } JITDUMP("new node is : "); diff --git a/src/jit/lower.h b/src/jit/lower.h index 486cc39..76cf481 100644 --- a/src/jit/lower.h +++ b/src/jit/lower.h @@ -231,6 +231,9 @@ private: #ifdef FEATURE_PUT_STRUCT_ARG_STK void LowerPutArgStk(GenTreePutArgStk* tree); void TreeNodeInfoInitPutArgStk(GenTreePutArgStk* tree); +#ifdef _TARGET_ARM_ + void TreeNodeInfoInitPutArgSplit(GenTreePutArgSplit* tree, TreeNodeInfo& info, fgArgTabEntryPtr argInfo); +#endif #endif // FEATURE_PUT_STRUCT_ARG_STK void TreeNodeInfoInitLclHeap(GenTree* tree); diff --git a/src/jit/lsra.cpp b/src/jit/lsra.cpp index 59e547c..2af8251 100644 --- a/src/jit/lsra.cpp +++ b/src/jit/lsra.cpp @@ -140,6 +140,11 @@ void lsraAssignRegToTree(GenTreePtr tree, regNumber reg, unsigned regIdx) GenTreeMulLong* mul = tree->AsMulLong(); mul->gtOtherReg = reg; } + else if (tree->OperGet() == GT_PUTARG_SPLIT) + { + GenTreePutArgSplit* putArg = tree->AsPutArgSplit(); + putArg->SetRegNumByIdx(reg, regIdx); + } #endif // _TARGET_ARM_ else { @@ -4148,6 +4153,15 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, useCandidates = allRegs(registerType); } +#ifdef _TARGET_ARM_ + if (tree->OperIsPutArgSplit()) + { + // get i-th candidate + currCandidates = genFindLowestReg(candidates); + candidates &= ~currCandidates; + } +#endif + if (interval == nullptr) { // Make a new interval @@ -9032,6 +9046,13 @@ void LinearScan::resolveRegisters() GenTreeCall* call = treeNode->AsCall(); call->SetRegSpillFlagByIdx(GTF_SPILL, currentRefPosition->getMultiRegIdx()); } +#ifdef _TARGET_ARM_ + 
else if (treeNode->OperIsPutArgSplit()) + { + GenTreePutArgSplit* splitArg = treeNode->AsPutArgSplit(); + splitArg->SetRegSpillFlagByIdx(GTF_SPILL, currentRefPosition->getMultiRegIdx()); + } +#endif } // If the value is reloaded or moved to a different register, we need to insert diff --git a/src/jit/lsraarm.cpp b/src/jit/lsraarm.cpp index 123bb2d..ee356d5 100644 --- a/src/jit/lsraarm.cpp +++ b/src/jit/lsraarm.cpp @@ -753,6 +753,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) case GT_MEMORYBARRIER: case GT_OBJ: case GT_COPY: + case GT_PUTARG_SPLIT: info->dstCount = tree->IsValue() ? 1 : 0; if (kind & (GTK_CONST | GTK_LEAF)) { diff --git a/src/jit/lsraarmarch.cpp b/src/jit/lsraarmarch.cpp index 5babb6a..e361478 100644 --- a/src/jit/lsraarmarch.cpp +++ b/src/jit/lsraarmarch.cpp @@ -582,6 +582,13 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call) #endif // _TARGET_ARM_ } } +#ifdef _TARGET_ARM_ + else if (argNode->OperGet() == GT_PUTARG_SPLIT) + { + fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode); + TreeNodeInfoInitPutArgSplit(argNode->AsPutArgSplit(), *info, curArgTabEntry); + } +#endif else { TreeNodeInfoInitPutArgReg(argNode->AsUnOp(), curArgTabEntry->regNum, *info, false, &callHasFloatRegArgs); @@ -612,6 +619,13 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call) TreeNodeInfoInitPutArgStk(arg->AsPutArgStk(), curArgTabEntry); } +#ifdef _TARGET_ARM_ + else if (arg->OperGet() == GT_PUTARG_SPLIT) + { + fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, arg); + TreeNodeInfoInitPutArgSplit(arg->AsPutArgSplit(), *info, curArgTabEntry); + } +#endif else { TreeNodeInfo* argInfo = &(arg->gtLsraInfo); @@ -723,6 +737,57 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode, fgArgTabEntr } } +#ifdef _TARGET_ARM_ +//------------------------------------------------------------------------ +// TreeNodeInfoInitPutArgSplit: Set the NodeInfo for a GT_PUTARG_SPLIT node +// +// Arguments: +// argNode - a 
GT_PUTARG_SPLIT node +// +// Return Value: +// None. +// +// Notes: +// Set the child node(s) to be contained +// +void Lowering::TreeNodeInfoInitPutArgSplit(GenTreePutArgSplit* argNode, TreeNodeInfo& info, fgArgTabEntryPtr argInfo) +{ + assert(argNode->gtOper == GT_PUTARG_SPLIT); + + GenTreePtr putArgChild = argNode->gtOp.gtOp1; + + // Initialize 'argNode' as not contained, as this is both the default case + // and how MakeSrcContained expects to find things set up. + // + argNode->gtLsraInfo.srcCount = 1; + argNode->gtLsraInfo.dstCount = argInfo->numRegs; + info.srcCount += argInfo->numRegs; + + regNumber argReg = argInfo->regNum; + regMaskTP argMask = RBM_NONE; + for (unsigned i = 0; i < argInfo->numRegs; i++) + { + argMask |= genRegMask((regNumber)((unsigned)argReg + i)); + } + argNode->gtLsraInfo.setDstCandidates(m_lsra, argMask); + + assert(putArgChild->TypeGet() == TYP_STRUCT); + assert(putArgChild->OperGet() == GT_OBJ); + // We could use a ldr/str sequence so we need an internal register + argNode->gtLsraInfo.internalIntCount = 1; + + GenTreePtr objChild = putArgChild->gtOp.gtOp1; + if (objChild->OperGet() == GT_LCL_VAR_ADDR) + { + // We will generate all of the code for the GT_PUTARG_SPLIT, the GT_OBJ and the GT_LCL_VAR_ADDR + // as one contained operation + // + MakeSrcContained(putArgChild, objChild); + } + MakeSrcContained(argNode, putArgChild); +} +#endif // _TARGET_ARM_ + //------------------------------------------------------------------------ // TreeNodeInfoInitBlockStore: Set the NodeInfo for a block store. // diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp index 92091c9..588309d 100644 --- a/src/jit/morph.cpp +++ b/src/jit/morph.cpp @@ -1439,6 +1439,13 @@ void fgArgInfo::ArgsComplete() continue; #endif } +#if defined(_TARGET_ARM_) && !defined(LEGACY_BACKEND) + else if (curArgTabEntry->isSplit) + { + hasStructRegArg = true; + hasStackArgs = true; + } +#endif else // we have a register argument, next we look for a struct type.
{ if (varTypeIsStruct(argx) FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY(|| curArgTabEntry->isStruct)) @@ -1557,6 +1564,12 @@ void fgArgInfo::ArgsComplete() { prevArgTabEntry->needPlace = true; } +#if defined(_TARGET_ARM_) && !defined(LEGACY_BACKEND) + else if (prevArgTabEntry->isSplit) + { + prevArgTabEntry->needPlace = true; + } +#endif #endif } } @@ -1566,9 +1579,6 @@ void fgArgInfo::ArgsComplete() // For RyuJIT backend we will expand a Multireg arg into a GT_FIELD_LIST // with multiple indirections, so here we consider spilling it into a tmp LclVar. // - // Note that Arm32 is a LEGACY_BACKEND and it defines FEATURE_MULTIREG_ARGS - // so we skip this for ARM32 until it is ported to use RyuJIT backend - // bool isMultiRegArg = (curArgTabEntry->numRegs > 1); @@ -1579,6 +1589,10 @@ void fgArgInfo::ArgsComplete() // Spill multireg struct arguments that have Assignments or Calls embedded in them curArgTabEntry->needTmp = true; } +#ifndef _TARGET_ARM_ + // TODO-Arm: This optimization is not implemented for ARM32 + // so we skip this for ARM32 until it is ported to use RyuJIT backend + // else { // We call gtPrepareCost to measure the cost of evaluating this tree @@ -1610,7 +1624,6 @@ void fgArgInfo::ArgsComplete() curArgTabEntry->needTmp = true; } break; - case 11: case 13: case 14: @@ -1632,6 +1645,7 @@ void fgArgInfo::ArgsComplete() } } } +#endif // !_TARGET_ARM_ } #endif // FEATURE_MULTIREG_ARGS #endif // LEGACY_BACKEND @@ -3415,11 +3429,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd), TARGET_POINTER_SIZE)) / TARGET_POINTER_SIZE; - if (isHfaArg) - { - hasMultiregStructArgs = true; - } - else if (size > 1 && size <= 4) + if (isHfaArg || size > 1) { hasMultiregStructArgs = true; } @@ -3806,18 +3816,10 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) hasMultiregStructArgs = true; } #elif defined(_TARGET_ARM_) - // TODO-Arm: Need to handle the case - // where 
structs passed by value can be split between registers and stack. - if (size > 1 && size <= 4) + if (size > 1) { hasMultiregStructArgs = true; } -#ifndef LEGACY_BACKEND - else if (size > 4 && passUsingIntRegs) - { - NYI_ARM("Struct can be split between registers and stack"); - } -#endif // !LEGACY_BACKEND #endif // _TARGET_ARM_ } @@ -4110,9 +4112,6 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) #ifdef _TARGET_ARM_ if (fltArgRegNum > MAX_FLOAT_REG_ARG) { -#ifndef LEGACY_BACKEND - NYI_ARM("Struct split between float registers and stack"); -#endif // !LEGACY_BACKEND // This indicates a partial enregistration of a struct type assert(varTypeIsStruct(argx)); unsigned numRegsPartial = size - (fltArgRegNum - MAX_FLOAT_REG_ARG); @@ -4142,9 +4141,6 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) #ifdef _TARGET_ARM_ if (intArgRegNum > MAX_REG_ARG) { -#ifndef LEGACY_BACKEND - NYI_ARM("Struct split between integer registers and stack"); -#endif // !LEGACY_BACKEND // This indicates a partial enregistration of a struct type assert((isStructArg) || argx->OperIsFieldList() || argx->OperIsCopyBlkOp() || (argx->gtOper == GT_COMMA && (args->gtFlags & GTF_ASG))); @@ -4768,6 +4764,30 @@ GenTreePtr Compiler::fgMorphMultiregStructArg(GenTreePtr arg, fgArgTabEntryPtr f NYI("fgMorphMultiregStructArg requires implementation for this target"); #endif +#ifdef _TARGET_ARM_ + if (fgEntryPtr->isSplit) + { + if (fgEntryPtr->isHfaRegArg) + { + // We cannot handle split struct morphed to GT_FIELD_LIST yet + NYI_ARM("Struct split between float registers and stack"); + } + else if (fgEntryPtr->numSlots + fgEntryPtr->numRegs > 4) + { + return arg; + } + else + { + // We cannot handle split struct morphed to GT_FIELD_LIST yet + NYI_ARM("Struct split between integer registers and stack"); + } + } + else if (!fgEntryPtr->isHfaRegArg && fgEntryPtr->numSlots > 4) + { + return arg; + } +#endif + #if FEATURE_MULTIREG_ARGS // Examine 'arg' and setup argValue objClass and structSize // diff 
--git a/src/jit/regset.cpp b/src/jit/regset.cpp index 48ec6ea..7204663 100644 --- a/src/jit/regset.cpp +++ b/src/jit/regset.cpp @@ -1527,6 +1527,9 @@ void RegSet::rsSpillTree(regNumber reg, GenTreePtr tree, unsigned regIdx /* =0 * GenTreeCall* call = nullptr; var_types treeType; +#if !defined(LEGACY_BACKEND) && defined(_TARGET_ARM_) + GenTreePutArgSplit* splitArg = nullptr; +#endif #ifndef LEGACY_BACKEND if (tree->IsMultiRegCall()) @@ -1535,8 +1538,15 @@ void RegSet::rsSpillTree(regNumber reg, GenTreePtr tree, unsigned regIdx /* =0 * ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc(); treeType = retTypeDesc->GetReturnRegType(regIdx); } +#ifdef _TARGET_ARM_ + else if (tree->OperIsPutArgSplit()) + { + splitArg = tree->AsPutArgSplit(); + treeType = splitArg->GetRegType(regIdx); + } +#endif // _TARGET_ARM_ else -#endif +#endif // !LEGACY_BACKEND { treeType = tree->TypeGet(); } @@ -1584,6 +1594,14 @@ void RegSet::rsSpillTree(regNumber reg, GenTreePtr tree, unsigned regIdx /* =0 * assert((regFlags & GTF_SPILL) != 0); regFlags &= ~GTF_SPILL; } +#ifdef _TARGET_ARM_ + else if (splitArg != nullptr) + { + regFlags = splitArg->GetRegSpillFlagByIdx(regIdx); + assert((regFlags & GTF_SPILL) != 0); + regFlags &= ~GTF_SPILL; + } +#endif // _TARGET_ARM_ else { assert(!varTypeIsMultiReg(tree)); @@ -1603,9 +1621,12 @@ void RegSet::rsSpillTree(regNumber reg, GenTreePtr tree, unsigned regIdx /* =0 * assert(tree->InReg()); assert(tree->gtRegNum == reg); } +#elif defined(_TARGET_ARM_) + assert(tree->gtRegNum == reg || (call != nullptr && call->GetRegNumByIdx(regIdx) == reg) || + (splitArg != nullptr && splitArg->GetRegNumByIdx(regIdx) == reg)); #else assert(tree->gtRegNum == reg || (call != nullptr && call->GetRegNumByIdx(regIdx) == reg)); -#endif // CPU_LONG_USES_REGPAIR +#endif // !CPU_LONG_USES_REGPAIR && !_TARGET_ARM_ // Are any registers free for spillage? 
SpillDsc* spill = SpillDsc::alloc(m_rsCompiler, this, tempType); @@ -1726,6 +1747,13 @@ void RegSet::rsSpillTree(regNumber reg, GenTreePtr tree, unsigned regIdx /* =0 * regFlags |= GTF_SPILLED; call->SetRegSpillFlagByIdx(regFlags, regIdx); } +#ifdef _TARGET_ARM_ + else if (splitArg != nullptr) + { + regFlags |= GTF_SPILLED; + splitArg->SetRegSpillFlagByIdx(regFlags, regIdx); + } +#endif // _TARGET_ARM_ #endif //! LEGACY_BACKEND } @@ -2355,6 +2383,15 @@ TempDsc* RegSet::rsUnspillInPlace(GenTreePtr tree, regNumber oldReg, unsigned re flags &= ~GTF_SPILLED; call->SetRegSpillFlagByIdx(flags, regIdx); } +#if !defined(LEGACY_BACKEND) && defined(_TARGET_ARM_) + else if (tree->OperIsPutArgSplit()) + { + GenTreePutArgSplit* splitArg = tree->AsPutArgSplit(); + unsigned flags = splitArg->GetRegSpillFlagByIdx(regIdx); + flags &= ~GTF_SPILLED; + splitArg->SetRegSpillFlagByIdx(flags, regIdx); + } +#endif // !LEGACY_BACKEND && _TARGET_ARM_ else { tree->gtFlags &= ~GTF_SPILLED; -- 2.7.4