[RyuJIT/ARM32] Enable passing large split struct argument (#12050)
author: Hyeongseok Oh <hseok82.oh@samsung.com>
Wed, 28 Jun 2017 18:01:18 +0000 (03:01 +0900)
committer: Bruce Forstall <brucefo@microsoft.com>
Wed, 28 Jun 2017 18:01:18 +0000 (11:01 -0700)
* [RyuJIT/ARM32] Enable passing large split struct

This enables passing split struct larger than 16 bytes.
To support split structs, it defines a new GenTree type - GenTreePutArgSplit.
GenTreePutArgSplit is similar to GenTreePutArgStk,
but it is used for split structs only
and it has additional fields to save register information.
GenTreePutArgSplit node is generated in lower phase.

* Apply reviews: split struct argument passing

- Fix some comments:
genPutArgSplit, GenTreePutArgStk, GenTreePutArgSplit, NewPutArg, ArgComplete
- Add assertion check in genPutArgSplit, genCallInstruction
- Rename variable: baseReg
- Change flag for GenTreePutArgSplit: _TARGET_ARM && !LEGACY_BACKEND
- Change type of gtOtherRegs in GenTreePutArgSplit
- Remove duplicated code: NewPutArg
- Implement spill & restore flag for GenTreePutArgSplit

* Apply reviews

- Rebase
- Update managing spillFlag for split struct
- Implement spill & restore code generation
- Fix typos and rename variables
- Fix bug related to print gentree for split struct

* Fix bug and comments

- Fix bug in regset.cpp
- Add comments in morph.cpp's NYI_ARM
- Fix comment typos in lsraarmarch.cpp

14 files changed:
src/jit/codegenarmarch.cpp
src/jit/codegenlinear.cpp
src/jit/codegenlinear.h
src/jit/gentree.cpp
src/jit/gentree.h
src/jit/gtlist.h
src/jit/gtstructs.h
src/jit/lower.cpp
src/jit/lower.h
src/jit/lsra.cpp
src/jit/lsraarm.cpp
src/jit/lsraarmarch.cpp
src/jit/morph.cpp
src/jit/regset.cpp

index d70fbb4..86dec5e 100644 (file)
@@ -276,6 +276,12 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
             genPutArgReg(treeNode->AsOp());
             break;
 
+#ifdef _TARGET_ARM_
+        case GT_PUTARG_SPLIT:
+            genPutArgSplit(treeNode->AsPutArgSplit());
+            break;
+#endif
+
         case GT_CALL:
             genCallInstruction(treeNode->AsCall());
             break;
@@ -885,6 +891,151 @@ void CodeGen::genPutArgReg(GenTreeOp* tree)
     genProduceReg(tree);
 }
 
+#ifdef _TARGET_ARM_
+//---------------------------------------------------------------------
+// genPutArgSplit - generate code for a GT_PUTARG_SPLIT node
+//
+// Arguments
+//    treeNode - the GT_PUTARG_SPLIT node
+//
+// Return value:
+//    None
+//
+// Notes:
+//    The source must be a GT_OBJ of struct type. The slots of the struct that follow
+//    the first gtNumRegs pointer-sized slots are copied, through a temporary register,
+//    into the outgoing argument area starting at gtSlotNum * TARGET_POINTER_SIZE.
+//    Afterwards the first gtNumRegs slots are loaded into the registers assigned to
+//    this node (see GetRegNumByIdx).
+//
+void CodeGen::genPutArgSplit(GenTreePutArgSplit* treeNode)
+{
+    assert(treeNode->OperIs(GT_PUTARG_SPLIT));
+
+    GenTreePtr source = treeNode->gtOp1;
+    emitter*   emit   = getEmitter();
+
+    noway_assert(source->OperGet() == GT_OBJ);
+
+    var_types targetType = source->TypeGet();
+    noway_assert(varTypeIsStruct(targetType));
+
+    // Temporary register used to move the stack-passed slots from the source to the outgoing area
+    regNumber baseReg = treeNode->ExtractTempReg();
+    regNumber addrReg = REG_NA;
+
+    GenTreeLclVarCommon* varNode  = nullptr;
+    GenTreePtr           addrNode = nullptr;
+
+    addrNode = source->gtOp.gtOp1;
+
+    // addrNode can either be a GT_LCL_VAR_ADDR or an address expression
+    //
+    if (addrNode->OperGet() == GT_LCL_VAR_ADDR)
+    {
+        // We have a GT_OBJ(GT_LCL_VAR_ADDR)
+        //
+        // In this case the struct is read directly from the local variable
+        // (by variable number), so remember the local in 'varNode' and
+        // clear 'addrNode': no address register is used for the loads below.
+        //
+        varNode  = addrNode->AsLclVarCommon();
+        addrNode = nullptr;
+    }
+
+    // Either varNode or addrNode must have been set up above,
+    // the xor ensures that only one of the two is set up, not both
+    assert((varNode != nullptr) ^ (addrNode != nullptr));
+
+    // Set up the gcPtrs, gcPtrCount, structSize and isHfa
+    BYTE*    gcPtrs     = treeNode->gtGcPtrs;
+    unsigned gcPtrCount = treeNode->gtNumberReferenceSlots; // The count of GC pointers in the struct
+    int      structSize = treeNode->getArgSize();
+    bool     isHfa      = treeNode->gtIsHfa;
+
+    // This is the varNum for our load operations,
+    // only used when we have a struct with a LclVar source
+    unsigned srcVarNum = BAD_VAR_NUM;
+
+    if (varNode != nullptr)
+    {
+        srcVarNum = varNode->gtLclNum;
+        assert(srcVarNum < compiler->lvaCount);
+    }
+    else // addrNode is used
+    {
+        assert(addrNode != nullptr);
+
+        // Generate code to load the address that we need into a register
+        genConsumeAddress(addrNode);
+        addrReg = addrNode->gtRegNum;
+    }
+
+    // If we have an HFA we can't have any GC pointers
+    if (isHfa)
+    {
+        noway_assert(gcPtrCount == 0);
+    }
+
+    unsigned varNumOut    = compiler->lvaOutgoingArgSpaceVar;
+    unsigned argOffsetMax = compiler->lvaOutgoingArgSpaceSize;
+    unsigned argOffsetOut = treeNode->gtSlotNum * TARGET_POINTER_SIZE;
+
+    // Put the stack-passed part on the stack first: it starts right after
+    // the gtNumRegs slots that are passed in registers
+    unsigned nextIndex     = treeNode->gtNumRegs;
+    unsigned structOffset  = nextIndex * TARGET_POINTER_SIZE;
+    int      remainingSize = structSize - structOffset;
+
+    // remainingSize is always a multiple of TARGET_POINTER_SIZE
+    assert(remainingSize % TARGET_POINTER_SIZE == 0);
+    while (remainingSize > 0)
+    {
+        // Use the GC type of this slot for both the load and the store
+        var_types type = compiler->getJitGCType(gcPtrs[nextIndex]);
+
+        if (varNode != nullptr)
+        {
+            // Load from our local variable source (srcVarNum)
+            emit->emitIns_R_S(INS_ldr, emitTypeSize(type), baseReg, srcVarNum, structOffset);
+        }
+        else
+        {
+            // check for case of destroying the addrRegister while we still need it
+            assert(baseReg != addrReg);
+
+            // Load from our address expression source
+            emit->emitIns_R_R_I(INS_ldr, emitTypeSize(type), baseReg, addrReg, structOffset);
+        }
+
+        // Emit str instruction to store the register into the outgoing argument area
+        emit->emitIns_S_R(INS_str, emitTypeSize(type), baseReg, varNumOut, argOffsetOut);
+        argOffsetOut += TARGET_POINTER_SIZE;  // We stored 4-bytes of the struct
+        assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area
+        remainingSize -= TARGET_POINTER_SIZE; // We loaded 4-bytes of the struct
+        structOffset += TARGET_POINTER_SIZE;
+        nextIndex += 1;
+    }
+
+    // Now load the first gtNumRegs slots of the struct into the registers assigned to this node
+    structOffset = 0;
+    for (unsigned idx = 0; idx < treeNode->gtNumRegs; idx++)
+    {
+        regNumber targetReg = treeNode->GetRegNumByIdx(idx);
+        var_types type      = treeNode->GetRegType(idx);
+
+        if (varNode != nullptr)
+        {
+            // Load from our local variable source (srcVarNum)
+            emit->emitIns_R_S(INS_ldr, emitTypeSize(type), targetReg, srcVarNum, structOffset);
+        }
+        else
+        {
+            // check for case of destroying the addrRegister while we still need it
+            assert(targetReg != addrReg);
+
+            // Load from our address expression source
+            emit->emitIns_R_R_I(INS_ldr, emitTypeSize(type), targetReg, addrReg, structOffset);
+        }
+        structOffset += TARGET_POINTER_SIZE;
+    }
+
+    genProduceReg(treeNode);
+}
+#endif // _TARGET_ARM_
+
 //----------------------------------------------------------------------------------
 // genMultiRegCallStoreToLocal: store multi-reg return value of a call node to a local
 //
@@ -1616,6 +1767,22 @@ void CodeGen::genCallInstruction(GenTreeCall* call)
 #endif // _TARGET_ARM_
             }
         }
+#ifdef _TARGET_ARM_
+        else if (curArgTabEntry->isSplit)
+        {
+            assert(curArgTabEntry->numRegs >= 1);
+            genConsumeArgSplitStruct(argNode->AsPutArgSplit());
+            for (unsigned idx = 0; idx < curArgTabEntry->numRegs; idx++)
+            {
+                regNumber argReg   = (regNumber)((unsigned)curArgTabEntry->regNum + idx);
+                regNumber allocReg = argNode->AsPutArgSplit()->GetRegNumByIdx(idx);
+                if (argReg != allocReg)
+                {
+                    inst_RV_RV(ins_Move_Extend(argNode->TypeGet(), true), argReg, allocReg);
+                }
+            }
+        }
+#endif
         else
         {
             regNumber argReg = curArgTabEntry->regNum;
index 29bb61a..b98b6e1 100644 (file)
@@ -1004,6 +1004,35 @@ void CodeGen::genUnspillRegIfNeeded(GenTree* tree)
 
             unspillTree->gtFlags &= ~GTF_SPILLED;
         }
+#ifdef _TARGET_ARM_
+        else if (unspillTree->OperIsPutArgSplit())
+        {
+            GenTreePutArgSplit* splitArg = unspillTree->AsPutArgSplit();
+            unsigned            regCount = splitArg->gtNumRegs;
+
+            // In case of split struct argument node, GTF_SPILLED flag on it indicates that
+            // one or more of its result regs are spilled.  Call node needs to be
+            // queried to know which specific result regs to be unspilled.
+            for (unsigned i = 0; i < regCount; ++i)
+            {
+                unsigned flags = splitArg->GetRegSpillFlagByIdx(i);
+                if ((flags & GTF_SPILLED) != 0)
+                {
+                    BYTE*     gcPtrs  = splitArg->gtGcPtrs;
+                    var_types dstType = splitArg->GetRegType(i);
+                    regNumber dstReg  = splitArg->GetRegNumByIdx(i);
+
+                    TempDsc* t = regSet.rsUnspillInPlace(splitArg, dstReg, i);
+                    getEmitter()->emitIns_R_S(ins_Load(dstType), emitActualTypeSize(dstType), dstReg, t->tdTempNum(),
+                                              0);
+                    compiler->tmpRlsTemp(t);
+                    gcInfo.gcMarkRegPtrVal(dstReg, dstType);
+                }
+            }
+
+            unspillTree->gtFlags &= ~GTF_SPILLED;
+        }
+#endif
         else
         {
             TempDsc* t = regSet.rsUnspillInPlace(unspillTree, unspillTree->gtRegNum);
@@ -1389,6 +1418,31 @@ void CodeGen::genConsumePutStructArgStk(GenTreePutArgStk* putArgNode,
 }
 #endif // FEATURE_PUT_STRUCT_ARG_STK
 
+#ifdef _TARGET_ARM_
+//------------------------------------------------------------------------
+// genConsumeArgSplitStruct: Consume register(s) in Call node to set split struct argument.
+//                           Liveness update for the PutArgSplit node is not needed
+//
+// Arguments:
+//    putArgNode - the PUTARG_SPLIT tree.
+//
+// Return Value:
+//    None.
+//
+void CodeGen::genConsumeArgSplitStruct(GenTreePutArgSplit* putArgNode)
+{
+    assert(putArgNode->OperGet() == GT_PUTARG_SPLIT);
+    assert(putArgNode->gtHasReg());
+
+    // Reload any of the node's registers that were spilled after it was produced
+    genUnspillRegIfNeeded(putArgNode);
+
+    // Skip updating GC info
+    // GC info for all argument registers will be cleared in caller
+
+    genCheckConsumeNode(putArgNode);
+}
+#endif
+
 //------------------------------------------------------------------------
 // genSetBlockSize: Ensure that the block size is in the given register
 //
@@ -1583,6 +1637,24 @@ void CodeGen::genProduceReg(GenTree* tree)
                     }
                 }
             }
+#ifdef _TARGET_ARM_
+            else if (tree->OperIsPutArgSplit())
+            {
+                GenTreePutArgSplit* argSplit = tree->AsPutArgSplit();
+                unsigned            regCount = argSplit->gtNumRegs;
+
+                for (unsigned i = 0; i < regCount; ++i)
+                {
+                    unsigned flags = argSplit->GetRegSpillFlagByIdx(i);
+                    if ((flags & GTF_SPILL) != 0)
+                    {
+                        regNumber reg = argSplit->GetRegNumByIdx(i);
+                        regSet.rsSpillTree(reg, argSplit, i);
+                        gcInfo.gcMarkRegSetNpt(genRegMask(reg));
+                    }
+                }
+            }
+#endif // _TARGET_ARM_
             else
             {
                 regSet.rsSpillTree(tree->gtRegNum, tree);
index 6a737f6..64d5c4f 100644 (file)
@@ -45,6 +45,9 @@ void genCodeForCompare(GenTreeOp* tree);
 void genIntrinsic(GenTreePtr treeNode);
 void genPutArgStk(GenTreePutArgStk* treeNode);
 void genPutArgReg(GenTreeOp* tree);
+#ifdef _TARGET_ARM_
+void genPutArgSplit(GenTreePutArgSplit* treeNode);
+#endif
 
 #if defined(_TARGET_XARCH_)
 unsigned getBaseVarForPutArgStk(GenTreePtr treeNode);
@@ -144,6 +147,9 @@ void genConsumeBlockOp(GenTreeBlk* blkNode, regNumber dstReg, regNumber srcReg,
 #ifdef FEATURE_PUT_STRUCT_ARG_STK
 void genConsumePutStructArgStk(GenTreePutArgStk* putArgStkNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg);
 #endif // FEATURE_PUT_STRUCT_ARG_STK
+#ifdef _TARGET_ARM_
+// Consume the register(s) of a split struct argument (GT_PUTARG_SPLIT node).
+// Declared without the 'CodeGen::' prefix: a qualified name is not allowed on an
+// in-class member declaration and is rejected by GCC/Clang.
+void genConsumeArgSplitStruct(GenTreePutArgSplit* putArgNode);
+#endif
 
 void genConsumeRegs(GenTree* tree);
 void genConsumeOperands(GenTreeOp* tree);
index 65922a2..ebef0c1 100644 (file)
@@ -328,6 +328,9 @@ void GenTree::InitNodeSize()
     // TODO-Throughput: This should not need to be a large node. The object info should be
     // obtained from the child node.
     GenTree::s_gtNodeSizes[GT_PUTARG_STK]       = TREE_NODE_SZ_LARGE;
+#if !defined(LEGACY_BACKEND) && defined(_TARGET_ARM_)
+    GenTree::s_gtNodeSizes[GT_PUTARG_SPLIT]     = TREE_NODE_SZ_LARGE;
+#endif
 #endif // FEATURE_PUT_STRUCT_ARG_STK
 
     assert(GenTree::s_gtNodeSizes[GT_RETURN] == GenTree::s_gtNodeSizes[GT_ASG]);
@@ -390,6 +393,9 @@ void GenTree::InitNodeSize()
     // TODO-Throughput: This should not need to be a large node. The object info should be
     // obtained from the child node.
     static_assert_no_msg(sizeof(GenTreePutArgStk)    <= TREE_NODE_SZ_LARGE);
+#if !defined(LEGACY_BACKEND) && defined(_TARGET_ARM_)
+    static_assert_no_msg(sizeof(GenTreePutArgSplit)  <= TREE_NODE_SZ_LARGE);
+#endif
 #endif // FEATURE_PUT_STRUCT_ARG_STK
 
 #ifdef FEATURE_SIMD
@@ -1742,6 +1748,22 @@ regMaskTP GenTree::gtGetRegMask() const
                 }
             }
         }
+#if !defined(LEGACY_BACKEND) && defined(_TARGET_ARM_)
+        else if (OperIsPutArgSplit())
+        {
+            GenTree*            tree     = const_cast<GenTree*>(this);
+            GenTreePutArgSplit* splitArg = tree->AsPutArgSplit();
+            unsigned            regCount = splitArg->gtNumRegs;
+
+            resultMask = RBM_NONE;
+            for (unsigned i = 0; i < regCount; ++i)
+            {
+                regNumber reg = splitArg->GetRegNumByIdx(i);
+                assert(reg != REG_NA);
+                resultMask |= genRegMask(reg);
+            }
+        }
+#endif
         else
         {
             resultMask = genRegMask(gtRegNum);
@@ -9540,6 +9562,9 @@ GenTreeUseEdgeIterator::GenTreeUseEdgeIterator(GenTree* node)
         case GT_PHYSREGDST:
         case GT_PUTARG_REG:
         case GT_PUTARG_STK:
+#if !defined(LEGACY_BACKEND) && defined(_TARGET_ARM_)
+        case GT_PUTARG_SPLIT:
+#endif // !LEGACY_BACKEND && _TARGET_ARM_
         case GT_RETURNTRAP:
             m_edge = &m_node->AsUnOp()->gtOp1;
             assert(*m_edge != nullptr);
@@ -12128,6 +12153,68 @@ void Compiler::gtGetArgMsg(
         }
         else
         {
+#ifdef _TARGET_ARM_
+            // A split struct argument is passed partly in register(s) and partly in the
+            // outgoing argument area; describe both parts in the message.
+            if (curArgTabEntry->isSplit)
+            {
+                regNumber firstReg = curArgTabEntry->regNum;
+                if (listCount == -1)
+                {
+                    // Message for the argument as a whole: register (range) plus stack offset
+                    if (curArgTabEntry->numRegs == 1)
+                    {
+                        sprintf_s(bufp, bufLength, "arg%d %s out+%02x%c", argNum, compRegVarName(firstReg),
+                                  (curArgTabEntry->slotNum) * TARGET_POINTER_SIZE, 0);
+                    }
+                    else
+                    {
+                        regNumber lastReg   = REG_STK;
+                        char      separator = (curArgTabEntry->numRegs == 2) ? ',' : '-';
+                        if (curArgTabEntry->isHfaRegArg)
+                        {
+                            unsigned lastRegNum = genMapFloatRegNumToRegArgNum(firstReg) + curArgTabEntry->numRegs - 1;
+                            lastReg             = genMapFloatRegArgNumToRegNum(lastRegNum);
+                        }
+                        else
+                        {
+                            unsigned lastRegNum = genMapIntRegNumToRegArgNum(firstReg) + curArgTabEntry->numRegs - 1;
+                            lastReg             = genMapIntRegArgNumToRegNum(lastRegNum);
+                        }
+                        sprintf_s(bufp, bufLength, "arg%d %s%c%s out+%02x%c", argNum, compRegVarName(firstReg),
+                                  separator, compRegVarName(lastReg), (curArgTabEntry->slotNum) * TARGET_POINTER_SIZE,
+                                  0);
+                    }
+                }
+                else
+                {
+                    // Message for the listCount'th member of the argument
+                    unsigned curArgNum = BAD_VAR_NUM;
+                    bool     isFloat   = curArgTabEntry->isHfaRegArg;
+                    if (isFloat)
+                    {
+                        curArgNum = genMapFloatRegNumToRegArgNum(firstReg) + listCount;
+                    }
+                    else
+                    {
+                        curArgNum = genMapIntRegNumToRegArgNum(firstReg) + listCount;
+                    }
+
+                    if (!isFloat && curArgNum < MAX_REG_ARG)
+                    {
+                        regNumber curReg = genMapIntRegArgNumToRegNum(curArgNum);
+                        sprintf_s(bufp, bufLength, "arg%d m%d %s%c", argNum, listCount, compRegVarName(curReg), 0);
+                    }
+                    else if (isFloat && curArgNum < MAX_FLOAT_REG_ARG)
+                    {
+                        regNumber curReg = genMapFloatRegArgNumToRegNum(curArgNum);
+                        sprintf_s(bufp, bufLength, "arg%d m%d %s%c", argNum, listCount, compRegVarName(curReg), 0);
+                    }
+                    else
+                    {
+                        // This member lives on the stack. 'stackSlot' is an integer, so it must be
+                        // printed with an integer conversion (it was previously passed to '%s');
+                        // print it as a byte offset like the other "out+" messages.
+                        unsigned stackSlot = listCount - curArgTabEntry->numRegs;
+                        sprintf_s(bufp, bufLength, "arg%d m%d out+%02x%c", argNum, listCount,
+                                  stackSlot * TARGET_POINTER_SIZE, 0);
+                    }
+                }
+                return;
+            }
+#endif // _TARGET_ARM_
 #if FEATURE_FIXED_OUT_ARGS
             if (listCount == -1)
             {
index 9ed8e52..047e740 100644 (file)
@@ -1293,6 +1293,15 @@ public:
         return OperIsStoreBlk(OperGet());
     }
 
+    // Returns true if this node is a GT_PUTARG_SPLIT node.
+    // The operator is only tested when building the non-legacy ARM backend
+    // (!LEGACY_BACKEND && _TARGET_ARM_); in any other configuration this returns false.
+    bool OperIsPutArgSplit() const
+    {
+#if !defined(LEGACY_BACKEND) && defined(_TARGET_ARM_)
+        return gtOper == GT_PUTARG_SPLIT;
+#else
+        return false;
+#endif
+    }
+
     bool OperIsPutArgStk() const
     {
         return gtOper == GT_PUTARG_STK;
@@ -1305,7 +1314,7 @@ public:
 
     bool OperIsPutArg() const
     {
-        return OperIsPutArgStk() || OperIsPutArgReg();
+        return OperIsPutArgStk() || OperIsPutArgReg() || OperIsPutArgSplit();
     }
 
     bool OperIsAddrMode() const
@@ -4907,6 +4916,9 @@ struct GenTreePutArgStk : public GenTreeUnOp
     unsigned gtNumSlots;             // Number of slots for the argument to be passed on stack
     unsigned gtNumberReferenceSlots; // Number of reference slots.
     BYTE*    gtGcPtrs;               // gcPointers
+#ifdef _TARGET_ARM_
+    bool gtIsHfa;
+#endif
 
 #endif // FEATURE_PUT_STRUCT_ARG_STK
 
@@ -4921,6 +4933,212 @@ struct GenTreePutArgStk : public GenTreeUnOp
 #endif
 };
 
+#if !defined(LEGACY_BACKEND) && defined(_TARGET_ARM_)
+// Represent the struct argument: split value in register(s) and stack.
+// In addition to the GenTreePutArgStk state, this node records how many registers
+// are used (gtNumRegs), which registers they are, the type held in each register,
+// and a per-register spill state.
+struct GenTreePutArgSplit : public GenTreePutArgStk
+{
+    unsigned gtNumRegs; // Number of registers used to pass the leading part of the struct
+
+    GenTreePutArgSplit(GenTreePtr op1,
+                       unsigned slotNum PUT_STRUCT_ARG_STK_ONLY_ARG(unsigned numSlots),
+                       unsigned     numRegs,
+                       bool         isHfa,
+                       bool         putIncomingArgArea = false,
+                       GenTreeCall* callNode           = nullptr)
+        : GenTreePutArgStk(GT_PUTARG_SPLIT,
+                           TYP_STRUCT,
+                           op1,
+                           slotNum PUT_STRUCT_ARG_STK_ONLY_ARG(numSlots),
+                           putIncomingArgArea,
+                           callNode)
+        , gtNumRegs(numRegs)
+    {
+        gtIsHfa = isHfa;
+        ClearOtherRegs();
+        ClearOtherRegFlags();
+    }
+
+    // Type required to support multi-reg struct arg.
+    var_types m_regType[MAX_REG_ARG];
+
+    // First reg of struct is always given by gtRegNum.
+    // gtOtherRegs holds the other reg numbers of struct.
+    // HFA args are not yet handled.
+    regNumberSmall gtOtherRegs[MAX_REG_ARG - 1];
+
+    // GTF_SPILL or GTF_SPILLED flag on a multi-reg struct node indicates that one or
+    // more of its result regs are in that state.  The spill flag of each of the
+    // registers is stored here. We only need 2 bits per register,
+    // so this is treated as a 2-bit array.
+    static const unsigned PACKED_GTF_SPILL   = 1;
+    static const unsigned PACKED_GTF_SPILLED = 2;
+    unsigned char         gtSpillFlags;
+
+    //---------------------------------------------------------------------------
+    // GetRegNumByIdx: get ith register allocated to this struct argument.
+    //
+    // Arguments:
+    //     idx   -   index of the register
+    //
+    // Return Value:
+    //     Return regNumber of ith register of this struct argument
+    //
+    regNumber GetRegNumByIdx(unsigned idx) const
+    {
+        assert(idx < MAX_REG_ARG);
+
+        if (idx == 0)
+        {
+            return gtRegNum;
+        }
+
+        return (regNumber)gtOtherRegs[idx - 1];
+    }
+
+    //----------------------------------------------------------------------
+    // SetRegNumByIdx: set ith register of this struct argument
+    //
+    // Arguments:
+    //    reg    -   reg number
+    //    idx    -   index of the register
+    //
+    // Return Value:
+    //    None
+    //
+    void SetRegNumByIdx(regNumber reg, unsigned idx)
+    {
+        assert(idx < MAX_REG_ARG);
+        if (idx == 0)
+        {
+            gtRegNum = reg;
+        }
+        else
+        {
+            gtOtherRegs[idx - 1] = reg;
+            // Check that the register number survived the narrowing to regNumberSmall
+            assert(gtOtherRegs[idx - 1] == reg);
+        }
+    }
+
+    //----------------------------------------------------------------------------
+    // ClearOtherRegs: clear multi-reg state to indicate no regs are allocated
+    //
+    // Arguments:
+    //    None
+    //
+    // Return Value:
+    //    None
+    //
+    void ClearOtherRegs()
+    {
+        for (unsigned i = 0; i < MAX_REG_ARG - 1; ++i)
+        {
+            gtOtherRegs[i] = REG_NA;
+        }
+    }
+
+    //----------------------------------------------------------------------
+    // GetRegSpillFlagByIdx: get spill flag associated with the register
+    // specified by its index.
+    //
+    // Arguments:
+    //    idx  -  Position or index of the register
+    //
+    // Return Value:
+    //    Returns GTF_* flags associated with the register. Only GTF_SPILL and GTF_SPILLED are considered.
+    //
+    unsigned GetRegSpillFlagByIdx(unsigned idx) const
+    {
+        assert(idx < MAX_REG_ARG);
+
+        unsigned bits = gtSpillFlags >> (idx * 2); // It doesn't matter that we possibly leave other high bits here.
+        unsigned spillFlags = 0;
+        if (bits & PACKED_GTF_SPILL)
+        {
+            spillFlags |= GTF_SPILL;
+        }
+        if (bits & PACKED_GTF_SPILLED)
+        {
+            spillFlags |= GTF_SPILLED;
+        }
+
+        return spillFlags;
+    }
+
+    //----------------------------------------------------------------------
+    // SetRegSpillFlagByIdx: set spill flags for the register
+    // specified by its index.
+    //
+    // Arguments:
+    //    flags  -  GTF_* flags. Only GTF_SPILL and GTF_SPILLED are allowed.
+    //    idx    -  Position or index of the register
+    //
+    // Return Value:
+    //    None
+    //
+    // Notes:
+    //    Only the two bits that belong to register 'idx' are modified; the
+    //    flags recorded for every other register are preserved.
+    //
+    void SetRegSpillFlagByIdx(unsigned flags, unsigned idx)
+    {
+        assert(idx < MAX_REG_ARG);
+
+        unsigned bits = 0;
+        if (flags & GTF_SPILL)
+        {
+            bits |= PACKED_GTF_SPILL;
+        }
+        if (flags & GTF_SPILLED)
+        {
+            bits |= PACKED_GTF_SPILLED;
+        }
+
+        // Clear just this register's 2-bit field before 'or'ing in the new value.
+        // (A wider mask such as 0xff would also wipe the flags of the registers
+        // stored in the higher bits.)
+        const unsigned shift = idx * 2;
+        const unsigned mask  = (PACKED_GTF_SPILL | PACKED_GTF_SPILLED) << shift;
+        gtSpillFlags         = (unsigned char)((gtSpillFlags & ~mask) | (bits << shift));
+    }
+
+    //--------------------------------------------------------------------------
+    // GetRegType:  Get var_type of the register specified by index.
+    //
+    // Arguments:
+    //    index - Index of the register.
+    //            First register will have an index 0 and so on.
+    //
+    // Return Value:
+    //    var_type of the register specified by its index.
+
+    var_types GetRegType(unsigned index) const
+    {
+        assert(index < gtNumRegs);
+        var_types result = m_regType[index];
+        return result;
+    }
+
+    //-------------------------------------------------------------------
+    // ClearOtherRegFlags: clear the GTF_* spill flags of all registers
+    //
+    // Arguments:
+    //     None
+    //
+    // Return Value:
+    //     None
+    //
+    void ClearOtherRegFlags()
+    {
+        gtSpillFlags = 0;
+    }
+
+#ifdef FEATURE_PUT_STRUCT_ARG_STK
+    // Size in bytes of the whole argument: the stack slots plus the register-passed slots.
+    unsigned getArgSize()
+    {
+        return (gtNumSlots + gtNumRegs) * TARGET_POINTER_SIZE;
+    }
+#endif // FEATURE_PUT_STRUCT_ARG_STK
+
+#if DEBUGGABLE_GENTREE
+    GenTreePutArgSplit() : GenTreePutArgStk()
+    {
+    }
+#endif
+};
+#endif // !LEGACY_BACKEND && _TARGET_ARM_
+
 // Represents GT_COPY or GT_RELOAD node
 struct GenTreeCopyOrReload : public GenTreeUnOp
 {
@@ -5476,7 +5694,7 @@ inline bool GenTree::IsMultiRegNode() const
     }
 
 #if !defined(LEGACY_BACKEND) && defined(_TARGET_ARM_)
-    if (gtOper == GT_MUL_LONG)
+    if (gtOper == GT_MUL_LONG || OperIsPutArgSplit())
     {
         return true;
     }
index b15d2a4..35c25b9 100644 (file)
@@ -295,6 +295,9 @@ GTNODE(PINVOKE_PROLOG   , GenTree            ,0,GTK_LEAF|GTK_NOVALUE)   // pinvo
 GTNODE(PINVOKE_EPILOG   , GenTree            ,0,GTK_LEAF|GTK_NOVALUE)   // pinvoke epilog seq
 GTNODE(PUTARG_REG       , GenTreeOp          ,0,GTK_UNOP)               // operator that places outgoing arg in register
 GTNODE(PUTARG_STK       , GenTreePutArgStk   ,0,GTK_UNOP|GTK_NOVALUE)   // operator that places outgoing arg in stack
+#if !defined(LEGACY_BACKEND) && defined(_TARGET_ARM_)
+GTNODE(PUTARG_SPLIT     , GenTreePutArgSplit ,0,GTK_UNOP)               // operator that places outgoing arg in registers with stack (split struct in ARM32)
+#endif // !LEGACY_BACKEND && _TARGET_ARM_
 GTNODE(RETURNTRAP       , GenTreeOp          ,0,GTK_UNOP|GTK_NOVALUE)   // a conditional call to wait on gc
 GTNODE(SWAP             , GenTreeOp          ,0,GTK_BINOP|GTK_NOVALUE)  // op1 and op2 swap (registers)
 GTNODE(IL_OFFSET        , GenTreeStmt        ,0,GTK_LEAF|GTK_NOVALUE)   // marks an IL offset for debugging purposes
index 898aeec..fa462ad 100644 (file)
@@ -96,6 +96,9 @@ GTSTRUCT_1(PhiArg      , GT_PHI_ARG)
 GTSTRUCT_1(StoreInd    , GT_STOREIND)
 GTSTRUCT_N(Indir       , GT_STOREIND, GT_IND, GT_NULLCHECK, GT_BLK, GT_STORE_BLK, GT_OBJ, GT_STORE_OBJ, GT_DYN_BLK, GT_STORE_DYN_BLK)
 GTSTRUCT_1(PutArgStk   , GT_PUTARG_STK)
+#if !defined(LEGACY_BACKEND) && defined(_TARGET_ARM_)
+GTSTRUCT_1(PutArgSplit , GT_PUTARG_SPLIT)
+#endif
 GTSTRUCT_1(PhysReg     , GT_PHYSREG)
 #ifdef FEATURE_SIMD
 GTSTRUCT_1(SIMD        , GT_SIMD) 
index fe6c10c..0479b78 100644 (file)
@@ -766,8 +766,7 @@ void Lowering::ReplaceArgWithPutArgOrCopy(GenTree** argSlot, GenTree* putArgOrCo
 {
     assert(argSlot != nullptr);
     assert(*argSlot != nullptr);
-    assert(putArgOrCopy->OperGet() == GT_PUTARG_REG || putArgOrCopy->OperGet() == GT_PUTARG_STK ||
-           putArgOrCopy->OperGet() == GT_COPY);
+    assert(putArgOrCopy->OperIsPutArg() || putArgOrCopy->OperIs(GT_COPY));
 
     GenTree* arg = *argSlot;
 
@@ -828,99 +827,162 @@ GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryP
     isOnStack = info->regNum == REG_STK;
 #endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
 
-    if (!isOnStack)
+#ifdef _TARGET_ARM_
+    // Struct can be split into register(s) and stack on ARM
+    if (info->isSplit)
     {
-#ifdef FEATURE_SIMD
-        // TYP_SIMD8 is passed in an integer register.  We need the putArg node to be of the int type.
-        if (type == TYP_SIMD8 && genIsValidIntReg(info->regNum))
+        if (arg->OperGet() != GT_OBJ)
+        {
+            NYI_ARM("Lowering: Oper for struct argument is not GT_OBJ");
+        }
+
+        putArg = new (comp, GT_PUTARG_SPLIT)
+            GenTreePutArgSplit(arg, info->slotNum PUT_STRUCT_ARG_STK_ONLY_ARG(info->numSlots), info->numRegs,
+                               info->isHfaRegArg, call->IsFastTailCall(), call);
+
+        // Set GC Pointer info
+        GenTreePutArgSplit* argSplit = putArg->AsPutArgSplit();
+        BYTE*               gcLayout = new (comp, CMK_Codegen) BYTE[info->numSlots + info->numRegs];
+        unsigned            numRefs  = comp->info.compCompHnd->getClassGClayout(arg->gtObj.gtClass, gcLayout);
+        argSplit->setGcPointers(numRefs, gcLayout);
+
+        // Set type of registers
+        for (unsigned index = 0; index < info->numRegs; index++)
         {
-            type = TYP_LONG;
+            var_types regType          = comp->getJitGCType(gcLayout[index]);
+            argSplit->m_regType[index] = regType;
         }
+    }
+    else
+#endif // _TARGET_ARM_
+    {
+        if (!isOnStack)
+        {
+#ifdef FEATURE_SIMD
+            // TYP_SIMD8 is passed in an integer register.  We need the putArg node to be of the int type.
+            if (type == TYP_SIMD8 && genIsValidIntReg(info->regNum))
+            {
+                type = TYP_LONG;
+            }
 #endif // FEATURE_SIMD
 
 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
-        if (info->isStruct)
-        {
-            // The following code makes sure a register passed struct arg is moved to
-            // the register before the call is made.
-            // There are two cases (comments added in the code below.)
-            // 1. The struct is of size one eightbyte:
-            //    In this case a new tree is created that is GT_PUTARG_REG
-            //    with a op1 the original argument.
-            // 2. The struct is contained in 2 eightbytes:
-            //    in this case the arg comes as a GT_FIELD_LIST of two GT_LCL_FLDs - the two eightbytes of the struct.
-            //    The code creates a GT_PUTARG_REG node for each GT_LCL_FLD in the GT_FIELD_LIST
-            //    and splices it in the list with the corresponding original GT_LCL_FLD tree as op1.
-
-            assert(info->structDesc.eightByteCount != 0);
-
-            if (info->structDesc.eightByteCount == 1)
+            if (info->isStruct)
             {
-                // clang-format off
-                // Case 1 above: Create a GT_PUTARG_REG node with op1 of the original tree.
-                //
-                // Here the IR for this operation:
-                // lowering call :
-                //     N001(3, 2)[000017] ------ - N---- / --*  &lclVar   byref  V00 loc0
-                //     N003(6, 5)[000052] * --XG------ - / --*  indir     int
-                //     N004(3, 2)[000046] ------ - N---- + --*  &lclVar   byref  V02 tmp0
-                //     (13, 11)[000070] -- - XG-- - R-- - arg0 in out + 00 / --*  storeIndir int
-                //     N009(3, 4)[000054] ------ - N----arg0 in rdi + --*  lclFld    int    V02 tmp0[+0](last use)
-                //     N011(33, 21)[000018] --CXG------ - *call      void   Test.Foo.test1
-                //
-                // args :
-                //     lowering arg : (13, 11)[000070] -- - XG-- - R-- - *storeIndir int
-                //
-                // late :
-                //    lowering arg : N009(3, 4)[000054] ------ - N----             *  lclFld    int    V02 tmp0[+0](last use)
-                //    new node is : (3, 4)[000071] ------------             *  putarg_reg int    RV
-                //
-                // after :
-                //    N001(3, 2)[000017] ------ - N---- / --*  &lclVar   byref  V00 loc0
-                //    N003(6, 5)[000052] * --XG------ - / --*  indir     int
-                //    N004(3, 2)[000046] ------ - N---- + --*  &lclVar   byref  V02 tmp0
-                //    (13, 11)[000070] -- - XG-- - R-- - arg0 in out + 00 / --*  storeIndir int
-                //    N009(3, 4)[000054] ------ - N---- | / --*  lclFld    int    V02 tmp0[+0](last use)
-                //    (3, 4)[000071] ------------arg0 in rdi + --*  putarg_reg int    RV
-                //    N011(33, 21)[000018] --CXG------ - *call      void   Test.Foo.test1
-                //
-                // clang-format on
+                // The following code makes sure a register passed struct arg is moved to
+                // the register before the call is made.
+                // There are two cases (comments added in the code below.)
+                // 1. The struct is of size one eightbyte:
+                //    In this case a new tree is created that is GT_PUTARG_REG
+                //    with a op1 the original argument.
+                // 2. The struct is contained in 2 eightbytes:
+                //    in this case the arg comes as a GT_FIELD_LIST of two GT_LCL_FLDs
+                //     - the two eightbytes of the struct.
+                //    The code creates a GT_PUTARG_REG node for each GT_LCL_FLD in the GT_FIELD_LIST
+                //    and splices it in the list with the corresponding original GT_LCL_FLD tree as op1.
+
+                assert(info->structDesc.eightByteCount != 0);
+
+                if (info->structDesc.eightByteCount == 1)
+                {
+                    // clang-format off
+                    // Case 1 above: Create a GT_PUTARG_REG node with op1 of the original tree.
+                    //
+                    // Here the IR for this operation:
+                    // lowering call :
+                    //     N001(3, 2)[000017] ------ - N---- / --*  &lclVar   byref  V00 loc0
+                    //     N003(6, 5)[000052] * --XG------ - / --*  indir     int
+                    //     N004(3, 2)[000046] ------ - N---- + --*  &lclVar   byref  V02 tmp0
+                    //     (13, 11)[000070] -- - XG-- - R-- - arg0 in out + 00 / --*  storeIndir int
+                    //     N009(3, 4)[000054] ------ - N----arg0 in rdi + --*  lclFld    int    V02 tmp0[+0](last use)
+                    //     N011(33, 21)[000018] --CXG------ - *call      void   Test.Foo.test1
+                    //
+                    // args :
+                    //     lowering arg : (13, 11)[000070] -- - XG-- - R-- - *storeIndir int
+                    //
+                    // late :
+                    //    lowering arg : N009(3, 4)[000054] ------ - N----             *  lclFld    int    V02 tmp0[+0](last use)
+                    //    new node is : (3, 4)[000071] ------------             *  putarg_reg int    RV
+                    //
+                    // after :
+                    //    N001(3, 2)[000017] ------ - N---- / --*  &lclVar   byref  V00 loc0
+                    //    N003(6, 5)[000052] * --XG------ - / --*  indir     int
+                    //    N004(3, 2)[000046] ------ - N---- + --*  &lclVar   byref  V02 tmp0
+                    //    (13, 11)[000070] -- - XG-- - R-- - arg0 in out + 00 / --*  storeIndir int
+                    //    N009(3, 4)[000054] ------ - N---- | / --*  lclFld    int    V02 tmp0[+0](last use)
+                    //    (3, 4)[000071] ------------arg0 in rdi + --*  putarg_reg int    RV
+                    //    N011(33, 21)[000018] --CXG------ - *call      void   Test.Foo.test1
+                    //
+                    // clang-format on
 
-                putArg = comp->gtNewOperNode(GT_PUTARG_REG, type, arg);
+                    putArg = comp->gtNewOperNode(GT_PUTARG_REG, type, arg);
+                }
+                else if (info->structDesc.eightByteCount == 2)
+                {
+                    // clang-format off
+                    // Case 2 above: Convert the LCL_FLDs to PUTARG_REG
+                    //
+                    // lowering call :
+                    //     N001(3, 2)  [000025] ------ - N----Source / --*  &lclVar   byref  V01 loc1
+                    //     N003(3, 2)  [000056] ------ - N----Destination + --*  &lclVar   byref  V03 tmp1
+                    //     N006(1, 1)  [000058] ------------ + --*  const     int    16
+                    //     N007(12, 12)[000059] - A--G---- - L - arg0 SETUP / --*  copyBlk   void
+                    //     N009(3, 4)  [000061] ------ - N----arg0 in rdi + --*  lclFld    long   V03 tmp1[+0]
+                    //     N010(3, 4)  [000063] ------------arg0 in rsi + --*  lclFld    long   V03 tmp1[+8](last use)
+                    //     N014(40, 31)[000026] --CXG------ - *call      void   Test.Foo.test2
+                    //
+                    // args :
+                    //     lowering arg : N007(12, 12)[000059] - A--G---- - L - *copyBlk   void
+                    //
+                    // late :
+                    //     lowering arg : N012(11, 13)[000065] ------------             *  <list>    struct
+                    //
+                    // after :
+                    //     N001(3, 2)[000025] ------ - N----Source / --*  &lclVar   byref  V01 loc1
+                    //     N003(3, 2)[000056] ------ - N----Destination + --*  &lclVar   byref  V03 tmp1
+                    //     N006(1, 1)[000058] ------------ + --*  const     int    16
+                    //     N007(12, 12)[000059] - A--G---- - L - arg0 SETUP / --*  copyBlk   void
+                    //     N009(3, 4)[000061] ------ - N---- | / --*  lclFld    long   V03 tmp1[+0]
+                    //     (3, 4)[000072] ------------arg0 in rdi + --*  putarg_reg long
+                    //     N010(3, 4)[000063] ------------ | / --*  lclFld    long   V03 tmp1[+8](last use)
+                    //     (3, 4)[000073] ------------arg0 in rsi + --*  putarg_reg long
+                    //     N014(40, 31)[000026] --CXG------ - *call      void   Test.Foo.test2
+                    //
+                    // clang-format on
+
+                    assert(arg->OperGet() == GT_FIELD_LIST);
+
+                    GenTreeFieldList* fieldListPtr = arg->AsFieldList();
+                    assert(fieldListPtr->IsFieldListHead());
+
+                    for (unsigned ctr = 0; fieldListPtr != nullptr; fieldListPtr = fieldListPtr->Rest(), ctr++)
+                    {
+                        // Create a new GT_PUTARG_REG node with op1 the original GT_LCL_FLD.
+                        GenTreePtr newOper = comp->gtNewOperNode(
+                            GT_PUTARG_REG,
+                            comp->GetTypeFromClassificationAndSizes(info->structDesc.eightByteClassifications[ctr],
+                                                                    info->structDesc.eightByteSizes[ctr]),
+                            fieldListPtr->gtOp.gtOp1);
+
+                        // Splice in the new GT_PUTARG_REG node in the GT_FIELD_LIST
+                        ReplaceArgWithPutArgOrCopy(&fieldListPtr->gtOp.gtOp1, newOper);
+                    }
+
+                    // Just return arg. The GT_FIELD_LIST is not replaced.
+                    // Nothing more to do.
+                    return arg;
+                }
+                else
+                {
+                    assert(false && "Illegal count of eightbytes for the CLR type system"); // No more than 2 eightbytes
+                                                                                            // for the CLR.
+                }
             }
-            else if (info->structDesc.eightByteCount == 2)
+            else
+#else // not defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+#if FEATURE_MULTIREG_ARGS
+            if ((info->numRegs > 1) && (arg->OperGet() == GT_FIELD_LIST))
             {
-                // clang-format off
-                // Case 2 above: Convert the LCL_FLDs to PUTARG_REG
-                //
-                // lowering call :
-                //     N001(3, 2)  [000025] ------ - N----Source / --*  &lclVar   byref  V01 loc1
-                //     N003(3, 2)  [000056] ------ - N----Destination + --*  &lclVar   byref  V03 tmp1
-                //     N006(1, 1)  [000058] ------------ + --*  const     int    16
-                //     N007(12, 12)[000059] - A--G---- - L - arg0 SETUP / --*  copyBlk   void
-                //     N009(3, 4)  [000061] ------ - N----arg0 in rdi + --*  lclFld    long   V03 tmp1[+0]
-                //     N010(3, 4)  [000063] ------------arg0 in rsi + --*  lclFld    long   V03 tmp1[+8](last use)
-                //     N014(40, 31)[000026] --CXG------ - *call      void   Test.Foo.test2
-                //
-                // args :
-                //     lowering arg : N007(12, 12)[000059] - A--G---- - L - *copyBlk   void
-                //
-                // late :
-                //     lowering arg : N012(11, 13)[000065] ------------             *  <list>    struct
-                //
-                // after :
-                //     N001(3, 2)[000025] ------ - N----Source / --*  &lclVar   byref  V01 loc1
-                //     N003(3, 2)[000056] ------ - N----Destination + --*  &lclVar   byref  V03 tmp1
-                //     N006(1, 1)[000058] ------------ + --*  const     int    16
-                //     N007(12, 12)[000059] - A--G---- - L - arg0 SETUP / --*  copyBlk   void
-                //     N009(3, 4)[000061] ------ - N---- | / --*  lclFld    long   V03 tmp1[+0]
-                //     (3, 4)[000072] ------------arg0 in rdi + --*  putarg_reg long
-                //     N010(3, 4)[000063] ------------ | / --*  lclFld    long   V03 tmp1[+8](last use)
-                //     (3, 4)[000073] ------------arg0 in rsi + --*  putarg_reg long
-                //     N014(40, 31)[000026] --CXG------ - *call      void   Test.Foo.test2
-                //
-                // clang-format on
-
                 assert(arg->OperGet() == GT_FIELD_LIST);
 
                 GenTreeFieldList* fieldListPtr = arg->AsFieldList();
@@ -928,12 +990,11 @@ GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryP
 
                 for (unsigned ctr = 0; fieldListPtr != nullptr; fieldListPtr = fieldListPtr->Rest(), ctr++)
                 {
-                    // Create a new GT_PUTARG_REG node with op1 the original GT_LCL_FLD.
-                    GenTreePtr newOper = comp->gtNewOperNode(
-                        GT_PUTARG_REG,
-                        comp->GetTypeFromClassificationAndSizes(info->structDesc.eightByteClassifications[ctr],
-                                                                info->structDesc.eightByteSizes[ctr]),
-                        fieldListPtr->gtOp.gtOp1);
+                    GenTreePtr curOp  = fieldListPtr->gtOp.gtOp1;
+                    var_types  curTyp = curOp->TypeGet();
+
+                    // Create a new GT_PUTARG_REG node with op1
+                    GenTreePtr newOper = comp->gtNewOperNode(GT_PUTARG_REG, curTyp, curOp);
 
                     // Splice in the new GT_PUTARG_REG node in the GT_FIELD_LIST
                     ReplaceArgWithPutArgOrCopy(&fieldListPtr->gtOp.gtOp1, newOper);
@@ -944,119 +1005,88 @@ GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryP
                 return arg;
             }
             else
+#endif // FEATURE_MULTIREG_ARGS
+#endif // not defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
             {
-                assert(false &&
-                       "Illegal count of eightbytes for the CLR type system"); // No more than 2 eightbytes for the CLR.
+                putArg = comp->gtNewOperNode(GT_PUTARG_REG, type, arg);
             }
         }
         else
-#else // not defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
-#if FEATURE_MULTIREG_ARGS
-        if ((info->numRegs > 1) && (arg->OperGet() == GT_FIELD_LIST))
         {
-            assert(arg->OperGet() == GT_FIELD_LIST);
+            // Mark this one as tail call arg if it is a fast tail call.
+            // This provides the info to put this argument in in-coming arg area slot
+            // instead of in out-going arg area slot.
 
-            GenTreeFieldList* fieldListPtr = arg->AsFieldList();
-            assert(fieldListPtr->IsFieldListHead());
+            PUT_STRUCT_ARG_STK_ONLY(assert(info->isStruct == varTypeIsStruct(type))); // Make sure state is correct
 
-            for (unsigned ctr = 0; fieldListPtr != nullptr; fieldListPtr = fieldListPtr->Rest(), ctr++)
-            {
-                GenTreePtr curOp  = fieldListPtr->gtOp.gtOp1;
-                var_types  curTyp = curOp->TypeGet();
-
-                // Create a new GT_PUTARG_REG node with op1
-                GenTreePtr newOper = comp->gtNewOperNode(GT_PUTARG_REG, curTyp, curOp);
-
-                // Splice in the new GT_PUTARG_REG node in the GT_FIELD_LIST
-                ReplaceArgWithPutArgOrCopy(&fieldListPtr->gtOp.gtOp1, newOper);
-            }
-
-            // Just return arg. The GT_FIELD_LIST is not replaced.
-            // Nothing more to do.
-            return arg;
-        }
-        else
-#endif // FEATURE_MULTIREG_ARGS
-#endif // not defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
-        {
-            putArg = comp->gtNewOperNode(GT_PUTARG_REG, type, arg);
-        }
-    }
-    else
-    {
-        // Mark this one as tail call arg if it is a fast tail call.
-        // This provides the info to put this argument in in-coming arg area slot
-        // instead of in out-going arg area slot.
-
-        PUT_STRUCT_ARG_STK_ONLY(assert(info->isStruct == varTypeIsStruct(type))); // Make sure state is correct
-
-        putArg = new (comp, GT_PUTARG_STK)
-            GenTreePutArgStk(GT_PUTARG_STK, type, arg, info->slotNum PUT_STRUCT_ARG_STK_ONLY_ARG(info->numSlots),
-                             call->IsFastTailCall(), call);
+            putArg = new (comp, GT_PUTARG_STK)
+                GenTreePutArgStk(GT_PUTARG_STK, type, arg, info->slotNum PUT_STRUCT_ARG_STK_ONLY_ARG(info->numSlots),
+                                 call->IsFastTailCall(), call);
 
 #ifdef FEATURE_PUT_STRUCT_ARG_STK
-        // If the ArgTabEntry indicates that this arg is a struct
-        // get and store the number of slots that are references.
-        // This is later used in the codegen for PUT_ARG_STK implementation
-        // for struct to decide whether and how many single eight-byte copies
-        // to be done (only for reference slots), so gcinfo is emitted.
-        // For non-reference slots faster/smaller size instructions are used -
-        // pair copying using XMM registers or rep mov instructions.
-        if (info->isStruct)
-        {
-            // We use GT_OBJ for non-SIMD struct arguments. However, for
-            // SIMD arguments the GT_OBJ has already been transformed.
-            if (arg->gtOper != GT_OBJ)
-            {
-                assert(varTypeIsSIMD(arg));
-            }
-            else
+            // If the ArgTabEntry indicates that this arg is a struct
+            // get and store the number of slots that are references.
+            // This is later used in the codegen for PUT_ARG_STK implementation
+            // for struct to decide whether and how many single eight-byte copies
+            // to be done (only for reference slots), so gcinfo is emitted.
+            // For non-reference slots faster/smaller size instructions are used -
+            // pair copying using XMM registers or rep mov instructions.
+            if (info->isStruct)
             {
-                unsigned numRefs  = 0;
-                BYTE*    gcLayout = new (comp, CMK_Codegen) BYTE[info->numSlots];
-                assert(!varTypeIsSIMD(arg));
-                numRefs = comp->info.compCompHnd->getClassGClayout(arg->gtObj.gtClass, gcLayout);
-                putArg->AsPutArgStk()->setGcPointers(numRefs, gcLayout);
+                // We use GT_OBJ for non-SIMD struct arguments. However, for
+                // SIMD arguments the GT_OBJ has already been transformed.
+                if (arg->gtOper != GT_OBJ)
+                {
+                    assert(varTypeIsSIMD(arg));
+                }
+                else
+                {
+                    unsigned numRefs  = 0;
+                    BYTE*    gcLayout = new (comp, CMK_Codegen) BYTE[info->numSlots];
+                    assert(!varTypeIsSIMD(arg));
+                    numRefs = comp->info.compCompHnd->getClassGClayout(arg->gtObj.gtClass, gcLayout);
+                    putArg->AsPutArgStk()->setGcPointers(numRefs, gcLayout);
 
 #ifdef _TARGET_X86_
-                // On x86 VM lies about the type of a struct containing a pointer sized
-                // integer field by returning the type of its field as the type of struct.
-                // Such struct can be passed in a register depending its position in
-                // parameter list.  VM does this unwrapping only one level and therefore
-                // a type like Struct Foo { Struct Bar { int f}} awlays needs to be
-                // passed on stack.  Also, VM doesn't lie about type of such a struct
-                // when it is a field of another struct.  That is VM doesn't lie about
-                // the type of Foo.Bar
-                //
-                // We now support the promotion of fields that are of type struct.
-                // However we only support a limited case where the struct field has a
-                // single field and that single field must be a scalar type. Say Foo.Bar
-                // field is getting passed as a parameter to a call, Since it is a TYP_STRUCT,
-                // as per x86 ABI it should always be passed on stack.  Therefore GenTree
-                // node under a PUTARG_STK could be GT_OBJ(GT_LCL_VAR_ADDR(v1)), where
-                // local v1 could be a promoted field standing for Foo.Bar.  Note that
-                // the type of v1 will be the type of field of Foo.Bar.f when Foo is
-                // promoted.  That is v1 will be a scalar type.  In this case we need to
-                // pass v1 on stack instead of in a register.
-                //
-                // TODO-PERF: replace GT_OBJ(GT_LCL_VAR_ADDR(v1)) with v1 if v1 is
-                // a scalar type and the width of GT_OBJ matches the type size of v1.
-                // Note that this cannot be done till call node arguments are morphed
-                // because we should not lose the fact that the type of argument is
-                // a struct so that the arg gets correctly marked to be passed on stack.
-                GenTree* objOp1 = arg->gtGetOp1();
-                if (objOp1->OperGet() == GT_LCL_VAR_ADDR)
-                {
-                    unsigned lclNum = objOp1->AsLclVarCommon()->GetLclNum();
-                    if (comp->lvaTable[lclNum].lvType != TYP_STRUCT)
+                    // On x86 VM lies about the type of a struct containing a pointer sized
+                    // integer field by returning the type of its field as the type of struct.
+                    // Such struct can be passed in a register depending its position in
+                    // parameter list.  VM does this unwrapping only one level and therefore
+                    // a type like Struct Foo { Struct Bar { int f}} always needs to be
+                    // passed on stack.  Also, VM doesn't lie about type of such a struct
+                    // when it is a field of another struct.  That is VM doesn't lie about
+                    // the type of Foo.Bar
+                    //
+                    // We now support the promotion of fields that are of type struct.
+                    // However we only support a limited case where the struct field has a
+                    // single field and that single field must be a scalar type. Say Foo.Bar
+                    // field is getting passed as a parameter to a call, Since it is a TYP_STRUCT,
+                    // as per x86 ABI it should always be passed on stack.  Therefore GenTree
+                    // node under a PUTARG_STK could be GT_OBJ(GT_LCL_VAR_ADDR(v1)), where
+                    // local v1 could be a promoted field standing for Foo.Bar.  Note that
+                    // the type of v1 will be the type of field of Foo.Bar.f when Foo is
+                    // promoted.  That is v1 will be a scalar type.  In this case we need to
+                    // pass v1 on stack instead of in a register.
+                    //
+                    // TODO-PERF: replace GT_OBJ(GT_LCL_VAR_ADDR(v1)) with v1 if v1 is
+                    // a scalar type and the width of GT_OBJ matches the type size of v1.
+                    // Note that this cannot be done till call node arguments are morphed
+                    // because we should not lose the fact that the type of argument is
+                    // a struct so that the arg gets correctly marked to be passed on stack.
+                    GenTree* objOp1 = arg->gtGetOp1();
+                    if (objOp1->OperGet() == GT_LCL_VAR_ADDR)
                     {
-                        comp->lvaSetVarDoNotEnregister(lclNum DEBUGARG(Compiler::DNER_VMNeedsStackAddr));
+                        unsigned lclNum = objOp1->AsLclVarCommon()->GetLclNum();
+                        if (comp->lvaTable[lclNum].lvType != TYP_STRUCT)
+                        {
+                            comp->lvaSetVarDoNotEnregister(lclNum DEBUGARG(Compiler::DNER_VMNeedsStackAddr));
+                        }
                     }
-                }
 #endif // _TARGET_X86_
+                }
             }
-        }
 #endif // FEATURE_PUT_STRUCT_ARG_STK
+        }
     }
 
     JITDUMP("new node is : ");
index 486cc39..76cf481 100644 (file)
@@ -231,6 +231,9 @@ private:
 #ifdef FEATURE_PUT_STRUCT_ARG_STK
     void LowerPutArgStk(GenTreePutArgStk* tree);
     void TreeNodeInfoInitPutArgStk(GenTreePutArgStk* tree);
+#ifdef _TARGET_ARM_
+    void TreeNodeInfoInitPutArgSplit(GenTreePutArgSplit* tree, TreeNodeInfo& info, fgArgTabEntryPtr argInfo);
+#endif
 #endif // FEATURE_PUT_STRUCT_ARG_STK
     void TreeNodeInfoInitLclHeap(GenTree* tree);
 
index 59e547c..2af8251 100644 (file)
@@ -140,6 +140,11 @@ void lsraAssignRegToTree(GenTreePtr tree, regNumber reg, unsigned regIdx)
         GenTreeMulLong* mul = tree->AsMulLong();
         mul->gtOtherReg     = reg;
     }
+    else if (tree->OperGet() == GT_PUTARG_SPLIT)
+    {
+        GenTreePutArgSplit* putArg = tree->AsPutArgSplit();
+        putArg->SetRegNumByIdx(reg, regIdx);
+    }
 #endif // _TARGET_ARM_
     else
     {
@@ -4148,6 +4153,15 @@ void LinearScan::buildRefPositionsForNode(GenTree*                  tree,
             useCandidates  = allRegs(registerType);
         }
 
+#ifdef _TARGET_ARM_
+        if (tree->OperIsPutArgSplit())
+        {
+            // get i-th candidate
+            currCandidates = genFindLowestReg(candidates);
+            candidates &= ~currCandidates;
+        }
+#endif
+
         if (interval == nullptr)
         {
             // Make a new interval
@@ -9032,6 +9046,13 @@ void LinearScan::resolveRegisters()
                                 GenTreeCall* call = treeNode->AsCall();
                                 call->SetRegSpillFlagByIdx(GTF_SPILL, currentRefPosition->getMultiRegIdx());
                             }
+#ifdef _TARGET_ARM_
+                            else if (treeNode->OperIsPutArgSplit())
+                            {
+                                GenTreePutArgSplit* splitArg = treeNode->AsPutArgSplit();
+                                splitArg->SetRegSpillFlagByIdx(GTF_SPILL, currentRefPosition->getMultiRegIdx());
+                            }
+#endif
                         }
 
                         // If the value is reloaded or moved to a different register, we need to insert
index 123bb2d..ee356d5 100644 (file)
@@ -753,6 +753,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
         case GT_MEMORYBARRIER:
         case GT_OBJ:
         case GT_COPY:
+        case GT_PUTARG_SPLIT:
             info->dstCount = tree->IsValue() ? 1 : 0;
             if (kind & (GTK_CONST | GTK_LEAF))
             {
index 5babb6a..e361478 100644 (file)
@@ -582,6 +582,13 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
 #endif // _TARGET_ARM_
             }
         }
+#ifdef _TARGET_ARM_
+        else if (argNode->OperGet() == GT_PUTARG_SPLIT)
+        {
+            fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode);
+            TreeNodeInfoInitPutArgSplit(argNode->AsPutArgSplit(), *info, curArgTabEntry);
+        }
+#endif
         else
         {
             TreeNodeInfoInitPutArgReg(argNode->AsUnOp(), curArgTabEntry->regNum, *info, false, &callHasFloatRegArgs);
@@ -612,6 +619,13 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
 
                 TreeNodeInfoInitPutArgStk(arg->AsPutArgStk(), curArgTabEntry);
             }
+#ifdef _TARGET_ARM_
+            else if (arg->OperGet() == GT_PUTARG_SPLIT)
+            {
+                fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, arg);
+                TreeNodeInfoInitPutArgSplit(arg->AsPutArgSplit(), *info, curArgTabEntry);
+            }
+#endif
             else
             {
                 TreeNodeInfo* argInfo = &(arg->gtLsraInfo);
@@ -723,6 +737,57 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode, fgArgTabEntr
     }
 }
 
+#ifdef _TARGET_ARM_
+//------------------------------------------------------------------------
+// TreeNodeInfoInitPutArgSplit: Set the NodeInfo for a GT_PUTARG_SPLIT node
+//
+// Arguments:
+//    argNode - a GT_PUTARG_SPLIT node
+//    info    - NodeInfo whose srcCount is increased by argInfo->numRegs
+//    argInfo - the arg table entry for this argument; supplies regNum and numRegs
+//
+// Return Value: None.
+//
+// Notes:
+//    Sets the child node(s) to be contained; dst candidates are registers regNum .. regNum + numRegs - 1.
+void Lowering::TreeNodeInfoInitPutArgSplit(GenTreePutArgSplit* argNode, TreeNodeInfo& info, fgArgTabEntryPtr argInfo)
+{
+    assert(argNode->gtOper == GT_PUTARG_SPLIT);
+
+    GenTreePtr putArgChild = argNode->gtOp.gtOp1;
+
+    // Initialize 'argNode' as not contained, as this is both the default case
+    //  and how MakeSrcContained expects to find things setup.
+    // The node defines one destination per argument register; 'info' gains the same number of sources.
+    argNode->gtLsraInfo.srcCount = 1;
+    argNode->gtLsraInfo.dstCount = argInfo->numRegs;
+    info.srcCount += argInfo->numRegs;
+
+    regNumber argReg  = argInfo->regNum;
+    regMaskTP argMask = RBM_NONE;
+    for (unsigned i = 0; i < argInfo->numRegs; i++)
+    {
+        argMask |= genRegMask((regNumber)((unsigned)argReg + i));
+    }
+    argNode->gtLsraInfo.setDstCandidates(m_lsra, argMask);
+
+    assert(putArgChild->TypeGet() == TYP_STRUCT);
+    assert(putArgChild->OperGet() == GT_OBJ);
+    // We could use a ldr/str sequence, so we need an internal register
+    argNode->gtLsraInfo.internalIntCount = 1;
+
+    GenTreePtr objChild = putArgChild->gtOp.gtOp1;
+    if (objChild->OperGet() == GT_LCL_VAR_ADDR)
+    {
+        // We will generate all of the code for the GT_PUTARG_SPLIT, the GT_OBJ and the GT_LCL_VAR_ADDR
+        // as one contained operation
+        //
+        MakeSrcContained(putArgChild, objChild);
+    }
+    MakeSrcContained(argNode, putArgChild);
+}
+#endif // _TARGET_ARM_
+
 //------------------------------------------------------------------------
 // TreeNodeInfoInitBlockStore: Set the NodeInfo for a block store.
 //
index 92091c9..588309d 100644 (file)
@@ -1439,6 +1439,13 @@ void fgArgInfo::ArgsComplete()
             continue;
 #endif
         }
+#if defined(_TARGET_ARM_) && !defined(LEGACY_BACKEND)
+        else if (curArgTabEntry->isSplit)
+        {
+            hasStructRegArg = true;
+            hasStackArgs    = true;
+        }
+#endif
         else // we have a register argument, next we look for a struct type.
         {
             if (varTypeIsStruct(argx) FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY(|| curArgTabEntry->isStruct))
@@ -1557,6 +1564,12 @@ void fgArgInfo::ArgsComplete()
                 {
                     prevArgTabEntry->needPlace = true;
                 }
+#if defined(_TARGET_ARM_) && !defined(LEGACY_BACKEND)
+                else if (prevArgTabEntry->isSplit)
+                {
+                    prevArgTabEntry->needPlace = true;
+                }
+#endif
 #endif
             }
         }
@@ -1566,9 +1579,6 @@ void fgArgInfo::ArgsComplete()
         // For RyuJIT backend we will expand a Multireg arg into a GT_FIELD_LIST
         // with multiple indirections, so here we consider spilling it into a tmp LclVar.
         //
-        // Note that Arm32 is a LEGACY_BACKEND and it defines FEATURE_MULTIREG_ARGS
-        // so we skip this for ARM32 until it is ported to use RyuJIT backend
-        //
 
         bool isMultiRegArg = (curArgTabEntry->numRegs > 1);
 
@@ -1579,6 +1589,10 @@ void fgArgInfo::ArgsComplete()
                 // Spill multireg struct arguments that have Assignments or Calls embedded in them
                 curArgTabEntry->needTmp = true;
             }
+#ifndef _TARGET_ARM_
+            // TODO-Arm: This optimization is not implemented for ARM32
+            // so we skip this for ARM32 until it is ported to use RyuJIT backend
+            //
             else
             {
                 // We call gtPrepareCost to measure the cost of evaluating this tree
@@ -1610,7 +1624,6 @@ void fgArgInfo::ArgsComplete()
                                 curArgTabEntry->needTmp = true;
                             }
                             break;
-
                         case 11:
                         case 13:
                         case 14:
@@ -1632,6 +1645,7 @@ void fgArgInfo::ArgsComplete()
                     }
                 }
             }
+#endif // !_TARGET_ARM_
         }
 #endif // FEATURE_MULTIREG_ARGS
 #endif // LEGACY_BACKEND
@@ -3415,11 +3429,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
                     size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd),
                                               TARGET_POINTER_SIZE)) /
                            TARGET_POINTER_SIZE;
-                    if (isHfaArg)
-                    {
-                        hasMultiregStructArgs = true;
-                    }
-                    else if (size > 1 && size <= 4)
+                    if (isHfaArg || size > 1)
                     {
                         hasMultiregStructArgs = true;
                     }
@@ -3806,18 +3816,10 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
                     hasMultiregStructArgs = true;
                 }
 #elif defined(_TARGET_ARM_)
-                // TODO-Arm: Need to handle the case
-                // where structs passed by value can be split between registers and stack.
-                if (size > 1 && size <= 4)
+                if (size > 1)
                 {
                     hasMultiregStructArgs = true;
                 }
-#ifndef LEGACY_BACKEND
-                else if (size > 4 && passUsingIntRegs)
-                {
-                    NYI_ARM("Struct can be split between registers and stack");
-                }
-#endif // !LEGACY_BACKEND
 #endif // _TARGET_ARM_
             }
 
@@ -4110,9 +4112,6 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
 #ifdef _TARGET_ARM_
                         if (fltArgRegNum > MAX_FLOAT_REG_ARG)
                         {
-#ifndef LEGACY_BACKEND
-                            NYI_ARM("Struct split between float registers and stack");
-#endif // !LEGACY_BACKEND
                             // This indicates a partial enregistration of a struct type
                             assert(varTypeIsStruct(argx));
                             unsigned numRegsPartial = size - (fltArgRegNum - MAX_FLOAT_REG_ARG);
@@ -4142,9 +4141,6 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
 #ifdef _TARGET_ARM_
                         if (intArgRegNum > MAX_REG_ARG)
                         {
-#ifndef LEGACY_BACKEND
-                            NYI_ARM("Struct split between integer registers and stack");
-#endif // !LEGACY_BACKEND
                             // This indicates a partial enregistration of a struct type
                             assert((isStructArg) || argx->OperIsFieldList() || argx->OperIsCopyBlkOp() ||
                                    (argx->gtOper == GT_COMMA && (args->gtFlags & GTF_ASG)));
@@ -4768,6 +4764,30 @@ GenTreePtr Compiler::fgMorphMultiregStructArg(GenTreePtr arg, fgArgTabEntryPtr f
     NYI("fgMorphMultiregStructArg requires implementation for this target");
 #endif
 
+#ifdef _TARGET_ARM_
+    if (fgEntryPtr->isSplit)
+    {
+        if (fgEntryPtr->isHfaRegArg)
+        {
+            // We cannot handle split struct morphed to GT_FIELD_LIST yet
+            NYI_ARM("Struct split between float registers and stack");
+        }
+        else if (fgEntryPtr->numSlots + fgEntryPtr->numRegs > 4)
+        {
+            return arg;
+        }
+        else
+        {
+            // We cannot handle split struct morphed to GT_FIELD_LIST yet
+            NYI_ARM("Struct split between integer registers and stack");
+        }
+    }
+    else if (!fgEntryPtr->isHfaRegArg && fgEntryPtr->numSlots > 4)
+    {
+        return arg;
+    }
+#endif
+
 #if FEATURE_MULTIREG_ARGS
     // Examine 'arg' and setup argValue objClass and structSize
     //
index 48ec6ea..7204663 100644 (file)
@@ -1527,6 +1527,9 @@ void RegSet::rsSpillTree(regNumber reg, GenTreePtr tree, unsigned regIdx /* =0 *
 
     GenTreeCall* call = nullptr;
     var_types    treeType;
+#if !defined(LEGACY_BACKEND) && defined(_TARGET_ARM_)
+    GenTreePutArgSplit* splitArg = nullptr;
+#endif
 
 #ifndef LEGACY_BACKEND
     if (tree->IsMultiRegCall())
@@ -1535,8 +1538,15 @@ void RegSet::rsSpillTree(regNumber reg, GenTreePtr tree, unsigned regIdx /* =0 *
         ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
         treeType                    = retTypeDesc->GetReturnRegType(regIdx);
     }
+#ifdef _TARGET_ARM_
+    else if (tree->OperIsPutArgSplit())
+    {
+        splitArg = tree->AsPutArgSplit();
+        treeType = splitArg->GetRegType(regIdx);
+    }
+#endif // _TARGET_ARM_
     else
-#endif
+#endif // !LEGACY_BACKEND
     {
         treeType = tree->TypeGet();
     }
@@ -1584,6 +1594,14 @@ void RegSet::rsSpillTree(regNumber reg, GenTreePtr tree, unsigned regIdx /* =0 *
         assert((regFlags & GTF_SPILL) != 0);
         regFlags &= ~GTF_SPILL;
     }
+#ifdef _TARGET_ARM_
+    else if (splitArg != nullptr)
+    {
+        regFlags = splitArg->GetRegSpillFlagByIdx(regIdx);
+        assert((regFlags & GTF_SPILL) != 0);
+        regFlags &= ~GTF_SPILL;
+    }
+#endif // _TARGET_ARM_
     else
     {
         assert(!varTypeIsMultiReg(tree));
@@ -1603,9 +1621,12 @@ void RegSet::rsSpillTree(regNumber reg, GenTreePtr tree, unsigned regIdx /* =0 *
         assert(tree->InReg());
         assert(tree->gtRegNum == reg);
     }
+#elif defined(_TARGET_ARM_)
+    assert(tree->gtRegNum == reg || (call != nullptr && call->GetRegNumByIdx(regIdx) == reg) ||
+           (splitArg != nullptr && splitArg->GetRegNumByIdx(regIdx) == reg));
 #else
     assert(tree->gtRegNum == reg || (call != nullptr && call->GetRegNumByIdx(regIdx) == reg));
-#endif // CPU_LONG_USES_REGPAIR
+#endif // !CPU_LONG_USES_REGPAIR && !_TARGET_ARM_
 
     // Are any registers free for spillage?
     SpillDsc* spill = SpillDsc::alloc(m_rsCompiler, this, tempType);
@@ -1726,6 +1747,13 @@ void RegSet::rsSpillTree(regNumber reg, GenTreePtr tree, unsigned regIdx /* =0 *
         regFlags |= GTF_SPILLED;
         call->SetRegSpillFlagByIdx(regFlags, regIdx);
     }
+#ifdef _TARGET_ARM_
+    else if (splitArg != nullptr)
+    {
+        regFlags |= GTF_SPILLED;
+        splitArg->SetRegSpillFlagByIdx(regFlags, regIdx);
+    }
+#endif // _TARGET_ARM_
 #endif //! LEGACY_BACKEND
 }
 
@@ -2355,6 +2383,15 @@ TempDsc* RegSet::rsUnspillInPlace(GenTreePtr tree, regNumber oldReg, unsigned re
         flags &= ~GTF_SPILLED;
         call->SetRegSpillFlagByIdx(flags, regIdx);
     }
+#if !defined(LEGACY_BACKEND) && defined(_TARGET_ARM_)
+    else if (tree->OperIsPutArgSplit())
+    {
+        GenTreePutArgSplit* splitArg = tree->AsPutArgSplit();
+        unsigned            flags    = splitArg->GetRegSpillFlagByIdx(regIdx);
+        flags &= ~GTF_SPILLED;
+        splitArg->SetRegSpillFlagByIdx(flags, regIdx);
+    }
+#endif // !LEGACY_BACKEND && _TARGET_ARM_
     else
     {
         tree->gtFlags &= ~GTF_SPILLED;