Multi-reg call node support.
author sivarv <sivarv@microsoft.com>
Wed, 4 May 2016 15:29:18 +0000 (08:29 -0700)
committer sivarv <sivarv@microsoft.com>
Wed, 4 May 2016 19:02:52 +0000 (12:02 -0700)
Commit migrated from https://github.com/dotnet/coreclr/commit/a88ba565faa592e5aeb59df22640961e81d8d037

14 files changed:
src/coreclr/src/jit/codegenarm.cpp
src/coreclr/src/jit/codegenarm64.cpp
src/coreclr/src/jit/codegenlegacy.cpp
src/coreclr/src/jit/codegenxarch.cpp
src/coreclr/src/jit/gentree.cpp
src/coreclr/src/jit/gentree.h
src/coreclr/src/jit/gtstructs.h
src/coreclr/src/jit/importer.cpp
src/coreclr/src/jit/lowerxarch.cpp
src/coreclr/src/jit/lsra.cpp
src/coreclr/src/jit/lsra.h
src/coreclr/src/jit/nodeinfo.h
src/coreclr/src/jit/regset.cpp
src/coreclr/src/jit/regset.h

index 53b7315..a3f5a00 100644 (file)
@@ -1821,7 +1821,7 @@ void CodeGen::genUnspillRegIfNeeded(GenTree *tree)
         }
         else
         {
-            TempDsc* t = regSet.rsUnspillInPlace(unspillTree);
+            TempDsc* t = regSet.rsUnspillInPlace(unspillTree, unspillTree->gtRegNum);
             compiler->tmpRlsTemp(t);
             getEmitter()->emitIns_R_S(ins_Load(unspillTree->gtType),
                             emitActualTypeSize(unspillTree->gtType),
index 031131b..2228aa1 100644 (file)
@@ -4706,7 +4706,7 @@ void CodeGen::genUnspillRegIfNeeded(GenTree *tree)
         }
         else
         {
-            TempDsc* t = regSet.rsUnspillInPlace(unspillTree);
+            TempDsc* t = regSet.rsUnspillInPlace(unspillTree, unspillTree->gtRegNum);
             getEmitter()->emitIns_R_S(ins_Load(unspillTree->gtType),
                             emitActualTypeSize(unspillTree->gtType),
                             dstReg,
index 2b34af8..b5a9f79 100644 (file)
@@ -7170,7 +7170,7 @@ DONE_LEA_ADD:
         {
             /* Get the temp we spilled into. */
 
-            TempDsc * temp = regSet.rsUnspillInPlace(op1);
+            TempDsc * temp = regSet.rsUnspillInPlace(op1, op1->gtRegNum);
 
             /* For 8bit operations, we need to make sure that op2 is
                in a byte-addressable registers */
index b75dc9f..0250977 100644 (file)
@@ -185,16 +185,18 @@ void                CodeGen::genEmitGSCookieCheck(bool pushReg)
         // Handle multi-reg return type values
         if (compiler->compMethodReturnsMultiRegRetType())
         {
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
             ReturnTypeDesc retTypeDesc;
             retTypeDesc.Initialize(compiler, compiler->info.compMethodInfo->args.retTypeClass);
+            unsigned regCount = retTypeDesc.GetReturnRegCount();
 
-            assert(retTypeDesc.GetReturnRegCount() == CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_RETURN_IN_REGISTERS);
+            // Only the x86 and x64 Unix ABIs allow multi-reg returns, and the
+            // number of result regs should be equal to MAX_RET_REG_COUNT.
+            assert(regCount == MAX_RET_REG_COUNT);
 
-            // Set the GC-ness of the struct return registers.
-            gcInfo.gcMarkRegPtrVal(REG_INTRET, retTypeDesc.GetReturnRegType(0));
-            gcInfo.gcMarkRegPtrVal(REG_INTRET_1, retTypeDesc.GetReturnRegType(1));
-#endif 
+            for (unsigned i = 0; i < regCount; ++i)
+            {
+                gcInfo.gcMarkRegPtrVal(retTypeDesc.GetABIReturnReg(i), retTypeDesc.GetReturnRegType(i));
+            }
         }
         else if (compiler->compMethodReturnsRetBufAddr())
         {
@@ -1510,6 +1512,8 @@ CodeGen::isStructReturn(GenTreePtr treeNode)
 // Return Value:
 //    None
 //
+// Assumption:
+//    op1 of GT_RETURN node is either GT_LCL_VAR or multi-reg GT_CALL
 void
 CodeGen::genStructReturn(GenTreePtr treeNode)
 {
@@ -1518,9 +1522,6 @@ CodeGen::genStructReturn(GenTreePtr treeNode)
     var_types targetType = treeNode->TypeGet();
 
 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
-    noway_assert((op1->OperGet() == GT_LCL_VAR) ||
-                 (op1->OperGet() == GT_CALL));
-
     if (op1->OperGet() == GT_LCL_VAR)
     {
         assert(op1->isContained());
@@ -1531,31 +1532,94 @@ CodeGen::genStructReturn(GenTreePtr treeNode)
 
         ReturnTypeDesc retTypeDesc;
         retTypeDesc.Initialize(compiler, varDsc->lvVerTypeInfo.GetClassHandle());
-        assert(retTypeDesc.GetReturnRegCount() == CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_RETURN_IN_REGISTERS);
-
-        var_types type0 = retTypeDesc.GetReturnRegType(0);
-        var_types type1 = retTypeDesc.GetReturnRegType(1);
+        unsigned regCount = retTypeDesc.GetReturnRegCount();
+        assert(regCount == MAX_RET_REG_COUNT);
 
-        regNumber reg0 = retTypeDesc.GetABIReturnReg(0);
-        regNumber reg1 = retTypeDesc.GetABIReturnReg(1);
-        assert(reg0 != REG_NA && reg1 != REG_NA);
-
-        // Move the values into the return registers     
-        getEmitter()->emitIns_R_S(ins_Load(type0), emitTypeSize(type0), reg0, lclVar->gtLclNum, 0);
-        getEmitter()->emitIns_R_S(ins_Load(type1), emitTypeSize(type1), reg1, lclVar->gtLclNum, 8);
+        // Move the value into ABI return registers     
+        int offset = 0;
+        for (unsigned i = 0; i < regCount; ++i)
+        {
+            var_types type = retTypeDesc.GetReturnRegType(i);
+            regNumber reg = retTypeDesc.GetABIReturnReg(i);
+            getEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), reg, lclVar->gtLclNum, offset);
+            offset += genTypeSize(type);
+        }
     }
     else
     {
-        // Assumption: multi-reg return value of a GT_CALL node is never spilled.
-        // TODO-BUG: support for multi-reg call nodes.
+        assert(op1->IsMultiRegCall());
+        genConsumeRegs(op1);
 
-        assert(op1->OperGet() == GT_CALL);
-        assert((op1->gtFlags & GTF_SPILLED) == 0);
-    }
+        GenTreeCall* call = op1->AsCall();
+        ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
+        unsigned regCount = retTypeDesc->GetReturnRegCount();
+        assert(regCount == MAX_RET_REG_COUNT);
+
+        // Handle circular dependency between call allocated regs and ABI return regs.
+        //
+        // It is possible under LSRA stress that originally allocated regs of call node,
+        // say rax and rdx, are spilled and reloaded to rdx and rax respectively.  But
+        // GT_RETURN needs to move values as follows: rdx->rax, rax->rdx. A similar
+        // kind of circular dependency could arise between xmm0 and xmm1 return regs.
+        // Codegen is expected to handle such circular dependency.
+        //
+        var_types regType0 = retTypeDesc->GetReturnRegType(0);
+        regNumber returnReg0 = retTypeDesc->GetABIReturnReg(0);
+        regNumber allocatedReg0 = call->GetRegNumByIdx(0);
+
+        var_types regType1 = retTypeDesc->GetReturnRegType(1);
+        regNumber returnReg1 = retTypeDesc->GetABIReturnReg(1);
+        regNumber allocatedReg1 = call->GetRegNumByIdx(1);
+        
+        if (allocatedReg0 == returnReg1 &&
+            allocatedReg1 == returnReg0)
+        {
+            // Circular dependency - swap allocatedReg0 and allocatedReg1
+            if (varTypeIsFloating(regType0))
+            {
+                assert(varTypeIsFloating(regType1));
+
+                // The fastest way to swap two XMM regs is using PXOR
+                inst_RV_RV(INS_pxor, allocatedReg0, allocatedReg1, TYP_DOUBLE);
+                inst_RV_RV(INS_pxor, allocatedReg1, allocatedReg0, TYP_DOUBLE);
+                inst_RV_RV(INS_pxor, allocatedReg0, allocatedReg1, TYP_DOUBLE);
+            }
+            else
+            {
+                assert(varTypeIsIntegral(regType0));
+                assert(varTypeIsIntegral(regType1));
+                inst_RV_RV(INS_xchg, allocatedReg1, allocatedReg0, TYP_I_IMPL);
+            }
+        }
+        else if (allocatedReg1 == returnReg0)
+        {
+            // Change the order of moves to correctly handle dependency.
+            if (allocatedReg1 != returnReg1)
+            {
+                inst_RV_RV(ins_Copy(regType1), returnReg1, allocatedReg1, regType1);
+            }
 
+            if (allocatedReg0 != returnReg0)
+            {
+                inst_RV_RV(ins_Copy(regType0), returnReg0, allocatedReg0, regType0);
+            }
+        }
+        else
+        {
+            // No circular dependency case.
+            if (allocatedReg0 != returnReg0)
+            {
+                inst_RV_RV(ins_Copy(regType0), returnReg0, allocatedReg0, regType0);
+            }
 
+            if (allocatedReg1 != returnReg1)
+            {
+                inst_RV_RV(ins_Copy(regType1), returnReg1, allocatedReg1, regType1);
+            }
+        }
+    }
 #else
-    assert("!unreached");
+    unreached();
 #endif   
 }
 
@@ -1650,7 +1714,7 @@ CodeGen::genReturn(GenTreePtr treeNode)
                     op1->gtFlags |= GTF_SPILLED;
                     op1->gtFlags &= ~GTF_SPILL;
 
-                    TempDsc* t = regSet.rsUnspillInPlace(op1);
+                    TempDsc* t = regSet.rsUnspillInPlace(op1, op1->gtRegNum);
                     inst_FS_ST(INS_fld, emitActualTypeSize(op1->gtType), t, 0);
                     op1->gtFlags &= ~GTF_SPILLED;
                     compiler->tmpRlsTemp(t);
@@ -1960,9 +2024,11 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
 
     case GT_STORE_LCL_VAR:
         {  
+            GenTreePtr op1 = treeNode->gtGetOp1();
+
             // var = call, where call returns a multi-reg return value
             // case is handled separately.
-            if (treeNode->IsMultiRegCallStoreToLocal())
+            if (op1->gtSkipReloadOrCopy()->IsMultiRegCall())
             {
                 genMultiRegCallStoreToLocal(treeNode);
             }
@@ -1984,8 +2050,7 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
                     break;
                 }
 #endif // !defined(_TARGET_64BIT_)
-
-                GenTreePtr op1 = treeNode->gtGetOp1();
+                
                 genConsumeRegs(op1);
 
                 if (treeNode->gtRegNum == REG_NA)
@@ -2635,12 +2700,12 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
 //    None
 //
 // Assumption:
-//    The child of store is a GT_CALL node.
+//    The child of store is a multi-reg call node.
 //
 void
 CodeGen::genMultiRegCallStoreToLocal(GenTreePtr treeNode)
 {
-    assert(treeNode->IsMultiRegCallStoreToLocal());
+    assert(treeNode->OperGet() == GT_STORE_LCL_VAR);    
 
 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
     // Structs of size >=9 and <=16 are returned in two return registers on x64 Unix.
@@ -2649,38 +2714,44 @@ CodeGen::genMultiRegCallStoreToLocal(GenTreePtr treeNode)
     // Assumption: struct local var needs to be in memory
     noway_assert(!treeNode->InReg());
 
+    // Assumption: current x64 Unix implementation requires that a multi-reg struct
+    // var in 'var = call' is flagged as lvIsMultiRegArgOrRet to prevent it from
+    // being struct promoted.  
+    unsigned lclNum = treeNode->AsLclVarCommon()->gtLclNum;
+    LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
+    noway_assert(varDsc->lvIsMultiRegArgOrRet);
+    
     GenTree* op1 = treeNode->gtGetOp1();
-    GenTreeCall* actualOp1 = op1->gtSkipReloadOrCopy()->AsCall();
-    assert(actualOp1->HasMultiRegRetVal());
+    GenTree* actualOp1 = op1->gtSkipReloadOrCopy();
+    GenTreeCall* call = actualOp1->AsCall();
+    assert(call->HasMultiRegRetVal());
 
     genConsumeRegs(op1);
 
-    ReturnTypeDesc* retTypeDesc = actualOp1->GetReturnTypeDesc();
-    assert(retTypeDesc->GetReturnRegCount() == CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_RETURN_IN_REGISTERS); 
+    ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
+    assert(retTypeDesc->GetReturnRegCount() == MAX_RET_REG_COUNT);
+    unsigned regCount = retTypeDesc->GetReturnRegCount();
 
-    var_types type0 = retTypeDesc->GetReturnRegType(0);
-    var_types type1 = retTypeDesc->GetReturnRegType(1);
-
-    regNumber reg0 = retTypeDesc->GetABIReturnReg(0);
-    regNumber reg1 = retTypeDesc->GetABIReturnReg(1);
-
-    assert(reg0 != REG_NA && reg1 != REG_NA);
-
-    // Assumption: multi-reg return value of a GT_CALL node never gets spilled.
-    // TODO-BUG: support for multi-reg GT_CALL nodes.
-
-    unsigned lclNum = treeNode->AsLclVarCommon()->gtLclNum;
-    LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
-
-    // Assumption: current x64 Unix implementation requires that a multi-reg struct
-    // var in 'var = call' is flagged as lvIsMultiRegArgOrRet to prevent it from
-    // being struct poromoted.  
-    //
-
-    noway_assert(varDsc->lvIsMultiRegArgOrRet);
+    int offset = 0;
+    for (unsigned i = 0; i < regCount; ++i)
+    {
+        var_types type = retTypeDesc->GetReturnRegType(i);
+        regNumber reg = call->GetRegNumByIdx(i);
+        if (op1->IsCopyOrReload())
+        {
+            // GT_COPY/GT_RELOAD will have valid reg for those positions
+            // that need to be copied or reloaded.
+            regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(i);
+            if (reloadReg != REG_NA)
+            {
+                reg = reloadReg;
+            }
+        }
 
-    getEmitter()->emitIns_S_R(ins_Store(type0), emitTypeSize(type0), reg0, lclNum, 0);
-    getEmitter()->emitIns_S_R(ins_Store(type1), emitTypeSize(type1), reg1, lclNum, 8);
+        assert(reg != REG_NA);
+        getEmitter()->emitIns_S_R(ins_Store(type), emitTypeSize(type), reg, lclNum, offset);
+        offset += genTypeSize(type);
+    }
 #else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
     assert(!"Unreached");
 #endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
@@ -4355,13 +4426,14 @@ void CodeGen::genCodeForShiftRMW(GenTreeStoreInd* storeInd)
 void CodeGen::genUnspillRegIfNeeded(GenTree *tree)
 {
     regNumber dstReg = tree->gtRegNum;
-
     GenTree* unspillTree = tree;
+
     if (tree->gtOper == GT_RELOAD)
     {
         unspillTree = tree->gtOp.gtOp1;
     }
-    if (unspillTree->gtFlags & GTF_SPILLED)
+
+    if ((unspillTree->gtFlags & GTF_SPILLED) != 0)
     {
         if (genIsRegCandidateLocal(unspillTree))
         {
@@ -4439,22 +4511,72 @@ void CodeGen::genUnspillRegIfNeeded(GenTree *tree)
 
                 regSet.AddMaskVars(genGetRegMask(varDsc));
             }
+
+            gcInfo.gcMarkRegPtrVal(dstReg, unspillTree->TypeGet());
+        }
+        else if (unspillTree->IsMultiRegCall())
+        {
+            GenTreeCall* call = unspillTree->AsCall();
+            ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
+            unsigned regCount = retTypeDesc->GetReturnRegCount();
+            GenTreeCopyOrReload* reloadTree = nullptr;
+            if (tree->OperGet() == GT_RELOAD)
+            {
+                reloadTree = tree->AsCopyOrReload();
+            }
+
+            // In case of a multi-reg call node, the GTF_SPILLED flag on it indicates that
+            // one or more of its result regs are spilled.  The call node needs to be
+            // queried to know which specific result regs need to be unspilled.
+            for (unsigned i = 0; i < regCount; ++i)
+            {
+                unsigned flags = call->GetRegSpillFlagByIdx(i);
+                if ((flags & GTF_SPILLED) != 0)
+                {
+                    var_types dstType = retTypeDesc->GetReturnRegType(i);
+                    regNumber unspillTreeReg = call->GetRegNumByIdx(i);
+
+                    if (reloadTree != nullptr)
+                    {                        
+                        dstReg = reloadTree->GetRegNumByIdx(i);
+                        if (dstReg == REG_NA)
+                        {
+                            dstReg = unspillTreeReg;
+                        }
+                    }
+                    else
+                    {
+                        dstReg = unspillTreeReg;
+                    }
+
+                    TempDsc* t = regSet.rsUnspillInPlace(call, unspillTreeReg, i);
+                    getEmitter()->emitIns_R_S(ins_Load(dstType),
+                                              emitActualTypeSize(dstType),
+                                              dstReg,
+                                              t->tdTempNum(),
+                                              0);
+                    compiler->tmpRlsTemp(t);
+                    gcInfo.gcMarkRegPtrVal(dstReg, dstType);                   
+                }
+            }
+
+            unspillTree->gtFlags &= ~GTF_SPILLED;
+            unspillTree->SetInReg();
         }
         else
         {
-            TempDsc* t = regSet.rsUnspillInPlace(unspillTree);
+            TempDsc* t = regSet.rsUnspillInPlace(unspillTree, unspillTree->gtRegNum);
             getEmitter()->emitIns_R_S(ins_Load(unspillTree->gtType),
-                            emitActualTypeSize(unspillTree->gtType),
-                            dstReg,
-                            t->tdTempNum(),
-                            0);
+                                      emitActualTypeSize(unspillTree->TypeGet()),
+                                      dstReg,
+                                      t->tdTempNum(),
+                                      0);
             compiler->tmpRlsTemp(t);
 
             unspillTree->gtFlags &= ~GTF_SPILLED;
             unspillTree->SetInReg();
-        }
-
-        gcInfo.gcMarkRegPtrVal(dstReg, unspillTree->TypeGet());
+            gcInfo.gcMarkRegPtrVal(dstReg, unspillTree->TypeGet());
+        }        
     }
 }
 
@@ -4477,78 +4599,107 @@ void CodeGen::genConsumeRegAndCopy(GenTree *tree, regNumber needReg)
 void CodeGen::genRegCopy(GenTree* treeNode)
 {
     assert(treeNode->OperGet() == GT_COPY);
-    var_types targetType = treeNode->TypeGet();
-    regNumber targetReg = treeNode->gtRegNum;
-    assert(targetReg != REG_NA);
+    GenTree* op1 = treeNode->gtOp.gtOp1;  
 
-    GenTree* op1 = treeNode->gtOp.gtOp1;
+    if (op1->IsMultiRegCall())
+    {
+        genConsumeReg(op1);
 
-    // Check whether this node and the node from which we're copying the value have the same
-    // register type.
-    // This can happen if (currently iff) we have a SIMD vector type that fits in an integer
-    // register, in which case it is passed as an argument, or returned from a call,
-    // in an integer register and must be copied if it's in an xmm register.
+        GenTreeCopyOrReload* copyTree = treeNode->AsCopyOrReload();
+        GenTreeCall* call = op1->AsCall();
+        ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
+        unsigned regCount = retTypeDesc->GetReturnRegCount();
 
-    bool srcFltReg = (varTypeIsFloating(op1)      || varTypeIsSIMD(op1));
-    bool tgtFltReg = (varTypeIsFloating(treeNode) || varTypeIsSIMD(treeNode));
-    if (srcFltReg != tgtFltReg)
-    {
-        instruction ins;
-        regNumber fpReg;
-        regNumber intReg;
-        if (tgtFltReg)
-        {
-            ins = ins_CopyIntToFloat(op1->TypeGet(), treeNode->TypeGet());
-            fpReg = targetReg;
-            intReg = op1->gtRegNum;
-        }
-        else
+        for (unsigned i = 0; i < regCount; ++i)
         {
-            ins = ins_CopyFloatToInt(op1->TypeGet(), treeNode->TypeGet());
-            intReg = targetReg;
-            fpReg = op1->gtRegNum;
+            var_types type = retTypeDesc->GetReturnRegType(i);
+            regNumber fromReg = call->GetRegNumByIdx(i);
+            regNumber toReg = copyTree->GetRegNumByIdx(i);
+            
+            // A multi-reg GT_COPY node will have a valid reg only for those
+            // positions whose corresponding result reg of the call node
+            // needs to be copied.
+            if (toReg != REG_NA)
+            {
+                assert(toReg != fromReg);
+                inst_RV_RV(ins_Copy(type), toReg, fromReg, type);
+            }
         }
-        inst_RV_RV(ins, fpReg, intReg, targetType);
     }
     else
     {
-        inst_RV_RV(ins_Copy(targetType), targetReg, genConsumeReg(op1), targetType);
-    }
+        var_types targetType = treeNode->TypeGet();
+        regNumber targetReg = treeNode->gtRegNum;
+        assert(targetReg != REG_NA);
 
-    if (op1->IsLocal())
-    {
-        // The lclVar will never be a def.
-        // If it is a last use, the lclVar will be killed by genConsumeReg(), as usual, and genProduceReg will
-        // appropriately set the gcInfo for the copied value.
-        // If not, there are two cases we need to handle:
-        // - If this is a TEMPORARY copy (indicated by the GTF_VAR_DEATH flag) the variable
-        //   will remain live in its original register.
-        //   genProduceReg() will appropriately set the gcInfo for the copied value,
-        //   and genConsumeReg will reset it.
-        // - Otherwise, we need to update register info for the lclVar.
+        // Check whether this node and the node from which we're copying the value have
+        // different register types. This can happen if (currently iff) we have a SIMD
+        // vector type that fits in an integer register, in which case it is passed as
+        // an argument, or returned from a call, in an integer register and must be
+        // copied if it's in an xmm register.
 
-        GenTreeLclVarCommon* lcl = op1->AsLclVarCommon();
-        assert((lcl->gtFlags & GTF_VAR_DEF) == 0);
+        bool srcFltReg = (varTypeIsFloating(op1) || varTypeIsSIMD(op1));
+        bool tgtFltReg = (varTypeIsFloating(treeNode) || varTypeIsSIMD(treeNode));
+        if (srcFltReg != tgtFltReg)
+        {
+            instruction ins;
+            regNumber fpReg;
+            regNumber intReg;
+            if (tgtFltReg)
+            {
+                ins = ins_CopyIntToFloat(op1->TypeGet(), treeNode->TypeGet());
+                fpReg = targetReg;
+                intReg = op1->gtRegNum;
+            }
+            else
+            {
+                ins = ins_CopyFloatToInt(op1->TypeGet(), treeNode->TypeGet());
+                intReg = targetReg;
+                fpReg = op1->gtRegNum;
+            }
+            inst_RV_RV(ins, fpReg, intReg, targetType);
+        }
+        else
+        {
+            inst_RV_RV(ins_Copy(targetType), targetReg, genConsumeReg(op1), targetType);
+        }
 
-        if ((lcl->gtFlags & GTF_VAR_DEATH) == 0 && (treeNode->gtFlags & GTF_VAR_DEATH) == 0)
+        if (op1->IsLocal())
         {
-            LclVarDsc* varDsc = &compiler->lvaTable[lcl->gtLclNum];
+            // The lclVar will never be a def.
+            // If it is a last use, the lclVar will be killed by genConsumeReg(), as usual, and genProduceReg will
+            // appropriately set the gcInfo for the copied value.
+            // If not, there are two cases we need to handle:
+            // - If this is a TEMPORARY copy (indicated by the GTF_VAR_DEATH flag) the variable
+            //   will remain live in its original register.
+            //   genProduceReg() will appropriately set the gcInfo for the copied value,
+            //   and genConsumeReg will reset it.
+            // - Otherwise, we need to update register info for the lclVar.
+
+            GenTreeLclVarCommon* lcl = op1->AsLclVarCommon();
+            assert((lcl->gtFlags & GTF_VAR_DEF) == 0);
 
-            // If we didn't just spill it (in genConsumeReg, above), then update the register info
-            if (varDsc->lvRegNum != REG_STK)
+            if ((lcl->gtFlags & GTF_VAR_DEATH) == 0 && (treeNode->gtFlags & GTF_VAR_DEATH) == 0)
             {
-                // The old location is dying
-                genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(op1));
+                LclVarDsc* varDsc = &compiler->lvaTable[lcl->gtLclNum];
 
-                gcInfo.gcMarkRegSetNpt(genRegMask(op1->gtRegNum));
+                // If we didn't just spill it (in genConsumeReg, above), then update the register info
+                if (varDsc->lvRegNum != REG_STK)
+                {
+                    // The old location is dying
+                    genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(op1));
+
+                    gcInfo.gcMarkRegSetNpt(genRegMask(op1->gtRegNum));
 
-                genUpdateVarReg(varDsc, treeNode);
+                    genUpdateVarReg(varDsc, treeNode);
 
-                // The new location is going live
-                genUpdateRegLife(varDsc, /*isBorn*/ true, /*isDying*/ false DEBUGARG(treeNode));
+                    // The new location is going live
+                    genUpdateRegLife(varDsc, /*isBorn*/ true, /*isDying*/ false DEBUGARG(treeNode));
+                }
             }
         }
     }
+
     genProduceReg(treeNode);
 }
 
@@ -4585,13 +4736,24 @@ void CodeGen::genCheckConsumeNode(GenTree* treeNode)
 }
 #endif // DEBUG
 
-// Do liveness update for a subnode that is being consumed by codegen.
-regNumber CodeGen::genConsumeReg(GenTree *tree)
+//--------------------------------------------------------------------
+// genConsumeReg: Do liveness update for a subnode that is being
+// consumed by codegen.
+//
+// Arguments:
+//    tree - GenTree node
+//
+// Return Value:
+//    Returns the reg number of tree.
+//    In case of multi-reg call node returns the first reg number
+//    of the multi-reg return.
+regNumber CodeGen::genConsumeReg(GenTree* tree)
 {
     if (tree->OperGet() == GT_COPY)
     {
         genRegCopy(tree);
     }
+
     // Handle the case where we have a lclVar that needs to be copied before use (i.e. because it
     // interferes with one of the other sources (or the target, if it's a "delayed use" register)). 
     // TODO-Cleanup: This is a special copyReg case in LSRA - consider eliminating these and
@@ -4604,7 +4766,7 @@ regNumber CodeGen::genConsumeReg(GenTree *tree)
     // because if it's on the stack it will always get reloaded into tree->gtRegNum).
     if (genIsRegCandidateLocal(tree))
     {
-        GenTreeLclVarCommon *lcl = tree->AsLclVarCommon();
+        GenTreeLclVarCommon* lcl = tree->AsLclVarCommon();
         LclVarDsc* varDsc = &compiler->lvaTable[lcl->GetLclNum()];
         if (varDsc->lvRegNum != REG_STK && varDsc->lvRegNum != tree->gtRegNum)
         {
@@ -4617,7 +4779,7 @@ regNumber CodeGen::genConsumeReg(GenTree *tree)
     // genUpdateLife() will also spill local var if marked as GTF_SPILL by calling CodeGen::genSpillVar
     genUpdateLife(tree);
 
-    assert(tree->gtRegNum != REG_NA);
+    assert(tree->gtHasReg());
 
     // there are three cases where consuming a reg means clearing the bit in the live mask
     // 1. it was not produced by a local
@@ -4642,7 +4804,7 @@ regNumber CodeGen::genConsumeReg(GenTree *tree)
     }
     else
     {
-        gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum));
+        gcInfo.gcMarkRegSetNpt(tree->gtGetRegMask());
     }
 
     genCheckConsumeNode(tree);
@@ -4906,11 +5068,27 @@ void CodeGen::genConsumeBlockOp(GenTreeBlkOp* blkNode, regNumber dstReg, regNumb
     }
 }
 
-// do liveness update for register produced by the current node in codegen
-void CodeGen::genProduceReg(GenTree *tree)
+//-------------------------------------------------------------------------
+// genProduceReg: do liveness update for register produced by the current
+// node in codegen.
+//
+// Arguments:
+//     tree   -  Gentree node
+//
+// Return Value:
+//     None.
+void CodeGen::genProduceReg(GenTree* tree)
 {
     if (tree->gtFlags & GTF_SPILL)
     {
+        // Code for GT_COPY node gets generated as part of consuming regs by its parent.
+        // A GT_COPY node in turn produces reg result and it should never be marked to
+        // spill.
+        //
+        // Similarly GT_RELOAD node gets generated as part of consuming regs by its
+        // parent and should never be marked for spilling.
+        noway_assert(!tree->IsCopyOrReload());
+
         if (genIsRegCandidateLocal(tree))
         {
             // Store local variable to its home location.
@@ -4922,11 +5100,38 @@ void CodeGen::genProduceReg(GenTree *tree)
         }
         else
         {
-            tree->SetInReg();
-            regSet.rsSpillTree(tree->gtRegNum, tree);
+            // In case of multi-reg call node, spill flag on call node
+            // indicates that one or more of its allocated regs need to
+            // be spilled.  Call node needs to be further queried to 
+            // know which of its result regs needs to be spilled.
+            if (tree->IsMultiRegCall())
+            {
+                GenTreeCall* call = tree->AsCall();
+                ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
+                unsigned regCount = retTypeDesc->GetReturnRegCount();
+
+                for (unsigned i = 0; i < regCount; ++i)
+                {
+                    unsigned flags = call->GetRegSpillFlagByIdx(i);
+                    if ((flags & GTF_SPILL) != 0)
+                    {
+                        regNumber reg = call->GetRegNumByIdx(i);
+                        call->SetInReg();
+                        regSet.rsSpillTree(reg, call, i);
+                        gcInfo.gcMarkRegSetNpt(genRegMask(reg));                      
+                    }
+                }
+            }
+            else
+            {
+                tree->SetInReg();
+                regSet.rsSpillTree(tree->gtRegNum, tree);
+                gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum));
+            }
+
             tree->gtFlags |= GTF_SPILLED;
             tree->gtFlags &= ~GTF_SPILL;
-            gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum));
+
             return;
         }
     }
@@ -4945,8 +5150,51 @@ void CodeGen::genProduceReg(GenTree *tree)
         //    the register as live, with a GC pointer, if the variable is dead.
         if (!genIsRegCandidateLocal(tree) ||
             ((tree->gtFlags & GTF_VAR_DEATH) == 0))
-        {
-            gcInfo.gcMarkRegPtrVal(tree->gtRegNum, tree->TypeGet());
+        {            
+            // Multi-reg call node will produce more than one register result.
+            // Mark all the regs produced by call node.
+            if (tree->IsMultiRegCall())
+            {
+                GenTreeCall* call = tree->AsCall();
+                ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
+                unsigned regCount = retTypeDesc->GetReturnRegCount();
+
+                for (unsigned i = 0; i < regCount; ++i)
+                {
+                    regNumber reg = call->GetRegNumByIdx(i);
+                    var_types type = retTypeDesc->GetReturnRegType(i);
+                    gcInfo.gcMarkRegPtrVal(reg, type);
+                }
+            }
+            else if (tree->IsCopyOrReloadOfMultiRegCall())
+            {
+                // we should never see reload of multi-reg call here
+                // because GT_RELOAD gets generated in reg consuming path.
+                noway_assert(tree->OperGet() == GT_COPY);
+
+                // A multi-reg GT_COPY node produces those regs to which
+                // copy has taken place.
+                GenTreeCopyOrReload* copy = tree->AsCopyOrReload();
+                GenTreeCall* call = copy->gtGetOp1()->AsCall();
+                ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
+                unsigned regCount = retTypeDesc->GetReturnRegCount();
+
+                for (unsigned i = 0; i < regCount; ++i)
+                {
+                    var_types type = retTypeDesc->GetReturnRegType(i);
+                    regNumber fromReg = call->GetRegNumByIdx(i);
+                    regNumber toReg = copy->GetRegNumByIdx(i);
+
+                    if (toReg != REG_NA)
+                    {
+                        gcInfo.gcMarkRegPtrVal(toReg, type);
+                    }
+                }
+            }
+            else
+            {
+                gcInfo.gcMarkRegPtrVal(tree->gtRegNum, tree->TypeGet());
+            }
         }
     }
     tree->SetInReg();
@@ -5477,20 +5725,19 @@ void CodeGen::genCallInstruction(GenTreePtr node)
     }
 
     // Determine return value size(s).
+    ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
     emitAttr retSize = EA_PTRSIZE;
-
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
     emitAttr secondRetSize = EA_UNKNOWN;
-    if (varTypeIsStruct(call->gtType))
+
+    if (call->HasMultiRegRetVal())
     {
-        assert(call->HasMultiRegRetVal());
-        ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
         retSize = emitTypeSize(retTypeDesc->GetReturnRegType(0));
         secondRetSize = emitTypeSize(retTypeDesc->GetReturnRegType(1));
     }
     else
-#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING  
     {
+        assert(!varTypeIsStruct(call));
+
         if (call->gtType == TYP_REF ||
             call->gtType == TYP_ARRAY)
         {
@@ -5688,25 +5935,43 @@ void CodeGen::genCallInstruction(GenTreePtr node)
 #endif // _TARGET_X86_
         {
             regNumber returnReg;
-            // TODO-Cleanup: For UNIX AMD64, we should not be allocating a return register for struct
-            // returns that are on stack.
-            // For the SIMD case, however, we do want a "return register", as the consumer of the call
-            // will want the value in a register. In future we should flexibly allocate this return
-            // register, but that should be done with a general cleanup of the allocation of return
-            // registers for structs.
-            if (varTypeIsFloating(returnType)
-                FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY( || varTypeIsSIMD(returnType)))
+
+            if (call->HasMultiRegRetVal())
             {
-                returnReg = REG_FLOATRET;
+                assert(retTypeDesc != nullptr);
+                unsigned regCount = retTypeDesc->GetReturnRegCount();
+
+                // If regs allocated to call node are different from ABI return
+                // regs in which the call has returned its result, move the result
+                // to regs allocated to call node.
+                for (unsigned i = 0; i < regCount; ++i)
+                {
+                    var_types regType = retTypeDesc->GetReturnRegType(i);
+                    returnReg = retTypeDesc->GetABIReturnReg(i);
+                    regNumber allocatedReg = call->GetRegNumByIdx(i);
+                    if (returnReg != allocatedReg)
+                    {
+                        inst_RV_RV(ins_Copy(regType), allocatedReg, returnReg, regType);
+                    }
+                }
             }
             else
-            {
-                returnReg = REG_INTRET;
-            }
-            if (call->gtRegNum != returnReg)
-            {
-                inst_RV_RV(ins_Copy(returnType), call->gtRegNum, returnReg, returnType);
+            {                
+                if (varTypeIsFloating(returnType))
+                {
+                    returnReg = REG_FLOATRET;
+                }
+                else
+                {
+                    returnReg = REG_INTRET;
+                }
+
+                if (call->gtRegNum != returnReg)
+                {
+                    inst_RV_RV(ins_Copy(returnType), call->gtRegNum, returnReg, returnType);
+                }                
             }
+
             genProduceReg(call);
         }
     }
index 5dbdfe3..53101d8 100644 (file)
@@ -1091,9 +1091,16 @@ Compiler::fgWalkResult  Compiler::fgWalkTree(GenTreePtr  * pTree,
     return result;
 }
 
-// Sets the register to the "no register assignment" value, depending upon the type
-// of the node, and whether it fits any of the special cases for register pairs.
-
+// ------------------------------------------------------------------------------------------
+// gtClearReg: Sets the register to the "no register assignment" value, depending upon
+// the type of the node, and whether it fits any of the special cases for register pairs
+// or multi-reg call nodes.
+//
+// Arguments:
+//     compiler  -  compiler instance
+//
+// Return Value:
+//     None
 void
 GenTree::gtClearReg(Compiler* compiler)
 {
@@ -1109,6 +1116,218 @@ GenTree::gtClearReg(Compiler* compiler)
     {
         gtRegNum = REG_NA;
     }
+
+    // Also clear multi-reg state if this is a call node
+    if (IsCall())
+    {
+        this->AsCall()->ClearOtherRegs();
+    }
+    else if (IsCopyOrReload())
+    {
+        this->AsCopyOrReload()->ClearOtherRegs();
+    }
+}
+
+//-----------------------------------------------------------
+// CopyReg: Transfer the register assignment of another node to this one.
+//
+// Copies the _gtRegNum/_gtRegPair/gtRegTag state and, for call and
+// copy/reload nodes, the additional multi-reg register slots as well.
+//
+// Arguments:
+//     from   -  GenTree node whose register state is copied
+//
+// Return Value:
+//     None
+void GenTree::CopyReg(GenTreePtr from)
+{
+    // The copy goes through _gtRegPair, so it has to be at least as large
+    // as _gtRegNum for the whole value to be transferred.
+    C_ASSERT(sizeof(_gtRegPair) >= sizeof(_gtRegNum));
+
+    // The source value may be undefined (gtRegTag == GT_REGTAG_NONE); it is
+    // copied verbatim together with its tag.
+    _gtRegPair = from->_gtRegPair;
+    INDEBUG(gtRegTag = from->gtRegTag;)
+
+    if (IsCall())
+    {
+        // A call carries its extra return registers in gtOtherRegs.
+        assert(from->IsCall());
+        GenTreeCall* srcCall = from->AsCall();
+        AsCall()->CopyOtherRegs(srcCall);
+        return;
+    }
+
+    if (IsCopyOrReload())
+    {
+        // Likewise for a GT_COPY/GT_RELOAD node.
+        GenTreeCopyOrReload* srcCopyOrReload = from->AsCopyOrReload();
+        AsCopyOrReload()->CopyOtherRegs(srcCopyOrReload);
+    }
+}
+
+//------------------------------------------------------------------
+// gtHasReg: Whether node beeen assigned a register by LSRA
+//
+// Arguments:
+//    None
+//
+// Return Value:
+//    Returns true if the node was assigned a register.
+//
+//    In case of multi-reg call nodes, it is considered
+//    having a reg if regs are allocated for all its
+//    return values.
+//
+//    In case of GT_COPY or GT_RELOAD of a multi-reg call,
+//    GT_COPY/GT_RELOAD is considered having a reg if it
+//    has a reg assigned to any of its positions.
+//
+// Assumption:
+//    In order for this to work properly, gtClearReg must be called
+//    prior to setting the register value.
+//
+bool GenTree::gtHasReg() const
+{
+    bool hasReg;
+
+#if CPU_LONG_USES_REGPAIR
+    if (isRegPairType(TypeGet()))
+    {
+        assert(_gtRegNum != REG_NA);
+        INDEBUG(assert(gtRegTag == GT_REGTAG_REGPAIR));
+        hasReg = (gtRegPair != REG_PAIR_NONE);
+    }
+    else
+#endif
+    {
+        assert(_gtRegNum != REG_PAIR_NONE);
+        INDEBUG(assert(gtRegTag == GT_REGTAG_REG));
+
+        if (IsMultiRegCall())
+        {
+            // Has to cast away const-ness because GetReturnTypeDesc() is a non-const method
+            GenTree* tree = const_cast<GenTree*>(this);
+            GenTreeCall* call = tree->AsCall();
+            unsigned regCount = call->GetReturnTypeDesc()->GetReturnRegCount();
+            hasReg = false;
+
+            // A Multi-reg call node is said to have regs, if it has
+            // reg assigned to each of its result registers.
+            for (unsigned i = 0; i < regCount; ++i)
+            {
+                hasReg = (call->GetRegNumByIdx(i) != REG_NA);
+                if (!hasReg)
+                {
+                    break;
+                }
+            }
+        }
+        else if (IsCopyOrReloadOfMultiRegCall())
+        {
+            GenTree* tree = const_cast<GenTree*>(this);
+            GenTreeCopyOrReload* copyOrReload = tree->AsCopyOrReload();
+            GenTreeCall* call = copyOrReload->gtGetOp1()->AsCall();
+            unsigned regCount = call->GetReturnTypeDesc()->GetReturnRegCount();
+            hasReg = false;
+
+            // A Multi-reg copy or reload node is said to have regs,
+            // if it has valid regs in any of the positions.
+            for (unsigned i = 0; i < regCount; ++i)
+            {
+                hasReg = (copyOrReload->GetRegNumByIdx(i) != REG_NA);
+                if (hasReg)
+                {
+                    break;
+                }
+            }
+        }
+        else
+        {
+            hasReg = (gtRegNum != REG_NA);
+        }
+    }
+
+    return hasReg;
+}
+
+//---------------------------------------------------------------
+// gtGetRegMask: Get the reg mask of the node.
+//
+// Arguments:
+//    None
+//
+// Return Value:
+//    Reg Mask of GenTree node.
+//
+regMaskTP 
+GenTree::gtGetRegMask() const
+{
+    regMaskTP resultMask;
+
+#if CPU_LONG_USES_REGPAIR
+    if (isRegPairType(TypeGet()))
+    {
+        resultMask = genRegPairMask(gtRegPair);
+    }
+    else
+#endif
+    {
+        if (IsMultiRegCall())
+        {
+            // temporarily cast away const-ness as AsCall() method is not declared const
+            resultMask = genRegMask(gtRegNum);
+            GenTree* temp = const_cast<GenTree*>(this);
+            resultMask |= temp->AsCall()->GetOtherRegMask();
+        }
+        else if (IsCopyOrReloadOfMultiRegCall())
+        {
+            // A multi-reg copy or reload, will have valid regs for only those 
+            // positions that need to be copied or reloaded.  Hence we need
+            // to consider only those registers for computing reg mask.
+
+            GenTree* tree = const_cast<GenTree*>(this);
+            GenTreeCopyOrReload* copyOrReload = tree->AsCopyOrReload();
+            GenTreeCall* call = copyOrReload->gtGetOp1()->AsCall();
+            unsigned regCount = call->GetReturnTypeDesc()->GetReturnRegCount();
+
+            resultMask = RBM_NONE;
+            for (unsigned i = 0; i < regCount; ++i)
+            {
+                regNumber reg = copyOrReload->GetRegNumByIdx(i);
+                if (reg != REG_NA)
+                {
+                    resultMask |= genRegMask(reg);
+                }
+            }
+        }
+        else
+        {
+            resultMask = genRegMask(gtRegNum);
+        }
+    }
+
+    return resultMask;
+}
+
+//---------------------------------------------------------------
+// GetOtherRegMask: Get the reg mask of gtOtherRegs of call node
+//
+// Arguments:
+//    None
+//
+// Return Value:
+//    Reg mask of gtOtherRegs of call node.
+//
+regMaskTP  
+GenTreeCall::GetOtherRegMask() const
+{
+    regMaskTP resultMask = RBM_NONE;
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+    for (unsigned i = 0; i < MAX_RET_REG_COUNT - 1; ++i)
+    {
+        if (gtOtherRegs[i] != REG_NA)
+        {
+            resultMask |= genRegMask(gtOtherRegs[i]);
+            continue;
+        }
+        break;
+    }
+#endif
+
+    return resultMask;
 }
 
 /*****************************************************************************
@@ -5163,6 +5382,12 @@ GenTreeCall*          Compiler::gtNewCallNode(gtCallTypes     callType,
     }
 #endif
 
+    // Initialize gtOtherRegs 
+    node->ClearOtherRegs();
+
+    // Initialize spill flags of gtOtherRegs
+    node->ClearOtherRegFlags();
+
     return node;
 }
 
@@ -6111,6 +6336,13 @@ GenTreePtr          Compiler::gtCloneExpr(GenTree * tree,
             }
             break;
 
+        case GT_COPY:
+        case GT_RELOAD:
+            {
+                copy = new(this, oper) GenTreeCopyOrReload(oper, tree->TypeGet(), tree->gtGetOp1());
+            }
+            break;
+
 #ifdef FEATURE_SIMD
         case GT_SIMD:
             {
@@ -6304,6 +6536,7 @@ GenTreePtr          Compiler::gtCloneExpr(GenTree * tree,
         copy->gtCall.gtInlineObservation = tree->gtCall.gtInlineObservation;
 #endif
 
+        copy->AsCall()->CopyOtherRegFlags(tree->AsCall());
         break;
 
     case GT_FIELD:
@@ -7659,6 +7892,28 @@ void                Compiler::gtDispRegVal(GenTree *  tree)
         break;
     }
 
+    if (tree->IsMultiRegCall())
+    {
+        // 0th reg is gtRegNum, which is already printed above.
+        // Print the remaining regs of a multi-reg call node.
+        GenTreeCall* call = tree->AsCall();
+        unsigned regCount = call->GetReturnTypeDesc()->GetReturnRegCount();
+        for (unsigned i = 1; i < regCount; ++i)
+        {
+            printf(",%s", compRegVarName(call->GetRegNumByIdx(i)));
+        }        
+    }
+    else if (tree->IsCopyOrReloadOfMultiRegCall())
+    {
+        GenTreeCopyOrReload* copyOrReload = tree->AsCopyOrReload();
+        GenTreeCall* call = tree->gtGetOp1()->AsCall();
+        unsigned regCount = call->GetReturnTypeDesc()->GetReturnRegCount();
+        for (unsigned i = 1; i < regCount; ++i)
+        {
+            printf(",%s", compRegVarName(copyOrReload->GetRegNumByIdx(i)));
+        }
+    }
+
     if  (tree->gtFlags & GTF_REG_VAL)
     {
         printf(" RV");
@@ -8596,6 +8851,10 @@ void                Compiler::gtDispTree(GenTreePtr             tree,
             }
 
             gtDispVN(tree);
+            if (tree->IsMultiRegCall())
+            {
+                gtDispRegVal(tree);
+            }
             printf("\n");
 
             if (!topOnly)
index 550a579..ec83811 100644 (file)
@@ -551,52 +551,13 @@ public:
 #endif
 
     // Copy the _gtRegNum/_gtRegPair/gtRegTag fields
-    void CopyReg(GenTreePtr from)
-    {
-        // To do the copy, use _gtRegPair, which must be bigger than _gtRegNum. Note that the values
-        // might be undefined (so gtRegTag == GT_REGTAG_NONE).
-        _gtRegPair = from->_gtRegPair;
-        C_ASSERT(sizeof(_gtRegPair) >= sizeof(_gtRegNum));
-        INDEBUG(gtRegTag = from->gtRegTag;)
-    }
+    void CopyReg(GenTreePtr from);
 
     void gtClearReg(Compiler* compiler);
 
-    bool gtHasReg() const
-    {
-        // Has the node been assigned a register by LSRA?
-        //
-        // In order for this to work properly, gtClearReg (above) must be called prior to setting
-        // the register value.
-#if CPU_LONG_USES_REGPAIR
-        if (isRegPairType(TypeGet()))
-        {
-            assert(_gtRegNum != REG_NA);
-            INDEBUG(assert(gtRegTag == GT_REGTAG_REGPAIR));
-            return gtRegPair != REG_PAIR_NONE;
-        }
-        else
-#endif
-        {
-            assert(_gtRegNum != REG_PAIR_NONE);
-            INDEBUG(assert(gtRegTag == GT_REGTAG_REG));
-            return gtRegNum != REG_NA;
-        }
-    }
+    bool gtHasReg() const;
 
-    regMaskTP gtGetRegMask() const
-    {
-#if CPU_LONG_USES_REGPAIR
-        if (isRegPairType(TypeGet()))
-        {
-            return genRegPairMask(gtRegPair);
-        }
-        else
-#endif
-        {
-            return genRegMask(gtRegNum);
-        }
-    }
+    regMaskTP gtGetRegMask() const;
 
     unsigned            gtFlags;        // see GTF_xxxx below
     
@@ -1386,7 +1347,14 @@ public:
     // Return the child of this node if it is a GT_RELOAD or GT_COPY; otherwise simply return the node itself
     inline GenTree*   gtSkipReloadOrCopy();
 
-    inline bool     IsMultiRegCallStoreToLocal();
+    // Returns true if it is a call node returning its value in more than one register
+    inline bool     IsMultiRegCall() const;
+
+    // Returns true if it is a GT_COPY or GT_RELOAD node
+    inline bool     IsCopyOrReload() const;
+
+    // Returns true if it is a GT_COPY or GT_RELOAD of a multi-reg call node
+    inline bool     IsCopyOrReloadOfMultiRegCall() const;
 
     bool            OperMayThrow();
 
@@ -2384,8 +2352,8 @@ enum class InlineObservation;
 // registers. For such calls this struct provides the following info 
 // on their return type
 //    - type of value returned in each return register
-//    - return register numbers in which the value is returned
-//    - a spill mask for lsra/codegen purpose
+//    - ABI return register numbers in which the value is returned
+//    - count of return registers in which the value is returned
 //
 // TODO-ARM: Update this to meet the needs of Arm64 and Arm32
 // TODO-X86: Update this to meet the needs of x86
@@ -2428,7 +2396,7 @@ public:
     // Return Value:
     //   Count of return registers.
     //   Returns 0 if the return type is not returned in registers.
-    unsigned GetReturnRegCount()
+    unsigned GetReturnRegCount() const
     {
         assert(m_inited);
 
@@ -2451,6 +2419,21 @@ public:
         return regCount;
     }
 
+    //-----------------------------------------------------------------------
+    // IsMultiRegRetType: check whether the described type is returned in
+    // more than one return register.
+    //
+    // Arguments: 
+    //    None
+    //
+    // Return Value:
+    //    True when GetReturnRegCount() reports two or more registers,
+    //    false otherwise.
+    bool IsMultiRegRetType() const
+    {
+        const unsigned regCount = GetReturnRegCount();
+        return regCount >= 2;
+    }
+
     //--------------------------------------------------------------------------
     // GetReturnRegType:  Get var_type of the return register specified by index.
     // 
@@ -2504,12 +2487,23 @@ struct GenTreeCall final : public GenTree
 
     regMaskTP         gtCallRegUsedMask;      // mask of registers used to pass parameters
 
-    // For now Return Type Descriptor is enabled only for x64 unix.
+    // State required to support multi-reg returning call nodes.
+    // For now it is enabled only for x64 unix.
+    //
     // TODO-ARM: enable this for HFA returns on Arm64 and Arm32
     // TODO-X86: enable this for long returns on x86
     // TODO-AllArch: enable for all call nodes to unify single-reg and multi-reg returns.
 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
     ReturnTypeDesc    gtReturnTypeDesc;
+
+    // gtRegNum would always be the first return reg.
+    // The following array holds the other reg numbers of multi-reg return.
+    regNumber         gtOtherRegs[MAX_RET_REG_COUNT - 1];
+
+    // GTF_SPILL or GTF_SPILLED flag on a multi-reg call node indicates that one or
+    // more of its result regs are in that state.  The spill flag of each of the
+    // return register is stored in the below array.
+    unsigned          gtSpillFlags[MAX_RET_REG_COUNT];
 #endif 
 
     //-----------------------------------------------------------------------
@@ -2537,6 +2531,185 @@ struct GenTreeCall final : public GenTree
 #endif
     }
 
+    //---------------------------------------------------------------------------
+    // GetRegNumByIdx: get ith return register allocated to this call node.
+    //
+    // Arguments:
+    //     idx   -   index of the return register
+    //
+    // Return Value:
+    //     Return regNumber of ith return register of call node.
+    //     Returns REG_NA if there is no valid return register for the given index.
+    //
+    // TODO-ARM: Implement this routine for Arm64 and Arm32
+    // TODO-X86: Implement this routine for x86
+    regNumber  GetRegNumByIdx(unsigned idx) const
+    {
+        assert(idx < MAX_RET_REG_COUNT);
+
+        if (idx == 0)
+        {
+            return gtRegNum;
+        }
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+        return gtOtherRegs[idx-1];
+#else
+        return REG_NA;
+#endif
+    }
+
+    //----------------------------------------------------------------------
+    // SetRegNumByIdx: set ith return register of this call node
+    //
+    // Arguments:
+    //    reg    -   reg number
+    //    idx    -   index of the return register
+    //
+    // Return Value:
+    //    None
+    //
+    // TODO-ARM: Implement this routine for Arm64 and Arm32
+    // TODO-X86: Implement this routine for x86
+    void  SetRegNumByIdx(regNumber reg, unsigned idx)
+    {
+        assert(idx < MAX_RET_REG_COUNT);
+
+        if (idx == 0)
+        {
+            gtRegNum = reg;
+        }
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+        else
+        {
+            gtOtherRegs[idx - 1] = reg;
+            assert(gtOtherRegs[idx - 1] == reg);
+        }
+#else
+        unreached();
+#endif
+    }
+
+    //----------------------------------------------------------------------------
+    // ClearOtherRegs: clear multi-reg state to indicate no regs are allocated
+    //
+    // Arguments:
+    //    None
+    //
+    // Return Value:
+    //    None
+    //
+    void  ClearOtherRegs()
+    {
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+        for (unsigned i = 0; i < MAX_RET_REG_COUNT - 1; ++i)
+        {
+            gtOtherRegs[i] = REG_NA;
+        }
+#endif
+    }
+
+    //----------------------------------------------------------------------------
+    // CopyOtherRegs: copy multi-reg state from the given call node to this node
+    //
+    // Arguments:
+    //    fromCall  -  GenTreeCall node from which to copy multi-reg state
+    //
+    // Return Value:
+    //    None
+    //
+    void CopyOtherRegs(GenTreeCall* fromCall)
+    {
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+        for (unsigned i = 0; i < MAX_RET_REG_COUNT - 1; ++i)
+        {
+            this->gtOtherRegs[i] = fromCall->gtOtherRegs[i];
+        }
+#endif
+    }
+
+    // Get reg mask of all the valid registers of gtOtherRegs array
+    regMaskTP  GetOtherRegMask() const;
+
+    //----------------------------------------------------------------------
+    // GetRegSpillFlagByIdx: get spill flag associated with the return register 
+    // specified by its index.
+    //
+    // Arguments:
+    //    idx  -  Position or index of the return register
+    //
+    // Return Value:
+    //    Returns GTF_* flags associated with.
+    unsigned GetRegSpillFlagByIdx(unsigned idx) const
+    {
+        assert(idx < MAX_RET_REG_COUNT);
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+        return gtSpillFlags[idx];
+#else
+        assert(!"unreached");
+        return 0;
+#endif
+    }
+
+    //----------------------------------------------------------------------
+    // SetRegSpillFlagByIdx: set spill flags for the return register 
+    // specified by its index.
+    //
+    // Arguments:
+    //    flags  -  GTF_* flags
+    //    idx    -  Position or index of the return register
+    //
+    // Return Value:
+    //    None
+    void SetRegSpillFlagByIdx(unsigned flags, unsigned idx)
+    {
+        assert(idx < MAX_RET_REG_COUNT);
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+        gtSpillFlags[idx] = flags;
+#else
+        unreached();
+#endif
+    }
+
+    //-------------------------------------------------------------------
+    // clearOtherRegFlags: clear GTF_* flags associated with gtOtherRegs
+    //
+    // Arguments:
+    //     None
+    //
+    // Return Value:
+    //     None
+    void ClearOtherRegFlags()
+    {
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+        for (unsigned i = 0; i < MAX_RET_REG_COUNT; ++i)
+        {
+            gtSpillFlags[i] = 0;
+        }
+#endif
+    }
+
+    //-------------------------------------------------------------------------
+    // CopyOtherRegFlags: copy GTF_* flags associated with gtOtherRegs from
+    // the given call node.
+    //
+    // Arguments:
+    //    fromCall  -  GenTreeCall node from which to copy
+    //
+    // Return Value:
+    //    None
+    //
+    void CopyOtherRegFlags(GenTreeCall* fromCall)
+    {
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+        for (unsigned i = 0; i < MAX_RET_REG_COUNT; ++i)
+        {
+            this->gtSpillFlags[i] = fromCall->gtSpillFlags[i];
+        }
+#endif
+    }
 
 #define     GTF_CALL_M_EXPLICIT_TAILCALL       0x0001  // GT_CALL -- the call is "tail" prefixed and importer has performed tail call checks
 #define     GTF_CALL_M_TAILCALL                0x0002  // GT_CALL -- the call is a tailcall
@@ -2633,7 +2806,6 @@ struct GenTreeCall final : public GenTree
 #endif
     }
 
-
     // Returns true if VM has flagged this method as CORINFO_FLG_PINVOKE.
     bool IsPInvoke()                { return (gtCallMoreFlags & GTF_CALL_M_PINVOKE) != 0; }
 
@@ -3747,6 +3919,138 @@ struct GenTreePutArgStk: public GenTreeUnOp
 #endif
 };
 
+// Represents GT_COPY or GT_RELOAD node
+struct GenTreeCopyOrReload : public GenTreeUnOp
+{
+    // State required to support copy/reload of a multi-reg call node.
+    // First register is is always given by gtRegNum.
+    // Currently enabled for x64 unix.
+    //
+    // TODO-ARM: Enable this when multi-reg call node support is added.
+    // TODO-X86: Enable this when multi-reg call node support is added.
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+    regNumber gtOtherRegs[MAX_RET_REG_COUNT - 1];
+#endif
+
+    //----------------------------------------------------------
+    // ClearOtherRegs: set gtOtherRegs to REG_NA.
+    //
+    // Arguments:
+    //    None
+    //
+    // Return Value:
+    //    None
+    //
+    // TODO-ARM: Implement this routine for Arm64 and Arm32
+    // TODO-X86: Implement this routine for x86
+    void ClearOtherRegs()
+    {
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+        for (unsigned i = 0; i < MAX_RET_REG_COUNT - 1; ++i)
+        {
+            gtOtherRegs[i] = REG_NA;
+        }
+#endif
+    }
+
+    //-----------------------------------------------------------
+    // GetRegNumByIdx: Get regNumber of ith position.
+    //
+    // Arguments:
+    //    idx   -   register position.
+    //
+    // Return Value:
+    //    Returns regNumber assigned to ith position.
+    //
+    // TODO-ARM: Implement this routine for Arm64 and Arm32
+    // TODO-X86: Implement this routine for x86
+    regNumber GetRegNumByIdx(unsigned idx) const
+    {
+        assert(idx < MAX_RET_REG_COUNT);
+
+        if (idx == 0)
+        {
+            return gtRegNum;
+        }
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+        return gtOtherRegs[idx - 1];
+#else
+        return REG_NA;
+#endif
+    }
+
+    //-----------------------------------------------------------
+    // SetRegNumByIdx: Set the regNumber for ith position.
+    //
+    // Arguments:
+    //    reg   -   reg number 
+    //    idx   -   register position.
+    //
+    // Return Value:
+    //    None.
+    //
+    // TODO-ARM: Implement this routine for Arm64 and Arm32
+    // TODO-X86: Implement this routine for x86
+    void SetRegNumByIdx(regNumber reg, unsigned idx)
+    {
+        assert(idx < MAX_RET_REG_COUNT);
+
+        if (idx == 0)
+        {
+            gtRegNum = reg;
+        }
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+        else
+        {
+            gtOtherRegs[idx - 1] = reg;
+            assert(gtOtherRegs[idx - 1] == reg);
+        }
+#else
+        else
+        {
+            unreached();
+        }
+#endif
+    }
+
+    //----------------------------------------------------------------------------
+    // CopyOtherRegs: copy multi-reg state from the given copy/reload node to this
+    // node.
+    //
+    // Arguments:
+    //    from  -  GenTree node from which to copy multi-reg state
+    //
+    // Return Value:
+    //    None
+    //
+    // TODO-ARM: Implement this routine for Arm64 and Arm32
+    // TODO-X86: Implement this routine for x86
+    void CopyOtherRegs(GenTreeCopyOrReload* from)
+    {
+        assert(OperGet() == from->OperGet());
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+        for (unsigned i = 0; i < MAX_RET_REG_COUNT - 1; ++i)
+        {
+            gtOtherRegs[i] = from->gtOtherRegs[i];
+        }
+#endif
+    }
+
+    GenTreeCopyOrReload(genTreeOps oper,
+        var_types type,
+        GenTree* op1) : GenTreeUnOp(oper, type, op1)
+    {
+        gtRegNum = REG_NA;
+        ClearOtherRegs();
+    }
+
+#if DEBUGGABLE_GENTREE
+    GenTreeCopyOrReload() : GenTreeUnOp() {}
+#endif
+};
+
 // Deferred inline functions of GenTree -- these need the subtypes above to
 // be defined already.
 
@@ -3962,25 +4266,57 @@ inline GenTree*         GenTree::gtSkipReloadOrCopy()
     return this;
 }
 
-//----------------------------------------------------------------------------------------
-// IsMultiRegCallStoreToLocal: Whether store op is storing multi-reg return value of call
-// to a local.
+//-----------------------------------------------------------------------------------
+// IsMultiRegCall: whether this is a call node returning its value in more than
+// one register
 //
 // Arguments:
-//    None
+//     None
 //
 // Return Value:
-//    Returns true if store op is storing a multi-reg return value of a call into a local.
-//    False otherwise.
-// 
-inline bool GenTree::IsMultiRegCallStoreToLocal()
+//     Returns true if this GenTree is a call whose HasMultiRegRetVal() is true.
+//     Returns false for non-call nodes.
+inline bool  GenTree::IsMultiRegCall() const
 {
-    assert(OperGet() == GT_STORE_LCL_VAR);
+    if (this->IsCall())
+    {
+        // We cannot use AsCall() as it is not declared const.
+        // GenTreeCall derives from GenTree, so static_cast is the proper
+        // named cast for this checked-by-IsCall() downcast.
+        const GenTreeCall* call = static_cast<const GenTreeCall*>(this);
+        return call->HasMultiRegRetVal();
+    }
 
-    GenTreePtr op1 = gtGetOp1();
-    GenTreePtr actualOperand = op1->gtSkipReloadOrCopy();
+    return false;
+}
 
-    return (actualOperand->OperGet() == GT_CALL) && actualOperand->AsCall()->HasMultiRegRetVal();
+//-------------------------------------------------------------------------
+// IsCopyOrReload: whether this node's oper is GT_COPY or GT_RELOAD.
+//
+// Arguments:
+//     None
+//
+// Return Value:
+//     Returns true if this GenTree is a copy or reload node.
+inline bool GenTree::IsCopyOrReload() const
+{
+    switch (gtOper)
+    {
+        case GT_COPY:
+        case GT_RELOAD:
+            return true;
+
+        default:
+            return false;
+    }
+}
+
+//-----------------------------------------------------------------------------------
+// IsCopyOrReloadOfMultiRegCall: whether this is a GT_COPY or GT_RELOAD node whose
+// operand is a multi-reg call node.
+//
+// Arguments:
+//     None
+//
+// Return Value:
+//     Returns true if this GenTree is a copy or reload of multi-reg call node.
+inline bool GenTree::IsCopyOrReloadOfMultiRegCall() const
+{
+    if (!IsCopyOrReload())
+    {
+        return false;
+    }
+
+    // const-ness is cast away only to reach the operand through gtGetOp1().
+    GenTree* self    = const_cast<GenTree*>(this);
+    GenTree* operand = self->gtGetOp1();
+    return operand->IsMultiRegCall();
 }
 
 inline bool GenTree::IsCnsIntOrI() const
index 3c55c30..2f0b3a3 100644 (file)
@@ -81,6 +81,7 @@ GTSTRUCT_1(ArrIndex    , GT_ARR_INDEX)
 GTSTRUCT_1(RetExpr     , GT_RET_EXPR) 
 GTSTRUCT_1(Stmt        , GT_STMT) 
 GTSTRUCT_1(Obj         , GT_OBJ)
+GTSTRUCT_2(CopyOrReload, GT_COPY, GT_RELOAD)
 GTSTRUCT_2(ClsVar      , GT_CLS_VAR, GT_CLS_VAR_ADDR) 
 GTSTRUCT_1(ArgPlace    , GT_ARGPLACE) 
 GTSTRUCT_1(Label       , GT_LABEL) 
index 7c24aa8..2d5409a 100644 (file)
@@ -7109,7 +7109,7 @@ GenTreePtr          Compiler::impFixupStructReturnType(GenTreePtr op, CORINFO_CL
     assert(varTypeIsStruct(info.compRetType));
     assert(info.compRetBuffArg == BAD_VAR_NUM);
 
-#if defined(_TARGET_X86_) || defined(_TARGET_AMD64_)
+#if defined(_TARGET_XARCH_)
 
 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
     // No VarArgs for CoreCLR on x64 Unix
@@ -13822,7 +13822,7 @@ bool Compiler::impReturnInstruction(BasicBlock *block, int prefixFlags, OPCODE &
                 {
                     // If single eightbyte, the return type would have been normalized and there won't be a temp var.
                     // This code will be called only if the struct return has not been normalized (i.e. 2 eightbytes - max allowed.)
-                    assert(retRegCount == CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_RETURN_IN_REGISTERS);
+                    assert(retRegCount == MAX_RET_REG_COUNT);
                     // Same as !structDesc.passedInRegisters but just don't bother with impAssignStructPtr.
 #endif // !defined(_TARGET_ARM_)
 
index 506ccfe..5e67067 100644 (file)
@@ -39,6 +39,25 @@ void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc)
 {
     TreeNodeInfo* info = &(storeLoc->gtLsraInfo);
 
+    // Is this the case of var = call where call is returning
+    // a value in multiple return registers?
+    GenTree* op1 = storeLoc->gtGetOp1();
+    if (op1->IsMultiRegCall())
+    {
+        // backend expects to see this case only for store lclvar.
+        assert(storeLoc->OperGet() == GT_STORE_LCL_VAR);
+
+        // srcCount = number of registers in which the value is returned by call
+        GenTreeCall* call = op1->AsCall();
+        ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
+        info->srcCount = retTypeDesc->GetReturnRegCount();
+
+        // Call node srcCandidates = Bitwise-OR(allregs(GetReturnRegType(i))) for all i=0..RetRegCount-1
+        regMaskTP srcCandidates = m_lsra->allMultiRegCallNodeRegs(call);
+        op1->gtLsraInfo.setSrcCandidates(m_lsra, srcCandidates);
+        return;
+    }
+
 #ifdef FEATURE_SIMD    
     if (storeLoc->TypeGet() == TYP_SIMD12)
     {
@@ -55,7 +74,6 @@ void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc)
     // If the source is a containable immediate, make it contained, unless it is
     // an int-size or larger store of zero to memory, because we can generate smaller code
     // by zeroing a register and then storing it.
-    GenTree* op1 = storeLoc->gtOp1;
     if (IsContainableImmed(storeLoc, op1) && (!op1->IsZero() || varTypeIsSmall(storeLoc)))
     {
         MakeSrcContained(storeLoc, op1);
@@ -296,50 +314,56 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt)
             else
 #endif // !defined(_TARGET_64BIT_)
             {
+                GenTree* op1 = tree->gtGetOp1();
+                regMaskTP useCandidates = RBM_NONE;
+
+                info->srcCount = (tree->TypeGet() == TYP_VOID) ? 0 : 1;
+                info->dstCount = 0;
+
 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
                 if (varTypeIsStruct(tree))
-                {
-                    noway_assert((tree->gtOp.gtOp1->OperGet() == GT_LCL_VAR) ||
-                                 (tree->gtOp.gtOp1->OperGet() == GT_CALL));
-
-                    if (tree->gtOp.gtOp1->OperGet() == GT_LCL_VAR)
+                {                    
+                    // op1 has to be either an lclvar or a multi-reg returning call
+                    if (op1->OperGet() == GT_LCL_VAR)
                     {
-                        GenTreeLclVarCommon* lclVarPtr = tree->gtOp.gtOp1->AsLclVarCommon();
+                        GenTreeLclVarCommon* lclVarPtr = op1->AsLclVarCommon();
                         LclVarDsc* varDsc = &(compiler->lvaTable[lclVarPtr->gtLclNum]);
                         assert(varDsc->lvIsMultiRegArgOrRet);
                         varDsc->lvDoNotEnregister = true;
 
                         // If this is a two eightbyte return, make the var
-                        // contained by the return expression. The code gen will put
+                        // contained by the return expression. Codegen will put
                         // the values in the right registers for return.
-                        info->srcCount = (tree->TypeGet() == TYP_VOID) ? 0 : 1;
-                        info->dstCount = 0;
-                        MakeSrcContained(tree, tree->gtOp.gtOp1);
-                        break;
+                        MakeSrcContained(tree, op1);
                     }
+                    else
+                    {
+                        noway_assert(op1->IsMultiRegCall());
 
-                    // If the return gtOp1 is GT_CALL, just fallthrough. The return registers should already be set properly by the GT_CALL.
+                        ReturnTypeDesc* retTypeDesc = op1->AsCall()->GetReturnTypeDesc();
+                        info->srcCount = retTypeDesc->GetReturnRegCount();
+                        useCandidates = retTypeDesc->GetABIReturnRegs();
+                    }
                 }
+                else
 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
-                // TODO-AMD64-Unix: When the GT_CALL for multi-register return structs is changed to use 2 destinations,
-                // change the code below to use 2 src for such op1s (this is the case of op1 being a GT_CALL).
-                info->srcCount = (tree->TypeGet() == TYP_VOID) ? 0 : 1;
-                info->dstCount = 0;
-
-                regMaskTP useCandidates;
-                switch (tree->TypeGet())
                 {
-                case TYP_VOID:   useCandidates = RBM_NONE; break;
-                case TYP_FLOAT:  useCandidates = RBM_FLOATRET; break;
-                case TYP_DOUBLE: useCandidates = RBM_DOUBLERET; break;
+                    // Non-struct type return - determine useCandidates                   
+                    switch (tree->TypeGet())
+                    {
+                    case TYP_VOID:   useCandidates = RBM_NONE; break;
+                    case TYP_FLOAT:  useCandidates = RBM_FLOATRET; break;
+                    case TYP_DOUBLE: useCandidates = RBM_DOUBLERET; break;
 #if defined(_TARGET_64BIT_)
-                case TYP_LONG:   useCandidates = RBM_LNGRET; break;
+                    case TYP_LONG:   useCandidates = RBM_LNGRET; break;
 #endif // defined(_TARGET_64BIT_)
-                default:         useCandidates = RBM_INTRET; break;
+                    default:         useCandidates = RBM_INTRET; break;
+                    }
                 }
+
                 if (useCandidates != RBM_NONE)
                 {
-                    tree->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(l, useCandidates);
+                    op1->gtLsraInfo.setSrcCandidates(l, useCandidates);
                 }
             }
             break;
@@ -835,10 +859,30 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt)
 
         case GT_CALL:
         {
-            info->srcCount = 0;
-            info->dstCount =  (tree->TypeGet() != TYP_VOID) ? 1 : 0;
+            bool hasMultiRegRetVal = false;
+            ReturnTypeDesc* retTypeDesc = nullptr;
 
-            GenTree *ctrlExpr = tree->gtCall.gtControlExpr;
+            info->srcCount = 0;
+            if (tree->TypeGet() != TYP_VOID)
+            {
+                hasMultiRegRetVal = tree->AsCall()->HasMultiRegRetVal();
+                if (hasMultiRegRetVal)
+                {
+                    // dst count = number of registers in which the value is returned by call
+                    retTypeDesc = tree->AsCall()->GetReturnTypeDesc();
+                    info->dstCount = retTypeDesc->GetReturnRegCount();
+                }
+                else
+                {
+                    info->dstCount = 1;
+                }
+            }
+            else
+            {
+                info->dstCount = 0;
+            }
+            
+            GenTree* ctrlExpr = tree->gtCall.gtControlExpr;
             if (tree->gtCall.gtCallType == CT_INDIRECT)
             {
                 // either gtControlExpr != null or gtCallAddr != null.
@@ -884,8 +928,12 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt)
             }
 
             // Set destination candidates for return value of the call.
-            if (varTypeIsFloating(registerType)
-                FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY( || varTypeIsSIMD(registerType)))
+            if (hasMultiRegRetVal)
+            {
+                assert(retTypeDesc != nullptr);
+                info->setDstCandidates(l, retTypeDesc->GetABIReturnRegs());
+            }
+            else if (varTypeIsFloating(registerType))
             {
 #ifdef _TARGET_X86_
                 // The return value will be on the X87 stack, and we will need to move it.
@@ -1689,10 +1737,11 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt)
         }
 #endif //_TARGET_X86_
 
-        tree = next;
-
         // We need to be sure that we've set info->srcCount and info->dstCount appropriately
-        assert(info->dstCount < 2);
+        assert((info->dstCount < 2) ||
+               (tree->IsMultiRegCall() && info->dstCount == MAX_RET_REG_COUNT));
+
+        tree = next;
     }
 }
 
index e0fd5a2..127419b 100644 (file)
@@ -114,9 +114,30 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 */
 
-void lsraAssignRegToTree(GenTreePtr tree, regNumber reg)
+//--------------------------------------------------------------
+// lsraAssignRegToTree: Assign the given reg to tree node.
+//
+// Arguments:
+//    tree    -    Gentree node
+//    reg     -    register to be assigned
+//    regIdx  -    register idx, if tree is a multi-reg call node.
+//                 regIdx will be zero for single-reg result producing tree nodes.
+//
+// Return Value:
+//    None
+//
+void lsraAssignRegToTree(GenTreePtr tree, regNumber reg, unsigned regIdx)
 {
-    tree->gtRegNum  = reg;
+    if (regIdx == 0)
+    {
+        tree->gtRegNum = reg;
+    }
+    else
+    {
+        assert(tree->IsMultiRegCall());
+        GenTreeCall* call = tree->AsCall();
+        call->SetRegNumByIdx(reg, regIdx);
+    }
 }
 
 // allRegs represents a set of registers that can
@@ -137,6 +158,68 @@ regMaskTP LinearScan::allRegs(RegisterType rt)
         return availableIntRegs;
 }
 
+//--------------------------------------------------------------------------
+// allMultiRegCallNodeRegs: represents a set of registers that can be used
+// to allocate a multi-reg call node.
+//
+// Arguments:
+//    call   -  Multi-reg call node
+//
+// Return Value:
+//    Mask representing the set of available registers for multi-reg call 
+//    node.
+//
+// Note:
+// Multi-reg call node available regs = Bitwise-OR(allregs(GetReturnRegType(i)))
+// for all i=0..RetRegCount-1.
+regMaskTP LinearScan::allMultiRegCallNodeRegs(GenTreeCall* call)
+{
+    assert(call->HasMultiRegRetVal());
+
+    ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
+    regMaskTP resultMask = allRegs(retTypeDesc->GetReturnRegType(0));
+
+    unsigned count = retTypeDesc->GetReturnRegCount();
+    for (unsigned i = 1; i < count; ++i)
+    {
+        resultMask |= allRegs(retTypeDesc->GetReturnRegType(i));
+    }
+
+    return resultMask;
+}
+
+//--------------------------------------------------------------------------
+// allRegs: returns the set of registers that can accommodate the type of
+// given node.
+//
+// Arguments:
+//    tree   -  GenTree node
+//
+// Return Value:
+//    Mask representing the set of available registers for given tree
+//
+// Note: In case of a multi-reg call node, the full set of registers must be
+// determined by looking at the types of the individual return registers.
+// In this case, the registers may include registers from different register
+// sets and will not be limited to the actual ABI return registers.
+regMaskTP LinearScan::allRegs(GenTree* tree)
+{
+    regMaskTP resultMask;
+
+    // In case of multi-reg calls, allRegs is defined as
+    // Bitwise-OR(allRegs(GetReturnRegType(i))) for i=0..ReturnRegCount-1
+    if (tree->IsMultiRegCall())
+    {
+        resultMask = allMultiRegCallNodeRegs(tree->AsCall());
+    }
+    else
+    {
+        resultMask = allRegs(tree->TypeGet());
+    }
+
+    return resultMask;
+}
+
 regMaskTP LinearScan::allSIMDRegs()
 {
     return availableFloatRegs;
@@ -635,30 +718,64 @@ LinearScan::associateRefPosWithInterval(RefPosition *rp)
     }
 }
 
-RefPosition *
-LinearScan::newRefPosition(
-    regNumber reg, LsraLocation theLocation,
-    RefType theRefType, GenTree * theTreeNode,
-    regMaskTP mask)
+//---------------------------------------------------------------------------
+// newRefPosition: allocate and initialize a new RefPosition.
+//
+// Arguments:
+//     reg             -  reg number that identifies RegRecord to be associated 
+//                        with this RefPosition
+//     theLocation     -  LSRA location of RefPosition
+//     theRefType      -  RefPosition type
+//     theTreeNode     -  GenTree node for which this RefPosition is created
+//     mask            -  Set of valid registers for this RefPosition
+//
+// Note: this overload takes no multiRegIdx; the new RefPosition always gets multi-reg index 0.
+//
+// Return Value:
+//     a new RefPosition
+//                       
+RefPosition*
+LinearScan::newRefPosition(regNumber reg, 
+                           LsraLocation theLocation,
+                           RefType theRefType, 
+                           GenTree* theTreeNode,
+                           regMaskTP mask)
 {
-    RefPosition *newRP = newRefPositionRaw(theLocation, theTreeNode, theRefType);
+    RefPosition* newRP = newRefPositionRaw(theLocation, theTreeNode, theRefType);
 
     newRP->setReg(getRegisterRecord(reg));
-
     newRP->registerAssignment = mask;
+
+    newRP->setMultiRegIdx(0);
+
     associateRefPosWithInterval(newRP);
 
     DBEXEC(VERBOSE, newRP->dump());
     return newRP;
 }
 
-
-
-RefPosition *
-LinearScan::newRefPosition(
-    Interval * theInterval, LsraLocation theLocation,
-    RefType theRefType, GenTree * theTreeNode,
-    regMaskTP mask)
+//---------------------------------------------------------------------------
+// newRefPosition: allocate and initialize a new RefPosition.
+//
+// Arguments:
+//     theInterval     -  interval to which RefPosition is associated with.
+//     theLocation     -  LSRA location of RefPosition
+//     theRefType      -  RefPosition type
+//     theTreeNode     -  GenTree node for which this RefPosition is created
+//     mask            -  Set of valid registers for this RefPosition
+//     multiRegIdx     -  register position if this RefPosition corresponds to a
+//                        multi-reg call node.
+//
+// Return Value:
+//     a new RefPosition
+//                       
+RefPosition*
+LinearScan::newRefPosition(Interval* theInterval, 
+                           LsraLocation theLocation,
+                           RefType theRefType, 
+                           GenTree* theTreeNode,
+                           regMaskTP mask,
+                           unsigned multiRegIdx /* = 0 */)
 {
 #ifdef DEBUG
     if (theInterval != nullptr && regType(theInterval->registerType) == FloatRegisterType)
@@ -686,12 +803,12 @@ LinearScan::newRefPosition(
     if (insertFixedRef)
     {
         regNumber physicalReg = genRegNumFromMask(mask);
-        RefPosition *pos = newRefPosition (physicalReg, theLocation,  RefTypeFixedReg, nullptr, mask);
+        RefPosition* pos = newRefPosition (physicalReg, theLocation,  RefTypeFixedReg, nullptr, mask);
         assert(theInterval != nullptr);
         assert((allRegs(theInterval->registerType) & mask) != 0);
     }
 
-    RefPosition *newRP = newRefPositionRaw(theLocation, theTreeNode, theRefType);
+    RefPosition* newRP = newRefPositionRaw(theLocation, theTreeNode, theRefType);
 
     newRP->setInterval(theInterval);
 
@@ -712,6 +829,8 @@ LinearScan::newRefPosition(
 #endif // !_TARGET_AMD64_
     newRP->registerAssignment = mask;
 
+    newRP->setMultiRegIdx(multiRegIdx);
+
     associateRefPosWithInterval(newRP);
 
     DBEXEC(VERBOSE, newRP->dump());
@@ -2985,7 +3104,15 @@ LinearScan::buildRefPositionsForNode(GenTree *tree,
         // This is the case for dead nodes that occur after
         // tree rationalization
         // TODO-Cleanup: Identify and remove these dead nodes prior to register allocation.
-        produce = 1;
+        if (tree->IsMultiRegCall())
+        {
+            // In case of multi-reg call node, produce = number of return registers
+            produce = tree->AsCall()->GetReturnTypeDesc()->GetReturnRegCount();
+        }
+        else
+        {
+            produce = 1;
+        }
     }
 
 #ifdef DEBUG
@@ -3049,14 +3176,17 @@ LinearScan::buildRefPositionsForNode(GenTree *tree,
 
         // for interstitial tree temps, a use is always last and end;
         // this is  set by default in newRefPosition
-        GenTree * useNode = locInfo.treeNode;
+        GenTree* useNode = locInfo.treeNode;
         assert(useNode != nullptr);
         var_types type = useNode->TypeGet();
         regMaskTP candidates = getUseCandidates(useNode);
-        Interval *i = locInfo.interval;
+        Interval* i = locInfo.interval;
+        unsigned multiRegIdx = locInfo.multiRegIdx;
 
 #ifdef FEATURE_SIMD
-        if (tree->OperIsLocalStore() && varDefInterval == nullptr)
+        // In case of multi-reg call store to a local, there won't be any mismatch of
+        // use candidates with the type of the tree node.
+        if (tree->OperIsLocalStore() && varDefInterval == nullptr && !useNode->IsMultiRegCall())
         {
             // This is a non-candidate store.  If this is a SIMD type, the use candidates
             // may not match the type of the tree node.  If that is the case, change the
@@ -3110,12 +3240,12 @@ LinearScan::buildRefPositionsForNode(GenTree *tree,
                 regNumber physicalReg = genRegNumFromMask(fixedAssignment);
                 RefPosition *pos = newRefPosition (physicalReg, currentLoc,  RefTypeFixedReg, nullptr, fixedAssignment);
             }
-            pos = newRefPosition(i, currentLoc, RefTypeUse, useNode, allRegs(i->registerType));
+            pos = newRefPosition(i, currentLoc, RefTypeUse, useNode, allRegs(i->registerType), multiRegIdx);
             pos->registerAssignment = candidates;
         }
         else
         {
-            pos = newRefPosition(i, currentLoc, RefTypeUse, useNode, candidates);
+            pos = newRefPosition(i, currentLoc, RefTypeUse, useNode, candidates, multiRegIdx);
         }
         if (delayRegFree)
         {
@@ -3130,7 +3260,6 @@ LinearScan::buildRefPositionsForNode(GenTree *tree,
     buildInternalRegisterUsesForNode(tree, currentLoc, internalRefs, internalCount);
 
     RegisterType registerType = getDefType(tree);
-
     regMaskTP candidates = getDefCandidates(tree);
     regMaskTP useCandidates = getUseCandidates(tree);
 
@@ -3145,52 +3274,51 @@ LinearScan::buildRefPositionsForNode(GenTree *tree,
     }
 #endif // DEBUG
 
-    int targetRegs = produce;
-
 #if defined(_TARGET_AMD64_)
-    assert(produce <= 1);
+    // Multi-reg call node is the only node that could produce multi-reg value
+    assert(produce <= 1 || (tree->IsMultiRegCall() && produce == MAX_RET_REG_COUNT));
 #elif defined(_TARGET_ARM_)
     assert(!varTypeIsMultiReg(tree->TypeGet()));
 #endif // _TARGET_xxx_
 
+    // Add kill positions before adding def positions
+    buildKillPositionsForNode(tree, currentLoc + 1);
+
 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
     VARSET_TP       VARSET_INIT_NOCOPY(liveLargeVectors, VarSetOps::UninitVal());
-#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
-
-    // push defs
-    if (produce == 0)
+    if (RBM_FLT_CALLEE_SAVED != RBM_NONE)
     {
-        buildKillPositionsForNode(tree, currentLoc + 1);
-
-#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
-        if (RBM_FLT_CALLEE_SAVED != RBM_NONE)
-        {
-            // Build RefPositions for saving any live large vectors.
-            // This must be done after the kills, so that we know which large vectors are still live.
-            VarSetOps::AssignNoCopy(compiler, liveLargeVectors, buildUpperVectorSaveRefPositions(tree, currentLoc));
-        }
+        // Build RefPositions for saving any live large vectors.
+        // This must be done after the kills, so that we know which large vectors are still live.
+        VarSetOps::AssignNoCopy(compiler, liveLargeVectors, buildUpperVectorSaveRefPositions(tree, currentLoc));
+    }
 #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+    
+    ReturnTypeDesc* retTypeDesc = nullptr;
+    bool isMultiRegCall = tree->IsMultiRegCall();
+    if (isMultiRegCall)
+    {
+        retTypeDesc = tree->AsCall()->GetReturnTypeDesc();
+        assert((int)genCountBits(candidates) == produce);
+        assert(candidates == retTypeDesc->GetABIReturnRegs());
     }
 
+    // push defs
+    LsraLocation defLocation = currentLoc + 1;
     for (int i=0; i < produce; i++)
-    {
-        LsraLocation lastDefLocation = currentLoc + 1;
+    {        
+        regMaskTP currCandidates = candidates;
+        Interval *interval = varDefInterval;
 
-        // If this is the last def add the phys reg defs
-        bool generatedKills = false;
-        if (i == produce-1) 
+        // In case of multi-reg call node, registerType is given by
+        // the type of ith position return register.
+        if (isMultiRegCall)
         {
-            generatedKills = buildKillPositionsForNode(tree, lastDefLocation);
-
-#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
-            // Build RefPositions for saving any live large vectors.
-            // This must be done after the kills, so that we know which large vectors are still live.
-            VarSetOps::AssignNoCopy(compiler, liveLargeVectors, buildUpperVectorSaveRefPositions(tree, currentLoc));
-#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+            registerType = retTypeDesc->GetReturnRegType((unsigned)i);
+            currCandidates = genRegMask(retTypeDesc->GetABIReturnReg(i));
+            useCandidates = allRegs(registerType);
         }
-        regMaskTP currCandidates = candidates;
 
-        Interval *interval = varDefInterval;
         if (interval == nullptr)
         {
             // Make a new interval
@@ -3204,10 +3332,12 @@ LinearScan::buildRefPositionsForNode(GenTree *tree,
                 assert(!tree->IsReuseRegVal());
                 interval->isConstant = true;
             }
+
             if ((currCandidates & useCandidates) != RBM_NONE)
             {
                 interval->updateRegisterPreferences(currCandidates & useCandidates);
             }
+
             if (isSpecialPutArg)
             {
                 interval->isSpecialPutArg = true;
@@ -3227,11 +3357,10 @@ LinearScan::buildRefPositionsForNode(GenTree *tree,
         // but not push it
         if (!noPush)
         {
-            stack->Push(LocationInfo(lastDefLocation, interval, tree));
+            stack->Push(LocationInfo(defLocation, interval, tree, (unsigned) i));
         }
 
-        LsraLocation defLocation = (i == produce-1) ? lastDefLocation : currentLoc;
-        RefPosition *pos = newRefPosition(interval, defLocation, defRefType, defNode, currCandidates);
+        RefPosition* pos = newRefPosition(interval, defLocation, defRefType, defNode, currCandidates, (unsigned)i);
         if (info.isLocalDefUse)
         {
             pos->isLocalDefUse = true;
@@ -3241,6 +3370,7 @@ LinearScan::buildRefPositionsForNode(GenTree *tree,
         interval->updateRegisterPreferences(currCandidates);
         interval->updateRegisterPreferences(useCandidates);
     }
+
 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
     buildUpperVectorRestoreRefPositions(tree, currentLoc, liveLargeVectors);
 #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
@@ -6677,7 +6807,7 @@ LinearScan::resolveLocalRef(GenTreePtr treeNode, RefPosition * currentRefPositio
             if (!currentRefPosition->isFixedRegRef || currentRefPosition->moveReg)
             {
                 // This is the second case, where we need to generate a copy
-                insertCopyOrReload(treeNode, currentRefPosition); 
+                insertCopyOrReload(treeNode, currentRefPosition->getMultiRegIdx(), currentRefPosition); 
             }
         }
         else
@@ -6739,7 +6869,7 @@ LinearScan::resolveLocalRef(GenTreePtr treeNode, RefPosition * currentRefPositio
 void
 LinearScan::writeRegisters(RefPosition *currentRefPosition, GenTree *tree)
 {
-    lsraAssignRegToTree(tree, currentRefPosition->assignedReg());
+    lsraAssignRegToTree(tree, currentRefPosition->assignedReg(), currentRefPosition->getMultiRegIdx());
 }
 
 //------------------------------------------------------------------------ 
@@ -6748,8 +6878,10 @@ LinearScan::writeRegisters(RefPosition *currentRefPosition, GenTree *tree)
 //   than the one it was spilled from (GT_RELOAD).
 //
 // Arguments: 
-//    tree              - This is the node to reload. Insert the reload node between this node and its parent.
-//    spillRefPosition  - The RefPosition of the spill. spillRefPosition->nextRefPosition is the RefPosition of the reload.
+//    tree              - This is the node to copy or reload. 
+//                        Insert copy or reload node between this node and its parent.
+//    multiRegIdx       - register position of tree node for which copy or reload is needed.
+//    refPosition       - The RefPosition at which copy or reload will take place.
 //
 // Notes:
 //    The GT_COPY or GT_RELOAD will be inserted in the proper spot in execution order where the reload is to occur.
@@ -6795,15 +6927,14 @@ LinearScan::writeRegisters(RefPosition *currentRefPosition, GenTree *tree)
 // used if we reload to the same register. Normally, though, in that case we just mark the node with GTF_SPILLED,
 // and the unspilling code automatically reuses the same register, and does the reload when it notices that flag
 // when considering a node's operands.
-
+//
 void
-LinearScan::insertCopyOrReload(GenTreePtr tree, RefPosition* refPosition)
-{
+LinearScan::insertCopyOrReload(GenTreePtr tree, unsigned multiRegIdx, RefPosition* refPosition)
+{  
     GenTreePtr* parentChildPointer = nullptr;
     GenTreePtr parent = tree->gtGetParent(&parentChildPointer);
     noway_assert(parent != nullptr && parentChildPointer != nullptr);
 
-    // Create the new node, with "tree" as its only child.
     genTreeOps  oper;
     if (refPosition->reload)
     {
@@ -6814,41 +6945,65 @@ LinearScan::insertCopyOrReload(GenTreePtr tree, RefPosition* refPosition)
         oper = GT_COPY;
     }
 
-    var_types treeType = tree->TypeGet();
+    // If the parent is a reload/copy node, then tree must be a multi-reg call node
+    // that has already had one of its registers spilled. This is because a multi-reg
+    // call node is the only node whose RefTypeDef positions get independently
+    // spilled or reloaded.  It is possible that one of its RefTypeDef positions got
+    // spilled and the next use of it requires it to be in a different register.
+    //
+    // In this case set the ith position reg of reload/copy node to the reg allocated
+    // for copy/reload refPosition.  Essentially a copy/reload node will have a reg
+    // for each multi-reg position of its child. If there is a valid reg in ith 
+    // position of GT_COPY or GT_RELOAD node then the corresponding result of its
+    // child needs to be copied or reloaded to that reg.
+    if (parent->IsCopyOrReload())
+    {
+        noway_assert(parent->OperGet() == oper);
+        noway_assert(tree->IsMultiRegCall());
+        GenTreeCall* call = tree->AsCall();
+        GenTreeCopyOrReload* copyOrReload = parent->AsCopyOrReload();
+        noway_assert(copyOrReload->GetRegNumByIdx(multiRegIdx) == REG_NA);
+        copyOrReload->SetRegNumByIdx(refPosition->assignedReg(), multiRegIdx);
+    }
+    else
+    {
+        // Create the new node, with "tree" as its only child.    
+        var_types treeType = tree->TypeGet();
 
 #ifdef FEATURE_SIMD
-    // Check to see whether we need to move to a different register set.
-    // This currently only happens in the case of SIMD vector types that are small enough (pointer size)
-    // that they must be passed & returned in integer registers.
-    // 'treeType' is the type of the register we are moving FROM,
-    // and refPosition->registerAssignment is the mask for the register we are moving TO.
-    // If they don't match, we need to reverse the type for the "move" node.
+        // Check to see whether we need to move to a different register set.
+        // This currently only happens in the case of SIMD vector types that are small enough (pointer size)
+        // that they must be passed & returned in integer registers.
+        // 'treeType' is the type of the register we are moving FROM,
+        // and refPosition->registerAssignment is the mask for the register we are moving TO.
+        // If they don't match, we need to reverse the type for the "move" node.
 
-    if ((allRegs(treeType) & refPosition->registerAssignment) == 0)
-    {
-        treeType = (useFloatReg(treeType)) ? TYP_I_IMPL : TYP_SIMD8;
-    }
+        if ((allRegs(treeType) & refPosition->registerAssignment) == 0)
+        {
+            treeType = (useFloatReg(treeType)) ? TYP_I_IMPL : TYP_SIMD8;
+        }
 #endif // FEATURE_SIMD
 
-    GenTreePtr newNode = compiler->gtNewOperNode(oper, treeType, tree);
-    assert(refPosition->registerAssignment != RBM_NONE);
-    newNode->CopyCosts(tree);
-    newNode->gtRegNum = refPosition->assignedReg();
-    newNode->gtLsraInfo.isLsraAdded = true;
-    newNode->gtLsraInfo.isLocalDefUse = false;
-    if (refPosition->copyReg)
-    {
-        // This is a TEMPORARY copy
-        assert(isCandidateLocalRef(tree));
-        newNode->gtFlags |= GTF_VAR_DEATH;
-    }
+        GenTreeCopyOrReload* newNode = new(compiler, oper) GenTreeCopyOrReload(oper, treeType, tree);
+        assert(refPosition->registerAssignment != RBM_NONE);
+        newNode->CopyCosts(tree);
+        newNode->SetRegNumByIdx(refPosition->assignedReg(), multiRegIdx);
+        newNode->gtLsraInfo.isLsraAdded = true;
+        newNode->gtLsraInfo.isLocalDefUse = false;
+        if (refPosition->copyReg)
+        {
+            // This is a TEMPORARY copy
+            assert(isCandidateLocalRef(tree));
+            newNode->gtFlags |= GTF_VAR_DEATH;
+        }
 
-    // Replace tree in the parent node.
-    *parentChildPointer = newNode;
+        // Replace tree in the parent node.
+        *parentChildPointer = newNode;
 
-    // we insert this directly after the spilled node.  it does not reload at that point but
-    // just updates registers
-    tree->InsertAfterSelf(newNode);
+        // we insert this directly after the spilled node.  it does not reload at that point but
+        // just updates registers
+        tree->InsertAfterSelf(newNode);
+    }
 }
 
 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
@@ -7060,7 +7215,19 @@ LinearScan::updateMaxSpill(RefPosition* refPosition)
                     treeNode = interval->firstRefPosition->treeNode;
                 }
                 assert(treeNode != nullptr);
-                typ = compiler->tmpNormalizeType(treeNode->TypeGet());
+
+                // In case of multi-reg call nodes, we need to use the type
+                // of the return register given by multiRegIdx of the refposition.
+                if (treeNode->IsMultiRegCall())
+                {
+                    ReturnTypeDesc* retTypeDesc = treeNode->AsCall()->GetReturnTypeDesc();
+                    typ = retTypeDesc->GetReturnRegType(refPosition->getMultiRegIdx());                        
+                }
+                else
+                {
+                    typ = treeNode->TypeGet();
+                }
+                typ = compiler->tmpNormalizeType(typ);
             }
 
             if (refPosition->spillAfter && !refPosition->reload)
@@ -7383,22 +7550,33 @@ LinearScan::resolveRegisters()
                         if (currentRefPosition->spillAfter)
                         {
                             treeNode->gtFlags |= GTF_SPILL;
+
                             // If this is a constant interval that is reusing a pre-existing value, we actually need
                             // to generate the value at this point in order to spill it.
                             if (treeNode->IsReuseRegVal())
                             {
                                 treeNode->ResetReuseRegVal();
                             }
+
+                            // In case of multi-reg call node, also set spill flag on the 
+                            // register specified by multi-reg index of current RefPosition.
+                            // Note that the spill flag on treeNode indicates that one or
+                            // more its allocated registers are in that state.
+                            if (treeNode->IsMultiRegCall())
+                            {
+                                GenTreeCall* call = treeNode->AsCall();
+                                call->SetRegSpillFlagByIdx(GTF_SPILL, currentRefPosition->getMultiRegIdx());
+                            }
                         }
 
                         // If the value is reloaded or moved to a different register, we need to insert
                         // a node to hold the register to which it should be reloaded
-                        RefPosition * nextRefPosition = currentRefPosition->nextRefPosition;
+                        RefPosition* nextRefPosition = currentRefPosition->nextRefPosition;
                         assert(nextRefPosition != nullptr);
                         if (INDEBUG(alwaysInsertReload() ||)
                             nextRefPosition->assignedReg() != currentRefPosition->assignedReg())
                         {
-                            insertCopyOrReload(treeNode, nextRefPosition);
+                            insertCopyOrReload(treeNode, currentRefPosition->getMultiRegIdx(), nextRefPosition);
                         }
                     }
 
@@ -7665,7 +7843,7 @@ LinearScan::insertMove(BasicBlock * block,
     }
     else
     {
-        top = compiler->gtNewOperNode(GT_COPY, varDsc->TypeGet(), src);
+        top = new(compiler, GT_COPY) GenTreeCopyOrReload(GT_COPY, varDsc->TypeGet(), src);
         // This is the new home of the lclVar - indicate that by clearing the GTF_VAR_DEATH flag.
         // Note that if src is itself a lastUse, this will have no effect.
         top->gtFlags &= ~(GTF_VAR_DEATH);
index f127064..079c7eb 100644 (file)
@@ -42,11 +42,22 @@ regMaskTP calleeSaveRegs(RegisterType rt)
 struct LocationInfo
 {
     LsraLocation  loc;
-    Interval    * interval;
-    GenTree     * treeNode;
-
-    LocationInfo(LsraLocation l, Interval *i, GenTree *t)
-    : loc(l), interval(i), treeNode(t) {}
+    
+    // Reg Index in case of multi-reg result producing call node.
+    // Indicates the position of the register that this location refers to.
+    // The number of bits needed is based on the max value of MAX_RET_REG_COUNT
+    // across all targets, which happens to be 4 on Arm.  Hence the index value
+    // would be 0..MAX_RET_REG_COUNT-1.
+    unsigned      multiRegIdx : 2;
+
+    Interval*     interval;
+    GenTree*      treeNode;
+
+    LocationInfo(LsraLocation l, Interval* i, GenTree* t, unsigned regIdx = 0)
+    : loc(l), multiRegIdx(regIdx), interval(i), treeNode(t)
+    {
+        assert(multiRegIdx == regIdx);
+    }
 
     // default constructor for data structures
     LocationInfo() {}
@@ -377,7 +388,7 @@ public:
     // Insert a copy in the case where a tree node value must be moved to a different
     // register at the point of use, or it is reloaded to a different register
     // than the one it was spilled from
-    void            insertCopyOrReload(GenTreePtr tree, RefPosition* refPosition);
+    void            insertCopyOrReload(GenTreePtr tree, unsigned multiRegIdx, RefPosition* refPosition);
 
 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
     // Insert code to save and restore the upper half of a vector that lives
@@ -658,6 +669,8 @@ private:
                                               LsraLocation currentLoc);
 
     regMaskTP       allRegs(RegisterType rt);
+    regMaskTP       allRegs(GenTree* tree);
+    regMaskTP       allMultiRegCallNodeRegs(GenTreeCall* tree);
     regMaskTP       allSIMDRegs();
     regMaskTP       internalFloatRegCandidates();
 
@@ -722,12 +735,17 @@ private:
 
     RefPosition *   newRefPositionRaw(LsraLocation nodeLocation, GenTree* treeNode, RefType refType);
 
-    RefPosition *   newRefPosition(Interval * theInterval, LsraLocation theLocation,
-                                   RefType theRefType, GenTree * theTreeNode,
-                                   regMaskTP mask);
-
-    RefPosition *   newRefPosition(regNumber reg, LsraLocation theLocation,
-                                   RefType theRefType, GenTree * theTreeNode,
+    RefPosition*    newRefPosition(Interval* theInterval, 
+                                   LsraLocation theLocation,
+                                   RefType theRefType, 
+                                   GenTree* theTreeNode,
+                                   regMaskTP mask,
+                                   unsigned multiRegIdx = 0);
+
+    RefPosition*    newRefPosition(regNumber reg, 
+                                   LsraLocation theLocation,
+                                   RefType theRefType, 
+                                   GenTree* theTreeNode,
                                    regMaskTP mask);
 
     void applyCalleeSaveHeuristics(RefPosition* rp);
@@ -1274,6 +1292,7 @@ public:
         , nodeLocation(nodeLocation)
         , registerAssignment(RBM_NONE)
         , refType(refType)
+        , multiRegIdx(0)
         , lastUse(false)
         , reload(false)
         , spillAfter(false)
@@ -1334,6 +1353,21 @@ public:
                );
     }
 
+    // Used by RefTypeDef/Use positions of a multi-reg call node.
+    // Indicates the position of the register that this ref position refers to.
+    // The number of bits needed is based on the max value of MAX_RET_REG_COUNT
+    // across all targets, which happens to be 4 on Arm.  Hence the index value
+    // would be 0..MAX_RET_REG_COUNT-1.
+    unsigned        multiRegIdx  : 2;
+
+    void            setMultiRegIdx(unsigned idx)
+    {
+        multiRegIdx = idx;
+        assert(multiRegIdx == idx);
+    }
+
+    unsigned        getMultiRegIdx() { return multiRegIdx;  }
+
     // Last Use - this may be true for multiple RefPositions in the same Interval
     bool            lastUse      : 1;
 
index 3297358..5b6fab8 100644 (file)
@@ -38,7 +38,7 @@ public:
         int dstCount;
     void setDstCount(int count)
     {
-        assert(count == 0 || count == 1);
+        assert(count <= MAX_RET_REG_COUNT);
         _dstCount = (char) count;
     }
     int getDstCount() { return _dstCount; }
index c1b9018..a188461 100644 (file)
@@ -1415,19 +1415,47 @@ void                RegSet::rsSpillRegIfUsed(regNumber reg)
 #endif // LEGACY_BACKEND
 
 
-/*****************************************************************************
- *
- *  Spill the tree held in 'reg'
- */
-
-void                RegSet::rsSpillTree(regNumber reg, GenTreePtr tree)
-{
-    SpillDsc   *    spill;
-    TempDsc    *    temp;
+//------------------------------------------------------------
+// rsSpillTree: Spill the tree held in 'reg'.
+//
+// Arguments:
+//   reg     -   Register of tree node that is to be spilled
+//   tree    -   GenTree node that is being spilled
+//   regIdx  -   Register index identifying the specific result 
+//               register of a multi-reg call node. For single-reg
+//               producing tree nodes its value is zero.
+//
+// Return Value:
+//   None.
+//
+// Assumption:
+//    RyuJIT backend specific: in case of multi-reg call nodes, GTF_SPILL
+//    flag associated with the reg that is being spilled is cleared.  The
+//    caller of this method is expected to clear GTF_SPILL flag on call
+//    node after all of its registers marked for spilling are spilled.
+//
+void                RegSet::rsSpillTree(regNumber reg, 
+                                        GenTreePtr tree,
+                                        unsigned regIdx /* =0 */)
+{    
+    assert(tree != nullptr);
+        
+    GenTreeCall* call = nullptr;
+    var_types    treeType;
 
-    assert(tree);
+#ifndef LEGACY_BACKEND
+    if (tree->IsMultiRegCall())
+    {
+        call = tree->AsCall();
+        ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
+        treeType = retTypeDesc->GetReturnRegType(regIdx);
+    }
+    else
+#endif
+    {
+        treeType = tree->TypeGet();
+    }
 
-    var_types       treeType = tree->TypeGet();
     var_types       tempType = Compiler::tmpNormalizeType(treeType);
     regMaskTP       mask;
     bool            floatSpill = false;
@@ -1445,8 +1473,8 @@ void                RegSet::rsSpillTree(regNumber reg, GenTreePtr tree)
     rsNeededSpillReg = true;
 
 #ifdef LEGACY_BACKEND
-    /* The register we're spilling must be used but not locked
-       or an enregistered variable. */
+    // The register we're spilling must be used but not locked
+    // or an enregistered variable. 
 
     assert((mask & rsMaskUsed) == mask);
     assert((mask & rsMaskLock) == 0);
@@ -1455,17 +1483,31 @@ void                RegSet::rsSpillTree(regNumber reg, GenTreePtr tree)
 
 #ifndef LEGACY_BACKEND
     // We should only be spilling nodes marked for spill,
-    // vars should be handled elsewhere,
-    // and we shouldn't spill nodes twice so we reset GTF_SPILL
+    // vars should be handled elsewhere, and to prevent
+    // spilling twice clear GTF_SPILL flag on tree node.
+    //
+    // In case of multi-reg call nodes only the spill flag
+    // associated with the reg is cleared. Spill flag on
+    // call node should be cleared by the caller of this method.
     assert(tree->gtOper != GT_REG_VAR);
-    assert(!varTypeIsMultiReg(tree));
-    assert(tree->gtFlags & GTF_SPILL);
-    tree->gtFlags &= ~GTF_SPILL;
+    assert((tree->gtFlags & GTF_SPILL) != 0);
+
+    unsigned regFlags = 0;
+    if (call != nullptr)
+    {
+        regFlags = call->GetRegSpillFlagByIdx(regIdx);
+        assert((regFlags & GTF_SPILL) != 0);
+        regFlags &= ~GTF_SPILL;
+    }
+    else
+    {
+        assert(!varTypeIsMultiReg(tree));
+        tree->gtFlags &= ~GTF_SPILL;
+    }  
 #endif // !LEGACY_BACKEND
 
 #if CPU_LONG_USES_REGPAIR
-    /* Are we spilling a part of a register pair? */
-
+    // Are we spilling a part of a register pair? 
     if  (treeType == TYP_LONG)
     {
         tempType = TYP_I_IMPL;
@@ -1479,20 +1521,18 @@ void                RegSet::rsSpillTree(regNumber reg, GenTreePtr tree)
     }
 #else
     assert(tree->InReg());
-    assert(tree->gtRegNum == reg);
+    assert(tree->gtRegNum == reg || (call != nullptr && call->GetRegNumByIdx(regIdx) == reg));
 #endif // CPU_LONG_USES_REGPAIR
 
-    /* Are any registers free for spillage? */
-
-    spill = SpillDsc::alloc(m_rsCompiler, this, tempType);
+    // Are any registers free for spillage?
+    SpillDsc* spill = SpillDsc::alloc(m_rsCompiler, this, tempType);
 
-    /* Grab a temp to store the spilled value */
-
-    spill->spillTemp = temp = m_rsCompiler->tmpGetTemp(tempType);
+    // Grab a temp to store the spilled value
+    TempDsc* temp = m_rsCompiler->tmpGetTemp(tempType);
+    spill->spillTemp = temp;
     tempType = temp->tdTempType();
 
-    /* Remember what it is we have spilled */
-
+    // Remember what it is we have spilled
     spill->spillTree = tree;
 #ifdef LEGACY_BACKEND
     spill->spillAddr = rsUsedAddr[reg];
@@ -1512,15 +1552,13 @@ void                RegSet::rsSpillTree(regNumber reg, GenTreePtr tree)
 #endif
 
 #ifdef LEGACY_BACKEND
-    /* Is the register part of a complex address mode? */
-
+    // Is the register part of a complex address mode?
     rsAddrSpillOper(rsUsedAddr[reg]);
 #endif // LEGACY_BACKEND
 
-    /* 'lastDsc' is 'spill' for simple cases, and will point to the last
-       multi-use descriptor if 'reg' is being multi-used */
-
-    SpillDsc *  lastDsc = spill;
+    // 'lastDsc' is 'spill' for simple cases, and will point to the last
+    // multi-use descriptor if 'reg' is being multi-used
+    SpillDsc*  lastDsc = spill;
 
 #ifdef LEGACY_BACKEND
     if  ((rsMaskMult & mask) == 0)
@@ -1529,31 +1567,27 @@ void                RegSet::rsSpillTree(regNumber reg, GenTreePtr tree)
     }
     else
     {
-        /* The register is being multi-used and will have entries in
-           rsMultiDesc[reg]. Spill all of them (ie. move them to
-           rsSpillDesc[reg]).
-           When we unspill the reg, they will all be moved back to
-           rsMultiDesc[].
-         */
+        // The register is being multi-used and will have entries in
+        // rsMultiDesc[reg]. Spill all of them (ie. move them to
+        // rsSpillDesc[reg]).
+        // When we unspill the reg, they will all be moved back to
+        // rsMultiDesc[].
 
         spill->spillMoreMultis = true;
 
-        SpillDsc * nextDsc = rsMultiDesc[reg];
+        SpillDsc* nextDsc = rsMultiDesc[reg];
 
         do
         {
-            assert(nextDsc);
-
-            /* Is this multi-use part of a complex address mode? */
+            assert(nextDsc != nullptr);
 
+            // Is this multi-use part of a complex address mode?
             rsAddrSpillOper(nextDsc->spillAddr);
 
-            /* Mark the tree node as having been spilled */
-
+            // Mark the tree node as having been spilled 
             rsMarkSpill(nextDsc->spillTree, reg);
 
-            /* lastDsc points to the last of the multi-spill descrs for 'reg' */
-
+            // lastDsc points to the last of the multi-spill descrs for 'reg'
             nextDsc->spillTemp = temp;
 
 #ifdef  DEBUG
@@ -1575,15 +1609,13 @@ void                RegSet::rsSpillTree(regNumber reg, GenTreePtr tree)
 
         rsMultiDesc[reg] = nextDsc;
 
-        /* 'reg' is no longer considered to be multi-used. We will set this
-           mask again when this value gets unspilled */
-
+        // 'reg' is no longer considered to be multi-used. We will set this
+        // mask again when this value gets unspilled 
         rsMaskMult &= ~mask;
     }
 #endif // LEGACY_BACKEND
 
-    /* Insert the spill descriptor(s) in the list */
-
+    // Insert the spill descriptor(s) in the list
     lastDsc->spillNext = rsSpillDesc[reg];
                          rsSpillDesc[reg] = spill;
 
@@ -1591,20 +1623,26 @@ void                RegSet::rsSpillTree(regNumber reg, GenTreePtr tree)
     if  (m_rsCompiler->verbose)     printf("\n");
 #endif
 
-     /* Generate the code to spill the register */
+    // Generate the code to spill the register
     var_types  storeType = floatSpill ? treeType : tempType;
 
     m_rsCompiler->codeGen->spillReg(storeType, temp, reg);
 
-    /* Mark the tree node as having been spilled */
-
+    // Mark the tree node as having been spilled
     rsMarkSpill(tree, reg);
 
 #ifdef LEGACY_BACKEND
-    /* The register is now free */
-
+    // The register is now free
     rsMarkRegFree(mask);
-#endif // LEGACY_BACKEND
+#else
+    // In case of multi-reg call node also mark the specific
+    // result reg as spilled.
+    if (call != nullptr)
+    {
+        regFlags |= GTF_SPILLED;
+        call->SetRegSpillFlagByIdx(regFlags, regIdx);
+    }
+#endif //!LEGACY_BACKEND
 }
 
 #if defined(_TARGET_X86_) && !FEATURE_STACK_FP_X87
@@ -2208,30 +2246,51 @@ regNumber           RegSet::rsUnspillOneReg(GenTreePtr    tree,
 }
 #endif // LEGACY_BACKEND
 
-/*****************************************************************************
- *  The given tree operand has been spilled; just mark it as unspilled so
- *  that we can use it as "normal" local.
- *  It is the responsibility of the caller to free the spill temp.
- */
-
-TempDsc *     RegSet::rsUnspillInPlace(GenTreePtr    tree)
+//---------------------------------------------------------------------
+//  rsUnspillInPlace: The given tree operand has been spilled; just mark
+//  it as unspilled so that we can use it as "normal" local.
+//
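+//  Arguments:
+//     tree    -  GenTree that needs to be marked as unspilled.
+//     oldReg  -  reg of tree that was spilled.
+//     regIdx  -  index of the result reg of a multi-reg call node; defaults to zero.
+//
+//  Return Value: the TempDsc (spill temp) that held the spilled value.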
+//
+//  Assumptions:
+//  1. It is the responsibility of the caller to free the spill temp.
+//  2. RyuJIT backend specific: In case of multi-reg call node 
+//     GTF_SPILLED flag associated with reg is cleared.  It is the
+//     responsibility of caller to clear GTF_SPILLED flag on call node
+//     itself after ensuring there are no outstanding regs in GTF_SPILLED
+//     state.
+//
+TempDsc*     RegSet::rsUnspillInPlace(GenTreePtr tree, 
+                                      regNumber  oldReg,
+                                      unsigned   regIdx /* =0 */)
 {
-    /* Get the tree's SpillDsc */
-
     assert(!isRegPairType(tree->gtType));
-    regNumber   oldReg = tree->gtRegNum;
 
+    // Get the tree's SpillDsc  
     SpillDsc* prevDsc;
     SpillDsc* spillDsc = rsGetSpillInfo(tree, oldReg, &prevDsc);
     PREFIX_ASSUME(spillDsc != nullptr);
 
-    /* Get the temp */
-
+    // Get the temp
     TempDsc* temp = rsGetSpillTempWord(oldReg, spillDsc, prevDsc);
 
-    /* The value is now unspilled */
-
-    tree->gtFlags &= ~GTF_SPILLED;
+    // The value is now unspilled
+    if (tree->IsMultiRegCall())
+    {
+        GenTreeCall* call = tree->AsCall();
+        unsigned flags = call->GetRegSpillFlagByIdx(regIdx);
+        flags &= ~GTF_SPILLED;
+        call->SetRegSpillFlagByIdx(flags, regIdx);
+    }
+    else
+    {
+        tree->gtFlags &= ~GTF_SPILLED;
+    }
 
 #ifdef  DEBUG
     if  (m_rsCompiler->verbose) 
index 2f1c5af..a2ea3fe 100644 (file)
@@ -75,17 +75,17 @@ private:
     // The same descriptor is also used for 'multi-use' register tracking, BTW.
     struct  SpillDsc
     {
-        SpillDsc   *        spillNext;    // next spilled value of same reg
+        SpillDsc*           spillNext;    // next spilled value of same reg
 
         union
         {
             GenTreePtr      spillTree;    // the value that was spilled
 #ifdef LEGACY_BACKEND
-            LclVarDsc *     spillVarDsc;  // variable if it's an enregistered variable
+            LclVarDsc*      spillVarDsc;  // variable if it's an enregistered variable
 #endif // LEGACY_BACKEND
         };
 
-        TempDsc    *        spillTemp;    // the temp holding the spilled value
+        TempDsc*            spillTemp;    // the temp holding the spilled value
 
 #ifdef LEGACY_BACKEND
         GenTreePtr          spillAddr;    // owning complex address mode or nullptr
@@ -98,8 +98,8 @@ private:
         };
 #endif // LEGACY_BACKEND
 
-        static SpillDsc   * alloc   (Compiler * pComp, RegSet *regSet, var_types type);
-        static void         freeDsc (RegSet *regSet, SpillDsc *spillDsc);
+        static SpillDsc*    alloc   (Compiler* pComp, RegSet* regSet, var_types type);
+        static void         freeDsc (RegSet *regSet, SpillDsc* spillDsc);
     };
 
 #ifdef LEGACY_BACKEND
@@ -351,7 +351,8 @@ private:
     void                rsSpillEnd      ();
 
     void                rsSpillTree     (regNumber      reg,
-                                         GenTreePtr     tree);
+                                         GenTreePtr     tree,
+                                         unsigned       regIdx = 0);
 
 #if defined(_TARGET_X86_) && !FEATURE_STACK_FP_X87
     void                rsSpillFPStack(GenTreePtr tree);
@@ -385,7 +386,9 @@ private:
                                         regMaskTP      needReg);
 #endif // LEGACY_BACKEND
 
-    TempDsc *           rsUnspillInPlace(GenTreePtr     tree);
+    TempDsc*            rsUnspillInPlace(GenTreePtr     tree,
+                                         regNumber      oldReg,
+                                         unsigned       regIdx = 0);
 
 #ifdef LEGACY_BACKEND
     void                rsUnspillReg    (GenTreePtr     tree,