From a88ba565faa592e5aeb59df22640961e81d8d037 Mon Sep 17 00:00:00 2001 From: sivarv Date: Wed, 4 May 2016 08:29:18 -0700 Subject: [PATCH] Multi-reg call node support. --- src/jit/codegenarm.cpp | 2 +- src/jit/codegenarm64.cpp | 2 +- src/jit/codegenlegacy.cpp | 2 +- src/jit/codegenxarch.cpp | 571 ++++++++++++++++++++++++++++---------- src/jit/gentree.cpp | 265 +++++++++++++++++- src/jit/gentree.h | 456 ++++++++++++++++++++++++++---- src/jit/gtstructs.h | 1 + src/jit/importer.cpp | 4 +- src/jit/lowerxarch.cpp | 117 +++++--- src/jit/lsra.cpp | 374 ++++++++++++++++++------- src/jit/lsra.h | 58 +++- src/jit/nodeinfo.h | 2 +- src/jit/regset.cpp | 209 +++++++++----- src/jit/regset.h | 17 +- 14 files changed, 1632 insertions(+), 448 deletions(-) diff --git a/src/jit/codegenarm.cpp b/src/jit/codegenarm.cpp index 53b7315ea2..a3f5a005a5 100644 --- a/src/jit/codegenarm.cpp +++ b/src/jit/codegenarm.cpp @@ -1821,7 +1821,7 @@ void CodeGen::genUnspillRegIfNeeded(GenTree *tree) } else { - TempDsc* t = regSet.rsUnspillInPlace(unspillTree); + TempDsc* t = regSet.rsUnspillInPlace(unspillTree, unspillTree->gtRegNum); compiler->tmpRlsTemp(t); getEmitter()->emitIns_R_S(ins_Load(unspillTree->gtType), emitActualTypeSize(unspillTree->gtType), diff --git a/src/jit/codegenarm64.cpp b/src/jit/codegenarm64.cpp index 031131b56b..2228aa1c22 100644 --- a/src/jit/codegenarm64.cpp +++ b/src/jit/codegenarm64.cpp @@ -4706,7 +4706,7 @@ void CodeGen::genUnspillRegIfNeeded(GenTree *tree) } else { - TempDsc* t = regSet.rsUnspillInPlace(unspillTree); + TempDsc* t = regSet.rsUnspillInPlace(unspillTree, unspillTree->gtRegNum); getEmitter()->emitIns_R_S(ins_Load(unspillTree->gtType), emitActualTypeSize(unspillTree->gtType), dstReg, diff --git a/src/jit/codegenlegacy.cpp b/src/jit/codegenlegacy.cpp index 2b34af8d5d..b5a9f790c8 100644 --- a/src/jit/codegenlegacy.cpp +++ b/src/jit/codegenlegacy.cpp @@ -7170,7 +7170,7 @@ DONE_LEA_ADD: { /* Get the temp we spilled into. */ - TempDsc * temp = regSet.rsUnspillInPlace(op1); + TempDsc * temp = regSet.rsUnspillInPlace(op1, op1->gtRegNum); /* For 8bit operations, we need to make sure that op2 is in a byte-addressable registers */ diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp index b75dc9fa2d..0250977599 100644 --- a/src/jit/codegenxarch.cpp +++ b/src/jit/codegenxarch.cpp @@ -185,16 +185,18 @@ void CodeGen::genEmitGSCookieCheck(bool pushReg) // Handle multi-reg return type values if (compiler->compMethodReturnsMultiRegRetType()) { -#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING ReturnTypeDesc retTypeDesc; retTypeDesc.Initialize(compiler, compiler->info.compMethodInfo->args.retTypeClass); + unsigned regCount = retTypeDesc.GetReturnRegCount(); - assert(retTypeDesc.GetReturnRegCount() == CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_RETURN_IN_REGISTERS); + // Only x86 and x64 Unix ABI allows multi-reg return and + // number of result regs should be equal to MAX_RET_REG_COUNT. + assert(regCount == MAX_RET_REG_COUNT); - // Set the GC-ness of the struct return registers. 
- gcInfo.gcMarkRegPtrVal(REG_INTRET, retTypeDesc.GetReturnRegType(0)); - gcInfo.gcMarkRegPtrVal(REG_INTRET_1, retTypeDesc.GetReturnRegType(1)); -#endif + for (unsigned i = 0; i < regCount; ++i) + { + gcInfo.gcMarkRegPtrVal(retTypeDesc.GetABIReturnReg(i), retTypeDesc.GetReturnRegType(i)); + } } else if (compiler->compMethodReturnsRetBufAddr()) { @@ -1510,6 +1512,8 @@ CodeGen::isStructReturn(GenTreePtr treeNode) // Return Value: // None // +// Assumption: +// op1 of GT_RETURN node is either GT_LCL_VAR or multi-reg GT_CALL void CodeGen::genStructReturn(GenTreePtr treeNode) { @@ -1518,9 +1522,6 @@ CodeGen::genStructReturn(GenTreePtr treeNode) var_types targetType = treeNode->TypeGet(); #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING - noway_assert((op1->OperGet() == GT_LCL_VAR) || - (op1->OperGet() == GT_CALL)); - if (op1->OperGet() == GT_LCL_VAR) { assert(op1->isContained()); @@ -1531,31 +1532,94 @@ CodeGen::genStructReturn(GenTreePtr treeNode) ReturnTypeDesc retTypeDesc; retTypeDesc.Initialize(compiler, varDsc->lvVerTypeInfo.GetClassHandle()); - assert(retTypeDesc.GetReturnRegCount() == CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_RETURN_IN_REGISTERS); - - var_types type0 = retTypeDesc.GetReturnRegType(0); - var_types type1 = retTypeDesc.GetReturnRegType(1); + unsigned regCount = retTypeDesc.GetReturnRegCount(); + assert(regCount == MAX_RET_REG_COUNT); - regNumber reg0 = retTypeDesc.GetABIReturnReg(0); - regNumber reg1 = retTypeDesc.GetABIReturnReg(1); - assert(reg0 != REG_NA && reg1 != REG_NA); - - // Move the values into the return registers - getEmitter()->emitIns_R_S(ins_Load(type0), emitTypeSize(type0), reg0, lclVar->gtLclNum, 0); - getEmitter()->emitIns_R_S(ins_Load(type1), emitTypeSize(type1), reg1, lclVar->gtLclNum, 8); + // Move the value into ABI return registers + int offset = 0; + for (unsigned i = 0; i < regCount; ++i) + { + var_types type = retTypeDesc.GetReturnRegType(i); + regNumber reg = retTypeDesc.GetABIReturnReg(i); + getEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), reg, lclVar->gtLclNum, offset); + offset += genTypeSize(type); + } } else { - // Assumption: multi-reg return value of a GT_CALL node is never spilled. - // TODO-BUG: support for multi-reg call nodes. + assert(op1->IsMultiRegCall()); + genConsumeRegs(op1); - assert(op1->OperGet() == GT_CALL); - assert((op1->gtFlags & GTF_SPILLED) == 0); - } + GenTreeCall* call = op1->AsCall(); + ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc(); + unsigned regCount = retTypeDesc->GetReturnRegCount(); + assert(regCount == MAX_RET_REG_COUNT); + + // Handle circular dependency between call allocated regs and ABI return regs. + // + // It is possible under LSRA stress that originally allocated regs of call node, + // say rax and rdx, are spilled and reloaded to rdx and rax respectively. But + // GT_RETURN needs to move values as follows: rdx->rax, rax->rdx. Similar kind + // kind of circular dependency could arise between xmm0 and xmm1 return regs. + // Codegen is expected to handle such circular dependency. 
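The resolution that follows is the classic in-place exchange: a single XCHG for the integer pair, and a three-instruction PXOR sequence for the XMM pair. As an illustration only (not part of this diff), the PXOR sequence is the usual xor-swap identity, shown here on plain integers:

    void XorSwap(unsigned& a, unsigned& b)
    {
        a ^= b;   // a == a0 ^ b0
        b ^= a;   // b == b0 ^ (a0 ^ b0) == a0
        a ^= b;   // a == (a0 ^ b0) ^ a0 == b0
    }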
+ // + var_types regType0 = retTypeDesc->GetReturnRegType(0); + regNumber returnReg0 = retTypeDesc->GetABIReturnReg(0); + regNumber allocatedReg0 = call->GetRegNumByIdx(0); + + var_types regType1 = retTypeDesc->GetReturnRegType(1); + regNumber returnReg1 = retTypeDesc->GetABIReturnReg(1); + regNumber allocatedReg1 = call->GetRegNumByIdx(1); + + if (allocatedReg0 == returnReg1 && + allocatedReg1 == returnReg0) + { + // Circular dependency - swap allocatedReg0 and allocatedReg1 + if (varTypeIsFloating(regType0)) + { + assert(varTypeIsFloating(regType1)); + + // The fastest way to swap two XMM regs is using PXOR + inst_RV_RV(INS_pxor, allocatedReg0, allocatedReg1, TYP_DOUBLE); + inst_RV_RV(INS_pxor, allocatedReg1, allocatedReg0, TYP_DOUBLE); + inst_RV_RV(INS_pxor, allocatedReg0, allocatedReg1, TYP_DOUBLE); + } + else + { + assert(varTypeIsIntegral(regType0)); + assert(varTypeIsIntegral(regType1)); + inst_RV_RV(INS_xchg, allocatedReg1, allocatedReg0, TYP_I_IMPL); + } + } + else if (allocatedReg1 == returnReg0) + { + // Change the order of moves to correctly handle dependency. + if (allocatedReg1 != returnReg1) + { + inst_RV_RV(ins_Copy(regType1), returnReg1, allocatedReg1, regType1); + } + if (allocatedReg0 != returnReg0) + { + inst_RV_RV(ins_Copy(regType0), returnReg0, allocatedReg0, regType0); + } + } + else + { + // No circular dependency case. + if (allocatedReg0 != returnReg0) + { + inst_RV_RV(ins_Copy(regType0), returnReg0, allocatedReg0, regType0); + } + if (allocatedReg1 != returnReg1) + { + inst_RV_RV(ins_Copy(regType1), returnReg1, allocatedReg1, regType1); + } + } + } #else - assert("!unreached"); + unreached(); #endif } @@ -1650,7 +1714,7 @@ CodeGen::genReturn(GenTreePtr treeNode) op1->gtFlags |= GTF_SPILLED; op1->gtFlags &= ~GTF_SPILL; - TempDsc* t = regSet.rsUnspillInPlace(op1); + TempDsc* t = regSet.rsUnspillInPlace(op1, op1->gtRegNum); inst_FS_ST(INS_fld, emitActualTypeSize(op1->gtType), t, 0); op1->gtFlags &= ~GTF_SPILLED; compiler->tmpRlsTemp(t); @@ -1960,9 +2024,11 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode) case GT_STORE_LCL_VAR: { + GenTreePtr op1 = treeNode->gtGetOp1(); + // var = call, where call returns a multi-reg return value // case is handled separately. - if (treeNode->IsMultiRegCallStoreToLocal()) + if (op1->gtSkipReloadOrCopy()->IsMultiRegCall()) { genMultiRegCallStoreToLocal(treeNode); } @@ -1984,8 +2050,7 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode) break; } #endif // !defined(_TARGET_64BIT_) - - GenTreePtr op1 = treeNode->gtGetOp1(); + genConsumeRegs(op1); if (treeNode->gtRegNum == REG_NA) @@ -2635,12 +2700,12 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode) // None // // Assumption: -// The child of store is a GT_CALL node. +// The child of store is a multi-reg call node. // void CodeGen::genMultiRegCallStoreToLocal(GenTreePtr treeNode) { - assert(treeNode->IsMultiRegCallStoreToLocal()); + assert(treeNode->OperGet() == GT_STORE_LCL_VAR); #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING // Structs of size >=9 and <=16 are returned in two return registers on x64 Unix. @@ -2649,38 +2714,44 @@ CodeGen::genMultiRegCallStoreToLocal(GenTreePtr treeNode) // Assumption: struct local var needs to be in memory noway_assert(!treeNode->InReg()); + // Assumption: current x64 Unix implementation requires that a multi-reg struct + // var in 'var = call' is flagged as lvIsMultiRegArgOrRet to prevent it from + // being struct promoted. 
+ unsigned lclNum = treeNode->AsLclVarCommon()->gtLclNum; + LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]); + noway_assert(varDsc->lvIsMultiRegArgOrRet); + GenTree* op1 = treeNode->gtGetOp1(); - GenTreeCall* actualOp1 = op1->gtSkipReloadOrCopy()->AsCall(); - assert(actualOp1->HasMultiRegRetVal()); + GenTree* actualOp1 = op1->gtSkipReloadOrCopy(); + GenTreeCall* call = actualOp1->AsCall(); + assert(call->HasMultiRegRetVal()); genConsumeRegs(op1); - ReturnTypeDesc* retTypeDesc = actualOp1->GetReturnTypeDesc(); - assert(retTypeDesc->GetReturnRegCount() == CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_RETURN_IN_REGISTERS); + ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc(); + assert(retTypeDesc->GetReturnRegCount() == MAX_RET_REG_COUNT); + unsigned regCount = retTypeDesc->GetReturnRegCount(); - var_types type0 = retTypeDesc->GetReturnRegType(0); - var_types type1 = retTypeDesc->GetReturnRegType(1); - - regNumber reg0 = retTypeDesc->GetABIReturnReg(0); - regNumber reg1 = retTypeDesc->GetABIReturnReg(1); - - assert(reg0 != REG_NA && reg1 != REG_NA); - - // Assumption: multi-reg return value of a GT_CALL node never gets spilled. - // TODO-BUG: support for multi-reg GT_CALL nodes. - - unsigned lclNum = treeNode->AsLclVarCommon()->gtLclNum; - LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]); - - // Assumption: current x64 Unix implementation requires that a multi-reg struct - // var in 'var = call' is flagged as lvIsMultiRegArgOrRet to prevent it from - // being struct poromoted. - // - - noway_assert(varDsc->lvIsMultiRegArgOrRet); + int offset = 0; + for (unsigned i = 0; i < regCount; ++i) + { + var_types type = retTypeDesc->GetReturnRegType(i); + regNumber reg = call->GetRegNumByIdx(i); + if (op1->IsCopyOrReload()) + { + // GT_COPY/GT_RELOAD will have valid reg for those positions + // that need to be copied or reloaded. + regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(i); + if (reloadReg != REG_NA) + { + reg = reloadReg; + } + } - getEmitter()->emitIns_S_R(ins_Store(type0), emitTypeSize(type0), reg0, lclNum, 0); - getEmitter()->emitIns_S_R(ins_Store(type1), emitTypeSize(type1), reg1, lclNum, 8); + assert(reg != REG_NA); + getEmitter()->emitIns_S_R(ins_Store(type), emitTypeSize(type), reg, lclNum, offset); + offset += genTypeSize(type); + } #else // !FEATURE_UNIX_AMD64_STRUCT_PASSING assert(!"Unreached"); #endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING @@ -4355,13 +4426,14 @@ void CodeGen::genCodeForShiftRMW(GenTreeStoreInd* storeInd) void CodeGen::genUnspillRegIfNeeded(GenTree *tree) { regNumber dstReg = tree->gtRegNum; - GenTree* unspillTree = tree; + if (tree->gtOper == GT_RELOAD) { unspillTree = tree->gtOp.gtOp1; } - if (unspillTree->gtFlags & GTF_SPILLED) + + if ((unspillTree->gtFlags & GTF_SPILLED) != 0) { if (genIsRegCandidateLocal(unspillTree)) { @@ -4439,22 +4511,72 @@ void CodeGen::genUnspillRegIfNeeded(GenTree *tree) regSet.AddMaskVars(genGetRegMask(varDsc)); } + + gcInfo.gcMarkRegPtrVal(dstReg, unspillTree->TypeGet()); + } + else if (unspillTree->IsMultiRegCall()) + { + GenTreeCall* call = unspillTree->AsCall(); + ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc(); + unsigned regCount = retTypeDesc->GetReturnRegCount(); + GenTreeCopyOrReload* reloadTree = nullptr; + if (tree->OperGet() == GT_RELOAD) + { + reloadTree = tree->AsCopyOrReload(); + } + + // In case of multi-reg call node, GTF_SPILLED flag on it indicates that + // one or more of its result regs are spilled. Call node needs to be + // queried to know which specific result regs to be unspilled. 
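For example, assuming LSRA spilled only the second result register of a call returning in {RAX, XMM0}, gtSpillFlags[1] would carry GTF_SPILLED while gtSpillFlags[0] stays clear, and only that one eightbyte needs to be reloaded. A minimal sketch of the per-index query pattern, using only accessors introduced in this change:

    for (unsigned i = 0; i < regCount; ++i)
    {
        // Reload only the positions whose per-index flag says they were spilled.
        if ((call->GetRegSpillFlagByIdx(i) & GTF_SPILLED) != 0)
        {
            // ... reload the i'th return reg from its spill temp ...
        }
    }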
+ for (unsigned i = 0; i < regCount; ++i) + { + unsigned flags = call->GetRegSpillFlagByIdx(i); + if ((flags & GTF_SPILLED) != 0) + { + var_types dstType = retTypeDesc->GetReturnRegType(i); + regNumber unspillTreeReg = call->GetRegNumByIdx(i); + + if (reloadTree != nullptr) + { + dstReg = reloadTree->GetRegNumByIdx(i); + if (dstReg == REG_NA) + { + dstReg = unspillTreeReg; + } + } + else + { + dstReg = unspillTreeReg; + } + + TempDsc* t = regSet.rsUnspillInPlace(call, unspillTreeReg, i); + getEmitter()->emitIns_R_S(ins_Load(dstType), + emitActualTypeSize(dstType), + dstReg, + t->tdTempNum(), + 0); + compiler->tmpRlsTemp(t); + gcInfo.gcMarkRegPtrVal(dstReg, dstType); + } + } + + unspillTree->gtFlags &= ~GTF_SPILLED; + unspillTree->SetInReg(); } else { - TempDsc* t = regSet.rsUnspillInPlace(unspillTree); + TempDsc* t = regSet.rsUnspillInPlace(unspillTree, unspillTree->gtRegNum); getEmitter()->emitIns_R_S(ins_Load(unspillTree->gtType), - emitActualTypeSize(unspillTree->gtType), - dstReg, - t->tdTempNum(), - 0); + emitActualTypeSize(unspillTree->TypeGet()), + dstReg, + t->tdTempNum(), + 0); compiler->tmpRlsTemp(t); unspillTree->gtFlags &= ~GTF_SPILLED; unspillTree->SetInReg(); - } - - gcInfo.gcMarkRegPtrVal(dstReg, unspillTree->TypeGet()); + gcInfo.gcMarkRegPtrVal(dstReg, unspillTree->TypeGet()); + } } } @@ -4477,78 +4599,107 @@ void CodeGen::genConsumeRegAndCopy(GenTree *tree, regNumber needReg) void CodeGen::genRegCopy(GenTree* treeNode) { assert(treeNode->OperGet() == GT_COPY); - var_types targetType = treeNode->TypeGet(); - regNumber targetReg = treeNode->gtRegNum; - assert(targetReg != REG_NA); + GenTree* op1 = treeNode->gtOp.gtOp1; - GenTree* op1 = treeNode->gtOp.gtOp1; + if (op1->IsMultiRegCall()) + { + genConsumeReg(op1); - // Check whether this node and the node from which we're copying the value have the same - // register type. - // This can happen if (currently iff) we have a SIMD vector type that fits in an integer - // register, in which case it is passed as an argument, or returned from a call, - // in an integer register and must be copied if it's in an xmm register. + GenTreeCopyOrReload* copyTree = treeNode->AsCopyOrReload(); + GenTreeCall* call = op1->AsCall(); + ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc(); + unsigned regCount = retTypeDesc->GetReturnRegCount(); - bool srcFltReg = (varTypeIsFloating(op1) || varTypeIsSIMD(op1)); - bool tgtFltReg = (varTypeIsFloating(treeNode) || varTypeIsSIMD(treeNode)); - if (srcFltReg != tgtFltReg) - { - instruction ins; - regNumber fpReg; - regNumber intReg; - if (tgtFltReg) - { - ins = ins_CopyIntToFloat(op1->TypeGet(), treeNode->TypeGet()); - fpReg = targetReg; - intReg = op1->gtRegNum; - } - else + for (unsigned i = 0; i < regCount; ++i) { - ins = ins_CopyFloatToInt(op1->TypeGet(), treeNode->TypeGet()); - intReg = targetReg; - fpReg = op1->gtRegNum; + var_types type = retTypeDesc->GetReturnRegType(i); + regNumber fromReg = call->GetRegNumByIdx(i); + regNumber toReg = copyTree->GetRegNumByIdx(i); + + // A Multi-reg GT_COPY node will have valid reg only for those + // positions that corresponding result reg of call node needs + // to be copied. 
+ if (toReg != REG_NA) + { + assert(toReg != fromReg); + inst_RV_RV(ins_Copy(type), toReg, fromReg, type); + } } - inst_RV_RV(ins, fpReg, intReg, targetType); } else { - inst_RV_RV(ins_Copy(targetType), targetReg, genConsumeReg(op1), targetType); - } + var_types targetType = treeNode->TypeGet(); + regNumber targetReg = treeNode->gtRegNum; + assert(targetReg != REG_NA); - if (op1->IsLocal()) - { - // The lclVar will never be a def. - // If it is a last use, the lclVar will be killed by genConsumeReg(), as usual, and genProduceReg will - // appropriately set the gcInfo for the copied value. - // If not, there are two cases we need to handle: - // - If this is a TEMPORARY copy (indicated by the GTF_VAR_DEATH flag) the variable - // will remain live in its original register. - // genProduceReg() will appropriately set the gcInfo for the copied value, - // and genConsumeReg will reset it. - // - Otherwise, we need to update register info for the lclVar. + // Check whether this node and the node from which we're copying the value have + // different register types. This can happen if (currently iff) we have a SIMD + // vector type that fits in an integer register, in which case it is passed as + // an argument, or returned from a call, in an integer register and must be + // copied if it's in an xmm register. - GenTreeLclVarCommon* lcl = op1->AsLclVarCommon(); - assert((lcl->gtFlags & GTF_VAR_DEF) == 0); + bool srcFltReg = (varTypeIsFloating(op1) || varTypeIsSIMD(op1)); + bool tgtFltReg = (varTypeIsFloating(treeNode) || varTypeIsSIMD(treeNode)); + if (srcFltReg != tgtFltReg) + { + instruction ins; + regNumber fpReg; + regNumber intReg; + if (tgtFltReg) + { + ins = ins_CopyIntToFloat(op1->TypeGet(), treeNode->TypeGet()); + fpReg = targetReg; + intReg = op1->gtRegNum; + } + else + { + ins = ins_CopyFloatToInt(op1->TypeGet(), treeNode->TypeGet()); + intReg = targetReg; + fpReg = op1->gtRegNum; + } + inst_RV_RV(ins, fpReg, intReg, targetType); + } + else + { + inst_RV_RV(ins_Copy(targetType), targetReg, genConsumeReg(op1), targetType); + } - if ((lcl->gtFlags & GTF_VAR_DEATH) == 0 && (treeNode->gtFlags & GTF_VAR_DEATH) == 0) + if (op1->IsLocal()) { - LclVarDsc* varDsc = &compiler->lvaTable[lcl->gtLclNum]; + // The lclVar will never be a def. + // If it is a last use, the lclVar will be killed by genConsumeReg(), as usual, and genProduceReg will + // appropriately set the gcInfo for the copied value. + // If not, there are two cases we need to handle: + // - If this is a TEMPORARY copy (indicated by the GTF_VAR_DEATH flag) the variable + // will remain live in its original register. + // genProduceReg() will appropriately set the gcInfo for the copied value, + // and genConsumeReg will reset it. + // - Otherwise, we need to update register info for the lclVar. 
+ + GenTreeLclVarCommon* lcl = op1->AsLclVarCommon(); + assert((lcl->gtFlags & GTF_VAR_DEF) == 0); - // If we didn't just spill it (in genConsumeReg, above), then update the register info - if (varDsc->lvRegNum != REG_STK) + if ((lcl->gtFlags & GTF_VAR_DEATH) == 0 && (treeNode->gtFlags & GTF_VAR_DEATH) == 0) { - // The old location is dying - genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(op1)); + LclVarDsc* varDsc = &compiler->lvaTable[lcl->gtLclNum]; - gcInfo.gcMarkRegSetNpt(genRegMask(op1->gtRegNum)); + // If we didn't just spill it (in genConsumeReg, above), then update the register info + if (varDsc->lvRegNum != REG_STK) + { + // The old location is dying + genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(op1)); + + gcInfo.gcMarkRegSetNpt(genRegMask(op1->gtRegNum)); - genUpdateVarReg(varDsc, treeNode); + genUpdateVarReg(varDsc, treeNode); - // The new location is going live - genUpdateRegLife(varDsc, /*isBorn*/ true, /*isDying*/ false DEBUGARG(treeNode)); + // The new location is going live + genUpdateRegLife(varDsc, /*isBorn*/ true, /*isDying*/ false DEBUGARG(treeNode)); + } } } } + genProduceReg(treeNode); } @@ -4585,13 +4736,24 @@ void CodeGen::genCheckConsumeNode(GenTree* treeNode) } #endif // DEBUG -// Do liveness update for a subnode that is being consumed by codegen. -regNumber CodeGen::genConsumeReg(GenTree *tree) +//-------------------------------------------------------------------- +// genConsumeReg: Do liveness update for a subnode that is being +// consumed by codegen. +// +// Arguments: +// tree - GenTree node +// +// Return Value: +// Returns the reg number of tree. +// In case of multi-reg call node returns the first reg number +// of the multi-reg return. +regNumber CodeGen::genConsumeReg(GenTree* tree) { if (tree->OperGet() == GT_COPY) { genRegCopy(tree); } + // Handle the case where we have a lclVar that needs to be copied before use (i.e. because it // interferes with one of the other sources (or the target, if it's a "delayed use" register)). // TODO-Cleanup: This is a special copyReg case in LSRA - consider eliminating these and @@ -4604,7 +4766,7 @@ regNumber CodeGen::genConsumeReg(GenTree *tree) // because if it's on the stack it will always get reloaded into tree->gtRegNum). if (genIsRegCandidateLocal(tree)) { - GenTreeLclVarCommon *lcl = tree->AsLclVarCommon(); + GenTreeLclVarCommon* lcl = tree->AsLclVarCommon(); LclVarDsc* varDsc = &compiler->lvaTable[lcl->GetLclNum()]; if (varDsc->lvRegNum != REG_STK && varDsc->lvRegNum != tree->gtRegNum) { @@ -4617,7 +4779,7 @@ regNumber CodeGen::genConsumeReg(GenTree *tree) // genUpdateLife() will also spill local var if marked as GTF_SPILL by calling CodeGen::genSpillVar genUpdateLife(tree); - assert(tree->gtRegNum != REG_NA); + assert(tree->gtHasReg()); // there are three cases where consuming a reg means clearing the bit in the live mask // 1. it was not produced by a local @@ -4642,7 +4804,7 @@ regNumber CodeGen::genConsumeReg(GenTree *tree) } else { - gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum)); + gcInfo.gcMarkRegSetNpt(tree->gtGetRegMask()); } genCheckConsumeNode(tree); @@ -4906,11 +5068,27 @@ void CodeGen::genConsumeBlockOp(GenTreeBlkOp* blkNode, regNumber dstReg, regNumb } } -// do liveness update for register produced by the current node in codegen -void CodeGen::genProduceReg(GenTree *tree) +//------------------------------------------------------------------------- +// genProduceReg: do liveness update for register produced by the current +// node in codegen. 
+// +// Arguments: +// tree - Gentree node +// +// Return Value: +// None. +void CodeGen::genProduceReg(GenTree* tree) { if (tree->gtFlags & GTF_SPILL) { + // Code for GT_COPY node gets generated as part of consuming regs by its parent. + // A GT_COPY node in turn produces reg result and it should never be marked to + // spill. + // + // Similarly GT_RELOAD node gets generated as part of consuming regs by its + // parent and should never be marked for spilling. + noway_assert(!tree->IsCopyOrReload()); + if (genIsRegCandidateLocal(tree)) { // Store local variable to its home location. @@ -4922,11 +5100,38 @@ void CodeGen::genProduceReg(GenTree *tree) } else { - tree->SetInReg(); - regSet.rsSpillTree(tree->gtRegNum, tree); + // In case of multi-reg call node, spill flag on call node + // indicates that one or more of its allocated regs need to + // be spilled. Call node needs to be further queried to + // know which of its result regs needs to be spilled. + if (tree->IsMultiRegCall()) + { + GenTreeCall* call = tree->AsCall(); + ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc(); + unsigned regCount = retTypeDesc->GetReturnRegCount(); + + for (unsigned i = 0; i < regCount; ++i) + { + unsigned flags = call->GetRegSpillFlagByIdx(i); + if ((flags & GTF_SPILL) != 0) + { + regNumber reg = call->GetRegNumByIdx(i); + call->SetInReg(); + regSet.rsSpillTree(reg, call, i); + gcInfo.gcMarkRegSetNpt(genRegMask(reg)); + } + } + } + else + { + tree->SetInReg(); + regSet.rsSpillTree(tree->gtRegNum, tree); + gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum)); + } + tree->gtFlags |= GTF_SPILLED; tree->gtFlags &= ~GTF_SPILL; - gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum)); + return; } } @@ -4945,8 +5150,51 @@ void CodeGen::genProduceReg(GenTree *tree) // the register as live, with a GC pointer, if the variable is dead. if (!genIsRegCandidateLocal(tree) || ((tree->gtFlags & GTF_VAR_DEATH) == 0)) - { - gcInfo.gcMarkRegPtrVal(tree->gtRegNum, tree->TypeGet()); + { + // Multi-reg call node will produce more than one register result. + // Mark all the regs produced by call node. + if (tree->IsMultiRegCall()) + { + GenTreeCall* call = tree->AsCall(); + ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc(); + unsigned regCount = retTypeDesc->GetReturnRegCount(); + + for (unsigned i = 0; i < regCount; ++i) + { + regNumber reg = call->GetRegNumByIdx(i); + var_types type = retTypeDesc->GetReturnRegType(i); + gcInfo.gcMarkRegPtrVal(reg, type); + } + } + else if (tree->IsCopyOrReloadOfMultiRegCall()) + { + // we should never see reload of multi-reg call here + // because GT_RELOAD gets generated in reg consuming path. + noway_assert(tree->OperGet() == GT_COPY); + + // A multi-reg GT_COPY node produces those regs to which + // copy has taken place. + GenTreeCopyOrReload* copy = tree->AsCopyOrReload(); + GenTreeCall* call = copy->gtGetOp1()->AsCall(); + ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc(); + unsigned regCount = retTypeDesc->GetReturnRegCount(); + + for (unsigned i = 0; i < regCount; ++i) + { + var_types type = retTypeDesc->GetReturnRegType(i); + regNumber fromReg = call->GetRegNumByIdx(i); + regNumber toReg = copy->GetRegNumByIdx(i); + + if (toReg != REG_NA) + { + gcInfo.gcMarkRegPtrVal(toReg, type); + } + } + } + else + { + gcInfo.gcMarkRegPtrVal(tree->gtRegNum, tree->TypeGet()); + } } } tree->SetInReg(); @@ -5477,20 +5725,19 @@ void CodeGen::genCallInstruction(GenTreePtr node) } // Determine return value size(s). 
+ ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc(); emitAttr retSize = EA_PTRSIZE; - -#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING emitAttr secondRetSize = EA_UNKNOWN; - if (varTypeIsStruct(call->gtType)) + + if (call->HasMultiRegRetVal()) { - assert(call->HasMultiRegRetVal()); - ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc(); retSize = emitTypeSize(retTypeDesc->GetReturnRegType(0)); secondRetSize = emitTypeSize(retTypeDesc->GetReturnRegType(1)); } else -#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING { + assert(!varTypeIsStruct(call)); + if (call->gtType == TYP_REF || call->gtType == TYP_ARRAY) { @@ -5688,25 +5935,43 @@ void CodeGen::genCallInstruction(GenTreePtr node) #endif // _TARGET_X86_ { regNumber returnReg; - // TODO-Cleanup: For UNIX AMD64, we should not be allocating a return register for struct - // returns that are on stack. - // For the SIMD case, however, we do want a "return register", as the consumer of the call - // will want the value in a register. In future we should flexibly allocate this return - // register, but that should be done with a general cleanup of the allocation of return - // registers for structs. - if (varTypeIsFloating(returnType) - FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY( || varTypeIsSIMD(returnType))) + + if (call->HasMultiRegRetVal()) { - returnReg = REG_FLOATRET; + assert(retTypeDesc != nullptr); + unsigned regCount = retTypeDesc->GetReturnRegCount(); + + // If regs allocated to call node are different from ABI return + // regs in which the call has returned its result, move the result + // to regs allocated to call node. + for (unsigned i = 0; i < regCount; ++i) + { + var_types regType = retTypeDesc->GetReturnRegType(i); + returnReg = retTypeDesc->GetABIReturnReg(i); + regNumber allocatedReg = call->GetRegNumByIdx(i); + if (returnReg != allocatedReg) + { + inst_RV_RV(ins_Copy(regType), allocatedReg, returnReg, regType); + } + } } else - { - returnReg = REG_INTRET; - } - if (call->gtRegNum != returnReg) - { - inst_RV_RV(ins_Copy(returnType), call->gtRegNum, returnReg, returnType); + { + if (varTypeIsFloating(returnType)) + { + returnReg = REG_FLOATRET; + } + else + { + returnReg = REG_INTRET; + } + + if (call->gtRegNum != returnReg) + { + inst_RV_RV(ins_Copy(returnType), call->gtRegNum, returnReg, returnType); + } } + genProduceReg(call); } } diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp index 5dbdfe31f0..53101d8e4c 100644 --- a/src/jit/gentree.cpp +++ b/src/jit/gentree.cpp @@ -1091,9 +1091,16 @@ Compiler::fgWalkResult Compiler::fgWalkTree(GenTreePtr * pTree, return result; } -// Sets the register to the "no register assignment" value, depending upon the type -// of the node, and whether it fits any of the special cases for register pairs. - +// ------------------------------------------------------------------------------------------ +// gtClearReg: Sets the register to the "no register assignment" value, depending upon +// the type of the node, and whether it fits any of the special cases for register pairs +// or multi-reg call nodes. 
+// +// Arguments: +// compiler - compiler instance +// +// Return Value: +// None void GenTree::gtClearReg(Compiler* compiler) { @@ -1109,6 +1116,218 @@ GenTree::gtClearReg(Compiler* compiler) { gtRegNum = REG_NA; } + + // Also clear multi-reg state if this is a call node + if (IsCall()) + { + this->AsCall()->ClearOtherRegs(); + } + else if (IsCopyOrReload()) + { + this->AsCopyOrReload()->ClearOtherRegs(); + } +} + +//----------------------------------------------------------- +// CopyReg: Copy the _gtRegNum/_gtRegPair/gtRegTag fields. +// +// Arguments: +// from - GenTree node from which to copy +// +// Return Value: +// None +void +GenTree::CopyReg(GenTreePtr from) +{ + // To do the copy, use _gtRegPair, which must be bigger than _gtRegNum. Note that the values + // might be undefined (so gtRegTag == GT_REGTAG_NONE). + _gtRegPair = from->_gtRegPair; + C_ASSERT(sizeof(_gtRegPair) >= sizeof(_gtRegNum)); + INDEBUG(gtRegTag = from->gtRegTag;) + + // Also copy multi-reg state if this is a call node + if (IsCall()) + { + assert(from->IsCall()); + this->AsCall()->CopyOtherRegs(from->AsCall()); + } + else if (IsCopyOrReload()) + { + this->AsCopyOrReload()->CopyOtherRegs(from->AsCopyOrReload()); + } +} + +//------------------------------------------------------------------ +// gtHasReg: Whether node beeen assigned a register by LSRA +// +// Arguments: +// None +// +// Return Value: +// Returns true if the node was assigned a register. +// +// In case of multi-reg call nodes, it is considered +// having a reg if regs are allocated for all its +// return values. +// +// In case of GT_COPY or GT_RELOAD of a multi-reg call, +// GT_COPY/GT_RELOAD is considered having a reg if it +// has a reg assigned to any of its positions. +// +// Assumption: +// In order for this to work properly, gtClearReg must be called +// prior to setting the register value. +// +bool GenTree::gtHasReg() const +{ + bool hasReg; + +#if CPU_LONG_USES_REGPAIR + if (isRegPairType(TypeGet())) + { + assert(_gtRegNum != REG_NA); + INDEBUG(assert(gtRegTag == GT_REGTAG_REGPAIR)); + hasReg = (gtRegPair != REG_PAIR_NONE); + } + else +#endif + { + assert(_gtRegNum != REG_PAIR_NONE); + INDEBUG(assert(gtRegTag == GT_REGTAG_REG)); + + if (IsMultiRegCall()) + { + // Has to cast away const-ness because GetReturnTypeDesc() is a non-const method + GenTree* tree = const_cast(this); + GenTreeCall* call = tree->AsCall(); + unsigned regCount = call->GetReturnTypeDesc()->GetReturnRegCount(); + hasReg = false; + + // A Multi-reg call node is said to have regs, if it has + // reg assigned to each of its result registers. + for (unsigned i = 0; i < regCount; ++i) + { + hasReg = (call->GetRegNumByIdx(i) != REG_NA); + if (!hasReg) + { + break; + } + } + } + else if (IsCopyOrReloadOfMultiRegCall()) + { + GenTree* tree = const_cast(this); + GenTreeCopyOrReload* copyOrReload = tree->AsCopyOrReload(); + GenTreeCall* call = copyOrReload->gtGetOp1()->AsCall(); + unsigned regCount = call->GetReturnTypeDesc()->GetReturnRegCount(); + hasReg = false; + + // A Multi-reg copy or reload node is said to have regs, + // if it has valid regs in any of the positions. + for (unsigned i = 0; i < regCount; ++i) + { + hasReg = (copyOrReload->GetRegNumByIdx(i) != REG_NA); + if (hasReg) + { + break; + } + } + } + else + { + hasReg = (gtRegNum != REG_NA); + } + } + + return hasReg; +} + +//--------------------------------------------------------------- +// gtGetRegMask: Get the reg mask of the node. 
+// +// Arguments: +// None +// +// Return Value: +// Reg Mask of GenTree node. +// +regMaskTP +GenTree::gtGetRegMask() const +{ + regMaskTP resultMask; + +#if CPU_LONG_USES_REGPAIR + if (isRegPairType(TypeGet())) + { + resultMask = genRegPairMask(gtRegPair); + } + else +#endif + { + if (IsMultiRegCall()) + { + // temporarily cast away const-ness as AsCall() method is not declared const + resultMask = genRegMask(gtRegNum); + GenTree* temp = const_cast(this); + resultMask |= temp->AsCall()->GetOtherRegMask(); + } + else if (IsCopyOrReloadOfMultiRegCall()) + { + // A multi-reg copy or reload, will have valid regs for only those + // positions that need to be copied or reloaded. Hence we need + // to consider only those registers for computing reg mask. + + GenTree* tree = const_cast(this); + GenTreeCopyOrReload* copyOrReload = tree->AsCopyOrReload(); + GenTreeCall* call = copyOrReload->gtGetOp1()->AsCall(); + unsigned regCount = call->GetReturnTypeDesc()->GetReturnRegCount(); + + resultMask = RBM_NONE; + for (unsigned i = 0; i < regCount; ++i) + { + regNumber reg = copyOrReload->GetRegNumByIdx(i); + if (reg != REG_NA) + { + resultMask |= genRegMask(reg); + } + } + } + else + { + resultMask = genRegMask(gtRegNum); + } + } + + return resultMask; +} + +//--------------------------------------------------------------- +// GetOtherRegMask: Get the reg mask of gtOtherRegs of call node +// +// Arguments: +// None +// +// Return Value: +// Reg mask of gtOtherRegs of call node. +// +regMaskTP +GenTreeCall::GetOtherRegMask() const +{ + regMaskTP resultMask = RBM_NONE; + +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + for (unsigned i = 0; i < MAX_RET_REG_COUNT - 1; ++i) + { + if (gtOtherRegs[i] != REG_NA) + { + resultMask |= genRegMask(gtOtherRegs[i]); + continue; + } + break; + } +#endif + + return resultMask; } /***************************************************************************** @@ -5163,6 +5382,12 @@ GenTreeCall* Compiler::gtNewCallNode(gtCallTypes callType, } #endif + // Initialize gtOtherRegs + node->ClearOtherRegs(); + + // Initialize spill flags of gtOtherRegs + node->ClearOtherRegFlags(); + return node; } @@ -6111,6 +6336,13 @@ GenTreePtr Compiler::gtCloneExpr(GenTree * tree, } break; + case GT_COPY: + case GT_RELOAD: + { + copy = new(this, oper) GenTreeCopyOrReload(oper, tree->TypeGet(), tree->gtGetOp1()); + } + break; + #ifdef FEATURE_SIMD case GT_SIMD: { @@ -6304,6 +6536,7 @@ GenTreePtr Compiler::gtCloneExpr(GenTree * tree, copy->gtCall.gtInlineObservation = tree->gtCall.gtInlineObservation; #endif + copy->AsCall()->CopyOtherRegFlags(tree->AsCall()); break; case GT_FIELD: @@ -7659,6 +7892,28 @@ void Compiler::gtDispRegVal(GenTree * tree) break; } + if (tree->IsMultiRegCall()) + { + // 0th reg is gtRegNum, which is already printed above. + // Print the remaining regs of a multi-reg call node. 
+ GenTreeCall* call = tree->AsCall(); + unsigned regCount = call->GetReturnTypeDesc()->GetReturnRegCount(); + for (unsigned i = 1; i < regCount; ++i) + { + printf(",%s", compRegVarName(call->GetRegNumByIdx(i))); + } + } + else if (tree->IsCopyOrReloadOfMultiRegCall()) + { + GenTreeCopyOrReload* copyOrReload = tree->AsCopyOrReload(); + GenTreeCall* call = tree->gtGetOp1()->AsCall(); + unsigned regCount = call->GetReturnTypeDesc()->GetReturnRegCount(); + for (unsigned i = 1; i < regCount; ++i) + { + printf(",%s", compRegVarName(copyOrReload->GetRegNumByIdx(i))); + } + } + if (tree->gtFlags & GTF_REG_VAL) { printf(" RV"); @@ -8596,6 +8851,10 @@ void Compiler::gtDispTree(GenTreePtr tree, } gtDispVN(tree); + if (tree->IsMultiRegCall()) + { + gtDispRegVal(tree); + } printf("\n"); if (!topOnly) diff --git a/src/jit/gentree.h b/src/jit/gentree.h index 550a5799fb..ec838116a6 100644 --- a/src/jit/gentree.h +++ b/src/jit/gentree.h @@ -551,52 +551,13 @@ public: #endif // Copy the _gtRegNum/_gtRegPair/gtRegTag fields - void CopyReg(GenTreePtr from) - { - // To do the copy, use _gtRegPair, which must be bigger than _gtRegNum. Note that the values - // might be undefined (so gtRegTag == GT_REGTAG_NONE). - _gtRegPair = from->_gtRegPair; - C_ASSERT(sizeof(_gtRegPair) >= sizeof(_gtRegNum)); - INDEBUG(gtRegTag = from->gtRegTag;) - } + void CopyReg(GenTreePtr from); void gtClearReg(Compiler* compiler); - bool gtHasReg() const - { - // Has the node been assigned a register by LSRA? - // - // In order for this to work properly, gtClearReg (above) must be called prior to setting - // the register value. -#if CPU_LONG_USES_REGPAIR - if (isRegPairType(TypeGet())) - { - assert(_gtRegNum != REG_NA); - INDEBUG(assert(gtRegTag == GT_REGTAG_REGPAIR)); - return gtRegPair != REG_PAIR_NONE; - } - else -#endif - { - assert(_gtRegNum != REG_PAIR_NONE); - INDEBUG(assert(gtRegTag == GT_REGTAG_REG)); - return gtRegNum != REG_NA; - } - } + bool gtHasReg() const; - regMaskTP gtGetRegMask() const - { -#if CPU_LONG_USES_REGPAIR - if (isRegPairType(TypeGet())) - { - return genRegPairMask(gtRegPair); - } - else -#endif - { - return genRegMask(gtRegNum); - } - } + regMaskTP gtGetRegMask() const; unsigned gtFlags; // see GTF_xxxx below @@ -1386,7 +1347,14 @@ public: // Return the child of this node if it is a GT_RELOAD or GT_COPY; otherwise simply return the node itself inline GenTree* gtSkipReloadOrCopy(); - inline bool IsMultiRegCallStoreToLocal(); + // Returns true if it is a call node returning its value in more than one register + inline bool IsMultiRegCall() const; + + // Returns true if it is a GT_COPY or GT_RELOAD node + inline bool IsCopyOrReload() const; + + // Returns true if it is a GT_COPY or GT_RELOAD of a multi-reg call node + inline bool IsCopyOrReloadOfMultiRegCall() const; bool OperMayThrow(); @@ -2384,8 +2352,8 @@ enum class InlineObservation; // registers. For such calls this struct provides the following info // on their return type // - type of value returned in each return register -// - return register numbers in which the value is returned -// - a spill mask for lsra/codegen purpose +// - ABI return register numbers in which the value is returned +// - count of return registers in which the value is returned // // TODO-ARM: Update this to meet the needs of Arm64 and Arm32 // TODO-X86: Update this to meet the needs of x86 @@ -2428,7 +2396,7 @@ public: // Return Value: // Count of return registers. // Returns 0 if the return type is not returned in registers. 
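As context for the accessors that follow, a typical consumer walks the descriptor instead of hard-coding two eightbytes. A minimal sketch using only members present in this change (retClsHnd stands in for whatever struct class handle the caller already has):

    ReturnTypeDesc retTypeDesc;
    retTypeDesc.Initialize(compiler, retClsHnd);

    for (unsigned i = 0; i < retTypeDesc.GetReturnRegCount(); ++i)
    {
        var_types type = retTypeDesc.GetReturnRegType(i);  // type of the i'th piece
        regNumber reg  = retTypeDesc.GetABIReturnReg(i);   // ABI reg that returns it
        // ... load/store/move the i'th piece using 'type' and 'reg' ...
    }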
- unsigned GetReturnRegCount() + unsigned GetReturnRegCount() const { assert(m_inited); @@ -2451,6 +2419,21 @@ public: return regCount; } + //----------------------------------------------------------------------- + // IsMultiRegRetType: check whether the type is returned in multiple + // return registers. + // + // Arguments: + // None + // + // Return Value: + // Returns true if the type is returned in multiple return registers. + // False otherwise. + bool IsMultiRegRetType() const + { + return GetReturnRegCount() > 1; + } + //-------------------------------------------------------------------------- // GetReturnRegType: Get var_type of the return register specified by index. // @@ -2504,12 +2487,23 @@ struct GenTreeCall final : public GenTree regMaskTP gtCallRegUsedMask; // mask of registers used to pass parameters - // For now Return Type Descriptor is enabled only for x64 unix. + // State required to support multi-reg returning call nodes. + // For now it is enabled only for x64 unix. + // // TODO-ARM: enable this for HFA returns on Arm64 and Arm32 // TODO-X86: enable this for long returns on x86 // TODO-AllArch: enable for all call nodes to unify single-reg and multi-reg returns. #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING ReturnTypeDesc gtReturnTypeDesc; + + // gtRegNum would always be the first return reg. + // The following array holds the other reg numbers of multi-reg return. + regNumber gtOtherRegs[MAX_RET_REG_COUNT - 1]; + + // GTF_SPILL or GTF_SPILLED flag on a multi-reg call node indicates that one or + // more of its result regs are in that state. The spill flag of each of the + // return register is stored in the below array. + unsigned gtSpillFlags[MAX_RET_REG_COUNT]; #endif //----------------------------------------------------------------------- @@ -2537,6 +2531,185 @@ struct GenTreeCall final : public GenTree #endif } + //--------------------------------------------------------------------------- + // GetRegNumByIdx: get ith return register allocated to this call node. + // + // Arguments: + // idx - index of the return register + // + // Return Value: + // Return regNumber of ith return register of call node. + // Returns REG_NA if there is no valid return register for the given index. 
+ // + // TODO-ARM: Implement this routine for Arm64 and Arm32 + // TODO-X86: Implement this routine for x86 + regNumber GetRegNumByIdx(unsigned idx) const + { + assert(idx < MAX_RET_REG_COUNT); + + if (idx == 0) + { + return gtRegNum; + } + +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + return gtOtherRegs[idx-1]; +#else + return REG_NA; +#endif + } + + //---------------------------------------------------------------------- + // SetRegNumByIdx: set ith return register of this call node + // + // Arguments: + // reg - reg number + // idx - index of the return register + // + // Return Value: + // None + // + // TODO-ARM: Implement this routine for Arm64 and Arm32 + // TODO-X86: Implement this routine for x86 + void SetRegNumByIdx(regNumber reg, unsigned idx) + { + assert(idx < MAX_RET_REG_COUNT); + + if (idx == 0) + { + gtRegNum = reg; + } +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + else + { + gtOtherRegs[idx - 1] = reg; + assert(gtOtherRegs[idx - 1] == reg); + } +#else + unreached(); +#endif + } + + //---------------------------------------------------------------------------- + // ClearOtherRegs: clear multi-reg state to indicate no regs are allocated + // + // Arguments: + // None + // + // Return Value: + // None + // + void ClearOtherRegs() + { +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + for (unsigned i = 0; i < MAX_RET_REG_COUNT - 1; ++i) + { + gtOtherRegs[i] = REG_NA; + } +#endif + } + + //---------------------------------------------------------------------------- + // CopyOtherRegs: copy multi-reg state from the given call node to this node + // + // Arguments: + // fromCall - GenTreeCall node from which to copy multi-reg state + // + // Return Value: + // None + // + void CopyOtherRegs(GenTreeCall* fromCall) + { +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + for (unsigned i = 0; i < MAX_RET_REG_COUNT - 1; ++i) + { + this->gtOtherRegs[i] = fromCall->gtOtherRegs[i]; + } +#endif + } + + // Get reg mask of all the valid registers of gtOtherRegs array + regMaskTP GetOtherRegMask() const; + + //---------------------------------------------------------------------- + // GetRegSpillFlagByIdx: get spill flag associated with the return register + // specified by its index. + // + // Arguments: + // idx - Position or index of the return register + // + // Return Value: + // Returns GTF_* flags associated with. + unsigned GetRegSpillFlagByIdx(unsigned idx) const + { + assert(idx < MAX_RET_REG_COUNT); + +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + return gtSpillFlags[idx]; +#else + assert(!"unreached"); + return 0; +#endif + } + + //---------------------------------------------------------------------- + // SetRegSpillFlagByIdx: set spill flags for the return register + // specified by its index. 
+ // + // Arguments: + // flags - GTF_* flags + // idx - Position or index of the return register + // + // Return Value: + // None + void SetRegSpillFlagByIdx(unsigned flags, unsigned idx) + { + assert(idx < MAX_RET_REG_COUNT); + +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + gtSpillFlags[idx] = flags; +#else + unreached(); +#endif + } + + //------------------------------------------------------------------- + // clearOtherRegFlags: clear GTF_* flags associated with gtOtherRegs + // + // Arguments: + // None + // + // Return Value: + // None + void ClearOtherRegFlags() + { +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + for (unsigned i = 0; i < MAX_RET_REG_COUNT; ++i) + { + gtSpillFlags[i] = 0; + } +#endif + } + + //------------------------------------------------------------------------- + // CopyOtherRegFlags: copy GTF_* flags associated with gtOtherRegs from + // the given call node. + // + // Arguments: + // fromCall - GenTreeCall node from which to copy + // + // Return Value: + // None + // + void CopyOtherRegFlags(GenTreeCall* fromCall) + { +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + for (unsigned i = 0; i < MAX_RET_REG_COUNT; ++i) + { + this->gtSpillFlags[i] = fromCall->gtSpillFlags[i]; + } +#endif + } #define GTF_CALL_M_EXPLICIT_TAILCALL 0x0001 // GT_CALL -- the call is "tail" prefixed and importer has performed tail call checks #define GTF_CALL_M_TAILCALL 0x0002 // GT_CALL -- the call is a tailcall @@ -2633,7 +2806,6 @@ struct GenTreeCall final : public GenTree #endif } - // Returns true if VM has flagged this method as CORINFO_FLG_PINVOKE. bool IsPInvoke() { return (gtCallMoreFlags & GTF_CALL_M_PINVOKE) != 0; } @@ -3747,6 +3919,138 @@ struct GenTreePutArgStk: public GenTreeUnOp #endif }; +// Represents GT_COPY or GT_RELOAD node +struct GenTreeCopyOrReload : public GenTreeUnOp +{ + // State required to support copy/reload of a multi-reg call node. + // First register is is always given by gtRegNum. + // Currently enabled for x64 unix. + // + // TODO-ARM: Enable this when multi-reg call node support is added. + // TODO-X86: Enable this when multi-reg call node support is added. +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + regNumber gtOtherRegs[MAX_RET_REG_COUNT - 1]; +#endif + + //---------------------------------------------------------- + // ClearOtherRegs: set gtOtherRegs to REG_NA. + // + // Arguments: + // None + // + // Return Value: + // None + // + // TODO-ARM: Implement this routine for Arm64 and Arm32 + // TODO-X86: Implement this routine for x86 + void ClearOtherRegs() + { +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + for (unsigned i = 0; i < MAX_RET_REG_COUNT - 1; ++i) + { + gtOtherRegs[i] = REG_NA; + } +#endif + } + + //----------------------------------------------------------- + // GetRegNumByIdx: Get regNumber of ith position. + // + // Arguments: + // idx - register position. + // + // Return Value: + // Returns regNumber assigned to ith position. + // + // TODO-ARM: Implement this routine for Arm64 and Arm32 + // TODO-X86: Implement this routine for x86 + regNumber GetRegNumByIdx(unsigned idx) const + { + assert(idx < MAX_RET_REG_COUNT); + + if (idx == 0) + { + return gtRegNum; + } + +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + return gtOtherRegs[idx - 1]; +#else + return REG_NA; +#endif + } + + //----------------------------------------------------------- + // SetRegNumByIdx: Set the regNumber for ith position. + // + // Arguments: + // reg - reg number + // idx - register position. + // + // Return Value: + // None. 
+ // + // TODO-ARM: Implement this routine for Arm64 and Arm32 + // TODO-X86: Implement this routine for x86 + void SetRegNumByIdx(regNumber reg, unsigned idx) + { + assert(idx < MAX_RET_REG_COUNT); + + if (idx == 0) + { + gtRegNum = reg; + } +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + else + { + gtOtherRegs[idx - 1] = reg; + assert(gtOtherRegs[idx - 1] == reg); + } +#else + else + { + unreached(); + } +#endif + } + + //---------------------------------------------------------------------------- + // CopyOtherRegs: copy multi-reg state from the given copy/reload node to this + // node. + // + // Arguments: + // from - GenTree node from which to copy multi-reg state + // + // Return Value: + // None + // + // TODO-ARM: Implement this routine for Arm64 and Arm32 + // TODO-X86: Implement this routine for x86 + void CopyOtherRegs(GenTreeCopyOrReload* from) + { + assert(OperGet() == from->OperGet()); + +#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING + for (unsigned i = 0; i < MAX_RET_REG_COUNT - 1; ++i) + { + gtOtherRegs[i] = from->gtOtherRegs[i]; + } +#endif + } + + GenTreeCopyOrReload(genTreeOps oper, + var_types type, + GenTree* op1) : GenTreeUnOp(oper, type, op1) + { + gtRegNum = REG_NA; + ClearOtherRegs(); + } + +#if DEBUGGABLE_GENTREE + GenTreeCopyOrReload() : GenTreeUnOp() {} +#endif +}; + // Deferred inline functions of GenTree -- these need the subtypes above to // be defined already. @@ -3962,25 +4266,57 @@ inline GenTree* GenTree::gtSkipReloadOrCopy() return this; } -//---------------------------------------------------------------------------------------- -// IsMultiRegCallStoreToLocal: Whether store op is storing multi-reg return value of call -// to a local. +//----------------------------------------------------------------------------------- +// IsMultiRegCall: whether a call node returning its value in more than one register // // Arguments: -// None +// None // // Return Value: -// Returns true if store op is storing a multi-reg return value of a call into a local. -// False otherwise. -// -inline bool GenTree::IsMultiRegCallStoreToLocal() +// Returns true if this GenTree is a multi register returning call +inline bool GenTree::IsMultiRegCall() const { - assert(OperGet() == GT_STORE_LCL_VAR); + if (this->IsCall()) + { + // We cannot use AsCall() as it is not declared const + const GenTreeCall* call = reinterpret_cast(this); + return call->HasMultiRegRetVal(); + } - GenTreePtr op1 = gtGetOp1(); - GenTreePtr actualOperand = op1->gtSkipReloadOrCopy(); + return false; +} - return (actualOperand->OperGet() == GT_CALL) && actualOperand->AsCall()->HasMultiRegRetVal(); +//------------------------------------------------------------------------- +// IsCopyOrReload: whether this is a GT_COPY or GT_RELOAD node. +// +// Arguments: +// None +// +// Return Value: +// Returns true if this GenTree is a copy or reload node. +inline bool GenTree::IsCopyOrReload() const +{ + return (gtOper == GT_COPY || gtOper == GT_RELOAD); +} + +//----------------------------------------------------------------------------------- +// IsCopyOrReloadOfMultiRegCall: whether this is a GT_COPY or GT_RELOAD of a multi-reg +// call node. +// +// Arguments: +// None +// +// Return Value: +// Returns true if this GenTree is a copy or reload of multi-reg call node. 
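A rough sketch of how these predicates are meant to be used together, mirroring the codegen changes earlier in this patch (illustration only):

    if (tree->IsMultiRegCall())
    {
        // Query the call itself: GetReturnTypeDesc(), GetRegNumByIdx(i), ...
    }
    else if (tree->IsCopyOrReloadOfMultiRegCall())
    {
        // Query the GT_COPY/GT_RELOAD per position; REG_NA means that
        // position was neither copied nor reloaded, so use the call's reg.
    }
    else
    {
        // Single-register node: gtRegNum is the only result register.
    }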
+inline bool GenTree::IsCopyOrReloadOfMultiRegCall() const +{ + if (IsCopyOrReload()) + { + GenTree* t = const_cast(this); + return t->gtGetOp1()->IsMultiRegCall(); + } + + return false; } inline bool GenTree::IsCnsIntOrI() const diff --git a/src/jit/gtstructs.h b/src/jit/gtstructs.h index 3c55c30517..2f0b3a3936 100644 --- a/src/jit/gtstructs.h +++ b/src/jit/gtstructs.h @@ -81,6 +81,7 @@ GTSTRUCT_1(ArrIndex , GT_ARR_INDEX) GTSTRUCT_1(RetExpr , GT_RET_EXPR) GTSTRUCT_1(Stmt , GT_STMT) GTSTRUCT_1(Obj , GT_OBJ) +GTSTRUCT_2(CopyOrReload, GT_COPY, GT_RELOAD) GTSTRUCT_2(ClsVar , GT_CLS_VAR, GT_CLS_VAR_ADDR) GTSTRUCT_1(ArgPlace , GT_ARGPLACE) GTSTRUCT_1(Label , GT_LABEL) diff --git a/src/jit/importer.cpp b/src/jit/importer.cpp index 7c24aa87e4..2d5409a6e7 100644 --- a/src/jit/importer.cpp +++ b/src/jit/importer.cpp @@ -7109,7 +7109,7 @@ GenTreePtr Compiler::impFixupStructReturnType(GenTreePtr op, CORINFO_CL assert(varTypeIsStruct(info.compRetType)); assert(info.compRetBuffArg == BAD_VAR_NUM); -#if defined(_TARGET_X86_) || defined(_TARGET_AMD64_) +#if defined(_TARGET_XARCH_) #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING // No VarArgs for CoreCLR on x64 Unix @@ -13822,7 +13822,7 @@ bool Compiler::impReturnInstruction(BasicBlock *block, int prefixFlags, OPCODE & { // If single eightbyte, the return type would have been normalized and there won't be a temp var. // This code will be called only if the struct return has not been normalized (i.e. 2 eightbytes - max allowed.) - assert(retRegCount == CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_RETURN_IN_REGISTERS); + assert(retRegCount == MAX_RET_REG_COUNT); // Same as !structDesc.passedInRegisters but just don't bother with impAssignStructPtr. #endif // !defined(_TARGET_ARM_) diff --git a/src/jit/lowerxarch.cpp b/src/jit/lowerxarch.cpp index 506ccfeabb..5e67067db8 100644 --- a/src/jit/lowerxarch.cpp +++ b/src/jit/lowerxarch.cpp @@ -39,6 +39,25 @@ void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc) { TreeNodeInfo* info = &(storeLoc->gtLsraInfo); + // Is this the case of var = call where call is returning + // a value in multiple return registers? + GenTree* op1 = storeLoc->gtGetOp1(); + if (op1->IsMultiRegCall()) + { + // backend expects to see this case only for store lclvar. + assert(storeLoc->OperGet() == GT_STORE_LCL_VAR); + + // srcCount = number of registers in which the value is returned by call + GenTreeCall* call = op1->AsCall(); + ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc(); + info->srcCount = retTypeDesc->GetReturnRegCount(); + + // Call node srcCandidates = Bitwise-OR(allregs(GetReturnRegType(i))) for all i=0..RetRegCount-1 + regMaskTP srcCandidates = m_lsra->allMultiRegCallNodeRegs(call); + op1->gtLsraInfo.setSrcCandidates(m_lsra, srcCandidates); + return; + } + #ifdef FEATURE_SIMD if (storeLoc->TypeGet() == TYP_SIMD12) { @@ -55,7 +74,6 @@ void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc) // If the source is a containable immediate, make it contained, unless it is // an int-size or larger store of zero to memory, because we can generate smaller code // by zeroing a register and then storing it. - GenTree* op1 = storeLoc->gtOp1; if (IsContainableImmed(storeLoc, op1) && (!op1->IsZero() || varTypeIsSmall(storeLoc))) { MakeSrcContained(storeLoc, op1); @@ -296,50 +314,56 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) else #endif // !defined(_TARGET_64BIT_) { + GenTree* op1 = tree->gtGetOp1(); + regMaskTP useCandidates = RBM_NONE; + + info->srcCount = (tree->TypeGet() == TYP_VOID) ? 
0 : 1; + info->dstCount = 0; + #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING if (varTypeIsStruct(tree)) - { - noway_assert((tree->gtOp.gtOp1->OperGet() == GT_LCL_VAR) || - (tree->gtOp.gtOp1->OperGet() == GT_CALL)); - - if (tree->gtOp.gtOp1->OperGet() == GT_LCL_VAR) + { + // op1 has to be either an lclvar or a multi-reg returning call + if (op1->OperGet() == GT_LCL_VAR) { - GenTreeLclVarCommon* lclVarPtr = tree->gtOp.gtOp1->AsLclVarCommon(); + GenTreeLclVarCommon* lclVarPtr = op1->AsLclVarCommon(); LclVarDsc* varDsc = &(compiler->lvaTable[lclVarPtr->gtLclNum]); assert(varDsc->lvIsMultiRegArgOrRet); varDsc->lvDoNotEnregister = true; // If this is a two eightbyte return, make the var - // contained by the return expression. The code gen will put + // contained by the return expression. Codegen will put // the values in the right registers for return. - info->srcCount = (tree->TypeGet() == TYP_VOID) ? 0 : 1; - info->dstCount = 0; - MakeSrcContained(tree, tree->gtOp.gtOp1); - break; + MakeSrcContained(tree, op1); } + else + { + noway_assert(op1->IsMultiRegCall()); - // If the return gtOp1 is GT_CALL, just fallthrough. The return registers should already be set properly by the GT_CALL. + ReturnTypeDesc* retTypeDesc = op1->AsCall()->GetReturnTypeDesc(); + info->srcCount = retTypeDesc->GetReturnRegCount(); + useCandidates = retTypeDesc->GetABIReturnRegs(); + } } + else #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING - // TODO-AMD64-Unix: When the GT_CALL for multi-register return structs is changed to use 2 destinations, - // change the code below to use 2 src for such op1s (this is the case of op1 being a GT_CALL). - info->srcCount = (tree->TypeGet() == TYP_VOID) ? 0 : 1; - info->dstCount = 0; - - regMaskTP useCandidates; - switch (tree->TypeGet()) { - case TYP_VOID: useCandidates = RBM_NONE; break; - case TYP_FLOAT: useCandidates = RBM_FLOATRET; break; - case TYP_DOUBLE: useCandidates = RBM_DOUBLERET; break; + // Non-struct type return - determine useCandidates + switch (tree->TypeGet()) + { + case TYP_VOID: useCandidates = RBM_NONE; break; + case TYP_FLOAT: useCandidates = RBM_FLOATRET; break; + case TYP_DOUBLE: useCandidates = RBM_DOUBLERET; break; #if defined(_TARGET_64BIT_) - case TYP_LONG: useCandidates = RBM_LNGRET; break; + case TYP_LONG: useCandidates = RBM_LNGRET; break; #endif // defined(_TARGET_64BIT_) - default: useCandidates = RBM_INTRET; break; + default: useCandidates = RBM_INTRET; break; + } } + if (useCandidates != RBM_NONE) { - tree->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(l, useCandidates); + op1->gtLsraInfo.setSrcCandidates(l, useCandidates); } } break; @@ -835,10 +859,30 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) case GT_CALL: { - info->srcCount = 0; - info->dstCount = (tree->TypeGet() != TYP_VOID) ? 1 : 0; + bool hasMultiRegRetVal = false; + ReturnTypeDesc* retTypeDesc = nullptr; - GenTree *ctrlExpr = tree->gtCall.gtControlExpr; + info->srcCount = 0; + if (tree->TypeGet() != TYP_VOID) + { + hasMultiRegRetVal = tree->AsCall()->HasMultiRegRetVal(); + if (hasMultiRegRetVal) + { + // dst count = number of registers in which the value is returned by call + retTypeDesc = tree->AsCall()->GetReturnTypeDesc(); + info->dstCount = retTypeDesc->GetReturnRegCount(); + } + else + { + info->dstCount = 1; + } + } + else + { + info->dstCount = 0; + } + + GenTree* ctrlExpr = tree->gtCall.gtControlExpr; if (tree->gtCall.gtCallType == CT_INDIRECT) { // either gtControlExpr != null or gtCallAddr != null. 
@@ -884,8 +928,12 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) } // Set destination candidates for return value of the call. - if (varTypeIsFloating(registerType) - FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY( || varTypeIsSIMD(registerType))) + if (hasMultiRegRetVal) + { + assert(retTypeDesc != nullptr); + info->setDstCandidates(l, retTypeDesc->GetABIReturnRegs()); + } + else if (varTypeIsFloating(registerType)) { #ifdef _TARGET_X86_ // The return value will be on the X87 stack, and we will need to move it. @@ -1689,10 +1737,11 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) } #endif //_TARGET_X86_ - tree = next; - // We need to be sure that we've set info->srcCount and info->dstCount appropriately - assert(info->dstCount < 2); + assert((info->dstCount < 2) || + (tree->IsMultiRegCall() && info->dstCount == MAX_RET_REG_COUNT)); + + tree = next; } } diff --git a/src/jit/lsra.cpp b/src/jit/lsra.cpp index e0fd5a23db..127419b51c 100644 --- a/src/jit/lsra.cpp +++ b/src/jit/lsra.cpp @@ -114,9 +114,30 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX */ -void lsraAssignRegToTree(GenTreePtr tree, regNumber reg) +//-------------------------------------------------------------- +// lsraAssignRegToTree: Assign the given reg to tree node. +// +// Arguments: +// tree - Gentree node +// reg - register to be assigned +// regIdx - register idx, if tree is a multi-reg call node. +// regIdx will be zero for single-reg result producing tree nodes. +// +// Return Value: +// None +// +void lsraAssignRegToTree(GenTreePtr tree, regNumber reg, unsigned regIdx) { - tree->gtRegNum = reg; + if (regIdx == 0) + { + tree->gtRegNum = reg; + } + else + { + assert(tree->IsMultiRegCall()); + GenTreeCall* call = tree->AsCall(); + call->SetRegNumByIdx(reg, regIdx); + } } // allRegs represents a set of registers that can @@ -137,6 +158,68 @@ regMaskTP LinearScan::allRegs(RegisterType rt) return availableIntRegs; } +//-------------------------------------------------------------------------- +// allMultiRegCallNodeRegs: represents a set of registers that can be used +// to allocate a multi-reg call node. +// +// Arguments: +// call - Multi-reg call node +// +// Return Value: +// Mask representing the set of available registers for multi-reg call +// node. +// +// Note: +// Multi-reg call node available regs = Bitwise-OR(allregs(GetReturnRegType(i))) +// for all i=0..RetRegCount-1. +regMaskTP LinearScan::allMultiRegCallNodeRegs(GenTreeCall* call) +{ + assert(call->HasMultiRegRetVal()); + + ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc(); + regMaskTP resultMask = allRegs(retTypeDesc->GetReturnRegType(0)); + + unsigned count = retTypeDesc->GetReturnRegCount(); + for (unsigned i = 1; i < count; ++i) + { + resultMask |= allRegs(retTypeDesc->GetReturnRegType(i)); + } + + return resultMask; +} + +//-------------------------------------------------------------------------- +// allRegs: returns the set of registers that can accomodate the type of +// given node. +// +// Arguments: +// tree - GenTree node +// +// Return Value: +// Mask representing the set of available registers for given tree +// +// Note: In case of multi-reg call node, the full set of registers must be +// determined by looking at types of individual return register types. +// In this case, the registers may include registers from different register +// sets and will not be limited to the actual ABI return registers. 
+regMaskTP LinearScan::allRegs(GenTree* tree) +{ + regMaskTP resultMask; + + // In case of multi-reg calls, allRegs is defined as + // Bitwise-Or(allRegs(GetReturnRegType(i)) for i=0..ReturnRegCount-1 + if (tree->IsMultiRegCall()) + { + resultMask = allMultiRegCallNodeRegs(tree->AsCall()); + } + else + { + resultMask = allRegs(tree->TypeGet()); + } + + return resultMask; +} + regMaskTP LinearScan::allSIMDRegs() { return availableFloatRegs; @@ -635,30 +718,64 @@ LinearScan::associateRefPosWithInterval(RefPosition *rp) } } -RefPosition * -LinearScan::newRefPosition( - regNumber reg, LsraLocation theLocation, - RefType theRefType, GenTree * theTreeNode, - regMaskTP mask) +//--------------------------------------------------------------------------- +// newRefPosition: allocate and initialize a new RefPosition. +// +// Arguments: +// reg - reg number that identifies RegRecord to be associated +// with this RefPosition +// theLocation - LSRA location of RefPosition +// theRefType - RefPosition type +// theTreeNode - GenTree node for which this RefPosition is created +// mask - Set of valid registers for this RefPosition +// multiRegIdx - register position if this RefPosition corresponds to a +// multi-reg call node. +// +// Return Value: +// a new RefPosition +// +RefPosition* +LinearScan::newRefPosition(regNumber reg, + LsraLocation theLocation, + RefType theRefType, + GenTree* theTreeNode, + regMaskTP mask) { - RefPosition *newRP = newRefPositionRaw(theLocation, theTreeNode, theRefType); + RefPosition* newRP = newRefPositionRaw(theLocation, theTreeNode, theRefType); newRP->setReg(getRegisterRecord(reg)); - newRP->registerAssignment = mask; + + newRP->setMultiRegIdx(0); + associateRefPosWithInterval(newRP); DBEXEC(VERBOSE, newRP->dump()); return newRP; } - - -RefPosition * -LinearScan::newRefPosition( - Interval * theInterval, LsraLocation theLocation, - RefType theRefType, GenTree * theTreeNode, - regMaskTP mask) +//--------------------------------------------------------------------------- +// newRefPosition: allocate and initialize a new RefPosition. +// +// Arguments: +// theInterval - interval to which RefPosition is associated with. +// theLocation - LSRA location of RefPosition +// theRefType - RefPosition type +// theTreeNode - GenTree node for which this RefPosition is created +// mask - Set of valid registers for this RefPosition +// multiRegIdx - register position if this RefPosition corresponds to a +// multi-reg call node. 
+// +// Return Value: +// a new RefPosition +// +RefPosition* +LinearScan::newRefPosition(Interval* theInterval, + LsraLocation theLocation, + RefType theRefType, + GenTree* theTreeNode, + regMaskTP mask, + unsigned multiRegIdx /* = 0 */) { #ifdef DEBUG if (theInterval != nullptr && regType(theInterval->registerType) == FloatRegisterType) @@ -686,12 +803,12 @@ LinearScan::newRefPosition( if (insertFixedRef) { regNumber physicalReg = genRegNumFromMask(mask); - RefPosition *pos = newRefPosition (physicalReg, theLocation, RefTypeFixedReg, nullptr, mask); + RefPosition* pos = newRefPosition (physicalReg, theLocation, RefTypeFixedReg, nullptr, mask); assert(theInterval != nullptr); assert((allRegs(theInterval->registerType) & mask) != 0); } - RefPosition *newRP = newRefPositionRaw(theLocation, theTreeNode, theRefType); + RefPosition* newRP = newRefPositionRaw(theLocation, theTreeNode, theRefType); newRP->setInterval(theInterval); @@ -712,6 +829,8 @@ LinearScan::newRefPosition( #endif // !_TARGET_AMD64_ newRP->registerAssignment = mask; + newRP->setMultiRegIdx(multiRegIdx); + associateRefPosWithInterval(newRP); DBEXEC(VERBOSE, newRP->dump()); @@ -2985,7 +3104,15 @@ LinearScan::buildRefPositionsForNode(GenTree *tree, // This is the case for dead nodes that occur after // tree rationalization // TODO-Cleanup: Identify and remove these dead nodes prior to register allocation. - produce = 1; + if (tree->IsMultiRegCall()) + { + // In case of multi-reg call node, produce = number of return registers + produce = tree->AsCall()->GetReturnTypeDesc()->GetReturnRegCount(); + } + else + { + produce = 1; + } } #ifdef DEBUG @@ -3049,14 +3176,17 @@ LinearScan::buildRefPositionsForNode(GenTree *tree, // for interstitial tree temps, a use is always last and end; // this is set by default in newRefPosition - GenTree * useNode = locInfo.treeNode; + GenTree* useNode = locInfo.treeNode; assert(useNode != nullptr); var_types type = useNode->TypeGet(); regMaskTP candidates = getUseCandidates(useNode); - Interval *i = locInfo.interval; + Interval* i = locInfo.interval; + unsigned multiRegIdx = locInfo.multiRegIdx; #ifdef FEATURE_SIMD - if (tree->OperIsLocalStore() && varDefInterval == nullptr) + // In case of multi-reg call store to a local, there won't be any mismatch of + // use candidates with the type of the tree node. + if (tree->OperIsLocalStore() && varDefInterval == nullptr && !useNode->IsMultiRegCall()) { // This is a non-candidate store. If this is a SIMD type, the use candidates // may not match the type of the tree node. 
If that is the case, change the @@ -3110,12 +3240,12 @@ LinearScan::buildRefPositionsForNode(GenTree *tree, regNumber physicalReg = genRegNumFromMask(fixedAssignment); RefPosition *pos = newRefPosition (physicalReg, currentLoc, RefTypeFixedReg, nullptr, fixedAssignment); } - pos = newRefPosition(i, currentLoc, RefTypeUse, useNode, allRegs(i->registerType)); + pos = newRefPosition(i, currentLoc, RefTypeUse, useNode, allRegs(i->registerType), multiRegIdx); pos->registerAssignment = candidates; } else { - pos = newRefPosition(i, currentLoc, RefTypeUse, useNode, candidates); + pos = newRefPosition(i, currentLoc, RefTypeUse, useNode, candidates, multiRegIdx); } if (delayRegFree) { @@ -3130,7 +3260,6 @@ LinearScan::buildRefPositionsForNode(GenTree *tree, buildInternalRegisterUsesForNode(tree, currentLoc, internalRefs, internalCount); RegisterType registerType = getDefType(tree); - regMaskTP candidates = getDefCandidates(tree); regMaskTP useCandidates = getUseCandidates(tree); @@ -3145,52 +3274,51 @@ LinearScan::buildRefPositionsForNode(GenTree *tree, } #endif // DEBUG - int targetRegs = produce; - #if defined(_TARGET_AMD64_) - assert(produce <= 1); + // Multi-reg call node is the only node that could produce multi-reg value + assert(produce <= 1 || (tree->IsMultiRegCall() && produce == MAX_RET_REG_COUNT)); #elif defined(_TARGET_ARM_) assert(!varTypeIsMultiReg(tree->TypeGet())); #endif // _TARGET_xxx_ + // Add kill positions before adding def positions + buildKillPositionsForNode(tree, currentLoc + 1); + #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE VARSET_TP VARSET_INIT_NOCOPY(liveLargeVectors, VarSetOps::UninitVal()); -#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE - - // push defs - if (produce == 0) + if (RBM_FLT_CALLEE_SAVED != RBM_NONE) { - buildKillPositionsForNode(tree, currentLoc + 1); - -#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE - if (RBM_FLT_CALLEE_SAVED != RBM_NONE) - { - // Build RefPositions for saving any live large vectors. - // This must be done after the kills, so that we know which large vectors are still live. - VarSetOps::AssignNoCopy(compiler, liveLargeVectors, buildUpperVectorSaveRefPositions(tree, currentLoc)); - } + // Build RefPositions for saving any live large vectors. + // This must be done after the kills, so that we know which large vectors are still live. + VarSetOps::AssignNoCopy(compiler, liveLargeVectors, buildUpperVectorSaveRefPositions(tree, currentLoc)); + } #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE + + ReturnTypeDesc* retTypeDesc = nullptr; + bool isMultiRegCall = tree->IsMultiRegCall(); + if (isMultiRegCall) + { + retTypeDesc = tree->AsCall()->GetReturnTypeDesc(); + assert((int)genCountBits(candidates) == produce); + assert(candidates == retTypeDesc->GetABIReturnRegs()); } + // push defs + LsraLocation defLocation = currentLoc + 1; for (int i=0; i < produce; i++) - { - LsraLocation lastDefLocation = currentLoc + 1; + { + regMaskTP currCandidates = candidates; + Interval *interval = varDefInterval; - // If this is the last def add the phys reg defs - bool generatedKills = false; - if (i == produce-1) + // In case of multi-reg call node, registerType is given by + // the type of ith position return register. + if (isMultiRegCall) { - generatedKills = buildKillPositionsForNode(tree, lastDefLocation); - -#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE - // Build RefPositions for saving any live large vectors. - // This must be done after the kills, so that we know which large vectors are still live. 
- VarSetOps::AssignNoCopy(compiler, liveLargeVectors, buildUpperVectorSaveRefPositions(tree, currentLoc)); -#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE + registerType = retTypeDesc->GetReturnRegType((unsigned)i); + currCandidates = genRegMask(retTypeDesc->GetABIReturnReg(i)); + useCandidates = allRegs(registerType); } - regMaskTP currCandidates = candidates; - Interval *interval = varDefInterval; if (interval == nullptr) { // Make a new interval @@ -3204,10 +3332,12 @@ LinearScan::buildRefPositionsForNode(GenTree *tree, assert(!tree->IsReuseRegVal()); interval->isConstant = true; } + if ((currCandidates & useCandidates) != RBM_NONE) { interval->updateRegisterPreferences(currCandidates & useCandidates); } + if (isSpecialPutArg) { interval->isSpecialPutArg = true; @@ -3227,11 +3357,10 @@ LinearScan::buildRefPositionsForNode(GenTree *tree, // but not push it if (!noPush) { - stack->Push(LocationInfo(lastDefLocation, interval, tree)); + stack->Push(LocationInfo(defLocation, interval, tree, (unsigned) i)); } - LsraLocation defLocation = (i == produce-1) ? lastDefLocation : currentLoc; - RefPosition *pos = newRefPosition(interval, defLocation, defRefType, defNode, currCandidates); + RefPosition* pos = newRefPosition(interval, defLocation, defRefType, defNode, currCandidates, (unsigned)i); if (info.isLocalDefUse) { pos->isLocalDefUse = true; @@ -3241,6 +3370,7 @@ LinearScan::buildRefPositionsForNode(GenTree *tree, interval->updateRegisterPreferences(currCandidates); interval->updateRegisterPreferences(useCandidates); } + #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE buildUpperVectorRestoreRefPositions(tree, currentLoc, liveLargeVectors); #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE @@ -6677,7 +6807,7 @@ LinearScan::resolveLocalRef(GenTreePtr treeNode, RefPosition * currentRefPositio if (!currentRefPosition->isFixedRegRef || currentRefPosition->moveReg) { // This is the second case, where we need to generate a copy - insertCopyOrReload(treeNode, currentRefPosition); + insertCopyOrReload(treeNode, currentRefPosition->getMultiRegIdx(), currentRefPosition); } } else @@ -6739,7 +6869,7 @@ LinearScan::resolveLocalRef(GenTreePtr treeNode, RefPosition * currentRefPositio void LinearScan::writeRegisters(RefPosition *currentRefPosition, GenTree *tree) { - lsraAssignRegToTree(tree, currentRefPosition->assignedReg()); + lsraAssignRegToTree(tree, currentRefPosition->assignedReg(), currentRefPosition->getMultiRegIdx()); } //------------------------------------------------------------------------ @@ -6748,8 +6878,10 @@ LinearScan::writeRegisters(RefPosition *currentRefPosition, GenTree *tree) // than the one it was spilled from (GT_RELOAD). // // Arguments: -// tree - This is the node to reload. Insert the reload node between this node and its parent. -// spillRefPosition - The RefPosition of the spill. spillRefPosition->nextRefPosition is the RefPosition of the reload. +// tree - This is the node to copy or reload. +// Insert copy or reload node between this node and its parent. +// multiRegIdx - register position of tree node for which copy or reload is needed. +// refPosition - The RefPosition at which copy or reload will take place. // // Notes: // The GT_COPY or GT_RELOAD will be inserted in the proper spot in execution order where the reload is to occur. @@ -6795,15 +6927,14 @@ LinearScan::writeRegisters(RefPosition *currentRefPosition, GenTree *tree) // used if we reload to the same register. 
Normally, though, in that case we just mark the node with GTF_SPILLED, // and the unspilling code automatically reuses the same register, and does the reload when it notices that flag // when considering a node's operands. - +// void -LinearScan::insertCopyOrReload(GenTreePtr tree, RefPosition* refPosition) -{ +LinearScan::insertCopyOrReload(GenTreePtr tree, unsigned multiRegIdx, RefPosition* refPosition) +{ GenTreePtr* parentChildPointer = nullptr; GenTreePtr parent = tree->gtGetParent(&parentChildPointer); noway_assert(parent != nullptr && parentChildPointer != nullptr); - // Create the new node, with "tree" as its only child. genTreeOps oper; if (refPosition->reload) {
@@ -6814,41 +6945,65 @@ LinearScan::insertCopyOrReload(GenTreePtr tree, RefPosition* refPosition) oper = GT_COPY; } - var_types treeType = tree->TypeGet(); + // If the parent is a reload/copy node, then tree must be a multi-reg call node + // that has already had one of its registers spilled. This is because a multi-reg + // call node is the only node whose RefTypeDef positions get independently + // spilled or reloaded. It is possible that one of its RefTypeDef positions got + // spilled and the next use of it requires it to be in a different register. + // + // In this case, set the ith position reg of the reload/copy node to the reg allocated + // for the copy/reload refPosition. Essentially a copy/reload node will have a reg + // for each multi-reg position of its child. If there is a valid reg in ith + // position of GT_COPY or GT_RELOAD node then the corresponding result of its + // child needs to be copied or reloaded to that reg. + if (parent->IsCopyOrReload()) + { + noway_assert(parent->OperGet() == oper); + noway_assert(tree->IsMultiRegCall()); + GenTreeCall* call = tree->AsCall(); + GenTreeCopyOrReload* copyOrReload = parent->AsCopyOrReload(); + noway_assert(copyOrReload->GetRegNumByIdx(multiRegIdx) == REG_NA); + copyOrReload->SetRegNumByIdx(refPosition->assignedReg(), multiRegIdx); + } + else + { + // Create the new node, with "tree" as its only child. + var_types treeType = tree->TypeGet(); #ifdef FEATURE_SIMD - // Check to see whether we need to move to a different register set. - // This currently only happens in the case of SIMD vector types that are small enough (pointer size) - // that they must be passed & returned in integer registers. - // 'treeType' is the type of the register we are moving FROM, - // and refPosition->registerAssignment is the mask for the register we are moving TO. - // If they don't match, we need to reverse the type for the "move" node. + // Check to see whether we need to move to a different register set. + // This currently only happens in the case of SIMD vector types that are small enough (pointer size) + // that they must be passed & returned in integer registers. + // 'treeType' is the type of the register we are moving FROM, + // and refPosition->registerAssignment is the mask for the register we are moving TO. + // If they don't match, we need to reverse the type for the "move" node. - if ((allRegs(treeType) & refPosition->registerAssignment) == 0) - { - treeType = (useFloatReg(treeType)) ? TYP_I_IMPL : TYP_SIMD8; - } + if ((allRegs(treeType) & refPosition->registerAssignment) == 0) + { + treeType = (useFloatReg(treeType)) ?
TYP_I_IMPL : TYP_SIMD8; + } #endif // FEATURE_SIMD - GenTreePtr newNode = compiler->gtNewOperNode(oper, treeType, tree); - assert(refPosition->registerAssignment != RBM_NONE); - newNode->CopyCosts(tree); - newNode->gtRegNum = refPosition->assignedReg(); - newNode->gtLsraInfo.isLsraAdded = true; - newNode->gtLsraInfo.isLocalDefUse = false; - if (refPosition->copyReg) - { - // This is a TEMPORARY copy - assert(isCandidateLocalRef(tree)); - newNode->gtFlags |= GTF_VAR_DEATH; - } + GenTreeCopyOrReload* newNode = new(compiler, oper) GenTreeCopyOrReload(oper, treeType, tree); + assert(refPosition->registerAssignment != RBM_NONE); + newNode->CopyCosts(tree); + newNode->SetRegNumByIdx(refPosition->assignedReg(), multiRegIdx); + newNode->gtLsraInfo.isLsraAdded = true; + newNode->gtLsraInfo.isLocalDefUse = false; + if (refPosition->copyReg) + { + // This is a TEMPORARY copy + assert(isCandidateLocalRef(tree)); + newNode->gtFlags |= GTF_VAR_DEATH; + }
- // Replace tree in the parent node. - *parentChildPointer = newNode; + // Replace tree in the parent node. + *parentChildPointer = newNode; - // we insert this directly after the spilled node. it does not reload at that point but - // just updates registers - tree->InsertAfterSelf(newNode); + // we insert this directly after the spilled node. it does not reload at that point but + // just updates registers + tree->InsertAfterSelf(newNode); + } } #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
@@ -7060,7 +7215,19 @@ LinearScan::updateMaxSpill(RefPosition* refPosition) treeNode = interval->firstRefPosition->treeNode; } assert(treeNode != nullptr); - typ = compiler->tmpNormalizeType(treeNode->TypeGet()); + + // In case of multi-reg call nodes, we need to use the type + // of the return register given by multiRegIdx of the RefPosition. + if (treeNode->IsMultiRegCall()) + { + ReturnTypeDesc* retTypeDesc = treeNode->AsCall()->GetReturnTypeDesc(); + typ = retTypeDesc->GetReturnRegType(refPosition->getMultiRegIdx()); + } + else + { + typ = treeNode->TypeGet(); + } + typ = compiler->tmpNormalizeType(typ); } if (refPosition->spillAfter && !refPosition->reload)
@@ -7383,22 +7550,33 @@ LinearScan::resolveRegisters() if (currentRefPosition->spillAfter) { treeNode->gtFlags |= GTF_SPILL; + // If this is a constant interval that is reusing a pre-existing value, we actually need // to generate the value at this point in order to spill it. if (treeNode->IsReuseRegVal()) { treeNode->ResetReuseRegVal(); } + + // In case of multi-reg call node, also set spill flag on the + // register specified by multi-reg index of current RefPosition. + // Note that the spill flag on treeNode indicates that one or + // more of its allocated registers are in that state.
+ if (treeNode->IsMultiRegCall()) + { + GenTreeCall* call = treeNode->AsCall(); + call->SetRegSpillFlagByIdx(GTF_SPILL, currentRefPosition->getMultiRegIdx()); + } } // If the value is reloaded or moved to a different register, we need to insert // a node to hold the register to which it should be reloaded - RefPosition * nextRefPosition = currentRefPosition->nextRefPosition; + RefPosition* nextRefPosition = currentRefPosition->nextRefPosition; assert(nextRefPosition != nullptr); if (INDEBUG(alwaysInsertReload() ||) nextRefPosition->assignedReg() != currentRefPosition->assignedReg()) { - insertCopyOrReload(treeNode, nextRefPosition); + insertCopyOrReload(treeNode, currentRefPosition->getMultiRegIdx(), nextRefPosition); } }
@@ -7665,7 +7843,7 @@ LinearScan::insertMove(BasicBlock * block, } else { - top = compiler->gtNewOperNode(GT_COPY, varDsc->TypeGet(), src); + top = new(compiler, GT_COPY) GenTreeCopyOrReload(GT_COPY, varDsc->TypeGet(), src); // This is the new home of the lclVar - indicate that by clearing the GTF_VAR_DEATH flag. // Note that if src is itself a lastUse, this will have no effect. top->gtFlags &= ~(GTF_VAR_DEATH);
diff --git a/src/jit/lsra.h b/src/jit/lsra.h index f1270641a4..079c7ebf5b 100644 --- a/src/jit/lsra.h +++ b/src/jit/lsra.h
@@ -42,11 +42,22 @@ regMaskTP calleeSaveRegs(RegisterType rt) struct LocationInfo { LsraLocation loc; - Interval * interval; - GenTree * treeNode; - - LocationInfo(LsraLocation l, Interval *i, GenTree *t) - : loc(l), interval(i), treeNode(t) {} + + // Reg Index in case of multi-reg result producing call node. + // Indicates the position of the register that this location refers to. + // The number of bits needed is based on the maximum value of MAX_RET_REG_COUNT + // across all targets, which is 4 on Arm. Hence the index value + // would be 0..MAX_RET_REG_COUNT-1.
+ unsigned multiRegIdx : 2; + + Interval* interval; + GenTree* treeNode; + + LocationInfo(LsraLocation l, Interval* i, GenTree* t, unsigned regIdx = 0) + : loc(l), multiRegIdx(regIdx), interval(i), treeNode(t) + { + assert(multiRegIdx == regIdx); + } // default constructor for data structures LocationInfo() {}
@@ -377,7 +388,7 @@ public: // Insert a copy in the case where a tree node value must be moved to a different // register at the point of use, or it is reloaded to a different register // than the one it was spilled from - void insertCopyOrReload(GenTreePtr tree, RefPosition* refPosition); + void insertCopyOrReload(GenTreePtr tree, unsigned multiRegIdx, RefPosition* refPosition); #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE // Insert code to save and restore the upper half of a vector that lives
@@ -658,6 +669,8 @@ private: LsraLocation currentLoc); regMaskTP allRegs(RegisterType rt); + regMaskTP allRegs(GenTree* tree); + regMaskTP allMultiRegCallNodeRegs(GenTreeCall* tree); regMaskTP allSIMDRegs(); regMaskTP internalFloatRegCandidates();
@@ -722,12 +735,17 @@ private: RefPosition * newRefPositionRaw(LsraLocation nodeLocation, GenTree* treeNode, RefType refType); - RefPosition * newRefPosition(Interval * theInterval, LsraLocation theLocation, - RefType theRefType, GenTree * theTreeNode, - regMaskTP mask); - - RefPosition * newRefPosition(regNumber reg, LsraLocation theLocation, - RefType theRefType, GenTree * theTreeNode, + RefPosition* newRefPosition(Interval* theInterval, + LsraLocation theLocation, + RefType theRefType, + GenTree* theTreeNode, + regMaskTP mask, + unsigned multiRegIdx = 0); + + RefPosition* newRefPosition(regNumber reg, + LsraLocation theLocation, + RefType theRefType, + GenTree* theTreeNode, + regMaskTP mask); void applyCalleeSaveHeuristics(RefPosition* rp);
@@ -1274,6 +1292,7 @@ public: , nodeLocation(nodeLocation) , registerAssignment(RBM_NONE) , refType(refType) + , multiRegIdx(0) , lastUse(false) , reload(false) , spillAfter(false)
@@ -1334,6 +1353,21 @@ public: ); } + // Used by RefTypeDef/Use positions of a multi-reg call node. + // Indicates the position of the register that this ref position refers to. + // The number of bits needed is based on the maximum value of MAX_RET_REG_COUNT + // across all targets, which is 4 on Arm. Hence the index value + // would be 0..MAX_RET_REG_COUNT-1. + unsigned multiRegIdx : 2; + + void setMultiRegIdx(unsigned idx) + { + multiRegIdx = idx; + assert(multiRegIdx == idx); + } + + unsigned getMultiRegIdx() { return multiRegIdx; } + + // Last Use - this may be true for multiple RefPositions in the same Interval bool lastUse : 1;
diff --git a/src/jit/nodeinfo.h b/src/jit/nodeinfo.h index 3297358fc4..5b6fab87ea 100644 --- a/src/jit/nodeinfo.h +++ b/src/jit/nodeinfo.h
@@ -38,7 +38,7 @@ public: int dstCount; void setDstCount(int count) { - assert(count == 0 || count == 1); + assert(count <= MAX_RET_REG_COUNT); _dstCount = (char) count; } int getDstCount() { return _dstCount; }
diff --git a/src/jit/regset.cpp b/src/jit/regset.cpp index c1b90186d7..a188461450 100644 --- a/src/jit/regset.cpp +++ b/src/jit/regset.cpp
@@ -1415,19 +1415,47 @@ void RegSet::rsSpillRegIfUsed(regNumber reg) #endif // LEGACY_BACKEND -/***************************************************************************** - * - * Spill the tree held in 'reg' - */ - -void RegSet::rsSpillTree(regNumber reg, GenTreePtr tree) -{ - SpillDsc * spill; - TempDsc * temp; +//------------------------------------------------------------ +// rsSpillTree: Spill the tree held in 'reg'.
+// +// Arguments: +// reg - Register of tree node that is to be spilled +// tree - GenTree node that is being spilled +// regIdx - Register index identifying the specific result +// register of a multi-reg call node. For single-reg +// producing tree nodes its value is zero. +// +// Return Value: +// None. +// +// Assumption: +// RyuJIT backend specific: in case of multi-reg call nodes, GTF_SPILL +// flag associated with the reg that is being spilled is cleared. The +// caller of this method is expected to clear GTF_SPILL flag on call +// node after all of its registers marked for spilling are spilled. +// +void RegSet::rsSpillTree(regNumber reg, + GenTreePtr tree, + unsigned regIdx /* =0 */) +{ + assert(tree != nullptr); + + GenTreeCall* call = nullptr; + var_types treeType; - assert(tree); +#ifndef LEGACY_BACKEND + if (tree->IsMultiRegCall()) + { + call = tree->AsCall(); + ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc(); + treeType = retTypeDesc->GetReturnRegType(regIdx); + } + else +#endif + { + treeType = tree->TypeGet(); + } - var_types treeType = tree->TypeGet(); var_types tempType = Compiler::tmpNormalizeType(treeType); regMaskTP mask; bool floatSpill = false; @@ -1445,8 +1473,8 @@ void RegSet::rsSpillTree(regNumber reg, GenTreePtr tree) rsNeededSpillReg = true; #ifdef LEGACY_BACKEND - /* The register we're spilling must be used but not locked - or an enregistered variable. */ + // The register we're spilling must be used but not locked + // or an enregistered variable. assert((mask & rsMaskUsed) == mask); assert((mask & rsMaskLock) == 0); @@ -1455,17 +1483,31 @@ void RegSet::rsSpillTree(regNumber reg, GenTreePtr tree) #ifndef LEGACY_BACKEND // We should only be spilling nodes marked for spill, - // vars should be handled elsewhere, - // and we shouldn't spill nodes twice so we reset GTF_SPILL + // vars should be handled elsewhere, and to prevent + // spilling twice clear GTF_SPILL flag on tree node. + // + // In case of multi-reg call nodes only the spill flag + // associated with the reg is cleared. Spill flag on + // call node should be cleared by the caller of this method. assert(tree->gtOper != GT_REG_VAR); - assert(!varTypeIsMultiReg(tree)); - assert(tree->gtFlags & GTF_SPILL); - tree->gtFlags &= ~GTF_SPILL; + assert((tree->gtFlags & GTF_SPILL) != 0); + + unsigned regFlags = 0; + if (call != nullptr) + { + regFlags = call->GetRegSpillFlagByIdx(regIdx); + assert((regFlags & GTF_SPILL) != 0); + regFlags &= ~GTF_SPILL; + } + else + { + assert(!varTypeIsMultiReg(tree)); + tree->gtFlags &= ~GTF_SPILL; + } #endif // !LEGACY_BACKEND #if CPU_LONG_USES_REGPAIR - /* Are we spilling a part of a register pair? */ - + // Are we spilling a part of a register pair? if (treeType == TYP_LONG) { tempType = TYP_I_IMPL; @@ -1479,20 +1521,18 @@ void RegSet::rsSpillTree(regNumber reg, GenTreePtr tree) } #else assert(tree->InReg()); - assert(tree->gtRegNum == reg); + assert(tree->gtRegNum == reg || (call != nullptr && call->GetRegNumByIdx(regIdx) == reg)); #endif // CPU_LONG_USES_REGPAIR - /* Are any registers free for spillage? */ - - spill = SpillDsc::alloc(m_rsCompiler, this, tempType); + // Are any registers free for spillage? 
+ SpillDsc* spill = SpillDsc::alloc(m_rsCompiler, this, tempType); - /* Grab a temp to store the spilled value */ - - spill->spillTemp = temp = m_rsCompiler->tmpGetTemp(tempType); + // Grab a temp to store the spilled value + TempDsc* temp = m_rsCompiler->tmpGetTemp(tempType); + spill->spillTemp = temp; tempType = temp->tdTempType(); - /* Remember what it is we have spilled */ - + // Remember what it is we have spilled spill->spillTree = tree; #ifdef LEGACY_BACKEND spill->spillAddr = rsUsedAddr[reg]; @@ -1512,15 +1552,13 @@ void RegSet::rsSpillTree(regNumber reg, GenTreePtr tree) #endif #ifdef LEGACY_BACKEND - /* Is the register part of a complex address mode? */ - + // Is the register part of a complex address mode? rsAddrSpillOper(rsUsedAddr[reg]); #endif // LEGACY_BACKEND - /* 'lastDsc' is 'spill' for simple cases, and will point to the last - multi-use descriptor if 'reg' is being multi-used */ - - SpillDsc * lastDsc = spill; + // 'lastDsc' is 'spill' for simple cases, and will point to the last + // multi-use descriptor if 'reg' is being multi-used + SpillDsc* lastDsc = spill; #ifdef LEGACY_BACKEND if ((rsMaskMult & mask) == 0) @@ -1529,31 +1567,27 @@ void RegSet::rsSpillTree(regNumber reg, GenTreePtr tree) } else { - /* The register is being multi-used and will have entries in - rsMultiDesc[reg]. Spill all of them (ie. move them to - rsSpillDesc[reg]). - When we unspill the reg, they will all be moved back to - rsMultiDesc[]. - */ + // The register is being multi-used and will have entries in + // rsMultiDesc[reg]. Spill all of them (ie. move them to + // rsSpillDesc[reg]). + // When we unspill the reg, they will all be moved back to + // rsMultiDesc[]. spill->spillMoreMultis = true; - SpillDsc * nextDsc = rsMultiDesc[reg]; + SpillDsc* nextDsc = rsMultiDesc[reg]; do { - assert(nextDsc); - - /* Is this multi-use part of a complex address mode? */ + assert(nextDsc != nullptr); + // Is this multi-use part of a complex address mode? rsAddrSpillOper(nextDsc->spillAddr); - /* Mark the tree node as having been spilled */ - + // Mark the tree node as having been spilled rsMarkSpill(nextDsc->spillTree, reg); - /* lastDsc points to the last of the multi-spill descrs for 'reg' */ - + // lastDsc points to the last of the multi-spill descrs for 'reg' nextDsc->spillTemp = temp; #ifdef DEBUG @@ -1575,15 +1609,13 @@ void RegSet::rsSpillTree(regNumber reg, GenTreePtr tree) rsMultiDesc[reg] = nextDsc; - /* 'reg' is no longer considered to be multi-used. We will set this - mask again when this value gets unspilled */ - + // 'reg' is no longer considered to be multi-used. We will set this + // mask again when this value gets unspilled rsMaskMult &= ~mask; } #endif // LEGACY_BACKEND - /* Insert the spill descriptor(s) in the list */ - + // Insert the spill descriptor(s) in the list lastDsc->spillNext = rsSpillDesc[reg]; rsSpillDesc[reg] = spill; @@ -1591,20 +1623,26 @@ void RegSet::rsSpillTree(regNumber reg, GenTreePtr tree) if (m_rsCompiler->verbose) printf("\n"); #endif - /* Generate the code to spill the register */ + // Generate the code to spill the register var_types storeType = floatSpill ? 
treeType : tempType; m_rsCompiler->codeGen->spillReg(storeType, temp, reg); - /* Mark the tree node as having been spilled */ - + // Mark the tree node as having been spilled rsMarkSpill(tree, reg); #ifdef LEGACY_BACKEND - /* The register is now free */ - + // The register is now free rsMarkRegFree(mask); -#endif // LEGACY_BACKEND +#else + // In case of multi-reg call node also mark the specific + // result reg as spilled. + if (call != nullptr) + { + regFlags |= GTF_SPILLED; + call->SetRegSpillFlagByIdx(regFlags, regIdx); + } +#endif //!LEGACY_BACKEND } #if defined(_TARGET_X86_) && !FEATURE_STACK_FP_X87
@@ -2208,30 +2246,51 @@ regNumber RegSet::rsUnspillOneReg(GenTreePtr tree, } #endif // LEGACY_BACKEND -/***************************************************************************** - * The given tree operand has been spilled; just mark it as unspilled so - * that we can use it as "normal" local. - * It is the responsibility of the caller to free the spill temp. - */ - -TempDsc * RegSet::rsUnspillInPlace(GenTreePtr tree) +//--------------------------------------------------------------------- +// rsUnspillInPlace: The given tree operand has been spilled; just mark +// it as unspilled so that we can use it as a "normal" local. +// +// Arguments: +// tree - GenTree that needs to be marked as unspilled. +// oldReg - reg of tree that was spilled. +// regIdx - Index of the result register in case tree is a multi-reg call node; zero for single-reg producing tree nodes. +// +// Return Value: +// The temp that holds the spilled value. +// +// Assumptions: +// 1. It is the responsibility of the caller to free the spill temp. +// 2. RyuJIT backend specific: In case of a multi-reg call node, the +// GTF_SPILLED flag associated with the reg is cleared. It is the +// responsibility of the caller to clear the GTF_SPILLED flag on the call +// node itself after ensuring there are no outstanding regs in GTF_SPILLED +// state. +// +TempDsc* RegSet::rsUnspillInPlace(GenTreePtr tree, + regNumber oldReg, + unsigned regIdx /* =0 */) { - /* Get the tree's SpillDsc */ - assert(!isRegPairType(tree->gtType)); - regNumber oldReg = tree->gtRegNum; + // Get the tree's SpillDsc SpillDsc* prevDsc; SpillDsc* spillDsc = rsGetSpillInfo(tree, oldReg, &prevDsc); PREFIX_ASSUME(spillDsc != nullptr); - /* Get the temp */ - + // Get the temp TempDsc* temp = rsGetSpillTempWord(oldReg, spillDsc, prevDsc); - /* The value is now unspilled */ - - tree->gtFlags &= ~GTF_SPILLED; + // The value is now unspilled + if (tree->IsMultiRegCall()) + { + GenTreeCall* call = tree->AsCall(); + unsigned flags = call->GetRegSpillFlagByIdx(regIdx); + flags &= ~GTF_SPILLED; + call->SetRegSpillFlagByIdx(flags, regIdx); + } + else + { + tree->gtFlags &= ~GTF_SPILLED; + } #ifdef DEBUG if (m_rsCompiler->verbose)
diff --git a/src/jit/regset.h b/src/jit/regset.h index 2f1c5af1e5..a2ea3fe8a2 100644 --- a/src/jit/regset.h +++ b/src/jit/regset.h
@@ -75,17 +75,17 @@ private: // The same descriptor is also used for 'multi-use' register tracking, BTW.
struct SpillDsc { - SpillDsc * spillNext; // next spilled value of same reg + SpillDsc* spillNext; // next spilled value of same reg union { GenTreePtr spillTree; // the value that was spilled #ifdef LEGACY_BACKEND - LclVarDsc * spillVarDsc; // variable if it's an enregistered variable + LclVarDsc* spillVarDsc; // variable if it's an enregistered variable #endif // LEGACY_BACKEND }; - TempDsc * spillTemp; // the temp holding the spilled value + TempDsc* spillTemp; // the temp holding the spilled value #ifdef LEGACY_BACKEND GenTreePtr spillAddr; // owning complex address mode or nullptr @@ -98,8 +98,8 @@ private: }; #endif // LEGACY_BACKEND - static SpillDsc * alloc (Compiler * pComp, RegSet *regSet, var_types type); - static void freeDsc (RegSet *regSet, SpillDsc *spillDsc); + static SpillDsc* alloc (Compiler* pComp, RegSet* regSet, var_types type); + static void freeDsc (RegSet *regSet, SpillDsc* spillDsc); }; #ifdef LEGACY_BACKEND @@ -351,7 +351,8 @@ private: void rsSpillEnd (); void rsSpillTree (regNumber reg, - GenTreePtr tree); + GenTreePtr tree, + unsigned regIdx = 0); #if defined(_TARGET_X86_) && !FEATURE_STACK_FP_X87 void rsSpillFPStack(GenTreePtr tree); @@ -385,7 +386,9 @@ private: regMaskTP needReg); #endif // LEGACY_BACKEND - TempDsc * rsUnspillInPlace(GenTreePtr tree); + TempDsc* rsUnspillInPlace(GenTreePtr tree, + regNumber oldReg, + unsigned regIdx = 0); #ifdef LEGACY_BACKEND void rsUnspillReg (GenTreePtr tree, -- 2.34.1
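A standalone C++ sketch (not part of the patch; the type name and main function below are illustrative only) of the bitfield-plus-assert pattern that the lsra.h changes use for LocationInfo::multiRegIdx and RefPosition::multiRegIdx: the multi-reg index is stored in a 2-bit field, which is enough for indices 0..MAX_RET_REG_COUNT-1 when MAX_RET_REG_COUNT is at most 4, and the constructor or setter asserts that the stored value still equals the incoming index, so an out-of-range index is caught in a checked build instead of being silently truncated.

    #include <cassert>

    // Hypothetical stand-in for LocationInfo/RefPosition; not JIT code.
    struct MultiRegIndexHolder
    {
        unsigned multiRegIdx : 2;   // holds indices 0..3, i.e. 0..MAX_RET_REG_COUNT-1 when MAX_RET_REG_COUNT == 4

        explicit MultiRegIndexHolder(unsigned regIdx)
            : multiRegIdx(regIdx)
        {
            // If regIdx does not fit in two bits, the truncated field no longer
            // compares equal to the argument and this assert fires in a checked build.
            assert(multiRegIdx == regIdx);
        }
    };

    int main()
    {
        MultiRegIndexHolder ok(3);      // representable: 0..3 fit in two bits
        (void)ok;
        // MultiRegIndexHolder bad(4);  // would trip the assert: 4 truncates to 0
        return 0;
    }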