From 0a817e0195e76a94e88ccd6a8779ed6451e8640a Mon Sep 17 00:00:00 2001 From: sivarv Date: Mon, 18 Jul 2016 16:56:59 -0700 Subject: [PATCH] Support for reg optional tree temps. --- src/jit/codegencommon.cpp | 23 ++++ src/jit/codegeninterface.h | 3 + src/jit/codegenxarch.cpp | 12 ++- src/jit/emitxarch.cpp | 43 ++++++-- src/jit/gentree.cpp | 9 ++ src/jit/gentree.h | 27 ++++- src/jit/lower.h | 58 +++++++++- src/jit/lowerxarch.cpp | 258 +++++++++++++++++++++++++++++---------------- src/jit/lsra.cpp | 84 +++++++++++++-- src/jit/lsra.h | 21 ++-- 10 files changed, 418 insertions(+), 120 deletions(-) diff --git a/src/jit/codegencommon.cpp b/src/jit/codegencommon.cpp index 1196e61..ffd5b70 100755 --- a/src/jit/codegencommon.cpp +++ b/src/jit/codegencommon.cpp @@ -1398,6 +1398,29 @@ regNumber CodeGenInterface::genGetThisArgReg(GenTreePtr call) return REG_ARG_0; } +//---------------------------------------------------------------------- +// getSpillTempDsc: get the TempDsc corresponding to a spilled tree. +// +// Arguments: +// tree - spilled GenTree node +// +// Return Value: +// TempDsc corresponding to tree +TempDsc* CodeGenInterface::getSpillTempDsc(GenTree* tree) +{ + // tree must be in spilled state. + assert((tree->gtFlags & GTF_SPILLED) != 0); + + // Get the tree's SpillDsc. + RegSet::SpillDsc* prevDsc; + RegSet::SpillDsc* spillDsc = regSet.rsGetSpillInfo(tree, tree->gtRegNum, &prevDsc); + assert(spillDsc != nullptr); + + // Get the temp desc. + TempDsc* temp = regSet.rsGetSpillTempWord(tree->gtRegNum, spillDsc, prevDsc); + return temp; +} + #ifdef _TARGET_XARCH_ #ifdef _TARGET_AMD64_ diff --git a/src/jit/codegeninterface.h b/src/jit/codegeninterface.h index cb4774d..d321b57 100644 --- a/src/jit/codegeninterface.h +++ b/src/jit/codegeninterface.h @@ -313,6 +313,9 @@ public: void SpillFloat (regNumber reg, bool bIsCall = false); #endif // LEGACY_BACKEND + // The following method is used by xarch emitter for handling contained tree temps. + TempDsc* getSpillTempDsc(GenTree* tree); + public: emitter* getEmitter() { return m_cgEmitter; } protected: diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp index c7fc1fd..fedee03 100755 --- a/src/jit/codegenxarch.cpp +++ b/src/jit/codegenxarch.cpp @@ -1341,7 +1341,7 @@ void CodeGen::genCodeForDivMod(GenTreeOp* treeNode) { emit->emitInsBinary(genGetInsForOper(treeNode->gtOper, targetType), size, treeNode, divisor); } - else if (divisor->gtRegNum == targetReg) + else if (!divisor->isContained() && divisor->gtRegNum == targetReg) { // It is not possible to generate 2-operand divss or divsd where reg2 = reg1 / reg2 // because divss/divsd reg1, reg2 will over-write reg1. Therefore, in case of AMD64 @@ -1466,8 +1466,8 @@ void CodeGen::genCodeForBinary(GenTree* treeNode) // The arithmetic node must be sitting in a register (since it's not contained) noway_assert(targetReg != REG_NA); - regNumber op1reg = op1->gtRegNum; - regNumber op2reg = op2->gtRegNum; + regNumber op1reg = op1->isContained() ? REG_NA: op1->gtRegNum; + regNumber op2reg = op2->isContained() ? 
REG_NA: op2->gtRegNum; GenTreePtr dst; GenTreePtr src; @@ -5148,7 +5148,11 @@ void CodeGen::genConsumeRegs(GenTree* tree) if (tree->isContained()) { - if (tree->isIndir()) + if (tree->isContainedSpillTemp()) + { + // spill temps are un-tracked and hence no need to update life + } + else if (tree->isIndir()) { genConsumeAddress(tree->AsIndir()->Addr()); } diff --git a/src/jit/emitxarch.cpp b/src/jit/emitxarch.cpp index 388a51d..632cc02 100644 --- a/src/jit/emitxarch.cpp +++ b/src/jit/emitxarch.cpp @@ -2893,7 +2893,7 @@ regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, G { dblConst = src->AsDblCon(); } - + // find local field if any GenTreeLclFld* lclField = nullptr; if (src->isContainedLclField()) @@ -2918,9 +2918,25 @@ regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, G lclVar = dst->AsLclVar(); } + // find contained spill tmp if any + TempDsc* tmpDsc = nullptr; + if (src->isContainedSpillTemp()) + { + assert(src->IsRegOptional()); + tmpDsc = codeGen->getSpillTempDsc(src); + } + else if (dst->isContainedSpillTemp()) + { + assert(dst->IsRegOptional()); + tmpDsc = codeGen->getSpillTempDsc(dst); + } + // First handle the simple non-memory cases // - if ((mem == nullptr) && (lclField == nullptr) && (lclVar == nullptr)) + if ((mem == nullptr) && + (lclField == nullptr) && + (lclVar == nullptr) && + (tmpDsc == nullptr)) { if (intConst != nullptr) { @@ -2959,7 +2975,7 @@ regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, G // Next handle the cases where we have a stack based local memory operand. // unsigned varNum = BAD_VAR_NUM; - unsigned offset = (unsigned) -1; + unsigned offset = (unsigned)-1; if (lclField != nullptr) { @@ -2971,12 +2987,22 @@ regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, G varNum = lclVar->AsLclVarCommon()->GetLclNum(); offset = 0; } + else if (tmpDsc != nullptr) + { + varNum = tmpDsc->tdTempNum(); + offset = 0; + } - if (varNum != BAD_VAR_NUM) + // Spill temp numbers are negative and start with -1 + // which also happens to be BAD_VAR_NUM. For this reason + // we also need to check 'tmpDsc != nullptr' here. + if (varNum != BAD_VAR_NUM || + tmpDsc != nullptr) { // Is the memory op in the source position? if (src->isContainedLclField() || - src->isContainedLclVar()) + src->isContainedLclVar() || + src->isContainedSpillTemp()) { if (instrHasImplicitRegPairDest(ins)) { @@ -2993,7 +3019,7 @@ regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, G } else // The memory op is in the dest position. { - assert(dst->gtRegNum == REG_NA); + assert(dst->gtRegNum == REG_NA || dst->IsRegOptional()); // src could be int or reg if (src->isContainedIntOrIImmed()) @@ -3011,6 +3037,11 @@ regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, G } } + if (tmpDsc != nullptr) + { + emitComp->tmpRlsTemp(tmpDsc); + } + return dst->gtRegNum; } diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp index 54e2948..d5b4cba 100644 --- a/src/jit/gentree.cpp +++ b/src/jit/gentree.cpp @@ -13317,13 +13317,22 @@ GenTree::IsLclVarUpdateTree(GenTree** pOtherTree, genTreeOps *pOper) // until after the LSRA phase has allocated physical registers to the treenodes. 
bool GenTree::isContained() const { + if (isContainedSpillTemp()) + { + return true; + } + if (gtHasReg()) + { return false; + } // these actually produce a register (the flags reg, we just don't model it) // and are a separate instruction from the branch that consumes the result if (OperKind() & GTK_RELOP) + { return false; + } // TODO-Cleanup : this is not clean, would be nice to have some way of marking this. switch (OperGet()) diff --git a/src/jit/gentree.h b/src/jit/gentree.h index 85202ec..ef2cf7e 100644 --- a/src/jit/gentree.h +++ b/src/jit/gentree.h @@ -495,7 +495,9 @@ public: bool isContainedLclField() const { return isContained() && isLclField(); } - bool isContainedLclVar() const { return isContained() && (OperGet() == GT_LCL_VAR); } + bool isContainedLclVar() const { return isContained() && (OperGet() == GT_LCL_VAR); } + + bool isContainedSpillTemp() const; // Indicates whether it is a memory op. // Right now it includes Indir and LclField ops. @@ -503,7 +505,7 @@ public: bool isContainedMemoryOp() const { - return (isContained() && isMemoryOp()) || isContainedLclVar(); + return (isContained() && isMemoryOp()) || isContainedLclVar() || isContainedSpillTemp(); } regNumber GetRegNum() const @@ -665,11 +667,13 @@ public: #define GTF_REVERSE_OPS 0x00000020 // operand op2 should be evaluated before op1 (normally, op1 is evaluated first and op2 is evaluated second) #define GTF_REG_VAL 0x00000040 // operand is sitting in a register (or part of a TYP_LONG operand is sitting in a register) - #define GTF_SPILLED 0x00000080 // the value has been spilled - #define GTF_SPILLED_OPER 0x00000100 // op1 has been spilled + #define GTF_SPILLED 0x00000080 // the value has been spilled #ifdef LEGACY_BACKEND - #define GTF_SPILLED_OP2 0x00000200 // op2 has been spilled + #define GTF_SPILLED_OPER 0x00000100 // op1 has been spilled + #define GTF_SPILLED_OP2 0x00000200 // op2 has been spilled +#else + #define GTF_NOREG_AT_USE 0x00000100 // tree node is in memory at the point of use #endif // LEGACY_BACKEND #define GTF_REDINDEX_CHECK 0x00000100 // Used for redundant range checks. Disjoint from GTF_SPILLED_OPER @@ -4498,6 +4502,19 @@ GenTreeBlkOp::HasGCPtr() return false; } +inline bool GenTree::isContainedSpillTemp() const +{ +#if !defined(LEGACY_BACKEND) + // If spilled and no reg at use, then it is treated as contained. + if (((gtFlags & GTF_SPILLED) != 0) && + ((gtFlags & GTF_NOREG_AT_USE) != 0)) + { + return true; + } +#endif //!LEGACY_BACKEND + + return false; +} /*****************************************************************************/ diff --git a/src/jit/lower.h b/src/jit/lower.h index 4baeb7e..9f62978 100644 --- a/src/jit/lower.h +++ b/src/jit/lower.h @@ -143,8 +143,62 @@ private: void TreeNodeInfoInit(GenTreePtr* tree, GenTree* parent); #if defined(_TARGET_XARCH_) void TreeNodeInfoInitSimple(GenTree* tree); - void SetRegOptionalForBinOp(GenTree* tree); - void TryToSetRegOptional(GenTree* operand); + + //---------------------------------------------------------------------- + // SetRegOptional - sets a bit to indicate to LSRA that the register + // for a given tree node is optional for codegen purposes. If no + // register is allocated to such a tree node, its parent node treats + // it as a contained memory operand during codegen.
+ // + // Arguments: + // tree - GenTree node + // + // Returns + // None + void SetRegOptional(GenTree* tree) + { + tree->gtLsraInfo.regOptional = true; + } + + GenTree* PreferredRegOptionalOperand(GenTree* tree); + + // ------------------------------------------------------------------ + // SetRegOptionalForBinOp - Indicates which of the operands of a bin-op + // has an optional register requirement. The xarch instruction set allows + // either op1 or op2 of a binary operation (e.g. add, mul, etc.) to be + // a memory operand. This routine provides info to the register allocator + // about which of its operands optionally requires a register. LSRA might not + // allocate a register to RefTypeUse positions of such operands if it + // is beneficial. In such a case codegen will treat them as memory + // operands. + // + // Arguments: + // tree - GenTree of a binary operation. + // + // Returns + // None. + // + // Note: On xarch at most one of the operands will be marked as + // reg optional, even when both operands could be considered register + // optional. + void SetRegOptionalForBinOp(GenTree* tree) + { + assert(GenTree::OperIsBinary(tree->OperGet())); + + GenTree* op1 = tree->gtGetOp1(); + GenTree* op2 = tree->gtGetOp2(); + + if (tree->OperIsCommutative() && + tree->TypeGet() == op1->TypeGet()) + { + GenTree* preferredOp = PreferredRegOptionalOperand(tree); + SetRegOptional(preferredOp); + } + else if (tree->TypeGet() == op2->TypeGet()) + { + SetRegOptional(op2); + } + } #endif // defined(_TARGET_XARCH_) void TreeNodeInfoInitReturn(GenTree* tree); void TreeNodeInfoInitShiftRotate(GenTree* tree); diff --git a/src/jit/lowerxarch.cpp b/src/jit/lowerxarch.cpp index d41239c..8353f2c 100644 --- a/src/jit/lowerxarch.cpp +++ b/src/jit/lowerxarch.cpp @@ -561,7 +561,7 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) info->srcCount = 2; info->dstCount = 0; - GenTreePtr other = nullptr; + GenTreePtr other; if (CheckImmedAndMakeContained(tree, node->gtIndex)) { other = node->gtArrLen; } @@ -579,17 +579,17 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt) other = node->gtArrLen; } - if (other->isMemoryOp()) + if (node->gtIndex->TypeGet() == node->gtArrLen->TypeGet()) { - if (node->gtIndex->TypeGet() == node->gtArrLen->TypeGet()) + if (other->isMemoryOp()) { MakeSrcContained(tree, other); } - } - else - { - // since 'other' operand is not contained, we can mark it as reg optional - TryToSetRegOptional(other); + else + { + // We can mark 'other' as reg optional, since it is not contained. + SetRegOptional(other); + } } } break; @@ -2087,7 +2088,8 @@ Lowering::TreeNodeInfoInitModDiv(GenTree* tree) else { // If there are no containable operands, we can make an operand reg optional. - SetRegOptionalForBinOp(tree); + // SSE2 allows only op2 to be a memory-op. + SetRegOptional(op2); } return; @@ -2128,7 +2129,8 @@ Lowering::TreeNodeInfoInitModDiv(GenTree* tree) op2->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~(RBM_RAX | RBM_RDX)); // If there are no containable operands, we can make an operand reg optional. - SetRegOptionalForBinOp(tree); + // The div instruction allows only op2 to be a memory op. + SetRegOptional(op2); } } @@ -2167,7 +2169,7 @@ Lowering::TreeNodeInfoInitIntrinsic(GenTree* tree) { // Mark the operand as reg optional since codegen can still // generate code if op1 is on stack. - TryToSetRegOptional(op1); + SetRegOptional(op1); } break; @@ -2503,7 +2505,7 @@ Lowering::TreeNodeInfoInitCast(GenTree* tree) { // Mark castOp as reg optional to indicate codegen // can still generate code if it is on stack.
- TryToSetRegOptional(castOp); + SetRegOptional(castOp); } } } @@ -2861,18 +2863,16 @@ void Lowering::LowerCmp(GenTreePtr tree) { MakeSrcContained(tree, otherOp); } - else if (otherOp->isMemoryOp()) + else if (otherOp->isMemoryOp() && + ((otherOp == op2) || IsSafeToContainMem(tree, otherOp))) { - if ((otherOp == op2) || IsSafeToContainMem(tree, otherOp)) - { - MakeSrcContained(tree, otherOp); - } + MakeSrcContained(tree, otherOp); } else { - // Mark otherOp as reg optional to indicate codgen can still generate - // code even if otherOp is on stack. - TryToSetRegOptional(otherOp); + // SSE2 allows only otherOp to be a memory-op. Since otherOp is not + // contained, we can mark it reg-optional. + SetRegOptional(otherOp); } return; @@ -2950,6 +2950,7 @@ void Lowering::LowerCmp(GenTreePtr tree) } } } + if (op1CanBeContained) { if (op1->isMemoryOp()) @@ -2957,7 +2958,9 @@ void Lowering::LowerCmp(GenTreePtr tree) MakeSrcContained(tree, op1); } else - { + { + bool op1IsMadeContained = false; + // When op1 is a GT_AND we can often generate a single "test" instruction // instead of two instructions (an "and" instruction followed by a "cmp"/"test") // @@ -3083,6 +3086,7 @@ void Lowering::LowerCmp(GenTreePtr tree) } // Mark the 'op1' (the GT_AND) operand as contained MakeSrcContained(tree, op1); + op1IsMadeContained = true; // During Codegen we will now generate "test andOp1, andOp2CnsVal" } @@ -3127,8 +3131,8 @@ void Lowering::LowerCmp(GenTreePtr tree) assert(!castOp1->gtOverflowEx()); // Must not be an overflow checking operation GenTreePtr removeTreeNode = op1; - GenTreePtr removeTreeNodeChild = castOp1; tree->gtOp.gtOp1 = castOp1; + op1 = castOp1; castOp1->gtType = TYP_UBYTE; // trim down the value if castOp1 is an int constant since its type changed to UBYTE. @@ -3150,6 +3154,7 @@ void Lowering::LowerCmp(GenTreePtr tree) if (castOp1->isMemoryOp()) { MakeSrcContained(tree, op1); + op1IsMadeContained = true; } } } @@ -3163,6 +3168,12 @@ void Lowering::LowerCmp(GenTreePtr tree) #endif } } + + // If not made contained, op1 can be marked as reg-optional. + if (!op1IsMadeContained) + { + SetRegOptional(op1); + } } } } @@ -3181,12 +3192,7 @@ void Lowering::LowerCmp(GenTreePtr tree) // One of op1 or op2 could be marked as reg optional // to indicate that codgen can still generate code // if one of them is on stack. 
- TryToSetRegOptional(op2); - - if (!op2->IsRegOptional()) - { - TryToSetRegOptional(op1); - } + SetRegOptional(PreferredRegOptionalOperand(tree)); } if (varTypeIsSmall(op1Type) && varTypeIsUnsigned(op1Type)) @@ -3717,6 +3723,10 @@ void Lowering::SetMulOpCounts(GenTreePtr tree) bool requiresOverflowCheck = tree->gtOverflowEx(); bool useLeaEncoding = false; GenTreePtr memOp = nullptr; + + bool hasImpliedFirstOperand = false; + GenTreeIntConCommon* imm = nullptr; + GenTreePtr other = nullptr; // There are three forms of x86 multiply: // one-op form: RDX:RAX = RAX * r/m @@ -3740,26 +3750,25 @@ void Lowering::SetMulOpCounts(GenTreePtr tree) // In LSRA we set the kill set for this operation to RBM_RAX|RBM_RDX // info->setDstCandidates(m_lsra,RBM_RAX); + hasImpliedFirstOperand = true; } else if (tree->gtOper == GT_MULHI) { // have to use the encoding:RDX:RAX = RAX * rm info->setDstCandidates(m_lsra, RBM_RAX); + hasImpliedFirstOperand = true; } else if (IsContainableImmed(tree, op2) || IsContainableImmed(tree, op1)) { - GenTreeIntConCommon* imm; - GenTreePtr other; - if (IsContainableImmed(tree, op2)) { imm = op2->AsIntConCommon(); - other = op1; + other = op1; } else { imm = op1->AsIntConCommon(); - other = op2; + other = op2; } // CQ: We want to rewrite this into a LEA @@ -3770,11 +3779,12 @@ void Lowering::SetMulOpCounts(GenTreePtr tree) } MakeSrcContained(tree, imm); // The imm is always contained - if (other->isIndir()) + if (other->isMemoryOp()) { memOp = other; // memOp may be contained below } } + // We allow one operand to be a contained memory operand. // The memory op type must match with the 'tree' type. // This is because during codegen we use 'tree' type to derive EmitTypeSize. @@ -3790,17 +3800,28 @@ // if (!useLeaEncoding) { - if (memOp != nullptr) + if ((memOp != nullptr) && + (memOp->TypeGet() == tree->TypeGet()) && + IsSafeToContainMem(tree, memOp)) { - if ((memOp->TypeGet() == tree->TypeGet()) && - IsSafeToContainMem(tree, memOp)) - { - MakeSrcContained(tree, memOp); - } + MakeSrcContained(tree, memOp); + } + else if (imm != nullptr) + { + // Has a contained immediate operand. + // Only the 'other' operand can be marked as reg optional. + assert(other != nullptr); + SetRegOptional(other); + } + else if (hasImpliedFirstOperand) + { + // Only op2 can be marked as reg optional. + SetRegOptional(op2); } else { - // If there are no containable operands, we can make an operand reg optional. + // If there are no containable operands, we can mark either op1 or op2 + // as reg optional. SetRegOptionalForBinOp(tree); } } @@ -3869,67 +3890,128 @@ bool Lowering:: IsContainableImmed(GenTree* parentNode, GenTree* childNode) return true; } -//---------------------------------------------------------------------- -// TryToSetRegOptional - sets a bit to indicate to LSRA that register -// for a given tree node is optional for codegen purpose. If no -// register is allocated to such a tree node, its parent node treats -// it as a contained memory operand during codegen. -// -// Arguments: -// tree - GenTree node +//----------------------------------------------------------------------- +// PreferredRegOptionalOperand: returns one of the operands of the given +// binary oper that is to be preferred for marking as reg optional. // -// Returns -// None -// -// Note: Right now a tree node is marked as reg optional only -// if is it a GT_LCL_VAR. This routine needs to be modified if -// in future if lower/codegen needs to support other tree node -// types.
-void Lowering::TryToSetRegOptional(GenTree* tree) -{ - if (tree->OperGet() == GT_LCL_VAR) - { - tree->gtLsraInfo.regOptional = true; - } -} - -// ------------------------------------------------------------------ -// SetRegOptionalBinOp - Indicates which of the operands of a bin-op -// register requirement is optional. Xarch instruction set allows -// either of op1 or op2 of binary operation (e.g. add, mul etc) to be -// a memory operand. This routine provides info to register allocator -// which of its operands optionally require a register. Lsra might not -// allocate a register to RefTypeUse positions of such operands if it -// is beneficial. In such a case codegen will treat them as memory -// operands. +// Since only one of op1 or op2 can be a memory operand on xarch, only +// one of them needs to be marked as reg optional. Since Lower doesn't +// know a priori which of op1 or op2 is not likely to get a register, it +// has to make a guess. This routine encapsulates heuristics that +// guess whether it is likely to be beneficial to mark op1 or op2 as +// reg optional. +// // // Arguments: -// tree - Gentree of a bininary operation. +// tree - a binary-op tree node that is either commutative +// or a compare oper. // -// Returns -// None. -// -// Note: On xarch at most only one of the operands will be marked as -// reg optional, even when both operands could be considered register -// optional. -void Lowering::SetRegOptionalForBinOp(GenTree* tree) +// Returns: +// Returns op1 or op2 of the tree node, whichever is preferred for +// marking as reg optional. +// +// Note: if the tree oper is neither commutative nor a compare oper +// then only op2 can be reg optional on xarch and hence there is no +// need to call this routine. +GenTree* Lowering::PreferredRegOptionalOperand(GenTree* tree) { assert(GenTree::OperIsBinary(tree->OperGet())); + assert(tree->OperIsCommutative() || tree->OperIsCompare()); GenTree* op1 = tree->gtGetOp1(); GenTree* op2 = tree->gtGetOp2(); + GenTree* preferredOp = nullptr; - if (tree->TypeGet() == op2->TypeGet()) + // This routine uses the following heuristics: + // + // a) If both are tracked locals, marking the one with the lower weighted + // ref count as reg-optional would likely be beneficial, as it has a + // higher probability of not getting a register. + // + // b) op1 = tracked local and op2 = untracked local: LSRA creates two + // ref positions for op2: a def and use position. op2's def position + // requires a reg and it is allocated a reg by spilling another + // interval (if required) and that could even be op1. For this reason + // it is beneficial to mark op1 as reg optional. + // + // TODO: It is not always mandatory for a def position of an untracked + // local to be allocated a register if it is on the rhs of an assignment + // and its use position is reg-optional and has not been assigned a + // register. Reg-optional def positions are not yet supported. + // + // c) op1 = untracked local and op2 = tracked local: marking op1 as + // reg optional is beneficial, since its use position is less likely + // to get a register. + // + // d) If both are untracked locals (i.e. treated like tree temps by + // LSRA): though either of them could be marked as reg optional, + // marking op1 as reg optional is likely to be beneficial because + // while allocating op2's def position, there is a possibility of + // spilling op1's def, in which case op1 is treated as a contained + // memory operand rather than requiring a reload.
+ // e) If only one of them is a local var, prefer to mark it as + // reg-optional. This heuristic is based on results + // obtained from CQ perf benchmarks. + // + // f) If neither of them is a local var (i.e. they are tree temps), prefer to + // mark op1 as reg optional for the same reason as mentioned in (d) above. + if (op1->OperGet() == GT_LCL_VAR && + op2->OperGet() == GT_LCL_VAR) { - TryToSetRegOptional(op2); - } + LclVarDsc* v1 = comp->lvaTable + op1->AsLclVarCommon()->GetLclNum(); + LclVarDsc* v2 = comp->lvaTable + op2->AsLclVarCommon()->GetLclNum(); - if (!op2->IsRegOptional() && - tree->OperIsCommutative() && - tree->TypeGet() == op1->TypeGet()) + if (v1->lvTracked && v2->lvTracked) + { + // Both are tracked locals. The one with the lower weight is less + // likely to get a register, and hence it is beneficial to mark it + // as reg optional. + if (v1->lvRefCntWtd < v2->lvRefCntWtd) + { + preferredOp = op1; + } + else + { + preferredOp = op2; + } + } + else if (v2->lvTracked) + { + // v1 is an untracked lcl and its use position is less likely to + // get a register. + preferredOp = op1; + } + else if (v1->lvTracked) + { + // v2 is an untracked lcl and its def position always + // needs a reg. Hence it is better to mark v1 as + // reg optional. + preferredOp = op1; + } + else + { + preferredOp = op1; + } + } + else if (op1->OperGet() == GT_LCL_VAR) + { + preferredOp = op1; + } + else if (op2->OperGet() == GT_LCL_VAR) { - TryToSetRegOptional(op1); + preferredOp = op2; } + else + { + // Neither of the operands is a local; prefer marking the + // operand that is evaluated first as reg optional, + // since its use position is less likely to get a register. + bool reverseOps = ((tree->gtFlags & GTF_REVERSE_OPS) != 0); + preferredOp = reverseOps ? op2 : op1; + } + + return preferredOp; } #endif // _TARGET_XARCH_ diff --git a/src/jit/lsra.cpp b/src/jit/lsra.cpp index 9be61e4..266d68e 100644 --- a/src/jit/lsra.cpp +++ b/src/jit/lsra.cpp @@ -752,6 +752,7 @@ LinearScan::newRefPosition(regNumber reg, newRP->registerAssignment = mask; newRP->setMultiRegIdx(0); + newRP->setAllocateIfProfitable(0); associateRefPosWithInterval(newRP); @@ -835,6 +836,7 @@ LinearScan::newRefPosition(Interval* theInterval, newRP->registerAssignment = mask; newRP->setMultiRegIdx(multiRegIdx); + newRP->setAllocateIfProfitable(0); associateRefPosWithInterval(newRP); @@ -3023,6 +3025,7 @@ LinearScan::buildRefPositionsForNode(GenTree *tree, pos->isLocalDefUse = true; bool isLastUse = ((tree->gtFlags & GTF_VAR_DEATH) != 0); pos->lastUse = isLastUse; + pos->setAllocateIfProfitable(tree->IsRegOptional()); DBEXEC(VERBOSE, pos->dump()); return; } @@ -3216,6 +3219,7 @@ LinearScan::buildRefPositionsForNode(GenTree *tree, prefSrcInterval = i; } + bool regOptionalAtUse = useNode->IsRegOptional(); bool isLastUse = true; if (isCandidateLocalRef(useNode)) { } else { // For non-localVar uses we record nothing, - // as nothing needs to be written back to the tree) + // as nothing needs to be written back to the tree.
useNode = nullptr; } @@ -3260,7 +3264,15 @@ LinearScan::buildRefPositionsForNode(GenTree *tree, pos->delayRegFree = true; } - if (isLastUse) pos->lastUse = true; + if (isLastUse) + { + pos->lastUse = true; + } + + if (regOptionalAtUse) + { + pos->setAllocateIfProfitable(1); + } } JITDUMP("\n"); @@ -5145,7 +5157,26 @@ LinearScan::allocateBusyReg(Interval* current, else { - isBetterLocation = (nextLocation > farthestLocation); + + if (nextLocation > farthestLocation) + { + isBetterLocation = true; + } + else if (nextLocation == farthestLocation) + { + // Both weight and distance are equal. + // Prefer the ref position that is marked both reload and + // allocate if profitable. These ref positions don't need + // to be spilled as they are already in memory and + // codegen considers them as contained memory operands. + isBetterLocation = (recentAssignedRef != nullptr) && + recentAssignedRef->reload && + recentAssignedRef->AllocateIfProfitable(); + } + else + { + isBetterLocation = false; + } } } @@ -7395,7 +7427,11 @@ LinearScan::recordMaxSpill() void LinearScan::updateMaxSpill(RefPosition* refPosition) { - if (refPosition->spillAfter || refPosition->reload) + RefType refType = refPosition->refType; + + if (refPosition->spillAfter || + refPosition->reload || + (refPosition->AllocateIfProfitable() && refPosition->assignedReg() == REG_NA)) { Interval* interval = refPosition->getInterval(); if (!interval->isLocalVar) @@ -7406,8 +7442,8 @@ LinearScan::updateMaxSpill(RefPosition* refPosition) // 8-byte non-GC items, and 16-byte or 32-byte SIMD vectors. // LSRA is agnostic to those choices but needs // to know what they are here. - RefType refType = refPosition->refType; var_types typ; + #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE if ((refType == RefTypeUpperVectorSaveDef) || (refType == RefTypeUpperVectorSaveUse)) { @@ -7419,7 +7455,7 @@ LinearScan::updateMaxSpill(RefPosition* refPosition) GenTreePtr treeNode = refPosition->treeNode; if (treeNode == nullptr) { - assert(RefTypeIsUse(refPosition->refType)); + assert(RefTypeIsUse(refType)); treeNode = interval->firstRefPosition->treeNode; } assert(treeNode != nullptr); @@ -7451,6 +7487,17 @@ LinearScan::updateMaxSpill(RefPosition* refPosition) assert(currentSpill[typ] > 0); currentSpill[typ]--; } + else if (refPosition->AllocateIfProfitable() && + refPosition->assignedReg() == REG_NA) + { + // A spill temp is not getting reloaded into a reg because it is + // marked as allocate-if-profitable and is used directly from its + // memory location. To properly account for the max spill of typ we
+ assert(RefTypeIsUse(refType)); + assert(currentSpill[typ] > 0); + currentSpill[typ]--; + } JITDUMP(" Max spill for %s is %d\n", varTypeName(typ), maxSpill[typ]); } } @@ -7669,18 +7716,20 @@ LinearScan::resolveRegisters() if (treeNode == nullptr) { // This is either a use, a dead def, or a field of a struct - Interval * interval = currentRefPosition->getInterval(); + Interval* interval = currentRefPosition->getInterval(); assert(currentRefPosition->refType == RefTypeUse || currentRefPosition->registerAssignment == RBM_NONE || interval->isStructField); + // TODO-Review: Need to handle the case where any of the struct fields // are reloaded/spilled at this use assert(!interval->isStructField || (currentRefPosition->reload == false && currentRefPosition->spillAfter == false)); + if (interval->isLocalVar && !interval->isStructField) { - LclVarDsc * varDsc = interval->getLocalVar(compiler); + LclVarDsc* varDsc = interval->getLocalVar(compiler); // This must be a dead definition. We need to mark the lclVar // so that it's not considered a candidate for lvRegister, as @@ -7688,6 +7737,7 @@ LinearScan::resolveRegisters() assert(currentRefPosition->refType == RefTypeDef); varDsc->lvRegNum = REG_STK; } + JITDUMP("No tree node to write back to\n"); continue; } @@ -7784,7 +7834,25 @@ LinearScan::resolveRegisters() if (INDEBUG(alwaysInsertReload() ||) nextRefPosition->assignedReg() != currentRefPosition->assignedReg()) { - insertCopyOrReload(treeNode, currentRefPosition->getMultiRegIdx(), nextRefPosition); + if (nextRefPosition->assignedReg() != REG_NA) + { + insertCopyOrReload(treeNode, currentRefPosition->getMultiRegIdx(), nextRefPosition); + } + else + { + assert(nextRefPosition->AllocateIfProfitable()); + + // In case of tree temps, if def is spilled and use didn't + // get a register, set a flag on tree node to be treated as + // contained at the point of its use. + if (currentRefPosition->spillAfter && + currentRefPosition->refType == RefTypeDef && + nextRefPosition->refType == RefTypeUse) + { + assert(nextRefPosition->treeNode == nullptr); + treeNode->gtFlags |= GTF_NOREG_AT_USE; + } + } } } diff --git a/src/jit/lsra.h b/src/jit/lsra.h index 11af9be..9ce2bd7 100644 --- a/src/jit/lsra.h +++ b/src/jit/lsra.h @@ -1372,22 +1372,29 @@ public: ) && !AllocateIfProfitable(); } - // Returns true whether this ref position is to be allocated - // a reg only if it is profitable. Currently these are the + // Indicates whether this ref position is to be allocated + // a reg only if profitable. Currently these are the // ref positions that lower/codegen has indicated as reg // optional and is considered a contained memory operand if // no reg is allocated. + unsigned allocRegIfProfitable : 1; + + void setAllocateIfProfitable(unsigned val) + { + allocRegIfProfitable = val; + } + + // Returns true whether this ref position is to be allocated + // a reg only if it is profitable. bool AllocateIfProfitable() { // TODO-CQ: Right now if a ref position is marked as // copyreg or movereg, then it is not treated as // 'allocate if profitable'. This is an implementation // limitation that needs to be addressed. - return (refType == RefTypeUse) && - !copyReg && - !moveReg && - (treeNode != nullptr) && - treeNode->IsRegOptional(); + return allocRegIfProfitable && + !copyReg && + !moveReg; } // Used by RefTypeDef/Use positions of a multi-reg call node. -- 2.7.4