From 6ef848ca17d13b05ab23a75a422c52e4dffe1b34 Mon Sep 17 00:00:00 2001 From: sivarv Date: Tue, 25 Oct 2016 16:52:08 -0700 Subject: [PATCH] Optimize 'test' instruction for op1 == 0 and op1 != 0 where op1 is known to set flags. --- src/jit/codegenxarch.cpp | 46 +++++++++++++++++++++++++++++++++++++++------- src/jit/gentree.cpp | 20 ++++++++++++++++++-- src/jit/gentree.h | 16 ++++++++++++---- src/jit/lowerxarch.cpp | 48 +++++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 116 insertions(+), 14 deletions(-) diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp index f63a268..cc7a58c 100644 --- a/src/jit/codegenxarch.cpp +++ b/src/jit/codegenxarch.cpp @@ -757,7 +757,7 @@ void CodeGen::genCodeForBinary(GenTree* treeNode) } // now we know there are 3 different operands so attempt to use LEA else if (oper == GT_ADD && !varTypeIsFloating(treeNode) && !treeNode->gtOverflowEx() // LEA does not set flags - && (op2->isContainedIntOrIImmed() || !op2->isContained())) + && (op2->isContainedIntOrIImmed() || !op2->isContained()) && !treeNode->gtSetFlags()) { if (op2->isContainedIntOrIImmed()) { @@ -5971,17 +5971,49 @@ void CodeGen::genCompareInt(GenTreePtr treeNode) var_types op2Type = op2->TypeGet(); regNumber targetReg = treeNode->gtRegNum; + // Case of op1 == 0 or op1 != 0: + // Optimize generation of 'test' instruction if op1 sets flags. + // + // Note that if LSRA has inserted any GT_RELOAD/GT_COPY before + // op1, it will not modify the flags set by codegen of op1. + // Similarly op1 could also be reg-optional at its use and + // it was spilled after producing its result in a register. + // Spill code too will not modify the flags set by op1. + GenTree* realOp1 = op1->gtSkipReloadOrCopy(); + if (realOp1->gtSetFlags()) + { + // op1 must set ZF and SF flags + assert(realOp1->gtSetZSFlags()); + + // Must be (in)equality against zero. + assert(tree->OperGet() == GT_EQ || tree->OperGet() == GT_NE); + assert(op2->IsIntegralConst(0)); + assert(op2->isContained()); + + // Just consume the operands + genConsumeOperands(tree); + + // No need to generate test instruction since + // op1 sets flags + + // Are we evaluating this into a register? + if (targetReg != REG_NA) + { + genSetRegToCond(targetReg, tree); + genProduceReg(tree); + } + + return; + } + #ifdef FEATURE_SIMD // If we have GT_JTRUE(GT_EQ/NE(GT_SIMD((in)Equality, v1, v2), true/false)), // then we don't need to generate code for GT_EQ/GT_NE, since SIMD (in)Equality intrinsic // would set or clear Zero flag. - // - // Is treeNode == or != that doesn't need to materialize result into a register? - if ((tree->OperGet() == GT_EQ || tree->OperGet() == GT_NE) && (targetReg == REG_NA)) + if ((targetReg == REG_NA) && (tree->OperGet() == GT_EQ || tree->OperGet() == GT_NE)) { - // Is it a SIMD (in)Equality that doesn't need to - // materialize result into a register? - if (op1->gtRegNum == REG_NA && op1->IsSIMDEqualityOrInequality()) + // Is it a SIMD (in)Equality that doesn't need to materialize result into a register? + if ((op1->gtRegNum == REG_NA) && op1->IsSIMDEqualityOrInequality()) { // Must be comparing against true or false. assert(op2->IsIntegralConst(0) || op2->IsIntegralConst(1)); diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp index b7e1c2e..ed75bc3 100644 --- a/src/jit/gentree.cpp +++ b/src/jit/gentree.cpp @@ -8603,14 +8603,13 @@ bool GenTree::gtSetFlags() const // // Precondition we have a GTK_SMPOP // - assert(OperIsSimple()); - if (!varTypeIsIntegralOrI(TypeGet())) { return false; } #if FEATURE_SET_FLAGS + assert(OperIsSimple()); if ((gtFlags & GTF_SET_FLAGS) && gtOper != GT_IND) { @@ -8624,6 +8623,7 @@ bool GenTree::gtSetFlags() const #else // !FEATURE_SET_FLAGS +#ifdef LEGACY_BACKEND #ifdef _TARGET_XARCH_ // Return true if/when the codegen for this node will set the flags // @@ -8645,6 +8645,22 @@ bool GenTree::gtSetFlags() const return false; #endif +#else // !LEGACY_BACKEND +#ifdef _TARGET_XARCH_ + if (((gtFlags & GTF_SET_FLAGS) != 0) && (gtOper != GT_IND)) + { + // GTF_SET_FLAGS is not valid on GT_IND and is overlaid with GTF_NONFAULTING_IND + return true; + } + else + { + return false; + } +#else + unreached(); +#endif +#endif // !LEGACY_BACKEND + #endif // !FEATURE_SET_FLAGS } diff --git a/src/jit/gentree.h b/src/jit/gentree.h index bb34050..db065c3 100644 --- a/src/jit/gentree.h +++ b/src/jit/gentree.h @@ -768,15 +768,15 @@ public: #ifdef LEGACY_BACKEND #define GTF_SPILLED_OPER 0x00000100 // op1 has been spilled #define GTF_SPILLED_OP2 0x00000200 // op2 has been spilled -#else +#else // !LEGACY_BACKEND #define GTF_NOREG_AT_USE 0x00000100 // tree node is in memory at the point of use -#endif // LEGACY_BACKEND +#endif // !LEGACY_BACKEND #define GTF_ZSF_SET 0x00000400 // the zero(ZF) and sign(SF) flags set to the operand -#if FEATURE_SET_FLAGS + #define GTF_SET_FLAGS 0x00000800 // Requires that codegen for this node set the flags // Use gtSetFlags() to check this flags -#endif + #define GTF_IND_NONFAULTING 0x00000800 // An indir that cannot fault. GTF_SET_FLAGS is not used on indirs #define GTF_MAKE_CSE 0x00002000 // Hoisted Expression: try hard to make this into CSE (see optPerformHoistExpr) @@ -1844,6 +1844,14 @@ public: bool gtOverflowEx() const; bool gtSetFlags() const; bool gtRequestSetFlags(); + + // Returns true if the codegen of this tree node + // sets ZF and SF flags. + bool gtSetZSFlags() const + { + return (gtFlags & GTF_ZSF_SET) != 0; + } + #ifdef DEBUG bool gtIsValid64RsltMul(); static int gtDispFlags(unsigned flags, unsigned debugFlags); diff --git a/src/jit/lowerxarch.cpp b/src/jit/lowerxarch.cpp index 11a50cc..c17eeff 100644 --- a/src/jit/lowerxarch.cpp +++ b/src/jit/lowerxarch.cpp @@ -342,7 +342,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) // integerCompareResult = GT_EQ/NE(simdCompareResult, true/false) // GT_JTRUE(integerCompareResult) // - // In this case we don't need to generate code fo GT_EQ_/NE, since SIMD (In)Equality + // In this case we don't need to generate code for GT_EQ_/NE, since SIMD (In)Equality // intrinsic would set or clear Zero flag. genTreeOps cmpOper = cmp->OperGet(); @@ -569,6 +569,11 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) info->internalFloatCount = 1; info->setInternalCandidates(l, l->internalFloatRegCandidates()); } + else + { + // Codegen of this tree node sets ZF and SF flags. + tree->gtFlags |= GTF_ZSF_SET; + } break; case GT_NOT: @@ -1164,6 +1169,9 @@ void Lowering::TreeNodeInfoInitShiftRotate(GenTree* tree) { MakeSrcContained(tree, shiftBy); } + + // Codegen of this tree node sets ZF and SF flags. + tree->gtFlags |= GTF_ZSF_SET; } //------------------------------------------------------------------------ @@ -2399,6 +2407,9 @@ void Lowering::TreeNodeInfoInitLogicalOp(GenTree* tree) // as reg optional. SetRegOptionalForBinOp(tree); } + + // Codegen of this tree node sets ZF and SF flags. + tree->gtFlags |= GTF_ZSF_SET; } //------------------------------------------------------------------------ @@ -3603,6 +3614,41 @@ void Lowering::TreeNodeInfoInitCmp(GenTreePtr tree) if (!op1IsMadeContained) { SetRegOptional(op1); + + // If op1 codegen sets ZF and SF flags and ==/!= against + // zero, we don't need to generate test instruction, + // provided we don't have another GenTree node between op1 + // and tree that could potentially modify flags. + // + // TODO-CQ: right now the below peep is inexpensive and + // gets the benefit in most of cases because in majority + // of cases op1, op2 and tree would be in that order in + // execution. In general we should be able to check that all + // the nodes that come after op1 in execution order do not + // modify the flags so that it is safe to avoid generating a + // test instruction. Such a check requires that on each + // GenTree node we need to set the info whether its codegen + // will modify flags. + // + // TODO-CQ: We can optimize compare against zero in the + // following cases by generating the branch as indicated + // against each case. + // 1) unsigned compare + // < 0 - always FALSE + // <= 0 - ZF=1 and jne + // > 0 - ZF=0 and je + // >= 0 - always TRUE + // + // 2) signed compare + // < 0 - SF=1 and js + // >= 0 - SF=0 and jns + if (isEqualityCompare && op1->gtSetZSFlags() && op2->IsIntegralConst(0) && (op1->gtNext == op2) && + (op2->gtNext == tree)) + { + // Require codegen of op1 to set the flags. + assert(!op1->gtSetFlags()); + op1->gtFlags |= GTF_SET_FLAGS; + } } } } -- 2.7.4