From: Mike Danes Date: Thu, 8 Sep 2016 11:54:14 +0000 (+0300) Subject: Implement long compare lowering for x86. X-Git-Tag: submit/tizen/20210909.063632~11030^2~9408^2~3 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=b0132b388d1b1f89cdcb7ef75ac3c052694e49f1;p=platform%2Fupstream%2Fdotnet%2Fruntime.git Implement long compare lowering for x86. Comparisons between long-typed values on x86 require at least two and at most three separate branches: one or two to compare the high 32 bits and one to compare the low 32 bits. In essence, each long compare introduces two additional basic blocks into the flow graph, but these blocks were not reified until code generation. Furthermore, code generation of a long comparison used by a JTRUE was deferred until the JTRUE itself without marking the inputs to the compare as live at the JTRUE. Taken together, these representational issues caused bugs like the one seen in the the following assembly: 33DB xor ebx, ebx BE03000000 mov esi, 3 33FF xor edi, edi 8B75F0 mov esi, dword ptr [ebp-10H] ; these 2 are reloads inserted by LSRA 8B7DEC mov edi, dword ptr [ebp-14H] ; between the compare and the branch 3BDF cmp ebx, edi 72D6 jb SHORT G_M51005_IG03 7704 ja SHORT G_M51005_IG04 3BC6 cmp eax, esi 72D0 jb SHORT G_M51005_IG03 The reloads that LSRA has inserted have killed the registers assigned to the inputs to the compare, thus causing the compare instructions to read unexpected values (GH dotnet/coreclr#7038). Although a number of alternatives were discussed, the best solution seems to be to expose the control flow via the flow graph rather than leaving it until code generation; this commit implements that approach. Commit migrated from https://github.com/dotnet/coreclr/commit/4dc7c44f3e466067b73f7f28c0b69a7878b4ca20 --- diff --git a/src/coreclr/src/jit/codegenlinear.h b/src/coreclr/src/jit/codegenlinear.h index 7338b46..6cc437b 100644 --- a/src/coreclr/src/jit/codegenlinear.h +++ b/src/coreclr/src/jit/codegenlinear.h @@ -53,7 +53,6 @@ void genCompareInt(GenTreePtr treeNode); #if !defined(_TARGET_64BIT_) void genCompareLong(GenTreePtr treeNode); -void genJTrueLong(GenTreePtr treeNode); #endif #ifdef FEATURE_SIMD diff --git a/src/coreclr/src/jit/codegenxarch.cpp b/src/coreclr/src/jit/codegenxarch.cpp index 7827366..6c73a0c 100644 --- a/src/coreclr/src/jit/codegenxarch.cpp +++ b/src/coreclr/src/jit/codegenxarch.cpp @@ -2460,23 +2460,11 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode) // X86 Long comparison else if (varTypeIsLong(op1Type)) { - // When not materializing the result in a register, the compare logic is generated - // when we generate the GT_JTRUE. - if (treeNode->gtRegNum != REG_NA) - { - genCompareLong(treeNode); - } - else - { - if ((treeNode->gtNext != nullptr) && (treeNode->gtNext->OperGet() != GT_JTRUE)) - { - NYI("Long compare/reload/jtrue sequence"); - } - - // We generate the compare when we generate the GT_JTRUE, but we need to consume - // the operands now. - genConsumeOperands(treeNode->AsOp()); - } +#ifdef DEBUG + LIR::Use use; + assert((treeNode->gtRegNum != REG_NA) || !LIR::AsRange(compiler->compCurBB).TryGetUse(treeNode, &use)); +#endif + genCompareLong(treeNode); } #endif // !defined(_TARGET_64BIT_) else @@ -2497,7 +2485,7 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode) // For long compares, we emit special logic if (varTypeIsLong(cmp->gtGetOp1())) { - genJTrueLong(cmp); + unreached(); } else #endif @@ -7034,8 +7022,6 @@ void CodeGen::genCompareLong(GenTreePtr treeNode) genConsumeOperands(tree); - assert(targetReg != REG_NA); - GenTreePtr loOp1 = op1->gtGetOp1(); GenTreePtr hiOp1 = op1->gtGetOp2(); GenTreePtr loOp2 = op2->gtGetOp1(); @@ -7049,6 +7035,12 @@ void CodeGen::genCompareLong(GenTreePtr treeNode) // Emit the compare instruction getEmitter()->emitInsBinary(ins, cmpAttr, hiOp1, hiOp2); + // If the result is not being materialized in a register, we're done. + if (targetReg == REG_NA) + { + return; + } + // Generate the first jump for the high compare CompareKind compareKind = ((tree->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED; @@ -7119,152 +7111,6 @@ void CodeGen::genCompareLong(GenTreePtr treeNode) genProduceReg(tree); } } - -//------------------------------------------------------------------------ -// genJTrueLong: Generate code for comparing two longs on x86 for the case where the result -// is not manifested in a register. -// -// Arguments: -// treeNode - the compare tree -// -// Return Value: -// None. -// Comments: -// For long compares, we need to compare the high parts of operands first, then the low parts. -// We only have to do the low compare if the high parts of the operands are equal. -// -// In the case where the result of a rel-op is not realized in a register, we generate: -// -// Opcode x86 equivalent Comment -// ------ -------------- ------- -// -// GT_LT; unsigned cmp hiOp1,hiOp2 -// jb trueLabel -// ja falseLabel -// cmp loOp1,loOp2 -// jb trueLabel -// falseLabel: -// -// GT_LE; unsigned cmp hiOp1,hiOp2 -// jb trueLabel -// ja falseLabel -// cmp loOp1,loOp2 -// jbe trueLabel -// falseLabel: -// -// GT_GT; unsigned cmp hiOp1,hiOp2 -// ja trueLabel -// jb falseLabel -// cmp loOp1,loOp2 -// ja trueLabel -// falseLabel: -// -// GT_GE; unsigned cmp hiOp1,hiOp2 -// ja trueLabel -// jb falseLabel -// cmp loOp1,loOp2 -// jae trueLabel -// falseLabel: -// -// GT_LT; signed cmp hiOp1,hiOp2 -// jl trueLabel -// jg falseLabel -// cmp loOp1,loOp2 -// jb trueLabel -// falseLabel: -// -// GT_LE; signed cmp hiOp1,hiOp2 -// jl trueLabel -// jg falseLabel -// cmp loOp1,loOp2 -// jbe trueLabel -// falseLabel: -// -// GT_GT; signed cmp hiOp1,hiOp2 -// jg trueLabel -// jl falseLabel -// cmp loOp1,loOp2 -// ja trueLabel -// falseLabel: -// -// GT_GE; signed cmp hiOp1,hiOp2 -// jg trueLabel -// jl falseLabel -// cmp loOp1,loOp2 -// jae trueLabel -// falseLabel: -// -// GT_EQ; cmp hiOp1,hiOp2 -// jne falseLabel -// cmp loOp1,loOp2 -// je trueLabel -// falseLabel: -// -// GT_NE; cmp hiOp1,hiOp2 -// jne labelTrue -// cmp loOp1,loOp2 -// jne trueLabel -// falseLabel: -// -// TODO-X86-CQ: Check if hi or lo parts of op2 are 0 and change the compare to a test. -void CodeGen::genJTrueLong(GenTreePtr treeNode) -{ - assert(treeNode->OperIsCompare()); - - GenTreeOp* tree = treeNode->AsOp(); - GenTreePtr op1 = tree->gtOp1; - GenTreePtr op2 = tree->gtOp2; - - assert(varTypeIsLong(op1->TypeGet())); - assert(varTypeIsLong(op2->TypeGet())); - - regNumber targetReg = treeNode->gtRegNum; - - assert(targetReg == REG_NA); - - GenTreePtr loOp1 = op1->gtGetOp1(); - GenTreePtr hiOp1 = op1->gtGetOp2(); - GenTreePtr loOp2 = op2->gtGetOp1(); - GenTreePtr hiOp2 = op2->gtGetOp2(); - - // Emit the compare instruction - getEmitter()->emitInsBinary(INS_cmp, EA_4BYTE, hiOp1, hiOp2); - - // Generate the first jump for the high compare - CompareKind compareKind = ((tree->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED; - - // TODO-X86-CQ: If the next block is a BBJ_ALWAYS, we can set falseLabel = compiler->compCurBB->bbNext->bbJumpDest. - BasicBlock* falseLabel = genCreateTempLabel(); - - emitJumpKind jumpKindHi[2]; - - // Generate the jumps for the high compare - genJumpKindsForTreeLongHi(tree, jumpKindHi); - - BasicBlock* trueLabel = compiler->compCurBB->bbJumpDest; - - if (jumpKindHi[0] != EJ_NONE) - { - inst_JMP(jumpKindHi[0], trueLabel); - } - - if (jumpKindHi[1] != EJ_NONE) - { - inst_JMP(jumpKindHi[1], falseLabel); - } - - // The low jump must be unsigned - emitJumpKind jumpKindLo = genJumpKindForOper(tree->gtOper, CK_UNSIGNED); - - // Emit the comparison and the jump to the trueLabel - getEmitter()->emitInsBinary(INS_cmp, EA_4BYTE, loOp1, loOp2); - - inst_JMP(jumpKindLo, trueLabel); - - // Generate falseLabel, which is the false path. We will jump here if the high compare is false - // or fall through if the low compare is false. - genDefineTempLabel(falseLabel); -} #endif //! defined(_TARGET_64BIT_) //------------------------------------------------------------------------ diff --git a/src/coreclr/src/jit/lower.cpp b/src/coreclr/src/jit/lower.cpp index bca72b7..015020a 100644 --- a/src/coreclr/src/jit/lower.cpp +++ b/src/coreclr/src/jit/lower.cpp @@ -149,6 +149,15 @@ GenTree* Lowering::LowerNode(GenTree* node) LowerCall(node); break; + case GT_LT: + case GT_LE: + case GT_GT: + case GT_GE: + case GT_EQ: + case GT_NE: + LowerCompare(node); + break; + case GT_JMP: LowerJmpMethod(node); break; @@ -1863,6 +1872,150 @@ GenTree* Lowering::LowerTailCallViaHelper(GenTreeCall* call, GenTree* callTarget return result; } +void Lowering::LowerCompare(GenTree* cmp) +{ +#ifndef _TARGET_64BIT_ + if (cmp->gtGetOp1()->TypeGet() != TYP_LONG) + { + return; + } + + LIR::Use cmpUse; + + if (!BlockRange().TryGetUse(cmp, &cmpUse) || cmpUse.User()->OperGet() != GT_JTRUE) + { + return; + } + + GenTree* src1 = cmp->gtGetOp1(); + GenTree* src2 = cmp->gtGetOp2(); + unsigned weight = m_block->getBBWeight(comp); + + LIR::Use loSrc1(BlockRange(), &(src1->gtOp.gtOp1), src1); + LIR::Use hiSrc1(BlockRange(), &(src1->gtOp.gtOp2), src1); + LIR::Use loSrc2(BlockRange(), &(src2->gtOp.gtOp1), src2); + LIR::Use hiSrc2(BlockRange(), &(src2->gtOp.gtOp2), src2); + + if (loSrc1.Def()->OperGet() != GT_CNS_INT && loSrc1.Def()->OperGet() != GT_LCL_VAR) + { + loSrc1.ReplaceWithLclVar(comp, weight); + } + + if (hiSrc1.Def()->OperGet() != GT_CNS_INT && hiSrc1.Def()->OperGet() != GT_LCL_VAR) + { + hiSrc1.ReplaceWithLclVar(comp, weight); + } + + if (loSrc2.Def()->OperGet() != GT_CNS_INT && loSrc2.Def()->OperGet() != GT_LCL_VAR) + { + loSrc2.ReplaceWithLclVar(comp, weight); + } + + if (hiSrc2.Def()->OperGet() != GT_CNS_INT && hiSrc2.Def()->OperGet() != GT_LCL_VAR) + { + hiSrc2.ReplaceWithLclVar(comp, weight); + } + + BasicBlock* jumpDest = m_block->bbJumpDest; + BasicBlock* nextDest = m_block->bbNext; + BasicBlock* newBlock = comp->fgSplitBlockAtEnd(m_block); + + cmp->gtType = TYP_INT; + cmp->gtOp.gtOp1 = hiSrc1.Def(); + cmp->gtOp.gtOp2 = hiSrc2.Def(); + + if (cmp->OperGet() == GT_EQ || cmp->OperGet() == GT_NE) + { + BlockRange().Remove(loSrc1.Def()); + BlockRange().Remove(loSrc2.Def()); + GenTree* loCmp = comp->gtNewOperNode(cmp->OperGet(), TYP_INT, loSrc1.Def(), loSrc2.Def()); + loCmp->gtFlags = cmp->gtFlags; + GenTree* loJcc = comp->gtNewOperNode(GT_JTRUE, TYP_VOID, loCmp); + LIR::AsRange(newBlock).InsertAfter(nullptr, loSrc1.Def(), loSrc2.Def(), loCmp, loJcc); + + m_block->bbJumpKind = BBJ_COND; + + if (cmp->OperGet() == GT_EQ) + { + cmp->gtOper = GT_NE; + m_block->bbJumpDest = nextDest; + nextDest->bbFlags |= BBF_JMP_TARGET; + comp->fgAddRefPred(nextDest, m_block); + } + else + { + m_block->bbJumpDest = jumpDest; + comp->fgAddRefPred(jumpDest, m_block); + } + + assert(newBlock->bbJumpKind == BBJ_COND); + assert(newBlock->bbJumpDest == jumpDest); + } + else + { + genTreeOps hiCmpOper; + genTreeOps loCmpOper; + + switch (cmp->OperGet()) + { + case GT_LT: + cmp->gtOper = GT_GT; + hiCmpOper = GT_LT; + loCmpOper = GT_LT; + break; + case GT_LE: + cmp->gtOper = GT_GT; + hiCmpOper = GT_LT; + loCmpOper = GT_LE; + break; + case GT_GT: + cmp->gtOper = GT_LT; + hiCmpOper = GT_GT; + loCmpOper = GT_GT; + break; + case GT_GE: + cmp->gtOper = GT_LT; + hiCmpOper = GT_GT; + loCmpOper = GT_GE; + break; + default: + unreached(); + } + + BasicBlock* newBlock2 = comp->fgSplitBlockAtEnd(newBlock); + + GenTree* hiSrc1Def = comp->gtClone(hiSrc1.Def()); + GenTree* hiSrc2Def = comp->gtClone(hiSrc2.Def()); + GenTree* hiCmp = comp->gtNewOperNode(hiCmpOper, TYP_INT, hiSrc1Def, hiSrc2Def); + hiCmp->gtFlags = cmp->gtFlags; + GenTree* hiJcc = comp->gtNewOperNode(GT_JTRUE, TYP_VOID, hiCmp); + LIR::AsRange(newBlock).InsertAfter(nullptr, hiSrc1Def, hiSrc2Def, hiCmp, hiJcc); + + BlockRange().Remove(loSrc1.Def()); + BlockRange().Remove(loSrc2.Def()); + GenTree* loCmp = comp->gtNewOperNode(loCmpOper, TYP_INT, loSrc1.Def(), loSrc2.Def()); + loCmp->gtFlags = cmp->gtFlags | GTF_UNSIGNED; + GenTree* loJcc = comp->gtNewOperNode(GT_JTRUE, TYP_VOID, loCmp); + LIR::AsRange(newBlock2).InsertAfter(nullptr, loSrc1.Def(), loSrc2.Def(), loCmp, loJcc); + + m_block->bbJumpKind = BBJ_COND; + m_block->bbJumpDest = nextDest; + nextDest->bbFlags |= BBF_JMP_TARGET; + comp->fgAddRefPred(nextDest, m_block); + + newBlock->bbJumpKind = BBJ_COND; + newBlock->bbJumpDest = jumpDest; + comp->fgAddRefPred(jumpDest, newBlock); + + assert(newBlock2->bbJumpKind == BBJ_COND); + assert(newBlock2->bbJumpDest == jumpDest); + } + + BlockRange().Remove(src1); + BlockRange().Remove(src2); +#endif +} + // Lower "jmp " tail call to insert PInvoke method epilog if required. void Lowering::LowerJmpMethod(GenTree* jmp) { diff --git a/src/coreclr/src/jit/lower.h b/src/coreclr/src/jit/lower.h index 1450f03..dc6d867 100644 --- a/src/coreclr/src/jit/lower.h +++ b/src/coreclr/src/jit/lower.h @@ -64,6 +64,7 @@ private: // Call Lowering // ------------------------------ void LowerCall(GenTree* call); + void LowerCompare(GenTree* tree); void LowerJmpMethod(GenTree* jmp); void LowerRet(GenTree* ret); GenTree* LowerDelegateInvoke(GenTreeCall* call);