Implement long compare lowering for x86.
authorMike Danes <onemihaid@hotmail.com>
Thu, 8 Sep 2016 11:54:14 +0000 (14:54 +0300)
committerPat Gavlin <pagavlin@microsoft.com>
Wed, 14 Sep 2016 17:33:34 +0000 (10:33 -0700)
Comparisons between long-typed values on x86 require at least two and
at most three separate branches: one or two to compare the high 32
bits and one to compare the low 32 bits. In essence, each long
compare introduces two additional basic blocks into the flow graph,
but these blocks were not reified until code generation. Furthermore,
code generation of a long comparison used by a JTRUE was deferred
until the JTRUE itself without marking the inputs to the compare as
live at the JTRUE. Taken together, these representational issues
caused bugs like the one seen in the the following assembly:

       33DB         xor      ebx, ebx
       BE03000000   mov      esi, 3
       33FF         xor      edi, edi
       8B75F0       mov      esi, dword ptr [ebp-10H] ; these 2 are reloads inserted by LSRA
       8B7DEC       mov      edi, dword ptr [ebp-14H] ; between the compare and the branch
       3BDF         cmp      ebx, edi
       72D6         jb       SHORT G_M51005_IG03
       7704         ja       SHORT G_M51005_IG04
       3BC6         cmp      eax, esi
       72D0         jb       SHORT G_M51005_IG03

The reloads that LSRA has inserted have killed the registers assigned
to the inputs to the compare, thus causing the compare instructions
to read unexpected values (GH dotnet/coreclr#7038).

Although a number of alternatives were discussed, the best solution
seems to be to expose the control flow via the flow graph rather than
leaving it until code generation; this commit implements that approach.

Commit migrated from https://github.com/dotnet/coreclr/commit/4dc7c44f3e466067b73f7f28c0b69a7878b4ca20

src/coreclr/src/jit/codegenlinear.h
src/coreclr/src/jit/codegenxarch.cpp
src/coreclr/src/jit/lower.cpp
src/coreclr/src/jit/lower.h

index 7338b46..6cc437b 100644 (file)
@@ -53,7 +53,6 @@ void genCompareInt(GenTreePtr treeNode);
 
 #if !defined(_TARGET_64BIT_)
 void genCompareLong(GenTreePtr treeNode);
-void genJTrueLong(GenTreePtr treeNode);
 #endif
 
 #ifdef FEATURE_SIMD
index 7827366..6c73a0c 100644 (file)
@@ -2460,23 +2460,11 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
             // X86 Long comparison
             else if (varTypeIsLong(op1Type))
             {
-                // When not materializing the result in a register, the compare logic is generated
-                // when we generate the GT_JTRUE.
-                if (treeNode->gtRegNum != REG_NA)
-                {
-                    genCompareLong(treeNode);
-                }
-                else
-                {
-                    if ((treeNode->gtNext != nullptr) && (treeNode->gtNext->OperGet() != GT_JTRUE))
-                    {
-                        NYI("Long compare/reload/jtrue sequence");
-                    }
-
-                    // We generate the compare when we generate the GT_JTRUE, but we need to consume
-                    // the operands now.
-                    genConsumeOperands(treeNode->AsOp());
-                }
+#ifdef DEBUG
+                LIR::Use use;
+                assert((treeNode->gtRegNum != REG_NA) || !LIR::AsRange(compiler->compCurBB).TryGetUse(treeNode, &use));
+#endif
+                genCompareLong(treeNode);
             }
 #endif // !defined(_TARGET_64BIT_)
             else
@@ -2497,7 +2485,7 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
             // For long compares, we emit special logic
             if (varTypeIsLong(cmp->gtGetOp1()))
             {
-                genJTrueLong(cmp);
+                unreached();
             }
             else
 #endif
@@ -7034,8 +7022,6 @@ void CodeGen::genCompareLong(GenTreePtr treeNode)
 
     genConsumeOperands(tree);
 
-    assert(targetReg != REG_NA);
-
     GenTreePtr loOp1 = op1->gtGetOp1();
     GenTreePtr hiOp1 = op1->gtGetOp2();
     GenTreePtr loOp2 = op2->gtGetOp1();
@@ -7049,6 +7035,12 @@ void CodeGen::genCompareLong(GenTreePtr treeNode)
     // Emit the compare instruction
     getEmitter()->emitInsBinary(ins, cmpAttr, hiOp1, hiOp2);
 
+    // If the result is not being materialized in a register, we're done.
+    if (targetReg == REG_NA)
+    {
+        return;
+    }
+
     // Generate the first jump for the high compare
     CompareKind compareKind = ((tree->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED;
 
@@ -7119,152 +7111,6 @@ void CodeGen::genCompareLong(GenTreePtr treeNode)
         genProduceReg(tree);
     }
 }
-
-//------------------------------------------------------------------------
-// genJTrueLong: Generate code for comparing two longs on x86 for the case where the result
-// is not manifested in a register.
-//
-// Arguments:
-//    treeNode - the compare tree
-//
-// Return Value:
-//    None.
-// Comments:
-// For long compares, we need to compare the high parts of operands first, then the low parts.
-// We only have to do the low compare if the high parts of the operands are equal.
-//
-// In the case where the result of a rel-op is not realized in a register, we generate:
-//
-//    Opcode            x86 equivalent          Comment
-//    ------            --------------          -------
-//
-//    GT_LT; unsigned   cmp hiOp1,hiOp2
-//                      jb  trueLabel
-//                      ja  falseLabel
-//                      cmp loOp1,loOp2
-//                      jb  trueLabel
-//                      falseLabel:
-//
-//    GT_LE; unsigned   cmp hiOp1,hiOp2
-//                      jb  trueLabel
-//                      ja  falseLabel
-//                      cmp loOp1,loOp2
-//                      jbe trueLabel
-//                      falseLabel:
-//
-//    GT_GT; unsigned   cmp hiOp1,hiOp2
-//                      ja  trueLabel
-//                      jb  falseLabel
-//                      cmp loOp1,loOp2
-//                      ja  trueLabel
-//                      falseLabel:
-//
-//    GT_GE; unsigned   cmp hiOp1,hiOp2
-//                      ja  trueLabel
-//                      jb  falseLabel
-//                      cmp loOp1,loOp2
-//                      jae trueLabel
-//                      falseLabel:
-//
-//    GT_LT; signed     cmp hiOp1,hiOp2
-//                      jl  trueLabel
-//                      jg  falseLabel
-//                      cmp loOp1,loOp2
-//                      jb  trueLabel
-//                      falseLabel:
-//
-//    GT_LE; signed     cmp hiOp1,hiOp2
-//                      jl  trueLabel
-//                      jg  falseLabel
-//                      cmp loOp1,loOp2
-//                      jbe trueLabel
-//                      falseLabel:
-//
-//    GT_GT; signed     cmp hiOp1,hiOp2
-//                      jg  trueLabel
-//                      jl  falseLabel
-//                      cmp loOp1,loOp2
-//                      ja  trueLabel
-//                      falseLabel:
-//
-//    GT_GE; signed     cmp hiOp1,hiOp2
-//                      jg  trueLabel
-//                      jl  falseLabel
-//                      cmp loOp1,loOp2
-//                      jae trueLabel
-//                      falseLabel:
-//
-//    GT_EQ;            cmp hiOp1,hiOp2
-//                      jne falseLabel
-//                      cmp loOp1,loOp2
-//                      je  trueLabel
-//                      falseLabel:
-//
-//    GT_NE;            cmp hiOp1,hiOp2
-//                      jne labelTrue
-//                      cmp loOp1,loOp2
-//                      jne trueLabel
-//                      falseLabel:
-//
-// TODO-X86-CQ: Check if hi or lo parts of op2 are 0 and change the compare to a test.
-void CodeGen::genJTrueLong(GenTreePtr treeNode)
-{
-    assert(treeNode->OperIsCompare());
-
-    GenTreeOp* tree = treeNode->AsOp();
-    GenTreePtr op1  = tree->gtOp1;
-    GenTreePtr op2  = tree->gtOp2;
-
-    assert(varTypeIsLong(op1->TypeGet()));
-    assert(varTypeIsLong(op2->TypeGet()));
-
-    regNumber targetReg = treeNode->gtRegNum;
-
-    assert(targetReg == REG_NA);
-
-    GenTreePtr loOp1 = op1->gtGetOp1();
-    GenTreePtr hiOp1 = op1->gtGetOp2();
-    GenTreePtr loOp2 = op2->gtGetOp1();
-    GenTreePtr hiOp2 = op2->gtGetOp2();
-
-    // Emit the compare instruction
-    getEmitter()->emitInsBinary(INS_cmp, EA_4BYTE, hiOp1, hiOp2);
-
-    // Generate the first jump for the high compare
-    CompareKind compareKind = ((tree->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED;
-
-    // TODO-X86-CQ: If the next block is a BBJ_ALWAYS, we can set falseLabel = compiler->compCurBB->bbNext->bbJumpDest.
-    BasicBlock* falseLabel = genCreateTempLabel();
-
-    emitJumpKind jumpKindHi[2];
-
-    // Generate the jumps for the high compare
-    genJumpKindsForTreeLongHi(tree, jumpKindHi);
-
-    BasicBlock* trueLabel = compiler->compCurBB->bbJumpDest;
-
-    if (jumpKindHi[0] != EJ_NONE)
-    {
-        inst_JMP(jumpKindHi[0], trueLabel);
-    }
-
-    if (jumpKindHi[1] != EJ_NONE)
-    {
-        inst_JMP(jumpKindHi[1], falseLabel);
-    }
-
-    // The low jump must be unsigned
-    emitJumpKind jumpKindLo = genJumpKindForOper(tree->gtOper, CK_UNSIGNED);
-
-    // Emit the comparison and the jump to the trueLabel
-    getEmitter()->emitInsBinary(INS_cmp, EA_4BYTE, loOp1, loOp2);
-
-    inst_JMP(jumpKindLo, trueLabel);
-
-    // Generate falseLabel, which is the false path. We will jump here if the high compare is false
-    // or fall through if the low compare is false.
-    genDefineTempLabel(falseLabel);
-}
 #endif //! defined(_TARGET_64BIT_)
 
 //------------------------------------------------------------------------
index bca72b7..015020a 100644 (file)
@@ -149,6 +149,15 @@ GenTree* Lowering::LowerNode(GenTree* node)
             LowerCall(node);
             break;
 
+        case GT_LT:
+        case GT_LE:
+        case GT_GT:
+        case GT_GE:
+        case GT_EQ:
+        case GT_NE:
+            LowerCompare(node);
+            break;
+
         case GT_JMP:
             LowerJmpMethod(node);
             break;
@@ -1863,6 +1872,150 @@ GenTree* Lowering::LowerTailCallViaHelper(GenTreeCall* call, GenTree* callTarget
     return result;
 }
 
+void Lowering::LowerCompare(GenTree* cmp)
+{
+#ifndef _TARGET_64BIT_
+    if (cmp->gtGetOp1()->TypeGet() != TYP_LONG)
+    {
+        return;
+    }
+
+    LIR::Use cmpUse;
+    
+    if (!BlockRange().TryGetUse(cmp, &cmpUse) || cmpUse.User()->OperGet() != GT_JTRUE)
+    {
+        return;
+    }
+
+    GenTree* src1   = cmp->gtGetOp1();
+    GenTree* src2   = cmp->gtGetOp2();
+    unsigned weight = m_block->getBBWeight(comp);
+
+    LIR::Use loSrc1(BlockRange(), &(src1->gtOp.gtOp1), src1);
+    LIR::Use hiSrc1(BlockRange(), &(src1->gtOp.gtOp2), src1);
+    LIR::Use loSrc2(BlockRange(), &(src2->gtOp.gtOp1), src2);
+    LIR::Use hiSrc2(BlockRange(), &(src2->gtOp.gtOp2), src2);
+
+    if (loSrc1.Def()->OperGet() != GT_CNS_INT && loSrc1.Def()->OperGet() != GT_LCL_VAR)
+    {
+        loSrc1.ReplaceWithLclVar(comp, weight);
+    }
+    
+    if (hiSrc1.Def()->OperGet() != GT_CNS_INT && hiSrc1.Def()->OperGet() != GT_LCL_VAR)
+    {
+        hiSrc1.ReplaceWithLclVar(comp, weight);
+    }
+    
+    if (loSrc2.Def()->OperGet() != GT_CNS_INT && loSrc2.Def()->OperGet() != GT_LCL_VAR)
+    {
+        loSrc2.ReplaceWithLclVar(comp, weight);
+    }
+    
+    if (hiSrc2.Def()->OperGet() != GT_CNS_INT && hiSrc2.Def()->OperGet() != GT_LCL_VAR)
+    {
+        hiSrc2.ReplaceWithLclVar(comp, weight);
+    }
+
+    BasicBlock* jumpDest = m_block->bbJumpDest;
+    BasicBlock* nextDest = m_block->bbNext;
+    BasicBlock* newBlock = comp->fgSplitBlockAtEnd(m_block);
+
+    cmp->gtType     = TYP_INT;
+    cmp->gtOp.gtOp1 = hiSrc1.Def();
+    cmp->gtOp.gtOp2 = hiSrc2.Def();
+
+    if (cmp->OperGet() == GT_EQ || cmp->OperGet() == GT_NE)
+    {
+        BlockRange().Remove(loSrc1.Def());
+        BlockRange().Remove(loSrc2.Def());
+        GenTree* loCmp = comp->gtNewOperNode(cmp->OperGet(), TYP_INT, loSrc1.Def(), loSrc2.Def());
+        loCmp->gtFlags = cmp->gtFlags;
+        GenTree* loJcc = comp->gtNewOperNode(GT_JTRUE, TYP_VOID, loCmp);
+        LIR::AsRange(newBlock).InsertAfter(nullptr, loSrc1.Def(), loSrc2.Def(), loCmp, loJcc);
+
+        m_block->bbJumpKind = BBJ_COND;
+
+        if (cmp->OperGet() == GT_EQ)
+        {
+            cmp->gtOper         = GT_NE;
+            m_block->bbJumpDest = nextDest;
+            nextDest->bbFlags |= BBF_JMP_TARGET;
+            comp->fgAddRefPred(nextDest, m_block);
+        }
+        else
+        {
+            m_block->bbJumpDest = jumpDest;
+            comp->fgAddRefPred(jumpDest, m_block);
+        }
+
+        assert(newBlock->bbJumpKind == BBJ_COND);
+        assert(newBlock->bbJumpDest == jumpDest);
+    }
+    else
+    {
+        genTreeOps hiCmpOper;
+        genTreeOps loCmpOper;
+        
+        switch (cmp->OperGet())
+        {
+        case GT_LT:
+            cmp->gtOper = GT_GT;
+            hiCmpOper   = GT_LT;
+            loCmpOper   = GT_LT;
+            break;
+        case GT_LE:
+            cmp->gtOper = GT_GT;
+            hiCmpOper   = GT_LT;
+            loCmpOper   = GT_LE;
+            break;
+        case GT_GT:
+            cmp->gtOper = GT_LT;
+            hiCmpOper   = GT_GT;
+            loCmpOper   = GT_GT;
+            break;
+        case GT_GE:
+            cmp->gtOper = GT_LT;
+            hiCmpOper   = GT_GT;
+            loCmpOper   = GT_GE;
+            break;
+        default:
+            unreached();
+        }
+
+        BasicBlock* newBlock2 = comp->fgSplitBlockAtEnd(newBlock);
+
+        GenTree* hiSrc1Def = comp->gtClone(hiSrc1.Def());
+        GenTree* hiSrc2Def = comp->gtClone(hiSrc2.Def());
+        GenTree* hiCmp = comp->gtNewOperNode(hiCmpOper, TYP_INT, hiSrc1Def, hiSrc2Def);
+        hiCmp->gtFlags = cmp->gtFlags;
+        GenTree* hiJcc = comp->gtNewOperNode(GT_JTRUE, TYP_VOID, hiCmp);
+        LIR::AsRange(newBlock).InsertAfter(nullptr, hiSrc1Def, hiSrc2Def, hiCmp, hiJcc);
+
+        BlockRange().Remove(loSrc1.Def());
+        BlockRange().Remove(loSrc2.Def());
+        GenTree* loCmp = comp->gtNewOperNode(loCmpOper, TYP_INT, loSrc1.Def(), loSrc2.Def());
+        loCmp->gtFlags = cmp->gtFlags | GTF_UNSIGNED;
+        GenTree* loJcc = comp->gtNewOperNode(GT_JTRUE, TYP_VOID, loCmp);
+        LIR::AsRange(newBlock2).InsertAfter(nullptr, loSrc1.Def(), loSrc2.Def(), loCmp, loJcc);
+
+        m_block->bbJumpKind = BBJ_COND;
+        m_block->bbJumpDest = nextDest;
+        nextDest->bbFlags |= BBF_JMP_TARGET;
+        comp->fgAddRefPred(nextDest, m_block);
+
+        newBlock->bbJumpKind = BBJ_COND;
+        newBlock->bbJumpDest = jumpDest;
+        comp->fgAddRefPred(jumpDest, newBlock);
+
+        assert(newBlock2->bbJumpKind == BBJ_COND);
+        assert(newBlock2->bbJumpDest == jumpDest);
+    }
+
+    BlockRange().Remove(src1);
+    BlockRange().Remove(src2);
+#endif
+}
+
 // Lower "jmp <method>" tail call to insert PInvoke method epilog if required.
 void Lowering::LowerJmpMethod(GenTree* jmp)
 {
index 1450f03..dc6d867 100644 (file)
@@ -64,6 +64,7 @@ private:
     // Call Lowering
     // ------------------------------
     void LowerCall(GenTree* call);
+    void LowerCompare(GenTree* tree);
     void LowerJmpMethod(GenTree* jmp);
     void LowerRet(GenTree* ret);
     GenTree* LowerDelegateInvoke(GenTreeCall* call);