Merge pull request #7189 from pgavlin/x86-cmp-long
author    Pat Gavlin <pgavlin@gmail.com>
          Wed, 14 Sep 2016 23:40:41 +0000 (16:40 -0700)
committer GitHub <noreply@github.com>
          Wed, 14 Sep 2016 23:40:41 +0000 (16:40 -0700)
Implement long compare lowering for x86.
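For context: a 32-bit target has no single instruction that compares two TYP_LONG
values, so the compare must be decomposed into compares of the 32-bit halves. A
minimal sketch of the decomposition (illustration only, not JIT code; the function
name is invented):

    #include <cstdint>

    // Unsigned 64-bit '<' via 32-bit halves: the high halves decide unless
    // they are equal, in which case the low halves are compared -- always as
    // unsigned. For signed operators only the high compare keeps the signed
    // condition.
    static bool Below64(uint64_t x, uint64_t y)
    {
        uint32_t xHi = (uint32_t)(x >> 32);
        uint32_t yHi = (uint32_t)(y >> 32);
        if (xHi != yHi)
        {
            return xHi < yHi;
        }
        return (uint32_t)x < (uint32_t)y;
    }

This is why, in the lowered trees below, the high compare keeps the original
condition while the low compare is marked with GTF_UNSIGNED.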

src/jit/codegenxarch.cpp
src/jit/lower.cpp
src/jit/lowerarm64.cpp
src/jit/lsra.cpp

diff --combined src/jit/codegenxarch.cpp
@@@ -231,8 -231,6 +231,8 @@@ void CodeGen::genEmitGSCookieCheck(boo
      }
  
      regNumber regGSCheck;
 +    regMaskTP regMaskGSCheck = RBM_NONE;
 +
      if (!pushReg)
      {
          // Non-tail call: we can use any callee trash register that is not
      else
      {
  #ifdef _TARGET_X86_
 -        NYI_X86("Tail calls from methods that need GS check");
 -        regGSCheck = REG_NA;
 +        // It doesn't matter which register we pick, since we're going to save and restore it
 +        // around the check.
 +        // TODO-CQ: Can we optimize the choice of register to avoid doing the push/pop sometimes?
 +        regGSCheck     = REG_EAX;
 +        regMaskGSCheck = RBM_EAX;
  #else  // !_TARGET_X86_
          // Tail calls from methods that need GS check:  We need to preserve registers while
          // emitting GS cookie check for a tail prefixed call or a jmp. To emit GS cookie
  #endif // !_TARGET_X86_
      }
  
 +    regMaskTP   byrefPushedRegs = RBM_NONE;
 +    regMaskTP   norefPushedRegs = RBM_NONE;
 +    regMaskTP   pushedRegs      = RBM_NONE;
 +
      if (compiler->gsGlobalSecurityCookieAddr == nullptr)
      {
 +#if defined(_TARGET_AMD64_)
          // If GS cookie value fits within 32-bits we can use 'cmp mem64, imm32'.
          // Otherwise, load the value into a reg and use 'cmp mem64, reg64'.
          if ((int)compiler->gsGlobalSecurityCookieVal != (ssize_t)compiler->gsGlobalSecurityCookieVal)
              getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, regGSCheck, compiler->lvaGSSecurityCookie, 0);
          }
          else
 +#endif // defined(_TARGET_AMD64_)
          {
 +            assert((int)compiler->gsGlobalSecurityCookieVal == (ssize_t)compiler->gsGlobalSecurityCookieVal);
              getEmitter()->emitIns_S_I(INS_cmp, EA_PTRSIZE, compiler->lvaGSSecurityCookie, 0,
                                        (int)compiler->gsGlobalSecurityCookieVal);
          }
      else
      {
          // Ngen case - GS cookie value needs to be accessed through an indirection.
 +
 +        pushedRegs = genPushRegs(regMaskGSCheck, &byrefPushedRegs, &norefPushedRegs);
 +
          instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSCheck, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
          getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regGSCheck, regGSCheck, 0);
          getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, regGSCheck, compiler->lvaGSSecurityCookie, 0);
      inst_JMP(jmpEqual, gsCheckBlk);
      genEmitHelperCall(CORINFO_HELP_FAIL_FAST, 0, EA_UNKNOWN);
      genDefineTempLabel(gsCheckBlk);
 +
 +    genPopRegs(pushedRegs, byrefPushedRegs, norefPushedRegs);
  }
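As an aside, the "fits in an imm32" test near the top of this function is worth
restating on its own. A standalone equivalent (illustration only; the helper name
is invented):

    #include <cstdint>

    // On AMD64, a 64-bit cookie constant can be encoded directly in
    // 'cmp mem64, imm32' only if it survives the round trip through a
    // sign-extended 32-bit immediate.
    static bool FitsInSignExtendedImm32(intptr_t val)
    {
        return (intptr_t)(int32_t)val == val;
    }

On x86 this is trivially true, which is why the #if above restricts the
big-cookie path to _TARGET_AMD64_ and the x86 path asserts that the value fits.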
  
  /*****************************************************************************
@@@ -1316,8 -1299,8 +1316,8 @@@ void CodeGen::genCodeForDivMod(GenTreeO
  
  #ifdef _TARGET_X86_
      bool dividendIsLong = varTypeIsLong(dividend->TypeGet());
 -    GenTree* dividendLo;
 -    GenTree* dividendHi;
 +    GenTree* dividendLo = nullptr;
 +    GenTree* dividendHi = nullptr;
  
      if (dividendIsLong)
      {
@@@ -2477,23 -2460,18 +2477,18 @@@ void CodeGen::genCodeForTreeNode(GenTre
              // X86 Long comparison
              else if (varTypeIsLong(op1Type))
              {
-                 // When not materializing the result in a register, the compare logic is generated
-                 // when we generate the GT_JTRUE.
-                 if (treeNode->gtRegNum != REG_NA)
-                 {
-                     genCompareLong(treeNode);
-                 }
-                 else
-                 {
-                     if ((treeNode->gtNext != nullptr) && (treeNode->gtNext->OperGet() != GT_JTRUE))
-                     {
-                         NYI("Long compare/reload/jtrue sequence");
-                     }
+ #ifdef DEBUG
+                 // The result of an unlowered long compare on a 32-bit target must either be
+                 // a) materialized into a register, or
+                 // b) unused.
+                 //
+                 // A long compare that has a result that is used but not materialized into a register should
+                 // have been handled by Lowering::LowerCompare.
  
-                     // We generate the compare when we generate the GT_JTRUE, but we need to consume
-                     // the operands now.
-                     genConsumeOperands(treeNode->AsOp());
-                 }
+                 LIR::Use use;
+                 assert((treeNode->gtRegNum != REG_NA) || !LIR::AsRange(compiler->compCurBB).TryGetUse(treeNode, &use));
+ #endif
+                 genCompareLong(treeNode);
              }
  #endif // !defined(_TARGET_64BIT_)
              else
              assert(compiler->compCurBB->bbJumpKind == BBJ_COND);
  
  #if !defined(_TARGET_64BIT_)
-             // For long compares, we emit special logic
-             if (varTypeIsLong(cmp->gtGetOp1()))
-             {
-                 genJTrueLong(cmp);
-             }
-             else
+             // Long-typed compares should have been handled by Lowering::LowerCompare.
+             assert(!varTypeIsLong(cmp->gtGetOp1()));
  #endif
-             {
-                 // Get the "kind" and type of the comparison.  Note that whether it is an unsigned cmp
-                 // is governed by a flag NOT by the inherent type of the node
-                 // TODO-XArch-CQ: Check if we can use the currently set flags.
-                 emitJumpKind jumpKind[2];
-                 bool         branchToTrueLabel[2];
-                 genJumpKindsForTree(cmp, jumpKind, branchToTrueLabel);
-                 BasicBlock* skipLabel = nullptr;
-                 if (jumpKind[0] != EJ_NONE)
-                 {
-                     BasicBlock* jmpTarget;
-                     if (branchToTrueLabel[0])
-                     {
-                         jmpTarget = compiler->compCurBB->bbJumpDest;
-                     }
-                     else
-                     {
-                         // This case arises only for ordered GT_EQ right now
-                         assert((cmp->gtOper == GT_EQ) && ((cmp->gtFlags & GTF_RELOP_NAN_UN) == 0));
-                         skipLabel = genCreateTempLabel();
-                         jmpTarget = skipLabel;
-                     }
  
-                     inst_JMP(jumpKind[0], jmpTarget);
-                 }
+             // Get the "kind" and type of the comparison.  Note that whether it is an unsigned cmp
+             // is governed by a flag NOT by the inherent type of the node
+             // TODO-XArch-CQ: Check if we can use the currently set flags.
+             emitJumpKind jumpKind[2];
+             bool         branchToTrueLabel[2];
+             genJumpKindsForTree(cmp, jumpKind, branchToTrueLabel);
  
-                 if (jumpKind[1] != EJ_NONE)
+             BasicBlock* skipLabel = nullptr;
+             if (jumpKind[0] != EJ_NONE)
+             {
+                 BasicBlock* jmpTarget;
+                 if (branchToTrueLabel[0])
                  {
-                     // the second conditional branch always has to be to the true label
-                     assert(branchToTrueLabel[1]);
-                     inst_JMP(jumpKind[1], compiler->compCurBB->bbJumpDest);
+                     jmpTarget = compiler->compCurBB->bbJumpDest;
                  }
-                 if (skipLabel != nullptr)
+                 else
                  {
-                     genDefineTempLabel(skipLabel);
+                     // This case arises only for ordered GT_EQ right now
+                     assert((cmp->gtOper == GT_EQ) && ((cmp->gtFlags & GTF_RELOP_NAN_UN) == 0));
+                     skipLabel = genCreateTempLabel();
+                     jmpTarget = skipLabel;
                  }
+                 inst_JMP(jumpKind[0], jmpTarget);
+             }
+             if (jumpKind[1] != EJ_NONE)
+             {
+                 // the second conditional branch always has to be to the true label
+                 assert(branchToTrueLabel[1]);
+                 inst_JMP(jumpKind[1], compiler->compCurBB->bbJumpDest);
+             }
+             if (skipLabel != nullptr)
+             {
+                 genDefineTempLabel(skipLabel);
              }
          }
          break;
  
+         case GT_JCC:
+         {
+             GenTreeJumpCC* jcc = treeNode->AsJumpCC();
+             assert(compiler->compCurBB->bbJumpKind == BBJ_COND);
+             CompareKind compareKind = ((jcc->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED;
+             emitJumpKind jumpKind   = genJumpKindForOper(jcc->gtCondition, compareKind);
+             inst_JMP(jumpKind, compiler->compCurBB->bbJumpDest);
+         }
+         break;
+
          case GT_RETURNTRAP:
          {
              // this is nothing but a conditional call to CORINFO_HELP_STOP_FOR_GC
@@@ -5246,7 -5232,7 +5249,7 @@@ void CodeGen::genConsumeRegs(GenTree* t
          }
          else if (tree->OperGet() == GT_AND)
          {
-             // This is the special contained GT_AND that we created in Lowering::LowerCmp()
+             // This is the special contained GT_AND that we created in Lowering::TreeNodeInfoInitCmp()
              // Now we need to consume the operands of the GT_AND node.
              genConsumeOperands(tree->AsOp());
          }
@@@ -6239,14 -6225,6 +6242,14 @@@ void CodeGen::genCallInstruction(GenTre
  
  #endif // defined(_TARGET_X86_)
  
 +    if (call->IsTailCallViaHelper())
 +    {
 +        if (compiler->getNeedsGSSecurityCookie())
 +        {
 +            genEmitGSCookieCheck(true);
 +        }
 +    }
 +
      if (target != nullptr)
      {
          if (target->isContainedIndir())
@@@ -7059,8 -7037,6 +7062,6 @@@ void CodeGen::genCompareLong(GenTreePt
  
      genConsumeOperands(tree);
  
-     assert(targetReg != REG_NA);
      GenTreePtr loOp1 = op1->gtGetOp1();
      GenTreePtr hiOp1 = op1->gtGetOp2();
      GenTreePtr loOp2 = op2->gtGetOp1();
      // Emit the compare instruction
      getEmitter()->emitInsBinary(ins, cmpAttr, hiOp1, hiOp2);
  
+     // If the result is not being materialized in a register, we're done.
+     if (targetReg == REG_NA)
+     {
+         return;
+     }
+
      // Generate the first jump for the high compare
      CompareKind compareKind = ((tree->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED;
  
          genProduceReg(tree);
      }
  }
- //------------------------------------------------------------------------
- // genJTrueLong: Generate code for comparing two longs on x86 for the case where the result
- // is not manifested in a register.
- //
- // Arguments:
- //    treeNode - the compare tree
- //
- // Return Value:
- //    None.
- // Comments:
- // For long compares, we need to compare the high parts of operands first, then the low parts.
- // We only have to do the low compare if the high parts of the operands are equal.
- //
- // In the case where the result of a rel-op is not realized in a register, we generate:
- //
- //    Opcode            x86 equivalent          Comment
- //    ------            --------------          -------
- //
- //    GT_LT; unsigned   cmp hiOp1,hiOp2
- //                      jb  trueLabel
- //                      ja  falseLabel
- //                      cmp loOp1,loOp2
- //                      jb  trueLabel
- //                      falseLabel:
- //
- //    GT_LE; unsigned   cmp hiOp1,hiOp2
- //                      jb  trueLabel
- //                      ja  falseLabel
- //                      cmp loOp1,loOp2
- //                      jbe trueLabel
- //                      falseLabel:
- //
- //    GT_GT; unsigned   cmp hiOp1,hiOp2
- //                      ja  trueLabel
- //                      jb  falseLabel
- //                      cmp loOp1,loOp2
- //                      ja  trueLabel
- //                      falseLabel:
- //
- //    GT_GE; unsigned   cmp hiOp1,hiOp2
- //                      ja  trueLabel
- //                      jb  falseLabel
- //                      cmp loOp1,loOp2
- //                      jae trueLabel
- //                      falseLabel:
- //
- //    GT_LT; signed     cmp hiOp1,hiOp2
- //                      jl  trueLabel
- //                      jg  falseLabel
- //                      cmp loOp1,loOp2
- //                      jb  trueLabel
- //                      falseLabel:
- //
- //    GT_LE; signed     cmp hiOp1,hiOp2
- //                      jl  trueLabel
- //                      jg  falseLabel
- //                      cmp loOp1,loOp2
- //                      jbe trueLabel
- //                      falseLabel:
- //
- //    GT_GT; signed     cmp hiOp1,hiOp2
- //                      jg  trueLabel
- //                      jl  falseLabel
- //                      cmp loOp1,loOp2
- //                      ja  trueLabel
- //                      falseLabel:
- //
- //    GT_GE; signed     cmp hiOp1,hiOp2
- //                      jg  trueLabel
- //                      jl  falseLabel
- //                      cmp loOp1,loOp2
- //                      jae trueLabel
- //                      falseLabel:
- //
- //    GT_EQ;            cmp hiOp1,hiOp2
- //                      jne falseLabel
- //                      cmp loOp1,loOp2
- //                      je  trueLabel
- //                      falseLabel:
- //
- //    GT_NE;            cmp hiOp1,hiOp2
- //                      jne labelTrue
- //                      cmp loOp1,loOp2
- //                      jne trueLabel
- //                      falseLabel:
- //
- // TODO-X86-CQ: Check if hi or lo parts of op2 are 0 and change the compare to a test.
- void CodeGen::genJTrueLong(GenTreePtr treeNode)
- {
-     assert(treeNode->OperIsCompare());
-     GenTreeOp* tree = treeNode->AsOp();
-     GenTreePtr op1  = tree->gtOp1;
-     GenTreePtr op2  = tree->gtOp2;
-     assert(varTypeIsLong(op1->TypeGet()));
-     assert(varTypeIsLong(op2->TypeGet()));
-     regNumber targetReg = treeNode->gtRegNum;
-     assert(targetReg == REG_NA);
-     GenTreePtr loOp1 = op1->gtGetOp1();
-     GenTreePtr hiOp1 = op1->gtGetOp2();
-     GenTreePtr loOp2 = op2->gtGetOp1();
-     GenTreePtr hiOp2 = op2->gtGetOp2();
-     // Emit the compare instruction
-     getEmitter()->emitInsBinary(INS_cmp, EA_4BYTE, hiOp1, hiOp2);
-     // Generate the first jump for the high compare
-     CompareKind compareKind = ((tree->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED;
-     // TODO-X86-CQ: If the next block is a BBJ_ALWAYS, we can set falseLabel = compiler->compCurBB->bbNext->bbJumpDest.
-     BasicBlock* falseLabel = genCreateTempLabel();
-     emitJumpKind jumpKindHi[2];
-     // Generate the jumps for the high compare
-     genJumpKindsForTreeLongHi(tree, jumpKindHi);
-     BasicBlock* trueLabel = compiler->compCurBB->bbJumpDest;
-     if (jumpKindHi[0] != EJ_NONE)
-     {
-         inst_JMP(jumpKindHi[0], trueLabel);
-     }
-     if (jumpKindHi[1] != EJ_NONE)
-     {
-         inst_JMP(jumpKindHi[1], falseLabel);
-     }
-     // The low jump must be unsigned
-     emitJumpKind jumpKindLo = genJumpKindForOper(tree->gtOper, CK_UNSIGNED);
-     // Emit the comparison and the jump to the trueLabel
-     getEmitter()->emitInsBinary(INS_cmp, EA_4BYTE, loOp1, loOp2);
-     inst_JMP(jumpKindLo, trueLabel);
-     // Generate falseLabel, which is the false path. We will jump here if the high compare is false
-     // or fall through if the low compare is false.
-     genDefineTempLabel(falseLabel);
- }
  #endif //! defined(_TARGET_64BIT_)
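The jump tables in the comment deleted above can be checked mechanically. A small
standalone harness (illustration only, not part of the change) that validates the
hi/lo decomposition for the signed ordinal operators against native 64-bit
compares:

    #include <cassert>
    #include <cstdint>

    // Per the deleted tables: the high compare uses the signed condition, and
    // the low compare runs only when the high halves are equal -- as unsigned.
    static bool Lt64(int64_t x, int64_t y)
    {
        int32_t hiX = (int32_t)(x >> 32), hiY = (int32_t)(y >> 32);
        return (hiX != hiY) ? (hiX < hiY) : ((uint32_t)x < (uint32_t)y);
    }

    static bool Le64(int64_t x, int64_t y)
    {
        int32_t hiX = (int32_t)(x >> 32), hiY = (int32_t)(y >> 32);
        return (hiX != hiY) ? (hiX < hiY) : ((uint32_t)x <= (uint32_t)y);
    }

    int main()
    {
        const int64_t vals[] = {INT64_MIN, -4294967296LL, -1, 0, 1, 3,
                                4294967296LL, INT64_MAX};
        for (int64_t a : vals)
        {
            for (int64_t b : vals)
            {
                assert(Lt64(a, b) == (a < b));
                assert(Le64(a, b) == (a <= b));
            }
        }
        return 0;
    }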
  
  //------------------------------------------------------------------------
@@@ -7469,7 -7305,7 +7330,7 @@@ void CodeGen::genCompareInt(GenTreePtr 
      {
          // Do we have a short compare against a constant in op2?
          //
-         // We checked for this case in LowerCmp() and if we can perform a small
+         // We checked for this case in TreeNodeInfoInitCmp() and if we can perform a small
          // compare immediate we labeled this compare with a GTF_RELOP_SMALL
          // and for unsigned small non-equality compares the GTF_UNSIGNED flag.
          //
          if (op1->isContained())
          {
              // op1 can be a contained memory op
-             // or the special contained GT_AND that we created in Lowering::LowerCmp()
+             // or the special contained GT_AND that we created in Lowering::TreeNodeInfoInitCmp()
              //
              if ((op1->OperGet() == GT_AND))
              {
diff --combined src/jit/lower.cpp
@@@ -149,6 -149,15 +149,15 @@@ GenTree* Lowering::LowerNode(GenTree* n
              LowerCall(node);
              break;
  
+         case GT_LT:
+         case GT_LE:
+         case GT_GT:
+         case GT_GE:
+         case GT_EQ:
+         case GT_NE:
+             LowerCompare(node);
+             break;
+
          case GT_JMP:
              LowerJmpMethod(node);
              break;
@@@ -1719,10 -1728,7 +1728,10 @@@ GenTree* Lowering::LowerTailCallViaHelp
      assert(!comp->opts.compNeedSecurityCheck);               // tail call from methods that need security check
      assert(!call->IsUnmanaged());                            // tail calls to unmanaged methods
      assert(!comp->compLocallocUsed);                         // tail call from methods that also do localloc
 +
 +#ifdef _TARGET_AMD64_
      assert(!comp->getNeedsGSSecurityCookie());               // jit64 compat: tail calls from methods that need GS check
 +#endif // _TARGET_AMD64_
  
      // We expect to see a call that meets the following conditions
      assert(call->IsTailCallViaHelper());
      return result;
  }
  
+ //------------------------------------------------------------------------
+ // Lowering::LowerCompare: lowers a compare node.
+ //
+ // For 64-bit targets, this doesn't do much of anything: all comparisons
+ // that we support can be handled in code generation on such targets.
+ //
+ // For 32-bit targets, however, any comparison that feeds a `GT_JTRUE`
+ // node must be lowered such that the liveness of the operands to the
+ // comparison is properly visible to the rest of the backend. As such,
+ // a 64-bit comparison is lowered from something like this:
+ //
+ //    ------------ BB02 [004..014) -> BB02 (cond), preds={BB02,BB01} succs={BB03,BB02}
+ //    N001 (  1,  1) [000006] ------------        t6 =    lclVar    int    V02 loc0         u:5 $148
+ //
+ //                                                     /--*  t6     int
+ //    N002 (  2,  3) [000007] ---------U--        t7 = *  cast      long <- ulong <- uint $3c0
+ //
+ //    N003 (  3, 10) [000009] ------------        t9 =    lconst    long   0x0000000000000003 $101
+ //
+ //                                                     /--*  t7     long
+ //                                                     +--*  t9     long
+ //    N004 (  9, 17) [000010] N------N-U--       t10 = *  <         int    $149
+ //
+ //                                                     /--*  t10    int
+ //    N005 ( 11, 19) [000011] ------------             *  jmpTrue   void
+ //
+ // To something like this:
+ //
+ //    ------------ BB02 [004..014) -> BB03 (cond), preds={BB06,BB07,BB01} succs={BB06,BB03}
+ //                   [000099] ------------       t99 =    const     int    0
+ //
+ //                   [000101] ------------      t101 =    const     int    0
+ //
+ //                                                     /--*  t99    int
+ //                                                     +--*  t101   int
+ //    N004 (  9, 17) [000010] N------N-U--       t10 = *  >         int    $149
+ //
+ //                                                     /--*  t10    int
+ //    N005 ( 11, 19) [000011] ------------             *  jmpTrue   void
+ //
+ //
+ //    ------------ BB06 [???..???) -> BB02 (cond), preds={BB02} succs={BB07,BB02}
+ //                   [000105] -------N-U--                jcc       void   cond=<
+ //
+ //
+ //    ------------ BB07 [???..???) -> BB02 (cond), preds={BB06} succs={BB03,BB02}
+ //    N001 (  1,  1) [000006] ------------        t6 =    lclVar    int    V02 loc0         u:5 $148
+ //
+ //    N003 (  3, 10) [000009] ------------        t9 =    const     int    3
+ //
+ //                                                     /--*  t6     int
+ //                                                     +--*  t9     int
+ //                   [000106] N------N-U--      t106 = *  <         int
+ //
+ //                                                     /--*  t106   int
+ //                   [000107] ------------             *  jmpTrue   void
+ //
+ // Which will eventually generate code similar to the following:
+ //
+ //    33DB         xor      ebx, ebx
+ //    85DB         test     ebx, ebx
+ //    7707         ja       SHORT G_M50523_IG04
+ //    72E7         jb       SHORT G_M50523_IG03
+ //    83F803       cmp      eax, 3
+ //    72E2         jb       SHORT G_M50523_IG03
+ //
+ void Lowering::LowerCompare(GenTree* cmp)
+ {
+ #ifndef _TARGET_64BIT_
+     if (cmp->gtGetOp1()->TypeGet() != TYP_LONG)
+     {
+         return;
+     }
+     LIR::Use cmpUse;
+     
+     if (!BlockRange().TryGetUse(cmp, &cmpUse) || cmpUse.User()->OperGet() != GT_JTRUE)
+     {
+         return;
+     }
+     GenTree* src1   = cmp->gtGetOp1();
+     GenTree* src2   = cmp->gtGetOp2();
+     unsigned weight = m_block->getBBWeight(comp);
+     LIR::Use loSrc1(BlockRange(), &(src1->gtOp.gtOp1), src1);
+     LIR::Use loSrc2(BlockRange(), &(src2->gtOp.gtOp1), src2);
+     if (loSrc1.Def()->OperGet() != GT_CNS_INT && loSrc1.Def()->OperGet() != GT_LCL_VAR)
+     {
+         loSrc1.ReplaceWithLclVar(comp, weight);
+     }
+     
+     if (loSrc2.Def()->OperGet() != GT_CNS_INT && loSrc2.Def()->OperGet() != GT_LCL_VAR)
+     {
+         loSrc2.ReplaceWithLclVar(comp, weight);
+     }
+     BasicBlock* jumpDest = m_block->bbJumpDest;
+     BasicBlock* nextDest = m_block->bbNext;
+     BasicBlock* newBlock = comp->fgSplitBlockAtEnd(m_block);
+     cmp->gtType     = TYP_INT;
+     cmp->gtOp.gtOp1 = src1->gtOp.gtOp2;
+     cmp->gtOp.gtOp2 = src2->gtOp.gtOp2;
+     if (cmp->OperGet() == GT_EQ || cmp->OperGet() == GT_NE)
+     {
+         // 64-bit equality comparisons (no matter the polarity) require two 32-bit comparisons: one for the upper 32
+         // bits and one for the lower 32 bits. As such, we update the flow graph like so:
+         //
+         //     Before:
+         //                 BB0: cond
+         //                   /   \
+         //                false  true
+         //                  |     |
+         //                 BB1   BB2
+         //
+         //     After:
+         //                  BB0: cond(hi)
+         //                   /        \
+         //                false       true
+         //                  |          |
+         //                  |     BB3: cond(lo)
+         //                  |      /       \
+         //                  |   false      true
+         //                  \    /          |
+         //                    BB1          BB2
+         //
+         BlockRange().Remove(loSrc1.Def());
+         BlockRange().Remove(loSrc2.Def());
+         GenTree* loCmp = comp->gtNewOperNode(cmp->OperGet(), TYP_INT, loSrc1.Def(), loSrc2.Def());
+         loCmp->gtFlags = cmp->gtFlags;
+         GenTree* loJtrue = comp->gtNewOperNode(GT_JTRUE, TYP_VOID, loCmp);
+         LIR::AsRange(newBlock).InsertAfter(nullptr, loSrc1.Def(), loSrc2.Def(), loCmp, loJtrue);
+         m_block->bbJumpKind = BBJ_COND;
+         if (cmp->OperGet() == GT_EQ)
+         {
+             cmp->gtOper         = GT_NE;
+             m_block->bbJumpDest = nextDest;
+             nextDest->bbFlags |= BBF_JMP_TARGET;
+             comp->fgAddRefPred(nextDest, m_block);
+         }
+         else
+         {
+             m_block->bbJumpDest = jumpDest;
+             comp->fgAddRefPred(jumpDest, m_block);
+         }
+         assert(newBlock->bbJumpKind == BBJ_COND);
+         assert(newBlock->bbJumpDest == jumpDest);
+     }
+     else
+     {
+         // 64-bit ordinal comparisons are more complicated: they require two comparisons for the upper 32 bits and one
+         // comparison for the lower 32 bits. We update the flowgraph as such:
+         //
+         //     Before:
+         //                 BB0: cond
+         //                   /   \
+         //                false  true
+         //                  |     |
+         //                 BB1   BB2
+         //
+         //     After:
+         //           BB0: (!cond(hi) && !eq(hi))
+         //               /                \
+         //             true              false
+         //              |                  |
+         //              |      BB3: (cond(hi) && !eq(hi))
+         //              |             /        \
+         //              |          false      true
+         //              |            |          |
+         //              |      BB4: cond(lo)    |
+         //              |       /         \     |
+         //              |    false        true  |
+         //              \     /             \   /
+         //                BB1                BB2
+         //
+         //
+         // Note that the actual comparisons used to implement "(!cond(hi) && !eq(hi))" and "(cond(hi) && !eq(hi))"
+         // differ based on the original condition, and all consist of a single node. The switch statement below
+         // performs the necessary mapping.
+         //
+         genTreeOps hiCmpOper;
+         genTreeOps loCmpOper;
+         
+         switch (cmp->OperGet())
+         {
+         case GT_LT:
+             cmp->gtOper = GT_GT;
+             hiCmpOper   = GT_LT;
+             loCmpOper   = GT_LT;
+             break;
+         case GT_LE:
+             cmp->gtOper = GT_GT;
+             hiCmpOper   = GT_LT;
+             loCmpOper   = GT_LE;
+             break;
+         case GT_GT:
+             cmp->gtOper = GT_LT;
+             hiCmpOper   = GT_GT;
+             loCmpOper   = GT_GT;
+             break;
+         case GT_GE:
+             cmp->gtOper = GT_LT;
+             hiCmpOper   = GT_GT;
+             loCmpOper   = GT_GE;
+             break;
+         default:
+             unreached();
+         }
+         BasicBlock* newBlock2 = comp->fgSplitBlockAtEnd(newBlock);
+         GenTree* hiJcc = new (comp, GT_JCC) GenTreeJumpCC(hiCmpOper);
+         hiJcc->gtFlags = cmp->gtFlags;
+         LIR::AsRange(newBlock).InsertAfter(nullptr, hiJcc);
+         BlockRange().Remove(loSrc1.Def());
+         BlockRange().Remove(loSrc2.Def());
+         GenTree* loCmp = comp->gtNewOperNode(loCmpOper, TYP_INT, loSrc1.Def(), loSrc2.Def());
+         loCmp->gtFlags = cmp->gtFlags | GTF_UNSIGNED;
+         GenTree* loJtrue = comp->gtNewOperNode(GT_JTRUE, TYP_VOID, loCmp);
+         LIR::AsRange(newBlock2).InsertAfter(nullptr, loSrc1.Def(), loSrc2.Def(), loCmp, loJtrue);
+         m_block->bbJumpKind = BBJ_COND;
+         m_block->bbJumpDest = nextDest;
+         nextDest->bbFlags |= BBF_JMP_TARGET;
+         comp->fgAddRefPred(nextDest, m_block);
+         newBlock->bbJumpKind = BBJ_COND;
+         newBlock->bbJumpDest = jumpDest;
+         comp->fgAddRefPred(jumpDest, newBlock);
+         assert(newBlock2->bbJumpKind == BBJ_COND);
+         assert(newBlock2->bbJumpDest == jumpDest);
+     }
+     BlockRange().Remove(src1);
+     BlockRange().Remove(src2);
+ #endif
+ }
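To make the two flowgraph updates above concrete, here is the same control flow
written as straight-line C++ (illustration only; hi/lo stand for the 32-bit
halves carried by the GT_LONG operands):

    #include <cstdint>

    // GT_EQ/GT_NE shape: BB0 branches on the high halves (polarity flipped to
    // GT_NE when lowering GT_EQ), and BB3 decides on the low halves.
    static bool Eq64(uint32_t hi1, uint32_t lo1, uint32_t hi2, uint32_t lo2)
    {
        if (hi1 != hi2) // BB0: high compare jumps straight to the false edge
        {
            return false;
        }
        return lo1 == lo2; // BB3: low compare feeds the original GT_JTRUE
    }

    // Ordinal shape, shown for signed GT_LT: BB0 proves "false" with the
    // reversed condition, the GT_JCC in BB3 proves "true" by reusing the flags
    // set by the high compare, and BB4 compares the low halves unsigned.
    static bool Lt64(int32_t hi1, uint32_t lo1, int32_t hi2, uint32_t lo2)
    {
        if (hi1 > hi2) // BB0: cmp->gtOper switched from GT_LT to GT_GT
        {
            return false;
        }
        if (hi1 < hi2) // BB3: GT_JCC with hiCmpOper == GT_LT
        {
            return true;
        }
        return lo1 < lo2; // BB4: GT_LT on the low halves with GTF_UNSIGNED
    }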
  // Lower "jmp <method>" tail call to insert PInvoke method epilog if required.
  void Lowering::LowerJmpMethod(GenTree* jmp)
  {
diff --combined src/jit/lowerarm64.cpp
@@@ -484,7 -484,7 +484,7 @@@ void Lowering::TreeNodeInfoInit(GenTree
          case GT_LE:
          case GT_GE:
          case GT_GT:
-             LowerCmp(tree);
+             TreeNodeInfoInitCmp(tree);
              break;
  
          case GT_CKFINITE:
          break;
  
          case GT_BLK:
 -        case GT_OBJ:
          case GT_DYN_BLK:
              // These should all be eliminated prior to Lowering.
              assert(!"Non-store block node in Lowering");
              info->srcCount = 0;
              info->dstCount = 0;
 +            break;
  
          case GT_STORE_BLK:
          case GT_STORE_OBJ:
@@@ -1859,7 -1859,7 +1859,7 @@@ void Lowering::SetIndirAddrOpCounts(Gen
      }
  }
  
- void Lowering::LowerCmp(GenTreePtr tree)
+ void Lowering::TreeNodeInfoInitCmp(GenTreePtr tree)
  {
      TreeNodeInfo* info = &(tree->gtLsraInfo);
  
diff --combined src/jit/lsra.cpp
@@@ -6968,7 -6968,10 +6968,7 @@@ void LinearScan::allocateRegisters(
              }
              else
              {
 -                // This must be a localVar or a single-reg fixed use or a tree temp with conflicting def & use.
 -
 -                assert(currentInterval && (currentInterval->isLocalVar || currentRefPosition->isFixedRegRef ||
 -                                           currentInterval->hasConflictingDefUse));
 +                assert(currentInterval != nullptr);
  
                  // It's already in a register, but not one we need.
                  // If it is a fixed use that is not marked "delayRegFree", there is already a FixedReg to ensure that
@@@ -8526,7 -8529,7 +8526,7 @@@ void LinearScan::insertMove
              noway_assert(!blockRange.IsEmpty());
  
              GenTree* branch = blockRange.LastNode();
-             assert(branch->OperGet() == GT_JTRUE || branch->OperGet() == GT_SWITCH_TABLE ||
+             assert(branch->OperIsConditionalJump() || branch->OperGet() == GT_SWITCH_TABLE ||
                     branch->OperGet() == GT_SWITCH);
  
              blockRange.InsertBefore(branch, std::move(treeRange));
@@@ -8597,7 -8600,7 +8597,7 @@@ void LinearScan::insertSwap
              noway_assert(!blockRange.IsEmpty());
  
              GenTree* branch = blockRange.LastNode();
-             assert(branch->OperGet() == GT_JTRUE || branch->OperGet() == GT_SWITCH_TABLE ||
+             assert(branch->OperIsConditionalJump() || branch->OperGet() == GT_SWITCH_TABLE ||
                     branch->OperGet() == GT_SWITCH);
  
              blockRange.InsertBefore(branch, std::move(swapRange));
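Note: gentree.h is not part of this combined diff, so the OperIsConditionalJump
predicate used in the two hunks above is not shown. For these asserts to hold it
presumably has the following shape (an assumption based on this page, not a
confirmed definition):

    // Assumed definition: the new GT_JCC must count as a conditional jump so
    // that LSRA's resolution moves and swaps are inserted before it, exactly
    // as for GT_JTRUE.
    bool GenTree::OperIsConditionalJump() const
    {
        return (gtOper == GT_JTRUE) || (gtOper == GT_JCC);
    }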