Add optimization for shift by CNS_INT
authorMichelle McDaniel <adiaaida@gmail.com>
Fri, 9 Sep 2016 17:44:03 +0000 (10:44 -0700)
committerMichelle McDaniel <adiaaida@gmail.com>
Fri, 16 Sep 2016 20:08:29 +0000 (13:08 -0700)
This change adds support for shifting by a GT_CNS_INT without going
through a helper. If the shiftOp is a GT_CNS_INT we do several
transformations based on the shift amount:

If the shift amount is 0, the shift is a nop, so we just put together the
hi and lo ops as a GT_LONG.

If the shift amount is < 32, we generate a shl/shld pattern, a shr/shrd
pattern or a sar/shrd pattern, depending on the oper. The first operand of
the shrd/shld is a GT_LONG, which we crack in codegen, using it
essentially as two int operands, rather than creating a tri op GenTree
node (essentially so that we can have 3 operands, instead of the normal
two).

If the shift amount is 32, the result depends on the shift direction.
For GT_LSH, we move the loOp into the hiResult and set the loResult
to 0. For GT_RSZ, we move the hiOp into the loResult, and set the hiResult
to 0. For GT_RSH, we move the hiOp into the loResult, and set the hiResult
to a signed right shift of the hiOp by 31 bits, to sign extend.

If the shift amount is less than 64, but larger than 32: for GT_LSH, the
hiResult is a left shift of the loOp by (shift amount - 32); the move from lo
into hi accounts for the other 32 bits. We set the loResult to 0. For GT_RSH
and GT_RSZ, the loResult is a right shift (signed for GT_RSH) of the hiOp by
(shift amount - 32). The hiResult is 0 for GT_RSZ, and a signed right shift of
the hiOp by 31 bits for GT_RSH.

If the shift amount is >= 64, we set both hiResult and loResult to 0 for
GT_LSH and GT_RSZ, and do a sign extend shift to set hiResult and loResult
to the sign of the original hiOp for GT_RSH.

src/jit/codegenlinear.h
src/jit/codegenxarch.cpp
src/jit/decomposelongs.cpp
src/jit/decomposelongs.h
src/jit/gtlist.h
src/jit/lowerxarch.cpp

index 6cc437b..793d6d2 100644 (file)
@@ -145,6 +145,10 @@ void genSetRegToIcon(regNumber reg, ssize_t val, var_types type = TYP_INT, insFl
 
 void genCodeForShift(GenTreePtr tree);
 
+#if defined(_TARGET_X86_)
+void genCodeForShiftLong(GenTreePtr tree);
+#endif
+
 #ifdef _TARGET_XARCH_
 void genCodeForShiftRMW(GenTreeStoreInd* storeInd);
 #endif // _TARGET_XARCH_
index 402d800..98fb249 100644 (file)
@@ -2069,6 +2069,17 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
             // genCodeForShift() calls genProduceReg()
             break;
 
+#if !defined(_TARGET_64BIT_)
+        case GT_LSH_HI:
+        case GT_RSH_LO:
+            // TODO-X86-CQ: This only handles the case where the operand being shifted is in a register. We don't
+            // need sourceHi to be always in reg in case of GT_LSH_HI (because it could be moved from memory to
+            // targetReg if sourceHi is a contained mem-op). Similarly for GT_RSH_LO, sourceLo could be marked as
+            // contained memory-op. Even if not a memory-op, we could mark it as reg-optional.
+            genCodeForShiftLong(treeNode);
+            break;
+#endif
+
         case GT_CAST:
             if (varTypeIsFloating(targetType) && varTypeIsFloating(treeNode->gtOp.gtOp1))
             {
@@ -4694,6 +4705,12 @@ instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type)
         case GT_SUB_HI:
             ins = INS_sbb;
             break;
+        case GT_LSH_HI:
+            ins = INS_shld;
+            break;
+        case GT_RSH_LO:
+            ins = INS_shrd;
+            break;
 #endif // !defined(_TARGET_64BIT_)
         default:
             unreached();
@@ -4731,6 +4748,7 @@ void CodeGen::genCodeForShift(GenTreePtr tree)
     regNumber  operandReg = operand->gtRegNum;
 
     GenTreePtr shiftBy = tree->gtGetOp2();
+    
     if (shiftBy->isContainedIntOrIImmed())
     {
         // First, move the operand to the destination register and
@@ -4769,6 +4787,67 @@ void CodeGen::genCodeForShift(GenTreePtr tree)
     genProduceReg(tree);
 }
 
+#ifdef _TARGET_X86_
+//------------------------------------------------------------------------
+// genCodeForShiftLong: Generates the code sequence for a GenTree node that
+// represents a three operand bit shift operation (<<Hi, >>Lo).
+//
+// Arguments:
+//    tree - the bit shift node (that specifies the type of bit shift to perform).
+//
+// Assumptions:
+//    a) All GenTrees are register allocated.
+//    b) The shift-by-amount in tree->gtOp.gtOp2 is a contained constant
+//
+void CodeGen::genCodeForShiftLong(GenTreePtr tree)
+{
+    // Only the non-RMW case here.
+    genTreeOps oper = tree->OperGet();
+    assert(oper == GT_LSH_HI || oper == GT_RSH_LO);
+
+    GenTree* operand = tree->gtOp.gtOp1;
+    assert(operand->OperGet() == GT_LONG);
+    assert(!operand->gtOp.gtOp1->isContained());
+    assert(!operand->gtOp.gtOp2->isContained());
+
+    GenTree* operandLo = operand->gtGetOp1();
+    GenTree* operandHi = operand->gtGetOp2();
+
+    regNumber regLo = operandLo->gtRegNum;
+    regNumber regHi = operandHi->gtRegNum;
+
+    genConsumeOperands(tree->AsOp());
+
+    var_types   targetType = tree->TypeGet();
+    instruction ins        = genGetInsForOper(oper, targetType);
+
+    GenTreePtr shiftBy = tree->gtGetOp2();
+
+    assert(shiftBy->isContainedIntOrIImmed());
+
+    unsigned int count = shiftBy->AsIntConCommon()->IconValue();
+
+    regNumber regResult = (oper == GT_LSH_HI) ? regHi : regLo;
+
+    if (regResult != tree->gtRegNum)
+    {
+        inst_RV_RV(INS_mov, tree->gtRegNum, regResult, targetType);
+    }
+
+    if (oper == GT_LSH_HI)
+    {
+        inst_RV_RV_IV(ins, emitTypeSize(targetType), tree->gtRegNum, regLo, count);
+    }
+    else
+    {
+        assert(oper == GT_RSH_LO);
+        inst_RV_RV_IV(ins, emitTypeSize(targetType), tree->gtRegNum, regHi, count);
+    }
+
+    genProduceReg(tree);
+}
+#endif
+
 //------------------------------------------------------------------------
 // genCodeForShiftRMW: Generates the code sequence for a GT_STOREIND GenTree node that
 // represents a RMW bit shift or rotate operation (<<, >>, >>>, rol, ror), for example:
index 00745c0..c3be74c 100644 (file)
@@ -280,23 +280,22 @@ GenTree* DecomposeLongs::DecomposeNode(GenTree* tree)
 // Arguments:
 //    use - the LIR::Use object for the def that needs to be decomposed.
 //    loResult - the decomposed low part
-//    hiResult - the decomposed high part. This must follow loResult in the linear order,
-//               as the new GT_LONG node will be inserted immediately after it.
+//    hiResult - the decomposed high part
+//    insertResultAfter - the node that the GT_LONG should be inserted after
 //
 // Return Value:
 //    The next node to process.
 //
-GenTree* DecomposeLongs::FinalizeDecomposition(LIR::Use& use, GenTree* loResult, GenTree* hiResult)
+GenTree* DecomposeLongs::FinalizeDecomposition(LIR::Use& use, GenTree* loResult, GenTree* hiResult, GenTree* insertResultAfter)
 {
     assert(use.IsInitialized());
     assert(loResult != nullptr);
     assert(hiResult != nullptr);
     assert(Range().Contains(loResult));
     assert(Range().Contains(hiResult));
-    assert(loResult->Precedes(hiResult));
 
     GenTree* gtLong = new (m_compiler, GT_LONG) GenTreeOp(GT_LONG, TYP_LONG, loResult, hiResult);
-    Range().InsertAfter(hiResult, gtLong);
+    Range().InsertAfter(insertResultAfter, gtLong);
 
     use.ReplaceWith(m_compiler, gtLong);
 
@@ -352,7 +351,7 @@ GenTree* DecomposeLongs::DecomposeLclVar(LIR::Use& use)
     m_compiler->lvaIncRefCnts(loResult);
     m_compiler->lvaIncRefCnts(hiResult);
 
-    return FinalizeDecomposition(use, loResult, hiResult);
+    return FinalizeDecomposition(use, loResult, hiResult, hiResult);
 }
 
 //------------------------------------------------------------------------
@@ -376,7 +375,7 @@ GenTree* DecomposeLongs::DecomposeLclFld(LIR::Use& use)
     GenTree* hiResult = m_compiler->gtNewLclFldNode(loResult->gtLclNum, TYP_INT, loResult->gtLclOffs + 4);
     Range().InsertAfter(loResult, hiResult);
 
-    return FinalizeDecomposition(use, loResult, hiResult);
+    return FinalizeDecomposition(use, loResult, hiResult, hiResult);
 }
 
 //------------------------------------------------------------------------
@@ -564,7 +563,7 @@ GenTree* DecomposeLongs::DecomposeCast(LIR::Use& use)
         NYI("Unimplemented cast decomposition");
     }
 
-    return FinalizeDecomposition(use, loResult, hiResult);
+    return FinalizeDecomposition(use, loResult, hiResult, hiResult);
 }
 
 //------------------------------------------------------------------------
@@ -591,7 +590,7 @@ GenTree* DecomposeLongs::DecomposeCnsLng(LIR::Use& use)
     GenTree* hiResult = new (m_compiler, GT_CNS_INT) GenTreeIntCon(TYP_INT, hiVal);
     Range().InsertAfter(loResult, hiResult);
 
-    return FinalizeDecomposition(use, loResult, hiResult);
+    return FinalizeDecomposition(use, loResult, hiResult, hiResult);
 }
 
 //------------------------------------------------------------------------
@@ -740,7 +739,7 @@ GenTree* DecomposeLongs::DecomposeInd(LIR::Use& use)
 
     Range().InsertAfter(indLow, addrBaseHigh, addrHigh, indHigh);
 
-    return FinalizeDecomposition(use, indLow, indHigh);
+    return FinalizeDecomposition(use, indLow, indHigh, indHigh);
 }
 
 //------------------------------------------------------------------------
@@ -772,7 +771,7 @@ GenTree* DecomposeLongs::DecomposeNot(LIR::Use& use)
     GenTree* hiResult = new (m_compiler, GT_NOT) GenTreeOp(GT_NOT, TYP_INT, hiOp1, nullptr);
     Range().InsertAfter(loResult, hiResult);
 
-    return FinalizeDecomposition(use, loResult, hiResult);
+    return FinalizeDecomposition(use, loResult, hiResult, hiResult);
 }
 
 //------------------------------------------------------------------------
@@ -817,7 +816,7 @@ GenTree* DecomposeLongs::DecomposeNeg(LIR::Use& use)
 
     Range().InsertAfter(loResult, zero, hiAdjust, hiResult);
 
-    return FinalizeDecomposition(use, loResult, hiResult);
+    return FinalizeDecomposition(use, loResult, hiResult, hiResult);
 }
 
 //------------------------------------------------------------------------
@@ -878,14 +877,19 @@ GenTree* DecomposeLongs::DecomposeArith(LIR::Use& use)
         }
     }
 
-    return FinalizeDecomposition(use, loResult, hiResult);
+    return FinalizeDecomposition(use, loResult, hiResult, hiResult);
 }
 
 //------------------------------------------------------------------------
-// DecomposeShift: Decompose GT_LSH, GT_RSH, GT_RSZ. For shift nodes, we need to use
-// the shift helper functions, so we here convert the shift into a helper call by
-// pulling its arguments out of linear order and making them the args to a call, then
-// replacing the original node with the new call.
+// DecomposeShift: Decompose GT_LSH, GT_RSH, GT_RSZ. For shift nodes being shifted
+// by a constant int, we can inspect the shift amount and decompose to the appropriate
+// node types, generating a shl/shld pattern for GT_LSH, a shrd/shr pattern for GT_RSZ,
+// and a shrd/sar pattern for GT_RSH for most shift amounts. Shifting by 0, >= 32 and
+// >= 64 are special cased to produce better code patterns.
+//
+// For all other shift nodes, we need to use the shift helper functions, so we here convert
+// the shift into a helper call by pulling its arguments out of linear order and making
+// them the args to a call, then replacing the original node with the new call.
 //
 // Arguments:
 //    use - the LIR::Use object for the def that needs to be decomposed.
@@ -899,64 +903,339 @@ GenTree* DecomposeLongs::DecomposeShift(LIR::Use& use)
 
     GenTree* tree   = use.Def();
     GenTree* gtLong = tree->gtGetOp1();
+    GenTree* oldShiftByOp = tree->gtGetOp2();
+
     genTreeOps oper = tree->OperGet();
+    genTreeOps shiftByOper = oldShiftByOp->OperGet();
 
     assert((oper == GT_LSH) || (oper == GT_RSH) || (oper == GT_RSZ));
 
-    LIR::Use loOp1Use(Range(), &gtLong->gtOp.gtOp1, gtLong);
-    loOp1Use.ReplaceWithLclVar(m_compiler, m_blockWeight);
+    unsigned loOp1LclNum;
+    unsigned hiOp1LclNum;
 
-    LIR::Use hiOp1Use(Range(), &gtLong->gtOp.gtOp2, gtLong);
-    hiOp1Use.ReplaceWithLclVar(m_compiler, m_blockWeight);
+    if (gtLong->gtOp.gtOp1->OperGet() != GT_LCL_VAR)
+    {
+        LIR::Use loOp1Use(Range(), &gtLong->gtOp.gtOp1, gtLong);
+        loOp1LclNum = loOp1Use.ReplaceWithLclVar(m_compiler, m_blockWeight);
+    }
+    else
+    {
+        loOp1LclNum = gtLong->gtOp.gtOp1->AsLclVarCommon()->gtLclNum;
+    }
 
-    LIR::Use shiftWidthUse(Range(), &tree->gtOp.gtOp2, tree);
-    shiftWidthUse.ReplaceWithLclVar(m_compiler, m_blockWeight);
+    if (gtLong->gtOp.gtOp2->OperGet() != GT_LCL_VAR)
+    {
+        LIR::Use hiOp1Use(Range(), &gtLong->gtOp.gtOp2, gtLong);
+        hiOp1LclNum = hiOp1Use.ReplaceWithLclVar(m_compiler, m_blockWeight);
+    }
+    else
+    {
+        hiOp1LclNum = gtLong->gtOp.gtOp2->AsLclVarCommon()->gtLclNum;
+    }
 
     GenTree* loOp1 = gtLong->gtGetOp1();
     GenTree* hiOp1 = gtLong->gtGetOp2();
 
-    GenTree* shiftWidthOp = tree->gtGetOp2();
-
     Range().Remove(gtLong);
     Range().Remove(loOp1);
     Range().Remove(hiOp1);
 
-    Range().Remove(shiftWidthOp);
+    // If we are shifting by a constant int, we do not want to use a helper, instead, we decompose.
+    if (shiftByOper == GT_CNS_INT)
+    {
+        unsigned int count = oldShiftByOp->gtIntCon.gtIconVal;
+        Range().Remove(oldShiftByOp);
 
-    // TODO-X86-CQ: If the shift operand is a GT_CNS_INT, we should pipe the instructions through to codegen
-    // and generate the shift instructions ourselves there, rather than replacing it with a helper call.
+        GenTree* loResult;
+        GenTree* hiResult;
 
-    unsigned helper;
+        GenTree* insertAfter;
 
-    switch (oper)
-    {
-        case GT_LSH:
-            helper = CORINFO_HELP_LLSH;
+        switch (oper)
+        {
+            case GT_LSH:
+            {
+                if (count == 0)
+                {
+                    // Do nothing.
+                    loResult = loOp1;
+                    hiResult = hiOp1;
+
+                    Range().InsertBefore(tree, loResult, hiResult);
+
+                    insertAfter = hiResult;
+                }
+                else if (count < 32)
+                {
+                    // Hi is a GT_LSH_HI, lo is a GT_LSH. Will produce:
+                    // reg1 = lo
+                    // shl lo, shift
+                    // shld hi, reg1, shift
+
+                    GenTree* shiftByHi = m_compiler->gtNewIconNode(count, TYP_INT);
+                    GenTree* shiftByLo = m_compiler->gtNewIconNode(count, TYP_INT);
+
+                    loResult = m_compiler->gtNewOperNode(GT_LSH, TYP_INT, loOp1, shiftByLo);
+
+                    // Create a GT_LONG that contains loCopy and hiOp1. This will be used in codegen to
+                    // generate the shld instruction
+                    GenTree* loCopy  = m_compiler->gtNewLclvNode(loOp1LclNum, TYP_INT);
+                    GenTree* hiOp = new (m_compiler, GT_LONG) GenTreeOp(GT_LONG, TYP_LONG, loCopy, hiOp1);
+                    hiResult      = m_compiler->gtNewOperNode(GT_LSH_HI, TYP_INT, hiOp, shiftByHi);
+
+                    m_compiler->lvaIncRefCnts(loCopy);
+
+                    Range().InsertBefore(tree, loCopy, hiOp1, hiOp);
+                    Range().InsertBefore(tree, shiftByHi, hiResult);
+                    Range().InsertBefore(tree, loOp1, shiftByLo, loResult);
+
+                    insertAfter = loResult;
+                }
+                else
+                {
+                    assert(count >= 32);
+
+                    // Zero out loResult (shift of >= 32 bits shifts all lo bits to hiResult)
+                    loResult = m_compiler->gtNewZeroConNode(TYP_INT);
+                    Range().InsertBefore(tree, loResult);
+
+                    if (count < 64)
+                    {
+                        if (count == 32)
+                        {
+                            // Move loOp1 into hiResult (shift of 32 bits is just a mov of lo to hi)
+                            hiResult = loOp1;
+                            Range().InsertBefore(tree, hiResult);
+                        }
+                        else
+                        {
+                            assert(count > 32 && count < 64);
+
+                            // Move loOp1 into hiResult, do a GT_LSH with count - 32.
+                            GenTree* shiftBy = m_compiler->gtNewIconNode(count - 32, TYP_INT);
+                            hiResult         = m_compiler->gtNewOperNode(oper, TYP_INT, loOp1, shiftBy);
+                            Range().InsertBefore(tree, loOp1, shiftBy, hiResult);
+                        }
+                    }
+                    else
+                    {
+                        assert(count >= 64);
+
+                        // Zero out hi (shift of >= 64 bits moves all the bits out of the two registers)
+                        hiResult = m_compiler->gtNewZeroConNode(TYP_INT);
+                        Range().InsertBefore(tree, hiResult);
+                    }
+
+                    insertAfter = hiResult;
+                }
+            }
             break;
-        case GT_RSH:
-            helper = CORINFO_HELP_LRSH;
+            case GT_RSZ:
+            {
+                if (count == 0)
+                {
+                    // Do nothing.
+                    loResult = loOp1;
+                    hiResult = hiOp1;
+                    Range().InsertBefore(tree, loResult, hiResult);
+                }
+                else if (count < 32)
+                {
+                    // Hi is a GT_RSZ, lo is a GT_RSH_LO. Will produce:
+                    // reg1 = hi
+                    // shrd lo, reg1, shift
+                    // shr hi, shift
+
+                    GenTree* shiftByHi = m_compiler->gtNewIconNode(count, TYP_INT);
+                    GenTree* shiftByLo = m_compiler->gtNewIconNode(count, TYP_INT);
+                    GenTree* hiCopy    = m_compiler->gtNewLclvNode(hiOp1LclNum, TYP_INT);
+                    m_compiler->lvaIncRefCnts(hiCopy);
+
+                    hiResult = m_compiler->gtNewOperNode(GT_RSZ, TYP_INT, hiOp1, shiftByHi);
+
+                    // Create a GT_LONG that contains loOp1 and hiCopy. This will be used in codegen to
+                    // generate the shrd instruction
+                    GenTree* loOp = new (m_compiler, GT_LONG) GenTreeOp(GT_LONG, TYP_LONG, loOp1, hiCopy);
+                    loResult      = m_compiler->gtNewOperNode(GT_RSH_LO, TYP_INT, loOp, shiftByLo);
+
+                    Range().InsertBefore(tree, loOp1, hiCopy, loOp);
+                    Range().InsertBefore(tree, shiftByLo, loResult);
+                    Range().InsertBefore(tree, hiOp1, shiftByHi, hiResult);
+                }
+                else
+                {
+                    assert(count >= 32);
+                    if (count < 64)
+                    {
+                        if (count == 32)
+                        {
+                            // Move hiOp1 into loResult.
+                            loResult = hiOp1;
+                            Range().InsertBefore(tree, loResult);
+                        }
+                        else
+                        {
+                            assert(count > 32 && count < 64);
+
+                            // Move hiOp1 into loResult, do a GT_RSZ with count - 32.
+                            GenTree* shiftBy = m_compiler->gtNewIconNode(count - 32, TYP_INT);
+                            loResult         = m_compiler->gtNewOperNode(oper, TYP_INT, hiOp1, shiftBy);
+                            Range().InsertBefore(tree, hiOp1, shiftBy, loResult);
+                        }
+                    }
+                    else
+                    {
+                        assert(count >= 64);
+
+                        // Zero out lo
+                        loResult = m_compiler->gtNewZeroConNode(TYP_INT);
+                        Range().InsertBefore(tree, loResult);
+                    }
+
+                    // Zero out hi
+                    hiResult = m_compiler->gtNewZeroConNode(TYP_INT);
+                    Range().InsertBefore(tree, hiResult);
+                }
+
+                insertAfter = hiResult;
+            }
             break;
-        case GT_RSZ:
-            helper = CORINFO_HELP_LRSZ;
+            case GT_RSH:
+            {
+                if (count == 0)
+                {
+                    // Do nothing.
+                    loResult = loOp1;
+                    hiResult = hiOp1;
+                    Range().InsertBefore(tree, loResult, hiResult);
+                }
+                else if (count < 32)
+                {
+                    // Hi is a GT_RSH, lo is a GT_RSH_LO. Will produce:
+                    // reg1 = hi
+                    // shrd lo, reg1, shift
+                    // sar hi, shift
+
+                    GenTree* shiftByHi = m_compiler->gtNewIconNode(count, TYP_INT);
+                    GenTree* shiftByLo = m_compiler->gtNewIconNode(count, TYP_INT);
+                    GenTree* hiCopy    = m_compiler->gtNewLclvNode(hiOp1LclNum, TYP_INT);
+                    m_compiler->lvaIncRefCnts(hiCopy);
+
+                    hiResult = m_compiler->gtNewOperNode(GT_RSH, TYP_INT, hiOp1, shiftByHi);
+
+                    // Create a GT_LONG that contains loOp1 and hiCopy. This will be used in codegen to
+                    // generate the shrd instruction
+                    GenTree* loOp = new (m_compiler, GT_LONG) GenTreeOp(GT_LONG, TYP_LONG, loOp1, hiCopy);
+                    loResult      = m_compiler->gtNewOperNode(GT_RSH_LO, TYP_INT, loOp, shiftByLo);
+
+                    Range().InsertBefore(tree, loOp1, hiCopy, loOp);
+                    Range().InsertBefore(tree, shiftByLo, loResult);
+                    Range().InsertBefore(tree, shiftByHi, hiOp1, hiResult);
+                }
+                else
+                {
+                    assert(count >= 32);
+                    if (count < 64)
+                    {
+                        if (count == 32)
+                        {
+                            // Move hiOp1 into loResult.
+                            loResult = hiOp1;
+                            Range().InsertBefore(tree, loResult);
+                        }
+                        else
+                        {
+                            assert(count > 32 && count < 64);
+
+                            // Move hiOp1 into loResult, do a GT_RSH with count - 32.
+                            GenTree* shiftBy = m_compiler->gtNewIconNode(count - 32, TYP_INT);
+                            loResult         = m_compiler->gtNewOperNode(oper, TYP_INT, hiOp1, shiftBy);
+                            Range().InsertBefore(tree, hiOp1, shiftBy, loResult);
+                        }
+
+                        // Propagate sign bit in hiResult
+                        GenTree* shiftBy = m_compiler->gtNewIconNode(31, TYP_INT);
+                        GenTree* hiCopy  = m_compiler->gtNewLclvNode(hiOp1LclNum, TYP_INT);
+                        hiResult         = m_compiler->gtNewOperNode(GT_RSH, TYP_INT, hiCopy, shiftBy);
+                        Range().InsertBefore(tree, shiftBy, hiCopy, hiResult);
+
+                        m_compiler->lvaIncRefCnts(hiCopy);
+                    }
+                    else
+                    {
+                        assert(count >= 64);
+
+                        // Propagate sign bit in loResult
+                        GenTree* hiCopy    = m_compiler->gtNewLclvNode(hiOp1LclNum, TYP_INT);
+                        GenTree* loShiftBy = m_compiler->gtNewIconNode(31, TYP_INT);
+                        loResult           = m_compiler->gtNewOperNode(GT_RSH, TYP_INT, hiCopy, loShiftBy);
+                        Range().InsertBefore(tree, hiCopy, loShiftBy, loResult);
+
+                        // Propagate sign bit in hiResult
+                        GenTree* shiftBy = m_compiler->gtNewIconNode(31, TYP_INT);
+                        hiResult         = m_compiler->gtNewOperNode(GT_RSH, TYP_INT, hiOp1, shiftBy);
+                        Range().InsertBefore(tree, shiftBy, hiOp1, hiResult);
+
+                        m_compiler->lvaIncRefCnts(hiCopy);
+                    }
+                }
+
+                insertAfter = hiResult;
+            }
             break;
-        default:
-            unreached();
+            default:
+                unreached();
+        }
+
+        // Remove tree from Range
+        Range().Remove(tree);
+
+        return FinalizeDecomposition(use, loResult, hiResult, insertAfter);
     }
+    else
+    {
+        GenTree* shiftByOp = oldShiftByOp;
+        if (shiftByOp->OperGet() != GT_LCL_VAR)
+        {
+            LIR::Use shiftByUse(Range(), &tree->gtOp.gtOp2, tree);
+            shiftByUse.ReplaceWithLclVar(m_compiler, m_blockWeight);
+            shiftByOp = tree->gtGetOp2();
+        }
+
+        Range().Remove(shiftByOp);
 
-    GenTreeArgList* argList = m_compiler->gtNewArgList(loOp1, hiOp1, shiftWidthOp);
+        unsigned helper;
 
-    GenTree* call = m_compiler->gtNewHelperCallNode(helper, TYP_LONG, 0, argList);
+        switch (oper)
+        {
+            case GT_LSH:
+                helper = CORINFO_HELP_LLSH;
+                break;
+            case GT_RSH:
+                helper = CORINFO_HELP_LRSH;
+                break;
+            case GT_RSZ:
+                helper = CORINFO_HELP_LRSZ;
+                break;
+            default:
+                unreached();
+        }
+
+        GenTreeArgList* argList = m_compiler->gtNewArgList(loOp1, hiOp1, shiftByOp);
 
-    GenTreeCall*    callNode    = call->AsCall();
-    ReturnTypeDesc* retTypeDesc = callNode->GetReturnTypeDesc();
-    retTypeDesc->InitializeLongReturnType(m_compiler);
+        GenTree* call = m_compiler->gtNewHelperCallNode(helper, TYP_LONG, 0, argList);
 
-    call = m_compiler->fgMorphArgs(callNode);
-    Range().InsertAfter(tree, LIR::SeqTree(m_compiler, call));
+        GenTreeCall*    callNode    = call->AsCall();
+        ReturnTypeDesc* retTypeDesc = callNode->GetReturnTypeDesc();
+        retTypeDesc->InitializeLongReturnType(m_compiler);
 
-    Range().Remove(tree);
-    use.ReplaceWith(m_compiler, call);
-    return call;
+        call = m_compiler->fgMorphArgs(callNode);
+        Range().InsertAfter(tree, LIR::SeqTree(m_compiler, call));
+
+        Range().Remove(tree);
+        use.ReplaceWith(m_compiler, call);
+        return call;
+    }
 }
 
 //------------------------------------------------------------------------
@@ -1069,7 +1348,7 @@ GenTree* DecomposeLongs::DecomposeUMod(LIR::Use& use)
 
     Range().InsertAfter(loResult, hiResult);
 
-    return FinalizeDecomposition(use, loResult, hiResult);
+    return FinalizeDecomposition(use, loResult, hiResult, hiResult);
 }
 
 //------------------------------------------------------------------------
index 9cb183f..f087c3e 100644 (file)
@@ -55,7 +55,7 @@ private:
     GenTree* DecomposeUMod(LIR::Use& use);
 
     // Helper functions
-    GenTree* FinalizeDecomposition(LIR::Use& use, GenTree* loResult, GenTree* hiResult);
+    GenTree* FinalizeDecomposition(LIR::Use& use, GenTree* loResult, GenTree* hiResult, GenTree* insertResultAfter);
 
     GenTree* StoreNodeToVar(LIR::Use& use);
     static genTreeOps GetHiOper(genTreeOps oper);
index bc2f8f2..3bf3603 100644 (file)
@@ -179,6 +179,16 @@ GTNODE(MUL_LONG         , "*long"        ,GenTreeOp          ,1,GTK_BINOP) // A
                                                                            // helper calls. It is similar to GT_MULHI, the difference being that
                                                                            // GT_MULHI drops the lo part of the result, whereas GT_MUL_LONG keeps
                                                                            // both parts of the result.
+
+// The following are nodes that specify shifts that take a GT_LONG op1. The GT_LONG
+// contains the hi and lo parts of a three operand shift form, where one op will be
+// shifted into the other op as part of the operation: LSH_HI will shift
+// the high bits of the lo operand into the high operand as it shifts left, and RSH_LO
+// will shift the lo bits of the high operand into the lo operand. LSH_HI
+// represents the high operation of a 64-bit left shift by a constant int, and
+// RSH_LO represents the lo operation of a 64-bit right shift by a constant int.
+GTNODE(LSH_HI           , "<<Hi"         ,GenTreeOp          ,0,GTK_BINOP)
+GTNODE(RSH_LO           , ">>Lo"         ,GenTreeOp          ,0,GTK_BINOP)
 #endif // !defined(LEGACY_BACKEND) && !defined(_TARGET_64BIT_)
 
 #ifdef FEATURE_SIMD
index f818809..4ffb592 100644 (file)
@@ -497,6 +497,10 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
         case GT_RSZ:
         case GT_ROL:
         case GT_ROR:
+#ifdef _TARGET_X86_
+        case GT_LSH_HI:
+        case GT_RSH_LO:
+#endif
             TreeNodeInfoInitShiftRotate(tree);
             break;
 
@@ -1035,6 +1039,31 @@ void Lowering::TreeNodeInfoInitShiftRotate(GenTree* tree)
     GenTreePtr shiftBy = tree->gtOp.gtOp2;
     GenTreePtr source  = tree->gtOp.gtOp1;
 
+#ifdef _TARGET_X86_
+    // The first operand of a GT_LSH_HI and GT_RSH_LO oper is a GT_LONG so that
+    // we can have a three operand form. Increment the srcCount.
+    if (tree->OperGet() == GT_LSH_HI || tree->OperGet() == GT_RSH_LO)
+    {
+        assert(source->OperGet() == GT_LONG);
+
+        info->srcCount++;
+
+        if (tree->OperGet() == GT_LSH_HI)
+        {
+            GenTreePtr sourceLo = source->gtOp.gtOp1;
+            sourceLo->gtLsraInfo.isDelayFree = true;
+        }
+        else
+        {
+            GenTreePtr sourceHi = source->gtOp.gtOp2;
+            sourceHi->gtLsraInfo.isDelayFree = true;
+        }
+
+        source->gtLsraInfo.hasDelayFreeSrc = true;
+        info->hasDelayFreeSrc = true;
+    }
+#endif
+
     // x64 can encode 8 bits of shift and it will use 5 or 6. (the others are masked off)
     // We will allow whatever can be encoded - hope you know what you are doing.
     if (!IsContainableImmed(tree, shiftBy) || (shiftBy->gtIntConCommon.IconValue() > 255) ||