Implement DecomposeRotate
authorMichelle McDaniel <adiaaida@gmail.com>
Wed, 19 Oct 2016 17:43:15 +0000 (10:43 -0700)
committerMichelle McDaniel <adiaaida@gmail.com>
Thu, 20 Oct 2016 19:40:32 +0000 (12:40 -0700)
fgRecognizeAndMorphBitwiseRotation can potentially morph a tree with a
TYP_LONG return type on x86 if the rotate amount is a GT_CNS_INT. This
change implements DecomposeRotate for those rotate instructions. There are
5 cases:

1) Rotate by 0: do nothing.
2) Rotate by 32: swap hi and lo.
3) Rotate by < 32: produce a shld/shld (for GT_ROL) or a shrd/shrd (for
GT_ROR).
4) Rotate by >= 32: swap hi and lo, subtract from the rotate amount 32, and
do option 3.
5) Rotate by >= 64: do rotateAmount % 64 to get the rotate amount between
0 and 63, then do one of options 1-4.

This change also updates CodegenBringUpTests\Rotate.cs to exercise these
paths.

src/jit/decomposelongs.cpp
src/jit/decomposelongs.h
tests/src/JIT/CodeGenBringUpTests/Rotate.cs

index 1e0f4df..a9fc6b8 100644 (file)
@@ -244,6 +244,11 @@ GenTree* DecomposeLongs::DecomposeNode(GenTree* tree)
             nextNode = DecomposeShift(use);
             break;
 
+        case GT_ROL:
+        case GT_ROR:
+            nextNode = DecomposeRotate(use);
+            break;
+
         case GT_LOCKADD:
         case GT_XADD:
         case GT_XCHG:
@@ -1294,6 +1299,135 @@ GenTree* DecomposeLongs::DecomposeShift(LIR::Use& use)
 }
 
 //------------------------------------------------------------------------
+// DecomposeRotate: Decompose GT_ROL and GT_ROR with constant shift amounts. We can
+// inspect the rotate amount and decompose to the appropriate node types, generating
+// a shld/shld pattern for GT_ROL, a shrd/shrd pattern for GT_ROR, for most rotate
+// amounts.
+//
+// Arguments:
+//    use - the LIR::Use object for the def that needs to be decomposed.
+//
+// Return Value:
+//    The next node to process.
+//
+GenTree* DecomposeLongs::DecomposeRotate(LIR::Use& use)
+{
+    GenTree* tree       = use.Def();
+    GenTree* gtLong     = tree->gtGetOp1();
+    GenTree* rotateByOp = tree->gtGetOp2();
+
+    genTreeOps oper = tree->OperGet();
+
+    assert((oper == GT_ROL) || (oper == GT_ROR));
+    assert(rotateByOp->IsCnsIntOrI());
+
+    // For longs, we need to change rols into two GT_LSH_HIs and rors into two GT_RSH_LOs
+    // so we will get:
+    //
+    // shld lo, hi, rotateAmount
+    // shld hi, loCopy, rotateAmount
+    //
+    // or:
+    //
+    // shrd lo, hi, rotateAmount
+    // shrd hi, loCopy, rotateAmount
+
+    if (oper == GT_ROL)
+    {
+        oper = GT_LSH_HI;
+    }
+    else
+    {
+        oper = GT_RSH_LO;
+    }
+
+    unsigned int count = rotateByOp->gtIntCon.gtIconVal;
+    Range().Remove(rotateByOp);
+
+    // Make sure the rotate amount is between 0 and 63.
+    assert((count < 64) && (count != 0));
+
+    GenTree* loOp1;
+    GenTree* hiOp1;
+
+    if (count > 32)
+    {
+        // If count > 32, we swap hi and lo, and subtract 32 from count
+        hiOp1 = gtLong->gtGetOp1();
+        loOp1 = gtLong->gtGetOp2();
+        count -= 32;
+    }
+    else
+    {
+        loOp1 = gtLong->gtGetOp1();
+        hiOp1 = gtLong->gtGetOp2();
+    }
+
+    GenTree* loResult;
+    GenTree* hiResult;
+
+    if (count == 32)
+    {
+        // If the rotate amount is 32, then swap hi and lo
+        gtLong->gtOp.gtOp1 = hiOp1;
+        gtLong->gtOp.gtOp2 = loOp1;
+
+        GenTree* next = tree->gtNext;
+        // Remove tree and don't do anything else.
+        Range().Remove(tree);
+        use.ReplaceWith(m_compiler, gtLong);
+        return next;
+    }
+    else
+    {
+        Range().Remove(gtLong);
+        loOp1 = RepresentOpAsLocalVar(loOp1, gtLong, &gtLong->gtOp.gtOp1);
+        hiOp1 = RepresentOpAsLocalVar(hiOp1, gtLong, &gtLong->gtOp.gtOp2);
+
+        unsigned loOp1LclNum = loOp1->AsLclVarCommon()->gtLclNum;
+        unsigned hiOp1LclNum = hiOp1->AsLclVarCommon()->gtLclNum;
+
+        if (count > 32)
+        {
+            Range().Remove(hiOp1);
+            Range().Remove(loOp1);
+        }
+        else
+        {
+            Range().Remove(loOp1);
+            Range().Remove(hiOp1);
+        }
+
+        GenTree* rotateByHi = m_compiler->gtNewIconNode(count, TYP_INT);
+        GenTree* rotateByLo = m_compiler->gtNewIconNode(count, TYP_INT);
+
+        // Create a GT_LONG that contains loOp1 and hiCopy. This will be used in codegen to
+        // generate the shld instruction
+        GenTree* hiCopy = m_compiler->gtNewLclvNode(hiOp1LclNum, TYP_INT);
+        GenTree* loOp   = new (m_compiler, GT_LONG) GenTreeOp(GT_LONG, TYP_LONG, hiCopy, loOp1);
+        loResult        = m_compiler->gtNewOperNode(oper, TYP_INT, loOp, rotateByLo);
+
+        // Create a GT_LONG that contains loCopy and hiOp1. This will be used in codegen to
+        // generate the shld instruction
+        GenTree* loCopy = m_compiler->gtNewLclvNode(loOp1LclNum, TYP_INT);
+        GenTree* hiOp   = new (m_compiler, GT_LONG) GenTreeOp(GT_LONG, TYP_LONG, loCopy, hiOp1);
+        hiResult        = m_compiler->gtNewOperNode(oper, TYP_INT, hiOp, rotateByHi);
+
+        m_compiler->lvaIncRefCnts(loCopy);
+        m_compiler->lvaIncRefCnts(hiCopy);
+
+        Range().InsertBefore(tree, hiCopy, loOp1, loOp);
+        Range().InsertBefore(tree, rotateByLo, loResult);
+        Range().InsertBefore(tree, loCopy, hiOp1, hiOp);
+        Range().InsertBefore(tree, rotateByHi, hiResult);
+    }
+
+    Range().Remove(tree);
+
+    return FinalizeDecomposition(use, loResult, hiResult, hiResult);
+}
+
+//------------------------------------------------------------------------
 // DecomposeMul: Decompose GT_MUL. The only GT_MULs that make it to decompose are
 // those with the GTF_MUL_64RSLT flag set. These muls result in a mul instruction that
 // returns its result in two registers like GT_CALLs do. Additionally, these muls are
index 285d349..8965a0b 100644 (file)
@@ -52,6 +52,7 @@ private:
     GenTree* DecomposeNeg(LIR::Use& use);
     GenTree* DecomposeArith(LIR::Use& use);
     GenTree* DecomposeShift(LIR::Use& use);
+    GenTree* DecomposeRotate(LIR::Use& use);
     GenTree* DecomposeMul(LIR::Use& use);
     GenTree* DecomposeUMod(LIR::Use& use);
 
index 74a936e..98ea563 100644 (file)
@@ -102,12 +102,30 @@ public class Test
     [MethodImpl(MethodImplOptions.NoInlining)]
     static ulong rol64const()
     {
-        ulong value = flag() ? (ulong)0x123456789abcdef : (ulong)0x123456789abcdef;
+        ulong value = flag() ? (ulong)0x123456789abcdef : (ulong)0xabcdef123456789;
         int amount = 16;
         return (value >> (64 - amount)) | (value << amount);
     }
 
     [MethodImpl(MethodImplOptions.NoInlining)]
+    static ulong rol64_16(ulong value)
+    {
+        return (value >> (64 - 16)) | (value << 16);
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static ulong rol64_32(ulong value)
+    {
+        return (value >> (64 - 32)) | (value << 32);
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static ulong rol64_33(ulong value)
+    {
+        return (value >> (64 - 33)) | (value << 33);
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
     ulong rol64field(int amount)
     {
         return (field << amount) | (field >> (64 - amount));
@@ -128,12 +146,30 @@ public class Test
     [MethodImpl(MethodImplOptions.NoInlining)]
     static ulong ror64const()
     {
-        ulong value = flag() ? (ulong)0x123456789abcdef : (ulong)0x123456789abcdef;
+        ulong value = flag() ? (ulong)0x123456789abcdef : (ulong)0xabcdef123456789;
         int amount = flag() ? 5 : 5;
         return (value << (64 - amount)) | (value >> amount);
     }
 
     [MethodImpl(MethodImplOptions.NoInlining)]
+    static ulong ror64_5(ulong value)
+    {
+        return (value << (64 - 5)) | (value >> 5);
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static ulong ror64_32(ulong value)
+    {
+        return (value << (64 - 32)) | (value >> 32);
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static ulong ror64_33(ulong value)
+    {
+        return (value << (64 - 33)) | (value >> 33);
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
     static ulong ror64sfield(int amount)
     {
         return (s_field << (64 - amount)) | (s_field >> amount);
@@ -244,6 +280,21 @@ public class Test
             return Fail;
         }
 
+        if (rol64_16(0x123456789abcdef) != 0x456789abcdef0123)
+        {
+            return Fail;
+        }
+
+        if (rol64_32(0x123456789abcdef) != rol64(0x123456789abcdef, 32))
+        {
+            return Fail;
+        }
+
+        if (rol64_33(0x123456789abcdef) != rol64(0x123456789abcdef, 33))
+        {
+            return Fail;
+        }
+
         if (ror64(0x123456789abcdef, 0) != 0x123456789abcdef)
         {
             return Fail;
@@ -259,6 +310,21 @@ public class Test
             return Fail;
         }
 
+        if (ror64_5(0x123456789abcdef) != 0x78091a2b3c4d5e6f)
+        {
+            return Fail;
+        }
+
+        if (ror64_32(0x123456789abcdef) != ror64(0x123456789abcdef, 32))
+        {
+            return Fail;
+        }
+
+        if (ror64_33(0x123456789abcdef) != ror64(0x123456789abcdef, 33))
+        {
+            return Fail;
+        }
+
         if (rol32_call(0x12345678, 16) != 0x56781234)
         {
             return Fail;