From d24a3008cd1bbed78dd4e82414228da6e21361d2 Mon Sep 17 00:00:00 2001 From: Michelle McDaniel Date: Wed, 19 Oct 2016 10:43:15 -0700 Subject: [PATCH] Implement DecomposeRotate fgRecognizeAndMorphBitwiseRotation can potentially morph a tree with a TYP_LONG return type on x86 if the rotate amount is a GT_CNS_INT. This change implements DecomposeRotate for those rotate instructions. There are 5 cases: 1) Rotate by 0: do nothing. 2) Rotate by 32: swap hi and lo. 3) Rotate by < 32: produce a shld/shld (for GT_ROL) or a shrd/shrd (for GT_ROR). 4) Rotate by >= 32: swap hi and lo, subtract from the rotate amount 32, and do option 3. 5) Rotate by >= 64: do rotateAmount % 64 to get the rotate amount between 0 and 63, then do one of options 1-4. This change also updates CodegenBringUpTests\Rotate.cs to exercise these paths. --- src/jit/decomposelongs.cpp | 134 ++++++++++++++++++++++++++++ src/jit/decomposelongs.h | 1 + tests/src/JIT/CodeGenBringUpTests/Rotate.cs | 70 ++++++++++++++- 3 files changed, 203 insertions(+), 2 deletions(-) diff --git a/src/jit/decomposelongs.cpp b/src/jit/decomposelongs.cpp index 1e0f4df..a9fc6b8 100644 --- a/src/jit/decomposelongs.cpp +++ b/src/jit/decomposelongs.cpp @@ -244,6 +244,11 @@ GenTree* DecomposeLongs::DecomposeNode(GenTree* tree) nextNode = DecomposeShift(use); break; + case GT_ROL: + case GT_ROR: + nextNode = DecomposeRotate(use); + break; + case GT_LOCKADD: case GT_XADD: case GT_XCHG: @@ -1294,6 +1299,135 @@ GenTree* DecomposeLongs::DecomposeShift(LIR::Use& use) } //------------------------------------------------------------------------ +// DecomposeRotate: Decompose GT_ROL and GT_ROR with constant shift amounts. We can +// inspect the rotate amount and decompose to the appropriate node types, generating +// a shld/shld pattern for GT_ROL, a shrd/shrd pattern for GT_ROR, for most rotate +// amounts. +// +// Arguments: +// use - the LIR::Use object for the def that needs to be decomposed. +// +// Return Value: +// The next node to process. +// +GenTree* DecomposeLongs::DecomposeRotate(LIR::Use& use) +{ + GenTree* tree = use.Def(); + GenTree* gtLong = tree->gtGetOp1(); + GenTree* rotateByOp = tree->gtGetOp2(); + + genTreeOps oper = tree->OperGet(); + + assert((oper == GT_ROL) || (oper == GT_ROR)); + assert(rotateByOp->IsCnsIntOrI()); + + // For longs, we need to change rols into two GT_LSH_HIs and rors into two GT_RSH_LOs + // so we will get: + // + // shld lo, hi, rotateAmount + // shld hi, loCopy, rotateAmount + // + // or: + // + // shrd lo, hi, rotateAmount + // shrd hi, loCopy, rotateAmount + + if (oper == GT_ROL) + { + oper = GT_LSH_HI; + } + else + { + oper = GT_RSH_LO; + } + + unsigned int count = rotateByOp->gtIntCon.gtIconVal; + Range().Remove(rotateByOp); + + // Make sure the rotate amount is between 0 and 63. + assert((count < 64) && (count != 0)); + + GenTree* loOp1; + GenTree* hiOp1; + + if (count > 32) + { + // If count > 32, we swap hi and lo, and subtract 32 from count + hiOp1 = gtLong->gtGetOp1(); + loOp1 = gtLong->gtGetOp2(); + count -= 32; + } + else + { + loOp1 = gtLong->gtGetOp1(); + hiOp1 = gtLong->gtGetOp2(); + } + + GenTree* loResult; + GenTree* hiResult; + + if (count == 32) + { + // If the rotate amount is 32, then swap hi and lo + gtLong->gtOp.gtOp1 = hiOp1; + gtLong->gtOp.gtOp2 = loOp1; + + GenTree* next = tree->gtNext; + // Remove tree and don't do anything else. + Range().Remove(tree); + use.ReplaceWith(m_compiler, gtLong); + return next; + } + else + { + Range().Remove(gtLong); + loOp1 = RepresentOpAsLocalVar(loOp1, gtLong, >Long->gtOp.gtOp1); + hiOp1 = RepresentOpAsLocalVar(hiOp1, gtLong, >Long->gtOp.gtOp2); + + unsigned loOp1LclNum = loOp1->AsLclVarCommon()->gtLclNum; + unsigned hiOp1LclNum = hiOp1->AsLclVarCommon()->gtLclNum; + + if (count > 32) + { + Range().Remove(hiOp1); + Range().Remove(loOp1); + } + else + { + Range().Remove(loOp1); + Range().Remove(hiOp1); + } + + GenTree* rotateByHi = m_compiler->gtNewIconNode(count, TYP_INT); + GenTree* rotateByLo = m_compiler->gtNewIconNode(count, TYP_INT); + + // Create a GT_LONG that contains loOp1 and hiCopy. This will be used in codegen to + // generate the shld instruction + GenTree* hiCopy = m_compiler->gtNewLclvNode(hiOp1LclNum, TYP_INT); + GenTree* loOp = new (m_compiler, GT_LONG) GenTreeOp(GT_LONG, TYP_LONG, hiCopy, loOp1); + loResult = m_compiler->gtNewOperNode(oper, TYP_INT, loOp, rotateByLo); + + // Create a GT_LONG that contains loCopy and hiOp1. This will be used in codegen to + // generate the shld instruction + GenTree* loCopy = m_compiler->gtNewLclvNode(loOp1LclNum, TYP_INT); + GenTree* hiOp = new (m_compiler, GT_LONG) GenTreeOp(GT_LONG, TYP_LONG, loCopy, hiOp1); + hiResult = m_compiler->gtNewOperNode(oper, TYP_INT, hiOp, rotateByHi); + + m_compiler->lvaIncRefCnts(loCopy); + m_compiler->lvaIncRefCnts(hiCopy); + + Range().InsertBefore(tree, hiCopy, loOp1, loOp); + Range().InsertBefore(tree, rotateByLo, loResult); + Range().InsertBefore(tree, loCopy, hiOp1, hiOp); + Range().InsertBefore(tree, rotateByHi, hiResult); + } + + Range().Remove(tree); + + return FinalizeDecomposition(use, loResult, hiResult, hiResult); +} + +//------------------------------------------------------------------------ // DecomposeMul: Decompose GT_MUL. The only GT_MULs that make it to decompose are // those with the GTF_MUL_64RSLT flag set. These muls result in a mul instruction that // returns its result in two registers like GT_CALLs do. Additionally, these muls are diff --git a/src/jit/decomposelongs.h b/src/jit/decomposelongs.h index 285d349..8965a0b 100644 --- a/src/jit/decomposelongs.h +++ b/src/jit/decomposelongs.h @@ -52,6 +52,7 @@ private: GenTree* DecomposeNeg(LIR::Use& use); GenTree* DecomposeArith(LIR::Use& use); GenTree* DecomposeShift(LIR::Use& use); + GenTree* DecomposeRotate(LIR::Use& use); GenTree* DecomposeMul(LIR::Use& use); GenTree* DecomposeUMod(LIR::Use& use); diff --git a/tests/src/JIT/CodeGenBringUpTests/Rotate.cs b/tests/src/JIT/CodeGenBringUpTests/Rotate.cs index 74a936e..98ea563 100644 --- a/tests/src/JIT/CodeGenBringUpTests/Rotate.cs +++ b/tests/src/JIT/CodeGenBringUpTests/Rotate.cs @@ -102,12 +102,30 @@ public class Test [MethodImpl(MethodImplOptions.NoInlining)] static ulong rol64const() { - ulong value = flag() ? (ulong)0x123456789abcdef : (ulong)0x123456789abcdef; + ulong value = flag() ? (ulong)0x123456789abcdef : (ulong)0xabcdef123456789; int amount = 16; return (value >> (64 - amount)) | (value << amount); } [MethodImpl(MethodImplOptions.NoInlining)] + static ulong rol64_16(ulong value) + { + return (value >> (64 - 16)) | (value << 16); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static ulong rol64_32(ulong value) + { + return (value >> (64 - 32)) | (value << 32); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static ulong rol64_33(ulong value) + { + return (value >> (64 - 33)) | (value << 33); + } + + [MethodImpl(MethodImplOptions.NoInlining)] ulong rol64field(int amount) { return (field << amount) | (field >> (64 - amount)); @@ -128,12 +146,30 @@ public class Test [MethodImpl(MethodImplOptions.NoInlining)] static ulong ror64const() { - ulong value = flag() ? (ulong)0x123456789abcdef : (ulong)0x123456789abcdef; + ulong value = flag() ? (ulong)0x123456789abcdef : (ulong)0xabcdef123456789; int amount = flag() ? 5 : 5; return (value << (64 - amount)) | (value >> amount); } [MethodImpl(MethodImplOptions.NoInlining)] + static ulong ror64_5(ulong value) + { + return (value << (64 - 5)) | (value >> 5); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static ulong ror64_32(ulong value) + { + return (value << (64 - 32)) | (value >> 32); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static ulong ror64_33(ulong value) + { + return (value << (64 - 33)) | (value >> 33); + } + + [MethodImpl(MethodImplOptions.NoInlining)] static ulong ror64sfield(int amount) { return (s_field << (64 - amount)) | (s_field >> amount); @@ -244,6 +280,21 @@ public class Test return Fail; } + if (rol64_16(0x123456789abcdef) != 0x456789abcdef0123) + { + return Fail; + } + + if (rol64_32(0x123456789abcdef) != rol64(0x123456789abcdef, 32)) + { + return Fail; + } + + if (rol64_33(0x123456789abcdef) != rol64(0x123456789abcdef, 33)) + { + return Fail; + } + if (ror64(0x123456789abcdef, 0) != 0x123456789abcdef) { return Fail; @@ -259,6 +310,21 @@ public class Test return Fail; } + if (ror64_5(0x123456789abcdef) != 0x78091a2b3c4d5e6f) + { + return Fail; + } + + if (ror64_32(0x123456789abcdef) != ror64(0x123456789abcdef, 32)) + { + return Fail; + } + + if (ror64_33(0x123456789abcdef) != ror64(0x123456789abcdef, 33)) + { + return Fail; + } + if (rol32_call(0x12345678, 16) != 0x56781234) { return Fail; -- 2.7.4