Merge pull request #1830 from erozenfeld/RotateBits
authorEugene Rozenfeld <erozen@microsoft.com>
Thu, 22 Oct 2015 22:37:41 +0000 (15:37 -0700)
committerEugene Rozenfeld <erozen@microsoft.com>
Thu, 22 Oct 2015 22:37:41 +0000 (15:37 -0700)
     Generate efficient code for rotation patterns.

21 files changed:
src/jit/codegenarm64.cpp
src/jit/codegenxarch.cpp
src/jit/compiler.h
src/jit/emitxarch.cpp
src/jit/gentree.cpp
src/jit/gentree.h
src/jit/gtlist.h
src/jit/instr.cpp
src/jit/instrsxarch.h
src/jit/lclvars.cpp
src/jit/lower.cpp
src/jit/lower.h
src/jit/lowerarm.cpp
src/jit/lowerarm64.cpp
src/jit/lowerxarch.cpp
src/jit/lsra.cpp
src/jit/morph.cpp
src/jit/optcse.cpp
src/jit/valuenum.cpp
tests/src/JIT/CodeGenBringUpTests/Rotate.cs [new file with mode: 0644]
tests/src/JIT/CodeGenBringUpTests/Rotate.csproj [new file with mode: 0644]

index 93ad2a3..952a7d0 100644 (file)
@@ -2411,6 +2411,7 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
     case GT_LSH:
     case GT_RSH:
     case GT_RSZ:
+    case GT_ROR:
         genCodeForShift(treeNode->gtGetOp1(), treeNode->gtGetOp2(), treeNode);
         // genCodeForShift() calls genProduceReg()
         break;
@@ -4432,6 +4433,9 @@ instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type)
         case GT_OR:
             ins = INS_orr;
             break;
+        case GT_ROR:
+            ins = INS_ror;
+            break;
         case GT_RSH:
             ins = INS_asr;
             break;
index 7064862..d55a6b3 100644 (file)
@@ -1516,6 +1516,8 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
     case GT_LSH:
     case GT_RSH:
     case GT_RSZ:
+    case GT_ROL:
+    case GT_ROR:
         genCodeForShift(treeNode->gtGetOp1(), treeNode->gtGetOp2(), treeNode);
         // genCodeForShift() calls genProduceReg()
         break;
@@ -2584,7 +2586,9 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
                     {
                         if (data->OperGet() == GT_LSH ||
                             data->OperGet() == GT_RSH ||
-                            data->OperGet() == GT_RSZ)
+                            data->OperGet() == GT_RSZ ||
+                            data->OperGet() == GT_ROL ||
+                            data->OperGet() == GT_ROR)
                         {
                             genCodeForShift(addr, data->gtOp.gtOp2, data);
                         }
@@ -4644,11 +4648,13 @@ instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type)
     {
     case GT_ADD: ins = INS_add; break;
     case GT_AND: ins = INS_and; break;
-    case GT_MUL: ins = INS_imul; break;
     case GT_LSH: ins = INS_shl; break;
+    case GT_MUL: ins = INS_imul; break;
     case GT_NEG: ins = INS_neg; break;
     case GT_NOT: ins = INS_not; break;
     case GT_OR:  ins = INS_or;  break;
+    case GT_ROL: ins = INS_rol;  break;
+    case GT_ROR: ins = INS_ror;  break;
     case GT_RSH: ins = INS_sar; break;
     case GT_RSZ: ins = INS_shr; break;
     case GT_SUB: ins = INS_sub; break;
@@ -4660,10 +4666,10 @@ instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type)
 }
 
 /** Generates the code sequence for a GenTree node that
- * represents a bit shift operation (<<, >>, >>>).
+ * represents a bit shift or rotate operation (<<, >>, >>>, rol, ror).
  *
- * Arguments: operand:  the value to be shifted by shiftBy bits.
- *            shiftBy:  the number of bits to shift the operand.
+ * Arguments: operand:  the value to be shifted or rotated by shiftBy bits.
+ *            shiftBy:  the number of bits to shift or rotate the operand.
  *            parent:   the actual bitshift node (that specifies the
  *                      type of bitshift to perform.
  *
@@ -4758,6 +4764,12 @@ void CodeGen::genCodeForShift(GenTreePtr operand, GenTreePtr shiftBy,
                 case INS_shr:
                     ins = INS_shr_1;
                     break;
+                case INS_rol:
+                    ins = INS_rol_1;
+                    break;
+                case INS_ror:
+                    ins = INS_ror_1;
+                    break;
                 default:
                     // leave 'ins' unchanged
                     break;
@@ -4777,6 +4789,12 @@ void CodeGen::genCodeForShift(GenTreePtr operand, GenTreePtr shiftBy,
                 case INS_shr:
                     ins = INS_shr_N;
                     break;
+                case INS_rol:
+                    ins = INS_rol_N;
+                    break;
+                case INS_ror:
+                    ins = INS_ror_N;
+                    break;
                 default:
                     // leave 'ins' unchanged
                     break;
index bc851dc..13c64b7 100644 (file)
@@ -4435,7 +4435,9 @@ private:
     void                fgFixupIfCallArg(ArrayStack<GenTree *> *parentStack,
                                          GenTree *oldChild, 
                                          GenTree *newChild);
-                                  
+    //                  Recognize a rotation pattern and convert into a GT_ROL or a GT_ROR node.
+    GenTreePtr          fgMorphRotation(GenTreePtr tree);
+
     //-------- Determine the order in which the trees will be evaluated -------
 
     unsigned            fgTreeSeqNum;
index d6de1f2..d052ce2 100644 (file)
@@ -3389,6 +3389,8 @@ void                emitter::emitIns_R_I(instruction ins,
 
     case INS_rcl_N:
     case INS_rcr_N:
+    case INS_rol_N:
+    case INS_ror_N:
     case INS_shl_N:
     case INS_shr_N:
     case INS_sar_N:
@@ -3968,6 +3970,8 @@ void                emitter::emitIns_C_I   (instruction  ins,
     {
     case INS_rcl_N:
     case INS_rcr_N:
+    case INS_rol_N:
+    case INS_ror_N:
     case INS_shl_N:
     case INS_shr_N:
     case INS_sar_N:
@@ -4146,6 +4150,8 @@ void                emitter::emitIns_I_AR  (instruction ins,
     {
     case INS_rcl_N:
     case INS_rcr_N:
+    case INS_rol_N:
+    case INS_ror_N:
     case INS_shl_N:
     case INS_shr_N:
     case INS_sar_N:
@@ -4212,6 +4218,8 @@ void                emitter::emitIns_I_AI  (instruction ins,
     {
     case INS_rcl_N:
     case INS_rcr_N:
+    case INS_rol_N:
+    case INS_ror_N:
     case INS_shl_N:
     case INS_shr_N:
     case INS_sar_N:
@@ -4474,6 +4482,8 @@ void                emitter::emitIns_I_ARR (instruction ins,
     {
     case INS_rcl_N:
     case INS_rcr_N:
+    case INS_rol_N:
+    case INS_ror_N:
     case INS_shl_N:
     case INS_shr_N:
     case INS_sar_N:
@@ -4620,6 +4630,8 @@ void                emitter::emitIns_I_ARX (instruction ins,
     {
     case INS_rcl_N:
     case INS_rcr_N:
+    case INS_rol_N:
+    case INS_ror_N:
     case INS_shl_N:
     case INS_shr_N:
     case INS_sar_N:
@@ -4768,6 +4780,8 @@ void                emitter::emitIns_I_AX (instruction ins,
     {
     case INS_rcl_N:
     case INS_rcr_N:
+    case INS_rol_N:
+    case INS_ror_N:
     case INS_shl_N:
     case INS_shr_N:
     case INS_sar_N:
@@ -5050,6 +5064,8 @@ void                emitter::emitIns_S_I  (instruction ins,
     {
     case INS_rcl_N:
     case INS_rcr_N:
+    case INS_rol_N:
+    case INS_ror_N:
     case INS_shl_N:
     case INS_shr_N:
     case INS_sar_N:
@@ -6363,6 +6379,8 @@ void                emitter::emitDispShift(instruction ins, int cnt)
     {
     case INS_rcl_1:
     case INS_rcr_1:
+    case INS_rol_1:
+    case INS_ror_1:
     case INS_shl_1:
     case INS_shr_1:
     case INS_sar_1:
@@ -6371,6 +6389,8 @@ void                emitter::emitDispShift(instruction ins, int cnt)
 
     case INS_rcl:
     case INS_rcr:
+    case INS_rol:
+    case INS_ror:
     case INS_shl:
     case INS_shr:
     case INS_sar:
@@ -6379,6 +6399,8 @@ void                emitter::emitDispShift(instruction ins, int cnt)
 
     case INS_rcl_N:
     case INS_rcr_N:
+    case INS_rol_N:
+    case INS_ror_N:
     case INS_shl_N:
     case INS_shr_N:
     case INS_sar_N:
index 3c06925..d6bd3af 100644 (file)
@@ -3694,6 +3694,8 @@ COMMON_CNS:
         case GT_LSH:
         case GT_RSH:
         case GT_RSZ:
+        case GT_ROL:
+        case GT_ROR:
         case GT_ASG_LSH:
         case GT_ASG_RSH:
         case GT_ASG_RSZ:
@@ -9248,6 +9250,8 @@ GenTreePtr              Compiler::gtFoldExprSpecial(GenTreePtr tree)
     case GT_LSH:
     case GT_RSH:
     case GT_RSZ:
+    case GT_ROL:
+    case GT_ROR:
     case GT_ASG_LSH:
     case GT_ASG_RSH:
     case GT_ASG_RSZ:
@@ -9943,9 +9947,13 @@ CHK_OVF:
         case GT_LSH: i1 <<= (i2 & 0x1f); break;
         case GT_RSH: i1 >>= (i2 & 0x1f); break;
         case GT_RSZ:
-                /* logical shift -> make it unsigned to propagate the sign bit */
+                /* logical shift -> make it unsigned to not propagate the sign bit */
                 i1 = UINT32(i1) >> (i2 & 0x1f);
             break;
+        case GT_ROL: i1 = (i1 << (i2 & 0x1f)) | (UINT32(i1) >> ((32 - i2) & 0x1f));
+            break;
+        case GT_ROR: i1 = (i1 << ((32 - i2) & 0x1f)) | (UINT32(i1) >> (i2 & 0x1f));
+            break;
 
         /* DIV and MOD can generate an INT 0 - if division by 0
          * or overflow - when dividing MIN by -1 */
@@ -10284,11 +10292,15 @@ LNG_ADD_CHKOVF:
         case GT_XOR: lval1 ^= lval2; break;
         case GT_AND: lval1 &= lval2; break;
 
-        case GT_LSH: lval1 <<= (op2->gtIntConCommon.IconValue() & 0x3f); break;
-        case GT_RSH: lval1 >>= (op2->gtIntConCommon.IconValue() & 0x3f); break;
+        case GT_LSH: lval1 <<= (lval2 & 0x3f); break;
+        case GT_RSH: lval1 >>= (lval2 & 0x3f); break;
         case GT_RSZ:
-                /* logical shift -> make it unsigned to propagate the sign bit */
-                lval1 = UINT64(lval1) >> (op2->gtIntConCommon.IconValue() & 0x3f);
+                /* logical shift -> make it unsigned to not propagate the sign bit */
+                lval1 = UINT64(lval1) >> (lval2 & 0x3f);
+            break;
+        case GT_ROL: lval1 = (lval1 << (lval2 & 0x3f)) | (UINT64(lval1) >> ((64 - lval2) & 0x3f));
+            break;
+        case GT_ROR: lval1 = (lval1 << ((64 - lval2) & 0x3f)) | (UINT64(lval1) >> (lval2 & 0x3f));
             break;
 
             //Both DIV and IDIV on x86 raise an exception for min_int (and min_long) / -1.  So we preserve
index 1402445..431f5c8 100644 (file)
@@ -1086,23 +1086,25 @@ public:
     int             OperIsArithmetic() const
     {
         genTreeOps op = OperGet();
-        return     op==GT_ADD 
-                || op==GT_SUB        
-                || op==GT_MUL 
+        return     op==GT_ADD
+                || op==GT_SUB
+                || op==GT_MUL
                 || op==GT_DIV
                 || op==GT_MOD
-        
+
                 || op==GT_UDIV
                 || op==GT_UMOD
 
-                || op==GT_OR 
+                || op==GT_OR
                 || op==GT_XOR
                 || op==GT_AND
 
                 || op==GT_LSH
                 || op==GT_RSH
-                || op==GT_RSZ;        
-        
+                || op==GT_RSZ
+
+                || op==GT_ROL
+                || op==GT_ROR;
     }
 
     static
@@ -3506,6 +3508,8 @@ inline bool GenTree::RequiresNonNullOp2(genTreeOps oper)
     case GT_LSH:
     case GT_RSH:
     case GT_RSZ:
+    case GT_ROL:
+    case GT_ROR:
     case GT_INDEX:
     case GT_ASG:
     case GT_ASG_ADD:
index 9283374..a23d8ad 100644 (file)
@@ -108,6 +108,8 @@ GTNODE(GT_AND        , "&"          ,1,GTK_BINOP|GTK_LOGOP)
 GTNODE(GT_LSH        , "<<"         ,0,GTK_BINOP)
 GTNODE(GT_RSH        , ">>"         ,0,GTK_BINOP)
 GTNODE(GT_RSZ        , ">>>"        ,0,GTK_BINOP)
+GTNODE(GT_ROL        , "rol"        ,0,GTK_BINOP)
+GTNODE(GT_ROR        , "ror"        ,0,GTK_BINOP)
 GTNODE(GT_MULHI      , "mulhi"      ,1,GTK_BINOP) // returns high bits (top N bits of the 2N bit result of an NxN multiply)
 
 GTNODE(GT_ASG        , "="          ,0,GTK_BINOP|GTK_ASGOP)
index d1cc16f..c63952a 100644 (file)
@@ -2627,6 +2627,8 @@ void        CodeGen::inst_RV_SH(instruction  ins,
 
     assert(ins == INS_rcl  ||
            ins == INS_rcr  ||
+           ins == INS_rol  ||
+           ins == INS_ror  ||
            ins == INS_shl  ||
            ins == INS_shr  ||
            ins == INS_sar);
@@ -2639,6 +2641,8 @@ void        CodeGen::inst_RV_SH(instruction  ins,
 
         assert(INS_rcl + 1 == INS_rcl_1);
         assert(INS_rcr + 1 == INS_rcr_1);
+        assert(INS_rol + 1 == INS_rol_1);
+        assert(INS_ror + 1 == INS_ror_1);
         assert(INS_shl + 1 == INS_shl_1);
         assert(INS_shr + 1 == INS_shr_1);
         assert(INS_sar + 1 == INS_sar_1);
@@ -2651,6 +2655,8 @@ void        CodeGen::inst_RV_SH(instruction  ins,
 
         assert(INS_rcl + 2 == INS_rcl_N);
         assert(INS_rcr + 2 == INS_rcr_N);
+        assert(INS_rol + 2 == INS_rol_N);
+        assert(INS_ror + 2 == INS_ror_N);
         assert(INS_shl + 2 == INS_shl_N);
         assert(INS_shr + 2 == INS_shr_N);
         assert(INS_sar + 2 == INS_sar_N);
index c0be9aa..9f8a995 100644 (file)
@@ -339,6 +339,13 @@ INST2(ret    , "ret"          , 0, IUM_RD, 0, 0, 0x0000C3, 0x0000C2)
 INST2(loop   , "loop"         , 0, IUM_RD, 0, 0, BAD_CODE, 0x0000E2)
 INST2(call   , "call"         , 0, IUM_RD, 0, 1, 0x0010FF, 0x0000E8)
 
+INST2(rol    , "rol"          , 0, IUM_RW, 0, 1, 0x0000D2, BAD_CODE)
+INST2(rol_1  , "rol"          , 0, IUM_RW, 0, 1, 0x0000D0, 0x0000D0)
+INST2(rol_N  , "rol"          , 0, IUM_RW, 0, 1, 0x0000C0, 0x0000C0)
+INST2(ror    , "ror"          , 0, IUM_RW, 0, 1, 0x0008D2, BAD_CODE)
+INST2(ror_1  , "ror"          , 0, IUM_RW, 0, 1, 0x0008D0, 0x0008D0)
+INST2(ror_N  , "ror"          , 0, IUM_RW, 0, 1, 0x0008C0, 0x0008C0)
+
 INST2(rcl    , "rcl"          , 0, IUM_RW, 1, 1, 0x0010D2, BAD_CODE)
 INST2(rcl_1  , "rcl"          , 0, IUM_RW, 1, 1, 0x0010D0, 0x0010D0)
 INST2(rcl_N  , "rcl"          , 0, IUM_RW, 1, 1, 0x0010C0, 0x0010C0)
index b9e89f1..15c8504 100644 (file)
@@ -2899,7 +2899,9 @@ void                Compiler::lvaMarkLclRefs(GenTreePtr tree)
 
     if  (tree->gtOper == GT_LSH ||
          tree->gtOper == GT_RSH ||
-         tree->gtOper == GT_RSZ)
+         tree->gtOper == GT_RSZ ||
+         tree->gtOper == GT_ROL ||
+         tree->gtOper == GT_ROR)
     {
         if  (tree->gtType == TYP_INT)
         {
index 5882ecf..d94b0ba 100644 (file)
@@ -400,6 +400,8 @@ void Lowering::DecomposeNode(GenTreePtr* pTree, Compiler::fgWalkData* data)
     case GT_LSH:
     case GT_RSH:
     case GT_RSZ:
+    case GT_ROL:
+    case GT_ROR:
     case GT_MULHI:
         NYI("Arithmetic binary operators on TYP_LONG");
         break;
@@ -525,6 +527,11 @@ void Lowering::LowerNode(GenTreePtr* ppTree, Compiler::fgWalkData* data)
         }
         break;
 
+    case GT_ROL:
+    case GT_ROR:
+        LowerRotate(*ppTree);
+        break;
+
 #ifdef FEATURE_SIMD
     case GT_SIMD:
         if ((*ppTree)->TypeGet() == TYP_SIMD12)
index 6754b7b..03f7a6e 100644 (file)
@@ -161,6 +161,7 @@ private:
     void HandleIndirAddressExpression(GenTree *indirTree, GenTree* tree);
     void LowerGCWriteBarrier(GenTree *tree);
     void LowerArrElem(GenTree **ppTree, Compiler::fgWalkData* data);
+    void LowerRotate(GenTree *tree);
 
     // Utility functions
     void MorphBlkIntoHelperCall         (GenTreePtr pTree, GenTreePtr treeStmt);
index d99347d..6669370 100644 (file)
@@ -41,6 +41,11 @@ void Lowering::LowerCntBlockOp(GenTreePtr *ppTree)
     NYI_ARM("ARM Lowering for BlockOp");
 }
 
+void Lowering::LowerRotate(GenTreePtr tree)
+{
+    NYI_ARM("ARM Lowering for ROL and ROR");
+}
+
 void Lowering::TreeNodeInfoInitCall(GenTree *tree, TreeNodeInfo &info, 
                                     int &srcCount, // out 
                                     int &dstCount  // out
index 9467813..f05fd80 100644 (file)
@@ -486,6 +486,7 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt)
         case GT_LSH:
         case GT_RSH:
         case GT_RSZ:
+        case GT_ROR:
             {
                 info->srcCount = 2;
                 info->dstCount = 1;
@@ -1625,6 +1626,31 @@ void Lowering::LowerCast( GenTreePtr* ppTree)
     }
 }
 
+void Lowering::LowerRotate(GenTreePtr tree)
+{
+    if (tree->OperGet() == GT_ROL)
+    {
+        // There is no ROL instruction on ARM. Convert rol into ROR.
+        GenTreePtr rotatedValue = tree->gtOp.gtOp1;
+        unsigned rotatedValueBitSize = genTypeSize(rotatedValue->gtType) * 8;
+        GenTreePtr rotateLeftIndexNode = tree->gtOp.gtOp2;
+
+        if (rotateLeftIndexNode->IsCnsIntOrI())
+        {
+            ssize_t   rotateLeftIndex = rotateLeftIndexNode->gtIntCon.gtIconVal;
+            ssize_t   rotateRightIndex = rotatedValueBitSize - rotateLeftIndex;
+            rotateIndexNode->gtIntCon.gtIconVal = rotateRightIndex;
+        }
+        else
+        {
+            GenTreePtr tmp = gtNewOperNode(GT_NEG, genActualType(rotateLeftIndexNode->gtType), rotateLeftIndexNode);
+            rotateLeftIndexNode->InsertAfterSelf(tmp);
+            tree->gtOp.gtOp2 = tmp;
+        }
+        tree->ChangeOper(GT_ROR);
+    }
+}
+
 // TODO-Cleanup: move to Lower.cpp?
 void Lowering::SetStoreIndOpCounts(GenTreePtr storeInd, GenTreePtr indirCandidate)
 {
index a7b4600..c9df42e 100644 (file)
@@ -719,6 +719,8 @@ void Lowering::TreeNodeInfoInit(GenTree* stmt)
         case GT_LSH:
         case GT_RSH:
         case GT_RSZ:
+        case GT_ROL:
+        case GT_ROR:
         {
             info->srcCount = 2;
             info->dstCount = 1;
@@ -2914,7 +2916,9 @@ bool Lowering::LowerStoreInd(GenTreePtr tree)
             oper != GT_XOR &&
             oper != GT_LSH &&
             oper != GT_RSH &&
-            oper != GT_RSZ)
+            oper != GT_RSZ &&
+            oper != GT_ROL &&
+            oper != GT_ROR)
         {
             JITDUMP("Lower of StoreInd didn't mark the node as self contained\n");
             JITDUMP("because the node operator not yet supported:\n");
@@ -2924,7 +2928,9 @@ bool Lowering::LowerStoreInd(GenTreePtr tree)
 
         if ((oper == GT_LSH ||
              oper == GT_RSH ||
-             oper == GT_RSZ) &&
+             oper == GT_RSZ ||
+             oper == GT_ROL ||
+             oper == GT_ROR) &&
             varTypeIsSmall(tree))
         {
             //In ldind, Integer values smaller than 4 bytes, a boolean, or a character converted to 4 bytes by sign or zero-extension as appropriate.
@@ -3017,6 +3023,11 @@ bool Lowering::LowerStoreInd(GenTreePtr tree)
     }
 }
 
+void Lowering::LowerRotate(GenTreePtr tree)
+{
+    // xarch supports both ROL and ROR instructions so no lowering is required.
+}
+
 void Lowering::SetStoreIndOpCounts(GenTreePtr storeInd, GenTreePtr indirCandidate)
 {
     GenTreePtr indirDst = storeInd->gtGetOp1();
index 8f11af9..5515f83 100644 (file)
@@ -2479,6 +2479,8 @@ LinearScan::getKillSetForNode(GenTree* tree)
     case GT_LSH:
     case GT_RSH:
     case GT_RSZ:
+    case GT_ROL:
+    case GT_ROR:
         if (tree->gtLsraInfo.isHelperCallWithKills)
         {
             killMask = RBM_CALLEE_TRASH;
index b000f58..2cdbe0e 100644 (file)
@@ -10324,6 +10324,17 @@ CM_ADD_OP:
             }
 #endif // LEA_AVAILABLE
         }
+        else if(oper == GT_OR)
+        {
+            tree = fgMorphRotation(tree);
+
+            // fgMorphRotation may return a new tree
+            oper = tree->OperGet();
+            typ = tree->TypeGet();
+            op1 = tree->gtOp.gtOp1;
+            op2 = tree->gtOp.gtOp2;
+        }
+
         break;
 
     case GT_CHS:
@@ -10929,7 +10940,8 @@ CM_ADD_OP:
                 /* for the shift nodes the type of op2 can differ from the tree type */
                 if ((typ == TYP_LONG) && (genActualType(op2->gtType) == TYP_INT))
                 {
-                    noway_assert((oper == GT_LSH) || (oper == GT_RSH) || (oper == GT_RSZ));
+                    noway_assert((oper == GT_LSH) || (oper == GT_RSH) || (oper == GT_RSZ) ||
+                                 (oper == GT_ROL) || (oper == GT_ROR));
 
                     GenTreePtr commaOp2 = op2->gtOp.gtOp2;
 
@@ -11864,6 +11876,208 @@ GenTree* Compiler::fgMorphDivByConst(GenTreeOp* tree)
     return result;
 }
 
+//------------------------------------------------------------------------------
+// fgMorphRotation : Check if the tree represents a left or right rotation. If so, return
+//                   an equivalent GT_ROL or GT_ROR tree; otherwise, return the original tree.
+//
+// Arguments:
+//    tree  - tree to check for a rotation pattern
+//
+// Return Value:
+//    An equivalent GT_ROL or GT_ROR tree if a pattern is found; original tree otherwise.
+//
+// Assumption:
+//    The input is a GT_OR tree.
+
+GenTreePtr Compiler::fgMorphRotation(GenTreePtr tree)
+{
+#ifndef LEGACY_BACKEND
+    //
+    // Check for a rotation pattern, e.g.,
+    //
+    //                         OR                      ROL
+    //                      /      \                   / \
+    //                    LSH      RSZ      ->        x   y
+    //                    / \      / \
+    //                   x  AND   x  AND
+    //                      / \      / \
+    //                     y  31   ADD  31
+    //                             / \
+    //                            NEG 32
+    //                             |
+    //                             y
+
+    genTreeOps      oper = tree->OperGet();
+    noway_assert(oper == GT_OR);
+
+    if ((tree->gtFlags & GTF_ALL_EFFECT) != 0)
+    {
+        return tree; // Can't do anything due to side effects.
+    }
+
+    // Check if we have an LSH on one side of the OR and an RSZ on the other side.
+    GenTreePtr op1 = tree->gtGetOp1();
+    GenTreePtr op2 = tree->gtGetOp2();
+    GenTreePtr leftShiftTree = nullptr;
+    GenTreePtr rightShiftTree = nullptr;
+    if ((op1->OperGet() == GT_LSH) && (op2->OperGet() == GT_RSZ))
+    {
+        leftShiftTree = op1;
+        rightShiftTree = op2;
+    }
+    else if ((op1->OperGet() == GT_RSZ) && (op2->OperGet() == GT_LSH))
+    {
+        leftShiftTree = op2;
+        rightShiftTree = op1;
+    }
+    else
+    {
+        return tree;
+    }
+
+    // Check if the trees representing the value to shift are identical.
+    // We already checked that there are no side effects above.
+    if (GenTree::Compare(leftShiftTree->gtGetOp1(), rightShiftTree->gtGetOp1()))
+    {
+        GenTreePtr rotatedValue = leftShiftTree->gtGetOp1();
+        ssize_t rotatedValueBitSize = genTypeSize(rotatedValue->gtType) * 8;
+        noway_assert((rotatedValueBitSize == 32) || (rotatedValueBitSize == 64));
+        GenTreePtr leftShiftIndex = leftShiftTree->gtGetOp2();
+        GenTreePtr rightShiftIndex = rightShiftTree->gtGetOp2();
+
+        // The shift index may be masked. At least (rotatedValueBitSize - 1) lower bits
+        // shouldn't be masked for the transformation to be valid. If additional
+        // higher bits are not masked, the transformation is still valid since the result
+        // of MSIL shift instructions is unspecified if the shift amount is greater or equal
+        // than the width of the value being shifted.
+        ssize_t minimalMask = rotatedValueBitSize - 1;
+        ssize_t leftShiftMask = -1;
+        ssize_t rightShiftMask = -1;
+
+        if ((leftShiftIndex->OperGet() == GT_AND))
+        {
+            if (leftShiftIndex->gtGetOp2()->IsCnsIntOrI())
+            {
+                leftShiftMask = leftShiftIndex->gtGetOp2()->gtIntCon.gtIconVal;
+                leftShiftIndex = leftShiftIndex->gtGetOp1();
+            }
+            else
+            {
+                return tree;
+            }
+        }
+
+        if ((rightShiftIndex->OperGet() == GT_AND))
+        {
+            if (rightShiftIndex->gtGetOp2()->IsCnsIntOrI())
+            {
+                rightShiftMask = rightShiftIndex->gtGetOp2()->gtIntCon.gtIconVal;
+                rightShiftIndex = rightShiftIndex->gtGetOp1();
+            }
+            else
+            {
+                return tree;
+            }
+        }
+
+        if (((minimalMask & leftShiftMask) != minimalMask) ||
+            ((minimalMask & rightShiftMask) != minimalMask))
+        {
+            // The shift index is overmasked, e.g., we have
+            // something like (x << y & 15) or
+            // (x >> (32 - y) & 15 with 32 bit x.
+            // The transformation is not valid.
+            return tree;
+        }
+
+        GenTreePtr shiftIndexWithAdd = nullptr;
+        GenTreePtr shiftIndexWithoutAdd = nullptr;
+        genTreeOps rotateOp = GT_NONE;
+        GenTreePtr rotateIndex = nullptr;
+
+        if (leftShiftIndex->OperGet() == GT_ADD)
+        {
+            shiftIndexWithAdd = leftShiftIndex;
+            shiftIndexWithoutAdd = rightShiftIndex;
+            rotateOp = GT_ROR;
+        }
+        else if (rightShiftIndex->OperGet() == GT_ADD)
+        {
+            shiftIndexWithAdd = rightShiftIndex;
+            shiftIndexWithoutAdd = leftShiftIndex;
+            rotateOp = GT_ROL;
+        }
+
+        if (shiftIndexWithAdd != nullptr)
+        {
+            if (shiftIndexWithAdd->gtGetOp2()->IsCnsIntOrI())
+            {
+                if (shiftIndexWithAdd->gtGetOp2()->gtIntCon.gtIconVal == rotatedValueBitSize)
+                {
+                    if (shiftIndexWithAdd->gtGetOp1()->OperGet() == GT_NEG)
+                    {
+                        if (GenTree::Compare(shiftIndexWithAdd->gtGetOp1()->gtGetOp1(), shiftIndexWithoutAdd))
+                        {
+                            // We found one of these patterns:
+                            // (x << (y & M)) | (x >>> ((-y + N) & M))
+                            // (x << y) | (x >>> (-y + N))
+                            // (x >>> (y & M)) | (x << ((-y + N) & M))
+                            // (x >>> y) | (x << (-y + N))
+                            // where N == bitsize(x), M is const, and
+                            // M & (N - 1) == N - 1
+
+#ifndef _TARGET_64BIT_
+                            if (!shiftIndexWithoutAdd->IsCnsIntOrI() && (rotatedValueBitSize == 64))
+                            {
+                                // TODO: we need to handle variable-sized long shifts specially on x86.
+                                // GT_LSH, GT_RSH, and GT_RSZ have helpers for this case. We may need
+                                // to add helpers for GT_ROL and GT_ROR.
+                                NYI("Rotation of a long value by variable amount");
+                            }
+#endif
+
+                            rotateIndex = shiftIndexWithoutAdd;
+                        }
+                    }
+                }
+            }
+        }
+        else if ((leftShiftIndex->IsCnsIntOrI() &&
+                  rightShiftIndex->IsCnsIntOrI()))
+        {
+            if (leftShiftIndex->gtIntCon.gtIconVal +
+                rightShiftIndex->gtIntCon.gtIconVal == rotatedValueBitSize)
+            {
+                // We found this pattern:
+                // (x << c1) | (x >>> c2)
+                // where c1 and c2 are const and c1 + c2 == bitsize(x)
+                rotateOp = GT_ROL;
+                rotateIndex = leftShiftIndex;
+            }
+        }
+
+        if (rotateIndex != nullptr)
+        {
+            noway_assert((rotateOp == GT_ROL) || (rotateOp == GT_ROR));
+
+            // We can use the same tree only during global morph; reusing the tree in a later morph
+            // may invalidate value numbers.
+            if (fgGlobalMorph)
+            {
+                tree->gtOp.gtOp1 = rotatedValue;
+                tree->gtOp.gtOp2 = rotateIndex;
+                tree->ChangeOper(rotateOp);
+            }
+            else
+            {
+                tree = gtNewOperNode(rotateOp, genActualType(rotatedValue->gtType), rotatedValue, rotateIndex);
+            }
+            return tree;
+        }
+    }
+#endif //LEGACY_BACKEND
+    return tree;
+}
 
 #if !CPU_HAS_FP_SUPPORT
 GenTreePtr Compiler::fgMorphToEmulatedFP(GenTreePtr tree)
@@ -12054,7 +12268,7 @@ GenTreePtr Compiler::fgMorphToEmulatedFP(GenTreePtr tree)
 
 /*****************************************************************************
  *
- *  Transform the given tree for code generation and returns an equivalent tree.
+ *  Transform the given tree for code generation and return an equivalent tree.
  */
 
 
@@ -15198,6 +15412,8 @@ Compiler::fgWalkResult      Compiler::fgMarkAddrTakenLocalsPreCB(GenTreePtr* pTr
     case GT_LSH:
     case GT_RSH:
     case GT_RSZ:
+    case GT_ROL:
+    case GT_ROR:
     case GT_EQ:
     case GT_NE:
     case GT_LT:
index adc32af..c188f4a 100644 (file)
@@ -2022,6 +2022,8 @@ bool                Compiler::optIsCSEcandidate(GenTreePtr tree)
     case GT_XOR:
     case GT_RSH:
     case GT_RSZ:
+    case GT_ROL:
+    case GT_ROR:
         return  true;     // CSE these Binary Operators 
 
     case GT_ADD:          // Check for ADDRMODE flag on these Binary Operators 
index 6178cdf..9476e54 100644 (file)
@@ -345,6 +345,25 @@ T ValueNumStore::EvalOpIntegral(VNFunc vnf, T v0, T v1, ValueNum* pExcSet)
         {
             return UINT32(v0) >> v1;
         }
+    case GT_ROL:
+        if (sizeof(T) == 8)
+        {
+            return (v0 << v1) | (UINT64(v0) >> (64 - v1));
+        }
+        else
+        {
+            return (v0 << v1) | (UINT32(v0) >> (32 - v1));
+        }
+
+    case GT_ROR:
+        if (sizeof(T) == 8)
+        {
+            return (v0 << (64 - v1)) | (UINT64(v0) >> v1);
+        }
+        else
+        {
+            return (v0 << (32 - v1)) | (UINT32(v0) >> v1);
+        }
 
     case GT_DIV:
     case GT_MOD:
@@ -1050,8 +1069,12 @@ ValueNum ValueNumStore::VNForFunc(var_types typ, VNFunc func, ValueNum arg0VN, V
                 case GT_LSH:
                 case GT_RSH:
                 case GT_RSZ:
+                case GT_ROL:
+                case GT_ROR:
                     // (x << 0) => x
                     // (x >> 0) => x
+                    // (x rol 0) => x
+                    // (x ror 0) => x
                     ZeroVN = VNZeroForType(typ);
                     if (arg1VN == ZeroVN)
                         return arg0VN;
diff --git a/tests/src/JIT/CodeGenBringUpTests/Rotate.cs b/tests/src/JIT/CodeGenBringUpTests/Rotate.cs
new file mode 100644 (file)
index 0000000..2218741
--- /dev/null
@@ -0,0 +1,236 @@
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE file in the project root for full license information.
+//
+
+
+using System;
+using System.Runtime.CompilerServices;
+
+public class Test
+{
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static uint rol32(uint value, int amount)
+    {
+        return (value << amount) | (value >> (32 - amount));
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static uint rol32_1(uint value)
+    {
+        return (value << 1) | (value >> (32 - 1));
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static uint rol32_3(uint value)
+    {
+        return (value << 3) | (value >> (32 - 3));
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static uint rol32comm(uint value, int amount)
+    {
+        return  (value >> (32 - amount)) | (value << amount);
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static bool flag()
+    {
+        return true;
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static uint rol32const()
+    {
+        uint value = flag() ? (uint)0x12345678 : (uint)0x12345678;
+        int amount = 16;
+        return  (value >> (32 - amount)) | (value << amount);
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static uint ror32(uint value, int amount)
+    {
+        return (value << ((32 - amount))) | (value >> amount);
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static uint ror32comm(uint value, int amount)
+    {
+        return (value >> amount) | (value << ((32 - amount)));
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static uint ror32const()
+    {
+        uint value = flag() ? (uint)0x12345678 : (uint)0x12345678;
+        int amount = flag() ? 12 : 12;
+        return (value >> amount) | (value << ((32 - amount)));
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static ulong rol64(ulong value, int amount)
+    {
+        return (value << amount) | (value >> (64 - amount));
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static ulong rol64comm(ulong value, int amount)
+    {
+        return  (value >> (64 - amount)) | (value << amount);
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static ulong rol64const()
+    {
+        ulong value = flag() ? (ulong)0x123456789abcdef : (ulong)0x123456789abcdef;
+        int amount = 16;
+        return (value >> (64 - amount)) | (value << amount);
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static ulong ror64(ulong value, int amount)
+    {
+        return (value << (64 - amount)) | (value >> amount);
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static ulong ror64comm(ulong value, int amount)
+    {
+        return (value >> amount) | (value << (64 - amount));
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static ulong ror64const()
+    {
+        ulong value = flag() ? (ulong)0x123456789abcdef : (ulong)0x123456789abcdef;
+        int amount = flag() ? 5 : 5;
+        return (value << (64 - amount)) | (value >> amount);
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static uint rol32_call(uint value, int amount)
+    {
+        return (foo(value) << amount) | (foo(value) >> (32 - amount));
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static uint foo(uint value)
+    {
+        return value;
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static uint rol32_and(uint value, int amount)
+    { 
+        return (value << amount) | (value >> ((32 - amount) & 31));
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static uint two_left_shifts(uint value, int amount)
+    {
+        return (value << amount) | (value << (32 - amount));
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static uint not_rotation(uint value)
+    {
+        return (value >> 10) | (value << 5);
+    }
+
+    public static int Main()
+    {
+        const int Pass = 100;
+        const int Fail = -1;
+
+        if (rol32(0x12345678, 16) != 0x56781234)
+        {
+            return Fail;
+        }
+
+        if (rol32_1(0x12345678) != 0x2468ACF0)
+        {
+            return Fail;
+        }
+
+        if (rol32_3(0x12345678) != 0x91A2B3C0)
+        {
+            return Fail;
+        }
+
+        if (rol32comm(0x12345678, 16) != 0x56781234)
+        {
+            return Fail;
+        }
+
+        if (rol32const() != 0x56781234)
+        {
+            return Fail;
+        }
+
+        if (ror32(0x12345678, 12) != 0x67812345)
+        {
+            return Fail;
+        }
+
+        if (ror32comm(0x12345678, 12) != 0x67812345)
+        {
+            return Fail;
+        }
+        
+        if (ror32const() != 0x67812345)
+        {
+            return Fail;
+        }
+
+        if (rol64(0x123456789abcdef, 32) != 0x89abcdef01234567)
+        {
+            return Fail;
+        }
+
+        if (rol64comm(0x123456789abcdef, 32) != 0x89abcdef01234567)
+        {
+            return Fail;
+        }
+
+        if (rol64const() != 0x456789abcdef0123)
+        {
+            return Fail;
+        }
+
+        if (ror64(0x123456789abcdef, 0) != 0x123456789abcdef)
+        {
+            return Fail;
+        }
+
+        if (ror64comm(0x123456789abcdef, 0) != 0x123456789abcdef)
+        {
+            return Fail;
+        }
+
+        if (ror64const() != 0x78091a2b3c4d5e6f)
+        {
+            return Fail;
+        }
+
+        if (rol32_call(0x12345678, 16) != 0x56781234)
+        {
+            return Fail;
+        }
+
+        if (rol32_and(0x12345678, 16) != 0x56781234)
+        {
+            return Fail;
+        }
+
+        if (two_left_shifts(0x12345678, 7) != 0xfa2b3c00)
+        {
+            return Fail;
+        }
+
+        if (not_rotation(0x87654321) != 0xeca9fd70)
+        {
+            return Fail;
+        }
+
+        return Pass;
+    }
+}
diff --git a/tests/src/JIT/CodeGenBringUpTests/Rotate.csproj b/tests/src/JIT/CodeGenBringUpTests/Rotate.csproj
new file mode 100644 (file)
index 0000000..55dce6b
--- /dev/null
@@ -0,0 +1,43 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.props))\dir.props" />
+  <PropertyGroup>
+    <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+    <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+    <SchemaVersion>2.0</SchemaVersion>
+    <ProjectGuid>{95DFC527-4DC1-495E-97D7-E94EE1F7140D}</ProjectGuid>
+    <OutputType>Exe</OutputType>
+    <AppDesignerFolder>Properties</AppDesignerFolder>
+    <FileAlignment>512</FileAlignment>
+    <ProjectTypeGuids>{786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}</ProjectTypeGuids>
+    <ReferencePath>$(ProgramFiles)\Common Files\microsoft shared\VSTT\11.0\UITestExtensionPackages</ReferencePath>
+    <SolutionDir Condition="$(SolutionDir) == '' Or $(SolutionDir) == '*Undefined*'">..\..\</SolutionDir>
+    <NuGetPackageImportStamp>7a9bfb7d</NuGetPackageImportStamp>
+  </PropertyGroup>
+  <!-- Default configurations to help VS understand the configurations -->
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
+  </PropertyGroup>
+  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
+  </PropertyGroup>
+  <ItemGroup>
+    <CodeAnalysisDependentAssemblyPaths Condition=" '$(VS100COMNTOOLS)' != '' " Include="$(VS100COMNTOOLS)..\IDE\PrivateAssemblies">
+      <Visible>False</Visible>
+    </CodeAnalysisDependentAssemblyPaths>
+  </ItemGroup>
+  <ItemGroup>
+    <None Include="$(JitPackagesConfigFileDirectory)threading+thread\project.json" />
+  </ItemGroup>
+  <ItemGroup>
+    <Service Include="{82A7F48D-3B50-4B1E-B82E-3ADA8210C358}" />
+  </ItemGroup>
+  <ItemGroup>
+    <Compile Include="Rotate.cs" />
+  </ItemGroup>
+  <PropertyGroup>
+    <ProjectJson>$(JitPackagesConfigFileDirectory)threading+thread\project.json</ProjectJson>
+    <ProjectLockJson>$(JitPackagesConfigFileDirectory)threading+thread\project.lock.json</ProjectLockJson>
+  </PropertyGroup>
+  <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.targets))\dir.targets" />
+  <PropertyGroup Condition=" '$(MsBuildProjectDirOverride)' != '' ">
+  </PropertyGroup>
+</Project>
\ No newline at end of file