This change adds code to recognize rotation idioms and generate efficient instructions for them.
Two new operators are added: GT_ROL and GT_ROR.
The patterns recognized:
(x << c1) | (x >>> c2) => x rol c1
(x >>> c1) | (x << c2) => x ror c1
where c1 and c2 are constant and c1 + c2 == bitsize(x)
(x << y) | (x >>> (N - y)) => x rol y
(x >>> y) | (x << (N - y)) => x ror y
where N == bitsize(x)
(x << y & M1) | (x >>> (N - y) & M2) => x rol y
(x >>> y & M1) | (x << (N - y) & M2) => x ror y
where N == bitsize(x)
M1 & (N - 1) == N - 1
M2 & (N - 1) == N - 1
For a simple benchmark with 4 rotation patterns in a tight loop
time goes from 7.324 to 2.600 (2.8x speedup).
Rotations found and optimized in mscorlib:
System.Security.Cryptography.SHA256Managed::RotateRight
System.Security.Cryptography.SHA384Managed::RotateRight
System.Security.Cryptography.SHA512Managed::RotateRight
System.Security.Cryptography.RIPEMD160Managed::MDTransform (320 instances!)
System.Diagnostics.Tracing.EventSource.Sha1ForNonSecretPurposes::Rol1
System.Diagnostics.Tracing.EventSource.Sha1ForNonSecretPurposes::Rol5
System.Diagnostics.Tracing.EventSource.Sha1ForNonSecretPurposes::Rol30
System.Diagnostics.Tracing.EventSource.Sha1ForNonSecretPurposes::Drain
(9 instances of Sha1ForNonSecretPurposes::Rol* inlined)
Closes #1619.
case GT_LSH:
case GT_RSH:
case GT_RSZ:
+ case GT_ROR:
genCodeForShift(treeNode->gtGetOp1(), treeNode->gtGetOp2(), treeNode);
// genCodeForShift() calls genProduceReg()
break;
case GT_OR:
ins = INS_orr;
break;
+ case GT_ROR:
+ ins = INS_ror;
+ break;
case GT_RSH:
ins = INS_asr;
break;
case GT_LSH:
case GT_RSH:
case GT_RSZ:
+ case GT_ROL:
+ case GT_ROR:
genCodeForShift(treeNode->gtGetOp1(), treeNode->gtGetOp2(), treeNode);
// genCodeForShift() calls genProduceReg()
break;
{
if (data->OperGet() == GT_LSH ||
data->OperGet() == GT_RSH ||
- data->OperGet() == GT_RSZ)
+ data->OperGet() == GT_RSZ ||
+ data->OperGet() == GT_ROL ||
+ data->OperGet() == GT_ROR)
{
genCodeForShift(addr, data->gtOp.gtOp2, data);
}
{
case GT_ADD: ins = INS_add; break;
case GT_AND: ins = INS_and; break;
- case GT_MUL: ins = INS_imul; break;
case GT_LSH: ins = INS_shl; break;
+ case GT_MUL: ins = INS_imul; break;
case GT_NEG: ins = INS_neg; break;
case GT_NOT: ins = INS_not; break;
case GT_OR: ins = INS_or; break;
+ case GT_ROL: ins = INS_rol; break;
+ case GT_ROR: ins = INS_ror; break;
case GT_RSH: ins = INS_sar; break;
case GT_RSZ: ins = INS_shr; break;
case GT_SUB: ins = INS_sub; break;
}
/** Generates the code sequence for a GenTree node that
- * represents a bit shift operation (<<, >>, >>>).
+ * represents a bit shift or rotate operation (<<, >>, >>>, rol, ror).
*
- * Arguments: operand: the value to be shifted by shiftBy bits.
- * shiftBy: the number of bits to shift the operand.
+ * Arguments: operand: the value to be shifted or rotated by shiftBy bits.
+ * shiftBy: the number of bits to shift or rotate the operand.
 * parent: the actual bitshift node (that specifies the
 * type of bitshift to perform).
*
case INS_shr:
ins = INS_shr_1;
break;
+ case INS_rol:
+ ins = INS_rol_1;
+ break;
+ case INS_ror:
+ ins = INS_ror_1;
+ break;
default:
// leave 'ins' unchanged
break;
case INS_shr:
ins = INS_shr_N;
break;
+ case INS_rol:
+ ins = INS_rol_N;
+ break;
+ case INS_ror:
+ ins = INS_ror_N;
+ break;
default:
// leave 'ins' unchanged
break;
void fgFixupIfCallArg(ArrayStack<GenTree *> *parentStack,
GenTree *oldChild,
GenTree *newChild);
-
+ // Recognize a rotation pattern and convert into a GT_ROL or a GT_ROR node.
+ GenTreePtr fgMorphRotation(GenTreePtr tree);
+
//-------- Determine the order in which the trees will be evaluated -------
unsigned fgTreeSeqNum;
case INS_rcl_N:
case INS_rcr_N:
+ case INS_rol_N:
+ case INS_ror_N:
case INS_shl_N:
case INS_shr_N:
case INS_sar_N:
{
case INS_rcl_N:
case INS_rcr_N:
+ case INS_rol_N:
+ case INS_ror_N:
case INS_shl_N:
case INS_shr_N:
case INS_sar_N:
{
case INS_rcl_N:
case INS_rcr_N:
+ case INS_rol_N:
+ case INS_ror_N:
case INS_shl_N:
case INS_shr_N:
case INS_sar_N:
{
case INS_rcl_N:
case INS_rcr_N:
+ case INS_rol_N:
+ case INS_ror_N:
case INS_shl_N:
case INS_shr_N:
case INS_sar_N:
{
case INS_rcl_N:
case INS_rcr_N:
+ case INS_rol_N:
+ case INS_ror_N:
case INS_shl_N:
case INS_shr_N:
case INS_sar_N:
{
case INS_rcl_N:
case INS_rcr_N:
+ case INS_rol_N:
+ case INS_ror_N:
case INS_shl_N:
case INS_shr_N:
case INS_sar_N:
{
case INS_rcl_N:
case INS_rcr_N:
+ case INS_rol_N:
+ case INS_ror_N:
case INS_shl_N:
case INS_shr_N:
case INS_sar_N:
{
case INS_rcl_N:
case INS_rcr_N:
+ case INS_rol_N:
+ case INS_ror_N:
case INS_shl_N:
case INS_shr_N:
case INS_sar_N:
{
case INS_rcl_1:
case INS_rcr_1:
+ case INS_rol_1:
+ case INS_ror_1:
case INS_shl_1:
case INS_shr_1:
case INS_sar_1:
case INS_rcl:
case INS_rcr:
+ case INS_rol:
+ case INS_ror:
case INS_shl:
case INS_shr:
case INS_sar:
case INS_rcl_N:
case INS_rcr_N:
+ case INS_rol_N:
+ case INS_ror_N:
case INS_shl_N:
case INS_shr_N:
case INS_sar_N:
case GT_LSH:
case GT_RSH:
case GT_RSZ:
+ case GT_ROL:
+ case GT_ROR:
case GT_ASG_LSH:
case GT_ASG_RSH:
case GT_ASG_RSZ:
case GT_LSH:
case GT_RSH:
case GT_RSZ:
+ case GT_ROL:
+ case GT_ROR:
case GT_ASG_LSH:
case GT_ASG_RSH:
case GT_ASG_RSZ:
case GT_LSH: i1 <<= (i2 & 0x1f); break;
case GT_RSH: i1 >>= (i2 & 0x1f); break;
case GT_RSZ:
- /* logical shift -> make it unsigned to propagate the sign bit */
+ /* logical shift -> make it unsigned to not propagate the sign bit */
i1 = UINT32(i1) >> (i2 & 0x1f);
break;
+ case GT_ROL: i1 = (i1 << (i2 & 0x1f)) | (UINT32(i1) >> ((32 - i2) & 0x1f));
+ break;
+ case GT_ROR: i1 = (i1 << ((32 - i2) & 0x1f)) | (UINT32(i1) >> (i2 & 0x1f));
+ break;
/* DIV and MOD can generate an INT 0 - if division by 0
* or overflow - when dividing MIN by -1 */
case GT_XOR: lval1 ^= lval2; break;
case GT_AND: lval1 &= lval2; break;
- case GT_LSH: lval1 <<= (op2->gtIntConCommon.IconValue() & 0x3f); break;
- case GT_RSH: lval1 >>= (op2->gtIntConCommon.IconValue() & 0x3f); break;
+ case GT_LSH: lval1 <<= (lval2 & 0x3f); break;
+ case GT_RSH: lval1 >>= (lval2 & 0x3f); break;
case GT_RSZ:
- /* logical shift -> make it unsigned to propagate the sign bit */
- lval1 = UINT64(lval1) >> (op2->gtIntConCommon.IconValue() & 0x3f);
+ /* logical shift -> make it unsigned to not propagate the sign bit */
+ lval1 = UINT64(lval1) >> (lval2 & 0x3f);
+ break;
+ case GT_ROL: lval1 = (lval1 << (lval2 & 0x3f)) | (UINT64(lval1) >> ((64 - lval2) & 0x3f));
+ break;
+ case GT_ROR: lval1 = (lval1 << ((64 - lval2) & 0x3f)) | (UINT64(lval1) >> (lval2 & 0x3f));
break;
//Both DIV and IDIV on x86 raise an exception for min_int (and min_long) / -1. So we preserve
// Returns non-zero when this node's operator is an arithmetic or bitwise
// binary operator: add/sub/mul, signed and unsigned div/mod, the bitwise
// logical operators, the three shifts, and the two rotate operators.
int OperIsArithmetic() const
{
    genTreeOps op = OperGet();
    return op==GT_ADD
           || op==GT_SUB
           || op==GT_MUL
           || op==GT_DIV
           || op==GT_MOD

           || op==GT_UDIV
           || op==GT_UMOD

           || op==GT_OR
           || op==GT_XOR
           || op==GT_AND
           || op==GT_LSH
           || op==GT_RSH
           || op==GT_RSZ

           || op==GT_ROL
           || op==GT_ROR;
}
static
case GT_LSH:
case GT_RSH:
case GT_RSZ:
+ case GT_ROL:
+ case GT_ROR:
case GT_INDEX:
case GT_ASG:
case GT_ASG_ADD:
GTNODE(GT_LSH , "<<" ,0,GTK_BINOP)
GTNODE(GT_RSH , ">>" ,0,GTK_BINOP)
GTNODE(GT_RSZ , ">>>" ,0,GTK_BINOP)
+GTNODE(GT_ROL , "rol" ,0,GTK_BINOP)
+GTNODE(GT_ROR , "ror" ,0,GTK_BINOP)
GTNODE(GT_MULHI , "mulhi" ,1,GTK_BINOP) // returns high bits (top N bits of the 2N bit result of an NxN multiply)
GTNODE(GT_ASG , "=" ,0,GTK_BINOP|GTK_ASGOP)
assert(ins == INS_rcl ||
ins == INS_rcr ||
+ ins == INS_rol ||
+ ins == INS_ror ||
ins == INS_shl ||
ins == INS_shr ||
ins == INS_sar);
assert(INS_rcl + 1 == INS_rcl_1);
assert(INS_rcr + 1 == INS_rcr_1);
+ assert(INS_rol + 1 == INS_rol_1);
+ assert(INS_ror + 1 == INS_ror_1);
assert(INS_shl + 1 == INS_shl_1);
assert(INS_shr + 1 == INS_shr_1);
assert(INS_sar + 1 == INS_sar_1);
assert(INS_rcl + 2 == INS_rcl_N);
assert(INS_rcr + 2 == INS_rcr_N);
+ assert(INS_rol + 2 == INS_rol_N);
+ assert(INS_ror + 2 == INS_ror_N);
assert(INS_shl + 2 == INS_shl_N);
assert(INS_shr + 2 == INS_shr_N);
assert(INS_sar + 2 == INS_sar_N);
INST2(loop , "loop" , 0, IUM_RD, 0, 0, BAD_CODE, 0x0000E2)
INST2(call , "call" , 0, IUM_RD, 0, 1, 0x0010FF, 0x0000E8)
+INST2(rol , "rol" , 0, IUM_RW, 0, 1, 0x0000D2, BAD_CODE)
+INST2(rol_1 , "rol" , 0, IUM_RW, 0, 1, 0x0000D0, 0x0000D0)
+INST2(rol_N , "rol" , 0, IUM_RW, 0, 1, 0x0000C0, 0x0000C0)
+INST2(ror , "ror" , 0, IUM_RW, 0, 1, 0x0008D2, BAD_CODE)
+INST2(ror_1 , "ror" , 0, IUM_RW, 0, 1, 0x0008D0, 0x0008D0)
+INST2(ror_N , "ror" , 0, IUM_RW, 0, 1, 0x0008C0, 0x0008C0)
+
INST2(rcl , "rcl" , 0, IUM_RW, 1, 1, 0x0010D2, BAD_CODE)
INST2(rcl_1 , "rcl" , 0, IUM_RW, 1, 1, 0x0010D0, 0x0010D0)
INST2(rcl_N , "rcl" , 0, IUM_RW, 1, 1, 0x0010C0, 0x0010C0)
if (tree->gtOper == GT_LSH ||
tree->gtOper == GT_RSH ||
- tree->gtOper == GT_RSZ)
+ tree->gtOper == GT_RSZ ||
+ tree->gtOper == GT_ROL ||
+ tree->gtOper == GT_ROR)
{
if (tree->gtType == TYP_INT)
{
case GT_LSH:
case GT_RSH:
case GT_RSZ:
+ case GT_ROL:
+ case GT_ROR:
case GT_MULHI:
NYI("Arithmetic binary operators on TYP_LONG");
break;
}
break;
+ case GT_ROL:
+ case GT_ROR:
+ LowerRotate(*ppTree);
+ break;
+
#ifdef FEATURE_SIMD
case GT_SIMD:
if ((*ppTree)->TypeGet() == TYP_SIMD12)
void HandleIndirAddressExpression(GenTree *indirTree, GenTree* tree);
void LowerGCWriteBarrier(GenTree *tree);
void LowerArrElem(GenTree **ppTree, Compiler::fgWalkData* data);
+ void LowerRotate(GenTree *tree);
// Utility functions
void MorphBlkIntoHelperCall (GenTreePtr pTree, GenTreePtr treeStmt);
NYI_ARM("ARM Lowering for BlockOp");
}
// LowerRotate: rotation lowering stub for ARM (32-bit). Rotate operators are
// not implemented for this target yet, so hitting one raises an NYI.
void Lowering::LowerRotate(GenTreePtr tree)
{
    NYI_ARM("ARM Lowering for ROL and ROR");
}
+
void Lowering::TreeNodeInfoInitCall(GenTree *tree, TreeNodeInfo &info,
int &srcCount, // out
int &dstCount // out
case GT_LSH:
case GT_RSH:
case GT_RSZ:
+ case GT_ROR:
{
info->srcCount = 2;
info->dstCount = 1;
}
}
+void Lowering::LowerRotate(GenTreePtr tree)
+{
+ if (tree->OperGet() == GT_ROL)
+ {
+ // There is no ROL instruction on ARM. Convert rol into ROR.
+ GenTreePtr rotatedValue = tree->gtOp.gtOp1;
+ unsigned rotatedValueBitSize = genTypeSize(rotatedValue->gtType) * 8;
+ GenTreePtr rotateLeftIndexNode = tree->gtOp.gtOp2;
+
+ if (rotateLeftIndexNode->IsCnsIntOrI())
+ {
+ ssize_t rotateLeftIndex = rotateLeftIndexNode->gtIntCon.gtIconVal;
+ ssize_t rotateRightIndex = rotatedValueBitSize - rotateLeftIndex;
+ rotateIndexNode->gtIntCon.gtIconVal = rotateRightIndex;
+ }
+ else
+ {
+ GenTreePtr tmp = gtNewOperNode(GT_NEG, genActualType(rotateLeftIndexNode->gtType), rotateLeftIndexNode);
+ rotateLeftIndexNode->InsertAfterSelf(tmp);
+ tree->gtOp.gtOp2 = tmp;
+ }
+ tree->ChangeOper(GT_ROR);
+ }
+}
+
// TODO-Cleanup: move to Lower.cpp?
void Lowering::SetStoreIndOpCounts(GenTreePtr storeInd, GenTreePtr indirCandidate)
{
case GT_LSH:
case GT_RSH:
case GT_RSZ:
+ case GT_ROL:
+ case GT_ROR:
{
info->srcCount = 2;
info->dstCount = 1;
oper != GT_XOR &&
oper != GT_LSH &&
oper != GT_RSH &&
- oper != GT_RSZ)
+ oper != GT_RSZ &&
+ oper != GT_ROL &&
+ oper != GT_ROR)
{
JITDUMP("Lower of StoreInd didn't mark the node as self contained\n");
JITDUMP("because the node operator not yet supported:\n");
if ((oper == GT_LSH ||
oper == GT_RSH ||
- oper == GT_RSZ) &&
+ oper == GT_RSZ ||
+ oper == GT_ROL ||
+ oper == GT_ROR) &&
varTypeIsSmall(tree))
{
//In ldind, Integer values smaller than 4 bytes, a boolean, or a character converted to 4 bytes by sign or zero-extension as appropriate.
}
}
// LowerRotate: rotation lowering for xarch. Intentionally a no-op:
// both GT_ROL and GT_ROR map directly to machine instructions.
void Lowering::LowerRotate(GenTreePtr tree)
{
    // xarch supports both ROL and ROR instructions so no lowering is required.
}
+
void Lowering::SetStoreIndOpCounts(GenTreePtr storeInd, GenTreePtr indirCandidate)
{
GenTreePtr indirDst = storeInd->gtGetOp1();
case GT_LSH:
case GT_RSH:
case GT_RSZ:
+ case GT_ROL:
+ case GT_ROR:
if (tree->gtLsraInfo.isHelperCallWithKills)
{
killMask = RBM_CALLEE_TRASH;
}
#endif // LEA_AVAILABLE
}
+ else if(oper == GT_OR)
+ {
+ tree = fgMorphRotation(tree);
+
+ // fgMorphRotation may return a new tree
+ oper = tree->OperGet();
+ typ = tree->TypeGet();
+ op1 = tree->gtOp.gtOp1;
+ op2 = tree->gtOp.gtOp2;
+ }
+
break;
case GT_CHS:
/* for the shift nodes the type of op2 can differ from the tree type */
if ((typ == TYP_LONG) && (genActualType(op2->gtType) == TYP_INT))
{
- noway_assert((oper == GT_LSH) || (oper == GT_RSH) || (oper == GT_RSZ));
+ noway_assert((oper == GT_LSH) || (oper == GT_RSH) || (oper == GT_RSZ) ||
+ (oper == GT_ROL) || (oper == GT_ROR));
GenTreePtr commaOp2 = op2->gtOp.gtOp2;
return result;
}
//------------------------------------------------------------------------------
// fgMorphRotation : Check if the tree represents a left or right rotation. If so, return
//                   an equivalent GT_ROL or GT_ROR tree; otherwise, return the original tree.
//
// Arguments:
//    tree - tree to check for a rotation pattern
//
// Return Value:
//    An equivalent GT_ROL or GT_ROR tree if a pattern is found; original tree otherwise.
//
// Assumption:
//    The input is a GT_OR tree.

GenTreePtr Compiler::fgMorphRotation(GenTreePtr tree)
{
#ifndef LEGACY_BACKEND
    //
    // Check for a rotation pattern, e.g.,
    //
    //        OR                  ROL
    //       /  \                /  \
    //     LSH   RSZ     ->     x    y
    //     / \   / \
    //    x  AND x  AND
    //       / \    / \
    //      y  31  ADD 31
    //             / \
    //           NEG  32
    //            |
    //            y

    genTreeOps oper = tree->OperGet();
    noway_assert(oper == GT_OR);

    // The transformation reorders/merges the shift sub-trees, so any side
    // effect anywhere under the OR makes it unsafe.
    if ((tree->gtFlags & GTF_ALL_EFFECT) != 0)
    {
        return tree; // Can't do anything due to side effects.
    }

    // Check if we have an LSH on one side of the OR and an RSZ on the other side.
    GenTreePtr op1 = tree->gtGetOp1();
    GenTreePtr op2 = tree->gtGetOp2();
    GenTreePtr leftShiftTree = nullptr;
    GenTreePtr rightShiftTree = nullptr;
    if ((op1->OperGet() == GT_LSH) && (op2->OperGet() == GT_RSZ))
    {
        leftShiftTree = op1;
        rightShiftTree = op2;
    }
    else if ((op1->OperGet() == GT_RSZ) && (op2->OperGet() == GT_LSH))
    {
        leftShiftTree = op2;
        rightShiftTree = op1;
    }
    else
    {
        return tree;
    }

    // Check if the trees representing the value to shift are identical.
    // We already checked that there are no side effects above.
    if (GenTree::Compare(leftShiftTree->gtGetOp1(), rightShiftTree->gtGetOp1()))
    {
        GenTreePtr rotatedValue = leftShiftTree->gtGetOp1();
        ssize_t rotatedValueBitSize = genTypeSize(rotatedValue->gtType) * 8;
        noway_assert((rotatedValueBitSize == 32) || (rotatedValueBitSize == 64));
        GenTreePtr leftShiftIndex = leftShiftTree->gtGetOp2();
        GenTreePtr rightShiftIndex = rightShiftTree->gtGetOp2();

        // The shift index may be masked. At least (rotatedValueBitSize - 1) lower bits
        // shouldn't be masked for the transformation to be valid. If additional
        // higher bits are not masked, the transformation is still valid since the result
        // of MSIL shift instructions is unspecified if the shift amount is greater or equal
        // than the width of the value being shifted.
        ssize_t minimalMask = rotatedValueBitSize - 1;
        // -1 (all bits set) means "no mask seen"; it trivially satisfies the
        // minimal-mask check below.
        ssize_t leftShiftMask = -1;
        ssize_t rightShiftMask = -1;

        // Peel a constant AND mask off each shift amount, if present.
        if ((leftShiftIndex->OperGet() == GT_AND))
        {
            if (leftShiftIndex->gtGetOp2()->IsCnsIntOrI())
            {
                leftShiftMask = leftShiftIndex->gtGetOp2()->gtIntCon.gtIconVal;
                leftShiftIndex = leftShiftIndex->gtGetOp1();
            }
            else
            {
                return tree;
            }
        }

        if ((rightShiftIndex->OperGet() == GT_AND))
        {
            if (rightShiftIndex->gtGetOp2()->IsCnsIntOrI())
            {
                rightShiftMask = rightShiftIndex->gtGetOp2()->gtIntCon.gtIconVal;
                rightShiftIndex = rightShiftIndex->gtGetOp1();
            }
            else
            {
                return tree;
            }
        }

        if (((minimalMask & leftShiftMask) != minimalMask) ||
            ((minimalMask & rightShiftMask) != minimalMask))
        {
            // The shift index is overmasked, e.g., we have
            // something like (x << y & 15) or
            // (x >> (32 - y)) & 15 with 32-bit x.
            // The transformation is not valid.
            return tree;
        }

        // Variable-rotate form: one shift amount is (N - y), which the importer
        // produces as ADD(NEG(y), N). Whichever side carries the ADD determines
        // the rotate direction; the other side's plain 'y' is the rotate index.
        GenTreePtr shiftIndexWithAdd = nullptr;
        GenTreePtr shiftIndexWithoutAdd = nullptr;
        genTreeOps rotateOp = GT_NONE;
        GenTreePtr rotateIndex = nullptr;

        if (leftShiftIndex->OperGet() == GT_ADD)
        {
            shiftIndexWithAdd = leftShiftIndex;
            shiftIndexWithoutAdd = rightShiftIndex;
            rotateOp = GT_ROR;
        }
        else if (rightShiftIndex->OperGet() == GT_ADD)
        {
            shiftIndexWithAdd = rightShiftIndex;
            shiftIndexWithoutAdd = leftShiftIndex;
            rotateOp = GT_ROL;
        }

        if (shiftIndexWithAdd != nullptr)
        {
            if (shiftIndexWithAdd->gtGetOp2()->IsCnsIntOrI())
            {
                if (shiftIndexWithAdd->gtGetOp2()->gtIntCon.gtIconVal == rotatedValueBitSize)
                {
                    if (shiftIndexWithAdd->gtGetOp1()->OperGet() == GT_NEG)
                    {
                        if (GenTree::Compare(shiftIndexWithAdd->gtGetOp1()->gtGetOp1(), shiftIndexWithoutAdd))
                        {
                            // We found one of these patterns:
                            // (x << (y & M)) | (x >>> ((-y + N) & M))
                            // (x << y) | (x >>> (-y + N))
                            // (x >>> (y & M)) | (x << ((-y + N) & M))
                            // (x >>> y) | (x << (-y + N))
                            // where N == bitsize(x), M is const, and
                            // M & (N - 1) == N - 1

#ifndef _TARGET_64BIT_
                            if (!shiftIndexWithoutAdd->IsCnsIntOrI() && (rotatedValueBitSize == 64))
                            {
                                // TODO: we need to handle variable-sized long shifts specially on x86.
                                // GT_LSH, GT_RSH, and GT_RSZ have helpers for this case. We may need
                                // to add helpers for GT_ROL and GT_ROR.
                                NYI("Rotation of a long value by variable amount");
                            }
#endif

                            rotateIndex = shiftIndexWithoutAdd;
                        }
                    }
                }
            }
        }
        else if ((leftShiftIndex->IsCnsIntOrI() &&
                  rightShiftIndex->IsCnsIntOrI()))
        {
            // Constant-rotate form: normalized to a left rotate by the
            // left-shift amount (x ror c2 == x rol c1 when c1 + c2 == N).
            if (leftShiftIndex->gtIntCon.gtIconVal +
                rightShiftIndex->gtIntCon.gtIconVal == rotatedValueBitSize)
            {
                // We found this pattern:
                // (x << c1) | (x >>> c2)
                // where c1 and c2 are const and c1 + c2 == bitsize(x)
                rotateOp = GT_ROL;
                rotateIndex = leftShiftIndex;
            }
        }

        if (rotateIndex != nullptr)
        {
            noway_assert((rotateOp == GT_ROL) || (rotateOp == GT_ROR));

            // We can use the same tree only during global morph; reusing the tree in a later morph
            // may invalidate value numbers.
            if (fgGlobalMorph)
            {
                tree->gtOp.gtOp1 = rotatedValue;
                tree->gtOp.gtOp2 = rotateIndex;
                tree->ChangeOper(rotateOp);
            }
            else
            {
                tree = gtNewOperNode(rotateOp, genActualType(rotatedValue->gtType), rotatedValue, rotateIndex);
            }
            return tree;
        }
    }
#endif //LEGACY_BACKEND
    return tree;
}
#if !CPU_HAS_FP_SUPPORT
GenTreePtr Compiler::fgMorphToEmulatedFP(GenTreePtr tree)
/*****************************************************************************
*
- * Transform the given tree for code generation and returns an equivalent tree.
+ * Transform the given tree for code generation and return an equivalent tree.
*/
case GT_LSH:
case GT_RSH:
case GT_RSZ:
+ case GT_ROL:
+ case GT_ROR:
case GT_EQ:
case GT_NE:
case GT_LT:
case GT_XOR:
case GT_RSH:
case GT_RSZ:
+ case GT_ROL:
+ case GT_ROR:
return true; // CSE these Binary Operators
case GT_ADD: // Check for ADDRMODE flag on these Binary Operators
{
return UINT32(v0) >> v1;
}
+ case GT_ROL:
+ if (sizeof(T) == 8)
+ {
+ return (v0 << v1) | (UINT64(v0) >> (64 - v1));
+ }
+ else
+ {
+ return (v0 << v1) | (UINT32(v0) >> (32 - v1));
+ }
+
+ case GT_ROR:
+ if (sizeof(T) == 8)
+ {
+ return (v0 << (64 - v1)) | (UINT64(v0) >> v1);
+ }
+ else
+ {
+ return (v0 << (32 - v1)) | (UINT32(v0) >> v1);
+ }
case GT_DIV:
case GT_MOD:
case GT_LSH:
case GT_RSH:
case GT_RSZ:
+ case GT_ROL:
+ case GT_ROR:
// (x << 0) => x
// (x >> 0) => x
+ // (x rol 0) => x
+ // (x ror 0) => x
ZeroVN = VNZeroForType(typ);
if (arg1VN == ZeroVN)
return arg0VN;
--- /dev/null
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE file in the project root for full license information.
+//
+
+
+using System;
+using System.Runtime.CompilerServices;
+
// Tests for JIT recognition of rotation idioms ((x << a) | (x >> (N - a)) and
// variants). Every pattern method is [MethodImpl(NoInlining)] so each pattern
// is compiled in its own method. The *const variants route operands through
// the opaque flag() call — presumably to keep them from being treated as
// JIT-time constants (TODO confirm against JIT constant folding behavior).
// two_left_shifts and not_rotation are negative cases that must NOT be
// rewritten as rotations.
public class Test
{
    // rol by a variable amount.
    [MethodImpl(MethodImplOptions.NoInlining)]
    static uint rol32(uint value, int amount)
    {
        return (value << amount) | (value >> (32 - amount));
    }

    // rol by constant 1 (maps to the one-byte rol-by-1 encoding on x86).
    [MethodImpl(MethodImplOptions.NoInlining)]
    static uint rol32_1(uint value)
    {
        return (value << 1) | (value >> (32 - 1));
    }

    // rol by a constant other than 1.
    [MethodImpl(MethodImplOptions.NoInlining)]
    static uint rol32_3(uint value)
    {
        return (value << 3) | (value >> (32 - 3));
    }

    // Same as rol32 with the OR operands commuted.
    [MethodImpl(MethodImplOptions.NoInlining)]
    static uint rol32comm(uint value, int amount)
    {
        return (value >> (32 - amount)) | (value << amount);
    }

    // Opaque predicate: NoInlining hides the constant 'true' from callers.
    [MethodImpl(MethodImplOptions.NoInlining)]
    static bool flag()
    {
        return true;
    }

    [MethodImpl(MethodImplOptions.NoInlining)]
    static uint rol32const()
    {
        uint value = flag() ? (uint)0x12345678 : (uint)0x12345678;
        int amount = 16;
        return (value >> (32 - amount)) | (value << amount);
    }

    [MethodImpl(MethodImplOptions.NoInlining)]
    static uint ror32(uint value, int amount)
    {
        return (value << ((32 - amount))) | (value >> amount);
    }

    [MethodImpl(MethodImplOptions.NoInlining)]
    static uint ror32comm(uint value, int amount)
    {
        return (value >> amount) | (value << ((32 - amount)));
    }

    [MethodImpl(MethodImplOptions.NoInlining)]
    static uint ror32const()
    {
        uint value = flag() ? (uint)0x12345678 : (uint)0x12345678;
        int amount = flag() ? 12 : 12;
        return (value >> amount) | (value << ((32 - amount)));
    }

    // 64-bit variants of the same patterns.
    [MethodImpl(MethodImplOptions.NoInlining)]
    static ulong rol64(ulong value, int amount)
    {
        return (value << amount) | (value >> (64 - amount));
    }

    [MethodImpl(MethodImplOptions.NoInlining)]
    static ulong rol64comm(ulong value, int amount)
    {
        return (value >> (64 - amount)) | (value << amount);
    }

    [MethodImpl(MethodImplOptions.NoInlining)]
    static ulong rol64const()
    {
        ulong value = flag() ? (ulong)0x123456789abcdef : (ulong)0x123456789abcdef;
        int amount = 16;
        return (value >> (64 - amount)) | (value << amount);
    }

    [MethodImpl(MethodImplOptions.NoInlining)]
    static ulong ror64(ulong value, int amount)
    {
        return (value << (64 - amount)) | (value >> amount);
    }

    [MethodImpl(MethodImplOptions.NoInlining)]
    static ulong ror64comm(ulong value, int amount)
    {
        return (value >> amount) | (value << (64 - amount));
    }

    [MethodImpl(MethodImplOptions.NoInlining)]
    static ulong ror64const()
    {
        ulong value = flag() ? (ulong)0x123456789abcdef : (ulong)0x123456789abcdef;
        int amount = flag() ? 5 : 5;
        return (value << (64 - amount)) | (value >> amount);
    }

    // The rotated value is produced by a call; foo is pure, so the pattern
    // is still a rotation of the same value on both sides after inlining
    // considerations (foo itself is NoInlining).
    [MethodImpl(MethodImplOptions.NoInlining)]
    static uint rol32_call(uint value, int amount)
    {
        return (foo(value) << amount) | (foo(value) >> (32 - amount));
    }

    [MethodImpl(MethodImplOptions.NoInlining)]
    static uint foo(uint value)
    {
        return value;
    }

    // Rotate where one shift amount carries an explicit '& 31' mask.
    [MethodImpl(MethodImplOptions.NoInlining)]
    static uint rol32_and(uint value, int amount)
    {
        return (value << amount) | (value >> ((32 - amount) & 31));
    }

    // Negative case: two left shifts — not a rotation.
    [MethodImpl(MethodImplOptions.NoInlining)]
    static uint two_left_shifts(uint value, int amount)
    {
        return (value << amount) | (value << (32 - amount));
    }

    // Negative case: shift amounts don't sum to 32 — not a rotation.
    [MethodImpl(MethodImplOptions.NoInlining)]
    static uint not_rotation(uint value)
    {
        return (value >> 10) | (value << 5);
    }

    public static int Main()
    {
        const int Pass = 100;
        const int Fail = -1;

        if (rol32(0x12345678, 16) != 0x56781234)
        {
            return Fail;
        }

        if (rol32_1(0x12345678) != 0x2468ACF0)
        {
            return Fail;
        }

        if (rol32_3(0x12345678) != 0x91A2B3C0)
        {
            return Fail;
        }

        if (rol32comm(0x12345678, 16) != 0x56781234)
        {
            return Fail;
        }

        if (rol32const() != 0x56781234)
        {
            return Fail;
        }

        if (ror32(0x12345678, 12) != 0x67812345)
        {
            return Fail;
        }

        if (ror32comm(0x12345678, 12) != 0x67812345)
        {
            return Fail;
        }

        if (ror32const() != 0x67812345)
        {
            return Fail;
        }

        if (rol64(0x123456789abcdef, 32) != 0x89abcdef01234567)
        {
            return Fail;
        }

        if (rol64comm(0x123456789abcdef, 32) != 0x89abcdef01234567)
        {
            return Fail;
        }

        if (rol64const() != 0x456789abcdef0123)
        {
            return Fail;
        }

        // Rotate by 0 relies on C# masking the shift count (x << 64 == x << 0).
        if (ror64(0x123456789abcdef, 0) != 0x123456789abcdef)
        {
            return Fail;
        }

        if (ror64comm(0x123456789abcdef, 0) != 0x123456789abcdef)
        {
            return Fail;
        }

        if (ror64const() != 0x78091a2b3c4d5e6f)
        {
            return Fail;
        }

        if (rol32_call(0x12345678, 16) != 0x56781234)
        {
            return Fail;
        }

        if (rol32_and(0x12345678, 16) != 0x56781234)
        {
            return Fail;
        }

        if (two_left_shifts(0x12345678, 7) != 0xfa2b3c00)
        {
            return Fail;
        }

        if (not_rotation(0x87654321) != 0xeca9fd70)
        {
            return Fail;
        }

        return Pass;
    }
}
--- /dev/null
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.props))\dir.props" />
+ <PropertyGroup>
+ <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+ <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+ <SchemaVersion>2.0</SchemaVersion>
+ <ProjectGuid>{95DFC527-4DC1-495E-97D7-E94EE1F7140D}</ProjectGuid>
+ <OutputType>Exe</OutputType>
+ <AppDesignerFolder>Properties</AppDesignerFolder>
+ <FileAlignment>512</FileAlignment>
+ <ProjectTypeGuids>{786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}</ProjectTypeGuids>
+ <ReferencePath>$(ProgramFiles)\Common Files\microsoft shared\VSTT\11.0\UITestExtensionPackages</ReferencePath>
+ <SolutionDir Condition="$(SolutionDir) == '' Or $(SolutionDir) == '*Undefined*'">..\..\</SolutionDir>
+ <NuGetPackageImportStamp>7a9bfb7d</NuGetPackageImportStamp>
+ </PropertyGroup>
+ <!-- Default configurations to help VS understand the configurations -->
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
+ </PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
+ </PropertyGroup>
+ <ItemGroup>
+ <CodeAnalysisDependentAssemblyPaths Condition=" '$(VS100COMNTOOLS)' != '' " Include="$(VS100COMNTOOLS)..\IDE\PrivateAssemblies">
+ <Visible>False</Visible>
+ </CodeAnalysisDependentAssemblyPaths>
+ </ItemGroup>
+ <ItemGroup>
+ <None Include="$(JitPackagesConfigFileDirectory)threading+thread\project.json" />
+ </ItemGroup>
+ <ItemGroup>
+ <Service Include="{82A7F48D-3B50-4B1E-B82E-3ADA8210C358}" />
+ </ItemGroup>
+ <ItemGroup>
+ <Compile Include="Rotate.cs" />
+ </ItemGroup>
+ <PropertyGroup>
+ <ProjectJson>$(JitPackagesConfigFileDirectory)threading+thread\project.json</ProjectJson>
+ <ProjectLockJson>$(JitPackagesConfigFileDirectory)threading+thread\project.lock.json</ProjectLockJson>
+ </PropertyGroup>
+ <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.targets))\dir.targets" />
+ <PropertyGroup Condition=" '$(MsBuildProjectDirOverride)' != '' ">
+ </PropertyGroup>
+</Project>
\ No newline at end of file