// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
// in direction shift1 by Neg. The range [0, EltSize) means that we only need
// to consider shift amounts with defined behavior.
+//
+// The IsRotate flag should be set when the LHS of both shifts is the same.
+// Otherwise, when matching a general funnel shift, it should be clear.
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG, bool IsRotate) {
// If EltSize is a power of 2 then:
//
// (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
// always invokes undefined behavior for 32-bit X.
//
// Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
+ //
+ // NOTE: We can only do this when matching a rotate and not a general
+ // funnel shift.
unsigned MaskLoBits = 0;
- if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
+ if (IsRotate && Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0));
unsigned Bits = Log2_64(EltSize);
// (srl x, (*ext y))) ->
// (rotr x, y) or (rotl x, (sub 32, y))
EVT VT = Shifted.getValueType();
- if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) {
+ if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
+ /*IsRotate*/ true)) {
bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
HasPos ? Pos : Neg);
// fold (or (shl x0, (*ext (sub 32, y))),
// (srl x1, (*ext y))) ->
// (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
- if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG)) {
+ if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1)) {
bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
HasPos ? Pos : Neg);
ret i32 %sh
}
-; PR34641 - Masked Shift Counts
+; These are not valid shld/shrd patterns. When the shift amount modulo
+; the bitwidth is zero, the result should be an OR of both operands, not a
+; shift.
-define i32 @shld_safe_i32(i32, i32, i32) {
-; X86-LABEL: shld_safe_i32:
+define i32 @not_shld_i32(i32, i32, i32) {
+; X86-LABEL: not_shld_i32:
; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: shll %cl, %edx
+; X86-NEXT: negb %cl
+; X86-NEXT: shrl %cl, %eax
+; X86-NEXT: orl %edx, %eax
; X86-NEXT: retl
;
-; X64-LABEL: shld_safe_i32:
+; X64-LABEL: not_shld_i32:
; X64: # %bb.0:
; X64-NEXT: movl %edx, %ecx
-; X64-NEXT: movl %edi, %eax
+; X64-NEXT: movl %esi, %eax
+; X64-NEXT: shll %cl, %edi
+; X64-NEXT: negb %cl
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NEXT: shldl %cl, %esi, %eax
+; X64-NEXT: shrl %cl, %eax
+; X64-NEXT: orl %edi, %eax
; X64-NEXT: retq
%4 = and i32 %2, 31
%5 = shl i32 %0, %4
ret i32 %9
}
-define i32 @shrd_safe_i32(i32, i32, i32) {
-; X86-LABEL: shrd_safe_i32:
+define i32 @not_shrd_i32(i32, i32, i32) {
+; X86-LABEL: not_shrd_i32:
; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: shrdl %cl, %edx, %eax
+; X86-NEXT: shrl %cl, %edx
+; X86-NEXT: negb %cl
+; X86-NEXT: shll %cl, %eax
+; X86-NEXT: orl %edx, %eax
; X86-NEXT: retl
;
-; X64-LABEL: shrd_safe_i32:
+; X64-LABEL: not_shrd_i32:
; X64: # %bb.0:
; X64-NEXT: movl %edx, %ecx
-; X64-NEXT: movl %edi, %eax
+; X64-NEXT: movl %esi, %eax
+; X64-NEXT: shrl %cl, %edi
+; X64-NEXT: negb %cl
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NEXT: shrdl %cl, %esi, %eax
+; X64-NEXT: shll %cl, %eax
+; X64-NEXT: orl %edi, %eax
; X64-NEXT: retq
%4 = and i32 %2, 31
%5 = lshr i32 %0, %4