From 5de09ef02e24d234d9fc0cd1c6dfe18a1bb784b0 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Sun, 28 Feb 2021 11:23:46 -0800
Subject: [PATCH] [DAGCombiner][X86] Don't peek through ANDs on the shift
 amount in matchRotateSub when called from MatchFunnelPosNeg.

Peeking through AND is only valid if the input to both shifts is the
same. If the inputs are different, then the original pattern ORs the
two values when the masked shift amount is 0. This is ok if the values
are the same, since the OR would be a NOP, which is why it's ok for
rotate.

Fixes PR49365 and reverts PR34641

Differential Revision: https://reviews.llvm.org/D97637
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 15 ++++++---
 llvm/test/CodeGen/X86/shift-double.ll         | 44 ++++++++++++++++++---------
 2 files changed, 40 insertions(+), 19 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 01b9873..faaa289 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6630,8 +6630,11 @@ static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
 // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
 // in direction shift1 by Neg. The range [0, EltSize) means that we only need
 // to consider shift amounts with defined behavior.
+//
+// The IsRotate flag should be set when the LHS of both shifts is the same.
+// Otherwise if matching a general funnel shift, it should be clear.
 static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
-                           SelectionDAG &DAG) {
+                           SelectionDAG &DAG, bool IsRotate) {
   // If EltSize is a power of 2 then:
   //
   //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
@@ -6663,8 +6666,11 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
   // always invokes undefined behavior for 32-bit X.
   //
   // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
+  //
+  // NOTE: We can only do this when matching a rotate and not a general
+  // funnel shift.
   unsigned MaskLoBits = 0;
-  if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
+  if (IsRotate && Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
     if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
       KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0));
       unsigned Bits = Log2_64(EltSize);
@@ -6754,7 +6760,8 @@ SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
   //          (srl x, (*ext y))) ->
   //   (rotr x, y) or (rotl x, (sub 32, y))
   EVT VT = Shifted.getValueType();
-  if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) {
+  if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
+                     /*IsRotate*/ true)) {
     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
                        HasPos ? Pos : Neg);
@@ -6783,7 +6790,7 @@ SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
   //   fold (or (shl x0, (*ext (sub 32, y))),
   //            (srl x1, (*ext y))) ->
   //     (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
-  if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG)) {
+  if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1)) {
     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
                        HasPos ? Pos : Neg);
diff --git a/llvm/test/CodeGen/X86/shift-double.ll b/llvm/test/CodeGen/X86/shift-double.ll
index c087295..1213a809 100644
--- a/llvm/test/CodeGen/X86/shift-double.ll
+++ b/llvm/test/CodeGen/X86/shift-double.ll
@@ -480,23 +480,31 @@ define i32 @test18(i32 %hi, i32 %lo, i32 %bits) nounwind {
   ret i32 %sh
 }
 
-; PR34641 - Masked Shift Counts
+; These are not valid shld/shrd patterns. When the shift amount modulo
+; the bitwidth is zero, the result should be an OR of both operands, not a
+; shift.
 
-define i32 @shld_safe_i32(i32, i32, i32) {
-; X86-LABEL: shld_safe_i32:
+define i32 @not_shld_i32(i32, i32, i32) {
+; X86-LABEL: not_shld_i32:
 ; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    negb %cl
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    orl %edx, %eax
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: shld_safe_i32:
+; X64-LABEL: not_shld_i32:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %edx, %ecx
-; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    shll %cl, %edi
+; X64-NEXT:    negb %cl
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NEXT:    shldl %cl, %esi, %eax
+; X64-NEXT:    shrl %cl, %eax
+; X64-NEXT:    orl %edi, %eax
 ; X64-NEXT:    retq
   %4 = and i32 %2, 31
   %5 = shl i32 %0, %4
@@ -507,21 +515,27 @@ define i32 @shld_safe_i32(i32, i32, i32) {
   ret i32 %9
 }
 
-define i32 @shrd_safe_i32(i32, i32, i32) {
-; X86-LABEL: shrd_safe_i32:
+define i32 @not_shrd_i32(i32, i32, i32) {
+; X86-LABEL: not_shrd_i32:
 ; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    shrdl %cl, %edx, %eax
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    negb %cl
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    orl %edx, %eax
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: shrd_safe_i32:
+; X64-LABEL: not_shrd_i32:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %edx, %ecx
-; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    shrl %cl, %edi
+; X64-NEXT:    negb %cl
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NEXT:    shrdl %cl, %esi, %eax
+; X64-NEXT:    shll %cl, %eax
+; X64-NEXT:    orl %edi, %eax
 ; X64-NEXT:    retq
   %4 = and i32 %2, 31
   %5 = lshr i32 %0, %4
-- 
2.7.4
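
As a standalone illustration, not part of the patch and not code from the LLVM tree (the helper names fshr32 and orOfShifts are made up here), the following C++ sketch shows why the fold is unsound when the two shift inputs differ: with a shift amount that is 0 modulo the bit width, the OR-of-shifts pattern produces x0 | x1, while a funnel shift produces only x1, so the two agree only in the rotate case x0 == x1.

#include <cassert>
#include <cstdint>

// Reference semantics of a 32-bit funnel shift right (fshr).
static uint32_t fshr32(uint32_t Hi, uint32_t Lo, uint32_t Amt) {
  Amt &= 31;
  return Amt == 0 ? Lo : (Lo >> Amt) | (Hi << (32 - Amt));
}

// The OR-of-shifts pattern that MatchFunnelPosNeg was matching:
// (shl x0, (32 - y) & 31) | (srl x1, y & 31).
static uint32_t orOfShifts(uint32_t X0, uint32_t X1, uint32_t Y) {
  return (X0 << ((32 - Y) & 31)) | (X1 >> (Y & 31));
}

int main() {
  // For a non-zero masked shift amount the pattern and fshr agree.
  assert(orOfShifts(0x12345678, 0x9abcdef0, 8) ==
         fshr32(0x12345678, 0x9abcdef0, 8));
  // For an amount of 0 (mod 32) the pattern ORs both inputs, but a
  // funnel shift returns only the low operand; they coincide only
  // when both inputs are equal, i.e. in the rotate case.
  assert(orOfShifts(0xff000000u, 0x000000ffu, 0) == 0xff0000ffu);
  assert(fshr32(0xff000000u, 0x000000ffu, 0) == 0x000000ffu);
  return 0;
}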