From: Sanjay Patel Date: Mon, 11 Apr 2022 11:09:47 +0000 (-0400) Subject: [SDAG] try to reduce compare of funnel shift equal 0 X-Git-Tag: upstream/15.0.7~10842 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=2ed15984b49a1af87be37ec8bd6ee3ab7f724767;p=platform%2Fupstream%2Fllvm.git [SDAG] try to reduce compare of funnel shift equal 0 fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0 fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0 This is similar to an existing setcc-of-rotate fold, but the matching requires more checks for the more general funnel op: https://alive2.llvm.org/ce/z/Ab2jDd We are effectively decomposing the funnel shift into logical shifts, reassociating, and removing a shift. This should get us the final improvements for x86-64 that were originally shown in D111530 ( https://github.com/llvm/llvm-project/issues/49541 ); x86-32 still shows some SHLD/SHRD, so the pattern is not matching there yet. Differential Revision: https://reviews.llvm.org/D122919 --- diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 58f5ae4..d38a5a1 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -3875,6 +3875,72 @@ static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1, return SDValue(); } +static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1, + ISD::CondCode Cond, const SDLoc &dl, + SelectionDAG &DAG) { + // If we are testing for all-bits-clear, we might be able to do that with + // less shifting since bit-order does not matter. 
+ if (Cond != ISD::SETEQ && Cond != ISD::SETNE) + return SDValue(); + + auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true); + if (!C1 || !C1->isZero()) + return SDValue(); + + if (!N0.hasOneUse() || + (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR)) + return SDValue(); + + unsigned BitWidth = N0.getScalarValueSizeInBits(); + auto *ShAmtC = isConstOrConstSplat(N0.getOperand(2)); + if (!ShAmtC || ShAmtC->getAPIntValue().uge(BitWidth)) + return SDValue(); + + // Canonicalize fshr as fshl to reduce pattern-matching. + unsigned ShAmt = ShAmtC->getZExtValue(); + if (N0.getOpcode() == ISD::FSHR) + ShAmt = BitWidth - ShAmt; + + // Match an 'or' with a specific operand 'Other' in either commuted variant. + SDValue X, Y; + auto matchOr = [&X, &Y](SDValue Or, SDValue Other) { + if (Or.getOpcode() != ISD::OR || !Or.hasOneUse()) + return false; + if (Or.getOperand(0) == Other) { + X = Or.getOperand(0); + Y = Or.getOperand(1); + return true; + } + if (Or.getOperand(1) == Other) { + X = Or.getOperand(1); + Y = Or.getOperand(0); + return true; + } + return false; + }; + + EVT OpVT = N0.getValueType(); + EVT ShAmtVT = N0.getOperand(2).getValueType(); + SDValue F0 = N0.getOperand(0); + SDValue F1 = N0.getOperand(1); + if (matchOr(F0, F1)) { + // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0 + SDValue NewShAmt = DAG.getConstant(ShAmt, dl, ShAmtVT); + SDValue Shift = DAG.getNode(ISD::SHL, dl, OpVT, Y, NewShAmt); + SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X); + return DAG.getSetCC(dl, VT, NewOr, N1, Cond); + } + if (matchOr(F1, F0)) { + // fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0 + SDValue NewShAmt = DAG.getConstant(BitWidth - ShAmt, dl, ShAmtVT); + SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, Y, NewShAmt); + SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X); + return DAG.getSetCC(dl, VT, NewOr, N1, Cond); + } + + return SDValue(); +} + /// Try to simplify a setcc built with the specified operands and cc. 
If it is /// unable to simplify it, return a null SDValue. SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, @@ -3914,6 +3980,9 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG)) return V; + if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG)) + return V; + if (auto *N1C = isConstOrConstSplat(N1)) { const APInt &C1 = N1C->getAPIntValue(); diff --git a/llvm/test/CodeGen/AArch64/setcc-fsh.ll b/llvm/test/CodeGen/AArch64/setcc-fsh.ll index d70f0b4..aaf3a6c 100644 --- a/llvm/test/CodeGen/AArch64/setcc-fsh.ll +++ b/llvm/test/CodeGen/AArch64/setcc-fsh.ll @@ -9,8 +9,7 @@ declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) define i1 @fshl_or_eq_0(i32 %x, i32 %y) { ; CHECK-LABEL: fshl_or_eq_0: ; CHECK: // %bb.0: -; CHECK-NEXT: ror w8, w0, #27 -; CHECK-NEXT: orr w8, w8, w1, lsl #5 +; CHECK-NEXT: orr w8, w0, w1, lsl #5 ; CHECK-NEXT: cmp w8, #0 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret @@ -23,8 +22,7 @@ define i1 @fshl_or_eq_0(i32 %x, i32 %y) { define i1 @fshl_or_commute_eq_0(i32 %x, i32 %y) { ; CHECK-LABEL: fshl_or_commute_eq_0: ; CHECK: // %bb.0: -; CHECK-NEXT: ror w8, w0, #27 -; CHECK-NEXT: orr w8, w8, w1, lsl #5 +; CHECK-NEXT: orr w8, w0, w1, lsl #5 ; CHECK-NEXT: cmp w8, #0 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret @@ -37,10 +35,8 @@ define i1 @fshl_or_commute_eq_0(i32 %x, i32 %y) { define <4 x i1> @fshl_or2_eq_0(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: fshl_or2_eq_0: ; CHECK: // %bb.0: -; CHECK-NEXT: orr v1.16b, v0.16b, v1.16b -; CHECK-NEXT: shl v0.4s, v0.4s, #25 ; CHECK-NEXT: ushr v1.4s, v1.4s, #7 -; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b +; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret @@ -53,10 +49,8 @@ define <4 x i1> @fshl_or2_eq_0(<4 x i32> %x, <4 x i32> %y) { define <4 x i1> @fshl_or2_commute_eq_0(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: 
fshl_or2_commute_eq_0: ; CHECK: // %bb.0: -; CHECK-NEXT: orr v1.16b, v1.16b, v0.16b -; CHECK-NEXT: shl v0.4s, v0.4s, #25 ; CHECK-NEXT: ushr v1.4s, v1.4s, #7 -; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b +; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret @@ -69,9 +63,7 @@ define <4 x i1> @fshl_or2_commute_eq_0(<4 x i32> %x, <4 x i32> %y) { define i1 @fshr_or_eq_0(i16 %x, i16 %y) { ; CHECK-LABEL: fshr_or_eq_0: ; CHECK: // %bb.0: -; CHECK-NEXT: lsl w8, w0, #16 -; CHECK-NEXT: orr w9, w0, w1 -; CHECK-NEXT: extr w8, w9, w8, #24 +; CHECK-NEXT: orr w8, w0, w1, lsl #8 ; CHECK-NEXT: tst w8, #0xffff ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret @@ -84,9 +76,7 @@ define i1 @fshr_or_eq_0(i16 %x, i16 %y) { define i1 @fshr_or_commute_eq_0(i16 %x, i16 %y) { ; CHECK-LABEL: fshr_or_commute_eq_0: ; CHECK: // %bb.0: -; CHECK-NEXT: lsl w8, w0, #16 -; CHECK-NEXT: orr w9, w1, w0 -; CHECK-NEXT: extr w8, w9, w8, #24 +; CHECK-NEXT: orr w8, w0, w1, lsl #8 ; CHECK-NEXT: tst w8, #0xffff ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret @@ -99,8 +89,7 @@ define i1 @fshr_or_commute_eq_0(i16 %x, i16 %y) { define i1 @fshr_or2_eq_0(i64 %x, i64 %y) { ; CHECK-LABEL: fshr_or2_eq_0: ; CHECK: // %bb.0: -; CHECK-NEXT: ror x8, x0, #3 -; CHECK-NEXT: orr x8, x8, x1, lsr #3 +; CHECK-NEXT: orr x8, x0, x1, lsr #3 ; CHECK-NEXT: cmp x8, #0 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret @@ -113,8 +102,7 @@ define i1 @fshr_or2_eq_0(i64 %x, i64 %y) { define i1 @fshl_or_ne_0(i32 %x, i32 %y) { ; CHECK-LABEL: fshl_or_ne_0: ; CHECK: // %bb.0: -; CHECK-NEXT: ror w8, w0, #25 -; CHECK-NEXT: orr w8, w8, w1, lsl #7 +; CHECK-NEXT: orr w8, w0, w1, lsl #7 ; CHECK-NEXT: cmp w8, #0 ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret @@ -127,8 +115,7 @@ define i1 @fshl_or_ne_0(i32 %x, i32 %y) { define i1 @fshl_or_commute_ne_0(i32 %x, i32 %y) { ; CHECK-LABEL: fshl_or_commute_ne_0: ; CHECK: // %bb.0: -; CHECK-NEXT: ror w8, w0, #25 -; CHECK-NEXT: orr w8, w8, w1, lsl #7 +; 
CHECK-NEXT: orr w8, w0, w1, lsl #7 ; CHECK-NEXT: cmp w8, #0 ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret @@ -141,10 +128,8 @@ define i1 @fshl_or_commute_ne_0(i32 %x, i32 %y) { define <4 x i1> @fshl_or2_ne_0(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: fshl_or2_ne_0: ; CHECK: // %bb.0: -; CHECK-NEXT: orr v1.16b, v0.16b, v1.16b -; CHECK-NEXT: shl v0.4s, v0.4s, #5 ; CHECK-NEXT: ushr v1.4s, v1.4s, #27 -; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b +; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b ; CHECK-NEXT: cmtst v0.4s, v0.4s, v0.4s ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret @@ -157,10 +142,8 @@ define <4 x i1> @fshl_or2_ne_0(<4 x i32> %x, <4 x i32> %y) { define <4 x i1> @fshl_or2_commute_ne_0(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: fshl_or2_commute_ne_0: ; CHECK: // %bb.0: -; CHECK-NEXT: orr v1.16b, v1.16b, v0.16b -; CHECK-NEXT: shl v0.4s, v0.4s, #5 ; CHECK-NEXT: ushr v1.4s, v1.4s, #27 -; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b +; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b ; CHECK-NEXT: cmtst v0.4s, v0.4s, v0.4s ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret @@ -173,8 +156,7 @@ define <4 x i1> @fshl_or2_commute_ne_0(<4 x i32> %x, <4 x i32> %y) { define i1 @fshr_or_ne_0(i64 %x, i64 %y) { ; CHECK-LABEL: fshr_or_ne_0: ; CHECK: // %bb.0: -; CHECK-NEXT: orr w8, w0, w1 -; CHECK-NEXT: extr x8, x8, x0, #1 +; CHECK-NEXT: orr x8, x0, x1, lsl #63 ; CHECK-NEXT: cmp x8, #0 ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret @@ -187,8 +169,7 @@ define i1 @fshr_or_ne_0(i64 %x, i64 %y) { define i1 @fshr_or_commute_ne_0(i64 %x, i64 %y) { ; CHECK-LABEL: fshr_or_commute_ne_0: ; CHECK: // %bb.0: -; CHECK-NEXT: orr w8, w1, w0 -; CHECK-NEXT: extr x8, x8, x0, #1 +; CHECK-NEXT: orr x8, x0, x1, lsl #63 ; CHECK-NEXT: cmp x8, #0 ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret @@ -201,9 +182,8 @@ define i1 @fshr_or_commute_ne_0(i64 %x, i64 %y) { define i1 @fshr_or2_ne_0(i16 %x, i16 %y) { ; CHECK-LABEL: fshr_or2_ne_0: ; CHECK: // %bb.0: -; CHECK-NEXT: orr w8, w0, w1 -; CHECK-NEXT: lsl w8, w8, #16 -; CHECK-NEXT: 
extr w8, w0, w8, #18 +; CHECK-NEXT: and w8, w1, #0xfffc +; CHECK-NEXT: orr w8, w0, w8, lsr #2 ; CHECK-NEXT: tst w8, #0xffff ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret @@ -216,9 +196,8 @@ define i1 @fshr_or2_ne_0(i16 %x, i16 %y) { define i1 @fshr_or2_commute_ne_0(i16 %x, i16 %y) { ; CHECK-LABEL: fshr_or2_commute_ne_0: ; CHECK: // %bb.0: -; CHECK-NEXT: orr w8, w1, w0 -; CHECK-NEXT: lsl w8, w8, #16 -; CHECK-NEXT: extr w8, w0, w8, #18 +; CHECK-NEXT: and w8, w1, #0xfffc +; CHECK-NEXT: orr w8, w0, w8, lsr #2 ; CHECK-NEXT: tst w8, #0xffff ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/X86/icmp-shift-opt.ll b/llvm/test/CodeGen/X86/icmp-shift-opt.ll index 0afe8d3..23524d2 100644 --- a/llvm/test/CodeGen/X86/icmp-shift-opt.ll +++ b/llvm/test/CodeGen/X86/icmp-shift-opt.ll @@ -53,8 +53,8 @@ define i128 @opt_setcc_lt_power_of_2(i128 %a) nounwind { ; X64-NEXT: addq $1, %rax ; X64-NEXT: adcq $0, %rdx ; X64-NEXT: movq %rax, %rcx +; X64-NEXT: shrq $60, %rcx ; X64-NEXT: orq %rdx, %rcx -; X64-NEXT: shrdq $60, %rdx, %rcx ; X64-NEXT: jne .LBB0_1 ; X64-NEXT: # %bb.2: # %exit ; X64-NEXT: retq @@ -90,8 +90,8 @@ define i1 @opt_setcc_srl_eq_zero(i128 %a) nounwind { ; ; X64-LABEL: opt_setcc_srl_eq_zero: ; X64: # %bb.0: +; X64-NEXT: shrq $17, %rdi ; X64-NEXT: orq %rsi, %rdi -; X64-NEXT: shrdq $17, %rsi, %rdi ; X64-NEXT: sete %al ; X64-NEXT: retq %srl = lshr i128 %a, 17 @@ -119,8 +119,8 @@ define i1 @opt_setcc_srl_ne_zero(i128 %a) nounwind { ; ; X64-LABEL: opt_setcc_srl_ne_zero: ; X64: # %bb.0: +; X64-NEXT: shrq $17, %rdi ; X64-NEXT: orq %rsi, %rdi -; X64-NEXT: shrdq $17, %rsi, %rdi ; X64-NEXT: setne %al ; X64-NEXT: retq %srl = lshr i128 %a, 17 @@ -148,8 +148,8 @@ define i1 @opt_setcc_shl_eq_zero(i128 %a) nounwind { ; ; X64-LABEL: opt_setcc_shl_eq_zero: ; X64: # %bb.0: +; X64-NEXT: shlq $17, %rsi ; X64-NEXT: orq %rdi, %rsi -; X64-NEXT: shldq $17, %rdi, %rsi ; X64-NEXT: sete %al ; X64-NEXT: retq %shl = shl i128 %a, 17 @@ -177,8 +177,8 @@ define i1 
@opt_setcc_shl_ne_zero(i128 %a) nounwind { ; ; X64-LABEL: opt_setcc_shl_ne_zero: ; X64: # %bb.0: +; X64-NEXT: shlq $17, %rsi ; X64-NEXT: orq %rdi, %rsi -; X64-NEXT: shldq $17, %rdi, %rsi ; X64-NEXT: setne %al ; X64-NEXT: retq %shl = shl i128 %a, 17 @@ -255,8 +255,8 @@ define i1 @opt_setcc_expanded_shl_correct_shifts(i64 %a, i64 %b) nounwind { ; ; X64-LABEL: opt_setcc_expanded_shl_correct_shifts: ; X64: # %bb.0: +; X64-NEXT: shlq $17, %rdi ; X64-NEXT: orq %rsi, %rdi -; X64-NEXT: shldq $17, %rsi, %rdi ; X64-NEXT: sete %al ; X64-NEXT: retq %shl.a = shl i64 %a, 17 diff --git a/llvm/test/CodeGen/X86/setcc-fsh.ll b/llvm/test/CodeGen/X86/setcc-fsh.ll index 59319a7..7ab6395 100644 --- a/llvm/test/CodeGen/X86/setcc-fsh.ll +++ b/llvm/test/CodeGen/X86/setcc-fsh.ll @@ -265,8 +265,8 @@ define <4 x i1> @or_rotl_ne_eq0(<4 x i32> %x, <4 x i32> %y) nounwind { define i1 @fshl_or_eq_0(i32 %x, i32 %y) { ; CHECK-LABEL: fshl_or_eq_0: ; CHECK: # %bb.0: +; CHECK-NEXT: shll $5, %esi ; CHECK-NEXT: orl %edi, %esi -; CHECK-NEXT: shldl $5, %edi, %esi ; CHECK-NEXT: sete %al ; CHECK-NEXT: retq %or = or i32 %x, %y @@ -278,8 +278,8 @@ define i1 @fshl_or_eq_0(i32 %x, i32 %y) { define i1 @fshl_or_commute_eq_0(i32 %x, i32 %y) { ; CHECK-LABEL: fshl_or_commute_eq_0: ; CHECK: # %bb.0: +; CHECK-NEXT: shll $5, %esi ; CHECK-NEXT: orl %edi, %esi -; CHECK-NEXT: shldl $5, %edi, %esi ; CHECK-NEXT: sete %al ; CHECK-NEXT: retq %or = or i32 %y, %x @@ -291,12 +291,10 @@ define i1 @fshl_or_commute_eq_0(i32 %x, i32 %y) { define <4 x i1> @fshl_or2_eq_0(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: fshl_or2_eq_0: ; CHECK: # %bb.0: -; CHECK-NEXT: por %xmm0, %xmm1 +; CHECK-NEXT: pxor %xmm2, %xmm2 ; CHECK-NEXT: psrld $7, %xmm1 -; CHECK-NEXT: pslld $25, %xmm0 ; CHECK-NEXT: por %xmm1, %xmm0 -; CHECK-NEXT: pxor %xmm1, %xmm1 -; CHECK-NEXT: pcmpeqd %xmm1, %xmm0 +; CHECK-NEXT: pcmpeqd %xmm2, %xmm0 ; CHECK-NEXT: retq %or = or <4 x i32> %x, %y %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %or, <4 x i32> <i32 25, i32 25, i32 25, i32 25>) @@ 
-307,12 +305,10 @@ define <4 x i1> @fshl_or2_eq_0(<4 x i32> %x, <4 x i32> %y) { define <4 x i1> @fshl_or2_commute_eq_0(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: fshl_or2_commute_eq_0: ; CHECK: # %bb.0: -; CHECK-NEXT: por %xmm0, %xmm1 +; CHECK-NEXT: pxor %xmm2, %xmm2 ; CHECK-NEXT: psrld $7, %xmm1 -; CHECK-NEXT: pslld $25, %xmm0 ; CHECK-NEXT: por %xmm1, %xmm0 -; CHECK-NEXT: pxor %xmm1, %xmm1 -; CHECK-NEXT: pcmpeqd %xmm1, %xmm0 +; CHECK-NEXT: pcmpeqd %xmm2, %xmm0 ; CHECK-NEXT: retq %or = or <4 x i32> %y, %x %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %or, <4 x i32> <i32 25, i32 25, i32 25, i32 25>) @@ -323,8 +319,8 @@ define <4 x i1> @fshl_or2_commute_eq_0(<4 x i32> %x, <4 x i32> %y) { define i1 @fshr_or_eq_0(i16 %x, i16 %y) { ; CHECK-LABEL: fshr_or_eq_0: ; CHECK: # %bb.0: -; CHECK-NEXT: orl %edi, %esi -; CHECK-NEXT: shldw $8, %di, %si +; CHECK-NEXT: shll $8, %esi +; CHECK-NEXT: orw %di, %si ; CHECK-NEXT: sete %al ; CHECK-NEXT: retq %or = or i16 %x, %y @@ -336,8 +332,8 @@ define i1 @fshr_or_eq_0(i16 %x, i16 %y) { define i1 @fshr_or_commute_eq_0(i16 %x, i16 %y) { ; CHECK-LABEL: fshr_or_commute_eq_0: ; CHECK: # %bb.0: -; CHECK-NEXT: orl %edi, %esi -; CHECK-NEXT: shldw $8, %di, %si +; CHECK-NEXT: shll $8, %esi +; CHECK-NEXT: orw %di, %si ; CHECK-NEXT: sete %al ; CHECK-NEXT: retq %or = or i16 %y, %x @@ -349,8 +345,8 @@ define i1 @fshr_or_commute_eq_0(i16 %x, i16 %y) { define i1 @fshr_or2_eq_0(i64 %x, i64 %y) { ; CHECK-LABEL: fshr_or2_eq_0: ; CHECK: # %bb.0: +; CHECK-NEXT: shrq $3, %rsi ; CHECK-NEXT: orq %rdi, %rsi -; CHECK-NEXT: shrdq $3, %rdi, %rsi ; CHECK-NEXT: sete %al ; CHECK-NEXT: retq %or = or i64 %x, %y @@ -362,8 +358,8 @@ define i1 @fshr_or2_eq_0(i64 %x, i64 %y) { define i1 @fshr_or2_commute_eq_0(i64 %x, i64 %y) { ; CHECK-LABEL: fshr_or2_commute_eq_0: ; CHECK: # %bb.0: +; CHECK-NEXT: shrq $3, %rsi ; CHECK-NEXT: orq %rdi, %rsi -; CHECK-NEXT: shrdq $3, %rdi, %rsi ; CHECK-NEXT: sete %al ; CHECK-NEXT: retq %or = or i64 %y, %x @@ -375,8 +371,8 @@ define i1 
@fshr_or2_commute_eq_0(i64 %x, i64 %y) { define i1 @fshl_or_ne_0(i32 %x, i32 %y) { ; CHECK-LABEL: fshl_or_ne_0: ; CHECK: # %bb.0: +; CHECK-NEXT: shll $7, %esi ; CHECK-NEXT: orl %edi, %esi -; CHECK-NEXT: shldl $7, %edi, %esi ; CHECK-NEXT: setne %al ; CHECK-NEXT: retq %or = or i32 %x, %y @@ -388,8 +384,8 @@ define i1 @fshl_or_ne_0(i32 %x, i32 %y) { define i1 @fshl_or_commute_ne_0(i32 %x, i32 %y) { ; CHECK-LABEL: fshl_or_commute_ne_0: ; CHECK: # %bb.0: +; CHECK-NEXT: shll $7, %esi ; CHECK-NEXT: orl %edi, %esi -; CHECK-NEXT: shldl $7, %edi, %esi ; CHECK-NEXT: setne %al ; CHECK-NEXT: retq %or = or i32 %y, %x @@ -401,12 +397,10 @@ define i1 @fshl_or_commute_ne_0(i32 %x, i32 %y) { define <4 x i1> @fshl_or2_ne_0(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: fshl_or2_ne_0: ; CHECK: # %bb.0: -; CHECK-NEXT: por %xmm0, %xmm1 +; CHECK-NEXT: pxor %xmm2, %xmm2 ; CHECK-NEXT: psrld $27, %xmm1 -; CHECK-NEXT: pslld $5, %xmm0 ; CHECK-NEXT: por %xmm1, %xmm0 -; CHECK-NEXT: pxor %xmm1, %xmm1 -; CHECK-NEXT: pcmpeqd %xmm1, %xmm0 +; CHECK-NEXT: pcmpeqd %xmm2, %xmm0 ; CHECK-NEXT: pcmpeqd %xmm1, %xmm1 ; CHECK-NEXT: pxor %xmm1, %xmm0 ; CHECK-NEXT: retq @@ -419,12 +413,10 @@ define <4 x i1> @fshl_or2_ne_0(<4 x i32> %x, <4 x i32> %y) { define <4 x i1> @fshl_or2_commute_ne_0(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: fshl_or2_commute_ne_0: ; CHECK: # %bb.0: -; CHECK-NEXT: por %xmm0, %xmm1 +; CHECK-NEXT: pxor %xmm2, %xmm2 ; CHECK-NEXT: psrld $27, %xmm1 -; CHECK-NEXT: pslld $5, %xmm0 ; CHECK-NEXT: por %xmm1, %xmm0 -; CHECK-NEXT: pxor %xmm1, %xmm1 -; CHECK-NEXT: pcmpeqd %xmm1, %xmm0 +; CHECK-NEXT: pcmpeqd %xmm2, %xmm0 ; CHECK-NEXT: pcmpeqd %xmm1, %xmm1 ; CHECK-NEXT: pxor %xmm1, %xmm0 ; CHECK-NEXT: retq @@ -437,8 +429,8 @@ define <4 x i1> @fshl_or2_commute_ne_0(<4 x i32> %x, <4 x i32> %y) { define i1 @fshr_or_ne_0(i64 %x, i64 %y) { ; CHECK-LABEL: fshr_or_ne_0: ; CHECK: # %bb.0: -; CHECK-NEXT: orl %edi, %esi -; CHECK-NEXT: shldq $63, %rdi, %rsi +; CHECK-NEXT: shlq $63, %rsi +; CHECK-NEXT: orq %rdi, 
%rsi ; CHECK-NEXT: setne %al ; CHECK-NEXT: retq %or = or i64 %x, %y @@ -450,8 +442,8 @@ define i1 @fshr_or_ne_0(i64 %x, i64 %y) { define i1 @fshr_or_commute_ne_0(i64 %x, i64 %y) { ; CHECK-LABEL: fshr_or_commute_ne_0: ; CHECK: # %bb.0: -; CHECK-NEXT: orl %edi, %esi -; CHECK-NEXT: shldq $63, %rdi, %rsi +; CHECK-NEXT: shlq $63, %rsi +; CHECK-NEXT: orq %rdi, %rsi ; CHECK-NEXT: setne %al ; CHECK-NEXT: retq %or = or i64 %y, %x @@ -463,8 +455,9 @@ define i1 @fshr_or_commute_ne_0(i64 %x, i64 %y) { define i1 @fshr_or2_ne_0(i16 %x, i16 %y) { ; CHECK-LABEL: fshr_or2_ne_0: ; CHECK: # %bb.0: -; CHECK-NEXT: orl %edi, %esi -; CHECK-NEXT: shrdw $2, %di, %si +; CHECK-NEXT: movzwl %si, %eax +; CHECK-NEXT: shrl $2, %eax +; CHECK-NEXT: orw %di, %ax ; CHECK-NEXT: setne %al ; CHECK-NEXT: retq %or = or i16 %x, %y @@ -476,8 +469,9 @@ define i1 @fshr_or2_ne_0(i16 %x, i16 %y) { define i1 @fshr_or2_commute_ne_0(i16 %x, i16 %y) { ; CHECK-LABEL: fshr_or2_commute_ne_0: ; CHECK: # %bb.0: -; CHECK-NEXT: orl %edi, %esi -; CHECK-NEXT: shrdw $2, %di, %si +; CHECK-NEXT: movzwl %si, %eax +; CHECK-NEXT: shrl $2, %eax +; CHECK-NEXT: orw %di, %ax ; CHECK-NEXT: setne %al ; CHECK-NEXT: retq %or = or i16 %y, %x