From bfc7ffa40ffc570f54e2b4462dc6aae38da1a85a Mon Sep 17 00:00:00 2001
From: Sanjay Patel
Date: Thu, 6 Dec 2018 19:18:56 +0000
Subject: [PATCH] [DAGCombiner] don't hoist logic op if operands have other
 uses, part 2

The PPC test with 2 extra uses seems clearly better by avoiding this
transform. With 1 extra use, we also prevent an extra register move
(although that might be an RA problem). The general rule should be to
only make a change here if it is always profitable. The x86 diffs are
all neutral.

llvm-svn: 348518
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp     | 12 ++++++----
 llvm/test/CodeGen/PowerPC/hoist-logic.ll          | 19 ++++++++-------
 llvm/test/CodeGen/X86/packss.ll                   |  6 ++---
 llvm/test/CodeGen/X86/vector-shift-ashr-128.ll    |  6 ++---
 llvm/test/CodeGen/X86/vector-shift-ashr-256.ll    | 24 +++++++++----------
 llvm/test/CodeGen/X86/vector-shift-ashr-sub128.ll | 28 +++++++++++------------
 6 files changed, 48 insertions(+), 47 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 5a70dee..166577e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3764,11 +3764,13 @@ SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
   if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
        HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
       N0.getOperand(1) == N1.getOperand(1)) {
-    SDValue ORNode = DAG.getNode(LogicOpcode, SDLoc(N0),
-                                 N0.getOperand(0).getValueType(),
-                                 N0.getOperand(0), N1.getOperand(0));
-    AddToWorklist(ORNode.getNode());
-    return DAG.getNode(HandOpcode, SDLoc(N), VT, ORNode, N0.getOperand(1));
+    // If either operand has other uses, this transform is not an improvement.
+    if (!N0.hasOneUse() || !N1.hasOneUse())
+      return SDValue();
+    SDValue Logic = DAG.getNode(LogicOpcode, SDLoc(N0), Op0VT,
+                                N0.getOperand(0), N1.getOperand(0));
+    AddToWorklist(Logic.getNode());
+    return DAG.getNode(HandOpcode, SDLoc(N), VT, Logic, N0.getOperand(1));
   }
 
   // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
diff --git a/llvm/test/CodeGen/PowerPC/hoist-logic.ll b/llvm/test/CodeGen/PowerPC/hoist-logic.ll
index 8c9f86b..e9b052e 100644
--- a/llvm/test/CodeGen/PowerPC/hoist-logic.ll
+++ b/llvm/test/CodeGen/PowerPC/hoist-logic.ll
@@ -16,15 +16,15 @@ define i32 @lshr_or(i32 %x, i32 %y, i32 %z, i32* %p1, i32* %p2) {
 }
 
 ; This is questionable - hoisting doesn't eliminate anything.
+; It might result in an extra register move.
 
 define i32 @lshr_or_multiuse1(i32 %x, i32 %y, i32 %z, i32* %p1, i32* %p2) {
 ; CHECK-LABEL: lshr_or_multiuse1:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: or 4, 3, 4
-; CHECK-NEXT: srw 4, 4, 5
-; CHECK-NEXT: srw 5, 3, 5
-; CHECK-NEXT: mr 3, 4
-; CHECK-NEXT: stw 5, 0(6)
+; CHECK-NEXT: srw 7, 3, 5
+; CHECK-NEXT: srw 3, 4, 5
+; CHECK-NEXT: or 3, 7, 3
+; CHECK-NEXT: stw 7, 0(6)
 ; CHECK-NEXT: blr
   %xt = lshr i32 %x, %z
   %yt = lshr i32 %y, %z
@@ -38,9 +38,9 @@ define i32 @lshr_multiuse2(i32 %x, i32 %y, i32 %z, i32* %p1, i32* %p2) {
 ; CHECK-LABEL: lshr_multiuse2:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: or 3, 3, 4
 ; CHECK-NEXT: srw 3, 3, 5
 ; CHECK-NEXT: srw 4, 4, 5
+; CHECK-NEXT: or 3, 3, 4
 ; CHECK-NEXT: stw 4, 0(7)
 ; CHECK-NEXT: blr
   %xt = lshr i32 %x, %z
@@ -50,16 +50,15 @@ define i32 @lshr_multiuse2(i32 %x, i32 %y, i32 %z, i32* %p1, i32* %p2) {
   ret i32 %r
 }
 
-; FIXME: This is not profitable to hoist. We need an extra shift instruction.
+; This is not profitable to hoist. We need an extra shift instruction.
 
 define i32 @lshr_multiuse3(i32 %x, i32 %y, i32 %z, i32* %p1, i32* %p2) {
 ; CHECK-LABEL: lshr_multiuse3:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: or 8, 3, 4
 ; CHECK-NEXT: srw 3, 3, 5
-; CHECK-NEXT: stw 3, 0(6)
-; CHECK-NEXT: srw 3, 8, 5
 ; CHECK-NEXT: srw 4, 4, 5
+; CHECK-NEXT: stw 3, 0(6)
+; CHECK-NEXT: or 3, 3, 4
 ; CHECK-NEXT: stw 4, 0(7)
 ; CHECK-NEXT: blr
   %xt = lshr i32 %x, %z
diff --git a/llvm/test/CodeGen/X86/packss.ll b/llvm/test/CodeGen/X86/packss.ll
index 3feb0d0..c8ea793 100644
--- a/llvm/test/CodeGen/X86/packss.ll
+++ b/llvm/test/CodeGen/X86/packss.ll
@@ -206,10 +206,10 @@ define <8 x i16> @trunc_ashr_v4i64_demandedelts(<4 x i64> %a0) {
 ; X86-AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [63,0,0,0,63,0,0,0]
 ; X86-AVX2-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
 ; X86-AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2147483648,0,2147483648,0,2147483648,0,2147483648]
-; X86-AVX2-NEXT: vpsrlvq %ymm1, %ymm2, %ymm3
-; X86-AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpsrlvq %ymm1, %ymm2, %ymm2
 ; X86-AVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
-; X86-AVX2-NEXT: vpsubq %ymm3, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpsubq %ymm2, %ymm0, %ymm0
 ; X86-AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
 ; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
 ; X86-AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
index 584a54e68..8b30982 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
@@ -67,10 +67,10 @@ define <2 x i64> @var_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
 ; AVX2-LABEL: var_shift_v2i64:
 ; AVX2: # %bb.0:
 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX2-NEXT: vpsrlvq %xmm1, %xmm2, %xmm3
-; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpsrlvq %xmm1, %xmm2, %xmm2
 ; AVX2-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpsubq %xmm3, %xmm0, %xmm0
+; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpsubq %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT: retq
 ;
 ; XOP-LABEL: var_shift_v2i64:
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
index 6d79996..7c33876 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
@@ -46,10 +46,10 @@ define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
 ; AVX2-LABEL: var_shift_v4i64:
 ; AVX2: # %bb.0:
 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
-; AVX2-NEXT: vpsrlvq %ymm1, %ymm2, %ymm3
-; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpsrlvq %ymm1, %ymm2, %ymm2
 ; AVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpsubq %ymm3, %ymm0, %ymm0
+; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpsubq %ymm2, %ymm0, %ymm0
 ; AVX2-NEXT: retq
 ;
 ; XOPAVX1-LABEL: var_shift_v4i64:
@@ -67,10 +67,10 @@ define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
 ; XOPAVX2-LABEL: var_shift_v4i64:
 ; XOPAVX2: # %bb.0:
 ; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
-; XOPAVX2-NEXT: vpsrlvq %ymm1, %ymm2, %ymm3
-; XOPAVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpsrlvq %ymm1, %ymm2, %ymm2
 ; XOPAVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
-; XOPAVX2-NEXT: vpsubq %ymm3, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpsubq %ymm2, %ymm0, %ymm0
 ; XOPAVX2-NEXT: retq
 ;
 ; AVX512-LABEL: var_shift_v4i64:
@@ -115,10 +115,10 @@ define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
 ; X32-AVX2-LABEL: var_shift_v4i64:
 ; X32-AVX2: # %bb.0:
 ; X32-AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2147483648,0,2147483648,0,2147483648,0,2147483648]
-; X32-AVX2-NEXT: vpsrlvq %ymm1, %ymm2, %ymm3
-; X32-AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; X32-AVX2-NEXT: vpsrlvq %ymm1, %ymm2, %ymm2
 ; X32-AVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
-; X32-AVX2-NEXT: vpsubq %ymm3, %ymm0, %ymm0
+; X32-AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; X32-AVX2-NEXT: vpsubq %ymm2, %ymm0, %ymm0
 ; X32-AVX2-NEXT: retl
   %shift = ashr <4 x i64> %a, %b
   ret <4 x i64> %shift
@@ -1086,10 +1086,10 @@ define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) nounwind {
 ; X32-AVX2: # %bb.0:
 ; X32-AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,0,7,0,31,0,62,0]
 ; X32-AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2147483648,0,2147483648,0,2147483648,0,2147483648]
-; X32-AVX2-NEXT: vpsrlvq %ymm1, %ymm2, %ymm3
-; X32-AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; X32-AVX2-NEXT: vpsrlvq %ymm1, %ymm2, %ymm2
 ; X32-AVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
-; X32-AVX2-NEXT: vpsubq %ymm3, %ymm0, %ymm0
+; X32-AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; X32-AVX2-NEXT: vpsubq %ymm2, %ymm0, %ymm0
 ; X32-AVX2-NEXT: retl
   %shift = ashr <4 x i64> %a, <i64 1, i64 7, i64 31, i64 62>
   ret <4 x i64> %shift
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-sub128.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-sub128.ll
index 374429c..732b901 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-sub128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-sub128.ll
@@ -90,10 +90,10 @@ define <2 x i32> @var_shift_v2i32(<2 x i32> %a, <2 x i32> %b) nounwind {
 ; AVX2-NEXT: vpsllq $32, %xmm0, %xmm2
 ; AVX2-NEXT: vpsrad $31, %xmm2, %xmm2
 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
 ; AVX2-NEXT: vpsrlvq %xmm1, %xmm2, %xmm1
+; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT: vpsubq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT: retq
 ;
@@ -382,10 +382,10 @@ define <2 x i16> @var_shift_v2i16(<2 x i16> %a, <2 x i16> %b) nounwind {
 ; AVX2-NEXT: vpsrad $16, %xmm0, %xmm0
 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
 ; AVX2-NEXT: vpsrlvq %xmm1, %xmm2, %xmm1
+; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT: vpsubq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT: retq
 ;
@@ -857,10 +857,10 @@ define <2 x i8> @var_shift_v2i8(<2 x i8> %a, <2 x i8> %b) nounwind {
 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
 ; AVX2-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX2-NEXT: vpsrlvq %xmm1, %xmm2, %xmm3
-; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpsrlvq %xmm1, %xmm2, %xmm2
 ; AVX2-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpsubq %xmm3, %xmm0, %xmm0
+; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpsubq %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT: retq
 ;
 ; XOP-LABEL: var_shift_v2i8:
@@ -999,10 +999,10 @@ define <2 x i32> @splatvar_shift_v2i32(<2 x i32> %a, <2 x i32> %b) nounwind {
 ; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1
 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
 ; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
 ; AVX2-NEXT: vpsrlvq %xmm1, %xmm2, %xmm1
+; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT: vpsubq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT: retq
 ;
@@ -1310,10 +1310,10 @@ define <2 x i16> @splatvar_shift_v2i16(<2 x i16> %a, <2 x i16> %b) nounwind {
 ; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1
 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
 ; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7]
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
 ; AVX2-NEXT: vpsrlvq %xmm1, %xmm2, %xmm1
+; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT: vpsubq %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT: retq
 ;
@@ -1810,10 +1810,10 @@ define <2 x i8> @splatvar_shift_v2i8(<2 x i8> %a, <2 x i8> %b) nounwind {
 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
 ; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[0],zero,zero,zero,zero,zero,zero,zero
 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX2-NEXT: vpsrlvq %xmm1, %xmm2, %xmm3
-; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpsrlvq %xmm1, %xmm2, %xmm2
 ; AVX2-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpsubq %xmm3, %xmm0, %xmm0
+; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpsubq %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT: retq
 ;
 ; XOP-LABEL: splatvar_shift_v2i8:
-- 
2.7.4
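
Note for readers: the combine guarded above rewrites patterns such as (or (srl x, z), (srl y, z)) into (srl (or x, y), z). A minimal IR sketch of the multi-use situation that the new hasOneUse() checks now leave alone is shown below; the function name is illustrative only (it mirrors the lshr_multiuse3 test in hoist-logic.ll) and is not part of the patch:

; Both shifted values are stored, so they must be computed regardless of the 'or'.
; Hoisting the 'or' above the shifts would still need both original shifts for the
; stores, plus a third shift of the combined value, which is a net loss.
define i32 @multiuse_example(i32 %x, i32 %y, i32 %z, i32* %p1, i32* %p2) {
  %xt = lshr i32 %x, %z
  %yt = lshr i32 %y, %z
  %r = or i32 %xt, %yt
  store i32 %xt, i32* %p1
  store i32 %yt, i32* %p2
  ret i32 %r
}

With the one-use restriction, such a function keeps two shifts and one 'or' (as in the updated PowerPC CHECK lines); without it, the hoisted form still had to materialize %xt and %yt for the stores and so used three shifts and one 'or'.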