From 893a6b89ffe0c247f2df02497f01e07970f2f51c Mon Sep 17 00:00:00 2001 From: Amaury Sechet Date: Fri, 23 Feb 2018 11:50:42 +0000 Subject: [PATCH] [DAGCombine] Ensure that (brcond (setcc ...)) is handled in a canonical manner. Summary: There are transformations that change setcc into other constructs, and transforms that try to reconstruct a setcc from the brcond condition. Depending on what order these transforms are done, the end result differs. Most of the time, it is preferable to get a setcc as a brcond argument (and this is why brcond tries to recreate the setcc in the first place) so we ensure this is done every time by also doing it at the setcc level when the only user is a brcond. Reviewers: spatel, hfinkel, niravd, craig.topper Subscribers: nhaehnle, llvm-commits Differential Revision: https://reviews.llvm.org/D41235 llvm-svn: 325892 --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 142 ++++++++++++------------- llvm/lib/Target/Hexagon/HexagonPatterns.td | 2 + llvm/test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll | 2 - llvm/test/CodeGen/AMDGPU/setcc.ll | 6 +- llvm/test/CodeGen/X86/and-sink.ll | 4 +- llvm/test/CodeGen/X86/fold-rmw-ops.ll | 15 +-- llvm/test/CodeGen/X86/or-branch.ll | 27 +++-- 7 files changed, 90 insertions(+), 108 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 3f99843..b3a8eeb 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -415,7 +415,8 @@ namespace { SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1, const SDLoc &DL); SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, - const SDLoc &DL, bool foldBooleans = true); + const SDLoc &DL, bool foldBooleans); + SDValue rebuildSetCC(SDValue N); bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, SDValue &CC) const; @@ -7157,9 +7158,33 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) { } SDValue 
DAGCombiner::visitSETCC(SDNode *N) { - return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1), - cast(N->getOperand(2))->get(), - SDLoc(N)); + // setcc is very commonly used as an argument to brcond. This pattern + // also lend itself to numerous combines and, as a result, it is desired + // we keep the argument to a brcond as a setcc as much as possible. + bool PreferSetCC = + N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND; + + SDValue Combined = SimplifySetCC( + N->getValueType(0), N->getOperand(0), N->getOperand(1), + cast(N->getOperand(2))->get(), SDLoc(N), !PreferSetCC); + + if (!Combined) + return SDValue(); + + // If we prefer to have a setcc, and we don't, we'll try our best to + // recreate one using rebuildSetCC. + if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) { + SDValue NewSetCC = rebuildSetCC(Combined); + + // We don't have anything interesting to combine to. + if (NewSetCC.getNode() == N) + return SDValue(); + + if (NewSetCC) + return NewSetCC; + } + + return Combined; } SDValue DAGCombiner::visitSETCCE(SDNode *N) { @@ -11151,16 +11176,22 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { N1.getOperand(0), N1.getOperand(1), N2); } - if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) || - ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) && - (N1.getOperand(0).hasOneUse() && - N1.getOperand(0).getOpcode() == ISD::SRL))) { - SDNode *Trunc = nullptr; - if (N1.getOpcode() == ISD::TRUNCATE) { - // Look pass the truncate. - Trunc = N1.getNode(); - N1 = N1.getOperand(0); - } + if (N1.hasOneUse()) { + if (SDValue NewN1 = rebuildSetCC(N1)) + return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, NewN1, N2); + } + + return SDValue(); +} + +SDValue DAGCombiner::rebuildSetCC(SDValue N) { + if (N.getOpcode() == ISD::SRL || + (N.getOpcode() == ISD::TRUNCATE && + (N.getOperand(0).hasOneUse() && + N.getOperand(0).getOpcode() == ISD::SRL))) { + // Look pass the truncate. 
+ if (N.getOpcode() == ISD::TRUNCATE) + N = N.getOperand(0); // Match this pattern so that we can generate simpler code: // @@ -11179,75 +11210,43 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { // This applies only when the AND constant value has one bit set and the // SRL constant is equal to the log2 of the AND constant. The back-end is // smart enough to convert the result into a TEST/JMP sequence. - SDValue Op0 = N1.getOperand(0); - SDValue Op1 = N1.getOperand(1); + SDValue Op0 = N.getOperand(0); + SDValue Op1 = N.getOperand(1); - if (Op0.getOpcode() == ISD::AND && - Op1.getOpcode() == ISD::Constant) { + if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) { SDValue AndOp1 = Op0.getOperand(1); if (AndOp1.getOpcode() == ISD::Constant) { const APInt &AndConst = cast(AndOp1)->getAPIntValue(); if (AndConst.isPowerOf2() && - cast(Op1)->getAPIntValue()==AndConst.logBase2()) { + cast(Op1)->getAPIntValue() == AndConst.logBase2()) { SDLoc DL(N); - SDValue SetCC = - DAG.getSetCC(DL, - getSetCCResultType(Op0.getValueType()), - Op0, DAG.getConstant(0, DL, Op0.getValueType()), - ISD::SETNE); - - SDValue NewBRCond = DAG.getNode(ISD::BRCOND, DL, - MVT::Other, Chain, SetCC, N2); - // Don't add the new BRCond into the worklist or else SimplifySelectCC - // will convert it back to (X & C1) >> C2. - CombineTo(N, NewBRCond, false); - // Truncate is dead. - if (Trunc) - deleteAndRecombine(Trunc); - // Replace the uses of SRL with SETCC - WorklistRemover DeadNodes(*this); - DAG.ReplaceAllUsesOfValueWith(N1, SetCC); - deleteAndRecombine(N1.getNode()); - return SDValue(N, 0); // Return N so it doesn't get rechecked! + return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()), + Op0, DAG.getConstant(0, DL, Op0.getValueType()), + ISD::SETNE); } } } - - if (Trunc) - // Restore N1 if the above transformation doesn't match. 
- N1 = N->getOperand(1); } // Transform br(xor(x, y)) -> br(x != y) // Transform br(xor(xor(x,y), 1)) -> br (x == y) - if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) { - SDNode *TheXor = N1.getNode(); - SDValue Op0 = TheXor->getOperand(0); - SDValue Op1 = TheXor->getOperand(1); - if (Op0.getOpcode() == Op1.getOpcode()) { - // Avoid missing important xor optimizations. - if (SDValue Tmp = visitXOR(TheXor)) { - if (Tmp.getNode() != TheXor) { - DEBUG(dbgs() << "\nReplacing.8 "; - TheXor->dump(&DAG); - dbgs() << "\nWith: "; - Tmp.getNode()->dump(&DAG); - dbgs() << '\n'); - WorklistRemover DeadNodes(*this); - DAG.ReplaceAllUsesOfValueWith(N1, Tmp); - deleteAndRecombine(TheXor); - return DAG.getNode(ISD::BRCOND, SDLoc(N), - MVT::Other, Chain, Tmp, N2); - } + if (N.getOpcode() == ISD::XOR) { + SDNode *TheXor = N.getNode(); - // visitXOR has changed XOR's operands or replaced the XOR completely, - // bail out. - return SDValue(N, 0); - } + // Avoid missing important xor optimizations. + while (SDValue Tmp = visitXOR(TheXor)) { + // We don't have a XOR anymore, bail. + if (Tmp.getOpcode() != ISD::XOR) + return Tmp; + + TheXor = Tmp.getNode(); } + SDValue Op0 = TheXor->getOperand(0); + SDValue Op1 = TheXor->getOperand(1); + if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) { bool Equal = false; if (isOneConstant(Op0) && Op0.hasOneUse() && @@ -11256,19 +11255,12 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { Equal = true; } - EVT SetCCVT = N1.getValueType(); + EVT SetCCVT = N.getValueType(); if (LegalTypes) SetCCVT = getSetCCResultType(SetCCVT); - SDValue SetCC = DAG.getSetCC(SDLoc(TheXor), - SetCCVT, - Op0, Op1, - Equal ? ISD::SETEQ : ISD::SETNE); // Replace the uses of XOR with SETCC - WorklistRemover DeadNodes(*this); - DAG.ReplaceAllUsesOfValueWith(N1, SetCC); - deleteAndRecombine(N1.getNode()); - return DAG.getNode(ISD::BRCOND, SDLoc(N), - MVT::Other, Chain, SetCC, N2); + return DAG.getSetCC(SDLoc(TheXor), SetCCVT, Op0, Op1, + Equal ? 
ISD::SETEQ : ISD::SETNE); } } diff --git a/llvm/lib/Target/Hexagon/HexagonPatterns.td b/llvm/lib/Target/Hexagon/HexagonPatterns.td index 70969e2..b0aaded 100644 --- a/llvm/lib/Target/Hexagon/HexagonPatterns.td +++ b/llvm/lib/Target/Hexagon/HexagonPatterns.td @@ -2800,6 +2800,8 @@ def: Pat<(brcond (not I1:$Pu), bb:$dst), (J2_jumpf I1:$Pu, bb:$dst)>; def: Pat<(brcond (i1 (setne I1:$Pu, -1)), bb:$dst), (J2_jumpf I1:$Pu, bb:$dst)>; +def: Pat<(brcond (i1 (seteq I1:$Pu, 0)), bb:$dst), + (J2_jumpf I1:$Pu, bb:$dst)>; def: Pat<(brcond (i1 (setne I1:$Pu, 0)), bb:$dst), (J2_jumpt I1:$Pu, bb:$dst)>; diff --git a/llvm/test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll b/llvm/test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll index 208d97f..789decb 100644 --- a/llvm/test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll +++ b/llvm/test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll @@ -7,7 +7,6 @@ declare i1 @llvm.amdgcn.class.f32(float, i32) ; GCN-LABEL: {{^}}vcc_shrink_vcc_def: ; GCN: v_cmp_eq_u32_e64 vcc, s{{[0-9]+}}, 0{{$}} ; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, vcc -; GCN: v_cndmask_b32_e64 v0, 0, 1, s{{\[[0-9]+:[0-9]+\]}} define amdgpu_kernel void @vcc_shrink_vcc_def(float %arg, i32 %arg1, float %arg2, i32 %arg3) { bb0: %tmp = icmp sgt i32 %arg1, 4 @@ -34,7 +33,6 @@ bb2: ; GCN-LABEL: {{^}}preserve_condition_undef_flag: ; GCN-NOT: vcc ; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, vcc -; GCN: v_cndmask_b32_e64 v0, 0, 1, s{{\[[0-9]+:[0-9]+\]}} define amdgpu_kernel void @preserve_condition_undef_flag(float %arg, i32 %arg1, float %arg2) { bb0: %tmp = icmp sgt i32 %arg1, 4 diff --git a/llvm/test/CodeGen/AMDGPU/setcc.ll b/llvm/test/CodeGen/AMDGPU/setcc.ll index 442a316..f0a2b89 100644 --- a/llvm/test/CodeGen/AMDGPU/setcc.ll +++ b/llvm/test/CodeGen/AMDGPU/setcc.ll @@ -397,9 +397,9 @@ endif: } ; FUNC-LABEL: setcc-i1-and-xor -; GCN-DAG: v_cmp_ge_f32_e64 [[A:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0{{$}} -; GCN-DAG: v_cmp_le_f32_e64 [[B:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 1.0 -; GCN: s_and_b64 s[2:3], 
[[A]], [[B]] +; GCN-DAG: v_cmp_nge_f32_e64 [[A:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0{{$}} +; GCN-DAG: v_cmp_nle_f32_e64 [[B:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 1.0 +; GCN: s_or_b64 s[2:3], [[A]], [[B]] define amdgpu_kernel void @setcc-i1-and-xor(i32 addrspace(1)* %out, float %cond) #0 { bb0: %tmp5 = fcmp oge float %cond, 0.000000e+00 diff --git a/llvm/test/CodeGen/X86/and-sink.ll b/llvm/test/CodeGen/X86/and-sink.ll index 6d23d6c..0a36366 100644 --- a/llvm/test/CodeGen/X86/and-sink.ll +++ b/llvm/test/CodeGen/X86/and-sink.ll @@ -14,8 +14,8 @@ define i32 @and_sink1(i32 %a, i1 %c) { ; CHECK-NEXT: je .LBB0_3 ; CHECK-NEXT: # %bb.1: # %bb0 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: movl $0, A ; CHECK-NEXT: testb $4, %al +; CHECK-NEXT: movl $0, A ; CHECK-NEXT: jne .LBB0_3 ; CHECK-NEXT: # %bb.2: # %bb1 ; CHECK-NEXT: movl $1, %eax @@ -61,8 +61,8 @@ define i32 @and_sink2(i32 %a, i1 %c, i1 %c2) { ; CHECK-NEXT: je .LBB1_5 ; CHECK-NEXT: # %bb.3: # %bb1 ; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1 -; CHECK-NEXT: movl $0, C ; CHECK-NEXT: testb $4, %cl +; CHECK-NEXT: movl $0, C ; CHECK-NEXT: jne .LBB1_2 ; CHECK-NEXT: # %bb.4: # %bb2 ; CHECK-NEXT: movl $1, %eax diff --git a/llvm/test/CodeGen/X86/fold-rmw-ops.ll b/llvm/test/CodeGen/X86/fold-rmw-ops.ll index bb89d4b..c524486 100644 --- a/llvm/test/CodeGen/X86/fold-rmw-ops.ll +++ b/llvm/test/CodeGen/X86/fold-rmw-ops.ll @@ -1146,12 +1146,9 @@ b: define void @and32_imm_br() nounwind { ; CHECK-LABEL: and32_imm_br: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movl $-2147483648, %eax # encoding: [0xb8,0x00,0x00,0x00,0x80] +; CHECK-NEXT: andl $-2147483648, {{.*}}(%rip) # encoding: [0x81,0x25,A,A,A,A,0x00,0x00,0x00,0x80] +; CHECK-NEXT: # fixup A - offset: 2, value: g32-8, kind: reloc_riprel_4byte ; CHECK-NEXT: # imm = 0x80000000 -; CHECK-NEXT: andl {{.*}}(%rip), %eax # encoding: [0x23,0x05,A,A,A,A] -; CHECK-NEXT: # fixup A - offset: 2, value: g32-4, kind: reloc_riprel_4byte -; CHECK-NEXT: movl %eax, {{.*}}(%rip) # encoding: 
[0x89,0x05,A,A,A,A] -; CHECK-NEXT: # fixup A - offset: 2, value: g32-4, kind: reloc_riprel_4byte ; CHECK-NEXT: jne .LBB35_2 # encoding: [0x75,A] ; CHECK-NEXT: # fixup A - offset: 1, value: .LBB35_2-1, kind: FK_PCRel_1 ; CHECK-NEXT: # %bb.1: # %a @@ -1244,13 +1241,9 @@ b: define void @and16_imm_br() nounwind { ; CHECK-LABEL: and16_imm_br: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movzwl {{.*}}(%rip), %eax # encoding: [0x0f,0xb7,0x05,A,A,A,A] -; CHECK-NEXT: # fixup A - offset: 3, value: g16-4, kind: reloc_riprel_4byte -; CHECK-NEXT: andl $32768, %eax # encoding: [0x25,0x00,0x80,0x00,0x00] +; CHECK-NEXT: andw $-32768, {{.*}}(%rip) # encoding: [0x66,0x81,0x25,A,A,A,A,0x00,0x80] +; CHECK-NEXT: # fixup A - offset: 3, value: g16-6, kind: reloc_riprel_4byte ; CHECK-NEXT: # imm = 0x8000 -; CHECK-NEXT: movw %ax, {{.*}}(%rip) # encoding: [0x66,0x89,0x05,A,A,A,A] -; CHECK-NEXT: # fixup A - offset: 3, value: g16-4, kind: reloc_riprel_4byte -; CHECK-NEXT: testw %ax, %ax # encoding: [0x66,0x85,0xc0] ; CHECK-NEXT: jne .LBB38_2 # encoding: [0x75,A] ; CHECK-NEXT: # fixup A - offset: 1, value: .LBB38_2-1, kind: FK_PCRel_1 ; CHECK-NEXT: # %bb.1: # %a diff --git a/llvm/test/CodeGen/X86/or-branch.ll b/llvm/test/CodeGen/X86/or-branch.ll index 276258a..a1a29cf 100644 --- a/llvm/test/CodeGen/X86/or-branch.ll +++ b/llvm/test/CodeGen/X86/or-branch.ll @@ -19,11 +19,10 @@ define void @foo(i32 %X, i32 %Y, i32 %Z) nounwind { ; JUMP1-LABEL: foo: ; JUMP1: # %bb.0: # %entry ; JUMP1-NEXT: cmpl $0, {{[0-9]+}}(%esp) -; JUMP1-NEXT: sete %al -; JUMP1-NEXT: cmpl $5, {{[0-9]+}}(%esp) -; JUMP1-NEXT: setl %cl -; JUMP1-NEXT: orb %al, %cl -; JUMP1-NEXT: cmpb $1, %cl +; JUMP1-NEXT: setne %al +; JUMP1-NEXT: cmpl $4, {{[0-9]+}}(%esp) +; JUMP1-NEXT: setg %cl +; JUMP1-NEXT: testb %al, %cl ; JUMP1-NEXT: jne .LBB0_1 ; JUMP1-NEXT: # %bb.2: # %cond_true ; JUMP1-NEXT: jmp bar # TAILCALL @@ -50,11 +49,10 @@ define void @unpredictable(i32 %X, i32 %Y, i32 %Z) nounwind { ; JUMP2-LABEL: unpredictable: ; JUMP2: # %bb.0: # 
%entry ; JUMP2-NEXT: cmpl $0, {{[0-9]+}}(%esp) -; JUMP2-NEXT: sete %al -; JUMP2-NEXT: cmpl $5, {{[0-9]+}}(%esp) -; JUMP2-NEXT: setl %cl -; JUMP2-NEXT: orb %al, %cl -; JUMP2-NEXT: cmpb $1, %cl +; JUMP2-NEXT: setne %al +; JUMP2-NEXT: cmpl $4, {{[0-9]+}}(%esp) +; JUMP2-NEXT: setg %cl +; JUMP2-NEXT: testb %al, %cl ; JUMP2-NEXT: jne .LBB1_1 ; JUMP2-NEXT: # %bb.2: # %cond_true ; JUMP2-NEXT: jmp bar # TAILCALL @@ -64,11 +62,10 @@ define void @unpredictable(i32 %X, i32 %Y, i32 %Z) nounwind { ; JUMP1-LABEL: unpredictable: ; JUMP1: # %bb.0: # %entry ; JUMP1-NEXT: cmpl $0, {{[0-9]+}}(%esp) -; JUMP1-NEXT: sete %al -; JUMP1-NEXT: cmpl $5, {{[0-9]+}}(%esp) -; JUMP1-NEXT: setl %cl -; JUMP1-NEXT: orb %al, %cl -; JUMP1-NEXT: cmpb $1, %cl +; JUMP1-NEXT: setne %al +; JUMP1-NEXT: cmpl $4, {{[0-9]+}}(%esp) +; JUMP1-NEXT: setg %cl +; JUMP1-NEXT: testb %al, %cl ; JUMP1-NEXT: jne .LBB1_1 ; JUMP1-NEXT: # %bb.2: # %cond_true ; JUMP1-NEXT: jmp bar # TAILCALL -- 2.7.4