From d3406bc45c55a2e019cb30b31a7a7e5e6e0b9928 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 19 Apr 2017 18:29:07 +0000 Subject: [PATCH] StructurizeCFG: Directly invert cmp instructions The most common case for a branch condition is a single use compare. Directly invert the branch predicate rather than adding a lot of xor i1 true which the DAG will have to fold later. This produces nicer to read structurizer output. This produces some random changes in codegen due to the DAG swapping branch conditions itself, and then does a poor job of dealing with those inverts. llvm-svn: 300732 --- llvm/lib/Transforms/Scalar/StructurizeCFG.cpp | 14 +- llvm/test/CodeGen/AMDGPU/loop_break.ll | 2 +- .../CodeGen/AMDGPU/multi-divergent-exit-region.ll | 180 ++++++++++----------- llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll | 23 ++- llvm/test/CodeGen/AMDGPU/ret_jump.ll | 2 +- .../Transforms/StructurizeCFG/invert-compare.ll | 60 +++++++ .../StructurizeCFG/one-loop-multiple-backedges.ll | 12 +- .../StructurizeCFG/post-order-traversal-bug.ll | 3 +- 8 files changed, 184 insertions(+), 112 deletions(-) create mode 100644 llvm/test/Transforms/StructurizeCFG/invert-compare.ll diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp index 49ce026..659353e 100644 --- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -352,10 +352,20 @@ Value *StructurizeCFG::invert(Value *Condition) { if (Instruction *Inst = dyn_cast<Instruction>(Condition)) { // Third: Check all the users for an invert BasicBlock *Parent = Inst->getParent(); - for (User *U : Condition->users()) - if (Instruction *I = dyn_cast<Instruction>(U)) + for (User *U : Condition->users()) { + if (Instruction *I = dyn_cast<Instruction>(U)) { if (I->getParent() == Parent && match(I, m_Not(m_Specific(Condition)))) return I; + } + } + + // Avoid creating a new instruction in the common case of a compare. 
+ if (CmpInst *Cmp = dyn_cast<CmpInst>(Inst)) { + if (Cmp->hasOneUse()) { + Cmp->setPredicate(Cmp->getInversePredicate()); + return Cmp; + } + } // Last option: Create a new instruction return BinaryOperator::CreateNot(Condition, "", Parent->getTerminator()); diff --git a/llvm/test/CodeGen/AMDGPU/loop_break.ll b/llvm/test/CodeGen/AMDGPU/loop_break.ll index b9df2cb..84c42e8 100644 --- a/llvm/test/CodeGen/AMDGPU/loop_break.ll +++ b/llvm/test/CodeGen/AMDGPU/loop_break.ll @@ -10,7 +10,7 @@ ; OPT: bb4: ; OPT: load volatile -; OPT: xor i1 %cmp1 +; OPT: %cmp1 = icmp sge i32 %tmp, %load ; OPT: call i64 @llvm.amdgcn.if.break( ; OPT: br label %Flow diff --git a/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll b/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll index 9d0b6b3..4bd8bff 100644 --- a/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll +++ b/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll @@ -9,18 +9,19 @@ ; StructurizeCFG. ; IR-LABEL: @multi_divergent_region_exit_ret_ret( -; IR: %1 = call { i1, i64 } @llvm.amdgcn.if(i1 %0) -; IR: %2 = extractvalue { i1, i64 } %1, 0 -; IR: %3 = extractvalue { i1, i64 } %1, 1 -; IR: br i1 %2, label %LeafBlock1, label %Flow +; IR: %Pivot = icmp sge i32 %tmp16, 2 +; IR-NEXT: %0 = call { i1, i64 } @llvm.amdgcn.if(i1 %Pivot) +; IR: %1 = extractvalue { i1, i64 } %0, 0 +; IR: %2 = extractvalue { i1, i64 } %0, 1 +; IR: br i1 %1, label %LeafBlock1, label %Flow ; IR: Flow: -; IR: %4 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ] -; IR: %5 = phi i1 [ %10, %LeafBlock1 ], [ false, %entry ] -; IR: %6 = call { i1, i64 } @llvm.amdgcn.else(i64 %3) -; IR: %7 = extractvalue { i1, i64 } %6, 0 -; IR: %8 = extractvalue { i1, i64 } %6, 1 -; IR: br i1 %7, label %LeafBlock, label %Flow1 +; IR: %3 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ] +; IR: %4 = phi i1 [ %SwitchLeaf2, %LeafBlock1 ], [ false, %entry ] +; IR: %5 = call { i1, i64 } @llvm.amdgcn.else(i64 %2) +; IR: %6 = extractvalue { i1, i64 } %5, 0 +; IR: %7 = extractvalue { 
i1, i64 } %5, 1 +; IR: br i1 %6, label %LeafBlock, label %Flow1 ; IR: LeafBlock: ; IR: br label %Flow1 @@ -29,32 +30,32 @@ ; IR: br label %Flow{{$}} ; IR: Flow2: -; IR: %11 = phi i1 [ false, %exit1 ], [ %15, %Flow1 ] -; IR: call void @llvm.amdgcn.end.cf(i64 %19) -; IR: %12 = call { i1, i64 } @llvm.amdgcn.if(i1 %11) -; IR: %13 = extractvalue { i1, i64 } %12, 0 -; IR: %14 = extractvalue { i1, i64 } %12, 1 -; IR: br i1 %13, label %exit0, label %UnifiedReturnBlock +; IR: %8 = phi i1 [ false, %exit1 ], [ %12, %Flow1 ] +; IR: call void @llvm.amdgcn.end.cf(i64 %16) +; IR: [[IF:%[0-9]+]] = call { i1, i64 } @llvm.amdgcn.if(i1 %8) +; IR: %10 = extractvalue { i1, i64 } [[IF]], 0 +; IR: %11 = extractvalue { i1, i64 } [[IF]], 1 +; IR: br i1 %10, label %exit0, label %UnifiedReturnBlock ; IR: exit0: ; IR: store volatile i32 9, i32 addrspace(1)* undef ; IR: br label %UnifiedReturnBlock ; IR: Flow1: -; IR: %15 = phi i1 [ %SwitchLeaf, %LeafBlock ], [ %4, %Flow ] -; IR: %16 = phi i1 [ %9, %LeafBlock ], [ %5, %Flow ] -; IR: call void @llvm.amdgcn.end.cf(i64 %8) -; IR: %17 = call { i1, i64 } @llvm.amdgcn.if(i1 %16) -; IR: %18 = extractvalue { i1, i64 } %17, 0 -; IR: %19 = extractvalue { i1, i64 } %17, 1 -; IR: br i1 %18, label %exit1, label %Flow2 +; IR: %12 = phi i1 [ %SwitchLeaf, %LeafBlock ], [ %3, %Flow ] +; IR: %13 = phi i1 [ %SwitchLeaf, %LeafBlock ], [ %4, %Flow ] +; IR: call void @llvm.amdgcn.end.cf(i64 %7) +; IR: %14 = call { i1, i64 } @llvm.amdgcn.if(i1 %13) +; IR: %15 = extractvalue { i1, i64 } %14, 0 +; IR: %16 = extractvalue { i1, i64 } %14, 1 +; IR: br i1 %15, label %exit1, label %Flow2 ; IR: exit1: ; IR: store volatile i32 17, i32 addrspace(3)* undef ; IR: br label %Flow2 ; IR: UnifiedReturnBlock: -; IR: call void @llvm.amdgcn.end.cf(i64 %14) +; IR: call void @llvm.amdgcn.end.cf(i64 %11) ; IR: ret void @@ -64,11 +65,9 @@ ; GCN: s_xor_b64 -; FIXME: Why is this compare essentially repeated? 
-; GCN: v_cmp_eq_u32_e32 vcc, 1, [[REG:v[0-9]+]] -; GCN-NEXT: v_cmp_ne_u32_e64 s{{\[[0-9]+:[0-9]+\]}}, 1, [[REG]] +; GCN: ; %LeafBlock +; GCN: v_cmp_ne_u32_e32 vcc, 1, [[REG:v[0-9]+]] ; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, -1, vcc -; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, -1 ; GCN: ; %Flow1 ; GCN-NEXT: s_or_b64 exec, exec @@ -126,14 +125,15 @@ exit1: ; preds = %LeafBlock, %LeafBlock1 } ; IR-LABEL: @multi_divergent_region_exit_unreachable_unreachable( -; IR: %1 = call { i1, i64 } @llvm.amdgcn.if(i1 %0) +; IR: %Pivot = icmp sge i32 %tmp16, 2 +; IR-NEXT: %0 = call { i1, i64 } @llvm.amdgcn.if(i1 %Pivot) -; IR: %6 = call { i1, i64 } @llvm.amdgcn.else(i64 %3) +; IR: %5 = call { i1, i64 } @llvm.amdgcn.else(i64 %2) -; IR: %11 = phi i1 [ false, %exit1 ], [ %15, %Flow1 ] -; IR: call void @llvm.amdgcn.end.cf(i64 %19) -; IR: %12 = call { i1, i64 } @llvm.amdgcn.if(i1 %11) -; IR: br i1 %13, label %exit0, label %UnifiedUnreachableBlock +; IR: %8 = phi i1 [ false, %exit1 ], [ %12, %Flow1 ] +; IR: call void @llvm.amdgcn.end.cf(i64 %16) +; IR: %9 = call { i1, i64 } @llvm.amdgcn.if(i1 %8) +; IR: br i1 %10, label %exit0, label %UnifiedUnreachableBlock ; IR: UnifiedUnreachableBlock: @@ -181,51 +181,49 @@ exit1: ; preds = %LeafBlock, %LeafBlock1 } ; IR-LABEL: @multi_exit_region_divergent_ret_uniform_ret( -; IR: %divergent.cond0 = icmp slt i32 %tmp16, 2 +; IR: %divergent.cond0 = icmp sge i32 %tmp16, 2 ; IR: llvm.amdgcn.if ; IR: br i1 ; IR: {{^}}Flow: -; IR: %4 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ] -; IR: %5 = phi i1 [ %10, %LeafBlock1 ], [ false, %entry ] -; IR: %6 = call { i1, i64 } @llvm.amdgcn.else(i64 %3) -; IR: br i1 %7, label %LeafBlock, label %Flow1 +; IR: %3 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ] +; IR: %4 = phi i1 [ %uniform.cond0, %LeafBlock1 ], [ false, %entry ] +; IR: %5 = call { i1, i64 } @llvm.amdgcn.else(i64 %2) +; IR: br i1 %6, label %LeafBlock, label %Flow1 ; IR: {{^}}LeafBlock: -; IR: %divergent.cond1 = icmp eq i32 %tmp16, 1 -; IR: %9 = xor i1 
%divergent.cond1, true +; IR: %divergent.cond1 = icmp ne i32 %tmp16, 1 ; IR: br label %Flow1 ; IR: LeafBlock1: -; IR: %uniform.cond0 = icmp eq i32 %arg3, 2 -; IR: %10 = xor i1 %uniform.cond0, true +; IR: %uniform.cond0 = icmp ne i32 %arg3, 2 ; IR: br label %Flow ; IR: Flow2: -; IR: %11 = phi i1 [ false, %exit1 ], [ %15, %Flow1 ] -; IR: call void @llvm.amdgcn.end.cf(i64 %19) -; IR: %12 = call { i1, i64 } @llvm.amdgcn.if(i1 %11) -; IR: br i1 %13, label %exit0, label %UnifiedReturnBlock +; IR: %8 = phi i1 [ false, %exit1 ], [ %12, %Flow1 ] +; IR: call void @llvm.amdgcn.end.cf(i64 %16) +; IR: %9 = call { i1, i64 } @llvm.amdgcn.if(i1 %8) +; IR: br i1 %10, label %exit0, label %UnifiedReturnBlock ; IR: exit0: ; IR: store volatile i32 9, i32 addrspace(1)* undef ; IR: br label %UnifiedReturnBlock ; IR: {{^}}Flow1: -; IR: %15 = phi i1 [ %divergent.cond1, %LeafBlock ], [ %4, %Flow ] -; IR: %16 = phi i1 [ %9, %LeafBlock ], [ %5, %Flow ] -; IR: call void @llvm.amdgcn.end.cf(i64 %8) -; IR: %17 = call { i1, i64 } @llvm.amdgcn.if(i1 %16) -; IR: %18 = extractvalue { i1, i64 } %17, 0 -; IR: %19 = extractvalue { i1, i64 } %17, 1 -; IR: br i1 %18, label %exit1, label %Flow2 +; IR: %12 = phi i1 [ %divergent.cond1, %LeafBlock ], [ %3, %Flow ] +; IR: %13 = phi i1 [ %divergent.cond1, %LeafBlock ], [ %4, %Flow ] +; IR: call void @llvm.amdgcn.end.cf(i64 %7) +; IR: %14 = call { i1, i64 } @llvm.amdgcn.if(i1 %13) +; IR: %15 = extractvalue { i1, i64 } %14, 0 +; IR: %16 = extractvalue { i1, i64 } %14, 1 +; IR: br i1 %15, label %exit1, label %Flow2 ; IR: exit1: ; IR: store volatile i32 17, i32 addrspace(3)* undef ; IR: br label %Flow2 ; IR: UnifiedReturnBlock: -; IR: call void @llvm.amdgcn.end.cf(i64 %14) +; IR: call void @llvm.amdgcn.end.cf(i64 %11) ; IR: ret void define amdgpu_kernel void @multi_exit_region_divergent_ret_uniform_ret(i32 addrspace(1)* nocapture %arg0, i32 addrspace(1)* nocapture %arg1, i32 addrspace(1)* nocapture %arg2, i32 %arg3) #0 { entry: @@ -264,17 +262,18 @@ exit1: ; preds 
= %LeafBlock, %LeafBlock1 } ; IR-LABEL: @multi_exit_region_uniform_ret_divergent_ret( -; IR: %1 = call { i1, i64 } @llvm.amdgcn.if(i1 %0) -; IR: br i1 %2, label %LeafBlock1, label %Flow +; IR: %Pivot = icmp sge i32 %tmp16, 2 +; IR-NEXT: %0 = call { i1, i64 } @llvm.amdgcn.if(i1 %Pivot) +; IR: br i1 %1, label %LeafBlock1, label %Flow ; IR: Flow: -; IR: %4 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ] -; IR: %5 = phi i1 [ %10, %LeafBlock1 ], [ false, %entry ] -; IR: %6 = call { i1, i64 } @llvm.amdgcn.else(i64 %3) +; IR: %3 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ] +; IR: %4 = phi i1 [ %SwitchLeaf2, %LeafBlock1 ], [ false, %entry ] +; IR: %5 = call { i1, i64 } @llvm.amdgcn.else(i64 %2) -; IR: %11 = phi i1 [ false, %exit1 ], [ %15, %Flow1 ] -; IR: call void @llvm.amdgcn.end.cf(i64 %19) -; IR: %12 = call { i1, i64 } @llvm.amdgcn.if(i1 %11) +; IR: %8 = phi i1 [ false, %exit1 ], [ %12, %Flow1 ] +; IR: call void @llvm.amdgcn.end.cf(i64 %16) +; IR: %9 = call { i1, i64 } @llvm.amdgcn.if(i1 %8) define amdgpu_kernel void @multi_exit_region_uniform_ret_divergent_ret(i32 addrspace(1)* nocapture %arg0, i32 addrspace(1)* nocapture %arg1, i32 addrspace(1)* nocapture %arg2, i32 %arg3) #0 { entry: @@ -314,13 +313,13 @@ exit1: ; preds = %LeafBlock, %LeafBlock1 ; IR-LABEL: @multi_divergent_region_exit_ret_ret_return_value( ; IR: Flow2: -; IR: %11 = phi float [ 2.000000e+00, %exit1 ], [ undef, %Flow1 ] -; IR: %12 = phi i1 [ false, %exit1 ], [ %16, %Flow1 ] -; IR: call void @llvm.amdgcn.end.cf(i64 %20) +; IR: %8 = phi float [ 2.000000e+00, %exit1 ], [ undef, %Flow1 ] +; IR: %9 = phi i1 [ false, %exit1 ], [ %13, %Flow1 ] +; IR: call void @llvm.amdgcn.end.cf(i64 %17) ; IR: UnifiedReturnBlock: -; IR: %UnifiedRetVal = phi float [ %11, %Flow2 ], [ 1.000000e+00, %exit0 ] -; IR: call void @llvm.amdgcn.end.cf(i64 %15) +; IR: %UnifiedRetVal = phi float [ %8, %Flow2 ], [ 1.000000e+00, %exit0 ] +; IR: call void @llvm.amdgcn.end.cf(i64 %12) ; IR: ret float %UnifiedRetVal define amdgpu_ps 
float @multi_divergent_region_exit_ret_ret_return_value(i32 %vgpr) #0 { entry: @@ -387,31 +386,32 @@ exit1: ; preds = %LeafBlock, %LeafBlock1 } ; IR-LABEL: @multi_divergent_region_exit_ret_unreachable( -; IR: %1 = call { i1, i64 } @llvm.amdgcn.if(i1 %0) +; IR: %Pivot = icmp sge i32 %tmp16, 2 +; IR-NEXT: %0 = call { i1, i64 } @llvm.amdgcn.if(i1 %Pivot) ; IR: Flow: -; IR: %4 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ] -; IR: %5 = phi i1 [ %10, %LeafBlock1 ], [ false, %entry ] -; IR: %6 = call { i1, i64 } @llvm.amdgcn.else(i64 %3) +; IR: %3 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ] +; IR: %4 = phi i1 [ %SwitchLeaf2, %LeafBlock1 ], [ false, %entry ] +; IR: %5 = call { i1, i64 } @llvm.amdgcn.else(i64 %2) ; IR: Flow2: -; IR: %11 = phi i1 [ false, %exit1 ], [ %15, %Flow1 ] -; IR: call void @llvm.amdgcn.end.cf(i64 %19) -; IR: %12 = call { i1, i64 } @llvm.amdgcn.if(i1 %11) -; IR: br i1 %13, label %exit0, label %UnifiedReturnBlock +; IR: %8 = phi i1 [ false, %exit1 ], [ %12, %Flow1 ] +; IR: call void @llvm.amdgcn.end.cf(i64 %16) +; IR: %9 = call { i1, i64 } @llvm.amdgcn.if(i1 %8) +; IR: br i1 %10, label %exit0, label %UnifiedReturnBlock ; IR: exit0: ; IR-NEXT: store volatile i32 17, i32 addrspace(3)* undef ; IR-NEXT: br label %UnifiedReturnBlock ; IR: Flow1: -; IR: %15 = phi i1 [ %SwitchLeaf, %LeafBlock ], [ %4, %Flow ] -; IR: %16 = phi i1 [ %9, %LeafBlock ], [ %5, %Flow ] -; IR: call void @llvm.amdgcn.end.cf(i64 %8) -; IR: %17 = call { i1, i64 } @llvm.amdgcn.if(i1 %16) -; IR: %18 = extractvalue { i1, i64 } %17, 0 -; IR: %19 = extractvalue { i1, i64 } %17, 1 -; IR: br i1 %18, label %exit1, label %Flow2 +; IR: %12 = phi i1 [ %SwitchLeaf, %LeafBlock ], [ %3, %Flow ] +; IR: %13 = phi i1 [ %SwitchLeaf, %LeafBlock ], [ %4, %Flow ] +; IR: call void @llvm.amdgcn.end.cf(i64 %7) +; IR: %14 = call { i1, i64 } @llvm.amdgcn.if(i1 %13) +; IR: %15 = extractvalue { i1, i64 } %14, 0 +; IR: %16 = extractvalue { i1, i64 } %14, 1 +; IR: br i1 %15, label %exit1, label %Flow2 ; IR: 
exit1: ; IR-NEXT: store volatile i32 9, i32 addrspace(1)* undef @@ -419,7 +419,7 @@ exit1: ; preds = %LeafBlock, %LeafBlock1 ; IR-NEXT: br label %Flow2 ; IR: UnifiedReturnBlock: -; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %14) +; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %11) ; IR-NEXT: ret void define amdgpu_kernel void @multi_divergent_region_exit_ret_unreachable(i32 addrspace(1)* nocapture %arg0, i32 addrspace(1)* nocapture %arg1, i32 addrspace(1)* nocapture %arg2) #0 { entry: @@ -475,7 +475,7 @@ exit1: ; preds = %LeafBlock, %LeafBlock1 ; IR-NEXT: br label %Flow2 ; IR: UnifiedReturnBlock: ; preds = %exit0, %Flow2 -; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %14) +; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %11) ; IR-NEXT: ret void define amdgpu_kernel void @indirect_multi_divergent_region_exit_ret_unreachable(i32 addrspace(1)* nocapture %arg0, i32 addrspace(1)* nocapture %arg1, i32 addrspace(1)* nocapture %arg2) #0 { entry: @@ -622,15 +622,15 @@ uniform.ret: ; IR-LABEL: @uniform_complex_multi_ret_nest_in_divergent_triangle( ; IR: Flow1: ; preds = %uniform.ret1, %uniform.multi.exit.region -; IR: %8 = phi i1 [ false, %uniform.ret1 ], [ true, %uniform.multi.exit.region ] -; IR: br i1 %8, label %uniform.if, label %Flow2 +; IR: %6 = phi i1 [ false, %uniform.ret1 ], [ true, %uniform.multi.exit.region ] +; IR: br i1 %6, label %uniform.if, label %Flow2 ; IR: Flow: ; preds = %uniform.then, %uniform.if -; IR: %11 = phi i1 [ %10, %uniform.then ], [ %9, %uniform.if ] -; IR: br i1 %11, label %uniform.endif, label %uniform.ret0 +; IR: %7 = phi i1 [ %uniform.cond2, %uniform.then ], [ %uniform.cond1, %uniform.if ] +; IR: br i1 %7, label %uniform.endif, label %uniform.ret0 ; IR: UnifiedReturnBlock: ; preds = %Flow3, %Flow2 -; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %6) +; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %5) ; IR-NEXT: ret void define amdgpu_kernel void @uniform_complex_multi_ret_nest_in_divergent_triangle(i32 %arg0) #0 { entry: diff --git 
a/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll b/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll index 672549c..c0b4eaf 100644 --- a/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll +++ b/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll @@ -133,9 +133,9 @@ bb23: ; preds = %bb10 ; IR: Flow1: ; IR-NEXT: %loop.phi = phi i64 [ %loop.phi9, %Flow6 ], [ %phi.broken, %bb14 ] -; IR-NEXT: %13 = phi <4 x i32> [ %29, %Flow6 ], [ undef, %bb14 ] -; IR-NEXT: %14 = phi i32 [ %30, %Flow6 ], [ undef, %bb14 ] -; IR-NEXT: %15 = phi i1 [ %31, %Flow6 ], [ false, %bb14 ] +; IR-NEXT: %13 = phi <4 x i32> [ %28, %Flow6 ], [ undef, %bb14 ] +; IR-NEXT: %14 = phi i32 [ %29, %Flow6 ], [ undef, %bb14 ] +; IR-NEXT: %15 = phi i1 [ %30, %Flow6 ], [ false, %bb14 ] ; IR-NEXT: %16 = phi i1 [ false, %Flow6 ], [ %8, %bb14 ] ; IR-NEXT: %17 = call i64 @llvm.amdgcn.else.break(i64 %11, i64 %loop.phi) ; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %11) @@ -144,9 +144,9 @@ bb23: ; preds = %bb10 ; IR: Flow2: ; IR-NEXT: %loop.phi10 = phi i64 [ %loop.phi11, %Flow5 ], [ %12, %bb16 ] -; IR-NEXT: %19 = phi <4 x i32> [ %29, %Flow5 ], [ undef, %bb16 ] -; IR-NEXT: %20 = phi i32 [ %30, %Flow5 ], [ undef, %bb16 ] -; IR-NEXT: %21 = phi i1 [ %31, %Flow5 ], [ false, %bb16 ] +; IR-NEXT: %19 = phi <4 x i32> [ %28, %Flow5 ], [ undef, %bb16 ] +; IR-NEXT: %20 = phi i32 [ %29, %Flow5 ], [ undef, %bb16 ] +; IR-NEXT: %21 = phi i1 [ %30, %Flow5 ], [ false, %bb16 ] ; IR-NEXT: %22 = phi i1 [ false, %Flow5 ], [ false, %bb16 ] ; IR-NEXT: %23 = phi i1 [ false, %Flow5 ], [ %8, %bb16 ] ; IR-NEXT: %24 = call { i1, i64 } @llvm.amdgcn.if(i1 %23) @@ -156,16 +156,15 @@ bb23: ; preds = %bb10 ; IR: bb21: ; IR: %tmp12 = icmp slt i32 %tmp11, 9 -; IR-NEXT: %27 = xor i1 %tmp12, true -; IR-NEXT: %28 = call i64 @llvm.amdgcn.if.break(i1 %27, i64 %phi.broken) +; IR-NEXT: %27 = call i64 @llvm.amdgcn.if.break(i1 %tmp12, i64 %phi.broken) ; IR-NEXT: br label %Flow3 ; IR: Flow3: ; IR-NEXT: %loop.phi11 = phi i64 [ %phi.broken, %bb21 ], [ 
%phi.broken, %Flow2 ] -; IR-NEXT: %loop.phi9 = phi i64 [ %28, %bb21 ], [ %loop.phi10, %Flow2 ] -; IR-NEXT: %29 = phi <4 x i32> [ %tmp9, %bb21 ], [ %19, %Flow2 ] -; IR-NEXT: %30 = phi i32 [ %tmp10, %bb21 ], [ %20, %Flow2 ] -; IR-NEXT: %31 = phi i1 [ %27, %bb21 ], [ %21, %Flow2 ] +; IR-NEXT: %loop.phi9 = phi i64 [ %27, %bb21 ], [ %loop.phi10, %Flow2 ] +; IR-NEXT: %28 = phi <4 x i32> [ %tmp9, %bb21 ], [ %19, %Flow2 ] +; IR-NEXT: %29 = phi i32 [ %tmp10, %bb21 ], [ %20, %Flow2 ] +; IR-NEXT: %30 = phi i1 [ %tmp12, %bb21 ], [ %21, %Flow2 ] ; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %26) ; IR-NEXT: br i1 %22, label %bb31.loopexit, label %Flow4 diff --git a/llvm/test/CodeGen/AMDGPU/ret_jump.ll b/llvm/test/CodeGen/AMDGPU/ret_jump.ll index f2fbacb..748f98a 100644 --- a/llvm/test/CodeGen/AMDGPU/ret_jump.ll +++ b/llvm/test/CodeGen/AMDGPU/ret_jump.ll @@ -56,7 +56,7 @@ ret.bb: ; preds = %else, %main_body } ; GCN-LABEL: {{^}}uniform_br_nontrivial_ret_divergent_br_nontrivial_unreachable: -; GCN: s_cbranch_vccnz [[RET_BB:BB[0-9]+_[0-9]+]] +; GCN: s_cbranch_scc1 [[RET_BB:BB[0-9]+_[0-9]+]] ; GCN: ; BB#{{[0-9]+}}: ; %else ; GCN: s_and_saveexec_b64 [[SAVE_EXEC:s\[[0-9]+:[0-9]+\]]], vcc diff --git a/llvm/test/Transforms/StructurizeCFG/invert-compare.ll b/llvm/test/Transforms/StructurizeCFG/invert-compare.ll new file mode 100644 index 0000000..87d9c6d --- /dev/null +++ b/llvm/test/Transforms/StructurizeCFG/invert-compare.ll @@ -0,0 +1,60 @@ +; RUN: opt -S -structurizecfg %s | FileCheck %s + +; CHECK-LABEL: @directly_invert_compare_condition_jump_into_loop( +; CHECK: %cmp0 = fcmp uge float %arg0, %arg1 +; CHECK-NEXT: br i1 %cmp0, label %end.loop, label %Flow +define void @directly_invert_compare_condition_jump_into_loop(i32 addrspace(1)* %out, i32 %n, float %arg0, float %arg1) #0 { +entry: + br label %for.body + +for.body: + %i = phi i32 [0, %entry], [%i.inc, %end.loop] + %ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %i + store i32 %i, i32 addrspace(1)* %ptr, align 4 + %cmp0 = 
fcmp olt float %arg0, %arg1 + br i1 %cmp0, label %mid.loop, label %end.loop + +mid.loop: + store i32 333, i32 addrspace(1)* %out, align 4 + br label %for.end + +end.loop: + %i.inc = add i32 %i, 1 + %cmp = icmp ne i32 %i.inc, %n + br i1 %cmp, label %for.body, label %for.end + +for.end: + ret void +} + +; CHECK-LABEL: @invert_multi_use_compare_condition_jump_into_loop( +; CHECK: %cmp0 = fcmp olt float %arg0, %arg1 +; CHECK: store volatile i1 %cmp0, i1 addrspace(1)* undef +; CHECK: %0 = xor i1 %cmp0, true +; CHECK-NEXT: br i1 %0, label %end.loop, label %Flow +define void @invert_multi_use_compare_condition_jump_into_loop(i32 addrspace(1)* %out, i32 %n, float %arg0, float %arg1) #0 { +entry: + br label %for.body + +for.body: + %i = phi i32 [0, %entry], [%i.inc, %end.loop] + %ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %i + store i32 %i, i32 addrspace(1)* %ptr, align 4 + %cmp0 = fcmp olt float %arg0, %arg1 + store volatile i1 %cmp0, i1 addrspace(1)* undef + br i1 %cmp0, label %mid.loop, label %end.loop + +mid.loop: + store i32 333, i32 addrspace(1)* %out, align 4 + br label %for.end + +end.loop: + %i.inc = add i32 %i, 1 + %cmp = icmp ne i32 %i.inc, %n + br i1 %cmp, label %for.body, label %for.end + +for.end: + ret void +} + +attributes #0 = { nounwind } \ No newline at end of file diff --git a/llvm/test/Transforms/StructurizeCFG/one-loop-multiple-backedges.ll b/llvm/test/Transforms/StructurizeCFG/one-loop-multiple-backedges.ll index 668a1e9..aff5964 100644 --- a/llvm/test/Transforms/StructurizeCFG/one-loop-multiple-backedges.ll +++ b/llvm/test/Transforms/StructurizeCFG/one-loop-multiple-backedges.ll @@ -11,8 +11,8 @@ bb: bb3: ; preds = %bb7, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb7 ] %tmp4 = fcmp ult float %arg1, 3.500000e+00 -; CHECK: %0 = xor i1 %tmp4, true -; CHECK: br i1 %0, label %bb5, label %Flow +; CHECK: %tmp4 = fcmp oge float %arg1, 3.500000e+00 +; CHECK: br i1 %tmp4, label %bb5, label %Flow br i1 %tmp4, label %bb7, label %bb5 ; CHECK: bb5: @@ 
-22,7 +22,8 @@ bb5: ; preds = %bb3 br i1 %tmp6, label %bb10, label %bb7 ; CHECK: Flow: -; CHECK: br i1 %3, label %bb7, label %Flow1 +; CHECK: %1 = phi i1 [ %tmp6, %bb5 ], [ %tmp4, %bb3 ] +; CHECK-NEXT: br i1 %1, label %bb7, label %Flow1 ; CHECK: bb7 bb7: ; preds = %bb5, %bb3 @@ -32,9 +33,10 @@ bb7: ; preds = %bb5, %bb3 br i1 %tmp9, label %bb3, label %bb10 ; CHECK: Flow1: -; CHECK: br i1 %7, label %bb10, label %bb3 +; CHECK: %4 = phi i1 [ %tmp9, %bb7 ], [ true, %Flow ] +; CHECK-NEXT: br i1 %4, label %bb10, label %bb3 -; CHECK: bb10 +; CHECK: bb10: bb10: ; preds = %bb7, %bb5 %tmp11 = phi i32 [ 15, %bb5 ], [ 255, %bb7 ] store i32 %tmp11, i32 addrspace(1)* %arg, align 4 diff --git a/llvm/test/Transforms/StructurizeCFG/post-order-traversal-bug.ll b/llvm/test/Transforms/StructurizeCFG/post-order-traversal-bug.ll index ba9aa29..a8835f1 100644 --- a/llvm/test/Transforms/StructurizeCFG/post-order-traversal-bug.ll +++ b/llvm/test/Transforms/StructurizeCFG/post-order-traversal-bug.ll @@ -59,7 +59,8 @@ for.end: ; preds = %for.body.1, %if.the ; CHECK: br i1 %{{[0-9]}}, label %for.body.1, label %Flow2 ; CHECK: for.body.1: -; CHECK: br i1 %{{[0-9]+}}, label %for.body.6, label %Flow3 +; CHECK: %cmp1.5 = icmp ne i32 %tmp22, %K1 +; CHECK-NEXT: br i1 %cmp1.5, label %for.body.6, label %Flow3 for.body.1: ; preds = %if.then, %lor.lhs.false %best_val.233 = phi float [ %tmp5, %if.then ], [ %best_val.027, %lor.lhs.false ] %best_count.231 = phi i32 [ %sub4, %if.then ], [ %best_count.025, %lor.lhs.false ] -- 2.7.4