From: Nicolai Haehnle Date: Wed, 17 Oct 2018 15:37:41 +0000 (+0000) Subject: StructurizeCFG: Simplify inserted PHI nodes X-Git-Tag: llvmorg-8.0.0-rc1~6349 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=0823050b9f4ca2dd0aaf27ee34cb45bde5bf05ea;p=platform%2Fupstream%2Fllvm.git StructurizeCFG: Simplify inserted PHI nodes Summary: This improves subsequent divergence analysis in some cases. Change-Id: I5e95e7ec7fd3fa80d414d1a53a02fea23e3d67d3 Reviewers: arsenm, rampitec Subscribers: jvesely, wdng, llvm-commits Differential Revision: https://reviews.llvm.org/D53316 llvm-svn: 344697 --- diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp index 2bfd992..0db762d 100644 --- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -13,6 +13,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/RegionInfo.h" @@ -596,7 +597,8 @@ void StructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) { /// Add the real PHI value as soon as everything is set up void StructurizeCFG::setPhiValues() { - SSAUpdater Updater; + SmallVector<PHINode *, 8> InsertedPhis; + SSAUpdater Updater(&InsertedPhis); for (const auto &AddedPhi : AddedPhis) { BasicBlock *To = AddedPhi.first; const BBVector &From = AddedPhi.second; @@ -632,6 +634,26 @@ void StructurizeCFG::setPhiValues() { DeletedPhis.erase(To); } assert(DeletedPhis.empty()); + + // Simplify any phis inserted by the SSAUpdater if possible + bool Changed; + do { + Changed = false; + + SimplifyQuery Q(Func->getParent()->getDataLayout()); + Q.DT = DT; + for (size_t i = 0; i < InsertedPhis.size(); ++i) { + PHINode *Phi = InsertedPhis[i]; + if (Value *V = SimplifyInstruction(Phi, Q)) { + Phi->replaceAllUsesWith(V); + 
Phi->eraseFromParent(); + InsertedPhis[i] = InsertedPhis.back(); + InsertedPhis.pop_back(); + i--; + Changed = true; + } + } + } while (Changed); } /// Remove phi values from all successors and then remove the terminator. diff --git a/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll b/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll index 9e6efc5..fbdf983 100644 --- a/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll +++ b/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll @@ -312,13 +312,12 @@ exit1: ; preds = %LeafBlock, %LeafBlock1 ; IR-LABEL: @multi_divergent_region_exit_ret_ret_return_value( ; IR: Flow2: -; IR: %11 = phi float [ 2.000000e+00, %exit1 ], [ undef, %Flow1 ] -; IR: %12 = phi i1 [ false, %exit1 ], [ %16, %Flow1 ] -; IR: call void @llvm.amdgcn.end.cf(i64 %20) +; IR: %11 = phi i1 [ false, %exit1 ], [ %15, %Flow1 ] +; IR: call void @llvm.amdgcn.end.cf(i64 %19) ; IR: UnifiedReturnBlock: -; IR: %UnifiedRetVal = phi float [ %11, %Flow2 ], [ 1.000000e+00, %exit0 ] -; IR: call void @llvm.amdgcn.end.cf(i64 %15) +; IR: %UnifiedRetVal = phi float [ 2.000000e+00, %Flow2 ], [ 1.000000e+00, %exit0 ] +; IR: call void @llvm.amdgcn.end.cf(i64 %14) ; IR: ret float %UnifiedRetVal define amdgpu_ps float @multi_divergent_region_exit_ret_ret_return_value(i32 %vgpr) #0 { entry: @@ -353,8 +352,8 @@ exit1: ; preds = %LeafBlock, %LeafBlock1 ; GCN: {{^}}[[FLOW]]: ; GCN: s_cbranch_vccnz [[FLOW1:BB[0-9]+]] -; GCN: v_mov_b32_e32 v0, 2.0 ; GCN: s_or_b64 exec, exec +; GCN: v_mov_b32_e32 v0, 2.0 ; GCN-NOT: s_and_b64 exec, exec ; GCN: v_mov_b32_e32 v0, 1.0 diff --git a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll index 3f7df7b..216ca19 100644 --- a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll +++ b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll @@ -33,6 +33,8 @@ ; GCN-NEXT: s_mov_b64 ; GCN-NEXT: s_and_b64 [[MASKED_SAVE_BREAK:s\[[0-9]+:[0-9]+\]]], exec, [[SAVE_BREAK]] ; GCN-NEXT: s_or_b64 
[[OR_BREAK:s\[[0-9]+:[0-9]+\]]], [[MASKED_SAVE_BREAK]], s{{\[[0-9]+:[0-9]+\]}} +; TODO: get rid of redundant loop counter moves +; GCN-NEXT: v_mov_b32_e32 ; GCN-NEXT: s_andn2_b64 exec, exec, [[OR_BREAK]] ; GCN-NEXT: s_cbranch_execnz [[INNER_LOOP]] @@ -43,6 +45,7 @@ ; GCN-NEXT: s_or_b64 exec, exec, [[OR_BREAK]] ; GCN-NEXT: s_and_b64 [[MASKED2_SAVE_BREAK:s\[[0-9]+:[0-9]+\]]], exec, [[SAVE_BREAK]] ; GCN-NEXT: s_or_b64 [[OUTER_OR_BREAK:s\[[0-9]+:[0-9]+\]]], [[MASKED2_SAVE_BREAK]], s{{\[[0-9]+:[0-9]+\]}} +; GCN-NEXT: v_mov_b32_e32 ; GCN-NEXT: s_andn2_b64 exec, exec, [[OUTER_OR_BREAK]] ; GCN-NEXT: s_cbranch_execnz [[OUTER_LOOP]] define amdgpu_vs void @multi_else_break(<4 x float> %vec, i32 %ub, i32 %cont) { diff --git a/llvm/test/CodeGen/AMDGPU/smrd.ll b/llvm/test/CodeGen/AMDGPU/smrd.ll index 0e89f85..f453cfd 100644 --- a/llvm/test/CodeGen/AMDGPU/smrd.ll +++ b/llvm/test/CodeGen/AMDGPU/smrd.ll @@ -592,11 +592,12 @@ exit: ; GCN-LABEL: {{^}}smrd_uniform_loop2: ; (this test differs from smrd_uniform_loop by the more complex structure of phis, -; which currently confuses the DivergenceAnalysis after structurization) +; which used to confuse the DivergenceAnalysis after structurization) ; -; TODO: this should use an s_buffer_load +; TODO: we should keep the loop counter in an SGPR ; -; GCN: buffer_load_dword +; GCN: v_readfirstlane_b32 +; GCN: s_buffer_load_dword define amdgpu_ps float @smrd_uniform_loop2(<4 x i32> inreg %desc, i32 %bound, i32 %bound.a) #0 { main_body: br label %loop diff --git a/llvm/test/Transforms/StructurizeCFG/invert-constantexpr.ll b/llvm/test/Transforms/StructurizeCFG/invert-constantexpr.ll index ac12b5d6..61482bb 100644 --- a/llvm/test/Transforms/StructurizeCFG/invert-constantexpr.ll +++ b/llvm/test/Transforms/StructurizeCFG/invert-constantexpr.ll @@ -12,13 +12,12 @@ define void @invert_constantexpr_condition(i32 %arg, i32 %arg1) #0 { ; CHECK: bb2: ; CHECK-NEXT: br label [[FLOW]] ; CHECK: bb3: -; CHECK-NEXT: [[TMP4:%.*]] = phi i1 [ [[TMP1:%.*]], 
[[FLOW]] ], [ [[TMP7:%.*]], [[BB6:%.*]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi i1 [ undef, [[FLOW]] ], [ [[TMP7:%.*]], [[BB6:%.*]] ] ; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP4]], icmp eq (i32 ptrtoint (i32* @g to i32), i32 0) ; CHECK-NEXT: br label [[BB8:%.*]] ; CHECK: Flow: -; CHECK-NEXT: [[TMP1]] = phi i1 [ undef, [[BB2]] ], [ undef, [[BB:%.*]] ] -; CHECK-NEXT: [[TMP2:%.*]] = phi i1 [ [[TMP0]], [[BB2]] ], [ icmp ne (i32 ptrtoint (i32* @g to i32), i32 0), [[BB]] ] -; CHECK-NEXT: br i1 [[TMP2]], label [[BB6]], label [[BB3:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ [[TMP0]], [[BB2]] ], [ icmp ne (i32 ptrtoint (i32* @g to i32), i32 0), [[BB:%.*]] ] +; CHECK-NEXT: br i1 [[TMP1]], label [[BB6]], label [[BB3:%.*]] ; CHECK: bb6: ; CHECK-NEXT: [[TMP7]] = icmp slt i32 [[ARG]], [[ARG1:%.*]] ; CHECK-NEXT: br label [[BB3]] diff --git a/llvm/test/Transforms/StructurizeCFG/loop-continue-phi.ll b/llvm/test/Transforms/StructurizeCFG/loop-continue-phi.ll index 7e1c0b9..2300aea 100644 --- a/llvm/test/Transforms/StructurizeCFG/loop-continue-phi.ll +++ b/llvm/test/Transforms/StructurizeCFG/loop-continue-phi.ll @@ -1,28 +1,23 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -o - -structurizecfg < %s | FileCheck %s -; -; TODO: eliminate redundant phis for the loop counter -; define void @test1() { ; CHECK-LABEL: @test1( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: Flow: -; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[CTR_NEXT:%.*]], [[LOOP_B:%.*]] ], [ [[CTR_NEXT]], [[LOOP_A:%.*]] ] ; CHECK-NEXT: br label [[FLOW1:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[CTR:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP1:%.*]], [[FLOW1]] ] +; CHECK-NEXT: [[CTR:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[CTR_NEXT:%.*]], [[FLOW1]] ] ; CHECK-NEXT: [[CTR_NEXT]] = add i32 [[CTR]], 1 -; CHECK-NEXT: br i1 undef, label [[LOOP_A]], label [[FLOW1]] +; CHECK-NEXT: br i1 undef, label [[LOOP_A:%.*]], label [[FLOW1]] ; CHECK: loop.a: -; CHECK-NEXT: br i1 undef, label 
[[LOOP_B]], label [[FLOW:%.*]] +; CHECK-NEXT: br i1 undef, label [[LOOP_B:%.*]], label [[FLOW:%.*]] ; CHECK: loop.b: ; CHECK-NEXT: br label [[FLOW]] ; CHECK: Flow1: -; CHECK-NEXT: [[TMP1]] = phi i32 [ [[TMP0]], [[FLOW]] ], [ undef, [[LOOP]] ] -; CHECK-NEXT: [[TMP2:%.*]] = phi i1 [ false, [[FLOW]] ], [ true, [[LOOP]] ] -; CHECK-NEXT: br i1 [[TMP2]], label [[EXIT:%.*]], label [[LOOP]] +; CHECK-NEXT: [[TMP0:%.*]] = phi i1 [ false, [[FLOW]] ], [ true, [[LOOP]] ] +; CHECK-NEXT: br i1 [[TMP0]], label [[EXIT:%.*]], label [[LOOP]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/StructurizeCFG/one-loop-multiple-backedges.ll b/llvm/test/Transforms/StructurizeCFG/one-loop-multiple-backedges.ll index 668a1e9..0af25d6 100644 --- a/llvm/test/Transforms/StructurizeCFG/one-loop-multiple-backedges.ll +++ b/llvm/test/Transforms/StructurizeCFG/one-loop-multiple-backedges.ll @@ -8,33 +8,36 @@ bb: br label %bb3 ; CHECK: bb3: +; CHECK: %0 = xor i1 %tmp4, true +; CHECK: br i1 %0, label %bb5, label %Flow bb3: ; preds = %bb7, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb7 ] %tmp4 = fcmp ult float %arg1, 3.500000e+00 -; CHECK: %0 = xor i1 %tmp4, true -; CHECK: br i1 %0, label %bb5, label %Flow br i1 %tmp4, label %bb7, label %bb5 ; CHECK: bb5: +; CHECK: %1 = xor i1 %tmp6, true +; CHECK: br label %Flow bb5: ; preds = %bb3 %tmp6 = fcmp olt float 0.000000e+00, %arg2 -; CHECK: br label %Flow br i1 %tmp6, label %bb10, label %bb7 ; CHECK: Flow: -; CHECK: br i1 %3, label %bb7, label %Flow1 +; CHECK: %2 = phi i1 [ %1, %bb5 ], [ %tmp4, %bb3 ] +; CHECK: br i1 %2, label %bb7, label %Flow1 -; CHECK: bb7 +; CHECK: bb7: +; CHECK: br label %Flow1 bb7: ; preds = %bb5, %bb3 %tmp8 = add nuw nsw i64 %tmp, 1 %tmp9 = icmp slt i64 %tmp8, 5 -; CHECK: br label %Flow1 br i1 %tmp9, label %bb3, label %bb10 ; CHECK: Flow1: -; CHECK: br i1 %7, label %bb10, label %bb3 +; CHECK: %6 = phi i1 [ %3, %bb7 ], [ true, %Flow ] +; CHECK: br i1 %6, label %bb10, label %bb3 -; CHECK: bb10 +; CHECK: bb10: 
bb10: ; preds = %bb7, %bb5 %tmp11 = phi i32 [ 15, %bb5 ], [ 255, %bb7 ] store i32 %tmp11, i32 addrspace(1)* %arg, align 4