[AMDGPU] LCSSA pass added in preISel. Uniform values defined in the divergent loop...
authorAlexander Timofeev <Alexander.Timofeev@amd.com>
Tue, 2 Jul 2019 17:59:44 +0000 (17:59 +0000)
committerAlexander Timofeev <Alexander.Timofeev@amd.com>
Tue, 2 Jul 2019 17:59:44 +0000 (17:59 +0000)
Differential Revision: https://reviews.llvm.org/D63953

Reviewers: rampitec, nhaehnle, arsenm
llvm-svn: 364950

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll

index c8d4557..9b83b50 100644 (file)
@@ -39,6 +39,9 @@
 #include "llvm/CodeGen/SelectionDAGNodes.h"
 #include "llvm/CodeGen/ValueTypes.h"
 #include "llvm/IR/BasicBlock.h"
+#ifdef EXPENSIVE_CHECKS
+#include "llvm/IR/Dominators.h"
+#endif
 #include "llvm/IR/Instruction.h"
 #include "llvm/MC/MCInstrDesc.h"
 #include "llvm/Support/Casting.h"
@@ -138,6 +141,10 @@ public:
     AU.addRequired<AMDGPUArgumentUsageInfo>();
     AU.addRequired<AMDGPUPerfHintAnalysis>();
     AU.addRequired<LegacyDivergenceAnalysis>();
+#ifndef EXPENSIVE_CHECKS
+    AU.addRequired<DominatorTreeWrapperPass>();
+    AU.addRequired<LoopInfoWrapperPass>();
+#endif
     SelectionDAGISel::getAnalysisUsage(AU);
   }
 
@@ -351,6 +358,10 @@ INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "amdgpu-isel",
 INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
 INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis)
 INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
+#ifdef EXPENSIVE_CHECKS
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+#endif
 INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "amdgpu-isel",
                     "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
 
@@ -369,6 +380,13 @@ FunctionPass *llvm::createR600ISelDag(TargetMachine *TM,
 }
 
 bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
+#ifdef EXPENSIVE_CHECKS
+  DominatorTree & DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+  LoopInfo * LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+  for (auto &L : LI->getLoopsInPreorder()) {
+    assert(L->isLCSSAForm(DT));
+  }
+#endif
   Subtarget = &MF.getSubtarget<GCNSubtarget>();
   return SelectionDAGISel::runOnMachineFunction(MF);
 }
index b10f53c..8e4f35a 100644 (file)
@@ -835,6 +835,7 @@ bool GCNPassConfig::addPreISel() {
   if (!LateCFGStructurize) {
     addPass(createSIAnnotateControlFlowPass());
   }
+  addPass(createLCSSAPass());
 
   return false;
 }
index 01cd11a..c903a04 100644 (file)
@@ -17,52 +17,55 @@ define amdgpu_ps void @main(i32, float) {
 ; CHECK-NEXT:    v_interp_p1_f32_e32 v0, v1, attr0.x
 ; CHECK-NEXT:    v_cmp_nlt_f32_e64 s[0:1], 0, v0
 ; CHECK-NEXT:    v_mov_b32_e32 v1, 0
-; CHECK-NEXT:    ; implicit-def: $sgpr2_sgpr3
+; CHECK-NEXT:    ; implicit-def: $sgpr8_sgpr9
 ; CHECK-NEXT:    ; implicit-def: $sgpr6_sgpr7
+; CHECK-NEXT:    ; implicit-def: $sgpr2_sgpr3
 ; CHECK-NEXT:    s_branch BB0_3
-; CHECK-NEXT:  BB0_1: ; in Loop: Header=BB0_3 Depth=1
-; CHECK-NEXT:    ; implicit-def: $vgpr1
+; CHECK-NEXT:  BB0_1: ; %Flow1
+; CHECK-NEXT:    ; in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT:    s_or_b64 exec, exec, s[8:9]
+; CHECK-NEXT:    s_mov_b64 s[8:9], 0
 ; CHECK-NEXT:  BB0_2: ; %Flow
 ; CHECK-NEXT:    ; in Loop: Header=BB0_3 Depth=1
-; CHECK-NEXT:    s_and_b64 s[8:9], exec, s[6:7]
-; CHECK-NEXT:    s_or_b64 s[8:9], s[8:9], s[4:5]
-; CHECK-NEXT:    s_mov_b64 s[4:5], s[8:9]
-; CHECK-NEXT:    s_andn2_b64 exec, exec, s[8:9]
-; CHECK-NEXT:    s_cbranch_execz BB0_7
+; CHECK-NEXT:    s_and_b64 s[10:11], exec, s[6:7]
+; CHECK-NEXT:    s_or_b64 s[10:11], s[10:11], s[4:5]
+; CHECK-NEXT:    s_andn2_b64 s[2:3], s[2:3], exec
+; CHECK-NEXT:    s_and_b64 s[4:5], s[8:9], exec
+; CHECK-NEXT:    s_or_b64 s[2:3], s[2:3], s[4:5]
+; CHECK-NEXT:    s_mov_b64 s[4:5], s[10:11]
+; CHECK-NEXT:    s_andn2_b64 exec, exec, s[10:11]
+; CHECK-NEXT:    s_cbranch_execz BB0_6
 ; CHECK-NEXT:  BB0_3: ; %loop
 ; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    v_cmp_gt_u32_e32 vcc, 32, v1
 ; CHECK-NEXT:    s_and_b64 vcc, exec, vcc
 ; CHECK-NEXT:    s_or_b64 s[6:7], s[6:7], exec
-; CHECK-NEXT:    s_or_b64 s[2:3], s[2:3], exec
-; CHECK-NEXT:    s_cbranch_vccz BB0_1
+; CHECK-NEXT:    s_or_b64 s[8:9], s[8:9], exec
+; CHECK-NEXT:    s_cbranch_vccz BB0_2
 ; CHECK-NEXT:  ; %bb.4: ; %endif1
 ; CHECK-NEXT:    ; in Loop: Header=BB0_3 Depth=1
 ; CHECK-NEXT:    s_mov_b64 s[6:7], -1
 ; CHECK-NEXT:    s_and_saveexec_b64 s[8:9], s[0:1]
 ; CHECK-NEXT:    s_xor_b64 s[8:9], exec, s[8:9]
-; CHECK-NEXT:    ; mask branch BB0_6
+; CHECK-NEXT:    ; mask branch BB0_1
+; CHECK-NEXT:    s_cbranch_execz BB0_1
 ; CHECK-NEXT:  BB0_5: ; %endif2
 ; CHECK-NEXT:    ; in Loop: Header=BB0_3 Depth=1
 ; CHECK-NEXT:    v_add_u32_e32 v1, 1, v1
 ; CHECK-NEXT:    s_xor_b64 s[6:7], exec, -1
-; CHECK-NEXT:  BB0_6: ; %Flow1
-; CHECK-NEXT:    ; in Loop: Header=BB0_3 Depth=1
-; CHECK-NEXT:    s_or_b64 exec, exec, s[8:9]
-; CHECK-NEXT:    s_andn2_b64 s[2:3], s[2:3], exec
-; CHECK-NEXT:    s_branch BB0_2
-; CHECK-NEXT:  BB0_7: ; %Flow2
-; CHECK-NEXT:    s_or_b64 exec, exec, s[8:9]
+; CHECK-NEXT:    s_branch BB0_1
+; CHECK-NEXT:  BB0_6: ; %Flow2
+; CHECK-NEXT:    s_or_b64 exec, exec, s[10:11]
 ; CHECK-NEXT:    v_mov_b32_e32 v1, 0
-; this is the divergent branch with the condition not marked as divergent
 ; CHECK-NEXT:    s_and_saveexec_b64 s[0:1], s[2:3]
-; CHECK-NEXT:    ; mask branch BB0_9
-; CHECK-NEXT:  BB0_8: ; %if1
+; CHECK-NEXT:    ; mask branch BB0_8
+; CHECK-NEXT:  BB0_7: ; %if1
 ; CHECK-NEXT:    v_sqrt_f32_e32 v1, v0
-; CHECK-NEXT:  BB0_9: ; %endloop
+; CHECK-NEXT:  BB0_8: ; %endloop
 ; CHECK-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; CHECK-NEXT:    exp mrt0 v1, v1, v1, v1 done vm
 ; CHECK-NEXT:    s_endpgm
+; this is the divergent branch with the condition not marked as divergent
 start:
   %v0 = call float @llvm.amdgcn.interp.p1(float %1, i32 0, i32 0, i32 %0)
   br label %loop