[AMDGPU] improve fragile test for divergent branches

author Sameer Sahasrabuddhe <sameer.sahasrabuddhe@amd.com>

Thu, 27 Feb 2020 10:29:25 +0000 (15:59 +0530)

committer Sameer Sahasrabuddhe <sameer.sahasrabuddhe@amd.com>

Thu, 27 Feb 2020 18:01:03 +0000 (23:31 +0530)
author Sameer Sahasrabuddhe <sameer.sahasrabuddhe@amd.com>
Thu, 27 Feb 2020 10:29:25 +0000 (15:59 +0530)
committer Sameer Sahasrabuddhe <sameer.sahasrabuddhe@amd.com>
Thu, 27 Feb 2020 18:01:03 +0000 (23:31 +0530)
diff --git a/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll b/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll

index 55841aa..a9a9180 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
+++ b/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
@@ -1,97 +1,78 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s
+; RUN: opt --amdgpu-annotate-uniform -S %s |  FileCheck %s -check-prefix=UNIFORM
+; RUN: opt --amdgpu-annotate-uniform --si-annotate-control-flow -S %s |  FileCheck %s -check-prefix=CONTROLFLOW
  
-; This module creates a divergent branch. The branch is marked as divergent by
-; the divergence analysis but the condition is not. This test ensures that the
-; divergence of the branch is tested, not its condition, so that branch is
-; correctly emitted as divergent.
+; This module creates a divergent branch in block Flow2. The branch is
+; marked as divergent by the divergence analysis, but its condition is
+; not. This test ensures that the divergence of the branch is tested,
+; not its condition, so that the branch is correctly emitted as divergent.
  
  target triple = "amdgcn-mesa-mesa3d"
  
-define amdgpu_ps void @main(i32, float) {
-; CHECK-LABEL: main:
-; CHECK:       ; %bb.0: ; %start
-; CHECK-NEXT:    v_readfirstlane_b32 s0, v0
-; CHECK-NEXT:    s_mov_b32 m0, s0
-; CHECK-NEXT:    s_mov_b32 s0, 0
-; CHECK-NEXT:    v_interp_p1_f32_e32 v0, v1, attr0.x
-; CHECK-NEXT:    v_cmp_nlt_f32_e32 vcc, 0, v0
-; CHECK-NEXT:    s_mov_b64 s[2:3], 0
-; CHECK-NEXT:    ; implicit-def: $sgpr6_sgpr7
-; CHECK-NEXT:    ; implicit-def: $sgpr4_sgpr5
-; CHECK-NEXT:    s_branch BB0_3
-; CHECK-NEXT:  BB0_1: ; %Flow1
-; CHECK-NEXT:    ; in Loop: Header=BB0_3 Depth=1
-; CHECK-NEXT:    s_or_b64 exec, exec, s[8:9]
-; CHECK-NEXT:    s_mov_b64 s[8:9], 0
-; CHECK-NEXT:  BB0_2: ; %Flow
-; CHECK-NEXT:    ; in Loop: Header=BB0_3 Depth=1
-; CHECK-NEXT:    s_and_b64 s[10:11], exec, s[6:7]
-; CHECK-NEXT:    s_or_b64 s[2:3], s[10:11], s[2:3]
-; CHECK-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
-; CHECK-NEXT:    s_and_b64 s[8:9], s[8:9], exec
-; CHECK-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
-; CHECK-NEXT:    s_andn2_b64 exec, exec, s[2:3]
-; CHECK-NEXT:    s_cbranch_execz BB0_6
-; CHECK-NEXT:  BB0_3: ; %loop
-; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    s_or_b64 s[6:7], s[6:7], exec
-; CHECK-NEXT:    s_cmp_lt_u32 s0, 32
-; CHECK-NEXT:    s_mov_b64 s[8:9], -1
-; CHECK-NEXT:    s_cbranch_scc0 BB0_2
-; CHECK-NEXT:  ; %bb.4: ; %endif1
-; CHECK-NEXT:    ; in Loop: Header=BB0_3 Depth=1
-; CHECK-NEXT:    s_mov_b64 s[6:7], -1
-; CHECK-NEXT:    s_and_saveexec_b64 s[8:9], vcc
-; CHECK-NEXT:    s_xor_b64 s[8:9], exec, s[8:9]
-; CHECK-NEXT:    s_cbranch_execz BB0_1
-; CHECK-NEXT:  ; %bb.5: ; %endif2
-; CHECK-NEXT:    ; in Loop: Header=BB0_3 Depth=1
-; CHECK-NEXT:    s_add_i32 s0, s0, 1
-; CHECK-NEXT:    s_xor_b64 s[6:7], exec, -1
-; CHECK-NEXT:    s_branch BB0_1
-; CHECK-NEXT:  BB0_6: ; %Flow2
-; CHECK-NEXT:    s_or_b64 exec, exec, s[2:3]
-; CHECK-NEXT:    v_mov_b32_e32 v1, 0
-; CHECK-NEXT:    s_and_saveexec_b64 s[0:1], s[4:5]
-; CHECK-NEXT:  ; %bb.7: ; %if1
-; CHECK-NEXT:    v_sqrt_f32_e32 v1, v0
-; CHECK-NEXT:  ; %bb.8: ; %endloop
-; CHECK-NEXT:    s_or_b64 exec, exec, s[0:1]
-; CHECK-NEXT:    exp mrt0 v1, v1, v1, v1 done vm
-; CHECK-NEXT:    s_endpgm
-
-; this is the divergent branch with the condition not marked as divergent
+define amdgpu_ps void @main(i32 %0, float %1) {
  start:
    %v0 = call float @llvm.amdgcn.interp.p1(float %1, i32 0, i32 0, i32 %0)
    br label %loop
  
-loop:
-  %v1 = phi i32 [ 0, %start ], [ %v5, %endif2 ]
+loop:                                             ; preds = %Flow, %start
+  %v1 = phi i32 [ 0, %start ], [ %6, %Flow ]
    %v2 = icmp ugt i32 %v1, 31
-  br i1 %v2, label %if1, label %endif1
+  %2 = xor i1 %v2, true
+  br i1 %2, label %endif1, label %Flow
  
-if1:
+Flow1:                                            ; preds = %endif2, %endif1
+  %3 = phi i32 [ %v5, %endif2 ], [ undef, %endif1 ]
+  %4 = phi i1 [ false, %endif2 ], [ true, %endif1 ]
+  br label %Flow
+
+; UNIFORM-LABEL: Flow2:
+; UNIFORM-NEXT: br i1 %8, label %if1, label %endloop
+; UNIFORM-NOT: !amdgpu.uniform
+; UNIFORM: if1:
+
+; CONTROLFLOW-LABEL: Flow2:
+; CONTROLFLOW-NEXT:  call void @llvm.amdgcn.end.cf.i64(i64 %{{.*}})
+; CONTROLFLOW-NEXT:  [[IF:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %{{.*}})
+; CONTROLFLOW-NEXT:  [[COND:%.*]] = extractvalue { i1, i64 } [[IF]], 0
+; CONTROLFLOW-NEXT:  %{{.*}} = extractvalue { i1, i64 } [[IF]], 1
+; CONTROLFLOW-NEXT:  br i1 [[COND]], label %if1, label %endloop
+
+Flow2:                                            ; preds = %Flow
+  br i1 %8, label %if1, label %endloop
+
+if1:                                              ; preds = %Flow2
    %v3 = call float @llvm.sqrt.f32(float %v0)
    br label %endloop
  
-endif1:
+endif1:                                           ; preds = %loop
    %v4 = fcmp ogt float %v0, 0.000000e+00
-  br i1 %v4, label %endloop, label %endif2
+  %5 = xor i1 %v4, true
+  br i1 %5, label %endif2, label %Flow1
  
-endif2:
+Flow:                                             ; preds = %Flow1, %loop
+  %6 = phi i32 [ %3, %Flow1 ], [ undef, %loop ]
+  %7 = phi i1 [ %4, %Flow1 ], [ true, %loop ]
+  %8 = phi i1 [ false, %Flow1 ], [ true, %loop ]
+  br i1 %7, label %Flow2, label %loop
+
+endif2:                                           ; preds = %endif1
    %v5 = add i32 %v1, 1
-  br label %loop
+  br label %Flow1
  
-endloop:
-  %v6 = phi float [ %v3, %if1 ], [ 0.0, %endif1 ]
-  call void @llvm.amdgcn.exp.v4f32(i32 0, i32 15, float %v6, float %v6, float %v6, float %v6, i1 true, i1 true)
+endloop:                                          ; preds = %if1, %Flow2
+  %v6 = phi float [ 0.000000e+00, %Flow2 ], [ %v3, %if1 ]
+  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %v6, float %v6, float %v6, float %v6, i1 true, i1 true)
    ret void
  }
  
-declare float @llvm.sqrt.f32(float) #1
-declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1
-declare void @llvm.amdgcn.exp.v4f32(i32, i32, float, float, float, float, i1, i1) #0
+; Function Attrs: nounwind readnone speculatable willreturn
+declare float @llvm.sqrt.f32(float) #0
+
+; Function Attrs: nounwind readnone speculatable
+declare float @llvm.amdgcn.interp.p1(float, i32 immarg, i32 immarg, i32) #1
+
+; Function Attrs: inaccessiblememonly nounwind writeonly
+declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #2
  
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+attributes #0 = { nounwind readnone speculatable willreturn }
+attributes #1 = { nounwind readnone speculatable }
+attributes #2 = { inaccessiblememonly nounwind writeonly }
author	Sameer Sahasrabuddhe <sameer.sahasrabuddhe@amd.com>
	Thu, 27 Feb 2020 10:29:25 +0000 (15:59 +0530)
committer	Sameer Sahasrabuddhe <sameer.sahasrabuddhe@amd.com>
	Thu, 27 Feb 2020 18:01:03 +0000 (23:31 +0530)