AMDGPU: Fix divergence analysis of control flow intrinsics

author Matt Arsenault <Matthew.Arsenault@amd.com>

Fri, 31 Jan 2020 22:23:59 +0000 (17:23 -0500)

committer Matt Arsenault <Matthew.Arsenault@amd.com>

Wed, 5 Feb 2020 17:30:54 +0000 (09:30 -0800)
author Matt Arsenault <Matthew.Arsenault@amd.com>
Fri, 31 Jan 2020 22:23:59 +0000 (17:23 -0500)
committer Matt Arsenault <Matthew.Arsenault@amd.com>
Wed, 5 Feb 2020 17:30:54 +0000 (09:30 -0800)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td

index 8d70536..a7eb081 100644 (file)
--- a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
@@ -270,5 +270,13 @@ def : SourceOfDivergence<int_amdgcn_mfma_i32_32x32x8i8>;
  def : SourceOfDivergence<int_amdgcn_mfma_f32_32x32x2bf16>;
  def : SourceOfDivergence<int_amdgcn_mfma_f32_32x32x4bf16>;
  
+// The dummy boolean output is divergent from the IR's perspective,
+// but the mask results are uniform. These produce both a divergent and
+// a uniform result, so the returned struct is collectively divergent.
+// isAlwaysUniform can override the extract of the uniform component.
+def : SourceOfDivergence<int_amdgcn_if>;
+def : SourceOfDivergence<int_amdgcn_else>;
+def : SourceOfDivergence<int_amdgcn_loop>;
+
  foreach intr = AMDGPUImageDimAtomicIntrinsics in
  def : SourceOfDivergence<intr>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp

index b15ef76..15ee82b 100644 (file)
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -706,6 +706,7 @@ bool GCNTTIImpl::isAlwaysUniform(const Value *V) const {
      case Intrinsic::amdgcn_readlane:
      case Intrinsic::amdgcn_icmp:
      case Intrinsic::amdgcn_fcmp:
+    case Intrinsic::amdgcn_if_break:
        return true;
      }
    }
@@ -720,14 +721,28 @@ bool GCNTTIImpl::isAlwaysUniform(const Value *V) const {
    if (!ExtValue)
      return false;
  
-  if (const CallInst *CI = dyn_cast<CallInst>(ExtValue->getOperand(0))) {
-    // If we have inline asm returning mixed SGPR and VGPR results, we inferred
-    // divergent for the overall struct return. We need to override it in the
-    // case we're extracting an SGPR component here.
-    if (isa<InlineAsm>(CI->getCalledValue()))
-      return !isInlineAsmSourceOfDivergence(CI, ExtValue->getIndices());
+  const CallInst *CI = dyn_cast<CallInst>(ExtValue->getOperand(0));
+  if (!CI)
+    return false;
+
+  if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(CI)) {
+    switch (Intrinsic->getIntrinsicID()) {
+    default:
+      return false;
+    case Intrinsic::amdgcn_if:
+    case Intrinsic::amdgcn_else: {
+      ArrayRef<unsigned> Indices = ExtValue->getIndices();
+      return Indices.size() == 1 && Indices[0] == 1;
+    }
+    }
    }
  
+  // If we have inline asm returning mixed SGPR and VGPR results, we inferred
+  // divergent for the overall struct return. We need to override it in the
+  // case we're extracting an SGPR component here.
+  if (isa<InlineAsm>(CI->getCalledValue()))
+    return !isInlineAsmSourceOfDivergence(CI, ExtValue->getIndices());
+
    return false;
  }
  
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/control-flow-intrinsics.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/control-flow-intrinsics.ll

new file mode 100644 (file)

index 0000000..9446a7e
--- /dev/null
+++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/control-flow-intrinsics.ll
@@ -0,0 +1,102 @@
+; RUN: opt -mtriple=amdgcn-mesa-mesa3d -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+
+; Tests that the mask results of control flow intrinsics are treated as uniform, while their dummy boolean results remain divergent.
+
+; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'test_if_break':
+; CHECK: DIVERGENT: %cond = icmp eq i32 %arg0, 0
+; CHECK-NOT: DIVERGENT
+; CHECK: ret void
+define amdgpu_ps void @test_if_break(i32 %arg0, i64 inreg %saved) {
+entry:
+  %cond = icmp eq i32 %arg0, 0
+  %break = call i64 @llvm.amdgcn.if.break.i64.i64(i1 %cond, i64 %saved)
+  store volatile i64 %break, i64 addrspace(1)* undef
+  ret void
+}
+
+; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'test_if':
+; CHECK: DIVERGENT: %cond = icmp eq i32 %arg0, 0
+; CHECK-NEXT: DIVERGENT: %if = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %cond)
+; CHECK-NEXT: DIVERGENT: %if.bool = extractvalue { i1, i64 } %if, 0
+; CHECK-NOT: DIVERGENT
+; CHECK: DIVERGENT: %if.bool.ext = zext i1 %if.bool to i32
+define void @test_if(i32 %arg0) {
+entry:
+  %cond = icmp eq i32 %arg0, 0
+  %if = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %cond)
+  %if.bool = extractvalue { i1, i64 } %if, 0
+  %if.mask = extractvalue { i1, i64 } %if, 1
+  %if.bool.ext = zext i1 %if.bool to i32
+  store volatile i32 %if.bool.ext, i32 addrspace(1)* undef
+  store volatile i64 %if.mask, i64 addrspace(1)* undef
+  ret void
+}
+
+; The result should still be treated as divergent, even with a uniform source.
+; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'test_if_uniform':
+; CHECK-NOT: DIVERGENT
+; CHECK: DIVERGENT: %if = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %cond)
+; CHECK-NEXT: DIVERGENT: %if.bool = extractvalue { i1, i64 } %if, 0
+; CHECK-NOT: DIVERGENT
+; CHECK: DIVERGENT: %if.bool.ext = zext i1 %if.bool to i32
+define amdgpu_ps void @test_if_uniform(i32 inreg %arg0) {
+entry:
+  %cond = icmp eq i32 %arg0, 0
+  %if = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %cond)
+  %if.bool = extractvalue { i1, i64 } %if, 0
+  %if.mask = extractvalue { i1, i64 } %if, 1
+  %if.bool.ext = zext i1 %if.bool to i32
+  store volatile i32 %if.bool.ext, i32 addrspace(1)* undef
+  store volatile i64 %if.mask, i64 addrspace(1)* undef
+  ret void
+}
+
+; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'test_loop_uniform':
+; CHECK: DIVERGENT: %loop = call i1 @llvm.amdgcn.loop.i64(i64 %mask)
+define amdgpu_ps void @test_loop_uniform(i64 inreg %mask) {
+entry:
+  %loop = call i1 @llvm.amdgcn.loop.i64(i64 %mask)
+  %loop.ext = zext i1 %loop to i32
+  store volatile i32 %loop.ext, i32 addrspace(1)* undef
+  ret void
+}
+
+; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'test_else':
+; CHECK: DIVERGENT: %else = call { i1, i64 } @llvm.amdgcn.else.i64.i64(i64 %mask)
+; CHECK: DIVERGENT:       %else.bool = extractvalue { i1, i64 } %else, 0
+; CHECK: {{^[ \t]+}}%else.mask = extractvalue { i1, i64 } %else, 1
+define amdgpu_ps void @test_else(i64 inreg %mask) {
+entry:
+  %else = call { i1, i64 } @llvm.amdgcn.else.i64.i64(i64 %mask)
+  %else.bool = extractvalue { i1, i64 } %else, 0
+  %else.mask = extractvalue { i1, i64 } %else, 1
+  %else.bool.ext = zext i1 %else.bool to i32
+  store volatile i32 %else.bool.ext, i32 addrspace(1)* undef
+  store volatile i64 %else.mask, i64 addrspace(1)* undef
+  ret void
+}
+
+; This case is probably always broken
+; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'test_else_divergent_mask':
+; CHECK: DIVERGENT: %if = call { i1, i64 } @llvm.amdgcn.else.i64.i64(i64 %mask)
+; CHECK-NEXT: DIVERGENT: %if.bool = extractvalue { i1, i64 } %if, 0
+; CHECK-NOT: DIVERGENT
+; CHECK: DIVERGENT: %if.bool.ext = zext i1 %if.bool to i32
+define void @test_else_divergent_mask(i64 %mask) {
+entry:
+  %if = call { i1, i64 } @llvm.amdgcn.else.i64.i64(i64 %mask)
+  %if.bool = extractvalue { i1, i64 } %if, 0
+  %if.mask = extractvalue { i1, i64 } %if, 1
+  %if.bool.ext = zext i1 %if.bool to i32
+  store volatile i32 %if.bool.ext, i32 addrspace(1)* undef
+  store volatile i64 %if.mask, i64 addrspace(1)* undef
+  ret void
+}
+
+declare { i1, i64 } @llvm.amdgcn.if.i64(i1) #0
+declare { i1, i64 } @llvm.amdgcn.else.i64.i64(i64) #0
+declare i64 @llvm.amdgcn.if.break.i64.i64(i1, i64) #1
+declare i1 @llvm.amdgcn.loop.i64(i64) #1
+
+attributes #0 = { convergent nounwind }
+attributes #1 = { convergent nounwind readnone }
author	Matt Arsenault <Matthew.Arsenault@amd.com>
	Fri, 31 Jan 2020 22:23:59 +0000 (17:23 -0500)
committer	Matt Arsenault <Matthew.Arsenault@amd.com>
	Wed, 5 Feb 2020 17:30:54 +0000 (09:30 -0800)
llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td		patch \| blob \| history
llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp		patch \| blob \| history
llvm/test/Analysis/DivergenceAnalysis/AMDGPU/control-flow-intrinsics.ll	[new file with mode: 0644]	patch \| blob