AMDGPU/GFX10: Fix NSA reassign pass when operands are undef

author Nicolai Hähnle <nicolai.haehnle@amd.com>

Sat, 1 Feb 2020 14:12:24 +0000 (15:12 +0100)

committer Nicolai Hähnle <nicolai.haehnle@amd.com>

Sat, 1 Feb 2020 21:41:40 +0000 (22:41 +0100)
author Nicolai Hähnle <nicolai.haehnle@amd.com>
Sat, 1 Feb 2020 14:12:24 +0000 (15:12 +0100)
committer Nicolai Hähnle <nicolai.haehnle@amd.com>
Sat, 1 Feb 2020 21:41:40 +0000 (22:41 +0100)
diff --git a/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp b/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp

index f6023f3..5734608 100644 (file)
--- a/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp
@@ -286,8 +286,15 @@ bool GCNNSAReassign::runOnMachineFunction(MachineFunction &MF) {
        }
        Intervals.push_back(LI);
        OrigRegs.push_back(VRM->getPhys(Reg));
-      MinInd = I ? std::min(MinInd, LI->beginIndex()) : LI->beginIndex();
-      MaxInd = I ? std::max(MaxInd, LI->endIndex()) : LI->endIndex();
+      if (LI->empty()) {
+        // The address input is undef, so it doesn't contribute to the relevant
+        // range. Seed a reasonable index range if required.
+        if (I == 0)
+          MinInd = MaxInd = LIS->getInstructionIndex(*MI);
+        continue;
+      }
+      MinInd = I != 0 ? std::min(MinInd, LI->beginIndex()) : LI->beginIndex();
+      MaxInd = I != 0 ? std::max(MaxInd, LI->endIndex()) : LI->endIndex();
      }
  
      if (Intervals.empty())
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.nsa.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.nsa.ll

index 1ed4e2f..52730f6 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.nsa.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.nsa.ll
@@ -80,6 +80,45 @@ main_body:
    ret <2 x float> %r
  }
  
+; Test that undef inputs with NSA are handled safely; these tests used to crash.
+
+; GCN-LABEL: {{^}}sample_undef_undef_undef_undef:
+; GCN: image_sample_c_b v0, v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY
+define amdgpu_ps float @sample_undef_undef_undef_undef(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp) {
+  %r = call float @llvm.amdgcn.image.sample.c.b.1darray.f32.f32.f32(i32 1, float undef, float undef, float undef, float undef, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
+  ret float %r
+}
+
+; GCN-LABEL: {{^}}sample_undef_undef_undef_def:
+; NONSA: v_mov_b32_e32 v3, v0
+; NONSA: image_sample_c_b v0, v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY
+; NSA: image_sample_c_b v0, [v0, v0, v0, v0], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY
+define amdgpu_ps float @sample_undef_undef_undef_def(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %layer) {
+  %r = call float @llvm.amdgcn.image.sample.c.b.1darray.f32.f32.f32(i32 1, float undef, float undef, float undef, float %layer, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
+  ret float %r
+}
+
+; GCN-LABEL: {{^}}sample_undef_undef_undef_def_rnd:
+; GCN: v_rndne_f32_e32 v3, v0
+; GCN: image_sample_c_b v0, v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY
+define amdgpu_ps float @sample_undef_undef_undef_def_rnd(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %layer) {
+  %layer_rnd = call float @llvm.rint.f32(float %layer)
+  %r = call float @llvm.amdgcn.image.sample.c.b.1darray.f32.f32.f32(i32 1, float undef, float undef, float undef, float %layer_rnd, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
+  ret float %r
+}
+
+; GCN-LABEL: {{^}}sample_def_undef_undef_undef:
+; GCN: v_add_f32_e32 v0, 1.0, v0
+; GCN: image_sample_c_b v0, v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY
+define amdgpu_ps float @sample_def_undef_undef_undef(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %z0) {
+  ; The NSA reassign pass is conservative (quite reasonably!) when one of the operands
+  ; comes directly from a function argument (via COPY). To test that NSA can be
+  ; eliminated in the presence of undef, just add an arbitrary intermediate
+  ; computation.
+  %c0 = fadd float %z0, 1.0
+  %r = call float @llvm.amdgcn.image.sample.c.b.1darray.f32.f32.f32(i32 1, float %c0, float undef, float undef, float undef, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
+  ret float %r
+}
  
  declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
@@ -88,4 +127,8 @@ declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32(i32, float, float,
  declare float @llvm.amdgcn.image.sample.3d.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  declare float @llvm.amdgcn.image.sample.c.l.3d.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  
+declare float @llvm.rint.f32(float) #2
+declare float @llvm.amdgcn.image.sample.c.b.1darray.f32.f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1
+
  attributes #1 = { nounwind readonly }
+attributes #2 = { nounwind readnone speculatable willreturn }
author	Nicolai Hähnle <nicolai.haehnle@amd.com>
	Sat, 1 Feb 2020 14:12:24 +0000 (15:12 +0100)
committer	Nicolai Hähnle <nicolai.haehnle@amd.com>
	Sat, 1 Feb 2020 21:41:40 +0000 (22:41 +0100)
llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp		patch | blob | history
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.nsa.ll		patch | blob | history