From bef1ceb8154f771d5f52a10fa60fdcf86bd7d528 Mon Sep 17 00:00:00 2001
From: Nicolai Haehnle <nhaehnle@gmail.com>
Date: Mon, 18 Jul 2016 09:02:47 +0000
Subject: [PATCH] AMDGPU: Disable AMDGPUPromoteAlloca pass for shader calling
 conventions.

Summary:
The work item intrinsics are not available for the shader
calling conventions. And even if we did hook them up most
shader stages haves some extra restrictions on the amount
of available LDS.

Reviewers: tstellarAMD, arsenm

Subscribers: nhaehnle, arsenm, llvm-commits, kzhuravl

Differential Revision: https://reviews.llvm.org/D20728

llvm-svn: 275779
---
 llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp     |  6 +++++
 llvm/test/CodeGen/AMDGPU/promote-alloca-shaders.ll | 29 ++++++++++++++++++++++
 2 files changed, 35 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/promote-alloca-shaders.ll

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index fa8709e..7754638 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -649,6 +649,12 @@ void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) {
 
   const Function &ContainingFunction = *I.getParent()->getParent();
 
+  // Don't promote the alloca to LDS for shader calling conventions as the work
+  // item ID intrinsics are not supported for these calling conventions.
+  // Furthermore not all LDS is available for some of the stages.
+  if (AMDGPU::isShader(ContainingFunction.getCallingConv()))
+    return;
+
   // FIXME: We should also try to get this value from the reqd_work_group_size
   // function attribute if it is available.
   unsigned WorkGroupSize = AMDGPU::getMaximumWorkGroupSize(ContainingFunction);
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-shaders.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-shaders.ll
new file mode 100644
index 0000000..d40fca9
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-shaders.ll
@@ -0,0 +1,29 @@
+; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-promote-alloca < %s | FileCheck -check-prefix=IR %s
+; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=ASM %s
+
+; IR-LABEL: define amdgpu_vs void @promote_alloca_shaders(i32 addrspace(1)* inreg %out, i32 addrspace(1)* inreg %in) #0 {
+; IR: alloca [5 x i32]
+; ASM-LABEL: {{^}}promote_alloca_shaders:
+; ASM: ; LDSByteSize: 0 bytes/workgroup (compile time only)
+
+define amdgpu_vs void @promote_alloca_shaders(i32 addrspace(1)* inreg %out, i32 addrspace(1)* inreg %in) #0 {
+entry:
+  %stack = alloca [5 x i32], align 4
+  %tmp0 = load i32, i32 addrspace(1)* %in, align 4
+  %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %tmp0
+  store i32 4, i32* %arrayidx1, align 4
+  %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
+  %tmp1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %tmp1
+  store i32 5, i32* %arrayidx3, align 4
+  %arrayidx4 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0
+  %tmp2 = load i32, i32* %arrayidx4, align 4
+  store i32 %tmp2, i32 addrspace(1)* %out, align 4
+  %arrayidx5 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1
+  %tmp3 = load i32, i32* %arrayidx5
+  %arrayidx6 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
+  store i32 %tmp3, i32 addrspace(1)* %arrayidx6
+  ret void
+}
+
+attributes #0 = { nounwind "amdgpu-max-work-group-size"="64" }
-- 
2.7.4