From 6043d4dfec1e47b6e314f3643f576ab808f246dc Mon Sep 17 00:00:00 2001
From: Jon Chesterfield <jonathanchesterfield@gmail.com>
Date: Sat, 15 Jul 2023 20:38:15 +0100
Subject: [PATCH] [amdgpu] Accept an optional max to amdgpu-lds-size attribute
 for use in PromoteAlloca

---
 llvm/docs/AMDGPUUsage.rst                            | 12 +++++++-----
 llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp  | 20 ++++++++++++++++++--
 llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp     |  9 ++++++++-
 ...s-indirect-extern-uses-max-reachable-alignment.ll |  2 +-
 4 files changed, 34 insertions(+), 9 deletions(-)

diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index dfe64fb..1e6f421 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -1090,11 +1090,13 @@ The AMDGPU backend supports the following LLVM IR attributes.
                                              kernel argument that holds the completion action pointer. If this
                                              attribute is absent, then the amdgpu-no-implicitarg-ptr is also removed.
 
-     "amdgpu-lds-size"                       The number of bytes that will be allocated in the Local Data Store at
-                                             address zero. Variables are allocated within this frame using absolute
-                                             symbol metadata, primarily by the AMDGPULowerModuleLDS pass. Internal
-                                             detail of how LDS variables are lowered, language front ends should not
-                                             set this.
+     "amdgpu-lds-size"="min[,max]"           Min is the minimum number of bytes that will be allocated in the Local
+                                             Data Store at address zero. Variables are allocated within this frame
+                                             using absolute symbol metadata, primarily by the AMDGPULowerModuleLDS
+                                             pass. Optional max is the maximum number of bytes that will be allocated.
+                                             Note that min==max indicates that no further variables can be added to
+                                             the frame. This is an internal detail of how LDS variables are lowered;
+                                             language front ends should not set this attribute.
 
      ======================================= ==========================================================
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
index e265de1..0df07d3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
@@ -198,7 +198,9 @@
 #include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
 #include "llvm/Support/OptimizedStructLayout.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/ModuleUtils.h"
 
@@ -1252,8 +1254,22 @@ public:
           recordLDSAbsoluteAddress(&M, DynamicVariable, Offset);
         }
 
-        if (Offset != 0)
-          Func.addFnAttr("amdgpu-lds-size", std::to_string(Offset));
+        if (Offset != 0) {
+          std::string Buffer;
+          raw_string_ostream SS{Buffer};
+          SS << format("%u", Offset);
+
+          // Instead of explicitly marking kernels that access dynamic variables
+          // using special case metadata, annotate with min-lds == max-lds, i.e.
+          // that there is no more space available for allocating more static
+          // LDS variables. That is the right condition to prevent allocating
+          // more variables which would collide with the addresses assigned to
+          // dynamic variables.
+          if (AllocateDynamicVariable)
+            SS << format(",%u", Offset);
+
+          Func.addFnAttr("amdgpu-lds-size", Buffer);
+        }
       }
     }
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
index 8f3bb62..44bbfe6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
@@ -10,6 +10,7 @@
 #include "AMDGPU.h"
 #include "AMDGPUPerfHintAnalysis.h"
 #include "AMDGPUSubtarget.h"
+#include "Utils/AMDGPUBaseInfo.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/IR/ConstantRange.h"
 #include "llvm/IR/Constants.h"
@@ -43,10 +44,16 @@ AMDGPUMachineFunction::AMDGPUMachineFunction(const Function &F,
   // Assume the attribute allocates before any known GDS globals.
   StaticGDSSize = GDSSize;
 
+  // Second value, if present, is the maximum value that can be assigned.
+  // Useful in PromoteAlloca or for LDS spills. Could be used for diagnostics
+  // during codegen.
+  std::pair<unsigned, unsigned> LDSSizeRange = AMDGPU::getIntegerPairAttribute(
+      F, "amdgpu-lds-size", {0, UINT32_MAX}, true);
+
   // The two separate variables are only profitable when the LDS module lowering
   // pass is disabled. If graphics does not use dynamic LDS, this is never
   // profitable. Leaving cleanup for a later change.
-  LDSSize = F.getFnAttributeAsParsedInteger("amdgpu-lds-size", 0);
+  LDSSize = LDSSizeRange.first;
   StaticLDSSize = LDSSize;
 
   CallingConv::ID CC = F.getCallingConv();
diff --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds-indirect-extern-uses-max-reachable-alignment.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds-indirect-extern-uses-max-reachable-alignment.ll
index 3f34cc3..810e1ed 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-module-lds-indirect-extern-uses-max-reachable-alignment.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-module-lds-indirect-extern-uses-max-reachable-alignment.ll
@@ -179,7 +179,7 @@ attributes #0 = { noinline }
 ; CHECK: declare i32 @llvm.amdgcn.lds.kernel.id() #3
 
 ; CHECK: attributes #0 = { noinline }
-; CHECK: attributes #1 = { "amdgpu-lds-size"="4" }
+; CHECK: attributes #1 = { "amdgpu-lds-size"="4,4" }
 ; CHECK: attributes #2 = { nocallback nofree nosync nounwind willreturn memory(none) }
 ; CHECK: attributes #3 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
 
-- 
2.7.4