From 6043d4dfec1e47b6e314f3643f576ab808f246dc Mon Sep 17 00:00:00 2001 From: Jon Chesterfield Date: Sat, 15 Jul 2023 20:38:15 +0100 Subject: [PATCH] [amdgpu] Accept an optional max to amdgpu-lds-size attribute for use in PromoteAlloca --- llvm/docs/AMDGPUUsage.rst | 12 +++++++----- llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp | 20 ++++++++++++++++++-- llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp | 9 ++++++++- ...s-indirect-extern-uses-max-reachable-alignment.ll | 2 +- 4 files changed, 34 insertions(+), 9 deletions(-) diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index dfe64fb..1e6f421 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -1090,11 +1090,13 @@ The AMDGPU backend supports the following LLVM IR attributes. kernel argument that holds the completion action pointer. If this attribute is absent, then the amdgpu-no-implicitarg-ptr is also removed. - "amdgpu-lds-size" The number of bytes that will be allocated in the Local Data Store at - address zero. Variables are allocated within this frame using absolute - symbol metadata, primarily by the AMDGPULowerModuleLDS pass. Internal - detail of how LDS variables are lowered, language front ends should not - set this. + "amdgpu-lds-size"="min[,max]" Min is the minimum number of bytes that will be allocated in the Local + Data Store at address zero. Variables are allocated within this frame + using absolute symbol metadata, primarily by the AMDGPULowerModuleLDS + pass. Optional max is the maximum number of bytes that will be allocated. + Note that min==max indicates that no further variables can be added to + the frame. This is an internal detail of how LDS variables are lowered, + language front ends should not set this attribute. 
======================================= ========================================================== diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp index e265de1..0df07d3 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp @@ -198,7 +198,9 @@ #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" #include "llvm/Support/OptimizedStructLayout.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/ModuleUtils.h" @@ -1252,8 +1254,22 @@ public: recordLDSAbsoluteAddress(&M, DynamicVariable, Offset); } - if (Offset != 0) - Func.addFnAttr("amdgpu-lds-size", std::to_string(Offset)); + if (Offset != 0) { + std::string Buffer; + raw_string_ostream SS{Buffer}; + SS << format("%u", Offset); + + // Instead of explicitly marking kernels that access dynamic variables + // using special case metadata, annotate with min-lds == max-lds, i.e. + // that there is no more space available for allocating more static + // LDS variables. That is the right condition to prevent allocating + // more variables which would collide with the addresses assigned to + // dynamic variables.
+ if (AllocateDynamicVariable) + SS << format(",%u", Offset); + + Func.addFnAttr("amdgpu-lds-size", Buffer); + } } } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp index 8f3bb62..44bbfe6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp @@ -10,6 +10,7 @@ #include "AMDGPU.h" #include "AMDGPUPerfHintAnalysis.h" #include "AMDGPUSubtarget.h" +#include "Utils/AMDGPUBaseInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" @@ -43,10 +44,16 @@ AMDGPUMachineFunction::AMDGPUMachineFunction(const Function &F, // Assume the attribute allocates before any known GDS globals. StaticGDSSize = GDSSize; + // Second value, if present, is the maximum value that can be assigned. + // Useful in PromoteAlloca or for LDS spills. Could be used for diagnostics + // during codegen. + std::pair LDSSizeRange = AMDGPU::getIntegerPairAttribute( + F, "amdgpu-lds-size", {0, UINT32_MAX}, true); + // The two separate variables are only profitable when the LDS module lowering // pass is disabled. If graphics does not use dynamic LDS, this is never // profitable. Leaving cleanup for a later change. 
- LDSSize = F.getFnAttributeAsParsedInteger("amdgpu-lds-size", 0); + LDSSize = LDSSizeRange.first; StaticLDSSize = LDSSize; CallingConv::ID CC = F.getCallingConv(); diff --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds-indirect-extern-uses-max-reachable-alignment.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds-indirect-extern-uses-max-reachable-alignment.ll index 3f34cc3..810e1ed 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-module-lds-indirect-extern-uses-max-reachable-alignment.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-module-lds-indirect-extern-uses-max-reachable-alignment.ll @@ -179,7 +179,7 @@ attributes #0 = { noinline } ; CHECK: declare i32 @llvm.amdgcn.lds.kernel.id() #3 ; CHECK: attributes #0 = { noinline } -; CHECK: attributes #1 = { "amdgpu-lds-size"="4" } +; CHECK: attributes #1 = { "amdgpu-lds-size"="4,4" } ; CHECK: attributes #2 = { nocallback nofree nosync nounwind willreturn memory(none) } ; CHECK: attributes #3 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } -- 2.7.4