From f90f4884c88659881b1b37be473bd95180e70bf4 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sat, 16 Apr 2022 10:25:36 -0400 Subject: [PATCH] AMDGPU: Serialize gds size in MIR --- llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h | 2 +- llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | 2 ++ llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h | 4 +++- .../MIR/AMDGPU/machine-function-info-after-pei.ll | 1 + .../CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir | 7 +++++++ llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll | 17 +++++++++++++++-- 6 files changed, 29 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h index 48cf46b..ca00b5d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h @@ -65,7 +65,7 @@ public: unsigned getMaxKernArgAlign() const { return MaxKernArgAlign.value(); } - unsigned getLDSSize() const { + uint32_t getLDSSize() const { return LDSSize; } diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index c07511b..1c6038f 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -578,6 +578,7 @@ yaml::SIMachineFunctionInfo::SIMachineFunctionInfo( const llvm::MachineFunction &MF) : ExplicitKernArgSize(MFI.getExplicitKernArgSize()), MaxKernArgAlign(MFI.getMaxKernArgAlign()), LDSSize(MFI.getLDSSize()), + GDSSize(MFI.getGDSSize()), DynLDSAlign(MFI.getDynLDSAlign()), IsEntryFunction(MFI.isEntryFunction()), NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()), MemoryBound(MFI.isMemoryBound()), WaveLimiter(MFI.needsWaveLimiter()), @@ -607,6 +608,7 @@ bool SIMachineFunctionInfo::initializeBaseYamlFields( ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize; MaxKernArgAlign = assumeAligned(YamlMFI.MaxKernArgAlign); LDSSize = YamlMFI.LDSSize; + GDSSize = YamlMFI.GDSSize; DynLDSAlign = YamlMFI.DynLDSAlign; HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress; Occupancy = YamlMFI.Occupancy; diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h index 90568650..0293880 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -270,7 +270,8 @@ template <> struct MappingTraits { struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo { uint64_t ExplicitKernArgSize = 0; unsigned MaxKernArgAlign = 0; - unsigned LDSSize = 0; + uint32_t LDSSize = 0; + uint32_t GDSSize = 0; Align DynLDSAlign; bool IsEntryFunction = false; bool NoSignedZerosFPMath = false; @@ -308,6 +309,7 @@ template <> struct MappingTraits { UINT64_C(0)); YamlIO.mapOptional("maxKernArgAlign", MFI.MaxKernArgAlign, 0u); YamlIO.mapOptional("ldsSize", MFI.LDSSize, 0u); + YamlIO.mapOptional("gdsSize", MFI.GDSSize, 0u); YamlIO.mapOptional("dynLDSAlign", MFI.DynLDSAlign, Align()); YamlIO.mapOptional("isEntryFunction", MFI.IsEntryFunction, false); YamlIO.mapOptional("noSignedZerosFPMath", MFI.NoSignedZerosFPMath, false); diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll index c9170cd..6873683 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll +++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll @@ -7,6 +7,7 @@ ; AFTER-PEI-NEXT: explicitKernArgSize: 12 ; AFTER-PEI-NEXT: maxKernArgAlign: 8 ; AFTER-PEI-NEXT: ldsSize: 0 +; AFTER-PEI-NEXT: gdsSize: 0 ; AFTER-PEI-NEXT: dynLDSAlign: 1 ; AFTER-PEI-NEXT: isEntryFunction: true ; AFTER-PEI-NEXT: noSignedZerosFPMath: false diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir index 515a1fc..c0a18bb 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir +++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir @@ -8,6 +8,7 @@ # FULL-NEXT: explicitKernArgSize: 128 # FULL-NEXT: maxKernArgAlign: 64 # FULL-NEXT: ldsSize: 2048 +# FULL-NEXT: gdsSize: 256 # FULL-NEXT: dynLDSAlign: 1 # FULL-NEXT: isEntryFunction: true # FULL-NEXT: noSignedZerosFPMath: false @@ -47,6 +48,7 @@ # SIMPLE-NEXT: explicitKernArgSize: 128 # SIMPLE-NEXT: maxKernArgAlign: 64 # SIMPLE-NEXT: ldsSize: 2048 +# SIMPLE-NEXT: gdsSize: 256 # SIMPLE-NEXT: isEntryFunction: true # SIMPLE-NEXT: memoryBound: true # SIMPLE-NEXT: waveLimiter: true @@ -74,6 +76,7 @@ machineFunctionInfo: explicitKernArgSize: 128 maxKernArgAlign: 64 ldsSize: 2048 + gdsSize: 256 isEntryFunction: true noSignedZerosFPMath: false memoryBound: true @@ -100,6 +103,7 @@ body: | # FULL-NEXT: explicitKernArgSize: 0 # FULL-NEXT: maxKernArgAlign: 1 # FULL-NEXT: ldsSize: 0 +# FULL-NEXT: gdsSize: 0 # FULL-NEXT: dynLDSAlign: 1 # FULL-NEXT: isEntryFunction: false # FULL-NEXT: noSignedZerosFPMath: false @@ -163,6 +167,7 @@ body: | # FULL-NEXT: explicitKernArgSize: 0 # FULL-NEXT: maxKernArgAlign: 1 # FULL-NEXT: ldsSize: 0 +# FULL-NEXT: gdsSize: 0 # FULL-NEXT: dynLDSAlign: 1 # FULL-NEXT: isEntryFunction: false # FULL-NEXT: noSignedZerosFPMath: false @@ -227,6 +232,7 @@ body: | # FULL-NEXT: explicitKernArgSize: 0 # FULL-NEXT: maxKernArgAlign: 1 # FULL-NEXT: ldsSize: 0 +# FULL-NEXT: gdsSize: 0 # FULL-NEXT: dynLDSAlign: 1 # FULL-NEXT: isEntryFunction: true # FULL-NEXT: noSignedZerosFPMath: false @@ -387,6 +393,7 @@ body: | # ALL-LABEL: name: dyn_lds_with_alignment # FULL: ldsSize: 0 +# FULL-NEXT: gdsSize: 0 # FULL-NEXT: dynLDSAlign: 8 # SIMPLE: dynLDSAlign: 8 diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll index a3a6f08..da2836b 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll +++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll @@ -11,6 +11,7 @@ ; CHECK-NEXT: explicitKernArgSize: 128 ; CHECK-NEXT: maxKernArgAlign: 64 ; CHECK-NEXT: ldsSize: 2048 +; CHECK-NEXT: gdsSize: 0 ; CHECK-NEXT: dynLDSAlign: 1 ; CHECK-NEXT: isEntryFunction: true ; CHECK-NEXT: noSignedZerosFPMath: false @@ -43,11 +44,14 @@ define amdgpu_kernel void @kernel(i32 %arg0, i64 %arg1, <16 x i32> %arg2) { ret void } +@gds = addrspace(2) global [128 x i32] undef, align 4 + ; CHECK-LABEL: {{^}}name: ps_shader ; CHECK: machineFunctionInfo: ; CHECK-NEXT: explicitKernArgSize: 0 ; CHECK-NEXT: maxKernArgAlign: 4 ; CHECK-NEXT: ldsSize: 0 +; CHECK-NEXT: gdsSize: 0 ; CHECK-NEXT: dynLDSAlign: 1 ; CHECK-NEXT: isEntryFunction: true ; CHECK-NEXT: noSignedZerosFPMath: false @@ -75,11 +79,18 @@ define amdgpu_ps void @ps_shader(i32 %arg0, i32 inreg %arg1) { ret void } +; CHECK-LABEL: {{^}}name: gds_size_shader +; CHECK: gdsSize: 4096 +define amdgpu_ps void @gds_size_shader(i32 %arg0, i32 inreg %arg1) #5 { + ret void +} + ; CHECK-LABEL: {{^}}name: function ; CHECK: machineFunctionInfo: ; CHECK-NEXT: explicitKernArgSize: 0 ; CHECK-NEXT: maxKernArgAlign: 1 ; CHECK-NEXT: ldsSize: 0 +; CHECK-NEXT: gdsSize: 0 ; CHECK-NEXT: dynLDSAlign: 1 ; CHECK-NEXT: isEntryFunction: false ; CHECK-NEXT: noSignedZerosFPMath: false @@ -121,6 +132,7 @@ define void @function() { ; CHECK-NEXT: explicitKernArgSize: 0 ; CHECK-NEXT: maxKernArgAlign: 1 ; CHECK-NEXT: ldsSize: 0 +; CHECK-NEXT: gdsSize: 0 ; CHECK-NEXT: dynLDSAlign: 1 ; CHECK-NEXT: isEntryFunction: false ; CHECK-NEXT: noSignedZerosFPMath: true @@ -214,11 +226,12 @@ define amdgpu_cs void @wwm_reserved_regs(i32 addrspace(1)* %ptr, <4 x i32> inreg ret void } -declare i32 @llvm.amdgcn.set.inactive.i32(i32, i32) #5 +declare i32 @llvm.amdgcn.set.inactive.i32(i32, i32) #6 attributes #0 = { "no-signed-zeros-fp-math" = "true" } attributes #1 = { "amdgpu-dx10-clamp" = "false" } attributes #2 = { "amdgpu-ieee" = "false" } attributes #3 = { "amdgpu-dx10-clamp" = "false" "amdgpu-ieee" = "false" } attributes #4 = { "amdgpu-32bit-address-high-bits"="0xffff8000" } -attributes #5 = { convergent nounwind readnone willreturn } +attributes #5 = { "amdgpu-gds-size"="4096" } +attributes #6 = { convergent nounwind readnone willreturn } -- 2.7.4