// Returns MaxKernArgAlign.value() as a plain unsigned integer.
unsigned getMaxKernArgAlign() const { return MaxKernArgAlign.value(); }
// Returns the LDSSize member unchanged. This hunk only changes the declared
// return type from `unsigned` to `uint32_t`; the body is untouched.
- unsigned getLDSSize() const {
+ uint32_t getLDSSize() const {
return LDSSize;
}
const llvm::MachineFunction &MF)
: ExplicitKernArgSize(MFI.getExplicitKernArgSize()),
MaxKernArgAlign(MFI.getMaxKernArgAlign()), LDSSize(MFI.getLDSSize()),
+ GDSSize(MFI.getGDSSize()),
DynLDSAlign(MFI.getDynLDSAlign()), IsEntryFunction(MFI.isEntryFunction()),
NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
MemoryBound(MFI.isMemoryBound()), WaveLimiter(MFI.needsWaveLimiter()),
ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize;
MaxKernArgAlign = assumeAligned(YamlMFI.MaxKernArgAlign);
LDSSize = YamlMFI.LDSSize;
+ GDSSize = YamlMFI.GDSSize;
DynLDSAlign = YamlMFI.DynLDSAlign;
HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress;
Occupancy = YamlMFI.Occupancy;
struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {
uint64_t ExplicitKernArgSize = 0;
unsigned MaxKernArgAlign = 0;
- unsigned LDSSize = 0;
+ uint32_t LDSSize = 0;
+ uint32_t GDSSize = 0;
Align DynLDSAlign;
bool IsEntryFunction = false;
bool NoSignedZerosFPMath = false;
UINT64_C(0));
YamlIO.mapOptional("maxKernArgAlign", MFI.MaxKernArgAlign, 0u);
YamlIO.mapOptional("ldsSize", MFI.LDSSize, 0u);
+ YamlIO.mapOptional("gdsSize", MFI.GDSSize, 0u);
YamlIO.mapOptional("dynLDSAlign", MFI.DynLDSAlign, Align());
YamlIO.mapOptional("isEntryFunction", MFI.IsEntryFunction, false);
YamlIO.mapOptional("noSignedZerosFPMath", MFI.NoSignedZerosFPMath, false);
; AFTER-PEI-NEXT: explicitKernArgSize: 12
; AFTER-PEI-NEXT: maxKernArgAlign: 8
; AFTER-PEI-NEXT: ldsSize: 0
+; AFTER-PEI-NEXT: gdsSize: 0
; AFTER-PEI-NEXT: dynLDSAlign: 1
; AFTER-PEI-NEXT: isEntryFunction: true
; AFTER-PEI-NEXT: noSignedZerosFPMath: false
# FULL-NEXT: explicitKernArgSize: 128
# FULL-NEXT: maxKernArgAlign: 64
# FULL-NEXT: ldsSize: 2048
+# FULL-NEXT: gdsSize: 256
# FULL-NEXT: dynLDSAlign: 1
# FULL-NEXT: isEntryFunction: true
# FULL-NEXT: noSignedZerosFPMath: false
# SIMPLE-NEXT: explicitKernArgSize: 128
# SIMPLE-NEXT: maxKernArgAlign: 64
# SIMPLE-NEXT: ldsSize: 2048
+# SIMPLE-NEXT: gdsSize: 256
# SIMPLE-NEXT: isEntryFunction: true
# SIMPLE-NEXT: memoryBound: true
# SIMPLE-NEXT: waveLimiter: true
explicitKernArgSize: 128
maxKernArgAlign: 64
ldsSize: 2048
+ gdsSize: 256
isEntryFunction: true
noSignedZerosFPMath: false
memoryBound: true
# FULL-NEXT: explicitKernArgSize: 0
# FULL-NEXT: maxKernArgAlign: 1
# FULL-NEXT: ldsSize: 0
+# FULL-NEXT: gdsSize: 0
# FULL-NEXT: dynLDSAlign: 1
# FULL-NEXT: isEntryFunction: false
# FULL-NEXT: noSignedZerosFPMath: false
# FULL-NEXT: explicitKernArgSize: 0
# FULL-NEXT: maxKernArgAlign: 1
# FULL-NEXT: ldsSize: 0
+# FULL-NEXT: gdsSize: 0
# FULL-NEXT: dynLDSAlign: 1
# FULL-NEXT: isEntryFunction: false
# FULL-NEXT: noSignedZerosFPMath: false
# FULL-NEXT: explicitKernArgSize: 0
# FULL-NEXT: maxKernArgAlign: 1
# FULL-NEXT: ldsSize: 0
+# FULL-NEXT: gdsSize: 0
# FULL-NEXT: dynLDSAlign: 1
# FULL-NEXT: isEntryFunction: true
# FULL-NEXT: noSignedZerosFPMath: false
# ALL-LABEL: name: dyn_lds_with_alignment
# FULL: ldsSize: 0
+# FULL-NEXT: gdsSize: 0
# FULL-NEXT: dynLDSAlign: 8
# SIMPLE: dynLDSAlign: 8
; CHECK-NEXT: explicitKernArgSize: 128
; CHECK-NEXT: maxKernArgAlign: 64
; CHECK-NEXT: ldsSize: 2048
+; CHECK-NEXT: gdsSize: 0
; CHECK-NEXT: dynLDSAlign: 1
; CHECK-NEXT: isEntryFunction: true
; CHECK-NEXT: noSignedZerosFPMath: false
ret void
}
+@gds = addrspace(2) global [128 x i32] undef, align 4
+
; CHECK-LABEL: {{^}}name: ps_shader
; CHECK: machineFunctionInfo:
; CHECK-NEXT: explicitKernArgSize: 0
; CHECK-NEXT: maxKernArgAlign: 4
; CHECK-NEXT: ldsSize: 0
+; CHECK-NEXT: gdsSize: 0
; CHECK-NEXT: dynLDSAlign: 1
; CHECK-NEXT: isEntryFunction: true
; CHECK-NEXT: noSignedZerosFPMath: false
ret void
}
; Test for the gdsSize field: this amdgpu_ps function uses attribute group #5
; ("amdgpu-gds-size"="4096"), and the expectation below requires the printed
; function info to contain gdsSize: 4096.
+; CHECK-LABEL: {{^}}name: gds_size_shader
+; CHECK: gdsSize: 4096
+define amdgpu_ps void @gds_size_shader(i32 %arg0, i32 inreg %arg1) #5 {
+ ret void
+}
+
; CHECK-LABEL: {{^}}name: function
; CHECK: machineFunctionInfo:
; CHECK-NEXT: explicitKernArgSize: 0
; CHECK-NEXT: maxKernArgAlign: 1
; CHECK-NEXT: ldsSize: 0
+; CHECK-NEXT: gdsSize: 0
; CHECK-NEXT: dynLDSAlign: 1
; CHECK-NEXT: isEntryFunction: false
; CHECK-NEXT: noSignedZerosFPMath: false
; CHECK-NEXT: explicitKernArgSize: 0
; CHECK-NEXT: maxKernArgAlign: 1
; CHECK-NEXT: ldsSize: 0
+; CHECK-NEXT: gdsSize: 0
; CHECK-NEXT: dynLDSAlign: 1
; CHECK-NEXT: isEntryFunction: false
; CHECK-NEXT: noSignedZerosFPMath: true
ret void
}
; Attribute group #5 is repurposed for "amdgpu-gds-size"; the attribute set that
; used to be #5 (convergent nounwind readnone willreturn) becomes #6, so the
; set.inactive declaration is updated to reference #6.
-declare i32 @llvm.amdgcn.set.inactive.i32(i32, i32) #5
+declare i32 @llvm.amdgcn.set.inactive.i32(i32, i32) #6
attributes #0 = { "no-signed-zeros-fp-math" = "true" }
attributes #1 = { "amdgpu-dx10-clamp" = "false" }
attributes #2 = { "amdgpu-ieee" = "false" }
attributes #3 = { "amdgpu-dx10-clamp" = "false" "amdgpu-ieee" = "false" }
attributes #4 = { "amdgpu-32bit-address-high-bits"="0xffff8000" }
-attributes #5 = { convergent nounwind readnone willreturn }
+attributes #5 = { "amdgpu-gds-size"="4096" }
+attributes #6 = { convergent nounwind readnone willreturn }