bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(F);
- bool HasFlat = ST.hasFlatAddressSpace();
bool HasApertureRegs = ST.hasApertureRegs();
SmallPtrSet<const Constant *, 8> ConstantExprVisited;
// TODO: We could refine this to captured pointers that could possibly be
// accessed by flat instructions. For now this is mostly a poor way of
// estimating whether there are calls before argument lowering.
- if (HasFlat && !IsFunc && HaveCall) {
- F.addFnAttr("amdgpu-flat-scratch");
+ if (!IsFunc && HaveCall) {
+ F.addFnAttr("amdgpu-calls");
Changed = true;
}
Occupancy = ST.computeOccupancy(MF, getLDSSize());
CallingConv::ID CC = F.getCallingConv();
+ const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
+
+ // FIXME: Calls should be detected by a proper analysis instead of relying on
+ // the "amdgpu-calls" attribute.
+ const bool HasCalls = FrameInfo.hasCalls() || F.hasFnAttribute("amdgpu-calls");
if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
if (!F.arg_empty())
if (F.hasFnAttribute("amdgpu-work-item-id-z"))
WorkItemIDZ = true;
- const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
bool HasStackObjects = FrameInfo.hasStackObjects();
if (isEntryFunction()) {
};
// TODO: This could be refined a lot. The attribute is a poor way of
// detecting calls that may require it before argument lowering.
- if (hasNonSpillStackObjects() || F.hasFnAttribute("amdgpu-flat-scratch"))
+ if (HasCalls || hasNonSpillStackObjects())
FlatScratchInit = true;
}
; HSA: attributes #16 = { nounwind "amdgpu-implicitarg-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" }
; HSA: attributes #17 = { nounwind "uniform-work-group-size"="false" }
; HSA: attributes #18 = { nounwind }
-; HSA: attributes #19 = { nounwind "amdgpu-flat-scratch" "uniform-work-group-size"="false" }
+; HSA: attributes #19 = { nounwind "amdgpu-calls" "uniform-work-group-size"="false" }
-; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s
+; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s
; If the kernel does not have the "uniform-work-group-size" attribute, set it to "false" on both the caller and the callee
ret void
}
-; CHECK: define amdgpu_kernel void @kernel1() #[[FOO]] {
+; CHECK: define amdgpu_kernel void @kernel1() #[[KERNEL1:[0-9]+]] {
define amdgpu_kernel void @kernel1() #1 {
call void @foo()
ret void
attributes #0 = { "uniform-work-group-size"="true" }
; CHECK: attributes #[[FOO]] = { "uniform-work-group-size"="false" }
+; CHECK: attributes #[[KERNEL1]] = { "amdgpu-calls" "uniform-work-group-size"="false" }
-; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s
+; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s
; Test to verify if the attribute gets propagated across nested function calls
ret void
}
-; CHECK: define amdgpu_kernel void @kernel3() #[[FUNC:[0-9]+]] {
+; CHECK: define amdgpu_kernel void @kernel3() #[[KERNEL:[0-9]+]] {
define amdgpu_kernel void @kernel3() #2 {
call void @func2()
ret void
attributes #2 = { "uniform-work-group-size"="true" }
; CHECK: attributes #[[FUNC]] = { "uniform-work-group-size"="true" }
+; CHECK: attributes #[[KERNEL]] = { "amdgpu-calls" "uniform-work-group-size"="true" }
-; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s
+; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s
; Two kernels with different values of the "uniform-work-group-size" attribute call the same function
ret void
}
-; CHECK: define amdgpu_kernel void @kernel2() #[[FUNC]] {
+; CHECK: define amdgpu_kernel void @kernel2() #[[KERNEL2:[0-9]+]] {
define amdgpu_kernel void @kernel2() #2 {
call void @func()
ret void
attributes #1 = { "uniform-work-group-size"="true" }
; CHECK: attributes #[[FUNC]] = { "uniform-work-group-size"="false" }
-; CHECK: attributes #[[KERNEL1]] = { "uniform-work-group-size"="true" }
+; CHECK: attributes #[[KERNEL1]] = { "amdgpu-calls" "uniform-work-group-size"="true" }
+; CHECK: attributes #[[KERNEL2]] = { "amdgpu-calls" "uniform-work-group-size"="false" }
-; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s
+; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s
; Propagate the "uniform-work-group-size" attribute from the kernel to the callee if the callee doesn't have it
; CHECK: define void @func() #[[FUNC:[0-9]+]] {
attributes #2 = { "uniform-work-group-size"="true" }
; CHECK: attributes #[[FUNC]] = { nounwind "uniform-work-group-size"="false" }
-; CHECK: attributes #[[KERNEL1]] = { "uniform-work-group-size"="false" }
-; CHECK: attributes #[[KERNEL2]] = { "uniform-work-group-size"="true" }
+; CHECK: attributes #[[KERNEL1]] = { "amdgpu-calls" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[KERNEL2]] = { "amdgpu-calls" "uniform-work-group-size"="true" }
-; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s
+; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s
; Test to ensure recursive functions exhibit proper behaviour
; Test to generate fibonacci numbers
ret i32 1
}
-; CHECK: define amdgpu_kernel void @kernel(i32 addrspace(1)* %m) #[[FIB]] {
+; CHECK: define amdgpu_kernel void @kernel(i32 addrspace(1)* %m) #[[KERNEL:[0-9]+]] {
define amdgpu_kernel void @kernel(i32 addrspace(1)* %m) #1 {
%r = call i32 @fib(i32 5)
store i32 %r, i32 addrspace(1)* %m
attributes #1 = { "uniform-work-group-size"="true" }
; CHECK: attributes #[[FIB]] = { "uniform-work-group-size"="true" }
+; CHECK: attributes #[[KERNEL]] = { "amdgpu-calls" "uniform-work-group-size"="true" }
attributes #0 = { "uniform-work-group-size"="false" }
-; CHECK: attributes #[[FUNC]] = { "amdgpu-flat-scratch" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[FUNC]] = { "amdgpu-calls" "uniform-work-group-size"="false" }