public:
AMDGPUTargetCodeGenInfo(CodeGenTypes &CGT)
: TargetCodeGenInfo(std::make_unique<AMDGPUABIInfo>(CGT)) {}
+
+ void setFunctionDeclAttributes(const FunctionDecl *FD, llvm::Function *F,
+ CodeGenModule &CGM) const;
+
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &M) const override;
unsigned getOpenCLKernelCallingConv() const override;
cast<VarDecl>(D)->getType()->isCUDADeviceBuiltinTextureType()));
}
-void AMDGPUTargetCodeGenInfo::setTargetAttributes(
- const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
- if (requiresAMDGPUProtectedVisibility(D, GV)) {
- GV->setVisibility(llvm::GlobalValue::ProtectedVisibility);
- GV->setDSOLocal(true);
- }
-
- if (GV->isDeclaration())
- return;
- const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
- if (!FD)
- return;
-
- llvm::Function *F = cast<llvm::Function>(GV);
-
- const auto *ReqdWGS = M.getLangOpts().OpenCL ?
- FD->getAttr<ReqdWorkGroupSizeAttr>() : nullptr;
-
-
- const bool IsOpenCLKernel = M.getLangOpts().OpenCL &&
- FD->hasAttr<OpenCLKernelAttr>();
- const bool IsHIPKernel = M.getLangOpts().HIP &&
- FD->hasAttr<CUDAGlobalAttr>();
- if ((IsOpenCLKernel || IsHIPKernel) &&
- (M.getTriple().getOS() == llvm::Triple::AMDHSA))
- F->addFnAttr("amdgpu-implicitarg-num-bytes", "56");
-
- if (IsHIPKernel)
- F->addFnAttr("uniform-work-group-size", "true");
-
+void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes(
+ const FunctionDecl *FD, llvm::Function *F, CodeGenModule &M) const {
+ const auto *ReqdWGS =
+ M.getLangOpts().OpenCL ? FD->getAttr<ReqdWorkGroupSizeAttr>() : nullptr;
+ const bool IsOpenCLKernel =
+ M.getLangOpts().OpenCL && FD->hasAttr<OpenCLKernelAttr>();
+ const bool IsHIPKernel = M.getLangOpts().HIP && FD->hasAttr<CUDAGlobalAttr>();
const auto *FlatWGS = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>();
if (ReqdWGS || FlatWGS) {
if (NumVGPR != 0)
F->addFnAttr("amdgpu-num-vgpr", llvm::utostr(NumVGPR));
}
+}
+
+void AMDGPUTargetCodeGenInfo::setTargetAttributes(
+ const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
+ if (requiresAMDGPUProtectedVisibility(D, GV)) {
+ GV->setVisibility(llvm::GlobalValue::ProtectedVisibility);
+ GV->setDSOLocal(true);
+ }
+
+ if (GV->isDeclaration())
+ return;
+
+ llvm::Function *F = dyn_cast<llvm::Function>(GV);
+ if (!F)
+ return;
+
+ const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
+ if (FD)
+ setFunctionDeclAttributes(FD, F, M);
+
+ const bool IsOpenCLKernel =
+ M.getLangOpts().OpenCL && FD && FD->hasAttr<OpenCLKernelAttr>();
+ const bool IsHIPKernel =
+ M.getLangOpts().HIP && FD && FD->hasAttr<CUDAGlobalAttr>();
+
+ const bool IsOpenMP = M.getLangOpts().OpenMP && !FD;
+ if ((IsOpenCLKernel || IsHIPKernel || IsOpenMP) &&
+ (M.getTriple().getOS() == llvm::Triple::AMDHSA))
+ F->addFnAttr("amdgpu-implicitarg-num-bytes", "56");
+
+ if (IsHIPKernel)
+ F->addFnAttr("uniform-work-group-size", "true");
if (M.getContext().getTargetInfo().allowAMDGPUUnsafeFPAtomics())
F->addFnAttr("amdgpu-unsafe-fp-atomics", "true");
--- /dev/null
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck -check-prefixes=DEFAULT,ALL %s
+// RUN: %clang_cc1 -target-cpu gfx900 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck -check-prefixes=CPU,ALL %s
+
+// RUN: %clang_cc1 -menable-no-nans -mno-amdgpu-ieee -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck -check-prefixes=NOIEEE,ALL %s
+// RUN: %clang_cc1 -munsafe-fp-atomics -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck -check-prefixes=UNSAFEATOMIC,ALL %s
+
+// expected-no-diagnostics
+
+#define N 100
+
+int callable(int);
+
+// Check that the target attributes are set on the generated kernel
+int func() {
+ // ALL-LABEL: amdgpu_kernel void @__omp_offloading{{.*}} #0
+
+ int arr[N];
+
+#pragma omp target
+ for (int i = 0; i < N; i++) {
+ arr[i] = callable(arr[i]);
+ }
+
+ return arr[0];
+}
+
+int callable(int x) {
+ // ALL-LABEL: @_Z8callablei(i32 %x) #1
+ return x + 1;
+}
+
+ // DEFAULT: attributes #0 = { convergent noinline norecurse nounwind optnone "amdgpu-implicitarg-num-bytes"="56" "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+ // CPU: attributes #0 = { convergent noinline norecurse nounwind optnone "amdgpu-implicitarg-num-bytes"="56" "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" }
+ // NOIEEE: attributes #0 = { convergent noinline norecurse nounwind optnone "amdgpu-ieee"="false" "amdgpu-implicitarg-num-bytes"="56" "frame-pointer"="none" "min-legal-vector-width"="0" "no-nans-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+ // UNSAFEATOMIC: attributes #0 = { convergent noinline norecurse nounwind optnone "amdgpu-implicitarg-num-bytes"="56" "amdgpu-unsafe-fp-atomics"="true" "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+
+// DEFAULT: attributes #1 = { convergent mustprogress noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+// CPU: attributes #1 = { convergent mustprogress noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" }
+// NOIEEE: attributes #1 = { convergent mustprogress noinline nounwind optnone "amdgpu-ieee"="false" "frame-pointer"="none" "min-legal-vector-width"="0" "no-nans-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+// UNSAFEATOMIC: attributes #1 = { convergent mustprogress noinline nounwind optnone "amdgpu-unsafe-fp-atomics"="true" "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }