Reapply "OpenMP: Start calling setTargetAttributes for generated kernels"

author Matt Arsenault <Matthew.Arsenault@amd.com>

Mon, 29 Nov 2021 20:47:42 +0000 (15:47 -0500)

committer Matt Arsenault <Matthew.Arsenault@amd.com>

Thu, 2 Dec 2021 19:55:56 +0000 (14:55 -0500)
author Matt Arsenault <Matthew.Arsenault@amd.com>
Mon, 29 Nov 2021 20:47:42 +0000 (15:47 -0500)
committer Matt Arsenault <Matthew.Arsenault@amd.com>
Thu, 2 Dec 2021 19:55:56 +0000 (14:55 -0500)
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp

index 75709b3..c3a0144 100644 (file)
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -15,6 +15,7 @@
  #include "CGCleanup.h"
  #include "CGRecordLayout.h"
  #include "CodeGenFunction.h"
+#include "TargetInfo.h"
  #include "clang/AST/APValue.h"
  #include "clang/AST/Attr.h"
  #include "clang/AST/Decl.h"
@@ -6620,6 +6621,8 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
      OutlinedFn->addFnAttr("omp_target_thread_limit",
                            std::to_string(DefaultValThreads));
    }
+
+  CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
  }
  
  /// Checks if the expression is constant or does not have non-trivial function
diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp

index 36e0319..9e7e9b5 100644 (file)
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -9161,6 +9161,10 @@ class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo {
  public:
    AMDGPUTargetCodeGenInfo(CodeGenTypes &CGT)
        : TargetCodeGenInfo(std::make_unique<AMDGPUABIInfo>(CGT)) {}
+
+  void setFunctionDeclAttributes(const FunctionDecl *FD, llvm::Function *F,
+                                 CodeGenModule &CGM) const;
+
    void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                             CodeGen::CodeGenModule &M) const override;
    unsigned getOpenCLKernelCallingConv() const override;
@@ -9200,36 +9204,13 @@ static bool requiresAMDGPUProtectedVisibility(const Decl *D,
             cast<VarDecl>(D)->getType()->isCUDADeviceBuiltinTextureType()));
  }
  
-void AMDGPUTargetCodeGenInfo::setTargetAttributes(
-    const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
-  if (requiresAMDGPUProtectedVisibility(D, GV)) {
-    GV->setVisibility(llvm::GlobalValue::ProtectedVisibility);
-    GV->setDSOLocal(true);
-  }
-
-  if (GV->isDeclaration())
-    return;
-  const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
-  if (!FD)
-    return;
-
-  llvm::Function *F = cast<llvm::Function>(GV);
-
-  const auto *ReqdWGS = M.getLangOpts().OpenCL ?
-    FD->getAttr<ReqdWorkGroupSizeAttr>() : nullptr;
-
-
-  const bool IsOpenCLKernel = M.getLangOpts().OpenCL &&
-                              FD->hasAttr<OpenCLKernelAttr>();
-  const bool IsHIPKernel = M.getLangOpts().HIP &&
-                           FD->hasAttr<CUDAGlobalAttr>();
-  if ((IsOpenCLKernel || IsHIPKernel) &&
-      (M.getTriple().getOS() == llvm::Triple::AMDHSA))
-    F->addFnAttr("amdgpu-implicitarg-num-bytes", "56");
-
-  if (IsHIPKernel)
-    F->addFnAttr("uniform-work-group-size", "true");
-
+void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes(
+    const FunctionDecl *FD, llvm::Function *F, CodeGenModule &M) const {
+  const auto *ReqdWGS =
+      M.getLangOpts().OpenCL ? FD->getAttr<ReqdWorkGroupSizeAttr>() : nullptr;
+  const bool IsOpenCLKernel =
+      M.getLangOpts().OpenCL && FD->hasAttr<OpenCLKernelAttr>();
+  const bool IsHIPKernel = M.getLangOpts().HIP && FD->hasAttr<CUDAGlobalAttr>();
  
    const auto *FlatWGS = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>();
    if (ReqdWGS || FlatWGS) {
@@ -9297,6 +9278,38 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes(
      if (NumVGPR != 0)
        F->addFnAttr("amdgpu-num-vgpr", llvm::utostr(NumVGPR));
    }
+}
+
+void AMDGPUTargetCodeGenInfo::setTargetAttributes(
+    const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
+  if (requiresAMDGPUProtectedVisibility(D, GV)) {
+    GV->setVisibility(llvm::GlobalValue::ProtectedVisibility);
+    GV->setDSOLocal(true);
+  }
+
+  if (GV->isDeclaration())
+    return;
+
+  llvm::Function *F = dyn_cast<llvm::Function>(GV);
+  if (!F)
+    return;
+
+  const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
+  if (FD)
+    setFunctionDeclAttributes(FD, F, M);
+
+  const bool IsOpenCLKernel =
+      M.getLangOpts().OpenCL && FD && FD->hasAttr<OpenCLKernelAttr>();
+  const bool IsHIPKernel =
+      M.getLangOpts().HIP && FD && FD->hasAttr<CUDAGlobalAttr>();
+
+  const bool IsOpenMP = M.getLangOpts().OpenMP && !FD;
+  if ((IsOpenCLKernel || IsHIPKernel || IsOpenMP) &&
+      (M.getTriple().getOS() == llvm::Triple::AMDHSA))
+    F->addFnAttr("amdgpu-implicitarg-num-bytes", "56");
+
+  if (IsHIPKernel)
+    F->addFnAttr("uniform-work-group-size", "true");
  
    if (M.getContext().getTargetInfo().allowAMDGPUUnsafeFPAtomics())
      F->addFnAttr("amdgpu-unsafe-fp-atomics", "true");
diff --git a/clang/test/OpenMP/amdgcn-attributes.cpp b/clang/test/OpenMP/amdgcn-attributes.cpp

new file mode 100644 (file)

index 0000000..4be5ad6
--- /dev/null
+++ b/clang/test/OpenMP/amdgcn-attributes.cpp
@@ -0,0 +1,43 @@
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck -check-prefixes=DEFAULT,ALL %s
+// RUN: %clang_cc1 -target-cpu gfx900 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck -check-prefixes=CPU,ALL %s
+
+// RUN: %clang_cc1 -menable-no-nans -mno-amdgpu-ieee -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck -check-prefixes=NOIEEE,ALL %s
+// RUN: %clang_cc1 -munsafe-fp-atomics -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck -check-prefixes=UNSAFEATOMIC,ALL %s
+
+// expected-no-diagnostics
+
+#define N 100
+
+int callable(int);
+
+// Check that the target attributes are set on the generated kernel
+int func() {
+  // ALL-LABEL: amdgpu_kernel void @__omp_offloading{{.*}} #0
+
+  int arr[N];
+
+#pragma omp target
+  for (int i = 0; i < N; i++) {
+    arr[i] = callable(arr[i]);
+  }
+
+  return arr[0];
+}
+
+int callable(int x) {
+  // ALL-LABEL: @_Z8callablei(i32 %x) #1
+  return x + 1;
+}
+
+  // DEFAULT: attributes #0 = { convergent noinline norecurse nounwind optnone "amdgpu-implicitarg-num-bytes"="56" "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+  // CPU: attributes #0 = { convergent noinline norecurse nounwind optnone "amdgpu-implicitarg-num-bytes"="56" "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" }
+  // NOIEEE: attributes #0 = { convergent noinline norecurse nounwind optnone "amdgpu-ieee"="false" "amdgpu-implicitarg-num-bytes"="56" "frame-pointer"="none" "min-legal-vector-width"="0" "no-nans-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+  // UNSAFEATOMIC: attributes #0 = { convergent noinline norecurse nounwind optnone "amdgpu-implicitarg-num-bytes"="56" "amdgpu-unsafe-fp-atomics"="true" "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+
+// DEFAULT: attributes #1 = { convergent mustprogress noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+// CPU: attributes #1 = { convergent mustprogress noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" }
+// NOIEEE: attributes #1 = { convergent mustprogress noinline nounwind optnone "amdgpu-ieee"="false" "frame-pointer"="none" "min-legal-vector-width"="0" "no-nans-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+// UNSAFEATOMIC: attributes #1 = { convergent mustprogress noinline nounwind optnone "amdgpu-unsafe-fp-atomics"="true" "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
author	Matt Arsenault <Matthew.Arsenault@amd.com>
	Mon, 29 Nov 2021 20:47:42 +0000 (15:47 -0500)
committer	Matt Arsenault <Matthew.Arsenault@amd.com>
	Thu, 2 Dec 2021 19:55:56 +0000 (14:55 -0500)
clang/lib/CodeGen/CGOpenMPRuntime.cpp		patch \| blob \| history
clang/lib/CodeGen/TargetInfo.cpp		patch \| blob \| history
clang/test/OpenMP/amdgcn-attributes.cpp	[new file with mode: 0644]	patch \| blob