From 25eb7fa01d7ebbe67648ea03841cda55b4239ab2 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 29 Nov 2021 15:47:10 -0500 Subject: [PATCH] Revert "OpenMP: Start calling setTargetAttributes for generated kernels" This reverts commit 6c27d389c8a00040aad998fe959f38ba709a8750. This is failing on the buildbots --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 3 -- clang/lib/CodeGen/TargetInfo.cpp | 73 ++++++++++++++------------------- clang/test/OpenMP/amdgcn-attributes.cpp | 43 ------------------- 3 files changed, 30 insertions(+), 89 deletions(-) delete mode 100644 clang/test/OpenMP/amdgcn-attributes.cpp diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index c3a0144..75709b3 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -15,7 +15,6 @@ #include "CGCleanup.h" #include "CGRecordLayout.h" #include "CodeGenFunction.h" -#include "TargetInfo.h" #include "clang/AST/APValue.h" #include "clang/AST/Attr.h" #include "clang/AST/Decl.h" @@ -6621,8 +6620,6 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( OutlinedFn->addFnAttr("omp_target_thread_limit", std::to_string(DefaultValThreads)); } - - CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM); } /// Checks if the expression is constant or does not have non-trivial function diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index e94436d..4360269 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -9143,10 +9143,6 @@ class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo { public: AMDGPUTargetCodeGenInfo(CodeGenTypes &CGT) : TargetCodeGenInfo(std::make_unique(CGT)) {} - - void setFunctionDeclAttributes(const FunctionDecl *FD, llvm::Function *F, - CodeGenModule &CGM) const; - void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const override; unsigned getOpenCLKernelCallingConv() const override; @@ -9186,13 +9182,36 @@ static bool requiresAMDGPUProtectedVisibility(const Decl *D, cast(D)->getType()->isCUDADeviceBuiltinTextureType())); } -void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes( - const FunctionDecl *FD, llvm::Function *F, CodeGenModule &M) const { - const auto *ReqdWGS = - M.getLangOpts().OpenCL ? FD->getAttr() : nullptr; - const bool IsOpenCLKernel = - M.getLangOpts().OpenCL && FD->hasAttr(); - const bool IsHIPKernel = M.getLangOpts().HIP && FD->hasAttr(); +void AMDGPUTargetCodeGenInfo::setTargetAttributes( + const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const { + if (requiresAMDGPUProtectedVisibility(D, GV)) { + GV->setVisibility(llvm::GlobalValue::ProtectedVisibility); + GV->setDSOLocal(true); + } + + if (GV->isDeclaration()) + return; + const FunctionDecl *FD = dyn_cast_or_null(D); + if (!FD) + return; + + llvm::Function *F = cast(GV); + + const auto *ReqdWGS = M.getLangOpts().OpenCL ? + FD->getAttr() : nullptr; + + + const bool IsOpenCLKernel = M.getLangOpts().OpenCL && + FD->hasAttr(); + const bool IsHIPKernel = M.getLangOpts().HIP && + FD->hasAttr(); + if ((IsOpenCLKernel || IsHIPKernel) && + (M.getTriple().getOS() == llvm::Triple::AMDHSA)) + F->addFnAttr("amdgpu-implicitarg-num-bytes", "56"); + + if (IsHIPKernel) + F->addFnAttr("uniform-work-group-size", "true"); + const auto *FlatWGS = FD->getAttr(); if (ReqdWGS || FlatWGS) { @@ -9260,38 +9279,6 @@ void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes( if (NumVGPR != 0) F->addFnAttr("amdgpu-num-vgpr", llvm::utostr(NumVGPR)); } -} - -void AMDGPUTargetCodeGenInfo::setTargetAttributes( - const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const { - if (requiresAMDGPUProtectedVisibility(D, GV)) { - GV->setVisibility(llvm::GlobalValue::ProtectedVisibility); - GV->setDSOLocal(true); - } - - if (GV->isDeclaration()) - return; - - llvm::Function *F = dyn_cast(GV); - if (!F) - return; - - const FunctionDecl *FD = dyn_cast_or_null(D); - if (FD) - setFunctionDeclAttributes(FD, F, M); - - const bool IsOpenCLKernel = - M.getLangOpts().OpenCL && FD && FD->hasAttr(); - const bool IsHIPKernel = - M.getLangOpts().HIP && FD && FD->hasAttr(); - - const bool IsOpenMP = M.getLangOpts().OpenMP && !FD; - if ((IsOpenCLKernel || IsHIPKernel || IsOpenMP) && - (M.getTriple().getOS() == llvm::Triple::AMDHSA)) - F->addFnAttr("amdgpu-implicitarg-num-bytes", "56"); - - if (IsHIPKernel) - F->addFnAttr("uniform-work-group-size", "true"); if (M.getContext().getTargetInfo().allowAMDGPUUnsafeFPAtomics()) F->addFnAttr("amdgpu-unsafe-fp-atomics", "true"); diff --git a/clang/test/OpenMP/amdgcn-attributes.cpp b/clang/test/OpenMP/amdgcn-attributes.cpp deleted file mode 100644 index 4be5ad6..0000000 --- a/clang/test/OpenMP/amdgcn-attributes.cpp +++ /dev/null @@ -1,43 +0,0 @@ -// REQUIRES: amdgpu-registered-target - -// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck -check-prefixes=DEFAULT,ALL %s -// RUN: %clang_cc1 -target-cpu gfx900 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck -check-prefixes=CPU,ALL %s - -// RUN: %clang_cc1 -menable-no-nans -mno-amdgpu-ieee -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck -check-prefixes=NOIEEE,ALL %s -// RUN: %clang_cc1 -munsafe-fp-atomics -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck -check-prefixes=UNSAFEATOMIC,ALL %s - -// expected-no-diagnostics - -#define N 100 - -int callable(int); - -// Check that the target attributes are set on the generated kernel -int func() { - // ALL-LABEL: amdgpu_kernel void @__omp_offloading{{.*}} #0 - - int arr[N]; - -#pragma omp target - for (int i = 0; i < N; i++) { - arr[i] = callable(arr[i]); - } - - return arr[0]; -} - -int callable(int x) { - // ALL-LABEL: @_Z8callablei(i32 %x) #1 - return x + 1; -} - - // DEFAULT: attributes #0 = { convergent noinline norecurse nounwind optnone "amdgpu-implicitarg-num-bytes"="56" "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } - // CPU: attributes #0 = { convergent noinline norecurse nounwind optnone "amdgpu-implicitarg-num-bytes"="56" "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" } - // NOIEEE: attributes #0 = { convergent noinline norecurse nounwind optnone "amdgpu-ieee"="false" "amdgpu-implicitarg-num-bytes"="56" "frame-pointer"="none" "min-legal-vector-width"="0" "no-nans-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } - // UNSAFEATOMIC: attributes #0 = { convergent noinline norecurse nounwind optnone "amdgpu-implicitarg-num-bytes"="56" "amdgpu-unsafe-fp-atomics"="true" "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } - -// DEFAULT: attributes #1 = { convergent mustprogress noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -// CPU: attributes #1 = { convergent mustprogress noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" } -// NOIEEE: attributes #1 = { convergent mustprogress noinline nounwind optnone "amdgpu-ieee"="false" "frame-pointer"="none" "min-legal-vector-width"="0" "no-nans-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -// UNSAFEATOMIC: attributes #1 = { convergent mustprogress noinline nounwind optnone "amdgpu-unsafe-fp-atomics"="true" "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -- 2.7.4