From: Yaxun (Sam) Liu Date: Tue, 22 Oct 2019 17:41:25 +0000 (-0400) Subject: [HIP] Add option -fgpu-allow-device-init X-Git-Tag: llvmorg-11-init~5952 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=68f5ca4e19c16f12895a6f0b9fbabc1d86c4b6b0;p=platform%2Fupstream%2Fllvm.git [HIP] Add option -fgpu-allow-device-init Add this option to allow device side class type global variables with non-trivial ctor/dtor. device side init/fini functions will be emitted, which will be executed by HIP runtime when the fat binary is loaded/unloaded. This feature is to facilitate implementation of device side sanitizer which requires global vars with non-trival ctors. By default this option is disabled. Differential Revision: https://reviews.llvm.org/D69268 --- diff --git a/clang/include/clang/Basic/DiagnosticCommonKinds.td b/clang/include/clang/Basic/DiagnosticCommonKinds.td index 484cc31..4091195 100644 --- a/clang/include/clang/Basic/DiagnosticCommonKinds.td +++ b/clang/include/clang/Basic/DiagnosticCommonKinds.td @@ -304,6 +304,11 @@ def err_arcmt_nsinvocation_ownership : Error<"NSInvocation's %0 is not safe to b def err_openclcxx_not_supported : Error< "'%0' is not supported in C++ for OpenCL">; +// HIP +def warn_ignore_hip_only_option : Warning< + "'%0' is ignored since it is only supported for HIP">, + InGroup; + // OpenMP def err_omp_more_one_clause : Error< "directive '#pragma omp %0' cannot contain more than one '%1' clause%select{| with '%3' name modifier| with 'source' dependence}2">; diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index 9280595..11218cc 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -1077,6 +1077,10 @@ def SerializedDiagnostics : DiagGroup<"serialized-diagnostics">; // compiling CUDA C/C++ but which is not compatible with the CUDA spec. def CudaCompat : DiagGroup<"cuda-compat">; +// A warning group for warnings about features supported by HIP but +// ignored by CUDA. +def HIPOnly : DiagGroup<"hip-only">; + // Warnings which cause linking of the runtime libraries like // libc and the CRT to be skipped. def AVRRtlibLinkingQuirks : DiagGroup<"avr-rtlib-linking-quirks">; diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index a423654..eba4f83 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -224,6 +224,7 @@ LANGOPT(CUDAAllowVariadicFunctions, 1, 0, "allowing variadic functions in CUDA d LANGOPT(CUDAHostDeviceConstexpr, 1, 1, "treating unattributed constexpr functions as __host__ __device__") LANGOPT(CUDADeviceApproxTranscendentals, 1, 0, "using approximate transcendental functions") LANGOPT(GPURelocatableDeviceCode, 1, 0, "generate relocatable device code") +LANGOPT(GPUAllowDeviceInit, 1, 0, "allowing device side global init functions for HIP") LANGOPT(SYCLIsDevice , 1, 0, "Generate code for SYCL device") diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 3ce6fcf..4db7cd8 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -602,6 +602,9 @@ def fhip_dump_offload_linker_script : Flag<["-"], "fhip-dump-offload-linker-scri def fhip_new_launch_api : Flag<["-"], "fhip-new-launch-api">, Flags<[CC1Option]>, HelpText<"Use new kernel launching API for HIP.">; def fno_hip_new_launch_api : Flag<["-"], "fno-hip-new-launch-api">; +def fgpu_allow_device_init : Flag<["-"], "fgpu-allow-device-init">, + Flags<[CC1Option]>, HelpText<"Allow device side init function in HIP">; +def fno_gpu_allow_device_init : Flag<["-"], "fno-gpu-allow-device-init">; def libomptarget_nvptx_path_EQ : Joined<["--"], "libomptarget-nvptx-path=">, Group, HelpText<"Path to libomptarget-nvptx libraries">; def dD : Flag<["-"], "dD">, Group, Flags<[CC1Option]>, diff --git a/clang/lib/CodeGen/CGDeclCXX.cpp b/clang/lib/CodeGen/CGDeclCXX.cpp index bf16b7b..5b172a3 100644 --- a/clang/lib/CodeGen/CGDeclCXX.cpp +++ b/clang/lib/CodeGen/CGDeclCXX.cpp @@ -437,7 +437,7 @@ CodeGenModule::EmitCXXGlobalVarDeclInitFunc(const VarDecl *D, // that are of class type, cannot have a non-empty constructor. All // the checks have been done in Sema by now. Whatever initializers // are allowed are empty and we just need to ignore them here. - if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice && + if (getLangOpts().CUDAIsDevice && !getLangOpts().GPUAllowDeviceInit && (D->hasAttr() || D->hasAttr() || D->hasAttr())) return; @@ -608,6 +608,11 @@ CodeGenModule::EmitCXXGlobalInitFunc() { Fn->setCallingConv(llvm::CallingConv::SPIR_KERNEL); } + if (getLangOpts().HIP) { + Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL); + Fn->addFnAttr("device-init"); + } + CXXGlobalInits.clear(); } diff --git a/clang/lib/Driver/ToolChains/HIP.cpp b/clang/lib/Driver/ToolChains/HIP.cpp index ad9384d..d84a454 100644 --- a/clang/lib/Driver/ToolChains/HIP.cpp +++ b/clang/lib/Driver/ToolChains/HIP.cpp @@ -292,6 +292,10 @@ void HIPToolChain::addClangTargetOptions( false)) CC1Args.push_back("-fgpu-rdc"); + if (DriverArgs.hasFlag(options::OPT_fgpu_allow_device_init, + options::OPT_fno_gpu_allow_device_init, false)) + CC1Args.push_back("-fgpu-allow-device-init"); + // Default to "hidden" visibility, as object level linking will not be // supported for the foreseeable future. if (!DriverArgs.hasArg(options::OPT_fvisibility_EQ, diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 665695e..767a071 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -2528,6 +2528,13 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK, Opts.CUDADeviceApproxTranscendentals = 1; Opts.GPURelocatableDeviceCode = Args.hasArg(OPT_fgpu_rdc); + if (Args.hasArg(OPT_fgpu_allow_device_init)) { + if (Opts.HIP) + Opts.GPUAllowDeviceInit = 1; + else + Diags.Report(diag::warn_ignore_hip_only_option) + << Args.getLastArg(OPT_fgpu_allow_device_init)->getAsString(Args); + } Opts.HIPUseNewLaunchAPI = Args.hasArg(OPT_fhip_new_launch_api); if (Opts.ObjC) { diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp index d0ddfd0..0c61057 100644 --- a/clang/lib/Sema/SemaCUDA.cpp +++ b/clang/lib/Sema/SemaCUDA.cpp @@ -492,6 +492,8 @@ void Sema::checkAllowedCUDAInitializer(VarDecl *VD) { const Expr *Init = VD->getInit(); if (VD->hasAttr() || VD->hasAttr() || VD->hasAttr()) { + if (LangOpts.GPUAllowDeviceInit) + return; assert(!VD->isStaticLocal() || VD->hasAttr()); bool AllowedInit = false; if (const CXXConstructExpr *CE = dyn_cast(Init)) diff --git a/clang/test/CodeGenCUDA/device-init-fun.cu b/clang/test/CodeGenCUDA/device-init-fun.cu new file mode 100644 index 0000000..4f3119a --- /dev/null +++ b/clang/test/CodeGenCUDA/device-init-fun.cu @@ -0,0 +1,19 @@ +// REQUIRES: amdgpu-registered-target + +// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -std=c++11 \ +// RUN: -fgpu-allow-device-init -x hip \ +// RUN: -fno-threadsafe-statics -emit-llvm -o - %s \ +// RUN: | FileCheck %s + +#include "Inputs/cuda.h" + +// CHECK: define internal amdgpu_kernel void @_GLOBAL__sub_I_device_init_fun.cu() #[[ATTR:[0-9]*]] +// CHECK: attributes #[[ATTR]] = {{.*}}"device-init" + +__device__ void f(); + +struct A { + __device__ A() { f(); } +}; + +__device__ A a; diff --git a/clang/test/Frontend/warn-device-init-fun.cu b/clang/test/Frontend/warn-device-init-fun.cu new file mode 100644 index 0000000..479f3c9 --- /dev/null +++ b/clang/test/Frontend/warn-device-init-fun.cu @@ -0,0 +1,8 @@ +// REQUIRES: nvptx-registered-target + +// RUN: %clang_cc1 -triple nvptx -fcuda-is-device \ +// RUN: -fgpu-allow-device-init \ +// RUN: %s 2>&1 | FileCheck %s + +// CHECK: warning: '-fgpu-allow-device-init' is ignored since it is only supported for HIP +