From 3f4b5893efed620d93015896d79eb276628286f8 Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Mon, 16 Nov 2020 10:58:48 -0500 Subject: [PATCH] [AMDGPU] Add option -munsafe-fp-atomics Add an option -munsafe-fp-atomics for AMDGPU target. When enabled, clang adds function attribute "amdgpu-unsafe-fp-atomics" to any functions for amdgpu target. This allows amdgpu backend to use unsafe fp atomic instructions in these functions. Differential Revision: https://reviews.llvm.org/D91546 --- clang/include/clang/Basic/TargetInfo.h | 6 ++++++ clang/include/clang/Basic/TargetOptions.h | 3 +++ clang/include/clang/Driver/Options.td | 5 +++++ clang/lib/Basic/TargetInfo.cpp | 1 + clang/lib/Basic/Targets/AMDGPU.cpp | 1 + clang/lib/CodeGen/TargetInfo.cpp | 3 +++ clang/lib/Driver/ToolChains/Clang.cpp | 5 +++++ clang/lib/Frontend/CompilerInvocation.cpp | 3 +++ clang/test/CodeGenCUDA/amdgpu-func-attrs.cu | 22 ++++++++++++++++++++++ clang/test/Driver/hip-options.hip | 4 ++++ 10 files changed, 53 insertions(+) create mode 100644 clang/test/CodeGenCUDA/amdgpu-func-attrs.cu diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h index 26dc6ea..698964b 100644 --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -218,6 +218,8 @@ protected: unsigned HasAArch64SVETypes : 1; + unsigned AllowAMDGPUUnsafeFPAtomics : 1; + unsigned ARMCDECoprocMask : 8; unsigned MaxOpenCLWorkGroupSize; @@ -857,6 +859,10 @@ public: /// available on this target. bool hasAArch64SVETypes() const { return HasAArch64SVETypes; } + /// Returns whether or not the AMDGPU unsafe floating point atomics are + /// allowed. + bool allowAMDGPUUnsafeFPAtomics() const { return AllowAMDGPUUnsafeFPAtomics; } + /// For ARM targets returns a mask defining which coprocessors are configured /// as Custom Datapath. uint32_t getARMCDECoprocMask() const { return ARMCDECoprocMask; } diff --git a/clang/include/clang/Basic/TargetOptions.h b/clang/include/clang/Basic/TargetOptions.h index d1cc024..f81c150 100644 --- a/clang/include/clang/Basic/TargetOptions.h +++ b/clang/include/clang/Basic/TargetOptions.h @@ -75,6 +75,9 @@ public: /// address space. bool NVPTXUseShortPointers = false; + /// \brief If enabled, allow AMDGPU unsafe floating point atomics. + bool AllowAMDGPUUnsafeFPAtomics = false; + // The code model to be used as specified by the user. Corresponds to // CodeModel::Model enum defined in include/llvm/Support/CodeGen.h, plus // "default" for the case when the user has not explicitly specified a diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index ec86c5e..0168d70 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2546,6 +2546,11 @@ def mxnack : Flag<["-"], "mxnack">, Group, HelpText<"Specify XNACK mode (AMDGPU only)">; def mno_xnack : Flag<["-"], "mno-xnack">, Group; +def munsafe_fp_atomics : Flag<["-"], "munsafe-fp-atomics">, Group, + HelpText<"Enable unsafe floating point atomic instructions (AMDGPU only)">, + Flags<[CC1Option]>; +def mno_unsafe_fp_atomics : Flag<["-"], "mno-unsafe-fp-atomics">, Group; + def faltivec : Flag<["-"], "faltivec">, Group, Flags<[NoXarchOption]>; def fno_altivec : Flag<["-"], "fno-altivec">, Group, Flags<[NoXarchOption]>; def maltivec : Flag<["-"], "maltivec">, Group; diff --git a/clang/lib/Basic/TargetInfo.cpp b/clang/lib/Basic/TargetInfo.cpp index eccdc21..642ee75 100644 --- a/clang/lib/Basic/TargetInfo.cpp +++ b/clang/lib/Basic/TargetInfo.cpp @@ -115,6 +115,7 @@ TargetInfo::TargetInfo(const llvm::Triple &T) : TargetOpts(), Triple(T) { HasBuiltinMSVaList = false; IsRenderScriptTarget = false; HasAArch64SVETypes = false; + AllowAMDGPUUnsafeFPAtomics = false; ARMCDECoprocMask = 0; // Default to no types using fpret. diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index 4d6a9a5..9b88dff 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -323,6 +323,7 @@ AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, HasLegalHalfType = true; HasFloat16 = true; WavefrontSize = GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32 ? 32 : 64; + AllowAMDGPUUnsafeFPAtomics = Opts.AllowAMDGPUUnsafeFPAtomics; // Set pointer width and alignment for target address space 0. PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits(); diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index 1e59203..a98e409 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -9080,6 +9080,9 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes( if (NumVGPR != 0) F->addFnAttr("amdgpu-num-vgpr", llvm::utostr(NumVGPR)); } + + if (M.getContext().getTargetInfo().allowAMDGPUUnsafeFPAtomics()) + F->addFnAttr("amdgpu-unsafe-fp-atomics", "true"); } unsigned AMDGPUTargetCodeGenInfo::getOpenCLKernelCallingConv() const { diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 9ef408f..ae9e1ce 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -6217,6 +6217,11 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, } HandleAmdgcnLegacyOptions(D, Args, CmdArgs); + if (Triple.isAMDGPU()) { + if (Args.hasFlag(options::OPT_munsafe_fp_atomics, + options::OPT_mno_unsafe_fp_atomics)) + CmdArgs.push_back("-munsafe-fp-atomics"); + } // For all the host OpenMP offloading compile jobs we need to pass the targets // information using -fopenmp-targets= option. diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index c4133ec..5064230 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -3747,6 +3747,9 @@ static void ParseTargetArgs(TargetOptions &Opts, ArgList &Args, Opts.ForceEnableInt128 = Args.hasArg(OPT_fforce_enable_int128); Opts.NVPTXUseShortPointers = Args.hasFlag( options::OPT_fcuda_short_ptr, options::OPT_fno_cuda_short_ptr, false); + Opts.AllowAMDGPUUnsafeFPAtomics = + Args.hasFlag(options::OPT_munsafe_fp_atomics, + options::OPT_mno_unsafe_fp_atomics, false); if (Arg *A = Args.getLastArg(options::OPT_target_sdk_version_EQ)) { llvm::VersionTuple Version; if (Version.tryParse(A->getValue())) diff --git a/clang/test/CodeGenCUDA/amdgpu-func-attrs.cu b/clang/test/CodeGenCUDA/amdgpu-func-attrs.cu new file mode 100644 index 0000000..6a798c6 --- /dev/null +++ b/clang/test/CodeGenCUDA/amdgpu-func-attrs.cu @@ -0,0 +1,22 @@ +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \ +// RUN: -fcuda-is-device -emit-llvm -o - -x hip %s \ +// RUN: | FileCheck -check-prefixes=NO-UNSAFE-FP-ATOMICS %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \ +// RUN: -fcuda-is-device -emit-llvm -o - -x hip %s \ +// RUN: -munsafe-fp-atomics \ +// RUN: | FileCheck -check-prefixes=UNSAFE-FP-ATOMICS %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm \ +// RUN: -o - -x hip %s -munsafe-fp-atomics \ +// RUN: | FileCheck -check-prefix=NO-UNSAFE-FP-ATOMICS %s + +#include "Inputs/cuda.h" + +__device__ void test() { +// UNSAFE-FP-ATOMICS: define void @_Z4testv() [[ATTR:#[0-9]+]] +} + + +// Make sure this is silently accepted on other targets. +// NO-UNSAFE-FP-ATOMICS-NOT: "amdgpu-unsafe-fp-atomics" + +// UNSAFE-FP-ATOMICS-DAG: attributes [[ATTR]] = {{.*}}"amdgpu-unsafe-fp-atomics"="true" diff --git a/clang/test/Driver/hip-options.hip b/clang/test/Driver/hip-options.hip index fa7b019..46cfe0a 100644 --- a/clang/test/Driver/hip-options.hip +++ b/clang/test/Driver/hip-options.hip @@ -31,3 +31,7 @@ // HOST-NOT: clang{{.*}} "-fcuda-is-device" {{.*}} "-debug-info-kind={{.*}}" // HOST-NOT: clang{{.*}} "-fcuda-is-device" {{.*}} "-debug-info-kind={{.*}}" // HOST: clang{{.*}} "-debug-info-kind={{.*}}" + +// RUN: %clang -### -nogpuinc -nogpulib -munsafe-fp-atomics \ +// RUN: --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=UNSAFE-FP-ATOMICS %s +// UNSAFE-FP-ATOMICS: clang{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-munsafe-fp-atomics" -- 2.7.4