From d2972116923a124de71cea006eca3068bdc381ea Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Mon, 26 Jul 2021 10:25:40 -0400 Subject: [PATCH] [OpenMP] Add a driver flag to enable the new device runtime library This patch adds a driver flag `-fopenmp-target-new-runtime` to optionally enable the new device runtime bitcode library. This allows users to enable the new experimental runtime before it becomes the default in the future. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D106793 --- clang/include/clang/Basic/LangOptions.def | 1 + clang/include/clang/Driver/Options.td | 4 ++++ clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp | 9 ++++++++- clang/lib/Driver/ToolChains/Cuda.cpp | 8 +++++++- .../Driver/Inputs/libomptarget/libomptarget-new-nvptx-sm_35.bc | 1 + clang/test/Driver/openmp-offload-gpu.c | 6 ++++++ 6 files changed, 27 insertions(+), 2 deletions(-) create mode 100644 clang/test/Driver/Inputs/libomptarget/libomptarget-new-nvptx-sm_35.bc diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index 8420a97..08b8d88 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -240,6 +240,7 @@ LANGOPT(OpenMPCUDAForceFullRuntime , 1, 0, "Force to use full runtime in all con LANGOPT(OpenMPCUDANumSMs , 32, 0, "Number of SMs for CUDA devices.") LANGOPT(OpenMPCUDABlocksPerSM , 32, 0, "Number of blocks per SM for CUDA devices.") LANGOPT(OpenMPCUDAReductionBufNum , 32, 1024, "Number of the reduction records in the intermediate reduction buffer used for the teams reductions.") +LANGOPT(OpenMPTargetNewRuntime , 1, 0, "Use the new bitcode library for OpenMP offloading") LANGOPT(OpenMPOptimisticCollapse , 1, 0, "Use at most 32 bits to represent the collapsed loop nest counter.") LANGOPT(RenderScript , 1, 0, "RenderScript") diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 991408c0..0e0aa16 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2376,6 +2376,10 @@ def fopenmp_cuda_blocks_per_sm_EQ : Joined<["-"], "fopenmp-cuda-blocks-per-sm="> Flags<[CC1Option, NoArgumentUnused, HelpHidden]>; def fopenmp_cuda_teams_reduction_recs_num_EQ : Joined<["-"], "fopenmp-cuda-teams-reduction-recs-num=">, Group, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>; +defm openmp_target_new_runtime: BoolFOption<"openmp-target-new-runtime", + LangOpts<"OpenMPTargetNewRuntime">, DefaultFalse, + PosFlag, + NegFlag>; defm openmp_optimistic_collapse : BoolFOption<"openmp-optimistic-collapse", LangOpts<"OpenMPOptimisticCollapse">, DefaultFalse, PosFlag, NegFlag, BothFlags<[NoArgumentUnused, HelpHidden]>>; diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp index 751b8df..5915882 100644 --- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp @@ -222,7 +222,14 @@ void AMDGPUOpenMPToolChain::addClangTargetOptions( if (DriverArgs.hasArg(options::OPT_nogpulib)) return; - std::string BitcodeSuffix = "amdgcn-" + GPUArch; + + std::string BitcodeSuffix; + if (DriverArgs.hasFlag(options::OPT_fopenmp_target_new_runtime, + options::OPT_fno_openmp_target_new_runtime, false)) + BitcodeSuffix = "new-amdgcn-" + GPUArch; + else + BitcodeSuffix = "amdgcn-" + GPUArch; + addOpenMPDeviceRTL(getDriver(), DriverArgs, CC1Args, BitcodeSuffix, getTriple()); } diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index 9e2dd51..2ec5bdc 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -751,7 +751,13 @@ void CudaToolChain::addClangTargetOptions( return; } - std::string BitcodeSuffix = "nvptx-" + GpuArch.str(); + std::string BitcodeSuffix; + if (DriverArgs.hasFlag(options::OPT_fopenmp_target_new_runtime, + options::OPT_fno_openmp_target_new_runtime, false)) + BitcodeSuffix = "new-nvptx-" + GpuArch.str(); + else + BitcodeSuffix = "nvptx-" + GpuArch.str(); + addOpenMPDeviceRTL(getDriver(), DriverArgs, CC1Args, BitcodeSuffix, getTriple()); } diff --git a/clang/test/Driver/Inputs/libomptarget/libomptarget-new-nvptx-sm_35.bc b/clang/test/Driver/Inputs/libomptarget/libomptarget-new-nvptx-sm_35.bc new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/clang/test/Driver/Inputs/libomptarget/libomptarget-new-nvptx-sm_35.bc @@ -0,0 +1 @@ + diff --git a/clang/test/Driver/openmp-offload-gpu.c b/clang/test/Driver/openmp-offload-gpu.c index 42b39db..93184fb 100644 --- a/clang/test/Driver/openmp-offload-gpu.c +++ b/clang/test/Driver/openmp-offload-gpu.c @@ -154,6 +154,11 @@ // RUN: -Xopenmp-target -march=sm_35 --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \ // RUN: -fopenmp-relocatable-target -save-temps -no-canonical-prefixes %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-BCLIB %s +/// Check with the new runtime enabled +// RUN: env LIBRARY_PATH=%S/Inputs/libomptarget %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \ +// RUN: -Xopenmp-target -march=sm_35 --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \ +// RUN: -fopenmp-relocatable-target -fopenmp-target-new-runtime -save-temps -no-canonical-prefixes %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-BCLIB-NEW %s /// The user can override default detection using --libomptarget-nvptx-bc-path=. // RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \ // RUN: --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-nvptx-test.bc \ @@ -162,6 +167,7 @@ // RUN: | FileCheck -check-prefix=CHK-BCLIB-USER %s // CHK-BCLIB: clang{{.*}}-triple{{.*}}nvptx64-nvidia-cuda{{.*}}-mlink-builtin-bitcode{{.*}}libomptarget-nvptx-sm_35.bc +// CHK-BCLIB-NEW: clang{{.*}}-triple{{.*}}nvptx64-nvidia-cuda{{.*}}-mlink-builtin-bitcode{{.*}}libomptarget-new-nvptx-sm_35.bc // CHK-BCLIB-USER: clang{{.*}}-triple{{.*}}nvptx64-nvidia-cuda{{.*}}-mlink-builtin-bitcode{{.*}}libomptarget-nvptx-test.bc // CHK-BCLIB-NOT: {{error:|warning:}} -- 2.7.4