From 428bc510bf50f1517b4216c1499885d82c8bce9a Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Tue, 27 Dec 2022 20:43:10 -0800 Subject: [PATCH] [OpenMP] Unify "exec_mode" query code and default to SPMD Defaulting to Generic mode doesn't make much sense as the kernel needs to be prepared for it. SPMD mode is the "native" execution, e.g., for "bare" kernels. It also is the execution method for constructors and destructors (as we might otherwise throw an extra warp onto them). Differential Revision: https://reviews.llvm.org/D140718 --- .../plugins-nextgen/amdgpu/src/rtl.cpp | 26 ++++-------------- .../common/PluginInterface/PluginInterface.cpp | 31 ++++++++++++++++++++++ .../common/PluginInterface/PluginInterface.h | 4 +++ .../libomptarget/plugins-nextgen/cuda/src/rtl.cpp | 30 +++++---------------- 4 files changed, 46 insertions(+), 45 deletions(-) diff --git a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp index 2d1ca52..53c5a20 100644 --- a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp +++ b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp @@ -1630,31 +1630,15 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { Expected constructKernelEntry(const __tgt_offload_entry &KernelEntry, DeviceImageTy &Image) override { - // Create a metadata object for the exec mode global (auto-generated). - StaticGlobalTy ExecModeGlobal( - KernelEntry.name, "_exec_mode"); - - // Retrieve execution mode for the kernel. This may fail since some kernels - // may not have a execution mode. - GenericGlobalHandlerTy &GHandler = Plugin::get().getGlobalHandler(); - if (auto Err = GHandler.readGlobalFromImage(*this, Image, ExecModeGlobal)) { - DP("Failed to read execution mode for '%s': %s\n" - "Using default GENERIC (1) execution mode\n", - KernelEntry.name, toString(std::move(Err)).data()); - // Consume the error since it is acceptable to fail. - consumeError(std::move(Err)); - // In some cases the execution mode is not included, so use the default. - ExecModeGlobal.setValue(llvm::omp::OMP_TGT_EXEC_MODE_GENERIC); - } - // Check that the retrieved execution mode is valid. - if (!GenericKernelTy::isValidExecutionMode(ExecModeGlobal.getValue())) - return Plugin::error("Invalid execution mode %d for '%s'", - ExecModeGlobal.getValue(), KernelEntry.name); + Expected ExecModeOrErr = + getExecutionModeForKernel(KernelEntry.name, Image); + if (!ExecModeOrErr) + return ExecModeOrErr.takeError(); // Allocate and initialize the AMDGPU kernel. AMDGPUKernelTy *AMDKernel = Plugin::get().allocate(); - new (AMDKernel) AMDGPUKernelTy(KernelEntry.name, ExecModeGlobal.getValue()); + new (AMDKernel) AMDGPUKernelTy(KernelEntry.name, ExecModeOrErr.get()); return AMDKernel; } diff --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp index 4d73ab6..8f71a38 100644 --- a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp +++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp @@ -16,6 +16,9 @@ #include "omptarget.h" #include "omptargetplugin.h" +#include "llvm/Frontend/OpenMP/OMPConstants.h" +#include "llvm/Support/Error.h" + #include #include @@ -357,6 +360,34 @@ Error GenericDeviceTy::registerKernelOffloadEntry( return Plugin::success(); } +Expected +GenericDeviceTy::getExecutionModeForKernel(StringRef Name, + DeviceImageTy &Image) { + // Create a metadata object for the exec mode global (auto-generated). + StaticGlobalTy ExecModeGlobal(Name.data(), + "_exec_mode"); + + // Retrieve execution mode for the kernel. This may fail since some kernels + // may not have an execution mode. + GenericGlobalHandlerTy &GHandler = Plugin::get().getGlobalHandler(); + if (auto Err = GHandler.readGlobalFromImage(*this, Image, ExecModeGlobal)) { + // Consume the error since it is acceptable to fail. + [[maybe_unused]] std::string ErrStr = toString(std::move(Err)); + DP("Failed to read execution mode for '%s': %s\n" + "Using default SPMD (2) execution mode\n", + Name.data(), ErrStr.data()); + + return OMP_TGT_EXEC_MODE_SPMD; + } + + // Check that the retrieved execution mode is valid. + if (!GenericKernelTy::isValidExecutionMode(ExecModeGlobal.getValue())) + return Plugin::error("Invalid execution mode %d for '%s'", + ExecModeGlobal.getValue(), Name.data()); + + return ExecModeGlobal.getValue(); +} + Error GenericDeviceTy::registerHostPinnedMemoryBuffer(const void *Buffer, size_t Size) { std::lock_guard Lock(HostAllocationsMutex); diff --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h index 836eb81..774a034 100644 --- a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h +++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h @@ -472,6 +472,10 @@ protected: return ((const char *)It->first + It->second > (const char *)Buffer); } + /// Return the execution mode used for kernel \p Name. + Expected getExecutionModeForKernel(StringRef Name, + DeviceImageTy &Image); + /// Environment variables defined by the LLVM OpenMP implementation /// regarding the initial number of streams and events. UInt32Envar OMPX_InitialNumStreams; diff --git a/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp index 5b9fc77..cb5f004 100644 --- a/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp +++ b/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp @@ -24,6 +24,7 @@ #include "llvm/BinaryFormat/ELF.h" #include "llvm/Frontend/OpenMP/OMPConstants.h" #include "llvm/Frontend/OpenMP/OMPGridValues.h" +#include "llvm/Support/Error.h" namespace llvm { namespace omp { @@ -348,33 +349,14 @@ struct CUDADeviceTy : public GenericDeviceTy { DP("Entry point " DPxMOD " maps to %s (" DPxMOD ")\n", DPxPTR(&KernelEntry), KernelEntry.name, DPxPTR(Func)); - // Create a metadata object for the exec mode global (auto-generated). - StaticGlobalTy ExecModeGlobal( - KernelEntry.name, "_exec_mode"); - - // Retrieve execution mode for the kernel. This may fail since some kernels - // may not have a execution mode. - GenericGlobalHandlerTy &GHandler = Plugin::get().getGlobalHandler(); - if (auto Err = GHandler.readGlobalFromImage(*this, Image, ExecModeGlobal)) { - // In some cases the execution mode is not included, so use the default. - ExecModeGlobal.setValue(llvm::omp::OMP_TGT_EXEC_MODE_GENERIC); - // Consume the error since it is acceptable to fail. - [[maybe_unused]] std::string ErrStr = toString(std::move(Err)); - - DP("Failed to read execution mode for '%s': %s\n" - "Using default GENERIC (1) execution mode\n", - KernelEntry.name, ErrStr.data()); - } - - // Check that the retrieved execution mode is valid. - if (!GenericKernelTy::isValidExecutionMode(ExecModeGlobal.getValue())) - return Plugin::error("Invalid execution mode %d for '%s'", - ExecModeGlobal.getValue(), KernelEntry.name); + Expected ExecModeOrErr = + getExecutionModeForKernel(KernelEntry.name, Image); + if (!ExecModeOrErr) + return ExecModeOrErr.takeError(); // Allocate and initialize the CUDA kernel. CUDAKernelTy *CUDAKernel = Plugin::get().allocate(); - new (CUDAKernel) - CUDAKernelTy(KernelEntry.name, ExecModeGlobal.getValue(), Func); + new (CUDAKernel) CUDAKernelTy(KernelEntry.name, ExecModeOrErr.get(), Func); return CUDAKernel; } -- 2.7.4