From c63dced93b46ec3ab925d9049f2a8c8901f5d913 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Tue, 27 Dec 2022 22:31:28 -0800 Subject: [PATCH] [OpenMP][JIT] Introduce support for AMDGPU To JIT kernels for AMDGPUs, we need to provide the architecture, the triple, and a post-link callback. The first two are simple; the last one is a little more complicated since we need to invoke `lld`. There is a library interface, but it requires the lld library, which is not generally available, so we go with the executable for now. Either way, we need to write the (amdgcn) object file to disk and read the linker output back from another file. We should try to avoid that in the future. The options for `lld` are copied from the way clang invokes it. Reviewed By: tianshilei1992 Differential Revision: https://reviews.llvm.org/D140720 --- .../plugins-nextgen/amdgpu/src/rtl.cpp | 73 ++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp index 10477ae..28e7034 100644 --- a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp +++ b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -28,10 +29,17 @@ #include "Utilities.h" #include "UtilitiesRTL.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/Frontend/OpenMP/OMPConstants.h" #include "llvm/Frontend/OpenMP/OMPGridValues.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Program.h" +#include "llvm/Support/raw_ostream.h" namespace llvm { namespace omp { @@ -1519,6 +1527,11 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { if (auto Err = initMemoryPools()) return Err; + char GPUName[64]; + if (auto Err = 
getDeviceAttr(HSA_AGENT_INFO_NAME, GPUName)) + return Err; + Arch = GPUName; + // Get the wavefront size. uint32_t WavefrontSize = 0; if (auto Err = getDeviceAttr(HSA_AGENT_INFO_WAVEFRONT_SIZE, WavefrontSize)) @@ -1626,6 +1639,61 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { return Plugin::success(); } + Expected> + doJITPostProcessing(std::unique_ptr MB) const override { + + // TODO: We should try to avoid materialization but there seems to be no + // good linker interface w/o file i/o. + SmallString<128> LinkerOutputFilePath; + std::error_code EC = sys::fs::createTemporaryFile( + "amdgpu-pre-link-jit", ".out", LinkerOutputFilePath); + if (EC) + return createStringError(EC, + "Failed to create temporary file for linker"); + + SmallString<128> LinkerInputFilePath = LinkerOutputFilePath; + LinkerInputFilePath.pop_back_n(2); + + auto FD = raw_fd_ostream(LinkerInputFilePath.data(), EC); + if (EC) + return createStringError(EC, "Failed to open temporary file for linker"); + FD.write(MB->getBufferStart(), MB->getBufferSize()); + FD.close(); + + const auto &ErrorOrPath = sys::findProgramByName("lld"); + if (!ErrorOrPath) + return createStringError(inconvertibleErrorCode(), + "Failed to find `lld` on the PATH."); + + std::string LLDPath = ErrorOrPath.get(); + INFO(OMP_INFOTYPE_PLUGIN_KERNEL, getDeviceId(), + "Using `%s` to link JITed amdgcn ouput.", LLDPath.c_str()); + + std::string MCPU = "-plugin-opt=mcpu=" + getArch(); + + StringRef Args[] = {LLDPath, + "-flavor", + "gnu", + "--no-undefined", + "-shared", + MCPU, + "-o", + LinkerOutputFilePath.data(), + LinkerInputFilePath.data()}; + + std::string Error; + int RC = sys::ExecuteAndWait(LLDPath, Args, std::nullopt, {}, 0, 0, &Error); + if (RC) + return createStringError(inconvertibleErrorCode(), + "Linking optimized bitcode failed: %s", + Error.c_str()); + + return std::move( + MemoryBuffer::getFileOrSTDIN(LinkerOutputFilePath.data()).get()); + } + + std::string getArch() const override { return 
Arch; } + /// Allocate and construct an AMDGPU kernel. Expected constructKernelEntry(const __tgt_offload_entry &KernelEntry, @@ -2027,6 +2095,9 @@ private: /// The agent handler corresponding to the device. hsa_agent_t Agent; + /// The GPU architecture. + std::string Arch; + /// Reference to the host device. AMDHostDeviceTy &HostDevice; @@ -2255,6 +2326,8 @@ struct AMDGPUPluginTy final : public GenericPluginTy { return Plugin::check(Status, "Error in hsa_shut_down: %s"); } + Triple::ArchType getTripleArch() const override { return Triple::amdgcn; } + /// Get the ELF code for recognizing the compatible image binary. uint16_t getMagicElfBits() const override { return ELF::EM_AMDGPU; } -- 2.7.4