`extensions to the OpenMP begin/end declare variant context selector
<https://clang.llvm.org/docs/AttributeReference.html#pragma-omp-declare-variant>`__
that are exposed through LLVM/Clang to the user as well.
+
+Q: What is a way to debug errors from mapping memory to a target device?
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+An experimental way to debug these errors is to use :ref:`remote process
+offloading <remote_offloading_plugin>`.
+By using ``libomptarget.rtl.rpc.so`` and ``openmp-offloading-server``, it is
+possible to explicitly perform memory transfers between processes on the host
+CPU and run sanitizers while doing so in order to catch these errors.
\ No newline at end of file
.. _device_runtime:
+
+.. _remote_offloading_plugin:
+
+Remote Offloading Plugin:
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The remote offloading plugin permits the execution of OpenMP target regions
+on devices in remote hosts in addition to the devices connected to the local
+host. All target devices on the remote host will be exposed to the
+application as if they were local devices, that is, the remote host CPU or
+its GPUs can be offloaded to with the appropriate device number. If the
+server is running on the same host, each device may be identified twice:
+once through the device plugins and once through the device plugins that the
+server application has access to.
+
+This plugin consists of ``libomptarget.rtl.rpc.so`` and
+``openmp-offloading-server`` which should be running on the (remote) host. The
+server application does not have to be running on a remote host, and can
+instead be used on the same host in order to debug memory mapping during offloading.
+These are implemented via gRPC/protobuf so these libraries are required to
+build and use this plugin. The server must also have access to the necessary
+target-specific plugins in order to perform the offloading.
+
+Due to the experimental nature of this plugin, the CMake variable
+``LIBOMPTARGET_ENABLE_EXPERIMENTAL_REMOTE_PLUGIN`` must be set in order to
+build this plugin. For example, the rpc plugin is not designed to be
+thread-safe, the server cannot concurrently handle offloading from multiple
+applications at once (it is synchronous) and will terminate after a single
+execution. Note that ``openmp-offloading-server`` is unable to
+remote offload onto a remote host itself and will error out if this is attempted.
+
+Remote offloading is configured via environment variables at runtime of the OpenMP application:
+ * ``LIBOMPTARGET_RPC_ADDRESS=<Address>:<Port>``
+ * ``LIBOMPTARGET_RPC_ALLOCATOR_MAX=<NumBytes>``
+ * ``LIBOMPTARGET_BLOCK_SIZE=<NumBytes>``
+ * ``LIBOMPTARGET_RPC_LATENCY=<Seconds>``
+
+LIBOMPTARGET_RPC_ADDRESS
+""""""""""""""""""""""""
+The address and port at which the server is running. This needs to be set for
+the server and the application, the default is ``0.0.0.0:50051``. A single
+OpenMP executable can offload onto multiple remote hosts by setting this to
+comma-seperated values of the addresses.
+
+LIBOMPTARGET_RPC_ALLOCATOR_MAX
+""""""""""""""""""""""""""""""
+After allocating this size, the protobuf allocator will clear. This can be set for both endpoints.
+
+LIBOMPTARGET_BLOCK_SIZE
+"""""""""""""""""""""""
+This is the maximum size of a single message while streaming data transfers between the two endpoints and can be set for both endpoints.
+
+LIBOMPTARGET_RPC_LATENCY
+""""""""""""""""""""""""
+This is the maximum amount of time the client will wait for a response from the server.
+
LLVM/OpenMP Target Device Runtime (``libomptarget-ARCH-SUBARCH.bc``)
--------------------------------------------------------------------
add_subdirectory(ppc64le)
add_subdirectory(ve)
add_subdirectory(x86_64)
+add_subdirectory(remote)
# Make sure the parent scope can see the plugins that will be created.
set(LIBOMPTARGET_SYSTEM_TARGETS "${LIBOMPTARGET_SYSTEM_TARGETS}" PARENT_SCOPE)
__tgt_rtl_run_target_region;
__tgt_rtl_run_target_region_async;
__tgt_rtl_synchronize;
+ __tgt_rtl_register_lib;
+ __tgt_rtl_unregister_lib;
local:
*;
};
--- /dev/null
+##===----------------------------------------------------------------------===##
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+##===----------------------------------------------------------------------===##
+#
+# Build a plugin (client) and server for remote offloading.
+#
+##===----------------------------------------------------------------------===#
+
+cmake_minimum_required(VERSION 3.15)
+
+if (NOT(CMAKE_SYSTEM_NAME MATCHES "Linux"))
+ libomptarget_say("Not building remote offloading plugin: only support Linux hosts.")
+ return()
+endif()
+
+if (NOT(LIBOMPTARGET_ENABLE_EXPERIMENTAL_REMOTE_PLUGIN))
+ return()
+endif()
+
+find_package(Protobuf)
+find_package(gRPC CONFIG)
+
+find_program(PROTOC protoc)
+find_program(GRPC_CPP_PLUGIN grpc_cpp_plugin)
+
+if (Protobuf_FOUND AND gRPC_FOUND AND PROTOC AND GRPC_CPP_PLUGIN)
+ libomptarget_say("Building remote offloading plugin.")
+ set(directory "${CMAKE_BINARY_DIR}/include/openmp/libomptarget/plugins/remote/")
+ file(MAKE_DIRECTORY ${directory})
+ execute_process(COMMAND ${CMAKE_COMMAND} -E make_directory ${directory})
+ execute_process(
+ COMMAND protoc --cpp_out=${directory} -I ${CMAKE_CURRENT_SOURCE_DIR}/include ${CMAKE_CURRENT_SOURCE_DIR}/include/openmp.proto
+ COMMAND protoc --grpc_out=${directory} -I ${CMAKE_CURRENT_SOURCE_DIR}/include ${CMAKE_CURRENT_SOURCE_DIR}/include/openmp.proto --plugin=protoc-gen-grpc=${GRPC_CPP_PLUGIN}
+ )
+
+ set(GRPC_SRC_FILES
+ ${directory}/openmp.grpc.pb.cc
+ ${directory}/openmp.pb.cc
+ )
+
+ set(GRPC_INCLUDE_DIR
+ ${directory}
+ )
+else()
+ libomptarget_say("Not building remote offloading plugin: required libraries were not found.")
+endif()
+
+set(RPC_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include/)
+set(RPC_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/lib/)
+
+add_subdirectory(src)
+add_subdirectory(server)
--- /dev/null
+//===----------------- Utils.h - Utilities for Remote RTL -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Utilities for data transfer through protobuf and debugging.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef UTILS_H
+#define UTILS_H
+
+#include "Debug.h"
+#include "omptarget.h"
+#include "openmp.grpc.pb.h"
+#include "openmp.pb.h"
+#include "rtl.h"
+#include <string>
+
+#define CLIENT_DBG(...) \
+ { \
+ if (DebugLevel > 0) { \
+ fprintf(stderr, "[[Client]] --> "); \
+ fprintf(stderr, __VA_ARGS__); \
+ fprintf(stderr, "\n"); \
+ } \
+ }
+
+#define SERVER_DBG(...) \
+ { \
+ if (DebugLevel > 0) { \
+ fprintf(stderr, "[[Server]] --> "); \
+ fprintf(stderr, __VA_ARGS__); \
+ fprintf(stderr, "\n"); \
+ } \
+ }
+
+namespace RemoteOffloading {
+
+using namespace openmp::libomptarget::remote;
+
+using openmp::libomptarget::remote::DeviceOffloadEntry;
+using openmp::libomptarget::remote::TargetBinaryDescription;
+using openmp::libomptarget::remote::TargetOffloadEntry;
+using openmp::libomptarget::remote::TargetTable;
+
+struct RPCConfig {
+ std::vector<std::string> ServerAddresses;
+ uint64_t MaxSize;
+ uint64_t BlockSize;
+ RPCConfig() {
+ ServerAddresses = {"0.0.0.0:50051"};
+ MaxSize = 1 << 30;
+ BlockSize = 1 << 20;
+ }
+};
+
+/// Helper function to parse common environment variables between client/server
+void parseEnvironment(RPCConfig &Config);
+
+/// Loads a target binary description into protobuf.
+void loadTargetBinaryDescription(const __tgt_bin_desc *Desc,
+ TargetBinaryDescription &Request);
+
+/// Unload a target binary description from protobuf. The map is used to keep
+/// track of already copied device images.
+void unloadTargetBinaryDescription(
+ const TargetBinaryDescription *Request, __tgt_bin_desc *Desc,
+ std::unordered_map<const void *, __tgt_device_image *>
+ &HostToRemoteDeviceImage);
+
+/// Frees argument as constructed by loadTargetBinaryDescription
+void freeTargetBinaryDescription(__tgt_bin_desc *Desc);
+
+/// Copies from TargetOffloadEntry protobuf to a tgt_bin_desc during unloading.
+void copyOffloadEntry(const TargetOffloadEntry &EntryResponse,
+ __tgt_offload_entry *Entry);
+
+/// Copies from tgt_bin_desc into TargetOffloadEntry protobuf during loading.
+void copyOffloadEntry(const __tgt_offload_entry *Entry,
+ TargetOffloadEntry *EntryResponse);
+
+/// Shallow copy of offload entry from tgt_bin_desc to TargetOffloadEntry
+/// during loading.
+void shallowCopyOffloadEntry(const __tgt_offload_entry *Entry,
+ TargetOffloadEntry *EntryResponse);
+
+/// Copies DeviceOffloadEntries into table during unloading.
+void copyOffloadEntry(const DeviceOffloadEntry &EntryResponse,
+ __tgt_offload_entry *Entry);
+
+/// Loads tgt_target_table into a TargetTable protobuf message.
+void loadTargetTable(__tgt_target_table *Table, TargetTable &TableResponse,
+ __tgt_device_image *Image);
+
+/// Unloads from a target_table from protobuf.
+void unloadTargetTable(
+ TargetTable &TableResponse, __tgt_target_table *Table,
+ std::unordered_map<void *, void *> &HostToRemoteTargetTableMap);
+
+/// Frees argument as constructed by unloadTargetTable
+void freeTargetTable(__tgt_target_table *Table);
+
+void dump(const void *Start, const void *End);
+void dump(__tgt_offload_entry *Entry);
+void dump(TargetOffloadEntry Entry);
+void dump(__tgt_target_table *Table);
+void dump(__tgt_device_image *Image);
+} // namespace RemoteOffloading
+
+#endif
--- /dev/null
+syntax = "proto3";
+
+package openmp.libomptarget.remote;
+option cc_enable_arenas = true;
+
+service RemoteOffload {
+ rpc Shutdown(Null) returns (I32) {}
+
+ rpc RegisterLib(TargetBinaryDescription) returns (I32) {}
+ rpc UnregisterLib(Pointer) returns (I32) {}
+
+ rpc IsValidBinary(TargetDeviceImagePtr) returns (I32) {}
+ rpc GetNumberOfDevices(Null) returns (I32) {}
+
+ rpc InitDevice(I32) returns (I32) {}
+ rpc InitRequires(I64) returns (I32) {}
+
+ rpc LoadBinary(Binary) returns (TargetTable) {}
+ rpc Synchronize(SynchronizeDevice) returns (I32) {}
+
+ rpc DataAlloc(AllocData) returns (Pointer) {}
+ rpc DataDelete(DeleteData) returns (I32) {}
+
+ rpc DataSubmitAsync(stream SubmitDataAsync) returns (I32) {}
+ rpc DataRetrieveAsync(RetrieveDataAsync) returns (stream Data) {}
+
+ rpc IsDataExchangeable(DevicePair) returns (I32) {}
+ rpc DataExchangeAsync(ExchangeDataAsync) returns (I32) {}
+
+ rpc RunTargetRegionAsync(TargetRegionAsync) returns (I32) {}
+ rpc RunTargetTeamRegionAsync(TargetTeamRegionAsync) returns (I32) {}
+}
+
+message Null {}
+
+message Pointer { uint64 number = 1; }
+
+message I32 { int32 number = 1; }
+
+message I64 { int64 number = 1; }
+
+message DevicePair {
+ int32 src_dev_id = 1;
+ int32 dst_dev_id = 2;
+}
+
+message Binary {
+ uint64 image_ptr = 1;
+ int32 device_id = 2;
+}
+
+message TargetOffloadEntry {
+ bytes data = 1;
+ string name = 2;
+ int32 flags = 3;
+ int32 reserved = 4;
+}
+
+message DeviceOffloadEntry {
+ string name = 1;
+ uint64 addr = 2;
+ int32 flags = 3;
+ int32 reserved = 4;
+ int32 size = 5;
+}
+
+message TargetTable {
+ repeated DeviceOffloadEntry entries = 1;
+ repeated uint64 entry_ptrs = 2;
+}
+
+message TargetDeviceImagePtr {
+ uint64 image_ptr = 1;
+ repeated uint64 entry_ptrs = 2;
+}
+
+message TargetDeviceImage {
+ bytes binary = 1;
+ repeated TargetOffloadEntry entries = 2;
+}
+
+message ImagePtrs {
+ uint64 img_ptr = 1;
+ repeated uint64 entry_ptrs = 2;
+}
+
+message TargetBinaryDescription {
+ repeated ImagePtrs image_ptrs = 1;
+ repeated TargetOffloadEntry entries = 2;
+ repeated TargetDeviceImage images = 3;
+ repeated uint64 entry_ptrs = 4;
+ uint64 bin_ptr = 5;
+}
+
+message SynchronizeDevice {
+ uint64 queue_ptr = 1;
+ int32 device_id = 2;
+}
+
+message AllocData {
+ uint64 size = 1;
+ uint64 hst_ptr = 2;
+ int32 device_id = 3;
+}
+
+message SubmitDataAsync {
+ bytes data = 1;
+ uint64 hst_ptr = 2;
+ uint64 tgt_ptr = 3;
+ uint64 queue_ptr = 4;
+ uint64 start = 5;
+ uint64 size = 6;
+ int32 device_id = 7;
+}
+
+message RetrieveDataAsync {
+ uint64 hst_ptr = 1;
+ uint64 tgt_ptr = 2;
+ uint64 size = 3;
+ uint64 queue_ptr = 4;
+ int32 device_id = 5;
+}
+
+message Data {
+ bytes data = 1;
+ uint64 start = 2;
+ uint64 size = 3;
+ int32 ret = 4;
+}
+
+message ExchangeDataAsync {
+ uint64 src_dev_id = 1;
+ uint64 src_ptr = 2;
+ uint64 dst_dev_id = 3;
+ uint64 dst_ptr = 4;
+ uint64 queue_ptr = 5;
+ uint64 size = 6;
+}
+
+message DeleteData {
+ uint64 tgt_ptr = 1;
+ int32 device_id = 2;
+}
+
+message TargetRegionAsync {
+ repeated uint64 tgt_args = 1;
+ repeated int64 tgt_offsets = 2;
+ uint64 tgt_entry_ptr = 3;
+ uint64 queue_ptr = 4;
+ int32 device_id = 5;
+ int32 arg_num = 6;
+}
+
+message TargetTeamRegionAsync {
+ repeated uint64 tgt_args = 1;
+ repeated int64 tgt_offsets = 2;
+ uint64 tgt_entry_ptr = 3;
+ uint64 loop_tripcount = 4;
+ uint64 queue_ptr = 5;
+ int32 device_id = 6;
+ int32 arg_num = 7;
+ int32 team_num = 8;
+ int32 thread_limit = 9;
+}
--- /dev/null
+//===---------------- Utils.cpp - Utilities for Remote RTL ----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Utilities for data movement and debugging.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Utils.h"
+#include "omptarget.h"
+
+namespace RemoteOffloading {
+void parseEnvironment(RPCConfig &Config) {
+ // TODO: Error handle for incorrect inputs
+ if (const char *Env = std::getenv("LIBOMPTARGET_RPC_ADDRESS")) {
+ Config.ServerAddresses.clear();
+ std::string AddressString = Env;
+ const std::string Delimiter = ",";
+
+ size_t Pos = 0;
+ std::string Token;
+ while ((Pos = AddressString.find(Delimiter)) != std::string::npos) {
+ Token = AddressString.substr(0, Pos);
+ Config.ServerAddresses.push_back(Token);
+ AddressString.erase(0, Pos + Delimiter.length());
+ }
+ Config.ServerAddresses.push_back(AddressString);
+ }
+ if (const char *Env = std::getenv("LIBOMPTARGET_RPC_ALLOCATOR_MAX"))
+ Config.MaxSize = std::stoi(Env);
+ if (const char *Env = std::getenv("LIBOMPTARGET_RPC_BLOCK_SIZE"))
+ Config.BlockSize = std::stoi(Env);
+}
+
+void loadTargetBinaryDescription(const __tgt_bin_desc *Desc,
+ TargetBinaryDescription &Request) {
+ // Keeps track of entries which have already been deep copied.
+ std::vector<void *> DeepCopiedEntryAddrs;
+
+ // Copy Global Offload Entries
+ for (auto *CurEntry = Desc->HostEntriesBegin;
+ CurEntry != Desc->HostEntriesEnd; CurEntry++) {
+ auto *NewEntry = Request.add_entries();
+ copyOffloadEntry(CurEntry, NewEntry);
+
+ // Copy the pointer of the offload entry of the image into the Request
+ Request.add_entry_ptrs((uint64_t)CurEntry);
+ DeepCopiedEntryAddrs.push_back(CurEntry);
+ }
+
+ // Copy Device Images and Device Offload Entries
+ __tgt_device_image *CurImage = Desc->DeviceImages;
+ for (auto I = 0; I < Desc->NumDeviceImages; I++, CurImage++) {
+ auto *Image = Request.add_images();
+ auto Size = (char *)CurImage->ImageEnd - (char *)CurImage->ImageStart;
+ Image->set_binary(CurImage->ImageStart, Size);
+
+ // Copy the pointer of the image into the Request
+ auto *NewImagePtr = Request.add_image_ptrs();
+ NewImagePtr->set_img_ptr((uint64_t)CurImage->ImageStart);
+
+ // Copy Device Offload Entries
+ for (auto *CurEntry = CurImage->EntriesBegin;
+ CurEntry != CurImage->EntriesEnd; CurEntry++) {
+ auto *NewEntry = Image->add_entries();
+
+ auto Entry = std::find(DeepCopiedEntryAddrs.begin(),
+ DeepCopiedEntryAddrs.end(), CurEntry);
+ if (Entry != DeepCopiedEntryAddrs.end()) {
+ // Offload entry has already been loaded
+ shallowCopyOffloadEntry(CurEntry, NewEntry);
+ } else { // Offload Entry has not been loaded into the Request
+ copyOffloadEntry(CurEntry, NewEntry);
+ DeepCopiedEntryAddrs.push_back(CurEntry);
+ }
+
+ // Copy the pointer of the offload entry of the image into the Request
+ NewImagePtr->add_entry_ptrs((uint64_t)CurEntry);
+ }
+ }
+}
+
+void unloadTargetBinaryDescription(
+ const TargetBinaryDescription *Request, __tgt_bin_desc *Desc,
+ std::unordered_map<const void *, __tgt_device_image *>
+ &HostToRemoteDeviceImage) {
+ std::unordered_map<const void *, __tgt_offload_entry *> CopiedOffloadEntries;
+ Desc->NumDeviceImages = Request->images_size();
+ Desc->DeviceImages = new __tgt_device_image[Desc->NumDeviceImages];
+
+ if (Request->entries_size())
+ Desc->HostEntriesBegin = new __tgt_offload_entry[Request->entries_size()];
+ else {
+ Desc->HostEntriesBegin = nullptr;
+ Desc->HostEntriesEnd = nullptr;
+ }
+
+ // Copy Global Offload Entries
+ __tgt_offload_entry *CurEntry = Desc->HostEntriesBegin;
+ for (int i = 0; i < Request->entries_size(); i++) {
+ copyOffloadEntry(Request->entries()[i], CurEntry);
+ CopiedOffloadEntries[(void *)Request->entry_ptrs()[i]] = CurEntry;
+ CurEntry++;
+ }
+ Desc->HostEntriesEnd = CurEntry;
+
+ // Copy Device Images and Device Offload Entries
+ __tgt_device_image *CurImage = Desc->DeviceImages;
+ auto ImageItr = Request->image_ptrs().begin();
+ for (auto Image : Request->images()) {
+ // Copy Device Offload Entries
+ auto *CurEntry = Desc->HostEntriesBegin;
+ bool Found = false;
+
+ if (!Desc->HostEntriesBegin) {
+ CurImage->EntriesBegin = nullptr;
+ CurImage->EntriesEnd = nullptr;
+ }
+
+ for (int i = 0; i < Image.entries_size(); i++) {
+ auto TgtEntry =
+ CopiedOffloadEntries.find((void *)Request->entry_ptrs()[i]);
+ if (TgtEntry != CopiedOffloadEntries.end()) {
+ if (!Found)
+ CurImage->EntriesBegin = CurEntry;
+
+ Found = true;
+ if (Found) {
+ CurImage->EntriesEnd = CurEntry + 1;
+ }
+ } else {
+ Found = false;
+ copyOffloadEntry(Image.entries()[i], CurEntry);
+ CopiedOffloadEntries[(void *)(Request->entry_ptrs()[i])] = CurEntry;
+ }
+ CurEntry++;
+ }
+
+ // Copy Device Image
+ CurImage->ImageStart = new uint8_t[Image.binary().size()];
+ memcpy(CurImage->ImageStart,
+ static_cast<const void *>(Image.binary().data()),
+ Image.binary().size());
+ CurImage->ImageEnd =
+ (void *)((char *)CurImage->ImageStart + Image.binary().size());
+
+ HostToRemoteDeviceImage[(void *)ImageItr->img_ptr()] = CurImage;
+ CurImage++;
+ ImageItr++;
+ }
+}
+
+void freeTargetBinaryDescription(__tgt_bin_desc *Desc) {
+ __tgt_device_image *CurImage = Desc->DeviceImages;
+ for (auto I = 0; I < Desc->NumDeviceImages; I++, CurImage++)
+ delete[](uint64_t *) CurImage->ImageStart;
+
+ delete[] Desc->DeviceImages;
+
+ for (auto *Entry = Desc->HostEntriesBegin; Entry != Desc->HostEntriesEnd;
+ Entry++) {
+ free(Entry->name);
+ free(Entry->addr);
+ }
+
+ delete[] Desc->HostEntriesBegin;
+}
+
+void freeTargetTable(__tgt_target_table *Table) {
+ for (auto *Entry = Table->EntriesBegin; Entry != Table->EntriesEnd; Entry++)
+ free(Entry->name);
+
+ delete[] Table->EntriesBegin;
+}
+
+void loadTargetTable(__tgt_target_table *Table, TargetTable &TableResponse,
+ __tgt_device_image *Image) {
+ auto *ImageEntry = Image->EntriesBegin;
+ for (__tgt_offload_entry *CurEntry = Table->EntriesBegin;
+ CurEntry != Table->EntriesEnd; CurEntry++, ImageEntry++) {
+ // TODO: This can probably be trimmed substantially.
+ auto *NewEntry = TableResponse.add_entries();
+ NewEntry->set_name(CurEntry->name);
+ NewEntry->set_addr((uint64_t)CurEntry->addr);
+ NewEntry->set_flags(CurEntry->flags);
+ NewEntry->set_reserved(CurEntry->reserved);
+ NewEntry->set_size(CurEntry->size);
+ TableResponse.add_entry_ptrs((int64_t)CurEntry);
+ }
+}
+
+void unloadTargetTable(
+ TargetTable &TableResponse, __tgt_target_table *Table,
+ std::unordered_map<void *, void *> &HostToRemoteTargetTableMap) {
+ Table->EntriesBegin = new __tgt_offload_entry[TableResponse.entries_size()];
+
+ auto *CurEntry = Table->EntriesBegin;
+ for (int i = 0; i < TableResponse.entries_size(); i++) {
+ copyOffloadEntry(TableResponse.entries()[i], CurEntry);
+ HostToRemoteTargetTableMap[CurEntry->addr] =
+ (void *)TableResponse.entry_ptrs()[i];
+ CurEntry++;
+ }
+ Table->EntriesEnd = CurEntry;
+}
+
+void copyOffloadEntry(const TargetOffloadEntry &EntryResponse,
+ __tgt_offload_entry *Entry) {
+ Entry->name = strdup(EntryResponse.name().c_str());
+ Entry->reserved = EntryResponse.reserved();
+ Entry->flags = EntryResponse.flags();
+ Entry->addr = strdup(EntryResponse.data().c_str());
+ Entry->size = EntryResponse.data().size();
+}
+
+void copyOffloadEntry(const DeviceOffloadEntry &EntryResponse,
+ __tgt_offload_entry *Entry) {
+ Entry->name = strdup(EntryResponse.name().c_str());
+ Entry->reserved = EntryResponse.reserved();
+ Entry->flags = EntryResponse.flags();
+ Entry->addr = (void *)EntryResponse.addr();
+ Entry->size = EntryResponse.size();
+}
+
+/// We shallow copy with just the name because it is a convenient identifier, we
+/// do actually just match off of the address.
+void shallowCopyOffloadEntry(const __tgt_offload_entry *Entry,
+ TargetOffloadEntry *EntryResponse) {
+ EntryResponse->set_name(Entry->name);
+}
+
+void copyOffloadEntry(const __tgt_offload_entry *Entry,
+ TargetOffloadEntry *EntryResponse) {
+ shallowCopyOffloadEntry(Entry, EntryResponse);
+ EntryResponse->set_reserved(Entry->reserved);
+ EntryResponse->set_flags(Entry->flags);
+ EntryResponse->set_data(Entry->addr, Entry->size);
+}
+
+/// Dumps the memory region from Start to End in order to debug memory transfer
+/// errors within the plugin
+void dump(const void *Start, const void *End) {
+ unsigned char Line[17];
+ const unsigned char *PrintCharacter = (const unsigned char *)Start;
+
+ unsigned int I = 0;
+ for (; I < ((const int *)End - (const int *)Start); I++) {
+ if ((I % 16) == 0) {
+ if (I != 0)
+ printf(" %s\n", Line);
+
+ printf(" %04x ", I);
+ }
+
+ printf(" %02x", PrintCharacter[I]);
+
+ if ((PrintCharacter[I] < 0x20) || (PrintCharacter[I] > 0x7e))
+ Line[I % 16] = '.';
+ else
+ Line[I % 16] = PrintCharacter[I];
+
+ Line[(I % 16) + 1] = '\0';
+ }
+
+ while ((I % 16) != 0) {
+ printf(" ");
+ I++;
+ }
+
+ printf(" %s\n", Line);
+}
+
+void dump(__tgt_offload_entry *Entry) {
+ fprintf(stderr, "Entry (%p):\n", (void *)Entry);
+ fprintf(stderr, " Name: %s (%p)\n", Entry->name, (void *)&Entry->name);
+ fprintf(stderr, " Reserved: %d (%p)\n", Entry->reserved,
+ (void *)&Entry->reserved);
+ fprintf(stderr, " Flags: %d (%p)\n", Entry->flags, (void *)&Entry->flags);
+ fprintf(stderr, " Addr: %p\n", Entry->addr);
+ fprintf(stderr, " Size: %lu\n", Entry->size);
+}
+
+void dump(__tgt_target_table *Table) {
+ for (auto *CurEntry = Table->EntriesBegin; CurEntry != Table->EntriesEnd;
+ CurEntry++)
+ dump(CurEntry);
+}
+
+void dump(TargetOffloadEntry Entry) {
+ fprintf(stderr, "Entry: ");
+ fprintf(stderr, " %s\n", Entry.name().c_str());
+ fprintf(stderr, " %d\n", Entry.reserved());
+ fprintf(stderr, " %d\n", Entry.flags());
+ fprintf(stderr, " %ld\n", Entry.data().size());
+ dump(static_cast<const void *>(Entry.data().data()),
+ static_cast<const void *>((Entry.data().c_str() + Entry.data().size())));
+}
+
+void dump(__tgt_device_image *Image) {
+ dump(Image->ImageStart, Image->ImageEnd);
+ __tgt_offload_entry *EntryItr = Image->EntriesBegin;
+ for (; EntryItr != Image->EntriesEnd; EntryItr++)
+ dump(EntryItr);
+}
+
+void dump(std::unordered_map<void *, __tgt_offload_entry *> &Map) {
+ fprintf(stderr, "Host to Remote Entry Map:\n");
+ for (auto Entry : Map)
+ fprintf(stderr, " Host (%p) -> Tgt (%p): Addr((%p))\n", Entry.first,
+ (void *)Entry.second, (void *)Entry.second->addr);
+}
+} // namespace RemoteOffloading
\ No newline at end of file
--- /dev/null
+##===----------------------------------------------------------------------===##
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+##===----------------------------------------------------------------------===##
+#
+# Build server for remote offloading.
+#
+##===----------------------------------------------------------------------===##
+
+include_directories(${LIBOMPTARGET_DEP_LIBELF_INCLUDE_DIRS})
+include_directories(${LIBOMPTARGET_SRC_DIR})
+include_directories(${LIBOMPTARGET_INCLUDE_DIR})
+include_directories(${GRPC_INCLUDE_DIR})
+include_directories(${RPC_INCLUDE_DIR})
+
+add_executable(openmp-offloading-server
+ ${LIBOMPTARGET_SRC_FILES}
+ ${GRPC_SRC_FILES}
+ ${RPC_SRC_DIR}/Utils.cpp
+ Server.cpp
+ OffloadingServer.cpp
+)
+
+target_link_libraries(openmp-offloading-server
+ grpc++
+ protobuf
+ "-ldl" "-lomp" "-fopenmp" "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/../../exports" ${LIBOMPTARGET_DEP_LIBELF_LIBRARIES})
--- /dev/null
+//===------------- OffloadingServer.cpp - Server Application --------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Offloading server for remote host.
+//
+//===----------------------------------------------------------------------===//
+
+#include <future>
+#include <grpcpp/server.h>
+#include <grpcpp/server_builder.h>
+#include <iostream>
+#include <thread>
+
+#include "Server.h"
+
+using grpc::Server;
+using grpc::ServerBuilder;
+
+std::promise<void> ShutdownPromise;
+
+int main() {
+ RPCConfig Config;
+ parseEnvironment(Config);
+
+ RemoteOffloadImpl Service(Config.MaxSize, Config.BlockSize);
+
+ ServerBuilder Builder;
+ Builder.AddListeningPort(Config.ServerAddresses[0],
+ grpc::InsecureServerCredentials());
+ Builder.RegisterService(&Service);
+ Builder.SetMaxMessageSize(INT_MAX);
+ std::unique_ptr<Server> Server(Builder.BuildAndStart());
+ if (getDebugLevel())
+ std::cerr << "Server listening on " << Config.ServerAddresses[0]
+ << std::endl;
+
+ auto WaitForServer = [&]() { Server->Wait(); };
+
+ std::thread ServerThread(WaitForServer);
+
+ auto ShutdownFuture = ShutdownPromise.get_future();
+ ShutdownFuture.wait();
+ Server->Shutdown();
+ ServerThread.join();
+
+ return 0;
+}
--- /dev/null
+//===----------------- Server.cpp - Server Implementation -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Offloading gRPC server for remote host.
+//
+//===----------------------------------------------------------------------===//
+
+#include <cmath>
+#include <future>
+
+#include "Server.h"
+#include "omptarget.h"
+#include "openmp.grpc.pb.h"
+#include "openmp.pb.h"
+
+using grpc::WriteOptions;
+
+extern std::promise<void> ShutdownPromise;
+
+Status RemoteOffloadImpl::Shutdown(ServerContext *Context, const Null *Request,
+ I32 *Reply) {
+ SERVER_DBG("Shutting down the server");
+
+ Reply->set_number(0);
+ ShutdownPromise.set_value();
+ return Status::OK;
+}
+
+Status
+RemoteOffloadImpl::RegisterLib(ServerContext *Context,
+ const TargetBinaryDescription *Description,
+ I32 *Reply) {
+ SERVER_DBG("Registering library");
+
+ auto Desc = std::make_unique<__tgt_bin_desc>();
+
+ unloadTargetBinaryDescription(Description, Desc.get(),
+ HostToRemoteDeviceImage);
+ PM->RTLs.RegisterLib(Desc.get());
+
+ if (Descriptions.find((void *)Description->bin_ptr()) != Descriptions.end())
+ freeTargetBinaryDescription(
+ Descriptions[(void *)Description->bin_ptr()].get());
+ else
+ Descriptions[(void *)Description->bin_ptr()] = std::move(Desc);
+
+ SERVER_DBG("Registered library");
+ Reply->set_number(0);
+ return Status::OK;
+}
+
+Status RemoteOffloadImpl::UnregisterLib(ServerContext *Context,
+ const Pointer *Request, I32 *Reply) {
+ SERVER_DBG("Unregistering library");
+
+ if (Descriptions.find((void *)Request->number()) == Descriptions.end()) {
+ Reply->set_number(1);
+ return Status::OK;
+ }
+
+ PM->RTLs.UnregisterLib(Descriptions[(void *)Request->number()].get());
+ freeTargetBinaryDescription(Descriptions[(void *)Request->number()].get());
+ Descriptions.erase((void *)Request->number());
+
+ SERVER_DBG("Unregistered library");
+ Reply->set_number(0);
+ return Status::OK;
+}
+
+Status RemoteOffloadImpl::IsValidBinary(ServerContext *Context,
+ const TargetDeviceImagePtr *DeviceImage,
+ I32 *IsValid) {
+ SERVER_DBG("Checking if binary (%p) is valid",
+ (void *)(DeviceImage->image_ptr()));
+
+ __tgt_device_image *Image =
+ HostToRemoteDeviceImage[(void *)DeviceImage->image_ptr()];
+
+ IsValid->set_number(0);
+
+ for (auto &RTL : PM->RTLs.AllRTLs)
+ if (auto Ret = RTL.is_valid_binary(Image)) {
+ IsValid->set_number(Ret);
+ break;
+ }
+
+ SERVER_DBG("Checked if binary (%p) is valid",
+ (void *)(DeviceImage->image_ptr()));
+ return Status::OK;
+}
+
+Status RemoteOffloadImpl::GetNumberOfDevices(ServerContext *Context,
+ const Null *Null,
+ I32 *NumberOfDevices) {
+ SERVER_DBG("Getting number of devices");
+ std::call_once(PM->RTLs.initFlag, &RTLsTy::LoadRTLs, &PM->RTLs);
+
+ int32_t Devices = 0;
+ PM->RTLsMtx.lock();
+ for (auto &RTL : PM->RTLs.AllRTLs)
+ Devices += RTL.NumberOfDevices;
+ PM->RTLsMtx.unlock();
+
+ NumberOfDevices->set_number(Devices);
+
+ SERVER_DBG("Got number of devices");
+ return Status::OK;
+}
+
+Status RemoteOffloadImpl::InitDevice(ServerContext *Context,
+ const I32 *DeviceNum, I32 *Reply) {
+ SERVER_DBG("Initializing device %d", DeviceNum->number());
+
+ Reply->set_number(PM->Devices[DeviceNum->number()].RTL->init_device(
+ mapHostRTLDeviceId(DeviceNum->number())));
+
+ SERVER_DBG("Initialized device %d", DeviceNum->number());
+ return Status::OK;
+}
+
+Status RemoteOffloadImpl::InitRequires(ServerContext *Context,
+ const I64 *RequiresFlag, I32 *Reply) {
+ SERVER_DBG("Initializing requires for devices");
+
+ for (auto &Device : PM->Devices)
+ if (Device.RTL->init_requires)
+ Device.RTL->init_requires(RequiresFlag->number());
+ Reply->set_number(RequiresFlag->number());
+
+ SERVER_DBG("Initialized requires for devices");
+ return Status::OK;
+}
+
+Status RemoteOffloadImpl::LoadBinary(ServerContext *Context,
+ const Binary *Binary, TargetTable *Reply) {
+ SERVER_DBG("Loading binary (%p) to device %d", (void *)Binary->image_ptr(),
+ Binary->device_id());
+
+ __tgt_device_image *Image =
+ HostToRemoteDeviceImage[(void *)Binary->image_ptr()];
+
+ Table = PM->Devices[Binary->device_id()].RTL->load_binary(
+ mapHostRTLDeviceId(Binary->device_id()), Image);
+ if (Table)
+ loadTargetTable(Table, *Reply, Image);
+
+ SERVER_DBG("Loaded binary (%p) to device %d", (void *)Binary->image_ptr(),
+ Binary->device_id());
+ return Status::OK;
+}
+
+Status RemoteOffloadImpl::Synchronize(ServerContext *Context,
+ const SynchronizeDevice *Info,
+ I32 *Reply) {
+ SERVER_DBG("Synchronizing device %d (probably won't work)",
+ Info->device_id());
+
+ void *AsyncInfoPtr = (void *)Info->queue_ptr();
+ Reply->set_number(0);
+ if (PM->Devices[Info->device_id()].RTL->synchronize)
+ Reply->set_number(PM->Devices[Info->device_id()].synchronize(
+ (__tgt_async_info *)AsyncInfoPtr));
+
+ SERVER_DBG("Synchronized device %d", Info->device_id());
+ return Status::OK;
+}
+
+Status RemoteOffloadImpl::IsDataExchangeable(ServerContext *Context,
+ const DevicePair *Request,
+ I32 *Reply) {
+ SERVER_DBG("Checking if data exchangable between device %d and device %d",
+ Request->src_dev_id(), Request->dst_dev_id());
+
+ Reply->set_number(-1);
+ if (PM->Devices[mapHostRTLDeviceId(Request->src_dev_id())]
+ .RTL->is_data_exchangable)
+ Reply->set_number(PM->Devices[mapHostRTLDeviceId(Request->src_dev_id())]
+ .RTL->is_data_exchangable(Request->src_dev_id(),
+ Request->dst_dev_id()));
+
+ SERVER_DBG("Checked if data exchangable between device %d and device %d",
+ Request->src_dev_id(), Request->dst_dev_id());
+ return Status::OK;
+}
+
+Status RemoteOffloadImpl::DataAlloc(ServerContext *Context,
+ const AllocData *Request, Pointer *Reply) {
+ SERVER_DBG("Allocating %ld bytes on sevice %d", Request->size(),
+ Request->device_id());
+
+ uint64_t TgtPtr = (uint64_t)PM->Devices[Request->device_id()].RTL->data_alloc(
+ mapHostRTLDeviceId(Request->device_id()), Request->size(),
+ (void *)Request->hst_ptr());
+ Reply->set_number(TgtPtr);
+
+ SERVER_DBG("Allocated at " DPxMOD "", DPxPTR((void *)TgtPtr));
+
+ return Status::OK;
+}
+
+Status RemoteOffloadImpl::DataSubmitAsync(ServerContext *Context,
+ ServerReader<SubmitDataAsync> *Reader,
+ I32 *Reply) {
+ SubmitDataAsync Request;
+ uint8_t *HostCopy = nullptr;
+ while (Reader->Read(&Request)) {
+ if (Request.start() == 0 && Request.size() == Request.data().size()) {
+ SERVER_DBG("Submitting %lu bytes async to (%p) on device %d",
+ Request.data().size(), (void *)Request.tgt_ptr(),
+ Request.device_id());
+
+ SERVER_DBG(" Host Pointer Info: %p, %p", (void *)Request.hst_ptr(),
+ static_cast<const void *>(Request.data().data()));
+
+ Reader->SendInitialMetadata();
+
+ Reply->set_number(PM->Devices[Request.device_id()].RTL->data_submit(
+ mapHostRTLDeviceId(Request.device_id()), (void *)Request.tgt_ptr(),
+ (void *)Request.data().data(), Request.data().size()));
+
+ SERVER_DBG("Submitted %lu bytes async to (%p) on device %d",
+ Request.data().size(), (void *)Request.tgt_ptr(),
+ Request.device_id());
+
+ return Status::OK;
+ }
+ if (!HostCopy) {
+ HostCopy = new uint8_t[Request.size()];
+ Reader->SendInitialMetadata();
+ }
+
+ SERVER_DBG("Submitting %lu-%lu/%lu bytes async to (%p) on device %d",
+ Request.start(), Request.start() + Request.data().size(),
+ Request.size(), (void *)Request.tgt_ptr(), Request.device_id());
+
+ memcpy((void *)((char *)HostCopy + Request.start()), Request.data().data(),
+ Request.data().size());
+ }
+ SERVER_DBG(" Host Pointer Info: %p, %p", (void *)Request.hst_ptr(),
+ static_cast<const void *>(Request.data().data()));
+
+ Reply->set_number(PM->Devices[Request.device_id()].RTL->data_submit(
+ mapHostRTLDeviceId(Request.device_id()), (void *)Request.tgt_ptr(),
+ HostCopy, Request.size()));
+
+ delete[] HostCopy;
+
+ SERVER_DBG("Submitted %lu bytes to (%p) on device %d", Request.data().size(),
+ (void *)Request.tgt_ptr(), Request.device_id());
+
+ return Status::OK;
+}
+
+Status RemoteOffloadImpl::DataRetrieveAsync(ServerContext *Context,
+ const RetrieveDataAsync *Request,
+ ServerWriter<Data> *Writer) {
+ auto HstPtr = std::make_unique<char[]>(Request->size());
+ auto Ret = PM->Devices[Request->device_id()].RTL->data_retrieve(
+ mapHostRTLDeviceId(Request->device_id()), HstPtr.get(),
+ (void *)Request->tgt_ptr(), Request->size());
+
+ if (Arena->SpaceAllocated() >= MaxSize)
+ Arena->Reset();
+
+ if (Request->size() > BlockSize) {
+ uint64_t Start = 0, End = BlockSize;
+ for (auto I = 0; I < ceil((float)Request->size() / BlockSize); I++) {
+ auto *Reply = protobuf::Arena::CreateMessage<Data>(Arena.get());
+
+ Reply->set_start(Start);
+ Reply->set_size(Request->size());
+ Reply->set_data((char *)HstPtr.get() + Start, End - Start);
+ Reply->set_ret(Ret);
+
+ SERVER_DBG("Retrieving %lu-%lu/%lu bytes from (%p) on device %d", Start,
+ End, Request->size(), (void *)Request->tgt_ptr(),
+ mapHostRTLDeviceId(Request->device_id()));
+
+ if (!Writer->Write(*Reply)) {
+ CLIENT_DBG("Broken stream when submitting data");
+ }
+
+ SERVER_DBG("Retrieved %lu-%lu/%lu bytes from (%p) on device %d", Start,
+ End, Request->size(), (void *)Request->tgt_ptr(),
+ mapHostRTLDeviceId(Request->device_id()));
+
+ Start += BlockSize;
+ End += BlockSize;
+ if (End >= Request->size())
+ End = Request->size();
+ }
+ } else {
+ auto *Reply = protobuf::Arena::CreateMessage<Data>(Arena.get());
+
+ SERVER_DBG("Retrieve %lu bytes from (%p) on device %d", Request->size(),
+ (void *)Request->tgt_ptr(),
+ mapHostRTLDeviceId(Request->device_id()));
+
+ Reply->set_start(0);
+ Reply->set_size(Request->size());
+ Reply->set_data((char *)HstPtr.get(), Request->size());
+ Reply->set_ret(Ret);
+
+ SERVER_DBG("Retrieved %lu bytes from (%p) on device %d", Request->size(),
+ (void *)Request->tgt_ptr(),
+ mapHostRTLDeviceId(Request->device_id()));
+
+ Writer->WriteLast(*Reply, WriteOptions());
+ }
+
+ return Status::OK;
+}
+
+Status RemoteOffloadImpl::DataExchangeAsync(ServerContext *Context,
+ const ExchangeDataAsync *Request,
+ I32 *Reply) {
+ SERVER_DBG(
+ "Exchanging data asynchronously from device %d (%p) to device %d (%p) of "
+ "size %lu",
+ mapHostRTLDeviceId(Request->src_dev_id()), (void *)Request->src_ptr(),
+ mapHostRTLDeviceId(Request->dst_dev_id()), (void *)Request->dst_ptr(),
+ Request->size());
+
+ if (PM->Devices[Request->src_dev_id()].RTL->data_exchange) {
+ int32_t Ret = PM->Devices[Request->src_dev_id()].RTL->data_exchange(
+ mapHostRTLDeviceId(Request->src_dev_id()), (void *)Request->src_ptr(),
+ mapHostRTLDeviceId(Request->dst_dev_id()), (void *)Request->dst_ptr(),
+ Request->size());
+ Reply->set_number(Ret);
+ } else
+ Reply->set_number(-1);
+
+ SERVER_DBG(
+ "Exchanged data asynchronously from device %d (%p) to device %d (%p) of "
+ "size %lu",
+ mapHostRTLDeviceId(Request->src_dev_id()), (void *)Request->src_ptr(),
+ mapHostRTLDeviceId(Request->dst_dev_id()), (void *)Request->dst_ptr(),
+ Request->size());
+ return Status::OK;
+}
+
+Status RemoteOffloadImpl::DataDelete(ServerContext *Context,
+ const DeleteData *Request, I32 *Reply) {
+ SERVER_DBG("Deleting data from (%p) on device %d", (void *)Request->tgt_ptr(),
+ mapHostRTLDeviceId(Request->device_id()));
+
+ auto Ret = PM->Devices[Request->device_id()].RTL->data_delete(
+ mapHostRTLDeviceId(Request->device_id()), (void *)Request->tgt_ptr());
+ Reply->set_number(Ret);
+
+ SERVER_DBG("Deleted data from (%p) on device %d", (void *)Request->tgt_ptr(),
+ mapHostRTLDeviceId(Request->device_id()));
+ return Status::OK;
+}
+
+Status RemoteOffloadImpl::RunTargetRegionAsync(ServerContext *Context,
+ const TargetRegionAsync *Request,
+ I32 *Reply) {
+ SERVER_DBG("Running TargetRegionAsync on device %d with %d args",
+ mapHostRTLDeviceId(Request->device_id()), Request->arg_num());
+
+ std::vector<uint8_t> TgtArgs(Request->arg_num());
+ for (auto I = 0; I < Request->arg_num(); I++)
+ TgtArgs[I] = (uint64_t)Request->tgt_args()[I];
+
+ std::vector<ptrdiff_t> TgtOffsets(Request->arg_num());
+ const auto *TgtOffsetItr = Request->tgt_offsets().begin();
+ for (auto I = 0; I < Request->arg_num(); I++, TgtOffsetItr++)
+ TgtOffsets[I] = (ptrdiff_t)*TgtOffsetItr;
+
+ void *TgtEntryPtr = ((__tgt_offload_entry *)Request->tgt_entry_ptr())->addr;
+
+ int32_t Ret = PM->Devices[Request->device_id()].RTL->run_region(
+ mapHostRTLDeviceId(Request->device_id()), TgtEntryPtr,
+ (void **)TgtArgs.data(), TgtOffsets.data(), Request->arg_num());
+
+ Reply->set_number(Ret);
+
+ SERVER_DBG("Ran TargetRegionAsync on device %d with %d args",
+ mapHostRTLDeviceId(Request->device_id()), Request->arg_num());
+ return Status::OK;
+}
+
+Status RemoteOffloadImpl::RunTargetTeamRegionAsync(
+ ServerContext *Context, const TargetTeamRegionAsync *Request, I32 *Reply) {
+ SERVER_DBG("Running TargetTeamRegionAsync on device %d with %d args",
+ mapHostRTLDeviceId(Request->device_id()), Request->arg_num());
+
+ std::vector<uint64_t> TgtArgs(Request->arg_num());
+ for (auto I = 0; I < Request->arg_num(); I++)
+ TgtArgs[I] = (uint64_t)Request->tgt_args()[I];
+
+ std::vector<ptrdiff_t> TgtOffsets(Request->arg_num());
+ const auto *TgtOffsetItr = Request->tgt_offsets().begin();
+ for (auto I = 0; I < Request->arg_num(); I++, TgtOffsetItr++)
+ TgtOffsets[I] = (ptrdiff_t)*TgtOffsetItr;
+
+ void *TgtEntryPtr = ((__tgt_offload_entry *)Request->tgt_entry_ptr())->addr;
+ int32_t Ret = PM->Devices[Request->device_id()].RTL->run_team_region(
+ mapHostRTLDeviceId(Request->device_id()), TgtEntryPtr,
+ (void **)TgtArgs.data(), TgtOffsets.data(), Request->arg_num(),
+ Request->team_num(), Request->thread_limit(), Request->loop_tripcount());
+
+ Reply->set_number(Ret);
+
+ SERVER_DBG("Ran TargetTeamRegionAsync on device %d with %d args",
+ mapHostRTLDeviceId(Request->device_id()), Request->arg_num());
+ return Status::OK;
+}
+
+int32_t RemoteOffloadImpl::mapHostRTLDeviceId(int32_t RTLDeviceID) {
+ for (auto &RTL : PM->RTLs.UsedRTLs) {
+ if (RTLDeviceID - RTL->NumberOfDevices >= 0)
+ RTLDeviceID -= RTL->NumberOfDevices;
+ else
+ break;
+ }
+ return RTLDeviceID;
+}
--- /dev/null
+//===-------------------------- Server.h - Server -------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Offloading gRPC server for remote host.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OPENMP_LIBOMPTARGET_PLUGINS_REMOTE_SERVER_SERVER_H
+#define LLVM_OPENMP_LIBOMPTARGET_PLUGINS_REMOTE_SERVER_SERVER_H
+
+#include <grpcpp/server_context.h>
+
+#include "Utils.h"
+#include "device.h"
+#include "omptarget.h"
+#include "openmp.grpc.pb.h"
+#include "openmp.pb.h"
+#include "rtl.h"
+
+using grpc::ServerContext;
+using grpc::ServerReader;
+using grpc::ServerWriter;
+using grpc::Status;
+
+using namespace openmp::libomptarget::remote;
+using namespace RemoteOffloading;
+
+using namespace google;
+
+extern PluginManager *PM;
+
+class RemoteOffloadImpl final : public RemoteOffload::Service {
+private:
+ int32_t mapHostRTLDeviceId(int32_t RTLDeviceID);
+
+ std::unordered_map<const void *, __tgt_device_image *>
+ HostToRemoteDeviceImage;
+ std::unordered_map<const void *, __tgt_offload_entry *>
+ HostToRemoteOffloadEntry;
+ std::unordered_map<const void *, std::unique_ptr<__tgt_bin_desc>>
+ Descriptions;
+ __tgt_target_table *Table = nullptr;
+
+ int DebugLevel;
+ uint64_t MaxSize;
+ uint64_t BlockSize;
+ std::unique_ptr<protobuf::Arena> Arena;
+
+public:
+ RemoteOffloadImpl(uint64_t MaxSize, uint64_t BlockSize)
+ : MaxSize(MaxSize), BlockSize(BlockSize) {
+ DebugLevel = getDebugLevel();
+ Arena = std::make_unique<protobuf::Arena>();
+ }
+
+ Status Shutdown(ServerContext *Context, const Null *Request,
+ I32 *Reply) override;
+
+ Status RegisterLib(ServerContext *Context,
+ const TargetBinaryDescription *Description,
+ I32 *Reply) override;
+ Status UnregisterLib(ServerContext *Context, const Pointer *Request,
+ I32 *Reply) override;
+
+ Status IsValidBinary(ServerContext *Context,
+ const TargetDeviceImagePtr *Image,
+ I32 *IsValid) override;
+ Status GetNumberOfDevices(ServerContext *Context, const Null *Null,
+ I32 *NumberOfDevices) override;
+
+ Status InitDevice(ServerContext *Context, const I32 *DeviceNum,
+ I32 *Reply) override;
+ Status InitRequires(ServerContext *Context, const I64 *RequiresFlag,
+ I32 *Reply) override;
+
+ Status LoadBinary(ServerContext *Context, const Binary *Binary,
+ TargetTable *Reply) override;
+ Status Synchronize(ServerContext *Context, const SynchronizeDevice *Info,
+ I32 *Reply) override;
+ Status IsDataExchangeable(ServerContext *Context, const DevicePair *Request,
+ I32 *Reply) override;
+
+ Status DataAlloc(ServerContext *Context, const AllocData *Request,
+ Pointer *Reply) override;
+
+ Status DataSubmitAsync(ServerContext *Context,
+ ServerReader<SubmitDataAsync> *Reader,
+ I32 *Reply) override;
+ Status DataRetrieveAsync(ServerContext *Context,
+ const RetrieveDataAsync *Request,
+ ServerWriter<Data> *Writer) override;
+
+ Status DataExchangeAsync(ServerContext *Context,
+ const ExchangeDataAsync *Request,
+ I32 *Reply) override;
+
+ Status DataDelete(ServerContext *Context, const DeleteData *Request,
+ I32 *Reply) override;
+
+ Status RunTargetRegionAsync(ServerContext *Context,
+ const TargetRegionAsync *Request,
+ I32 *Reply) override;
+
+ Status RunTargetTeamRegionAsync(ServerContext *Context,
+ const TargetTeamRegionAsync *Request,
+ I32 *Reply) override;
+};
+
+#endif
--- /dev/null
+##===----------------------------------------------------------------------===##
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+##===----------------------------------------------------------------------===##
+#
+# Build a plugin for remote offloading.
+#
+##===----------------------------------------------------------------------===##
+
+cmake_minimum_required(VERSION 3.15)
+
+# Define the suffix for the runtime messaging dumps.
+add_definitions(-DTARGET_NAME=RPC)
+
+include_directories(${LIBOMPTARGET_SRC_DIR})
+include_directories(${LIBOMPTARGET_INCLUDE_DIR})
+include_directories(${GRPC_INCLUDE_DIR})
+include_directories(${RPC_INCLUDE_DIR})
+
+add_library(omptarget.rtl.rpc SHARED
+ ${LIBOMPTARGET_SRC_FILES}
+ ${GRPC_SRC_FILES}
+ ${RPC_SRC_DIR}/Utils.cpp
+ Client.cpp
+ rtl.cpp
+)
+
+# Install plugin under the lib destination folder.
+install(TARGETS omptarget.rtl.rpc LIBRARY DESTINATION "${OPENMP_INSTALL_LIBDIR}")
+
+target_link_libraries(omptarget.rtl.rpc
+ grpc++
+ protobuf
+ "-ldl"
+ "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/../../exports")
+
+# Report to the parent scope that we are building a plugin for RPC.
+set(LIBOMPTARGET_SYSTEM_TARGETS "${LIBOMPTARGET_SYSTEM_TARGETS} rpc" PARENT_SCOPE)
--- /dev/null
+//===----------------- Client.cpp - Client Implementation -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// gRPC (Client) for the remote plugin.
+//
+//===----------------------------------------------------------------------===//
+
+#include <cmath>
+
+#include "Client.h"
+#include "omptarget.h"
+#include "openmp.pb.h"
+
+using namespace std::chrono;
+
+using grpc::ClientContext;
+using grpc::ClientReader;
+using grpc::ClientWriter;
+using grpc::Status;
+
+template <typename Fn1, typename Fn2, typename TReturn>
+auto RemoteOffloadClient::remoteCall(Fn1 Preprocess, Fn2 Postprocess,
+ TReturn ErrorValue, bool Timeout) {
+ ArenaAllocatorLock->lock();
+ if (Arena->SpaceAllocated() >= MaxSize)
+ Arena->Reset();
+ ArenaAllocatorLock->unlock();
+
+ ClientContext Context;
+ if (Timeout) {
+ auto Deadline =
+ std::chrono::system_clock::now() + std::chrono::seconds(Timeout);
+ Context.set_deadline(Deadline);
+ }
+
+ Status RPCStatus;
+ auto Reply = Preprocess(RPCStatus, Context);
+
+ // TODO: Error handle more appropriately
+ if (!RPCStatus.ok()) {
+ CLIENT_DBG("%s", RPCStatus.error_message().c_str());
+ } else {
+ return Postprocess(Reply);
+ }
+
+ CLIENT_DBG("Failed");
+ return ErrorValue;
+}
+
+int32_t RemoteOffloadClient::shutdown(void) {
+ ClientContext Context;
+ Null Request;
+ I32 Reply;
+ CLIENT_DBG("Shutting down server.");
+ auto Status = Stub->Shutdown(&Context, Request, &Reply);
+ if (Status.ok())
+ return Reply.number();
+ return 1;
+}
+
+int32_t RemoteOffloadClient::registerLib(__tgt_bin_desc *Desc) {
+ return remoteCall(
+ /* Preprocess */
+ [&](auto &RPCStatus, auto &Context) {
+ auto *Request = protobuf::Arena::CreateMessage<TargetBinaryDescription>(
+ Arena.get());
+ auto *Reply = protobuf::Arena::CreateMessage<I32>(Arena.get());
+ loadTargetBinaryDescription(Desc, *Request);
+ Request->set_bin_ptr((uint64_t)Desc);
+
+ CLIENT_DBG("Registering library");
+ RPCStatus = Stub->RegisterLib(&Context, *Request, Reply);
+ return Reply;
+ },
+ /* Postprocess */
+ [&](const auto &Reply) {
+ if (Reply->number() == 0) {
+ CLIENT_DBG("Registered library");
+ return 0;
+ }
+ return 1;
+ },
+ /* Error Value */ 1);
+}
+
+int32_t RemoteOffloadClient::unregisterLib(__tgt_bin_desc *Desc) {
+ return remoteCall(
+ /* Preprocess */
+ [&](auto &RPCStatus, auto &Context) {
+ auto *Request = protobuf::Arena::CreateMessage<Pointer>(Arena.get());
+ auto *Reply = protobuf::Arena::CreateMessage<I32>(Arena.get());
+
+ Request->set_number((uint64_t)Desc);
+
+ CLIENT_DBG("Unregistering library");
+ RPCStatus = Stub->UnregisterLib(&Context, *Request, Reply);
+ return Reply;
+ },
+ /* Postprocess */
+ [&](const auto &Reply) {
+ if (Reply->number() == 0) {
+ CLIENT_DBG("Unregistered library");
+ return 0;
+ }
+ CLIENT_DBG("Failed to unregister library");
+ return 1;
+ },
+ /* Error Value */ 1);
+}
+
+int32_t RemoteOffloadClient::isValidBinary(__tgt_device_image *Image) {
+ return remoteCall(
+ /* Preprocess */
+ [&](auto &RPCStatus, auto &Context) {
+ auto *Request =
+ protobuf::Arena::CreateMessage<TargetDeviceImagePtr>(Arena.get());
+ auto *Reply = protobuf::Arena::CreateMessage<I32>(Arena.get());
+
+ Request->set_image_ptr((uint64_t)Image->ImageStart);
+
+ auto *EntryItr = Image->EntriesBegin;
+ while (EntryItr != Image->EntriesEnd)
+ Request->add_entry_ptrs((uint64_t)EntryItr++);
+
+ CLIENT_DBG("Validating binary");
+ RPCStatus = Stub->IsValidBinary(&Context, *Request, Reply);
+ return Reply;
+ },
+ /* Postprocess */
+ [&](const auto &Reply) {
+ if (Reply->number()) {
+ CLIENT_DBG("Validated binary");
+ } else {
+ CLIENT_DBG("Could not validate binary");
+ }
+ return Reply->number();
+ },
+ /* Error Value */ 0);
+}
+
+int32_t RemoteOffloadClient::getNumberOfDevices() {
+ return remoteCall(
+ /* Preprocess */
+ [&](Status &RPCStatus, ClientContext &Context) {
+ auto *Request = protobuf::Arena::CreateMessage<Null>(Arena.get());
+ auto *Reply = protobuf::Arena::CreateMessage<I32>(Arena.get());
+
+ CLIENT_DBG("Getting number of devices");
+ RPCStatus = Stub->GetNumberOfDevices(&Context, *Request, Reply);
+
+ return Reply;
+ },
+ /* Postprocess */
+ [&](const auto &Reply) {
+ if (Reply->number()) {
+ CLIENT_DBG("Found %d devices", Reply->number());
+ } else {
+ CLIENT_DBG("Could not get the number of devices");
+ }
+ return Reply->number();
+ },
+ /*Error Value*/ -1);
+}
+
+int32_t RemoteOffloadClient::initDevice(int32_t DeviceId) {
+ return remoteCall(
+ /* Preprocess */
+ [&](auto &RPCStatus, auto &Context) {
+ auto *Request = protobuf::Arena::CreateMessage<I32>(Arena.get());
+ auto *Reply = protobuf::Arena::CreateMessage<I32>(Arena.get());
+
+ Request->set_number(DeviceId);
+
+ CLIENT_DBG("Initializing device %d", DeviceId);
+ RPCStatus = Stub->InitDevice(&Context, *Request, Reply);
+
+ return Reply;
+ },
+ /* Postprocess */
+ [&](const auto &Reply) {
+ if (!Reply->number()) {
+ CLIENT_DBG("Initialized device %d", DeviceId);
+ } else {
+ CLIENT_DBG("Could not initialize device %d", DeviceId);
+ }
+ return Reply->number();
+ },
+ /* Error Value */ -1);
+}
+
+int32_t RemoteOffloadClient::initRequires(int64_t RequiresFlags) {
+ return remoteCall(
+ /* Preprocess */
+ [&](auto &RPCStatus, auto &Context) {
+ auto *Request = protobuf::Arena::CreateMessage<I64>(Arena.get());
+ auto *Reply = protobuf::Arena::CreateMessage<I32>(Arena.get());
+ Request->set_number(RequiresFlags);
+ CLIENT_DBG("Initializing requires");
+ RPCStatus = Stub->InitRequires(&Context, *Request, Reply);
+ return Reply;
+ },
+ /* Postprocess */
+ [&](const auto &Reply) {
+ if (Reply->number()) {
+ CLIENT_DBG("Initialized requires");
+ } else {
+ CLIENT_DBG("Could not initialize requires");
+ }
+ return Reply->number();
+ },
+ /* Error Value */ -1);
+}
+
+__tgt_target_table *RemoteOffloadClient::loadBinary(int32_t DeviceId,
+ __tgt_device_image *Image) {
+ return remoteCall(
+ /* Preprocess */
+ [&](auto &RPCStatus, auto &Context) {
+ auto *ImageMessage =
+ protobuf::Arena::CreateMessage<Binary>(Arena.get());
+ auto *Reply = protobuf::Arena::CreateMessage<TargetTable>(Arena.get());
+ ImageMessage->set_image_ptr((uint64_t)Image->ImageStart);
+ ImageMessage->set_device_id(DeviceId);
+
+ CLIENT_DBG("Loading Image %p to device %d", Image, DeviceId);
+ RPCStatus = Stub->LoadBinary(&Context, *ImageMessage, Reply);
+ return Reply;
+ },
+ /* Postprocess */
+ [&](auto &Reply) {
+ if (Reply->entries_size() == 0) {
+ CLIENT_DBG("Could not load image %p onto device %d", Image, DeviceId);
+ return (__tgt_target_table *)nullptr;
+ }
+ DevicesToTables[DeviceId] = std::make_unique<__tgt_target_table>();
+ unloadTargetTable(*Reply, DevicesToTables[DeviceId].get(),
+ RemoteEntries[DeviceId]);
+
+ CLIENT_DBG("Loaded Image %p to device %d with %d entries", Image,
+ DeviceId, Reply->entries_size());
+
+ return DevicesToTables[DeviceId].get();
+ },
+ /* Error Value */ (__tgt_target_table *)nullptr,
+ /* Timeout */ false);
+}
+
+int64_t RemoteOffloadClient::synchronize(int32_t DeviceId,
+ __tgt_async_info *AsyncInfoPtr) {
+ return remoteCall(
+ /* Preprocess */
+ [&](auto &RPCStatus, auto &Context) {
+ auto *Reply = protobuf::Arena::CreateMessage<I32>(Arena.get());
+ auto *Info =
+ protobuf::Arena::CreateMessage<SynchronizeDevice>(Arena.get());
+
+ Info->set_device_id(DeviceId);
+ Info->set_queue_ptr((uint64_t)AsyncInfoPtr);
+
+ CLIENT_DBG("Synchronizing device %d", DeviceId);
+ RPCStatus = Stub->Synchronize(&Context, *Info, Reply);
+ return Reply;
+ },
+ /* Postprocess */
+ [&](auto &Reply) {
+ if (Reply->number()) {
+ CLIENT_DBG("Synchronized device %d", DeviceId);
+ } else {
+ CLIENT_DBG("Could not synchronize device %d", DeviceId);
+ }
+ return Reply->number();
+ },
+ /* Error Value */ -1);
+}
+
+int32_t RemoteOffloadClient::isDataExchangeable(int32_t SrcDevId,
+ int32_t DstDevId) {
+ return remoteCall(
+ /* Preprocess */
+ [&](auto &RPCStatus, auto &Context) {
+ auto *Request = protobuf::Arena::CreateMessage<DevicePair>(Arena.get());
+ auto *Reply = protobuf::Arena::CreateMessage<I32>(Arena.get());
+
+ Request->set_src_dev_id(SrcDevId);
+ Request->set_dst_dev_id(DstDevId);
+
+ CLIENT_DBG("Asking if data is exchangeable between %d, %d", SrcDevId,
+ DstDevId);
+ RPCStatus = Stub->IsDataExchangeable(&Context, *Request, Reply);
+ return Reply;
+ },
+ /* Postprocess */
+ [&](auto &Reply) {
+ if (Reply->number()) {
+ CLIENT_DBG("Data is exchangeable between %d, %d", SrcDevId, DstDevId);
+ } else {
+ CLIENT_DBG("Data is not exchangeable between %d, %d", SrcDevId,
+ DstDevId);
+ }
+ return Reply->number();
+ },
+ /* Error Value */ -1);
+}
+
+void *RemoteOffloadClient::dataAlloc(int32_t DeviceId, int64_t Size,
+ void *HstPtr) {
+ return remoteCall(
+ /* Preprocess */
+ [&](auto &RPCStatus, auto &Context) {
+ auto *Reply = protobuf::Arena::CreateMessage<Pointer>(Arena.get());
+ auto *Request = protobuf::Arena::CreateMessage<AllocData>(Arena.get());
+
+ Request->set_device_id(DeviceId);
+ Request->set_size(Size);
+ Request->set_hst_ptr((uint64_t)HstPtr);
+
+ CLIENT_DBG("Allocating %ld bytes on device %d", Size, DeviceId);
+ RPCStatus = Stub->DataAlloc(&Context, *Request, Reply);
+ return Reply;
+ },
+ /* Postprocess */
+ [&](auto &Reply) {
+ if (Reply->number()) {
+ CLIENT_DBG("Allocated %ld bytes on device %d at %p", Size, DeviceId,
+ (void *)Reply->number());
+ } else {
+ CLIENT_DBG("Could not allocate %ld bytes on device %d at %p", Size,
+ DeviceId, (void *)Reply->number());
+ }
+ return (void *)Reply->number();
+ },
+ /* Error Value */ (void *)nullptr);
+}
+
+int32_t RemoteOffloadClient::dataSubmitAsync(int32_t DeviceId, void *TgtPtr,
+ void *HstPtr, int64_t Size,
+ __tgt_async_info *AsyncInfoPtr) {
+
+ return remoteCall(
+ /* Preprocess */
+ [&](auto &RPCStatus, auto &Context) {
+ auto *Reply = protobuf::Arena::CreateMessage<I32>(Arena.get());
+ std::unique_ptr<ClientWriter<SubmitDataAsync>> Writer(
+ Stub->DataSubmitAsync(&Context, Reply));
+
+ if (Size > BlockSize) {
+ int64_t Start = 0, End = BlockSize;
+ for (auto I = 0; I < ceil((float)Size / BlockSize); I++) {
+ auto *Request =
+ protobuf::Arena::CreateMessage<SubmitDataAsync>(Arena.get());
+
+ Request->set_device_id(DeviceId);
+ Request->set_data((char *)HstPtr + Start, End - Start);
+ Request->set_hst_ptr((uint64_t)HstPtr);
+ Request->set_tgt_ptr((uint64_t)TgtPtr);
+ Request->set_start(Start);
+ Request->set_size(Size);
+ Request->set_queue_ptr((uint64_t)AsyncInfoPtr);
+
+ CLIENT_DBG("Submitting %ld-%ld/%ld bytes async on device %d at %p",
+ Start, End, Size, DeviceId, TgtPtr)
+
+ if (!Writer->Write(*Request)) {
+ CLIENT_DBG("Broken stream when submitting data");
+ Reply->set_number(0);
+ return Reply;
+ }
+
+ Start += BlockSize;
+ End += BlockSize;
+ if (End >= Size)
+ End = Size;
+ }
+ } else {
+ auto *Request =
+ protobuf::Arena::CreateMessage<SubmitDataAsync>(Arena.get());
+
+ Request->set_device_id(DeviceId);
+ Request->set_data(HstPtr, Size);
+ Request->set_hst_ptr((uint64_t)HstPtr);
+ Request->set_tgt_ptr((uint64_t)TgtPtr);
+ Request->set_start(0);
+ Request->set_size(Size);
+
+ CLIENT_DBG("Submitting %ld bytes async on device %d at %p", Size,
+ DeviceId, TgtPtr)
+ if (!Writer->Write(*Request)) {
+ CLIENT_DBG("Broken stream when submitting data");
+ Reply->set_number(0);
+ return Reply;
+ }
+ }
+
+ Writer->WritesDone();
+ RPCStatus = Writer->Finish();
+
+ return Reply;
+ },
+ /* Postprocess */
+ [&](auto &Reply) {
+ if (!Reply->number()) {
+ CLIENT_DBG("Async submitted %ld bytes on device %d at %p", Size,
+ DeviceId, TgtPtr)
+ } else {
+ CLIENT_DBG("Could not async submit %ld bytes on device %d at %p",
+ Size, DeviceId, TgtPtr)
+ }
+ return Reply->number();
+ },
+ /* Error Value */ -1,
+ /* Timeout */ false);
+}
+
+int32_t RemoteOffloadClient::dataRetrieveAsync(int32_t DeviceId, void *HstPtr,
+ void *TgtPtr, int64_t Size,
+ __tgt_async_info *AsyncInfoPtr) {
+ return remoteCall(
+ /* Preprocess */
+ [&](auto &RPCStatus, auto &Context) {
+ auto *Request =
+ protobuf::Arena::CreateMessage<RetrieveDataAsync>(Arena.get());
+
+ Request->set_device_id(DeviceId);
+ Request->set_size(Size);
+ Request->set_hst_ptr((int64_t)HstPtr);
+ Request->set_tgt_ptr((int64_t)TgtPtr);
+ Request->set_queue_ptr((uint64_t)AsyncInfoPtr);
+
+ auto *Reply = protobuf::Arena::CreateMessage<Data>(Arena.get());
+ std::unique_ptr<ClientReader<Data>> Reader(
+ Stub->DataRetrieveAsync(&Context, *Request));
+ Reader->WaitForInitialMetadata();
+ while (Reader->Read(Reply)) {
+ if (Reply->ret()) {
+ CLIENT_DBG("Could not async retrieve %ld bytes on device %d at %p "
+ "for %p",
+ Size, DeviceId, TgtPtr, HstPtr)
+ return Reply;
+ }
+
+ if (Reply->start() == 0 && Reply->size() == Reply->data().size()) {
+ CLIENT_DBG("Async retrieving %ld bytes on device %d at %p for %p",
+ Size, DeviceId, TgtPtr, HstPtr)
+
+ memcpy(HstPtr, Reply->data().data(), Reply->data().size());
+
+ return Reply;
+ }
+ CLIENT_DBG("Retrieving %lu-%lu/%lu bytes async from (%p) to (%p) "
+ "on Device %d",
+ Reply->start(), Reply->start() + Reply->data().size(),
+ Reply->size(), (void *)Request->tgt_ptr(), HstPtr,
+ Request->device_id());
+
+ memcpy((void *)((char *)HstPtr + Reply->start()),
+ Reply->data().data(), Reply->data().size());
+ }
+ RPCStatus = Reader->Finish();
+
+ return Reply;
+ },
+ /* Postprocess */
+ [&](auto &Reply) {
+ if (!Reply->ret()) {
+ CLIENT_DBG("Async retrieve %ld bytes on Device %d", Size, DeviceId);
+ } else {
+ CLIENT_DBG("Could not async retrieve %ld bytes on Device %d", Size,
+ DeviceId);
+ }
+ return Reply->ret();
+ },
+ /* Error Value */ -1,
+ /* Timeout */ false);
+}
+
+int32_t RemoteOffloadClient::dataExchangeAsync(int32_t SrcDevId, void *SrcPtr,
+ int32_t DstDevId, void *DstPtr,
+ int64_t Size,
+ __tgt_async_info *AsyncInfoPtr) {
+ return remoteCall(
+ /* Preprocess */
+ [&](auto &RPCStatus, auto &Context) {
+ auto *Reply = protobuf::Arena::CreateMessage<I32>(Arena.get());
+ auto *Request =
+ protobuf::Arena::CreateMessage<ExchangeDataAsync>(Arena.get());
+
+ Request->set_src_dev_id(SrcDevId);
+ Request->set_src_ptr((uint64_t)SrcPtr);
+ Request->set_dst_dev_id(DstDevId);
+ Request->set_dst_ptr((uint64_t)DstPtr);
+ Request->set_size(Size);
+ Request->set_queue_ptr((uint64_t)AsyncInfoPtr);
+
+ CLIENT_DBG(
+ "Exchanging %ld bytes on device %d at %p for %p on device %d", Size,
+ SrcDevId, SrcPtr, DstPtr, DstDevId);
+ RPCStatus = Stub->DataExchangeAsync(&Context, *Request, Reply);
+ return Reply;
+ },
+ /* Postprocess */
+ [&](auto &Reply) {
+ if (Reply->number()) {
+ CLIENT_DBG(
+ "Exchanged %ld bytes on device %d at %p for %p on device %d",
+ Size, SrcDevId, SrcPtr, DstPtr, DstDevId);
+ } else {
+ CLIENT_DBG("Could not exchange %ld bytes on device %d at %p for %p "
+ "on device %d",
+ Size, SrcDevId, SrcPtr, DstPtr, DstDevId);
+ }
+ return Reply->number();
+ },
+ /* Error Value */ -1);
+}
+
+int32_t RemoteOffloadClient::dataDelete(int32_t DeviceId, void *TgtPtr) {
+ return remoteCall(
+ /* Preprocess */
+ [&](auto &RPCStatus, auto &Context) {
+ auto *Reply = protobuf::Arena::CreateMessage<I32>(Arena.get());
+ auto *Request = protobuf::Arena::CreateMessage<DeleteData>(Arena.get());
+
+ Request->set_device_id(DeviceId);
+ Request->set_tgt_ptr((uint64_t)TgtPtr);
+
+ CLIENT_DBG("Deleting data at %p on device %d", TgtPtr, DeviceId)
+ RPCStatus = Stub->DataDelete(&Context, *Request, Reply);
+ return Reply;
+ },
+ /* Postprocess */
+ [&](auto &Reply) {
+ if (!Reply->number()) {
+ CLIENT_DBG("Deleted data at %p on device %d", TgtPtr, DeviceId)
+ } else {
+ CLIENT_DBG("Could not delete data at %p on device %d", TgtPtr,
+ DeviceId)
+ }
+ return Reply->number();
+ },
+ /* Error Value */ -1);
+}
+
+int32_t RemoteOffloadClient::runTargetRegionAsync(
+ int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs, ptrdiff_t *TgtOffsets,
+ int32_t ArgNum, __tgt_async_info *AsyncInfoPtr) {
+ return remoteCall(
+ /* Preprocess */
+ [&](auto &RPCStatus, auto &Context) {
+ auto *Reply = protobuf::Arena::CreateMessage<I32>(Arena.get());
+ auto *Request =
+ protobuf::Arena::CreateMessage<TargetRegionAsync>(Arena.get());
+
+ Request->set_device_id(DeviceId);
+ Request->set_queue_ptr((uint64_t)AsyncInfoPtr);
+
+ Request->set_tgt_entry_ptr(
+ (uint64_t)RemoteEntries[DeviceId][TgtEntryPtr]);
+
+ char **ArgPtr = (char **)TgtArgs;
+ for (auto I = 0; I < ArgNum; I++, ArgPtr++)
+ Request->add_tgt_args((uint64_t)*ArgPtr);
+
+ char *OffsetPtr = (char *)TgtOffsets;
+ for (auto I = 0; I < ArgNum; I++, OffsetPtr++)
+ Request->add_tgt_offsets((uint64_t)*OffsetPtr);
+
+ Request->set_arg_num(ArgNum);
+
+ CLIENT_DBG("Running target region async on device %d", DeviceId);
+ RPCStatus = Stub->RunTargetRegionAsync(&Context, *Request, Reply);
+ return Reply;
+ },
+ /* Postprocess */
+ [&](auto &Reply) {
+ if (!Reply->number()) {
+ CLIENT_DBG("Ran target region async on device %d", DeviceId);
+ } else {
+ CLIENT_DBG("Could not run target region async on device %d",
+ DeviceId);
+ }
+ return Reply->number();
+ },
+ /* Error Value */ -1,
+ /* Timeout */ false);
+}
+
+int32_t RemoteOffloadClient::runTargetTeamRegionAsync(
+ int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs, ptrdiff_t *TgtOffsets,
+ int32_t ArgNum, int32_t TeamNum, int32_t ThreadLimit,
+ uint64_t LoopTripcount, __tgt_async_info *AsyncInfoPtr) {
+ return remoteCall(
+ /* Preprocess */
+ [&](auto &RPCStatus, auto &Context) {
+ auto *Reply = protobuf::Arena::CreateMessage<I32>(Arena.get());
+ auto *Request =
+ protobuf::Arena::CreateMessage<TargetTeamRegionAsync>(Arena.get());
+
+ Request->set_device_id(DeviceId);
+ Request->set_queue_ptr((uint64_t)AsyncInfoPtr);
+
+ Request->set_tgt_entry_ptr(
+ (uint64_t)RemoteEntries[DeviceId][TgtEntryPtr]);
+
+ char **ArgPtr = (char **)TgtArgs;
+ for (auto I = 0; I < ArgNum; I++, ArgPtr++) {
+ Request->add_tgt_args((uint64_t)*ArgPtr);
+ }
+
+ char *OffsetPtr = (char *)TgtOffsets;
+ for (auto I = 0; I < ArgNum; I++, OffsetPtr++)
+ Request->add_tgt_offsets((uint64_t)*OffsetPtr);
+
+ Request->set_arg_num(ArgNum);
+ Request->set_team_num(TeamNum);
+ Request->set_thread_limit(ThreadLimit);
+ Request->set_loop_tripcount(LoopTripcount);
+
+ CLIENT_DBG("Running target team region async on device %d", DeviceId);
+ RPCStatus = Stub->RunTargetTeamRegionAsync(&Context, *Request, Reply);
+ return Reply;
+ },
+ /* Postprocess */
+ [&](auto &Reply) {
+ if (!Reply->number()) {
+ CLIENT_DBG("Ran target team region async on device %d", DeviceId);
+ } else {
+ CLIENT_DBG("Could not run target team region async on device %d",
+ DeviceId);
+ }
+ return Reply->number();
+ },
+ /* Error Value */ -1,
+ /* Timeout */ false);
+}
+
+// TODO: Better error handling for the next three functions
+int32_t RemoteClientManager::shutdown(void) {
+ int32_t Ret = 0;
+ for (auto &Client : Clients)
+ Ret &= Client.shutdown();
+ return Ret;
+}
+
+int32_t RemoteClientManager::registerLib(__tgt_bin_desc *Desc) {
+ int32_t Ret = 0;
+ for (auto &Client : Clients)
+ Ret &= Client.registerLib(Desc);
+ return Ret;
+}
+
+int32_t RemoteClientManager::unregisterLib(__tgt_bin_desc *Desc) {
+ int32_t Ret = 0;
+ for (auto &Client : Clients)
+ Ret &= Client.unregisterLib(Desc);
+ return Ret;
+}
+
+int32_t RemoteClientManager::isValidBinary(__tgt_device_image *Image) {
+ int32_t ClientIdx = 0;
+ for (auto &Client : Clients) {
+ if (auto Ret = Client.isValidBinary(Image))
+ return Ret;
+ ClientIdx++;
+ }
+ return 0;
+}
+
+int32_t RemoteClientManager::getNumberOfDevices() {
+ auto ClientIdx = 0;
+ for (auto &Client : Clients) {
+ if (auto NumDevices = Client.getNumberOfDevices()) {
+ Devices.push_back(NumDevices);
+ }
+ ClientIdx++;
+ }
+
+ return std::accumulate(Devices.begin(), Devices.end(), 0);
+}
+
+std::pair<int32_t, int32_t> RemoteClientManager::mapDeviceId(int32_t DeviceId) {
+ for (size_t ClientIdx = 0; ClientIdx < Devices.size(); ClientIdx++) {
+ if (!(DeviceId >= Devices[ClientIdx]))
+ return {ClientIdx, DeviceId};
+ DeviceId -= Devices[ClientIdx];
+ }
+ return {-1, -1};
+}
+
+int32_t RemoteClientManager::initDevice(int32_t DeviceId) {
+ int32_t ClientIdx, DeviceIdx;
+ std::tie(ClientIdx, DeviceIdx) = mapDeviceId(DeviceId);
+ return Clients[ClientIdx].initDevice(DeviceIdx);
+}
+
+int32_t RemoteClientManager::initRequires(int64_t RequiresFlags) {
+ for (auto &Client : Clients)
+ Client.initRequires(RequiresFlags);
+
+ return RequiresFlags;
+}
+
+__tgt_target_table *RemoteClientManager::loadBinary(int32_t DeviceId,
+ __tgt_device_image *Image) {
+ int32_t ClientIdx, DeviceIdx;
+ std::tie(ClientIdx, DeviceIdx) = mapDeviceId(DeviceId);
+ return Clients[ClientIdx].loadBinary(DeviceIdx, Image);
+}
+
+int64_t RemoteClientManager::synchronize(int32_t DeviceId,
+ __tgt_async_info *AsyncInfoPtr) {
+ int32_t ClientIdx, DeviceIdx;
+ std::tie(ClientIdx, DeviceIdx) = mapDeviceId(DeviceId);
+ return Clients[ClientIdx].synchronize(DeviceIdx, AsyncInfoPtr);
+}
+
+int32_t RemoteClientManager::isDataExchangeable(int32_t SrcDevId,
+ int32_t DstDevId) {
+ int32_t SrcClientIdx, SrcDeviceIdx, DstClientIdx, DstDeviceIdx;
+ std::tie(SrcClientIdx, SrcDeviceIdx) = mapDeviceId(SrcDevId);
+ std::tie(DstClientIdx, DstDeviceIdx) = mapDeviceId(DstDevId);
+ return Clients[SrcClientIdx].isDataExchangeable(SrcDeviceIdx, DstDeviceIdx);
+}
+
+void *RemoteClientManager::dataAlloc(int32_t DeviceId, int64_t Size,
+ void *HstPtr) {
+ int32_t ClientIdx, DeviceIdx;
+ std::tie(ClientIdx, DeviceIdx) = mapDeviceId(DeviceId);
+ return Clients[ClientIdx].dataAlloc(DeviceIdx, Size, HstPtr);
+}
+
+int32_t RemoteClientManager::dataDelete(int32_t DeviceId, void *TgtPtr) {
+ int32_t ClientIdx, DeviceIdx;
+ std::tie(ClientIdx, DeviceIdx) = mapDeviceId(DeviceId);
+ return Clients[ClientIdx].dataDelete(DeviceIdx, TgtPtr);
+}
+
+int32_t RemoteClientManager::dataSubmitAsync(int32_t DeviceId, void *TgtPtr,
+ void *HstPtr, int64_t Size,
+ __tgt_async_info *AsyncInfoPtr) {
+ int32_t ClientIdx, DeviceIdx;
+ std::tie(ClientIdx, DeviceIdx) = mapDeviceId(DeviceId);
+ return Clients[ClientIdx].dataSubmitAsync(DeviceIdx, TgtPtr, HstPtr, Size,
+ AsyncInfoPtr);
+}
+
+int32_t RemoteClientManager::dataRetrieveAsync(int32_t DeviceId, void *HstPtr,
+ void *TgtPtr, int64_t Size,
+ __tgt_async_info *AsyncInfoPtr) {
+ int32_t ClientIdx, DeviceIdx;
+ std::tie(ClientIdx, DeviceIdx) = mapDeviceId(DeviceId);
+ return Clients[ClientIdx].dataRetrieveAsync(DeviceIdx, HstPtr, TgtPtr, Size,
+ AsyncInfoPtr);
+}
+
+int32_t RemoteClientManager::dataExchangeAsync(int32_t SrcDevId, void *SrcPtr,
+ int32_t DstDevId, void *DstPtr,
+ int64_t Size,
+ __tgt_async_info *AsyncInfoPtr) {
+ int32_t SrcClientIdx, SrcDeviceIdx, DstClientIdx, DstDeviceIdx;
+ std::tie(SrcClientIdx, SrcDeviceIdx) = mapDeviceId(SrcDevId);
+ std::tie(DstClientIdx, DstDeviceIdx) = mapDeviceId(DstDevId);
+ return Clients[SrcClientIdx].dataExchangeAsync(
+ SrcDeviceIdx, SrcPtr, DstDeviceIdx, DstPtr, Size, AsyncInfoPtr);
+}
+
+int32_t RemoteClientManager::runTargetRegionAsync(
+ int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs, ptrdiff_t *TgtOffsets,
+ int32_t ArgNum, __tgt_async_info *AsyncInfoPtr) {
+ int32_t ClientIdx, DeviceIdx;
+ std::tie(ClientIdx, DeviceIdx) = mapDeviceId(DeviceId);
+ return Clients[ClientIdx].runTargetRegionAsync(
+ DeviceIdx, TgtEntryPtr, TgtArgs, TgtOffsets, ArgNum, AsyncInfoPtr);
+}
+
+int32_t RemoteClientManager::runTargetTeamRegionAsync(
+ int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs, ptrdiff_t *TgtOffsets,
+ int32_t ArgNum, int32_t TeamNum, int32_t ThreadLimit,
+ uint64_t LoopTripCount, __tgt_async_info *AsyncInfoPtr) {
+ int32_t ClientIdx, DeviceIdx;
+ std::tie(ClientIdx, DeviceIdx) = mapDeviceId(DeviceId);
+ return Clients[ClientIdx].runTargetTeamRegionAsync(
+ DeviceIdx, TgtEntryPtr, TgtArgs, TgtOffsets, ArgNum, TeamNum, ThreadLimit,
+ LoopTripCount, AsyncInfoPtr);
+}
--- /dev/null
+//===------------------ Client.h - Client Implementation ------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// gRPC Client for the remote plugin.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OPENMP_LIBOMPTARGET_PLUGINS_REMOTE_SRC_CLIENT_H
+#define LLVM_OPENMP_LIBOMPTARGET_PLUGINS_REMOTE_SRC_CLIENT_H
+
+#include "Utils.h"
+#include "omptarget.h"
+#include <google/protobuf/arena.h>
+#include <grpcpp/grpcpp.h>
+#include <grpcpp/security/credentials.h>
+#include <grpcpp/support/channel_arguments.h>
+#include <memory>
+#include <mutex>
+#include <numeric>
+
+using grpc::Channel;
+using openmp::libomptarget::remote::RemoteOffload;
+using namespace RemoteOffloading;
+
+using namespace google;
+
+class RemoteOffloadClient {
+ const int Timeout;
+
+ int DebugLevel;
+ uint64_t MaxSize;
+ int64_t BlockSize;
+
+ std::unique_ptr<RemoteOffload::Stub> Stub;
+ std::unique_ptr<protobuf::Arena> Arena;
+
+ std::unique_ptr<std::mutex> ArenaAllocatorLock;
+
+ std::map<int32_t, std::unordered_map<void *, void *>> RemoteEntries;
+ std::map<int32_t, std::unique_ptr<__tgt_target_table>> DevicesToTables;
+
+ template <typename Fn1, typename Fn2, typename TReturn>
+ auto remoteCall(Fn1 Preprocess, Fn2 Postprocess, TReturn ErrorValue,
+ bool Timeout = true);
+
+public:
+ RemoteOffloadClient(std::shared_ptr<Channel> Channel, int Timeout,
+ uint64_t MaxSize, int64_t BlockSize)
+ : Timeout(Timeout), MaxSize(MaxSize), BlockSize(BlockSize),
+ Stub(RemoteOffload::NewStub(Channel)) {
+ DebugLevel = getDebugLevel();
+ Arena = std::make_unique<protobuf::Arena>();
+ ArenaAllocatorLock = std::make_unique<std::mutex>();
+ }
+
+ RemoteOffloadClient(RemoteOffloadClient &&C) = default;
+
+ ~RemoteOffloadClient() {
+ for (auto &TableIt : DevicesToTables)
+ freeTargetTable(TableIt.second.get());
+ }
+
+ int32_t shutdown(void);
+
+ int32_t registerLib(__tgt_bin_desc *Desc);
+ int32_t unregisterLib(__tgt_bin_desc *Desc);
+
+ int32_t isValidBinary(__tgt_device_image *Image);
+ int32_t getNumberOfDevices();
+
+ int32_t initDevice(int32_t DeviceId);
+ int32_t initRequires(int64_t RequiresFlags);
+
+ __tgt_target_table *loadBinary(int32_t DeviceId, __tgt_device_image *Image);
+ int64_t synchronize(int32_t DeviceId, __tgt_async_info *AsyncInfoPtr);
+ int32_t isDataExchangeable(int32_t SrcDevId, int32_t DstDevId);
+
+ void *dataAlloc(int32_t DeviceId, int64_t Size, void *HstPtr);
+ int32_t dataDelete(int32_t DeviceId, void *TgtPtr);
+
+ int32_t dataSubmitAsync(int32_t DeviceId, void *TgtPtr, void *HstPtr,
+ int64_t Size, __tgt_async_info *AsyncInfoPtr);
+ int32_t dataRetrieveAsync(int32_t DeviceId, void *HstPtr, void *TgtPtr,
+ int64_t Size, __tgt_async_info *AsyncInfoPtr);
+
+ int32_t dataExchangeAsync(int32_t SrcDevId, void *SrcPtr, int32_t DstDevId,
+ void *DstPtr, int64_t Size,
+ __tgt_async_info *AsyncInfoPtr);
+
+ int32_t runTargetRegionAsync(int32_t DeviceId, void *TgtEntryPtr,
+ void **TgtArgs, ptrdiff_t *TgtOffsets,
+ int32_t ArgNum, __tgt_async_info *AsyncInfoPtr);
+
+ int32_t runTargetTeamRegionAsync(int32_t DeviceId, void *TgtEntryPtr,
+ void **TgtArgs, ptrdiff_t *TgtOffsets,
+ int32_t ArgNum, int32_t TeamNum,
+ int32_t ThreadLimit, uint64_t LoopTripCount,
+ __tgt_async_info *AsyncInfoPtr);
+};
+
+class RemoteClientManager {
+private:
+ std::vector<std::string> Addresses;
+ std::vector<RemoteOffloadClient> Clients;
+ std::vector<int> Devices;
+
+ std::pair<int32_t, int32_t> mapDeviceId(int32_t DeviceId);
+ int DebugLevel;
+
+public:
+ RemoteClientManager(std::vector<std::string> Addresses, int Timeout,
+ uint64_t MaxSize, int64_t BlockSize)
+ : Addresses(Addresses) {
+ grpc::ChannelArguments ChArgs;
+ ChArgs.SetMaxReceiveMessageSize(-1);
+ DebugLevel = getDebugLevel();
+ for (auto Address : Addresses) {
+ Clients.push_back(RemoteOffloadClient(
+ grpc::CreateChannel(Address, grpc::InsecureChannelCredentials()),
+ Timeout, MaxSize, BlockSize));
+ }
+ }
+
+ int32_t shutdown(void);
+
+ int32_t registerLib(__tgt_bin_desc *Desc);
+ int32_t unregisterLib(__tgt_bin_desc *Desc);
+
+ int32_t isValidBinary(__tgt_device_image *Image);
+ int32_t getNumberOfDevices();
+
+ int32_t initDevice(int32_t DeviceId);
+ int32_t initRequires(int64_t RequiresFlags);
+
+ __tgt_target_table *loadBinary(int32_t DeviceId, __tgt_device_image *Image);
+ int64_t synchronize(int32_t DeviceId, __tgt_async_info *AsyncInfoPtr);
+ int32_t isDataExchangeable(int32_t SrcDevId, int32_t DstDevId);
+
+ void *dataAlloc(int32_t DeviceId, int64_t Size, void *HstPtr);
+ int32_t dataDelete(int32_t DeviceId, void *TgtPtr);
+
+ int32_t dataSubmitAsync(int32_t DeviceId, void *TgtPtr, void *HstPtr,
+ int64_t Size, __tgt_async_info *AsyncInfoPtr);
+ int32_t dataRetrieveAsync(int32_t DeviceId, void *HstPtr, void *TgtPtr,
+ int64_t Size, __tgt_async_info *AsyncInfoPtr);
+
+ int32_t dataExchangeAsync(int32_t SrcDevId, void *SrcPtr, int32_t DstDevId,
+ void *DstPtr, int64_t Size,
+ __tgt_async_info *AsyncInfoPtr);
+
+ int32_t runTargetRegionAsync(int32_t DeviceId, void *TgtEntryPtr,
+ void **TgtArgs, ptrdiff_t *TgtOffsets,
+ int32_t ArgNum, __tgt_async_info *AsyncInfoPtr);
+
+ int32_t runTargetTeamRegionAsync(int32_t DeviceId, void *TgtEntryPtr,
+ void **TgtArgs, ptrdiff_t *TgtOffsets,
+ int32_t ArgNum, int32_t TeamNum,
+ int32_t ThreadLimit, uint64_t LoopTripCount,
+ __tgt_async_info *AsyncInfoPtr);
+};
+
+#endif
--- /dev/null
+//===--------------------- rtl.cpp - Remote RTL Plugin --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// RTL for Host.
+//
+//===----------------------------------------------------------------------===//
+
+#include <cstddef>
+#include <memory>
+#include <string>
+
+#include "Client.h"
+#include "Utils.h"
+#include "omptarget.h"
+#include "omptargetplugin.h"
+
+#define TARGET_NAME RPC
+#define DEBUG_PREFIX "Target " GETNAME(TARGET_NAME) " RTL"
+
+RemoteClientManager *Manager;
+
+__attribute__((constructor(101))) void initRPC() {
+ DP("Init RPC library!\n");
+
+ RPCConfig Config;
+ parseEnvironment(Config);
+
+ int Timeout = 5;
+ if (const char *Env1 = std::getenv("LIBOMPTARGET_RPC_LATENCY"))
+ Timeout = std::stoi(Env1);
+
+ Manager = new RemoteClientManager(Config.ServerAddresses, Timeout,
+ Config.MaxSize, Config.BlockSize);
+}
+
+__attribute__((destructor(101))) void deinitRPC() {
+ Manager->shutdown(); // TODO: Error handle shutting down
+ DP("Deinit RPC library!\n");
+ delete Manager;
+}
+
+// Exposed library API function
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int32_t __tgt_rtl_register_lib(__tgt_bin_desc *Desc) {
+ return Manager->registerLib(Desc);
+}
+
+int32_t __tgt_rtl_unregister_lib(__tgt_bin_desc *Desc) {
+ return Manager->unregisterLib(Desc);
+}
+
+int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *Image) {
+ return Manager->isValidBinary(Image);
+}
+
+int32_t __tgt_rtl_number_of_devices() { return Manager->getNumberOfDevices(); }
+
+int32_t __tgt_rtl_init_device(int32_t DeviceId) {
+ return Manager->initDevice(DeviceId);
+}
+
+int64_t __tgt_rtl_init_requires(int64_t RequiresFlags) {
+ return Manager->initRequires(RequiresFlags);
+}
+
+__tgt_target_table *__tgt_rtl_load_binary(int32_t DeviceId,
+ __tgt_device_image *Image) {
+ return Manager->loadBinary(DeviceId, (__tgt_device_image *)Image);
+}
+
+int32_t __tgt_rtl_synchronize(int32_t DeviceId,
+ __tgt_async_info *AsyncInfoPtr) {
+ return Manager->synchronize(DeviceId, AsyncInfoPtr);
+}
+
+int32_t __tgt_rtl_is_data_exchangable(int32_t SrcDevId, int32_t DstDevId) {
+ return Manager->isDataExchangeable(SrcDevId, DstDevId);
+}
+
+void *__tgt_rtl_data_alloc(int32_t DeviceId, int64_t Size, void *HstPtr) {
+ return Manager->dataAlloc(DeviceId, Size, HstPtr);
+}
+
+int32_t __tgt_rtl_data_submit(int32_t DeviceId, void *TgtPtr, void *HstPtr,
+ int64_t Size) {
+ return Manager->dataSubmitAsync(DeviceId, TgtPtr, HstPtr, Size, nullptr);
+}
+
+int32_t __tgt_rtl_data_submit_async(int32_t DeviceId, void *TgtPtr,
+ void *HstPtr, int64_t Size,
+ __tgt_async_info *AsyncInfoPtr) {
+ return Manager->dataSubmitAsync(DeviceId, TgtPtr, HstPtr, Size, AsyncInfoPtr);
+}
+
+int32_t __tgt_rtl_data_retrieve(int32_t DeviceId, void *HstPtr, void *TgtPtr,
+ int64_t Size) {
+ return Manager->dataRetrieveAsync(DeviceId, HstPtr, TgtPtr, Size, nullptr);
+}
+
+int32_t __tgt_rtl_data_retrieve_async(int32_t DeviceId, void *HstPtr,
+ void *TgtPtr, int64_t Size,
+ __tgt_async_info *AsyncInfoPtr) {
+ return Manager->dataRetrieveAsync(DeviceId, HstPtr, TgtPtr, Size,
+ AsyncInfoPtr);
+}
+
+int32_t __tgt_rtl_data_delete(int32_t DeviceId, void *TgtPtr) {
+ return Manager->dataDelete(DeviceId, TgtPtr);
+}
+
+int32_t __tgt_rtl_data_exchange(int32_t SrcDevId, void *SrcPtr,
+ int32_t DstDevId, void *DstPtr, int64_t Size) {
+ return Manager->dataExchangeAsync(SrcDevId, SrcPtr, DstDevId, DstPtr, Size,
+ nullptr);
+}
+
+int32_t __tgt_rtl_data_exchange_async(int32_t SrcDevId, void *SrcPtr,
+ int32_t DstDevId, void *DstPtr,
+ int64_t Size,
+ __tgt_async_info *AsyncInfoPtr) {
+ return Manager->dataExchangeAsync(SrcDevId, SrcPtr, DstDevId, DstPtr, Size,
+ AsyncInfoPtr);
+}
+
+int32_t __tgt_rtl_run_target_region(int32_t DeviceId, void *TgtEntryPtr,
+ void **TgtArgs, ptrdiff_t *TgtOffsets,
+ int32_t ArgNum) {
+ return Manager->runTargetRegionAsync(DeviceId, TgtEntryPtr, TgtArgs,
+ TgtOffsets, ArgNum, nullptr);
+}
+
+int32_t __tgt_rtl_run_target_region_async(int32_t DeviceId, void *TgtEntryPtr,
+ void **TgtArgs, ptrdiff_t *TgtOffsets,
+ int32_t ArgNum,
+ __tgt_async_info *AsyncInfoPtr) {
+ return Manager->runTargetRegionAsync(DeviceId, TgtEntryPtr, TgtArgs,
+ TgtOffsets, ArgNum, AsyncInfoPtr);
+}
+
+int32_t __tgt_rtl_run_target_team_region(int32_t DeviceId, void *TgtEntryPtr,
+ void **TgtArgs, ptrdiff_t *TgtOffsets,
+ int32_t ArgNum, int32_t TeamNum,
+ int32_t ThreadLimit,
+ uint64_t LoopTripCount) {
+ return Manager->runTargetTeamRegionAsync(DeviceId, TgtEntryPtr, TgtArgs,
+ TgtOffsets, ArgNum, TeamNum,
+ ThreadLimit, LoopTripCount, nullptr);
+}
+
+int32_t __tgt_rtl_run_target_team_region_async(
+ int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs, ptrdiff_t *TgtOffsets,
+ int32_t ArgNum, int32_t TeamNum, int32_t ThreadLimit,
+ uint64_t LoopTripCount, __tgt_async_info *AsyncInfoPtr) {
+ return Manager->runTargetTeamRegionAsync(
+ DeviceId, TgtEntryPtr, TgtArgs, TgtOffsets, ArgNum, TeamNum, ThreadLimit,
+ LoopTripCount, AsyncInfoPtr);
+}
+
+// Exposed library API function
+#ifdef __cplusplus
+}
+#endif
${CMAKE_CURRENT_SOURCE_DIR}/omptarget.cpp
)
+set(LIBOMPTARGET_SRC_FILES ${LIBOMPTARGET_SRC_FILES} PARENT_SCOPE)
+
include_directories(${LIBOMPTARGET_LLVM_INCLUDE_DIRS})
# Build libomptarget library with libdl dependency. Add LLVMSupport