require support from the operating system. We instead implement a remote
procedure call (RPC) interface to allow submitting work from the GPU to a host
server that forwards it to the host system.
+
+Extensions
+----------
+
+The operation the RPC server should perform is selected by a 16-bit opcode. The
+first 32768 opcodes are reserved for ``libc``; all remaining values are free
+for extensions to use.
RPC_MALLOC = 7,
RPC_FREE = 8,
RPC_HOST_CALL = 9,
- // TODO: Move these out of here and handle then with custom handlers in the
- // loader.
- RPC_TEST_INCREMENT = 1000,
- RPC_TEST_INTERFACE = 1001,
- RPC_TEST_STREAM = 1002,
} rpc_opcode_t;
#endif // __LLVM_LIBC_TYPES_RPC_OPCODE_H__
--- /dev/null
+//===-- Definition of RPC opcodes used for internal tests -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __LLVM_LIBC_TYPES_TEST_RPC_OPCODE_H__
+#define __LLVM_LIBC_TYPES_TEST_RPC_OPCODE_H__
+
+// We consider the first 32768 opcodes as reserved for libc purposes. We allow
+// extensions to use any other number without conflicting with anything else.
+typedef enum : unsigned short {
+ RPC_TEST_NOOP = 1 << 15, // 32768: first value past the reserved opcodes
+ RPC_TEST_INCREMENT, // loader handler adds one to the first buffer word
+ RPC_TEST_INTERFACE, // loader handler runs a fixed recv/send sequence
+ RPC_TEST_STREAM, // loader handler receives per-lane data and sends it back
+} rpc_test_opcode_t;
+
+#endif // __LLVM_LIBC_TYPES_TEST_RPC_OPCODE_H__
//
//===----------------------------------------------------------------------===//
+#include "include/llvm-libc-types/test_rpc_opcodes_t.h"
#include "src/__support/GPU/utils.h"
#include "src/__support/RPC/rpc_client.h"
#include "test/IntegrationTest/test.h"
//
//===----------------------------------------------------------------------===//
+#include "include/llvm-libc-types/test_rpc_opcodes_t.h"
#include "src/__support/GPU/utils.h"
#include "src/__support/RPC/rpc_client.h"
#include "src/__support/integer_to_string.h"
//
//===----------------------------------------------------------------------===//
+#include "include/llvm-libc-types/test_rpc_opcodes_t.h"
#include "src/__support/GPU/utils.h"
#include "src/__support/RPC/rpc_client.h"
#include "test/IntegrationTest/test.h"
#define LLVM_LIBC_UTILS_GPU_LOADER_LOADER_H
#include "utils/gpu/server/rpc_server.h"
+
+#include "include/llvm-libc-types/test_rpc_opcodes_t.h"
+
#include <cstddef>
#include <cstdint>
#include <cstdio>
handle_error("Failure in the RPC server\n");
}
+/// Register the RPC handlers used by the `libc` integration tests with the
+/// server for \p device_id. The test opcodes are declared in
+/// `rpc_test_opcode_t` and are cast to `rpc_opcode_t` for registration.
+inline void register_rpc_callbacks(uint32_t device_id) {
+  // Register the ping test for the `libc` tests. The handler adds one to the
+  // first 64-bit word of the buffer and sends it back.
+  rpc_register_callback(
+      device_id, static_cast<rpc_opcode_t>(RPC_TEST_INCREMENT),
+      [](rpc_port_t port, void *data) {
+        rpc_recv_and_send(
+            port,
+            [](rpc_buffer_t *buffer, void *) {
+              reinterpret_cast<uint64_t *>(buffer->data)[0] += 1;
+            },
+            data);
+      },
+      nullptr);
+
+  // Register the interface test callbacks. The handler performs a fixed
+  // sequence of receives and sends on a single port.
+  rpc_register_callback(
+      device_id, static_cast<rpc_opcode_t>(RPC_TEST_INTERFACE),
+      [](rpc_port_t port, void *) {
+        // Stores the first buffer word into the counter passed through `data`.
+        auto recv_cnt = [](rpc_buffer_t *buffer, void *data) {
+          *static_cast<uint64_t *>(data) = buffer->data[0];
+        };
+        // Increments the counter passed through `data` and writes the new
+        // value into the first buffer word.
+        auto send_cnt = [](rpc_buffer_t *buffer, void *data) {
+          uint64_t &cnt = *static_cast<uint64_t *>(data);
+          buffer->data[0] = cnt = cnt + 1;
+        };
+
+        uint64_t cnt = 0;
+        // Initialized so that it never holds an indeterminate value; it is
+        // overwritten by the first receive below.
+        bool end_with_recv = false;
+        rpc_recv(
+            port,
+            [](rpc_buffer_t *buffer, void *data) {
+              *static_cast<bool *>(data) = buffer->data[0];
+            },
+            &end_with_recv);
+        rpc_recv(port, recv_cnt, &cnt);
+        rpc_send(port, send_cnt, &cnt);
+        rpc_recv(port, recv_cnt, &cnt);
+        rpc_send(port, send_cnt, &cnt);
+        rpc_recv(port, recv_cnt, &cnt);
+        rpc_recv(port, recv_cnt, &cnt);
+        rpc_send(port, send_cnt, &cnt);
+        rpc_send(port, send_cnt, &cnt);
+        if (end_with_recv)
+          rpc_recv(port, recv_cnt, &cnt);
+        else
+          rpc_send(port, send_cnt, &cnt);
+      },
+      nullptr);
+
+  // Register the stream test handler. It receives one buffer per lane and
+  // sends the same buffers back.
+  rpc_register_callback(
+      device_id, static_cast<rpc_opcode_t>(RPC_TEST_STREAM),
+      [](rpc_port_t port, void *) {
+        uint64_t sizes[RPC_MAXIMUM_LANE_SIZE] = {0};
+        void *dst[RPC_MAXIMUM_LANE_SIZE] = {nullptr};
+        rpc_recv_n(
+            port, dst, sizes,
+            [](uint64_t size, void *) -> void * { return new char[size]; },
+            nullptr);
+        rpc_send_n(port, dst, sizes);
+        // Release through the same element type used by the allocation above.
+        // Entries left as nullptr are fine: `delete[]` on null is a no-op.
+        for (void *ptr : dst)
+          delete[] static_cast<char *>(ptr);
+      },
+      nullptr);
+}
+
#endif
// Register RPC callbacks for the malloc and free functions on HSA.
uint32_t device_id = 0;
+ register_rpc_callbacks(device_id);
+
auto tuple = std::make_tuple(dev_agent, coarsegrained_pool);
rpc_register_callback(
device_id, RPC_MALLOC,
// Register RPC callbacks for the malloc and free functions on HSA.
uint32_t device_id = 0;
+ register_rpc_callbacks(device_id);
+
rpc_register_callback(
device_id, RPC_MALLOC,
[](rpc_port_t port, void *data) {
static_assert(RPC_MAXIMUM_PORT_COUNT == rpc::MAX_PORT_COUNT,
"Incorrect maximum port count");
+// The lane size exposed by the C interface must match the internal definition.
+static_assert(RPC_MAXIMUM_LANE_SIZE == rpc::MAX_LANE_SIZE,
+              "Incorrect maximum lane size");
+
// The client needs to support different lane sizes for the SIMT model. Because
// of this we need to select between the possible sizes that the client can use.
struct Server {
});
break;
}
- // TODO: Move handling of these test cases to the loader implementation.
- case RPC_TEST_INCREMENT: {
- port->recv_and_send([](rpc::Buffer *buffer) {
- reinterpret_cast<uint64_t *>(buffer->data)[0] += 1;
- });
- break;
- }
- case RPC_TEST_INTERFACE: {
- uint64_t cnt = 0;
- bool end_with_recv;
- port->recv([&](rpc::Buffer *buffer) { end_with_recv = buffer->data[0]; });
- port->recv([&](rpc::Buffer *buffer) { cnt = buffer->data[0]; });
- port->send([&](rpc::Buffer *buffer) { buffer->data[0] = cnt = cnt + 1; });
- port->recv([&](rpc::Buffer *buffer) { cnt = buffer->data[0]; });
- port->send([&](rpc::Buffer *buffer) { buffer->data[0] = cnt = cnt + 1; });
- port->recv([&](rpc::Buffer *buffer) { cnt = buffer->data[0]; });
- port->recv([&](rpc::Buffer *buffer) { cnt = buffer->data[0]; });
- port->send([&](rpc::Buffer *buffer) { buffer->data[0] = cnt = cnt + 1; });
- port->send([&](rpc::Buffer *buffer) { buffer->data[0] = cnt = cnt + 1; });
- if (end_with_recv)
- port->recv([&](rpc::Buffer *buffer) { cnt = buffer->data[0]; });
- else
- port->send(
- [&](rpc::Buffer *buffer) { buffer->data[0] = cnt = cnt + 1; });
- break;
- }
- case RPC_TEST_STREAM: {
- uint64_t sizes[rpc::MAX_LANE_SIZE] = {0};
- void *dst[rpc::MAX_LANE_SIZE] = {nullptr};
- port->recv_n(dst, sizes, [](uint64_t size) { return new char[size]; });
- port->send_n(dst, sizes);
- for (uint64_t i = 0; i < rpc::MAX_LANE_SIZE; ++i) {
- if (dst[i])
- delete[] reinterpret_cast<uint8_t *>(dst[i]);
- }
- break;
- }
case RPC_NOOP: {
port->recv([](rpc::Buffer *) {});
break;
port);
}
+/// Forward the \p src and \p size arrays to `send_n` on the port behind \p ref.
+void rpc_send_n(rpc_port_t ref, const void *const *src, uint64_t *size) {
+  auto port_variant = get_port(ref);
+  // Dispatch to whichever port alternative the variant currently holds.
+  std::visit([=](auto &active) { active->send_n(src, size); }, port_variant);
+}
+
void rpc_recv(rpc_port_t ref, rpc_port_callback_ty callback, void *data) {
auto port = get_port(ref);
std::visit(
port);
}
+/// Forward \p dst and \p size to `recv_n` on the port behind \p ref, using
+/// \p alloc (called with \p data) to obtain storage.
+void rpc_recv_n(rpc_port_t ref, void **dst, uint64_t *size, rpc_alloc_ty alloc,
+                void *data) {
+  auto port_variant = get_port(ref);
+  // Bind \p data so the port only sees a single-argument allocator.
+  auto allocate = [=](uint64_t bytes) { return alloc(bytes, data); };
+  // Dispatch to whichever port alternative the variant currently holds.
+  std::visit([=](auto &active) { active->recv_n(dst, size, allocate); },
+             port_variant);
+}
+
void rpc_recv_and_send(rpc_port_t ref, rpc_port_callback_ty callback,
void *data) {
auto port = get_port(ref);
/// The maximum number of ports that can be opened for any server.
const uint64_t RPC_MAXIMUM_PORT_COUNT = 512;
+/// The maximum number of parallel lanes that we can support.
+const uint64_t RPC_MAXIMUM_LANE_SIZE = 64;
+
/// The symbol name associated with the client for use with the LLVM C library
/// implementation.
-inline const char *rpc_client_symbol_name = "__llvm_libc_rpc_client";
+const char *const rpc_client_symbol_name = "__llvm_libc_rpc_client";
/// status codes.
typedef enum {
/// Use the \p port to send a buffer using the \p callback.
void rpc_send(rpc_port_t port, rpc_port_callback_ty callback, void *data);
+/// Use the \p port to send the buffers in \p src with the lengths in \p size.
+/// Both inputs are arrays of at least the configured lane size.
+void rpc_send_n(rpc_port_t port, const void *const *src, uint64_t *size);
+
/// Use the \p port to receive a buffer using the \p callback.
void rpc_recv(rpc_port_t port, rpc_port_callback_ty callback, void *data);
+/// Use the \p port to receive bytes into \p dst with their lengths in \p size.
+/// Both are arrays of at least the configured lane size. The \p alloc function
+/// allocates memory for the received bytes and is called with \p data.
+void rpc_recv_n(rpc_port_t port, void **dst, uint64_t *size, rpc_alloc_ty alloc,
+ void *data);
+
/// Use the \p port to receive and send a buffer using the \p callback.
void rpc_recv_and_send(rpc_port_t port, rpc_port_callback_ty callback,
void *data);