#include <memory>
#include <mutex>
#include <string>
+#include <unordered_map>
#include <vector>
#include "Debug.h"
/// Allocator derived from DeviceAllocatorTy that is bound to one device id and
/// serves allocate()/free() requests through the CUDA driver API.
class CUDADeviceAllocatorTy : public DeviceAllocatorTy {
// Id of the device this allocator instance was constructed for.
const int DeviceId;
// Reference to the per-device data vector handed in at construction time; the
// allocator does not own it.
const std::vector<DeviceDataTy> &DeviceData;
// Pointers handed out for TARGET_ALLOC_HOST requests. free() looks a pointer
// up here to decide between cuMemFreeHost and cuMemFree.
+ std::unordered_map<void *, TargetAllocTy> HostPinnedAllocs;
public:
/// Stores \p DeviceId and a reference to \p DeviceData; performs no allocation.
CUDADeviceAllocatorTy(int DeviceId, std::vector<DeviceDataTy> &DeviceData)
: DeviceId(DeviceId), DeviceData(DeviceData) {}
- void *allocate(size_t Size, void *) override {
+ /// Allocate \p Size bytes of the memory kind selected by \p Kind.
+ ///
+ /// TARGET_ALLOC_DEFAULT and TARGET_ALLOC_DEVICE use cuMemAlloc,
+ /// TARGET_ALLOC_HOST uses cuMemAllocHost (the pointer is recorded in
+ /// HostPinnedAllocs so free() can release it with cuMemFreeHost), and
+ /// TARGET_ALLOC_SHARED uses cuMemAllocManaged with CU_MEM_ATTACH_GLOBAL.
+ ///
+ /// \returns the allocated pointer, or nullptr when \p Size is zero, when a
+ /// driver call reports an error, or when \p Kind matches no handled case.
+ void *allocate(size_t Size, void *, TargetAllocTy Kind) override {
if (Size == 0)
return nullptr;
if (!checkResult(Err, "Error returned from cuCtxSetCurrent\n"))
return nullptr;
- CUdeviceptr DevicePtr;
- Err = cuMemAlloc(&DevicePtr, Size);
- if (!checkResult(Err, "Error returned from cuMemAlloc\n"))
- return nullptr;
+ void *MemAlloc = nullptr;
+ // Each case is braced so its local is scoped to that case, and the output
+ // parameter is only read after the driver call has been checked: on failure
+ // the driver may leave it unwritten.
+ switch (Kind) {
+ case TARGET_ALLOC_DEFAULT:
+ case TARGET_ALLOC_DEVICE: {
+ CUdeviceptr DevicePtr;
+ Err = cuMemAlloc(&DevicePtr, Size);
+ if (!checkResult(Err, "Error returned from cuMemAlloc\n"))
+ return nullptr;
+ MemAlloc = (void *)DevicePtr;
+ break;
+ }
+ case TARGET_ALLOC_HOST: {
+ void *HostPtr;
+ Err = cuMemAllocHost(&HostPtr, Size);
+ if (!checkResult(Err, "Error returned from cuMemAllocHost\n"))
+ return nullptr;
+ MemAlloc = HostPtr;
+ // Remember the pointer so free() routes it to cuMemFreeHost.
+ HostPinnedAllocs[MemAlloc] = Kind;
+ break;
+ }
+ case TARGET_ALLOC_SHARED: {
+ CUdeviceptr SharedPtr;
+ Err = cuMemAllocManaged(&SharedPtr, Size, CU_MEM_ATTACH_GLOBAL);
+ if (!checkResult(Err, "Error returned from cuMemAllocManaged\n"))
+ return nullptr;
+ MemAlloc = (void *)SharedPtr;
+ break;
+ }
+ }
- return (void *)DevicePtr;
+ return MemAlloc;
}
int free(void *TgtPtr) override {
if (!checkResult(Err, "Error returned from cuCtxSetCurrent\n"))
return OFFLOAD_FAIL;
- Err = cuMemFree((CUdeviceptr)TgtPtr);
- if (!checkResult(Err, "Error returned from cuMemFree\n"))
- return OFFLOAD_FAIL;
+ // Host pinned memory must be freed differently.
+ TargetAllocTy Kind =
+ (HostPinnedAllocs.find(TgtPtr) == HostPinnedAllocs.end())
+ ? TARGET_ALLOC_DEFAULT
+ : TARGET_ALLOC_HOST;
+ switch (Kind) {
+ case TARGET_ALLOC_DEFAULT:
+ case TARGET_ALLOC_DEVICE:
+ case TARGET_ALLOC_SHARED:
+ Err = cuMemFree((CUdeviceptr)TgtPtr);
+ if (!checkResult(Err, "Error returned from cuMemFree\n"))
+ return OFFLOAD_FAIL;
+ break;
+ case TARGET_ALLOC_HOST:
+ Err = cuMemFreeHost(TgtPtr);
+ if (!checkResult(Err, "Error returned from cuMemFreeHost\n"))
+ return OFFLOAD_FAIL;
+ break;
+ }
return OFFLOAD_SUCCESS;
}
return getOffloadEntriesTable(DeviceId);
}
/// Allocates \p Size bytes for device \p DeviceId using the allocation kind
/// \p Kind and returns the resulting pointer, or nullptr (after a REPORT) when
/// \p Kind matches none of the handled cases.
- void *dataAlloc(const int DeviceId, const int64_t Size) {
- if (UseMemoryManager)
- return MemoryManagers[DeviceId]->allocate(Size, nullptr);
+ void *dataAlloc(const int DeviceId, const int64_t Size,
+ const TargetAllocTy Kind) {
+ switch (Kind) {
// Default and device requests go through the per-device memory manager when
// UseMemoryManager is set; otherwise they go to the device allocator.
+ case TARGET_ALLOC_DEFAULT:
+ case TARGET_ALLOC_DEVICE:
+ if (UseMemoryManager)
+ return MemoryManagers[DeviceId]->allocate(Size, nullptr);
+ else
+ return DeviceAllocators[DeviceId].allocate(Size, nullptr, Kind);
// Host and shared requests always go straight to the device allocator and are
// never handed to the memory manager.
+ case TARGET_ALLOC_HOST:
+ case TARGET_ALLOC_SHARED:
+ return DeviceAllocators[DeviceId].allocate(Size, nullptr, Kind);
+ }
- return DeviceAllocators[DeviceId].allocate(Size, nullptr);
// Only reached when Kind did not match any case above.
+ REPORT("Invalid target data allocation kind or requested allocator not "
+ "implemented yet\n");
+
+ return nullptr;
}
int dataSubmit(const int DeviceId, const void *TgtPtr, const void *HstPtr,
int32_t kind) {
assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid");
- if (kind != TARGET_ALLOC_DEFAULT) {
- REPORT("Invalid target data allocation kind or requested allocator not "
- "implemented yet\n");
- return NULL;
- }
-
- return DeviceRTL.dataAlloc(device_id, size);
+ return DeviceRTL.dataAlloc(device_id, size, (TargetAllocTy)kind);
}
int32_t __tgt_rtl_data_submit(int32_t device_id, void *tgt_ptr, void *hst_ptr,