};
pthread_mutex_t KernelArgPool::Mutex = PTHREAD_MUTEX_INITIALIZER;
-std::unordered_map<std::string /*kernel*/, std::unique_ptr<KernelArgPool>>
- KernelArgPoolMap;
-
/// Use a single entity to encode a kernel and a set of flags
struct KernelTy {
llvm::omp::OMPTgtExecModeFlags ExecutionMode;
KernelTy(llvm::omp::OMPTgtExecModeFlags ExecutionMode, int16_t ConstWgSize,
int32_t DeviceId, void *CallStackAddr, const char *Name,
uint32_t KernargSegmentSize,
- hsa_amd_memory_pool_t &KernArgMemoryPool)
+ hsa_amd_memory_pool_t &KernArgMemoryPool,
+ std::unordered_map<std::string, std::unique_ptr<KernelArgPool>>
+ &KernelArgPoolMap)
: ExecutionMode(ExecutionMode), ConstWGSize(ConstWgSize),
DeviceId(DeviceId), CallStackAddr(CallStackAddr), Name(Name) {
DP("Construct kernelinfo: ExecMode %d\n", ExecutionMode);
}
};
-/// List that contains all the kernels.
-/// FIXME: we may need this to be per device and per library.
-std::list<KernelTy> KernelsList;
-
template <typename Callback> static hsa_status_t findAgents(Callback CB) {
hsa_status_t Err =
int NumberOfDevices = 0;
+ /// List that contains all the kernels.
+ /// FIXME: we may need this to be per device and per library.
+ std::list<KernelTy> KernelsList;
+ std::unordered_map<std::string /*kernel*/, std::unique_ptr<KernelArgPool>>
+ KernelArgPoolMap;
+
// GPU devices
std::vector<hsa_agent_t> HSAAgents;
std::vector<HSAQueueScheduler> HSAQueueSchedulers; // one per gpu
"Unexpected device id!");
FuncGblEntries[DeviceId].emplace_back();
FuncOrGblEntryTy &E = FuncGblEntries[DeviceId].back();
- // KernelArgPoolMap.clear();
E.Entries.clear();
E.Table.EntriesBegin = E.Table.EntriesEnd = 0;
}
static RTLDeviceInfoTy *DeviceInfoState = nullptr;
static RTLDeviceInfoTy &DeviceInfo() { return *DeviceInfoState; }
-int32_t __tgt_rtl_init_plugin() {
- DeviceInfoState = new RTLDeviceInfoTy;
- return (DeviceInfoState && DeviceInfoState->ConstructionSucceeded)
- ? OFFLOAD_SUCCESS
- : OFFLOAD_FAIL;
-}
-
-int32_t __tgt_rtl_deinit_plugin() {
- if (DeviceInfoState)
- delete DeviceInfoState;
- return OFFLOAD_SUCCESS;
-}
-
namespace {
int32_t dataRetrieve(int32_t DeviceId, void *HstPtr, void *TgtPtr, int64_t Size,
(long long unsigned)(Elf64_Addr)HstPtr,
(long long unsigned)(Elf64_Addr)TgtPtr);
Err = DeviceInfo().freesignalpoolMemcpyH2D(TgtPtr, HstPtr, (size_t)Size,
- DeviceId);
+ DeviceId);
if (Err != HSA_STATUS_SUCCESS) {
DP("Error when copying data from host to device. Pointers: "
"host = 0x%016lx, device = 0x%016lx, size = %lld\n",
KernelArgPool *ArgPool = nullptr;
void *KernArg = nullptr;
{
- auto It = KernelArgPoolMap.find(std::string(KernelInfo->Name));
- if (It != KernelArgPoolMap.end()) {
+ auto It =
+ DeviceInfo().KernelArgPoolMap.find(std::string(KernelInfo->Name));
+ if (It != DeviceInfo().KernelArgPoolMap.end()) {
ArgPool = (It->second).get();
}
}
}
extern "C" {
+
+int32_t __tgt_rtl_init_plugin() {
+ DeviceInfoState = new RTLDeviceInfoTy;
+ return (DeviceInfoState && DeviceInfoState->ConstructionSucceeded)
+ ? OFFLOAD_SUCCESS
+ : OFFLOAD_FAIL;
+}
+
+int32_t __tgt_rtl_deinit_plugin() {
+ if (DeviceInfoState)
+ delete DeviceInfoState;
+ return OFFLOAD_SUCCESS;
+}
+
int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *Image) {
return elfMachineIdIsAmdgcn(Image);
}
}
// write ptr to device memory so it can be used by later kernels
- Err = DeviceInfo().freesignalpoolMemcpyH2D(StatePtr, &Ptr, sizeof(void *),
- DeviceId);
+ Err = DeviceInfo().freesignalpoolMemcpyH2D(StatePtr, &Ptr,
+ sizeof(void *), DeviceId);
if (Err != HSA_STATUS_SUCCESS) {
DP("memcpy install of state_ptr failed\n");
return NULL;
// If unified memory is present any target link variables
// can access host addresses directly. There is no longer a
// need for device copies.
- Err = DeviceInfo().freesignalpoolMemcpyH2D(Varptr, E->addr,
- sizeof(void *), DeviceId);
+ Err = DeviceInfo().freesignalpoolMemcpyH2D(Varptr, E->addr, sizeof(void *),
+ DeviceId);
if (Err != HSA_STATUS_SUCCESS)
DP("Error when copying USM\n");
DP("Copy linked variable host address (" DPxMOD ")"
}
check("Loading computation property", Err);
- KernelsList.push_back(KernelTy(ExecModeVal, WGSizeVal, DeviceId,
- CallStackAddr, E->name, KernargSegmentSize,
- DeviceInfo().KernArgPool));
+ DeviceInfo().KernelsList.push_back(
+ KernelTy(ExecModeVal, WGSizeVal, DeviceId, CallStackAddr, E->name,
+ KernargSegmentSize, DeviceInfo().KernArgPool,
+ DeviceInfo().KernelArgPoolMap));
__tgt_offload_entry Entry = *E;
- Entry.addr = (void *)&KernelsList.back();
+ Entry.addr = (void *)&DeviceInfo().KernelsList.back();
DeviceInfo().addOffloadEntry(DeviceId, Entry);
DP("Entry point %ld maps to %s\n", E - HostBegin, E->name);
}