// Gather the HSA agent of every GPU processor reported by g_atl_machine.
std::vector<hsa_agent_t> gpu_agents;
int gpu_count = g_atl_machine.processorCount<ATLGPUProcessor>();
for (int gpu = 0; gpu < gpu_count; gpu++) {
- atmi_place_t place = ATMI_PLACE_GPU(0, gpu);
- ATLGPUProcessor &proc = get_processor<ATLGPUProcessor>(place);
+ ATLGPUProcessor &proc = get_processor<ATLGPUProcessor>(gpu);
// Agents are appended in loop order, so index i of gpu_agents holds GPU i.
gpu_agents.push_back(proc.agent());
}
atlc.g_tasks_initialized = true;
hsa_status_t RegisterModuleFromMemory(
std::map<std::string, atl_kernel_info_t> &KernelInfoTable,
std::map<std::string, atl_symbol_info_t> &SymbolInfoTable,
- void *module_bytes, size_t module_size, atmi_place_t place,
+ void *module_bytes, size_t module_size, int gpu,
hsa_status_t (*on_deserialized_data)(void *data, size_t size,
void *cb_state),
void *cb_state, std::vector<hsa_executable_t> &HSAExecutables) {
hsa_status_t err;
- int gpu = place.device_id;
assert(gpu >= 0);
DEBUG_PRINT("Trying to load module to GPU-%d\n", gpu);
- ATLGPUProcessor &proc = get_processor<ATLGPUProcessor>(place);
+ ATLGPUProcessor &proc = get_processor<ATLGPUProcessor>(gpu);
hsa_agent_t agent = proc.agent();
hsa_executable_t executable = {0};
hsa_profile_t agent_profile;
// Declaration of RegisterModuleFromMemory. The target GPU is named by the
// integer DeviceId. on_deserialized_data is a plain function pointer taking a
// data pointer, its size, and the opaque cb_state pointer that is passed as
// the following argument.
// NOTE(review): the unnamed void* / size_t parameters are presumably the
// module bytes and their length — confirm against the definition.
hsa_status_t RegisterModuleFromMemory(
std::map<std::string, atl_kernel_info_t> &KernelInfo,
std::map<std::string, atl_symbol_info_t> &SymbolInfoTable, void *, size_t,
- atmi_place_t,
+ int DeviceId,
hsa_status_t (*on_deserialized_data)(void *data, size_t size,
void *cb_state),
void *cb_state, std::vector<hsa_executable_t> &HSAExecutables);
/// FIXME: we may need this to be per device and per library.
std::list<KernelTy> KernelsList;
-// ATMI API to get gpu and gpu memory place
-static atmi_place_t get_gpu_place(int device_id) {
- return ATMI_PLACE_GPU(0, device_id);
-}
-
static std::vector<hsa_agent_t> find_gpu_agents() {
std::vector<hsa_agent_t> res;
// Adapter around core::RegisterModuleFromMemory that lets the caller supply a
// C++ callable `cb` where the core API takes a function pointer plus a void*
// state argument. `cb` is invoked as cb(data, size) and its result is returned
// from the trampoline as an hsa_status_t.
// NOTE(review): `C` is presumably a template parameter declared just above
// this hunk — confirm.
hsa_status_t module_register_from_memory_to_place(
std::map<std::string, atl_kernel_info_t> &KernelInfoTable,
std::map<std::string, atl_symbol_info_t> &SymbolInfoTable,
- void *module_bytes, size_t module_size, atmi_place_t place, C cb,
+ void *module_bytes, size_t module_size, int DeviceId, C cb,
std::vector<hsa_executable_t> &HSAExecutables) {
// Captureless lambda: recovers the callable from the opaque state pointer and
// forwards (data, size) to it.
auto L = [](void *data, size_t size, void *cb_state) -> hsa_status_t {
C *unwrapped = static_cast<C *>(cb_state);
return (*unwrapped)(data, size);
};
return core::RegisterModuleFromMemory(
- KernelInfoTable, SymbolInfoTable, module_bytes, module_size, place, L,
+ KernelInfoTable, SymbolInfoTable, module_bytes, module_size, DeviceId, L,
// The state pointer is the address of the by-value parameter `cb`, so it must
// not be used after this function returns.
static_cast<void *>(&cb), HSAExecutables);
}
} // namespace
auto &KernelInfo = DeviceInfo.KernelInfoTable[device_id];
auto &SymbolInfo = DeviceInfo.SymbolInfoTable[device_id];
hsa_status_t err = module_register_from_memory_to_place(
- KernelInfo, SymbolInfo, (void *)image->ImageStart, img_size,
- get_gpu_place(device_id),
+ KernelInfo, SymbolInfo, (void *)image->ImageStart, img_size, device_id,
[&](void *data, size_t size) {
if (image_contains_symbol(data, size, "needs_hostcall_buffer")) {
__atomic_store_n(&DeviceInfo.hostcall_required, true,