From e191d3115921d9b5b6602747bff72a1f2cf565c4 Mon Sep 17 00:00:00 2001
From: Jon Chesterfield
Date: Wed, 9 Dec 2020 19:12:10 +0000
Subject: [PATCH] [libomptarget][amdgpu] Robust handling of device_environment symbol

---
 openmp/libomptarget/plugins/amdgpu/src/rtl.cpp | 163 ++++++++++++++++++++-----
 1 file changed, 131 insertions(+), 32 deletions(-)

diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
index 60040d1..e13d769 100644
--- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
@@ -891,6 +891,7 @@ const Elf64_Sym *elf_lookup(Elf *elf, char *base, Elf64_Shdr *section_hash,
 typedef struct {
   void *addr = nullptr;
   uint32_t size = UINT32_MAX;
+  uint32_t sh_type = SHT_NULL;
 } symbol_info;
 
 int get_symbol_info_without_loading(Elf *elf, char *base, const char *symname,
@@ -913,8 +914,23 @@ int get_symbol_info_without_loading(Elf *elf, char *base, const char *symname,
     return 1;
   }
 
-  res->size = static_cast<uint32_t>(sym->st_size);
+  if (sym->st_shndx == SHN_UNDEF) {
+    return 1;
+  }
+
+  Elf_Scn *section = elf_getscn(elf, sym->st_shndx);
+  if (!section) {
+    return 1;
+  }
+
+  Elf64_Shdr *header = elf64_getshdr(section);
+  if (!header) {
+    return 1;
+  }
+
   res->addr = sym->st_value + base;
+  res->size = static_cast<uint32_t>(sym->st_size);
+  res->sh_type = header->sh_type;
   return 0;
 }
 
@@ -992,6 +1008,107 @@ __tgt_target_table *__tgt_rtl_load_binary(int32_t device_id,
   return res;
 }
 
+struct device_environment {
+  // Initialises the omptarget_device_environmentTy instance in the deviceRTL,
+  // patching around differences in the deviceRTL between trunk, aomp and
+  // rocmcc. Over time these differences will tend to zero and this class can
+  // be simplified.
+  // Symbol may be in .data or .bss, and may be missing fields:
+  //  - aomp has debug_level, num_devices, device_num
+  //  - trunk has debug_level
+  //  - under review in trunk is debug_level, device_num
+  //  - rocmcc matches aomp, patch to swap num_devices and device_num
+
+  // If the symbol is in .data (aomp, rocm) it can be written directly.
+  // If it is in .bss, we must wait for it to be allocated space on the
+  // gpu (trunk) and initialize after loading.
+  const char *sym() { return "omptarget_device_environment"; }
+
+  omptarget_device_environmentTy host_device_env;
+  symbol_info si;     // address, size and section type found in the ELF image
+  bool valid = false; // true once the symbol is found and not oversized
+
+  __tgt_device_image *image;
+  const size_t img_size;
+
+  // Fills in the host copy of the environment and looks the symbol up in the
+  // not-yet-loaded image. Leaves valid == false if the symbol is missing or
+  // larger than the host struct.
+  device_environment(int device_id, int number_devices,
+                     __tgt_device_image *image, const size_t img_size)
+      : image(image), img_size(img_size) {
+
+    host_device_env.num_devices = number_devices;
+    host_device_env.device_num = device_id;
+    host_device_env.debug_level = 0;
+#ifdef OMPTARGET_DEBUG
+    if (char *envStr = getenv("LIBOMPTARGET_DEVICE_RTL_DEBUG")) {
+      host_device_env.debug_level = std::stoi(envStr);
+    }
+#endif
+
+    int rc = get_symbol_info_without_loading((char *)image->ImageStart,
+                                             img_size, sym(), &si);
+    if (rc != 0) {
+      DP("Finding global device environment '%s' - symbol missing.\n", sym());
+      return;
+    }
+
+    if (si.size > sizeof(host_device_env)) {
+      DP("Symbol '%s' has size %u, expected at most %zu.\n", sym(), si.size,
+         sizeof(host_device_env));
+      return;
+    }
+
+    valid = true;
+  }
+
+  // True if the symbol's section occupies space in the image (not SHT_NOBITS).
+  bool in_image() { return si.sh_type != SHT_NOBITS; }
+
+  // Intended as the module-registration callback: if the symbol has bytes in
+  // the image, copy the host environment over them at the same offset in data.
+  atmi_status_t before_loading(void *data, size_t size) {
+    assert(valid);
+    if (in_image()) {
+      DP("Setting global device environment before load (%u bytes)\n", si.size);
+      uint64_t offset = (char *)si.addr - (char *)image->ImageStart;
+      void *pos = (char *)data + offset;
+      memcpy(pos, &host_device_env, si.size);
+    }
+    return ATMI_STATUS_SUCCESS;
+  }
+
+  // If the symbol had no bytes in the image, look it up on the device after
+  // loading and copy the host environment to it. Fails on a size mismatch.
+  atmi_status_t after_loading() {
+    assert(valid);
+    if (!in_image()) {
+      DP("Setting global device environment after load (%u bytes)\n", si.size);
+      int device_id = host_device_env.device_num;
+
+      void *state_ptr;
+      uint32_t state_ptr_size;
+      atmi_status_t err = atmi_interop_hsa_get_symbol_info(
+          get_gpu_mem_place(device_id), sym(), &state_ptr, &state_ptr_size);
+      if (err != ATMI_STATUS_SUCCESS) {
+        DP("failed to find %s in loaded image\n", sym());
+        return err;
+      }
+
+      if (state_ptr_size != si.size) {
+        DP("Symbol had size %u before loading, %u after\n", si.size,
+           state_ptr_size);
+        return ATMI_STATUS_ERROR;
+      }
+
+      return DeviceInfo.freesignalpool_memcpy_h2d(state_ptr, &host_device_env,
+                                                  state_ptr_size, device_id);
+    }
+    return ATMI_STATUS_SUCCESS;
+  }
+};
+
 static atmi_status_t atmi_calloc(void **ret_ptr, size_t size,
                                  atmi_mem_place_t place) {
   uint64_t rounded = 4 * ((size + 3) / 4);
@@ -1047,41 +1164,18 @@ __tgt_target_table *__tgt_rtl_load_binary_locked(int32_t device_id,
     return NULL;
   }
 
-  omptarget_device_environmentTy host_device_env;
-  host_device_env.num_devices = DeviceInfo.NumberOfDevices;
-  host_device_env.device_num = device_id;
-  host_device_env.debug_level = 0;
-#ifdef OMPTARGET_DEBUG
-  if (char *envStr = getenv("LIBOMPTARGET_DEVICE_RTL_DEBUG")) {
-    host_device_env.debug_level = std::stoi(envStr);
-  }
-#endif
-
-  auto on_deserialized_data = [&](void *data, size_t size) -> atmi_status_t {
-    const char *device_env_Name = "omptarget_device_environment";
-    symbol_info si;
-    int rc = get_symbol_info_without_loading((char *)image->ImageStart,
-                                             img_size, device_env_Name, &si);
-    if (rc != 0) {
-      DP("Finding global device environment '%s' - symbol missing.\n",
-         device_env_Name);
-      // no need to return FAIL, consider this is a not a device debug build.
-      return ATMI_STATUS_SUCCESS;
-    }
-    if (si.size != sizeof(host_device_env)) {
-      return ATMI_STATUS_ERROR;
+  {
+    auto env = device_environment(device_id, DeviceInfo.NumberOfDevices, image,
+                                  img_size);
+    if (!env.valid) {
+      return NULL;
     }
-    DP("Setting global device environment %u bytes\n", si.size);
-    uint64_t offset = (char *)si.addr - (char *)image->ImageStart;
-    void *pos = (char *)data + offset;
-    memcpy(pos, &host_device_env, sizeof(host_device_env));
-    return ATMI_STATUS_SUCCESS;
-  };
 
-  {
     atmi_status_t err = module_register_from_memory_to_place(
         (void *)image->ImageStart, img_size, get_gpu_place(device_id),
-        on_deserialized_data);
+        [&](void *data, size_t size) {
+          return env.before_loading(data, size);
+        });
 
     check("Module registering", err);
     if (err != ATMI_STATUS_SUCCESS) {
@@ -1092,6 +1186,11 @@ __tgt_target_table *__tgt_rtl_load_binary_locked(int32_t device_id,
               get_elf_mach_gfx_name(elf_e_flags(image)));
       return NULL;
     }
+
+    err = env.after_loading();
+    if (err != ATMI_STATUS_SUCCESS) {
+      return NULL;
+    }
   }
 
   DP("ATMI module successfully loaded!\n");
-- 
2.7.4