From: Jon Chesterfield Date: Thu, 7 Oct 2021 11:03:46 +0000 (+0100) Subject: [libomptarget] Move device environment to shared header, remove divergence X-Git-Tag: upstream/15.0.7~29360 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=0c554a4769f2e21233b687c7427c1a47f7bd375e;p=platform%2Fupstream%2Fllvm.git [libomptarget] Move device environment to shared header, remove divergence Follow on to D110006, related to D110957 Where implementations have diverged this resolves to match the new DeviceRTL - replaces definitions of this struct in deviceRTL and plugins with include - changes the dynamic_shared_size field from D110006 to 32 bits - handles stdint being unavailable in DeviceRTL - adds a zero initializer for the field to amdgpu - moves the extern declaration for deviceRTL to target_interface (omptarget.h is more natural, but doesn't work due to include order with debug.h) - Renames the fields everywhere to match the LLVM format used in DeviceRTL - Makes debug_level uint32_t everywhere (previously sometimes int32_t) Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D111069 --- diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt index bf2b6b1..da6f34a 100644 --- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt +++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt @@ -174,6 +174,7 @@ set(bc_flags -S -x c++ -std=c++17 -fopenmp -fopenmp-cuda-mode -Xclang -fopenmp-is-device -Xclang -target-feature -Xclang +ptx61 -I${include_directory} + -I${devicertl_base_directory}/../include ${LIBOMPTARGET_LLVM_INCLUDE_DIRS_DEVICERTL} ) diff --git a/openmp/libomptarget/DeviceRTL/src/Configuration.cpp b/openmp/libomptarget/DeviceRTL/src/Configuration.cpp index b725888..2b6f20f 100644 --- a/openmp/libomptarget/DeviceRTL/src/Configuration.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Configuration.cpp @@ -12,18 +12,12 @@ //===----------------------------------------------------------------------===// #include 
"Configuration.h" +#include "DeviceEnvironment.h" #include "State.h" #include "Types.h" using namespace _OMP; -struct DeviceEnvironmentTy { - uint32_t DebugKind; - uint32_t NumDevices; - uint32_t DeviceNum; - uint64_t DynamicMemSize; -}; - #pragma omp declare target extern uint32_t __omp_rtl_debug_kind; diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt index 6e92634..3f4c026 100644 --- a/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt +++ b/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt @@ -92,7 +92,6 @@ set(h_files ${CMAKE_CURRENT_SOURCE_DIR}/src/amdgcn_interface.h ${CMAKE_CURRENT_SOURCE_DIR}/src/target_impl.h ${devicertl_base_directory}/common/debug.h - ${devicertl_base_directory}/common/device_environment.h ${devicertl_base_directory}/common/omptarget.h ${devicertl_base_directory}/common/omptargeti.h ${devicertl_base_directory}/common/state-queue.h @@ -137,6 +136,7 @@ macro(add_cuda_bc_library) -I${CMAKE_CURRENT_SOURCE_DIR}/src -I${devicertl_base_directory}/common/include -I${devicertl_base_directory} + -I${devicertl_base_directory}/../include ${LIBOMPTARGET_LLVM_INCLUDE_DIRS_AMDGCN}) set(bc1_files) diff --git a/openmp/libomptarget/deviceRTLs/common/debug.h b/openmp/libomptarget/deviceRTLs/common/debug.h index 99c9b6c..4ca1e55 100644 --- a/openmp/libomptarget/deviceRTLs/common/debug.h +++ b/openmp/libomptarget/deviceRTLs/common/debug.h @@ -28,7 +28,6 @@ #ifndef _OMPTARGET_NVPTX_DEBUG_H_ #define _OMPTARGET_NVPTX_DEBUG_H_ -#include "common/device_environment.h" #include "target_interface.h" //////////////////////////////////////////////////////////////////////////////// diff --git a/openmp/libomptarget/deviceRTLs/common/src/omp_data.cu b/openmp/libomptarget/deviceRTLs/common/src/omp_data.cu index d420fad..aab16a3 100644 --- a/openmp/libomptarget/deviceRTLs/common/src/omp_data.cu +++ b/openmp/libomptarget/deviceRTLs/common/src/omp_data.cu @@ -12,7 +12,6 @@ #pragma omp declare target 
#include "common/allocator.h" -#include "common/device_environment.h" #include "common/omptarget.h" //////////////////////////////////////////////////////////////////////////////// @@ -20,7 +19,7 @@ //////////////////////////////////////////////////////////////////////////////// PLUGIN_ACCESSIBLE -omptarget_device_environmentTy omptarget_device_environment; +DeviceEnvironmentTy omptarget_device_environment; //////////////////////////////////////////////////////////////////////////////// // global data holding OpenMP state information diff --git a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt index 4ccadec..42cfbaf2 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt +++ b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt @@ -173,6 +173,7 @@ set(bc_flags -S -x c++ -O1 -std=c++14 -I${devicertl_base_directory} -I${devicertl_common_directory}/include -I${devicertl_nvptx_directory}/src + -I${devicertl_base_directory}/../include ${LIBOMPTARGET_LLVM_INCLUDE_DIRS_NVPTX}) if(${LIBOMPTARGET_NVPTX_DEBUG}) diff --git a/openmp/libomptarget/deviceRTLs/target_interface.h b/openmp/libomptarget/deviceRTLs/target_interface.h index c7ac065..8a1a2e4 100644 --- a/openmp/libomptarget/deviceRTLs/target_interface.h +++ b/openmp/libomptarget/deviceRTLs/target_interface.h @@ -13,6 +13,9 @@ #ifndef _OMPTARGET_TARGET_INTERFACE_H_ #define _OMPTARGET_TARGET_INTERFACE_H_ +#include <stdint.h> + +#include "DeviceEnvironment.h" #include "target_impl.h" // Calls to the NVPTX layer (assuming 1D layout) @@ -70,4 +73,6 @@ EXTERN void __kmpc_impl_free(void *); // Barrier until num_threads arrive. 
EXTERN void __kmpc_impl_named_sync(uint32_t num_threads); +extern DeviceEnvironmentTy omptarget_device_environment; + #endif // _OMPTARGET_TARGET_INTERFACE_H_ diff --git a/openmp/libomptarget/deviceRTLs/common/device_environment.h b/openmp/libomptarget/include/DeviceEnvironment.h similarity index 71% rename from openmp/libomptarget/deviceRTLs/common/device_environment.h rename to openmp/libomptarget/include/DeviceEnvironment.h index 5f94567..231492c 100644 --- a/openmp/libomptarget/deviceRTLs/common/device_environment.h +++ b/openmp/libomptarget/include/DeviceEnvironment.h @@ -1,4 +1,4 @@ -//===---- device_environment.h - OpenMP GPU device environment --- CUDA -*-===// +//===---- device_environment.h - OpenMP GPU device environment ---- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -13,14 +13,13 @@ #ifndef _OMPTARGET_DEVICE_ENVIRONMENT_H_ #define _OMPTARGET_DEVICE_ENVIRONMENT_H_ -#include "target_impl.h" +// deviceRTL uses <stdint> and DeviceRTL uses explicit definitions -struct omptarget_device_environmentTy { - int32_t debug_level; - uint32_t num_devices; - uint32_t device_num; +struct DeviceEnvironmentTy { + uint32_t DebugKind; + uint32_t NumDevices; + uint32_t DeviceNum; + uint32_t DynamicMemSize; }; -extern omptarget_device_environmentTy omptarget_device_environment; - #endif diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp index c4e49c7..ef1efef 100644 --- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp +++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp @@ -30,6 +30,7 @@ #include "internal.h" #include "rt.h" +#include "DeviceEnvironment.h" #include "get_elf_mach_gfx_name.h" #include "omptargetplugin.h" #include "print_tracing.h" @@ -802,14 +803,6 @@ public: pthread_mutex_t SignalPoolT::mutex = PTHREAD_MUTEX_INITIALIZER; -// TODO: May need to drop the trailing to fields until deviceRTL is 
updated -struct omptarget_device_environmentTy { - int32_t debug_level; // gets value of envvar LIBOMPTARGET_DEVICE_RTL_DEBUG - // only useful for Debug build of deviceRTLs - int32_t num_devices; // gets number of active offload devices - int32_t device_num; // gets a value 0 to num_devices-1 -}; - static RTLDeviceInfoTy DeviceInfo; namespace { @@ -1300,15 +1293,12 @@ __tgt_target_table *__tgt_rtl_load_binary(int32_t device_id, } struct device_environment { - // initialise an omptarget_device_environmentTy in the deviceRTL + // initialise an DeviceEnvironmentTy in the deviceRTL // patches around differences in the deviceRTL between trunk, aomp, // rocmcc. Over time these differences will tend to zero and this class // simplified. - // Symbol may be in .data or .bss, and may be missing fields: - // - aomp has debug_level, num_devices, device_num - // - trunk has debug_level - // - under review in trunk is debug_level, device_num - // - rocmcc matches aomp, patch to swap num_devices and device_num + // Symbol may be in .data or .bss, and may be missing fields, todo: + // review aomp/trunk/rocm and simplify the following // The symbol may also have been deadstripped because the device side // accessors were unused. @@ -1318,7 +1308,7 @@ struct device_environment { // gpu (trunk) and initialize after loading. 
const char *sym() { return "omptarget_device_environment"; } - omptarget_device_environmentTy host_device_env; + DeviceEnvironmentTy host_device_env; symbol_info si; bool valid = false; @@ -1329,12 +1319,13 @@ struct device_environment { __tgt_device_image *image, const size_t img_size) : image(image), img_size(img_size) { - host_device_env.num_devices = number_devices; - host_device_env.device_num = device_id; - host_device_env.debug_level = 0; + host_device_env.NumDevices = number_devices; + host_device_env.DeviceNum = device_id; + host_device_env.DebugKind = 0; + host_device_env.DynamicMemSize = 0; #ifdef OMPTARGET_DEBUG if (char *envStr = getenv("LIBOMPTARGET_DEVICE_RTL_DEBUG")) { - host_device_env.debug_level = std::stoi(envStr); + host_device_env.DebugKind = std::stoi(envStr); } #endif @@ -1374,7 +1365,7 @@ struct device_environment { if (!in_image()) { DP("Setting global device environment after load (%u bytes)\n", si.size); - int device_id = host_device_env.device_num; + int device_id = host_device_env.DeviceNum; auto &SymbolInfo = DeviceInfo.SymbolInfoTable[device_id]; void *state_ptr; uint32_t state_ptr_size; @@ -1430,9 +1421,9 @@ __tgt_target_table *__tgt_rtl_load_binary_locked(int32_t device_id, // This function loads the device image onto gpu[device_id] and does other // per-image initialization work. Specifically: // - // - Initialize an omptarget_device_environmentTy instance embedded in the + // - Initialize an DeviceEnvironmentTy instance embedded in the // image at the symbol "omptarget_device_environment" - // Fields debug_level, device_num, num_devices. Used by the deviceRTL. + // Fields DebugKind, DeviceNum, NumDevices. Used by the deviceRTL. 
// // - Allocate a large array per-gpu (could be moved to init_device) // - Read a uint64_t at symbol omptarget_nvptx_device_State_size diff --git a/openmp/libomptarget/plugins/cuda/src/rtl.cpp b/openmp/libomptarget/plugins/cuda/src/rtl.cpp index 6b370f1..931ab12 100644 --- a/openmp/libomptarget/plugins/cuda/src/rtl.cpp +++ b/openmp/libomptarget/plugins/cuda/src/rtl.cpp @@ -21,6 +21,7 @@ #include <vector> #include "Debug.h" +#include "DeviceEnvironment.h" #include "omptargetplugin.h" #define TARGET_NAME CUDA @@ -87,16 +88,6 @@ struct KernelTy { : Func(_Func), ExecutionMode(_ExecutionMode) {} }; -/// Device environment data -/// Manually sync with the deviceRTL side for now, move to a dedicated header -/// file later. -struct omptarget_device_environmentTy { - int32_t debug_level; - uint32_t num_devices; - uint32_t device_num; - uint64_t dynamic_shared_size; -}; - namespace { bool checkResult(CUresult Err, const char *ErrMsg) { if (Err == CUDA_SUCCESS) @@ -897,13 +888,13 @@ public: // send device environment data to the device { // TODO: The device ID used here is not the real device ID used by OpenMP. - omptarget_device_environmentTy DeviceEnv{ - 0, static_cast<uint32_t>(NumberOfDevices), - static_cast<uint32_t>(DeviceId), DynamicMemorySize}; + DeviceEnvironmentTy DeviceEnv{0, static_cast<uint32_t>(NumberOfDevices), + static_cast<uint32_t>(DeviceId), + static_cast<uint32_t>(DynamicMemorySize)}; #ifdef OMPTARGET_DEBUG if (const char *EnvStr = getenv("LIBOMPTARGET_DEVICE_RTL_DEBUG")) - DeviceEnv.debug_level = std::stoi(EnvStr); + DeviceEnv.DebugKind = std::stoi(EnvStr); #endif const char *DeviceEnvName = "omptarget_device_environment";