From 6e1b11087f080b1cb9a023f9f920d29d5465633e Mon Sep 17 00:00:00 2001 From: Jon Chesterfield Date: Wed, 19 Aug 2020 15:44:30 +0100 Subject: [PATCH] [libomptarget][amdgpu] Support building with static rocm libraries --- openmp/libomptarget/plugins/amdgpu/CMakeLists.txt | 33 +++++++++++++---------- openmp/libomptarget/plugins/amdgpu/impl/data.cpp | 6 ++--- openmp/libomptarget/plugins/amdgpu/src/rtl.cpp | 8 +++--- 3 files changed, 26 insertions(+), 21 deletions(-) diff --git a/openmp/libomptarget/plugins/amdgpu/CMakeLists.txt b/openmp/libomptarget/plugins/amdgpu/CMakeLists.txt index 47ae00e..6498565 100644 --- a/openmp/libomptarget/plugins/amdgpu/CMakeLists.txt +++ b/openmp/libomptarget/plugins/amdgpu/CMakeLists.txt @@ -18,26 +18,30 @@ if(NOT LIBOMPTARGET_DEP_LIBELF_FOUND) return() endif() -if(NOT ROCM_DIR) - libomptarget_say("Not building AMDGPU plugin: ROCM_DIR is not set") +# rocr cmake uses DHSAKMT_INC_PATH, DHSAKMT_LIB_PATH to find roct +# following that, look for DHSA_INC_PATH, DHSA_LIB_PATH, which allows +# builds to use source and library files from various locations + +if(ROCM_DIR) + set(HSA_INC_PATH ${ROCM_DIR}/hsa/include ${ROCM_DIR}/hsa/include/hsa) + set(HSA_LIB_PATH ${ROCM_DIR}/hsa/lib) + set(HSAKMT_INC_PATH "") + set(HSAKMT_LIB_PATH ${ROCM_DIR}/lib) +elseif(NOT (HSA_INC_PATH AND HSA_LIB_PATH AND HSAKMT_INC_PATH AND HSAKMT_LIB_PATH)) + libomptarget_say("Not building AMDGPU plugin: ROCM library paths unspecified") return() endif() -set(LIBOMPTARGET_DEP_LIBHSA_INCLUDE_DIRS ${ROCM_DIR}/hsa/include ${ROCM_DIR}/hsa/include/hsa) -set(LIBOMPTARGET_DEP_LIBHSA_LIBRARIES_DIRS ${ROCM_DIR}/hsa/lib) -set(LIBOMPTARGET_DEP_LIBHSAKMT_LIBRARIES_DIRS ${ROCM_DIR}/lib) - -mark_as_advanced( LIBOMPTARGET_DEP_LIBHSA_INCLUDE_DIRS LIBOMPTARGET_DEP_LIBHSA_LIBRARIES_DIRS) - if(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "(x86_64)|(ppc64le)|(aarch64)$" AND CMAKE_SYSTEM_NAME MATCHES "Linux") libomptarget_say("Not building amdgpu plugin: only support amdgpu in Linux x86_64, ppc64le, or aarch64 hosts.") return() endif() -libomptarget_say("Building amdgpu offloading plugin using ROCM_DIR = ${ROCM_DIR}") +libomptarget_say("Building amdgpu offloading plugin") -libomptarget_say("LIBOMPTARGET_DEP_LIBHSA_INCLUDE_DIRS: ${LIBOMPTARGET_DEP_LIBHSA_INCLUDE_DIRS}") -libomptarget_say("LIBOMPTARGET_DEP_LIBHSA_LIBRARIES_DIRS ${LIBOMPTARGET_DEP_LIBHSA_LIBRARIES_DIRS}") -libomptarget_say("LIBOMPTARGET_DEP_LIBHSAKMT_LIBRARIES_DIRS: ${LIBOMPTARGET_DEP_LIBHSAKMT_LIBRARIES_DIRS}") +libomptarget_say("HSA plugin: HSA_INC_PATH: ${HSA_INC_PATH}") +libomptarget_say("HSA plugin: HSA_LIB_PATH: ${HSA_LIB_PATH}") +libomptarget_say("HSA plugin: HSAKMT_INC_PATH: ${HSAKMT_INC_PATH}") +libomptarget_say("HSA plugin: HSAKMT_LIB_PATH: ${HSAKMT_LIB_PATH}") ################################################################################ # Define the suffix for the runtime messaging dumps. @@ -51,7 +55,7 @@ if(CMAKE_BUILD_TYPE MATCHES Debug) endif() include_directories( - ${LIBOMPTARGET_DEP_LIBHSA_INCLUDE_DIRS} + ${HSA_INC_PATH} ${CMAKE_CURRENT_SOURCE_DIR}/impl ) @@ -70,10 +74,11 @@ add_library(omptarget.rtl.amdgpu SHARED # When we build for debug, OPENMP_LIBDIR_SUFFIX get set to -debug install(TARGETS omptarget.rtl.amdgpu LIBRARY DESTINATION "lib${OPENMP_LIBDIR_SUFFIX}") +add_dependencies(omptarget.rtl.amdgpu hsa-runtime64 hsakmt) target_link_libraries( omptarget.rtl.amdgpu -lpthread -ldl -Wl,-rpath,${OPENMP_INSTALL_LIBDIR} - -L${LIBOMPTARGET_DEP_LIBHSA_LIBRARIES_DIRS} -L${LIBOMPTARGET_DEP_LIBHSAKMT_LIBRARIES_DIRS} -lhsa-runtime64 -lhsakmt -Wl,-rpath,${LIBOMPTARGET_DEP_LIBHSA_LIBRARIES_DIRS},-rpath,${LIBOMPTARGET_DEP_LIBHSAKMT_LIBRARIES_DIRS} + -L${HSA_LIB_PATH} -L${HSAKMT_LIB_PATH} -lhsa-runtime64 -lhsakmt -Wl,-rpath,${HSA_LIB_PATH},-rpath,${HSAKMT_LIB_PATH} -lelf "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/../exports" "-Wl,-z,defs" diff --git a/openmp/libomptarget/plugins/amdgpu/impl/data.cpp b/openmp/libomptarget/plugins/amdgpu/impl/data.cpp index cf5cd8f..1d20fc9 100644 --- a/openmp/libomptarget/plugins/amdgpu/impl/data.cpp +++ b/openmp/libomptarget/plugins/amdgpu/impl/data.cpp @@ -133,15 +133,15 @@ atmi_status_t Runtime::Memfree(void *ptr) { static hsa_status_t invoke_hsa_copy(void *dest, const void *src, size_t size, hsa_agent_t agent) { // TODO: Use thread safe signal - hsa_signal_store_release(IdentityCopySignal, 1); + hsa_signal_store_screlease(IdentityCopySignal, 1); hsa_status_t err = hsa_amd_memory_async_copy(dest, agent, src, agent, size, 0, NULL, IdentityCopySignal); ErrorCheck(Copy async between memory pools, err); // TODO: async reports errors in the signal, use NE 1 - hsa_signal_wait_acquire(IdentityCopySignal, HSA_SIGNAL_CONDITION_EQ, 0, - UINT64_MAX, ATMI_WAIT_STATE); + hsa_signal_wait_scacquire(IdentityCopySignal, HSA_SIGNAL_CONDITION_EQ, 0, + UINT64_MAX, ATMI_WAIT_STATE); return err; } diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp index 54d42e0..d6f0f95 100644 --- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp +++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp @@ -1495,7 +1495,7 @@ static uint64_t acquire_available_packet_id(hsa_queue_t *queue) { bool full = true; while (full) { full = - packet_id >= (queue->size + hsa_queue_load_read_index_acquire(queue)); + packet_id >= (queue->size + hsa_queue_load_read_index_scacquire(queue)); } return packet_id; } @@ -1652,9 +1652,9 @@ int32_t __tgt_rtl_run_target_team_region(int32_t device_id, void *tgt_entry_ptr, hsa_signal_store_relaxed(queue->doorbell_signal, packet_id); - while (hsa_signal_wait_acquire(packet->completion_signal, - HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX, - HSA_WAIT_STATE_BLOCKED) != 0) + while (hsa_signal_wait_scacquire(packet->completion_signal, + HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX, + HSA_WAIT_STATE_BLOCKED) != 0) ; assert(ArgPool); -- 2.7.4