From d16a0877d8ac12a49fc75ae651247f338d46fead Mon Sep 17 00:00:00 2001 From: Jose Manuel Monsalve Diaz Date: Wed, 1 Jun 2022 21:49:23 +0000 Subject: [PATCH] [LIBOMPTARGET] Adding AMD to llvm-omp-device-info Adding device information print for AMD devices on the `llvm-omp-device-info` command line tool. The output is inspired by the rocminfo command line tool. This commit adds missing HSA functions, enums and structs needed to query additional information from the HSA agents. A generic message for the `generic-elf-64bit` plugin is also added Example of an output: ``` llvm-omp-device-info Device (0): This is a generic-elf-64bit device Device (1): This is a generic-elf-64bit device Device (2): This is a generic-elf-64bit device Device (3): This is a generic-elf-64bit device Device (4): HSA Runtime Version: 1.1 HSA OpenMP Device Number: 0 Device Name: gfx906 Vendor Name: AMD Device Type: GPU Max Queues: 128 Queue Min Size: 64 Queue Max Size: 131072 Cache: L0: 16384 bytes L1: 8388608 bytes Cacheline Size: 64 Max Clock Freq(MHz): 1725 Compute Units: 60 SIMD per CU: 4 Fast F16 Operation: TRUE Wavefront Size: 64 Workgroup Max Size: 1024 Workgroup Max Size per Dimension: x: 1024 y: 1024 z: 1024 Max Waves Per CU: 40 Max Work-item Per CU: 2560 Grid Max Size: 4294967295 Grid Max Size per Dimension: x: 4294967295 y: 4294967295 z: 4294967295 Max fbarriers/Workgrp: 32 Memory Pools: Pool GLOBAL; FLAGS: COARSE GRAINED, : Size: 34342961152 bytes Allocatable: TRUE Runtime Alloc Granule: 4096 bytes Runtime Alloc alignment: 4096 bytes Accessable by all: FALSE Pool GLOBAL; FLAGS: FINE GRAINED, : Size: 34342961152 bytes Allocatable: TRUE Runtime Alloc Granule: 4096 bytes Runtime Alloc alignment: 4096 bytes Accessable by all: FALSE Pool GROUP: Size: 65536 bytes Allocatable: FALSE Runtime Alloc Granule: 0 bytes Runtime Alloc alignment: 0 bytes Accessable by all: FALSE Device (5): HSA Runtime Version: 1.1 HSA OpenMP Device Number: 1 Device Name: gfx906 Vendor Name: AMD Device Type: GPU Max 
Queues: 128 Queue Min Size: 64 Queue Max Size: 131072 Cache: L0: 16384 bytes L1: 8388608 bytes Cacheline Size: 64 Max Clock Freq(MHz): 1725 Compute Units: 60 SIMD per CU: 4 Fast F16 Operation: TRUE Wavefront Size: 64 Workgroup Max Size: 1024 Workgroup Max Size per Dimension: x: 1024 y: 1024 z: 1024 Max Waves Per CU: 40 Max Work-item Per CU: 2560 Grid Max Size: 4294967295 Grid Max Size per Dimension: x: 4294967295 y: 4294967295 z: 4294967295 Max fbarriers/Workgrp: 32 Memory Pools: Pool GLOBAL; FLAGS: COARSE GRAINED, : Size: 34342961152 bytes Allocatable: TRUE Runtime Alloc Granule: 4096 bytes Runtime Alloc alignment: 4096 bytes Accessable by all: FALSE Pool GLOBAL; FLAGS: FINE GRAINED, : Size: 34342961152 bytes Allocatable: TRUE Runtime Alloc Granule: 4096 bytes Runtime Alloc alignment: 4096 bytes Accessable by all: FALSE Pool GROUP: Size: 65536 bytes Allocatable: FALSE Runtime Alloc Granule: 0 bytes Runtime Alloc alignment: 0 bytes Accessable by all: FALSE Device (6): HSA Runtime Version: 1.1 HSA OpenMP Device Number: 2 Device Name: gfx906 Vendor Name: AMD Device Type: GPU Max Queues: 128 Queue Min Size: 64 Queue Max Size: 131072 Cache: L0: 16384 bytes L1: 8388608 bytes Cacheline Size: 64 Max Clock Freq(MHz): 1725 Compute Units: 60 SIMD per CU: 4 Fast F16 Operation: TRUE Wavefront Size: 64 Workgroup Max Size: 1024 Workgroup Max Size per Dimension: x: 1024 y: 1024 z: 1024 Max Waves Per CU: 40 Max Work-item Per CU: 2560 Grid Max Size: 4294967295 Grid Max Size per Dimension: x: 4294967295 y: 4294967295 z: 4294967295 Max fbarriers/Workgrp: 32 Memory Pools: Pool GLOBAL; FLAGS: COARSE GRAINED, : Size: 34342961152 bytes Allocatable: TRUE Runtime Alloc Granule: 4096 bytes Runtime Alloc alignment: 4096 bytes Accessable by all: FALSE Pool GLOBAL; FLAGS: FINE GRAINED, : Size: 34342961152 bytes Allocatable: TRUE Runtime Alloc Granule: 4096 bytes Runtime Alloc alignment: 4096 bytes Accessable by all: FALSE Pool GROUP: Size: 65536 bytes Allocatable: FALSE Runtime Alloc Granule: 
0 bytes Runtime Alloc alignment: 0 bytes Accessable by all: FALSE Device (7): HSA Runtime Version: 1.1 HSA OpenMP Device Number: 3 Device Name: gfx906 Vendor Name: AMD Device Type: GPU Max Queues: 128 Queue Min Size: 64 Queue Max Size: 131072 Cache: L0: 16384 bytes L1: 8388608 bytes Cacheline Size: 64 Max Clock Freq(MHz): 1725 Compute Units: 60 SIMD per CU: 4 Fast F16 Operation: TRUE Wavefront Size: 64 Workgroup Max Size: 1024 Workgroup Max Size per Dimension: x: 1024 y: 1024 z: 1024 Max Waves Per CU: 40 Max Work-item Per CU: 2560 Grid Max Size: 4294967295 Grid Max Size per Dimension: x: 4294967295 y: 4294967295 z: 4294967295 Max fbarriers/Workgrp: 32 Memory Pools: Pool GLOBAL; FLAGS: COARSE GRAINED, : Size: 34342961152 bytes Allocatable: TRUE Runtime Alloc Granule: 4096 bytes Runtime Alloc alignment: 4096 bytes Accessable by all: FALSE Pool GLOBAL; FLAGS: FINE GRAINED, : Size: 34342961152 bytes Allocatable: TRUE Runtime Alloc Granule: 4096 bytes Runtime Alloc alignment: 4096 bytes Accessable by all: FALSE Pool GROUP: Size: 65536 bytes Allocatable: FALSE Runtime Alloc Granule: 0 bytes Runtime Alloc alignment: 0 bytes Accessable by all: FALSE ``` Differential Revision: https://reviews.llvm.org/D126836 --- .../plugins/amdgpu/dynamic_hsa/hsa.cpp | 3 + .../libomptarget/plugins/amdgpu/dynamic_hsa/hsa.h | 35 +++ .../plugins/amdgpu/dynamic_hsa/hsa_ext_amd.h | 17 ++ openmp/libomptarget/plugins/amdgpu/src/rtl.cpp | 271 +++++++++++++++++++++ .../plugins/generic-elf-64bit/src/rtl.cpp | 4 + 5 files changed, 330 insertions(+) diff --git a/openmp/libomptarget/plugins/amdgpu/dynamic_hsa/hsa.cpp b/openmp/libomptarget/plugins/amdgpu/dynamic_hsa/hsa.cpp index ea1498c..e748056 100644 --- a/openmp/libomptarget/plugins/amdgpu/dynamic_hsa/hsa.cpp +++ b/openmp/libomptarget/plugins/amdgpu/dynamic_hsa/hsa.cpp @@ -23,8 +23,11 @@ DLWRAP_INTERNAL(hsa_init, 0); DLWRAP(hsa_status_string, 2); DLWRAP(hsa_shut_down, 0); +DLWRAP(hsa_system_get_info, 2); DLWRAP(hsa_agent_get_info, 3); 
+DLWRAP(hsa_isa_get_info_alt, 3); DLWRAP(hsa_iterate_agents, 2); +DLWRAP(hsa_agent_iterate_isas, 3); DLWRAP(hsa_signal_create, 4); DLWRAP(hsa_signal_destroy, 1); DLWRAP(hsa_signal_store_relaxed, 2); diff --git a/openmp/libomptarget/plugins/amdgpu/dynamic_hsa/hsa.h b/openmp/libomptarget/plugins/amdgpu/dynamic_hsa/hsa.h index 54359c2..ebdd642 100644 --- a/openmp/libomptarget/plugins/amdgpu/dynamic_hsa/hsa.h +++ b/openmp/libomptarget/plugins/amdgpu/dynamic_hsa/hsa.h @@ -56,22 +56,57 @@ typedef enum { } hsa_device_type_t; typedef enum { + HSA_ISA_INFO_NAME = 1, +} hsa_isa_info_t; + +typedef enum { HSA_AGENT_INFO_NAME = 0, + HSA_AGENT_INFO_VENDOR_NAME = 1, HSA_AGENT_INFO_PROFILE = 4, HSA_AGENT_INFO_WAVEFRONT_SIZE = 6, HSA_AGENT_INFO_WORKGROUP_MAX_DIM = 7, + HSA_AGENT_INFO_WORKGROUP_MAX_SIZE = 8, HSA_AGENT_INFO_GRID_MAX_DIM = 9, + HSA_AGENT_INFO_GRID_MAX_SIZE = 10, + HSA_AGENT_INFO_FBARRIER_MAX_SIZE = 11, + HSA_AGENT_INFO_QUEUES_MAX = 12, + HSA_AGENT_INFO_QUEUE_MIN_SIZE = 13, HSA_AGENT_INFO_QUEUE_MAX_SIZE = 14, HSA_AGENT_INFO_DEVICE = 17, + HSA_AGENT_INFO_CACHE_SIZE = 18, + HSA_AGENT_INFO_FAST_F16_OPERATION = 24, } hsa_agent_info_t; +typedef enum { + HSA_SYSTEM_INFO_VERSION_MAJOR = 0, + HSA_SYSTEM_INFO_VERSION_MINOR = 1, +} hsa_system_info_t; + +typedef struct hsa_region_s { + uint64_t handle; +} hsa_region_t; + +typedef struct hsa_isa_s { + uint64_t handle; +} hsa_isa_t; + +hsa_status_t hsa_system_get_info(hsa_system_info_t attribute, void *value); + hsa_status_t hsa_agent_get_info(hsa_agent_t agent, hsa_agent_info_t attribute, void *value); +hsa_status_t hsa_isa_get_info_alt(hsa_isa_t isa, hsa_isa_info_t attribute, + void *value); + hsa_status_t hsa_iterate_agents(hsa_status_t (*callback)(hsa_agent_t agent, void *data), void *data); +hsa_status_t hsa_agent_iterate_isas(hsa_agent_t agent, + hsa_status_t (*callback)(hsa_isa_t isa, + void *data), + void *data); + typedef struct hsa_signal_s { uint64_t handle; } hsa_signal_t; diff --git 
a/openmp/libomptarget/plugins/amdgpu/dynamic_hsa/hsa_ext_amd.h b/openmp/libomptarget/plugins/amdgpu/dynamic_hsa/hsa_ext_amd.h index a8662ee..d27be93 100644 --- a/openmp/libomptarget/plugins/amdgpu/dynamic_hsa/hsa_ext_amd.h +++ b/openmp/libomptarget/plugins/amdgpu/dynamic_hsa/hsa_ext_amd.h @@ -29,9 +29,20 @@ typedef enum hsa_amd_memory_pool_global_flag_s { } hsa_amd_memory_pool_global_flag_t; typedef enum { + HSA_AMD_SEGMENT_GLOBAL = 0, + HSA_AMD_SEGMENT_READONLY = 1, + HSA_AMD_SEGMENT_PRIVATE = 2, + HSA_AMD_SEGMENT_GROUP = 3, +} hsa_amd_segment_t; + +typedef enum { + HSA_AMD_MEMORY_POOL_INFO_SEGMENT = 0, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS = 1, HSA_AMD_MEMORY_POOL_INFO_SIZE = 2, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED = 5, + HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE = 6, + HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT = 7, + HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL = 15, } hsa_amd_memory_pool_info_t; typedef enum { @@ -43,7 +54,13 @@ typedef enum { } hsa_amd_memory_pool_access_t; typedef enum hsa_amd_agent_info_s { + HSA_AMD_AGENT_INFO_CACHELINE_SIZE = 0xA001, HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT = 0xA002, + HSA_AMD_AGENT_INFO_MAX_CLOCK_FREQUENCY = 0xA003, + HSA_AMD_AGENT_INFO_PRODUCT_NAME = 0xA009, + HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU = 0xA00A, + HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU = 0xA00B, + HSA_AMD_AGENT_INFO_COOPERATIVE_QUEUES = 0xA010 } hsa_amd_agent_info_t; hsa_status_t hsa_amd_memory_pool_get_info(hsa_amd_memory_pool_t memory_pool, diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp index 4b5dd0d..a7d2498 100644 --- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp +++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp @@ -282,6 +282,16 @@ static void callbackQueue(hsa_status_t status, hsa_queue_t *source, namespace core { namespace { + +bool checkResult(hsa_status_t Err, const char *ErrMsg) { + if (Err == HSA_STATUS_SUCCESS) + return true; + + REPORT("%s", ErrMsg); + REPORT("%s",
get_error_string(Err)); + return false; +} + void packet_store_release(uint32_t *packet, uint16_t header, uint16_t rest) { __atomic_store_n(packet, header | (rest << 16), __ATOMIC_RELEASE); } @@ -542,6 +552,259 @@ public: return freesignalpool_memcpy(dest, src, size, impl_memcpy_h2d, deviceId); } + static void printDeviceInfo(int32_t device_id, hsa_agent_t agent) { + char TmpChar[1000] = {}; // Zeroed: printed even when a query fails + uint16_t major, minor; + uint32_t TmpUInt; + uint32_t TmpUInt2; + uint32_t CacheSize[4]; + bool TmpBool; + uint16_t workgroupMaxDim[3]; + hsa_dim3_t gridMaxDim; + + // Getting basic information about HSA and Device + core::checkResult( + hsa_system_get_info(HSA_SYSTEM_INFO_VERSION_MAJOR, &major), + "Error from hsa_system_get_info when obtaining " + "HSA_SYSTEM_INFO_VERSION_MAJOR\n"); + core::checkResult( + hsa_system_get_info(HSA_SYSTEM_INFO_VERSION_MINOR, &minor), + "Error from hsa_system_get_info when obtaining " + "HSA_SYSTEM_INFO_VERSION_MINOR\n"); + printf(" HSA Runtime Version: \t\t%u.%u \n", major, minor); + printf(" HSA OpenMP Device Number: \t\t%d \n", device_id); + core::checkResult( + hsa_agent_get_info( + agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_PRODUCT_NAME, TmpChar), + "Error returned from hsa_agent_get_info when obtaining " + "HSA_AMD_AGENT_INFO_PRODUCT_NAME\n"); + printf(" Product Name: \t\t\t%s \n", TmpChar); + core::checkResult(hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, TmpChar), + "Error returned from hsa_agent_get_info when obtaining " + "HSA_AGENT_INFO_NAME\n"); + printf(" Device Name: \t\t\t%s \n", TmpChar); + core::checkResult( + hsa_agent_get_info(agent, HSA_AGENT_INFO_VENDOR_NAME, TmpChar), + "Error returned from hsa_agent_get_info when obtaining " + "HSA_AGENT_INFO_VENDOR_NAME\n"); + printf(" Vendor Name: \t\t\t%s \n", TmpChar); + hsa_device_type_t devType; + core::checkResult( + hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &devType), + "Error returned from hsa_agent_get_info when obtaining " + "HSA_AGENT_INFO_DEVICE\n"); + printf(" Device Type:
\t\t\t%s \n", + devType == HSA_DEVICE_TYPE_CPU + ? "CPU" + : (devType == HSA_DEVICE_TYPE_GPU + ? "GPU" + : (devType == HSA_DEVICE_TYPE_DSP ? "DSP" : "UNKNOWN"))); + core::checkResult( + hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUES_MAX, &TmpUInt), + "Error returned from hsa_agent_get_info when obtaining " + "HSA_AGENT_INFO_QUEUES_MAX\n"); + printf(" Max Queues: \t\t\t%u \n", TmpUInt); + core::checkResult( + hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_MIN_SIZE, &TmpUInt), + "Error returned from hsa_agent_get_info when obtaining " + "HSA_AGENT_INFO_QUEUE_MIN_SIZE\n"); + printf(" Queue Min Size: \t\t\t%u \n", TmpUInt); + core::checkResult( + hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &TmpUInt), + "Error returned from hsa_agent_get_info when obtaining " + "HSA_AGENT_INFO_QUEUE_MAX_SIZE\n"); + printf(" Queue Max Size: \t\t\t%u \n", TmpUInt); + + // Getting cache information + printf(" Cache:\n"); + + // FIXME: This is deprecated according to HSA documentation. But using + // hsa_agent_iterate_caches and hsa_cache_get_info breaks execution during + // runtime.
+ core::checkResult( + hsa_agent_get_info(agent, HSA_AGENT_INFO_CACHE_SIZE, CacheSize), + "Error returned from hsa_agent_get_info when obtaining " + "HSA_AGENT_INFO_CACHE_SIZE\n"); + + for (unsigned i = 0; i < 4; i++) { + if (CacheSize[i]) { + printf(" L%u: \t\t\t\t%u bytes\n", i, CacheSize[i]); + } + } + + core::checkResult( + hsa_agent_get_info(agent, + (hsa_agent_info_t)HSA_AMD_AGENT_INFO_CACHELINE_SIZE, + &TmpUInt), + "Error returned from hsa_agent_get_info when obtaining " + "HSA_AMD_AGENT_INFO_CACHELINE_SIZE\n"); + printf(" Cacheline Size: \t\t\t%u \n", TmpUInt); + core::checkResult( + hsa_agent_get_info( + agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_MAX_CLOCK_FREQUENCY, + &TmpUInt), + "Error returned from hsa_agent_get_info when obtaining " + "HSA_AMD_AGENT_INFO_MAX_CLOCK_FREQUENCY\n"); + printf(" Max Clock Freq(MHz): \t\t%u \n", TmpUInt); + core::checkResult( + hsa_agent_get_info( + agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT, + &TmpUInt), + "Error returned from hsa_agent_get_info when obtaining " + "HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT\n"); + printf(" Compute Units: \t\t\t%u \n", TmpUInt); + core::checkResult(hsa_agent_get_info( + agent, + (hsa_agent_info_t)HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU, + &TmpUInt), + "Error returned from hsa_agent_get_info when obtaining " + "HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU\n"); + printf(" SIMD per CU: \t\t\t%u \n", TmpUInt); + core::checkResult( + hsa_agent_get_info(agent, HSA_AGENT_INFO_FAST_F16_OPERATION, &TmpBool), + "Error returned from hsa_agent_get_info when obtaining " + "HSA_AGENT_INFO_FAST_F16_OPERATION\n"); + printf(" Fast F16 Operation: \t\t%s \n", (TmpBool ?
"TRUE" : "FALSE")); + core::checkResult( + hsa_agent_get_info(agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, &TmpUInt2), + "Error returned from hsa_agent_get_info when obtaining " + "HSA_AGENT_INFO_WAVEFRONT_SIZE\n"); + printf(" Wavefront Size: \t\t\t%u \n", TmpUInt2); + core::checkResult( + hsa_agent_get_info(agent, HSA_AGENT_INFO_WORKGROUP_MAX_SIZE, &TmpUInt), + "Error returned from hsa_agent_get_info when obtaining " + "HSA_AGENT_INFO_WORKGROUP_MAX_SIZE\n"); + printf(" Workgroup Max Size: \t\t%u \n", TmpUInt); + core::checkResult(hsa_agent_get_info(agent, + HSA_AGENT_INFO_WORKGROUP_MAX_DIM, + workgroupMaxDim), + "Error returned from hsa_agent_get_info when obtaining " + "HSA_AGENT_INFO_WORKGROUP_MAX_DIM\n"); + printf(" Workgroup Max Size per Dimension:\n"); + printf(" x: \t\t\t\t%u\n", workgroupMaxDim[0]); + printf(" y: \t\t\t\t%u\n", workgroupMaxDim[1]); + printf(" z: \t\t\t\t%u\n", workgroupMaxDim[2]); + core::checkResult(hsa_agent_get_info( + agent, + (hsa_agent_info_t)HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU, + &TmpUInt), + "Error returned from hsa_agent_get_info when obtaining " + "HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU\n"); + printf(" Max Waves Per CU: \t\t\t%u \n", TmpUInt); + printf(" Max Work-item Per CU: \t\t%u \n", TmpUInt * TmpUInt2); + core::checkResult( + hsa_agent_get_info(agent, HSA_AGENT_INFO_GRID_MAX_SIZE, &TmpUInt), + "Error returned from hsa_agent_get_info when obtaining " + "HSA_AGENT_INFO_GRID_MAX_SIZE\n"); + printf(" Grid Max Size: \t\t\t%u \n", TmpUInt); + core::checkResult( + hsa_agent_get_info(agent, HSA_AGENT_INFO_GRID_MAX_DIM, &gridMaxDim), + "Error returned from hsa_agent_get_info when obtaining " + "HSA_AGENT_INFO_GRID_MAX_DIM\n"); + printf(" Grid Max Size per Dimension: \t\t\n"); + printf(" x: \t\t\t\t%u\n", gridMaxDim.x); + printf(" y: \t\t\t\t%u\n", gridMaxDim.y); + printf(" z: \t\t\t\t%u\n", gridMaxDim.z); + core::checkResult( + hsa_agent_get_info(agent, HSA_AGENT_INFO_FBARRIER_MAX_SIZE, &TmpUInt), + "Error returned from hsa_agent_get_info when
obtaining " + "HSA_AGENT_INFO_FBARRIER_MAX_SIZE\n"); + printf(" Max fbarriers/Workgrp: \t\t%u\n", TmpUInt); + + printf(" Memory Pools:\n"); + auto CB_mem = [](hsa_amd_memory_pool_t region, void *data) -> hsa_status_t { + std::string TmpStr; + size_t size; + bool alloc, access; + hsa_amd_segment_t segment; + hsa_amd_memory_pool_global_flag_t globalFlags; + core::checkResult( + hsa_amd_memory_pool_get_info( + region, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &globalFlags), + "Error returned from hsa_amd_memory_pool_get_info when obtaining " + "HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS\n"); + core::checkResult(hsa_amd_memory_pool_get_info( + region, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment), + "Error returned from hsa_amd_memory_pool_get_info when " + "obtaining HSA_AMD_MEMORY_POOL_INFO_SEGMENT\n"); + + switch (segment) { + case HSA_AMD_SEGMENT_GLOBAL: + TmpStr = "GLOBAL; FLAGS: "; + if (HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT & globalFlags) + TmpStr += "KERNARG, "; + if (HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED & globalFlags) + TmpStr += "FINE GRAINED, "; + if (HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_COARSE_GRAINED & globalFlags) + TmpStr += "COARSE GRAINED, "; + break; + case HSA_AMD_SEGMENT_READONLY: + TmpStr = "READONLY"; + break; + case HSA_AMD_SEGMENT_PRIVATE: + TmpStr = "PRIVATE"; + break; + case HSA_AMD_SEGMENT_GROUP: + TmpStr = "GROUP"; + break; + default: + TmpStr = "unknown"; + break; + } + printf(" Pool %s: \n", TmpStr.c_str()); + + core::checkResult(hsa_amd_memory_pool_get_info( + region, HSA_AMD_MEMORY_POOL_INFO_SIZE, &size), + "Error returned from hsa_amd_memory_pool_get_info when " + "obtaining HSA_AMD_MEMORY_POOL_INFO_SIZE\n"); + printf(" Size: \t\t\t\t %zu bytes\n", size); + core::checkResult( + hsa_amd_memory_pool_get_info( + region, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED, &alloc), + "Error returned from hsa_amd_memory_pool_get_info when obtaining " + "HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED\n"); + printf(" Allocatable: \t\t\t %s\n",
(alloc ? "TRUE" : "FALSE")); + core::checkResult( + hsa_amd_memory_pool_get_info( + region, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE, &size), + "Error returned from hsa_amd_memory_pool_get_info when obtaining " + "HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE\n"); + printf(" Runtime Alloc Granule: \t\t %zu bytes\n", size); + core::checkResult( + hsa_amd_memory_pool_get_info( + region, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT, &size), + "Error returned from hsa_amd_memory_pool_get_info when obtaining " + "HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT\n"); + printf(" Runtime Alloc alignment: \t %zu bytes\n", size); + core::checkResult( + hsa_amd_memory_pool_get_info( + region, HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL, &access), + "Error returned from hsa_amd_memory_pool_get_info when obtaining " + "HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL\n"); + printf(" Accessable by all: \t\t %s\n", + (access ? "TRUE" : "FALSE")); + + return HSA_STATUS_SUCCESS; + }; + // Iterate over all the memory regions for this agent. Get the memory region + // type and size + hsa_amd_agent_iterate_memory_pools(agent, CB_mem, nullptr); + + printf(" ISAs:\n"); + auto CB_isas = [](hsa_isa_t isa, void *data) -> hsa_status_t { + char TmpChar[1000] = {}; // Zeroed: printed even when the query fails + core::checkResult(hsa_isa_get_info_alt(isa, HSA_ISA_INFO_NAME, TmpChar), + "Error returned from hsa_isa_get_info_alt when " + "obtaining HSA_ISA_INFO_NAME\n"); + printf(" Name: \t\t\t\t %s\n", TmpChar); + + return HSA_STATUS_SUCCESS; + }; + // Iterate over all the ISAs supported by this agent.
Print the name of + // each ISA + hsa_agent_iterate_isas(agent, CB_isas, nullptr); + } + // Record entry point associated with device void addOffloadEntry(int32_t device_id, __tgt_offload_entry entry) { assert(device_id < (int32_t)FuncGblEntries.size() && @@ -2338,4 +2601,12 @@ int32_t __tgt_rtl_synchronize(int32_t device_id, __tgt_async_info *AsyncInfo) { } return OFFLOAD_SUCCESS; } + +void __tgt_rtl_print_device_info(int32_t device_id) { + // TODO: Assertion to see if device_id is correct + // NOTE: We don't need to set context for print device info. + + DeviceInfo.printDeviceInfo(device_id, DeviceInfo.HSAAgents[device_id]); +} + } // extern "C" diff --git a/openmp/libomptarget/plugins/generic-elf-64bit/src/rtl.cpp b/openmp/libomptarget/plugins/generic-elf-64bit/src/rtl.cpp index c512e61..babe189 100644 --- a/openmp/libomptarget/plugins/generic-elf-64bit/src/rtl.cpp +++ b/openmp/libomptarget/plugins/generic-elf-64bit/src/rtl.cpp @@ -250,6 +250,10 @@ __tgt_target_table *__tgt_rtl_load_binary(int32_t device_id, return DeviceInfo.getOffloadEntriesTable(device_id); } +void __tgt_rtl_print_device_info(int32_t device_id) { + printf(" This is a generic-elf-64bit device\n"); +} + // Sample implementation of explicit memory allocator. For this plugin all kinds // are equivalent to each other. void *__tgt_rtl_data_alloc(int32_t device_id, int64_t size, void *hst_ptr, -- 2.7.4