[AMDGPU][Libomptarget][NFC] Remove atmi_mem_place_t

author Pushpinder Singh <Pushpinder.Singh@amd.com>
Thu, 27 May 2021 10:55:38 +0000 (10:55 +0000)
committer Pushpinder Singh <Pushpinder.Singh@amd.com>
Thu, 27 May 2021 11:53:18 +0000 (11:53 +0000)
diff --git a/openmp/libomptarget/plugins/amdgpu/impl/atmi.cpp b/openmp/libomptarget/plugins/amdgpu/impl/atmi.cpp

index 3b9e3c1..db04415 100644 (file)
--- a/openmp/libomptarget/plugins/amdgpu/impl/atmi.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/impl/atmi.cpp
@@ -67,8 +67,8 @@ hsa_status_t atmi_memcpy_h2d(hsa_signal_t signal, void *deviceDest,
    }
  
    void *tempHostPtr;
-  atmi_mem_place_t CPU = ATMI_MEM_PLACE_CPU_MEM(0, 0, 0);
-  hsa_status_t ret = atmi_malloc(&tempHostPtr, size, CPU);
+  hsa_status_t ret =
+      atmi_malloc(&tempHostPtr, size, 0 /* DeviceId */, ATMI_DEVTYPE_CPU);
    if (ret != HSA_STATUS_SUCCESS) {
      DEBUG_PRINT("atmi_malloc: Unable to alloc %d bytes for temp scratch\n",
                  size);
@@ -97,8 +97,9 @@ hsa_status_t atmi_memcpy_d2h(hsa_signal_t signal, void *dest,
    }
  
    void *tempHostPtr;
-  atmi_mem_place_t CPU = ATMI_MEM_PLACE_CPU_MEM(0, 0, 0);
-  hsa_status_t ret = atmi_malloc(&tempHostPtr, size, CPU);
+
+  hsa_status_t ret =
+      atmi_malloc(&tempHostPtr, size, 0 /* DeviceId */, ATMI_DEVTYPE_CPU);
    if (ret != HSA_STATUS_SUCCESS) {
      DEBUG_PRINT("atmi_malloc: Unable to alloc %d bytes for temp scratch\n",
                  size);
@@ -117,6 +118,7 @@ hsa_status_t atmi_memcpy_d2h(hsa_signal_t signal, void *dest,
  
  hsa_status_t atmi_free(void *ptr) { return core::Runtime::Memfree(ptr); }
  
-hsa_status_t atmi_malloc(void **ptr, size_t size, atmi_mem_place_t place) {
-  return core::Runtime::Malloc(ptr, size, place);
+hsa_status_t atmi_malloc(void **ptr, size_t size, int DeviceId,
+                         atmi_devtype_t DeviceType) {
+  return core::Runtime::Malloc(ptr, size, DeviceId, DeviceType);
  }
diff --git a/openmp/libomptarget/plugins/amdgpu/impl/atmi.h b/openmp/libomptarget/plugins/amdgpu/impl/atmi.h

index c3028b4..c1386f1 100644 (file)
--- a/openmp/libomptarget/plugins/amdgpu/impl/atmi.h
+++ b/openmp/libomptarget/plugins/amdgpu/impl/atmi.h
@@ -59,30 +59,6 @@ typedef struct atmi_place_s {
  } atmi_place_t;
  
  /**
- * @brief ATMI Memory Place
- */
-typedef struct atmi_mem_place_s {
-  /**
-   * The node in a cluster where computation should occur.
-   * Default is node_id = 0 for local computations.
-   */
-  unsigned int node_id;
-  /**
-   * Device type: CPU, GPU or DSP
-   */
-  atmi_devtype_t dev_type;
-  /**
-   * The device ordinal number ordered by runtime; -1 for any
-   */
-  int dev_id;
-  // atmi_memtype_t mem_type;        // Fine grained or Coarse grained
-  /**
-   * The memory space/region ordinal number ordered by runtime; -1 for any
-   */
-  int mem_id;
-} atmi_mem_place_t;
-
-/**
   * @brief ATMI Memory Space/region Structure
   */
  typedef struct atmi_memory_s {
diff --git a/openmp/libomptarget/plugins/amdgpu/impl/atmi_interop_hsa.cpp b/openmp/libomptarget/plugins/amdgpu/impl/atmi_interop_hsa.cpp

index 009d128..1359d7d 100644 (file)
--- a/openmp/libomptarget/plugins/amdgpu/impl/atmi_interop_hsa.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/impl/atmi_interop_hsa.cpp
@@ -8,8 +8,7 @@
  
  hsa_status_t atmi_interop_hsa_get_symbol_info(
      const std::map<std::string, atl_symbol_info_t> &SymbolInfoTable,
-    atmi_mem_place_t place, const char *symbol, void **var_addr,
-    unsigned int *var_size) {
+    int DeviceId, const char *symbol, void **var_addr, unsigned int *var_size) {
    /*
       // Typical usage:
       void *var_addr;
@@ -22,8 +21,8 @@ hsa_status_t atmi_interop_hsa_get_symbol_info(
    atmi_machine_t *machine = atmi_machine_get_info();
    if (!symbol || !var_addr || !var_size || !machine)
      return HSA_STATUS_ERROR;
-  if (place.dev_id < 0 ||
-      place.dev_id >= machine->device_count_by_type[place.dev_type])
+  if (DeviceId < 0 ||
+      DeviceId >= machine->device_count_by_type[ATMI_DEVTYPE_GPU])
      return HSA_STATUS_ERROR;
  
    // get the symbol info
@@ -43,7 +42,7 @@ hsa_status_t atmi_interop_hsa_get_symbol_info(
  
  hsa_status_t atmi_interop_hsa_get_kernel_info(
      const std::map<std::string, atl_kernel_info_t> &KernelInfoTable,
-    atmi_mem_place_t place, const char *kernel_name,
+    int DeviceId, const char *kernel_name,
      hsa_executable_symbol_info_t kernel_info, uint32_t *value) {
    /*
       // Typical usage:
@@ -56,8 +55,8 @@ hsa_status_t atmi_interop_hsa_get_kernel_info(
    atmi_machine_t *machine = atmi_machine_get_info();
    if (!kernel_name || !value || !machine)
      return HSA_STATUS_ERROR;
-  if (place.dev_id < 0 ||
-      place.dev_id >= machine->device_count_by_type[place.dev_type])
+  if (DeviceId < 0 ||
+      DeviceId >= machine->device_count_by_type[ATMI_DEVTYPE_GPU])
      return HSA_STATUS_ERROR;
  
    hsa_status_t status = HSA_STATUS_SUCCESS;
diff --git a/openmp/libomptarget/plugins/amdgpu/impl/atmi_interop_hsa.h b/openmp/libomptarget/plugins/amdgpu/impl/atmi_interop_hsa.h

index f5c2867..b656f76 100644 (file)
--- a/openmp/libomptarget/plugins/amdgpu/impl/atmi_interop_hsa.h
+++ b/openmp/libomptarget/plugins/amdgpu/impl/atmi_interop_hsa.h
@@ -48,8 +48,8 @@ extern "C" {
   */
  hsa_status_t atmi_interop_hsa_get_symbol_info(
      const std::map<std::string, atl_symbol_info_t> &SymbolInfoTable,
-    atmi_mem_place_t place, const char *symbol, void **var_addr,
-    unsigned int *var_size);
+    int DeviceId, const char *symbol, void **var_addr, unsigned int *var_size);
+
  /**
   * @brief Get the HSA-specific kernel info from a kernel name
   *
@@ -75,8 +75,8 @@ hsa_status_t atmi_interop_hsa_get_symbol_info(
   */
  hsa_status_t atmi_interop_hsa_get_kernel_info(
      const std::map<std::string, atl_kernel_info_t> &KernelInfoTable,
-    atmi_mem_place_t place, const char *kernel_name,
-    hsa_executable_symbol_info_t info, uint32_t *value);
+    int DeviceId, const char *kernel_name, hsa_executable_symbol_info_t info,
+    uint32_t *value);
  
  /** @} */
  
diff --git a/openmp/libomptarget/plugins/amdgpu/impl/atmi_runtime.h b/openmp/libomptarget/plugins/amdgpu/impl/atmi_runtime.h

index 0eab7a1..b0de144 100644 (file)
--- a/openmp/libomptarget/plugins/amdgpu/impl/atmi_runtime.h
+++ b/openmp/libomptarget/plugins/amdgpu/impl/atmi_runtime.h
@@ -99,8 +99,8 @@ atmi_machine_t *atmi_machine_get_info();
   * @retval ::HSA_STATUS_ERROR The function encountered errors.
   *
   */
-hsa_status_t atmi_malloc(void **ptr, size_t size, atmi_mem_place_t place);
-
+hsa_status_t atmi_malloc(void **ptr, size_t size, int DeviceId,
+                         atmi_devtype_t DeviceType);
  /**
   * @brief Frees memory that was previously allocated.
   *
diff --git a/openmp/libomptarget/plugins/amdgpu/impl/data.cpp b/openmp/libomptarget/plugins/amdgpu/impl/data.cpp

index 0f9f05a..b96db1e 100644 (file)
--- a/openmp/libomptarget/plugins/amdgpu/impl/data.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/impl/data.cpp
@@ -22,39 +22,41 @@ extern ATLMachine g_atl_machine;
  namespace core {
  
  namespace {
-ATLProcessor &get_processor_by_mem_place(atmi_mem_place_t place) {
-  int dev_id = place.dev_id;
-  switch (place.dev_type) {
+ATLProcessor &get_processor_by_mem_place(int DeviceId,
+                                         atmi_devtype_t DeviceType) {
+  switch (DeviceType) {
    case ATMI_DEVTYPE_CPU:
-    return g_atl_machine.processors<ATLCPUProcessor>()[dev_id];
+    return g_atl_machine.processors<ATLCPUProcessor>()[DeviceId];
    case ATMI_DEVTYPE_GPU:
-    return g_atl_machine.processors<ATLGPUProcessor>()[dev_id];
+    return g_atl_machine.processors<ATLGPUProcessor>()[DeviceId];
    }
  }
  
-hsa_amd_memory_pool_t get_memory_pool_by_mem_place(atmi_mem_place_t place) {
-  ATLProcessor &proc = get_processor_by_mem_place(place);
-  return get_memory_pool(proc, place.mem_id);
+hsa_amd_memory_pool_t get_memory_pool_by_mem_place(int DeviceId,
+                                                   atmi_devtype_t DeviceType) {
+  ATLProcessor &proc = get_processor_by_mem_place(DeviceId, DeviceType);
+  return get_memory_pool(proc, 0 /*Memory Type (always zero) */);
  }
  } // namespace
  
  hsa_status_t register_allocation(void *ptr, size_t size,
-                                 atmi_mem_place_t place) {
-  if (place.dev_type == ATMI_DEVTYPE_CPU)
+                                 atmi_devtype_t DeviceType) {
+  if (DeviceType == ATMI_DEVTYPE_CPU)
      return allow_access_to_all_gpu_agents(ptr);
    else
      return HSA_STATUS_SUCCESS;
  }
  
-hsa_status_t Runtime::Malloc(void **ptr, size_t size, atmi_mem_place_t place) {
-  hsa_amd_memory_pool_t pool = get_memory_pool_by_mem_place(place);
+hsa_status_t Runtime::Malloc(void **ptr, size_t size, int DeviceId,
+                             atmi_devtype_t DeviceType) {
+  hsa_amd_memory_pool_t pool =
+      get_memory_pool_by_mem_place(DeviceId, DeviceType);
    hsa_status_t err = hsa_amd_memory_pool_allocate(pool, size, 0, ptr);
    DEBUG_PRINT("Malloced [%s %d] %p\n",
-              place.dev_type == ATMI_DEVTYPE_CPU ? "CPU" : "GPU", place.dev_id,
-              *ptr);
+              DeviceType == ATMI_DEVTYPE_CPU ? "CPU" : "GPU", DeviceId, *ptr);
  
    if (err == HSA_STATUS_SUCCESS) {
-    err = register_allocation(*ptr, size, place);
+    err = register_allocation(*ptr, size, DeviceType);
    }
  
    return (err == HSA_STATUS_SUCCESS) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
diff --git a/openmp/libomptarget/plugins/amdgpu/impl/internal.h b/openmp/libomptarget/plugins/amdgpu/impl/internal.h

index 66b6f61..7a8f0fd 100644 (file)
--- a/openmp/libomptarget/plugins/amdgpu/impl/internal.h
+++ b/openmp/libomptarget/plugins/amdgpu/impl/internal.h
@@ -209,7 +209,7 @@ template <typename T> inline T *alignUp(T *value, size_t alignment) {
  }
  
  hsa_status_t register_allocation(void *addr, size_t size,
-                                 atmi_mem_place_t place);
+                                 atmi_devtype_t DeviceType);
  
  extern bool atl_is_atmi_initialized();
  
diff --git a/openmp/libomptarget/plugins/amdgpu/impl/rt.h b/openmp/libomptarget/plugins/amdgpu/impl/rt.h

index ab0b08d..4e6f028 100644 (file)
--- a/openmp/libomptarget/plugins/amdgpu/impl/rt.h
+++ b/openmp/libomptarget/plugins/amdgpu/impl/rt.h
@@ -61,7 +61,8 @@ public:
    // data
    static hsa_status_t Memcpy(hsa_signal_t, void *, const void *, size_t);
    static hsa_status_t Memfree(void *);
-  static hsa_status_t Malloc(void **, size_t, atmi_mem_place_t);
+  static hsa_status_t Malloc(void **ptr, size_t size, int DeviceId,
+                             atmi_devtype_t DeviceType);
  
    int getMaxQueueSize() const { return env_.getMaxQueueSize(); }
    int getDebugMode() const { return env_.getDebugMode(); }
diff --git a/openmp/libomptarget/plugins/amdgpu/impl/system.cpp b/openmp/libomptarget/plugins/amdgpu/impl/system.cpp

index 337f350..de06640 100644 (file)
--- a/openmp/libomptarget/plugins/amdgpu/impl/system.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/impl/system.cpp
@@ -1071,11 +1071,10 @@ populate_InfoTables(hsa_executable_symbol_t symbol, int gpu,
        return err;
      }
  
-    atmi_mem_place_t place = ATMI_MEM_PLACE(ATMI_DEVTYPE_GPU, gpu, 0);
      DEBUG_PRINT("Symbol %s = %p (%u bytes)\n", name, (void *)info.addr,
                  info.size);
      err = register_allocation(reinterpret_cast<void *>(info.addr),
-                              (size_t)info.size, place);
+                              (size_t)info.size, ATMI_DEVTYPE_GPU);
      if (err != HSA_STATUS_SUCCESS) {
        return err;
      }
diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp

index 808f132..84830a3 100644 (file)
--- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
@@ -246,9 +246,6 @@ std::list<KernelTy> KernelsList;
  static atmi_place_t get_gpu_place(int device_id) {
    return ATMI_PLACE_GPU(0, device_id);
  }
-static atmi_mem_place_t get_gpu_mem_place(int device_id) {
-  return ATMI_MEM_PLACE_GPU_MEM(0, device_id, 0);
-}
  
  static std::vector<hsa_agent_t> find_gpu_agents() {
    std::vector<hsa_agent_t> res;
@@ -1155,8 +1152,7 @@ struct device_environment {
          void *state_ptr;
          uint32_t state_ptr_size;
          hsa_status_t err = atmi_interop_hsa_get_symbol_info(
-            SymbolInfo, get_gpu_mem_place(device_id), sym(), &state_ptr,
-            &state_ptr_size);
+            SymbolInfo, device_id, sym(), &state_ptr, &state_ptr_size);
          if (err != HSA_STATUS_SUCCESS) {
            DP("failed to find %s in loaded image\n", sym());
            return err;
@@ -1176,11 +1172,10 @@ struct device_environment {
    }
  };
  
-static hsa_status_t atmi_calloc(void **ret_ptr, size_t size,
-                                atmi_mem_place_t place) {
+static hsa_status_t atmi_calloc(void **ret_ptr, size_t size, int DeviceId) {
    uint64_t rounded = 4 * ((size + 3) / 4);
    void *ptr;
-  hsa_status_t err = atmi_malloc(&ptr, rounded, place);
+  hsa_status_t err = atmi_malloc(&ptr, rounded, DeviceId, ATMI_DEVTYPE_GPU);
    if (err != HSA_STATUS_SUCCESS) {
      return err;
    }
@@ -1282,8 +1277,8 @@ __tgt_target_table *__tgt_rtl_load_binary_locked(int32_t device_id,
      uint32_t state_ptr_size;
      auto &SymbolInfoMap = DeviceInfo.SymbolInfoTable[device_id];
      hsa_status_t err = atmi_interop_hsa_get_symbol_info(
-        SymbolInfoMap, get_gpu_mem_place(device_id),
-        "omptarget_nvptx_device_State", &state_ptr, &state_ptr_size);
+        SymbolInfoMap, device_id, "omptarget_nvptx_device_State", &state_ptr,
+        &state_ptr_size);
  
      if (err != HSA_STATUS_SUCCESS) {
        DP("No device_state symbol found, skipping initialization\n");
@@ -1309,8 +1304,7 @@ __tgt_target_table *__tgt_rtl_load_binary_locked(int32_t device_id,
          if (dss.first.get() == nullptr) {
            assert(dss.second == 0);
            void *ptr = NULL;
-          hsa_status_t err = atmi_calloc(&ptr, device_State_bytes,
-                                         get_gpu_mem_place(device_id));
+          hsa_status_t err = atmi_calloc(&ptr, device_State_bytes, device_id);
            if (err != HSA_STATUS_SUCCESS) {
              DP("Failed to allocate device_state array\n");
              return NULL;
@@ -1367,8 +1361,7 @@ __tgt_target_table *__tgt_rtl_load_binary_locked(int32_t device_id,
  
        auto &SymbolInfoMap = DeviceInfo.SymbolInfoTable[device_id];
        hsa_status_t err = atmi_interop_hsa_get_symbol_info(
-          SymbolInfoMap, get_gpu_mem_place(device_id), e->name, &varptr,
-          &varsize);
+          SymbolInfoMap, device_id, e->name, &varptr, &varsize);
  
        if (err != HSA_STATUS_SUCCESS) {
          // Inform the user what symbol prevented offloading
@@ -1407,11 +1400,10 @@ __tgt_target_table *__tgt_rtl_load_binary_locked(int32_t device_id,
  
      DP("to find the kernel name: %s size: %lu\n", e->name, strlen(e->name));
  
-    atmi_mem_place_t place = get_gpu_mem_place(device_id);
      uint32_t kernarg_segment_size;
      auto &KernelInfoMap = DeviceInfo.KernelInfoTable[device_id];
      hsa_status_t err = atmi_interop_hsa_get_kernel_info(
-        KernelInfoMap, place, e->name,
+        KernelInfoMap, device_id, e->name,
          HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE,
          &kernarg_segment_size);
  
@@ -1578,7 +1570,7 @@ void *__tgt_rtl_data_alloc(int device_id, int64_t size, void *, int32_t kind) {
      return NULL;
    }
  
-  hsa_status_t err = atmi_malloc(&ptr, size, get_gpu_mem_place(device_id));
+  hsa_status_t err = atmi_malloc(&ptr, size, device_id, ATMI_DEVTYPE_GPU);
    DP("Tgt alloc data %ld bytes, (tgt:%016llx).\n", size,
       (long long unsigned)(Elf64_Addr)ptr);
    ptr = (err == HSA_STATUS_SUCCESS) ? ptr : NULL;
author	Pushpinder Singh <Pushpinder.Singh@amd.com>
	Thu, 27 May 2021 10:55:38 +0000 (10:55 +0000)
committer	Pushpinder Singh <Pushpinder.Singh@amd.com>
	Thu, 27 May 2021 11:53:18 +0000 (11:53 +0000)
openmp/libomptarget/plugins/amdgpu/impl/atmi.cpp		patch | blob | history
openmp/libomptarget/plugins/amdgpu/impl/atmi.h		patch | blob | history
openmp/libomptarget/plugins/amdgpu/impl/atmi_interop_hsa.cpp		patch | blob | history
openmp/libomptarget/plugins/amdgpu/impl/atmi_interop_hsa.h		patch | blob | history
openmp/libomptarget/plugins/amdgpu/impl/atmi_runtime.h		patch | blob | history
openmp/libomptarget/plugins/amdgpu/impl/data.cpp		patch | blob | history
openmp/libomptarget/plugins/amdgpu/impl/internal.h		patch | blob | history
openmp/libomptarget/plugins/amdgpu/impl/rt.h		patch | blob | history
openmp/libomptarget/plugins/amdgpu/impl/system.cpp		patch | blob | history
openmp/libomptarget/plugins/amdgpu/src/rtl.cpp		patch | blob | history