From d6281caa3446af99c0ea89ac2ace5747b08c10d3 Mon Sep 17 00:00:00 2001 From: Carlo Bertolli Date: Wed, 14 Dec 2022 08:50:10 -0600 Subject: [PATCH] [OpenMP] Add API for pinned memory This patch adds API support for the omp_atk_pinned trait for omp_alloc. It does not implement kmp_target_lock_mem and kmp_target_unlock_mem in libomptarget, but prepares libomp for them. Patches to libomptarget to implement lock/unlock will come after this one. Reviewed by: jlpeyton, jdoerfert Differential Revision: https://reviews.llvm.org/D138933 --- openmp/runtime/src/kmp.h | 1 + openmp/runtime/src/kmp_alloc.cpp | 38 ++++++++++++++++++++++++++++++++++---- 2 files changed, 35 insertions(+), 4 deletions(-) diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h index e38ef81..c9471a6 100644 --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -1033,6 +1033,7 @@ typedef struct kmp_allocator_t { kmp_allocator_t *fb_data; kmp_uint64 pool_size; kmp_uint64 pool_used; + bool pinned; } kmp_allocator_t; extern omp_allocator_handle_t __kmpc_init_allocator(int gtid, diff --git a/openmp/runtime/src/kmp_alloc.cpp b/openmp/runtime/src/kmp_alloc.cpp index 06c16ea..ea2527b 100644 --- a/openmp/runtime/src/kmp_alloc.cpp +++ b/openmp/runtime/src/kmp_alloc.cpp @@ -1245,6 +1245,8 @@ static void **mk_dax_kmem_preferred; static void *(*kmp_target_alloc_host)(size_t size, int device); static void *(*kmp_target_alloc_shared)(size_t size, int device); static void *(*kmp_target_alloc_device)(size_t size, int device); +static void *(*kmp_target_lock_mem)(void *ptr, size_t size, int device); +static void *(*kmp_target_unlock_mem)(void *ptr, int device); static void *(*kmp_target_free_host)(void *ptr, int device); static void *(*kmp_target_free_shared)(void *ptr, int device); static void *(*kmp_target_free_device)(void *ptr, int device); @@ -1386,7 +1388,9 @@ omp_allocator_handle_t __kmpc_init_allocator(int gtid, omp_memspace_handle_t ms, switch (traits[i].key) { case omp_atk_sync_hint: case
omp_atk_access: + break; case omp_atk_pinned: + al->pinned = true; break; case omp_atk_alignment: __kmp_type_convert(traits[i].value, &(al->alignment)); @@ -1545,6 +1549,8 @@ void *__kmp_alloc(int gtid, size_t algn, size_t size, return NULL; if (allocator == omp_null_allocator) allocator = __kmp_threads[gtid]->th.th_def_allocator; + kmp_int32 default_device = + __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device; al = RCAST(kmp_allocator_t *, allocator); @@ -1560,6 +1566,9 @@ void *__kmp_alloc(int gtid, size_t algn, size_t size, align = algn; // max of allocator trait, parameter and sizeof(void*) desc.size_orig = size; desc.size_a = size + sz_desc + align; + bool is_pinned = false; + if (allocator > kmp_max_mem_alloc) + is_pinned = al->pinned; // Use default allocator if libmemkind is not available int use_default_allocator = (__kmp_memkind_available) ? false : true; @@ -1589,7 +1598,10 @@ void *__kmp_alloc(int gtid, size_t algn, size_t size, } else if (al->fb == omp_atv_allocator_fb) { KMP_ASSERT(al != al->fb_data); al = al->fb_data; - return __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al); + ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al); + if (is_pinned && kmp_target_lock_mem) + kmp_target_lock_mem(ptr, size, default_device); + return ptr; } // else ptr == NULL; } else { // pool has enough space @@ -1603,7 +1615,10 @@ void *__kmp_alloc(int gtid, size_t algn, size_t size, } else if (al->fb == omp_atv_allocator_fb) { KMP_ASSERT(al != al->fb_data); al = al->fb_data; - return __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al); + ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al); + if (is_pinned && kmp_target_lock_mem) + kmp_target_lock_mem(ptr, size, default_device); + return ptr; } } } @@ -1619,7 +1634,10 @@ void *__kmp_alloc(int gtid, size_t algn, size_t size, } else if (al->fb == omp_atv_allocator_fb) { KMP_ASSERT(al != al->fb_data); al = al->fb_data; - return __kmp_alloc(gtid, algn, size, 
(omp_allocator_handle_t)al); + ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al); + if (is_pinned && kmp_target_lock_mem) + kmp_target_lock_mem(ptr, size, default_device); + return ptr; } } } @@ -1689,7 +1707,10 @@ void *__kmp_alloc(int gtid, size_t algn, size_t size, } else if (al->fb == omp_atv_allocator_fb) { KMP_ASSERT(al != al->fb_data); al = al->fb_data; - return __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al); + ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al); + if (is_pinned && kmp_target_lock_mem) + kmp_target_lock_mem(ptr, size, default_device); + return ptr; } // else ptr == NULL; } else { // pool has enough space @@ -1709,6 +1730,9 @@ void *__kmp_alloc(int gtid, size_t algn, size_t size, if (ptr == NULL) return NULL; + if (is_pinned && kmp_target_lock_mem) + kmp_target_lock_mem(ptr, desc.size_a, default_device); + addr = (kmp_uintptr_t)ptr; addr_align = (addr + sz_desc + align - 1) & ~(align - 1); addr_descr = addr_align - sz_desc; @@ -1825,6 +1849,12 @@ void ___kmpc_free(int gtid, void *ptr, omp_allocator_handle_t allocator) { oal = (omp_allocator_handle_t)al; // cast to void* for comparisons KMP_DEBUG_ASSERT(al); + if (allocator > kmp_max_mem_alloc && kmp_target_unlock_mem && al->pinned) { + kmp_int32 device = + __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device; + kmp_target_unlock_mem(desc.ptr_alloc, device); + } + if (__kmp_memkind_available) { if (oal < kmp_max_mem_alloc) { // pre-defined allocator -- 2.7.4