From: Jakub Jelinek Date: Thu, 9 Jun 2022 08:14:42 +0000 (+0200) Subject: openmp: Add support for HBW or large capacity or interleaved memory through the libme... X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=17f52a1c725948befcc3dd3c90d1abad77b6f6fe;p=platform%2Fupstream%2Fgcc.git openmp: Add support for HBW or large capacity or interleaved memory through the libmemkind.so library This patch adds support for dlopening libmemkind.so on Linux and uses it for some kinds of allocations (but not yet e.g. pinned memory). 2022-06-09 Jakub Jelinek * allocator.c: Include dlfcn.h if LIBGOMP_USE_MEMKIND is defined. (enum gomp_memkind_kind): New type. (struct omp_allocator_data): Add memkind field if LIBGOMP_USE_MEMKIND is defined. (struct gomp_memkind_data): New type. (memkind_data, memkind_data_once): New variables. (gomp_init_memkind, gomp_get_memkind): New functions. (omp_init_allocator): Initialize data.memkind, don't fail for omp_high_bw_mem_space if libmemkind supports it. (omp_aligned_alloc, omp_free, omp_aligned_calloc, omp_realloc): Add memkind support of LIBGOMP_USE_MEMKIND is defined. * config/linux/allocator.c: New file. --- diff --git a/libgomp/allocator.c b/libgomp/allocator.c index 07a5645..c96d378 100644 --- a/libgomp/allocator.c +++ b/libgomp/allocator.c @@ -31,9 +31,28 @@ #include "libgomp.h" #include #include +#ifdef LIBGOMP_USE_MEMKIND +#include +#endif #define omp_max_predefined_alloc omp_thread_mem_alloc +enum gomp_memkind_kind +{ + GOMP_MEMKIND_NONE = 0, +#define GOMP_MEMKIND_KINDS \ + GOMP_MEMKIND_KIND (HBW_INTERLEAVE), \ + GOMP_MEMKIND_KIND (HBW_PREFERRED), \ + GOMP_MEMKIND_KIND (DAX_KMEM_ALL), \ + GOMP_MEMKIND_KIND (DAX_KMEM), \ + GOMP_MEMKIND_KIND (INTERLEAVE), \ + GOMP_MEMKIND_KIND (DEFAULT) +#define GOMP_MEMKIND_KIND(kind) GOMP_MEMKIND_##kind + GOMP_MEMKIND_KINDS, +#undef GOMP_MEMKIND_KIND + GOMP_MEMKIND_COUNT +}; + struct omp_allocator_data { omp_memspace_handle_t memspace; @@ -46,6 +65,9 @@ struct omp_allocator_data unsigned int fallback : 8; unsigned int pinned : 1; unsigned int partition : 7; +#ifdef LIBGOMP_USE_MEMKIND + unsigned int memkind : 8; +#endif #ifndef HAVE_SYNC_BUILTINS gomp_mutex_t lock; #endif @@ -59,13 +81,95 @@ struct omp_mem_header void *pad; }; +struct gomp_memkind_data +{ + void *memkind_handle; + void *(*memkind_malloc) (void *, size_t); + void *(*memkind_calloc) (void *, size_t, size_t); + void *(*memkind_realloc) (void *, void *, size_t); + void (*memkind_free) (void *, void *); + int (*memkind_check_available) (void *); + void **kinds[GOMP_MEMKIND_COUNT]; +}; + +#ifdef LIBGOMP_USE_MEMKIND +static struct gomp_memkind_data *memkind_data; +static pthread_once_t memkind_data_once = PTHREAD_ONCE_INIT; + +static void +gomp_init_memkind (void) +{ + void *handle = dlopen ("libmemkind.so", RTLD_LAZY); + struct gomp_memkind_data *data; + int i; + static const char *kinds[] = { + NULL, +#define GOMP_MEMKIND_KIND(kind) "MEMKIND_" #kind + GOMP_MEMKIND_KINDS +#undef GOMP_MEMKIND_KIND + }; + + data = calloc (1, sizeof (struct gomp_memkind_data)); + if (data == NULL) + { + if (handle) + dlclose (handle); + return; + } + if (!handle) + { + __atomic_store_n (&memkind_data, data, MEMMODEL_RELEASE); + return; + } + data->memkind_handle = handle; + data->memkind_malloc + = (__typeof (data->memkind_malloc)) dlsym (handle, "memkind_malloc"); + data->memkind_calloc + = (__typeof (data->memkind_calloc)) dlsym (handle, "memkind_calloc"); + data->memkind_realloc + = (__typeof (data->memkind_realloc)) dlsym (handle, "memkind_realloc"); + data->memkind_free + 
= (__typeof (data->memkind_free)) dlsym (handle, "memkind_free"); + data->memkind_check_available + = (__typeof (data->memkind_check_available)) + dlsym (handle, "memkind_check_available"); + if (data->memkind_malloc + && data->memkind_calloc + && data->memkind_realloc + && data->memkind_free + && data->memkind_check_available) + for (i = 1; i < GOMP_MEMKIND_COUNT; ++i) + { + data->kinds[i] = (void **) dlsym (handle, kinds[i]); + if (data->kinds[i] && data->memkind_check_available (*data->kinds[i])) + data->kinds[i] = NULL; + } + __atomic_store_n (&memkind_data, data, MEMMODEL_RELEASE); +} + +static struct gomp_memkind_data * +gomp_get_memkind (void) +{ + struct gomp_memkind_data *data + = __atomic_load_n (&memkind_data, MEMMODEL_ACQUIRE); + if (data) + return data; + pthread_once (&memkind_data_once, gomp_init_memkind); + return __atomic_load_n (&memkind_data, MEMMODEL_ACQUIRE); +} +#endif + omp_allocator_handle_t omp_init_allocator (omp_memspace_handle_t memspace, int ntraits, const omp_alloctrait_t traits[]) { struct omp_allocator_data data = { memspace, 1, ~(uintptr_t) 0, 0, 0, omp_atv_contended, omp_atv_all, - omp_atv_default_mem_fb, omp_atv_false, omp_atv_environment }; + omp_atv_default_mem_fb, omp_atv_false, omp_atv_environment, +#ifdef LIBGOMP_USE_MEMKIND + GOMP_MEMKIND_NONE +#endif + }; struct omp_allocator_data *ret; int i; @@ -179,8 +283,48 @@ omp_init_allocator (omp_memspace_handle_t memspace, int ntraits, if (data.alignment < sizeof (void *)) data.alignment = sizeof (void *); - /* No support for these so far (for hbw will use memkind). */ - if (data.pinned || data.memspace == omp_high_bw_mem_space) + switch (memspace) + { + case omp_high_bw_mem_space: +#ifdef LIBGOMP_USE_MEMKIND + struct gomp_memkind_data *memkind_data; + memkind_data = gomp_get_memkind (); + if (data.partition == omp_atv_interleaved + && memkind_data->kinds[GOMP_MEMKIND_HBW_INTERLEAVE]) + { + data.memkind = GOMP_MEMKIND_HBW_INTERLEAVE; + break; + } + else if (memkind_data->kinds[GOMP_MEMKIND_HBW_PREFERRED]) + { + data.memkind = GOMP_MEMKIND_HBW_PREFERRED; + break; + } +#endif + return omp_null_allocator; + case omp_large_cap_mem_space: +#ifdef LIBGOMP_USE_MEMKIND + memkind_data = gomp_get_memkind (); + if (memkind_data->kinds[GOMP_MEMKIND_DAX_KMEM_ALL]) + data.memkind = GOMP_MEMKIND_DAX_KMEM_ALL; + else if (memkind_data->kinds[GOMP_MEMKIND_DAX_KMEM]) + data.memkind = GOMP_MEMKIND_DAX_KMEM; +#endif + break; + default: +#ifdef LIBGOMP_USE_MEMKIND + if (data.partition == omp_atv_interleaved) + { + memkind_data = gomp_get_memkind (); + if (memkind_data->kinds[GOMP_MEMKIND_INTERLEAVE]) + data.memkind = GOMP_MEMKIND_INTERLEAVE; + } +#endif + break; + } + + /* No support for this so far. 
*/ + if (data.pinned) return omp_null_allocator; ret = gomp_malloc (sizeof (struct omp_allocator_data)); @@ -213,6 +357,9 @@ omp_aligned_alloc (size_t alignment, size_t size, struct omp_allocator_data *allocator_data; size_t new_size, new_alignment; void *ptr, *ret; +#ifdef LIBGOMP_USE_MEMKIND + enum gomp_memkind_kind memkind; +#endif if (__builtin_expect (size == 0, 0)) return NULL; @@ -232,12 +379,28 @@ retry: allocator_data = (struct omp_allocator_data *) allocator; if (new_alignment < allocator_data->alignment) new_alignment = allocator_data->alignment; +#ifdef LIBGOMP_USE_MEMKIND + memkind = allocator_data->memkind; +#endif } else { allocator_data = NULL; if (new_alignment < sizeof (void *)) new_alignment = sizeof (void *); +#ifdef LIBGOMP_USE_MEMKIND + memkind = GOMP_MEMKIND_NONE; + if (allocator == omp_high_bw_mem_alloc) + memkind = GOMP_MEMKIND_HBW_PREFERRED; + else if (allocator == omp_large_cap_mem_alloc) + memkind = GOMP_MEMKIND_DAX_KMEM_ALL; + if (memkind) + { + struct gomp_memkind_data *memkind_data = gomp_get_memkind (); + if (!memkind_data->kinds[memkind]) + memkind = GOMP_MEMKIND_NONE; + } +#endif } new_size = sizeof (struct omp_mem_header); @@ -281,7 +444,16 @@ retry: allocator_data->used_pool_size = used_pool_size; gomp_mutex_unlock (&allocator_data->lock); #endif - ptr = malloc (new_size); +#ifdef LIBGOMP_USE_MEMKIND + if (memkind) + { + struct gomp_memkind_data *memkind_data = gomp_get_memkind (); + void *kind = *memkind_data->kinds[memkind]; + ptr = memkind_data->memkind_malloc (kind, new_size); + } + else +#endif + ptr = malloc (new_size); if (ptr == NULL) { #ifdef HAVE_SYNC_BUILTINS @@ -297,7 +469,16 @@ retry: } else { - ptr = malloc (new_size); +#ifdef LIBGOMP_USE_MEMKIND + if (memkind) + { + struct gomp_memkind_data *memkind_data = gomp_get_memkind (); + void *kind = *memkind_data->kinds[memkind]; + ptr = memkind_data->memkind_malloc (kind, new_size); + } + else +#endif + ptr = malloc (new_size); if (ptr == NULL) goto fail; } @@ -321,6 +502,9 @@ fail: { case omp_atv_default_mem_fb: if ((new_alignment > sizeof (void *) && new_alignment > alignment) +#ifdef LIBGOMP_USE_MEMKIND + || memkind +#endif || (allocator_data && allocator_data->pool_size < ~(uintptr_t) 0)) { @@ -393,7 +577,36 @@ omp_free (void *ptr, omp_allocator_handle_t allocator) gomp_mutex_unlock (&allocator_data->lock); #endif } +#ifdef LIBGOMP_USE_MEMKIND + if (allocator_data->memkind) + { + struct gomp_memkind_data *memkind_data = gomp_get_memkind (); + void *kind = *memkind_data->kinds[allocator_data->memkind]; + memkind_data->memkind_free (kind, data->ptr); + return; + } +#endif } +#ifdef LIBGOMP_USE_MEMKIND + else + { + enum gomp_memkind_kind memkind = GOMP_MEMKIND_NONE; + if (data->allocator == omp_high_bw_mem_alloc) + memkind = GOMP_MEMKIND_HBW_PREFERRED; + else if (data->allocator == omp_large_cap_mem_alloc) + memkind = GOMP_MEMKIND_DAX_KMEM_ALL; + if (memkind) + { + struct gomp_memkind_data *memkind_data = gomp_get_memkind (); + if (memkind_data->kinds[memkind]) + { + void *kind = *memkind_data->kinds[memkind]; + memkind_data->memkind_free (kind, data->ptr); + return; + } + } + } +#endif free (data->ptr); } @@ -412,6 +625,9 @@ omp_aligned_calloc (size_t alignment, size_t nmemb, size_t size, struct omp_allocator_data *allocator_data; size_t new_size, size_temp, new_alignment; void *ptr, *ret; +#ifdef LIBGOMP_USE_MEMKIND + enum gomp_memkind_kind memkind; +#endif if (__builtin_expect (size == 0 || nmemb == 0, 0)) return NULL; @@ -431,12 +647,28 @@ retry: allocator_data = (struct omp_allocator_data 
*) allocator; if (new_alignment < allocator_data->alignment) new_alignment = allocator_data->alignment; +#ifdef LIBGOMP_USE_MEMKIND + memkind = allocator_data->memkind; +#endif } else { allocator_data = NULL; if (new_alignment < sizeof (void *)) new_alignment = sizeof (void *); +#ifdef LIBGOMP_USE_MEMKIND + memkind = GOMP_MEMKIND_NONE; + if (allocator == omp_high_bw_mem_alloc) + memkind = GOMP_MEMKIND_HBW_PREFERRED; + else if (allocator == omp_large_cap_mem_alloc) + memkind = GOMP_MEMKIND_DAX_KMEM_ALL; + if (memkind) + { + struct gomp_memkind_data *memkind_data = gomp_get_memkind (); + if (!memkind_data->kinds[memkind]) + memkind = GOMP_MEMKIND_NONE; + } +#endif } new_size = sizeof (struct omp_mem_header); @@ -482,7 +714,16 @@ retry: allocator_data->used_pool_size = used_pool_size; gomp_mutex_unlock (&allocator_data->lock); #endif - ptr = calloc (1, new_size); +#ifdef LIBGOMP_USE_MEMKIND + if (memkind) + { + struct gomp_memkind_data *memkind_data = gomp_get_memkind (); + void *kind = *memkind_data->kinds[memkind]; + ptr = memkind_data->memkind_calloc (kind, 1, new_size); + } + else +#endif + ptr = calloc (1, new_size); if (ptr == NULL) { #ifdef HAVE_SYNC_BUILTINS @@ -498,7 +739,16 @@ retry: } else { - ptr = calloc (1, new_size); +#ifdef LIBGOMP_USE_MEMKIND + if (memkind) + { + struct gomp_memkind_data *memkind_data = gomp_get_memkind (); + void *kind = *memkind_data->kinds[memkind]; + ptr = memkind_data->memkind_calloc (kind, 1, new_size); + } + else +#endif + ptr = calloc (1, new_size); if (ptr == NULL) goto fail; } @@ -522,6 +772,9 @@ fail: { case omp_atv_default_mem_fb: if ((new_alignment > sizeof (void *) && new_alignment > alignment) +#ifdef LIBGOMP_USE_MEMKIND + || memkind +#endif || (allocator_data && allocator_data->pool_size < ~(uintptr_t) 0)) { @@ -562,6 +815,9 @@ omp_realloc (void *ptr, size_t size, omp_allocator_handle_t allocator, size_t new_size, old_size, new_alignment, old_alignment; void *new_ptr, *ret; struct omp_mem_header *data; +#ifdef LIBGOMP_USE_MEMKIND + enum gomp_memkind_kind memkind, free_memkind; +#endif if (__builtin_expect (ptr == NULL, 0)) return ialias_call (omp_aligned_alloc) (1, size, allocator); @@ -585,13 +841,51 @@ retry: allocator_data = (struct omp_allocator_data *) allocator; if (new_alignment < allocator_data->alignment) new_alignment = allocator_data->alignment; +#ifdef LIBGOMP_USE_MEMKIND + memkind = allocator_data->memkind; +#endif } else - allocator_data = NULL; + { + allocator_data = NULL; +#ifdef LIBGOMP_USE_MEMKIND + memkind = GOMP_MEMKIND_NONE; + if (allocator == omp_high_bw_mem_alloc) + memkind = GOMP_MEMKIND_HBW_PREFERRED; + else if (allocator == omp_large_cap_mem_alloc) + memkind = GOMP_MEMKIND_DAX_KMEM_ALL; + if (memkind) + { + struct gomp_memkind_data *memkind_data = gomp_get_memkind (); + if (!memkind_data->kinds[memkind]) + memkind = GOMP_MEMKIND_NONE; + } +#endif + } if (free_allocator > omp_max_predefined_alloc) - free_allocator_data = (struct omp_allocator_data *) free_allocator; + { + free_allocator_data = (struct omp_allocator_data *) free_allocator; +#ifdef LIBGOMP_USE_MEMKIND + free_memkind = free_allocator_data->memkind; +#endif + } else - free_allocator_data = NULL; + { + free_allocator_data = NULL; +#ifdef LIBGOMP_USE_MEMKIND + free_memkind = GOMP_MEMKIND_NONE; + if (free_allocator == omp_high_bw_mem_alloc) + free_memkind = GOMP_MEMKIND_HBW_PREFERRED; + else if (free_allocator == omp_large_cap_mem_alloc) + free_memkind = GOMP_MEMKIND_DAX_KMEM_ALL; + if (free_memkind) + { + struct gomp_memkind_data *memkind_data = 
gomp_get_memkind (); + if (!memkind_data->kinds[free_memkind]) + free_memkind = GOMP_MEMKIND_NONE; + } +#endif + } old_alignment = (uintptr_t) ptr - (uintptr_t) (data->ptr); new_size = sizeof (struct omp_mem_header); @@ -659,6 +953,19 @@ retry: allocator_data->used_pool_size = used_pool_size; gomp_mutex_unlock (&allocator_data->lock); #endif +#ifdef LIBGOMP_USE_MEMKIND + if (memkind) + { + struct gomp_memkind_data *memkind_data = gomp_get_memkind (); + void *kind = *memkind_data->kinds[memkind]; + if (prev_size) + new_ptr = memkind_data->memkind_realloc (kind, data->ptr, + new_size); + else + new_ptr = memkind_data->memkind_malloc (kind, new_size); + } + else +#endif if (prev_size) new_ptr = realloc (data->ptr, new_size); else @@ -687,10 +994,23 @@ retry: } else if (new_alignment == sizeof (void *) && old_alignment == sizeof (struct omp_mem_header) +#ifdef LIBGOMP_USE_MEMKIND + && memkind == free_memkind +#endif && (free_allocator_data == NULL || free_allocator_data->pool_size == ~(uintptr_t) 0)) { - new_ptr = realloc (data->ptr, new_size); +#ifdef LIBGOMP_USE_MEMKIND + if (memkind) + { + struct gomp_memkind_data *memkind_data = gomp_get_memkind (); + void *kind = *memkind_data->kinds[memkind]; + new_ptr = memkind_data->memkind_realloc (kind, data->ptr, + new_size); + } + else +#endif + new_ptr = realloc (data->ptr, new_size); if (new_ptr == NULL) goto fail; ret = (char *) new_ptr + sizeof (struct omp_mem_header); @@ -701,7 +1021,16 @@ retry: } else { - new_ptr = malloc (new_size); +#ifdef LIBGOMP_USE_MEMKIND + if (memkind) + { + struct gomp_memkind_data *memkind_data = gomp_get_memkind (); + void *kind = *memkind_data->kinds[memkind]; + new_ptr = memkind_data->memkind_malloc (kind, new_size); + } + else +#endif + new_ptr = malloc (new_size); if (new_ptr == NULL) goto fail; } @@ -731,6 +1060,15 @@ retry: gomp_mutex_unlock (&free_allocator_data->lock); #endif } +#ifdef LIBGOMP_USE_MEMKIND + if (free_memkind) + { + struct gomp_memkind_data *memkind_data = gomp_get_memkind (); + void *kind = *memkind_data->kinds[free_memkind]; + memkind_data->memkind_free (kind, data->ptr); + return ret; + } +#endif free (data->ptr); return ret; @@ -741,6 +1079,9 @@ fail: { case omp_atv_default_mem_fb: if (new_alignment > sizeof (void *) +#ifdef LIBGOMP_USE_MEMKIND + || memkind +#endif || (allocator_data && allocator_data->pool_size < ~(uintptr_t) 0)) { diff --git a/libgomp/config/linux/allocator.c b/libgomp/config/linux/allocator.c new file mode 100644 index 0000000..bef4e48 --- /dev/null +++ b/libgomp/config/linux/allocator.c @@ -0,0 +1,36 @@ +/* Copyright (C) 2022 Free Software Foundation, Inc. + Contributed by Jakub Jelinek . + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. 
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* This file contains wrappers for the system allocation routines.  Most
+   places in the OpenMP API do not make any provision for failure, so in
+   general we cannot allow memory allocation to fail.  */
+
+#define _GNU_SOURCE
+#include "libgomp.h"
+#if defined(PLUGIN_SUPPORT) && defined(LIBGOMP_USE_PTHREADS)
+#define LIBGOMP_USE_MEMKIND
+#endif
+
+#include "../../allocator.c"
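
A minimal user-side sketch of what the patch enables (not part of the commit): it requests an interleaved high-bandwidth allocator through the standard OpenMP 5.x allocator API and falls back to omp_default_mem_alloc when omp_init_allocator returns omp_null_allocator, which with this patch happens whenever libmemkind.so does not provide a usable MEMKIND_HBW_* kind at run time.  The fallback policy and the array size are illustrative assumptions, not anything the patch prescribes; build with -fopenmp against this libgomp.

/* Illustrative only: allocate from high-bandwidth memory with an
   interleaved partition, falling back to the default allocator when
   the high-bandwidth memspace is unavailable.  */
#include <omp.h>
#include <stdio.h>
#include <string.h>

int
main (void)
{
  omp_alloctrait_t traits[]
    = { { omp_atk_partition, omp_atv_interleaved } };
  omp_allocator_handle_t al
    = omp_init_allocator (omp_high_bw_mem_space, 1, traits);

  /* With this patch, omp_init_allocator returns omp_null_allocator for
     omp_high_bw_mem_space unless libmemkind.so provides a usable
     MEMKIND_HBW_INTERLEAVE or MEMKIND_HBW_PREFERRED kind, so check
     before using the handle.  */
  if (al == omp_null_allocator)
    al = omp_default_mem_alloc;

  double *v = (double *) omp_alloc (1024 * sizeof (double), al);
  if (v == NULL)
    {
      fprintf (stderr, "allocation failed\n");
      return 1;
    }
  memset (v, 0, 1024 * sizeof (double));

  omp_free (v, al);
  if (al != omp_default_mem_alloc)
    omp_destroy_allocator (al);
  return 0;
}

When the allocator handle carries a memkind kind, omp_alloc/omp_free above are served by memkind_malloc/memkind_free on that kind; the predefined omp_high_bw_mem_alloc and omp_large_cap_mem_alloc allocators are likewise routed to MEMKIND_HBW_PREFERRED and MEMKIND_DAX_KMEM_ALL when libmemkind exposes them, and to plain malloc/free otherwise.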