From f4cce0f47b3e720cc6c7210b345690de210d015d Mon Sep 17 00:00:00 2001
From: Jonathan Peyton
Date: Mon, 5 Dec 2022 09:06:01 -0600
Subject: [PATCH] [OpenMP][libomp] Add topology information to thread structure

Each time a thread is assigned a new affinity, the runtime now records not
only the affinity mask but also topology information: which socket, core,
and hardware thread the thread is assigned to, plus core attributes when
available. This occurs for all non-disabled KMP_AFFINITY settings as well
as for OMP_PLACES/OMP_PROC_BIND.

The socket, core, etc. information can take one of three values:
1) The actual ID of the unit (0 to N-1, given N units)
2) UNKNOWN_ID (-1), which indicates the ID is not known
3) MULTIPLE_ID (-2), which indicates the thread spans more than one unit at
   this level (e.g., the affinity mask covers multiple hardware threads)

This information is stored in the th_topology_ids[] array. For example, to
get a thread's socket ID, one reads th_topology_ids[KMP_HW_SOCKET]. This
could be expanded in the future to something more descriptive for the
"multiple" case, such as a range of values. For now, the single value
suffices.

The core-attribute information can take one of two values:
1) The actual core type or core efficiency
2) KMP_HW_CORE_TYPE_UNKNOWN if the core type is unknown, or
   UNKNOWN_CORE_EFF (-1) if the core efficiency is unknown

This information is stored in th_topology_attrs. For example, to get a
thread's core type, one reads th_topology_attrs.core_type.

Differential Revision: https://reviews.llvm.org/D139854
---
 openmp/runtime/src/kmp.h            |  35 +++++++++-
 openmp/runtime/src/kmp_affinity.cpp | 133 +++++++++++++++++++++++++++++++++++-
 openmp/runtime/src/kmp_affinity.h   |   5 ++
 3 files changed, 171 insertions(+), 2 deletions(-)

diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index 417f1d2..08ce0a4 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -753,6 +753,15 @@ public:
     // Only 1 DWORD in the mask should have any procs set.
     // Return the appropriate index, or -1 for an invalid mask.
     virtual int get_proc_group() const { return -1; }
+    int get_max_cpu() const {
+      int cpu;
+      int max_cpu = -1;
+      KMP_CPU_SET_ITERATE(cpu, this) {
+        if (cpu > max_cpu)
+          max_cpu = cpu;
+      }
+      return max_cpu;
+    }
   };
   void *operator new(size_t n);
   void operator delete(void *p);
@@ -836,6 +845,26 @@ typedef struct kmp_affinity_flags_t {
 } kmp_affinity_flags_t;
 KMP_BUILD_ASSERT(sizeof(kmp_affinity_flags_t) == 4);
 
+typedef struct kmp_affinity_ids_t {
+  int ids[KMP_HW_LAST];
+  int operator[](size_t idx) const { return ids[idx]; }
+  int &operator[](size_t idx) { return ids[idx]; }
+  kmp_affinity_ids_t &operator=(const kmp_affinity_ids_t &rhs) {
+    for (int i = 0; i < KMP_HW_LAST; ++i)
+      ids[i] = rhs[i];
+    return *this;
+  }
+} kmp_affinity_ids_t;
+
+typedef struct kmp_affinity_attrs_t {
+  int core_type : 8;
+  int core_eff : 8;
+  unsigned valid : 1;
+  unsigned reserved : 15;
+} kmp_affinity_attrs_t;
+#define KMP_AFFINITY_ATTRS_UNKNOWN                                            \
+  { KMP_HW_CORE_TYPE_UNKNOWN, kmp_hw_attr_t::UNKNOWN_CORE_EFF, 0, 0 }
+
 typedef struct kmp_affinity_t {
   char *proclist;
   enum affinity_type type;
@@ -846,6 +875,8 @@ typedef struct kmp_affinity_t {
   kmp_affinity_flags_t flags;
   unsigned num_masks;
   kmp_affin_mask_t *masks;
+  kmp_affinity_ids_t *ids;
+  kmp_affinity_attrs_t *attrs;
   unsigned num_os_id_masks;
   kmp_affin_mask_t *os_id_masks;
   const char *env_var;
@@ -855,7 +886,7 @@ typedef struct kmp_affinity_t {
   {                                                                           \
     nullptr, affinity_default, KMP_HW_UNKNOWN, -1, 0, 0,                      \
         {TRUE, FALSE, TRUE, affinity_respect_mask_default, FALSE, FALSE}, 0,  \
-        nullptr, 0, nullptr, env                                              \
+        nullptr, nullptr, nullptr, 0, nullptr, env                            \
   }
 
 extern enum affinity_top_method __kmp_affinity_top_method;
@@ -2711,6 +2742,8 @@ typedef struct KMP_ALIGN_CACHE kmp_base_info {
 
 #if KMP_AFFINITY_SUPPORTED
   kmp_affin_mask_t *th_affin_mask; /* thread's current affinity mask */
+  kmp_affinity_ids_t th_topology_ids; /* thread's current topology ids */
+  kmp_affinity_attrs_t th_topology_attrs; /* thread's current topology attrs */
 #endif
   omp_allocator_handle_t th_def_allocator; /* default allocator */
   /* The data set by the primary thread at reinit, then R/W by the worker */
diff --git a/openmp/runtime/src/kmp_affinity.cpp b/openmp/runtime/src/kmp_affinity.cpp
index fb3f0ed..f4d4045 100644
--- a/openmp/runtime/src/kmp_affinity.cpp
+++ b/openmp/runtime/src/kmp_affinity.cpp
@@ -4053,6 +4053,107 @@ static int __kmp_affinity_max_proc_per_core(int nprocs, int bottom_level,
 
 static int *procarr = NULL;
 static int __kmp_aff_depth = 0;
+static int *__kmp_osid_to_hwthread_map = NULL;
+
+static void __kmp_affinity_get_mask_topology_info(const kmp_affin_mask_t *mask,
+                                                  kmp_affinity_ids_t &ids,
+                                                  kmp_affinity_attrs_t &attrs) {
+  if (!KMP_AFFINITY_CAPABLE())
+    return;
+
+  // Initialize ids and attrs thread data
+  for (int i = 0; i < KMP_HW_LAST; ++i)
+    ids[i] = kmp_hw_thread_t::UNKNOWN_ID;
+  attrs = KMP_AFFINITY_ATTRS_UNKNOWN;
+
+  // Iterate through each os id within the mask and determine
+  // the topology id and attribute information
+  int cpu;
+  int depth = __kmp_topology->get_depth();
+  KMP_CPU_SET_ITERATE(cpu, mask) {
+    int osid_idx = __kmp_osid_to_hwthread_map[cpu];
+    const kmp_hw_thread_t &hw_thread = __kmp_topology->at(osid_idx);
+    for (int level = 0; level < depth; ++level) {
+      kmp_hw_t type = __kmp_topology->get_type(level);
+      int id = hw_thread.sub_ids[level];
+      if (ids[type] == kmp_hw_thread_t::UNKNOWN_ID || ids[type] == id) {
+        ids[type] = id;
+      } else {
+        // This mask spans across multiple topology units, set it as such
+        // and mark every level below as such as well.
+        ids[type] = kmp_hw_thread_t::MULTIPLE_ID;
+        for (; level < depth; ++level) {
+          kmp_hw_t type = __kmp_topology->get_type(level);
+          ids[type] = kmp_hw_thread_t::MULTIPLE_ID;
+        }
+      }
+    }
+    if (!attrs.valid) {
+      attrs.core_type = hw_thread.attrs.get_core_type();
+      attrs.core_eff = hw_thread.attrs.get_core_eff();
+      attrs.valid = 1;
+    } else {
+      // This mask spans across multiple attributes, set it as such
+      if (attrs.core_type != hw_thread.attrs.get_core_type())
+        attrs.core_type = KMP_HW_CORE_TYPE_UNKNOWN;
+      if (attrs.core_eff != hw_thread.attrs.get_core_eff())
+        attrs.core_eff = kmp_hw_attr_t::UNKNOWN_CORE_EFF;
+    }
+  }
+}
+
+static void __kmp_affinity_get_thread_topology_info(kmp_info_t *th) {
+  if (!KMP_AFFINITY_CAPABLE())
+    return;
+  const kmp_affin_mask_t *mask = th->th.th_affin_mask;
+  kmp_affinity_ids_t &ids = th->th.th_topology_ids;
+  kmp_affinity_attrs_t &attrs = th->th.th_topology_attrs;
+  __kmp_affinity_get_mask_topology_info(mask, ids, attrs);
+}
+
+// Assign the topology information to each place in the place list
+// A thread can then grab not only its affinity mask, but the topology
+// information associated with that mask. e.g., Which socket is a thread on
+static void __kmp_affinity_get_topology_info(kmp_affinity_t &affinity) {
+  if (!KMP_AFFINITY_CAPABLE())
+    return;
+  if (affinity.type != affinity_none) {
+    KMP_ASSERT(affinity.num_os_id_masks);
+    KMP_ASSERT(affinity.os_id_masks);
+  }
+  KMP_ASSERT(affinity.num_masks);
+  KMP_ASSERT(affinity.masks);
+  KMP_ASSERT(__kmp_affin_fullMask);
+
+  int max_cpu = __kmp_affin_fullMask->get_max_cpu();
+  int num_hw_threads = __kmp_topology->get_num_hw_threads();
+
+  // Allocate thread topology information
+  if (!affinity.ids) {
+    affinity.ids = (kmp_affinity_ids_t *)__kmp_allocate(
+        sizeof(kmp_affinity_ids_t) * affinity.num_masks);
+  }
+  if (!affinity.attrs) {
+    affinity.attrs = (kmp_affinity_attrs_t *)__kmp_allocate(
+        sizeof(kmp_affinity_attrs_t) * affinity.num_masks);
+  }
+  if (!__kmp_osid_to_hwthread_map) {
+    // Want the +1 because max_cpu should be valid index into map
+    __kmp_osid_to_hwthread_map =
+        (int *)__kmp_allocate(sizeof(int) * (max_cpu + 1));
+  }
+
+  // Create the OS proc to hardware thread map
+  for (int hw_thread = 0; hw_thread < num_hw_threads; ++hw_thread)
+    __kmp_osid_to_hwthread_map[__kmp_topology->at(hw_thread).os_id] = hw_thread;
+
+  for (unsigned i = 0; i < affinity.num_masks; ++i) {
+    kmp_affinity_ids_t &ids = affinity.ids[i];
+    kmp_affinity_attrs_t &attrs = affinity.attrs[i];
+    kmp_affin_mask_t *mask = KMP_CPU_INDEX(affinity.masks, i);
+    __kmp_affinity_get_mask_topology_info(mask, ids, attrs);
+  }
+}
 
 // Create a one element mask array (set of places) which only contains the
 // initial process's affinity mask
@@ -4063,6 +4164,7 @@ static void __kmp_create_affinity_none_places(kmp_affinity_t &affinity) {
   KMP_CPU_ALLOC_ARRAY(affinity.masks, affinity.num_masks);
   kmp_affin_mask_t *dest = KMP_CPU_INDEX(affinity.masks, 0);
   KMP_CPU_COPY(dest, __kmp_affin_fullMask);
+  __kmp_affinity_get_topology_info(affinity);
 }
 
 static void __kmp_aux_affinity_initialize_masks(kmp_affinity_t &affinity) {
@@ -4432,6 +4534,7 @@ static void __kmp_aux_affinity_initialize(kmp_affinity_t &affinity) {
     if ((nproc < 2) || (nproc < __kmp_avail_proc)) {
       KMP_AFF_WARNING(affinity, AffBalancedNotAvail, env_var);
       affinity.type = affinity_none;
+      __kmp_create_affinity_none_places(affinity);
       affinity.flags.initialized = TRUE;
       return;
     }
@@ -4508,6 +4611,7 @@ static void __kmp_aux_affinity_initialize(kmp_affinity_t &affinity) {
   default:
     KMP_ASSERT2(0, "Unexpected affinity setting");
   }
+  __kmp_affinity_get_topology_info(affinity);
   affinity.flags.initialized = TRUE;
 }
@@ -4538,6 +4642,10 @@ void __kmp_affinity_uninitialize(void) {
     KMP_CPU_FREE_ARRAY(affinity->os_id_masks, affinity->num_os_id_masks);
     if (affinity->proclist != NULL)
       __kmp_free(affinity->proclist);
+    if (affinity->ids != NULL)
+      __kmp_free(affinity->ids);
+    if (affinity->attrs != NULL)
+      __kmp_free(affinity->attrs);
     *affinity = KMP_AFFINITY_INIT(affinity->env_var);
   }
   if (__kmp_affin_origMask != NULL) {
@@ -4552,6 +4660,10 @@ void __kmp_affinity_uninitialize(void) {
     __kmp_free(procarr);
     procarr = NULL;
   }
+  if (__kmp_osid_to_hwthread_map) {
+    __kmp_free(__kmp_osid_to_hwthread_map);
+    __kmp_osid_to_hwthread_map = NULL;
+  }
 #if KMP_USE_HWLOC
   if (__kmp_hwloc_topology != NULL) {
     hwloc_topology_destroy(__kmp_hwloc_topology);
@@ -4584,12 +4696,21 @@ static void __kmp_select_mask_by_gtid(int gtid, const kmp_affinity_t *affinity,
   *mask = KMP_CPU_INDEX(affinity->masks, *place);
 }
 
+// This function initializes the per-thread data concerning affinity including
+// the mask and topology information
 void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
+
+  kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
+
+  // Set the thread topology information to default of unknown
+  for (int id = 0; id < KMP_HW_LAST; ++id)
+    th->th.th_topology_ids[id] = kmp_hw_thread_t::UNKNOWN_ID;
+  th->th.th_topology_attrs = KMP_AFFINITY_ATTRS_UNKNOWN;
+
   if (!KMP_AFFINITY_CAPABLE()) {
     return;
   }
 
-  kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
   if (th->th.th_affin_mask == NULL) {
     KMP_CPU_ALLOC(th->th.th_affin_mask);
   } else {
@@ -4654,6 +4775,11 @@ void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
       th->th.th_first_place = 0;
       th->th.th_last_place = affinity->num_masks - 1;
     }
+    // Copy topology information associated with the place
+    if (i >= 0) {
+      th->th.th_topology_ids = __kmp_affinity.ids[i];
+      th->th.th_topology_attrs = __kmp_affinity.attrs[i];
+    }
     if (i == KMP_PLACE_ALL) {
       KA_TRACE(100,
                ("__kmp_affinity_set_init_mask: binding T#%d to all places\n",
@@ -4718,6 +4844,9 @@ void __kmp_affinity_set_place(int gtid) {
       KMP_CPU_INDEX(__kmp_affinity.masks, th->th.th_new_place);
   KMP_CPU_COPY(th->th.th_affin_mask, mask);
   th->th.th_current_place = th->th.th_new_place;
+  // Copy topology information associated with the place
+  th->th.th_topology_ids = __kmp_affinity.ids[th->th.th_new_place];
+  th->th.th_topology_attrs = __kmp_affinity.attrs[th->th.th_new_place];
 
   if (__kmp_affinity.flags.verbose) {
     char buf[KMP_AFFIN_MASK_PRINT_LEN];
@@ -5037,6 +5166,7 @@ void __kmp_balanced_affinity(kmp_info_t *th, int nthreads) {
       KMP_INFORM(BoundToOSProcSet, env_var, (kmp_int32)getpid(), __kmp_gettid(),
                  tid, buf);
     }
+    __kmp_affinity_get_thread_topology_info(th);
     __kmp_set_system_affinity(mask, TRUE);
   } else { // Non-uniform topology
@@ -5203,6 +5333,7 @@ void __kmp_balanced_affinity(kmp_info_t *th, int nthreads) {
       KMP_INFORM(BoundToOSProcSet, env_var, (kmp_int32)getpid(), __kmp_gettid(),
                  tid, buf);
     }
+    __kmp_affinity_get_thread_topology_info(th);
     __kmp_set_system_affinity(mask, TRUE);
   }
 }
diff --git a/openmp/runtime/src/kmp_affinity.h b/openmp/runtime/src/kmp_affinity.h
index 5412aea..e685e2c 100644
--- a/openmp/runtime/src/kmp_affinity.h
+++ b/openmp/runtime/src/kmp_affinity.h
@@ -681,9 +681,14 @@ struct kmp_hw_attr_t {
   bool operator!=(const kmp_hw_attr_t &rhs) const { return !operator==(rhs); }
 };
 
+#if KMP_AFFINITY_SUPPORTED
+KMP_BUILD_ASSERT(sizeof(kmp_hw_attr_t) == sizeof(kmp_affinity_attrs_t));
+#endif
+
 class kmp_hw_thread_t {
 public:
   static const int UNKNOWN_ID = -1;
+  static const int MULTIPLE_ID = -2;
   static int compare_ids(const void *a, const void *b);
   static int compare_compact(const void *a, const void *b);
   int ids[KMP_HW_LAST];
-- 
2.7.4
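
Note (not part of the patch): the following is a minimal sketch of how
runtime-internal code might consume the new per-thread fields, assuming it
is compiled inside the runtime (e.g., in kmp_affinity.cpp) where kmp_info_t,
kmp_hw_thread_t, and the KMP_HW_* enumerators are visible. The helper name
__kmp_debug_print_thread_topology and the use of plain printf are
illustrative only and are not part of the library's API.

// Hypothetical helper: dump the topology assignment recorded for a thread.
static void __kmp_debug_print_thread_topology(const kmp_info_t *th, int gtid) {
  const kmp_affinity_ids_t &ids = th->th.th_topology_ids;
  const kmp_affinity_attrs_t &attrs = th->th.th_topology_attrs;
  // Each entry is a unit ID, UNKNOWN_ID (-1), or MULTIPLE_ID (-2) when the
  // thread's mask spans more than one unit at that level.
  int socket = ids[KMP_HW_SOCKET];
  int core = ids[KMP_HW_CORE];
  int hw_thread = ids[KMP_HW_THREAD];
  printf("T#%d socket=%d core=%d thread=%d", gtid, socket, core, hw_thread);
  // Core attributes are only meaningful once attrs.valid has been set.
  if (attrs.valid) {
    if (attrs.core_type != KMP_HW_CORE_TYPE_UNKNOWN)
      printf(" core_type=%d", attrs.core_type);
    if (attrs.core_eff != kmp_hw_attr_t::UNKNOWN_CORE_EFF)
      printf(" core_eff=%d", attrs.core_eff);
  }
  printf("\n");
}

Such a helper could be called, for example, after __kmp_affinity_set_init_mask()
or __kmp_affinity_set_place() to verify that the recorded IDs match the mask
that was just applied.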