From 858ac310541f8cc967f4daa8198d1d49bbf1fc22 Mon Sep 17 00:00:00 2001 From: Adeel Mujahid Date: Fri, 30 Jun 2017 07:41:26 +0300 Subject: [PATCH] Make libnuma soft dependency (dotnet/coreclr#12556) Load libnuma dynamically, so it is not linked at compile time and hence avoid making it a hard dependency at runtime. Commit migrated from https://github.com/dotnet/coreclr/commit/08d6a06f1da7be86ef34284fccf63cea68c6d90f --- src/coreclr/src/pal/src/numa/numa.cpp | 137 ++++++++++++++++++++------------ src/coreclr/src/pal/src/numa/numashim.h | 47 +++++++++++ 2 files changed, 133 insertions(+), 51 deletions(-) create mode 100644 src/coreclr/src/pal/src/numa/numashim.h diff --git a/src/coreclr/src/pal/src/numa/numa.cpp b/src/coreclr/src/pal/src/numa/numa.cpp index 629c1a6..9e7bab9 100644 --- a/src/coreclr/src/pal/src/numa/numa.cpp +++ b/src/coreclr/src/pal/src/numa/numa.cpp @@ -35,6 +35,9 @@ SET_DEFAULT_DEBUG_CHANNEL(NUMA); #endif #include +#include + +#include "numashim.h" using namespace CorUnix; @@ -71,6 +74,10 @@ int g_possibleCpuCount = 0; int g_groupCount = 0; // The highest NUMA node available int g_highestNumaNode = 0; +// Is numa available +bool g_numaAvailable = false; + +void* numaHandle = nullptr; static const int MaxCpusPerGroup = 8 * sizeof(KAFFINITY); static const WORD NO_GROUP = 0xffff; @@ -140,71 +147,92 @@ BOOL NUMASupportInitialize() { #if HAVE_NUMA_H - if (numa_available() != -1) + numaHandle = dlopen("libnuma.so", RTLD_LAZY); + if (numaHandle == 0) { - struct bitmask *mask = numa_allocate_cpumask(); - int numaNodesCount = numa_max_node() + 1; - - g_possibleCpuCount = numa_num_possible_cpus(); - g_cpuCount = 0; - g_groupCount = 0; - - for (int i = 0; i < numaNodesCount; i++) + numaHandle = dlopen("libnuma.so.1", RTLD_LAZY); + } + if (numaHandle != 0) + { + dlsym(numaHandle, "numa_allocate_cpumask"); +#define PER_FUNCTION_BLOCK(fn) \ + fn##_ptr = (decltype(fn)*)dlsym(numaHandle, #fn); \ + if (fn##_ptr == NULL) { fprintf(stderr, "Cannot get symbol " #fn " 
from libnuma\n"); abort(); } +FOR_ALL_NUMA_FUNCTIONS +#undef PER_FUNCTION_BLOCK + + if (numa_available() != -1) { - int st = numa_node_to_cpus(i, mask); - // The only failure that can happen is that the mask is not large enough - // but that cannot happen since the mask was allocated by numa_allocate_cpumask - _ASSERTE(st == 0); - unsigned int nodeCpuCount = numa_bitmask_weight(mask); - g_cpuCount += nodeCpuCount; - unsigned int nodeGroupCount = (nodeCpuCount + MaxCpusPerGroup - 1) / MaxCpusPerGroup; - g_groupCount += nodeGroupCount; + dlclose(numaHandle); } + else + { + g_numaAvailable = true; - AllocateLookupArrays(); + struct bitmask *mask = numa_allocate_cpumask(); + int numaNodesCount = numa_max_node() + 1; - WORD currentGroup = 0; - int currentGroupCpus = 0; + g_possibleCpuCount = numa_num_possible_cpus(); + g_cpuCount = 0; + g_groupCount = 0; - for (int i = 0; i < numaNodesCount; i++) - { - int st = numa_node_to_cpus(i, mask); - // The only failure that can happen is that the mask is not large enough - // but that cannot happen since the mask was allocated by numa_allocate_cpumask - _ASSERTE(st == 0); - unsigned int nodeCpuCount = numa_bitmask_weight(mask); - unsigned int nodeGroupCount = (nodeCpuCount + MaxCpusPerGroup - 1) / MaxCpusPerGroup; - for (int j = 0; j < g_possibleCpuCount; j++) + for (int i = 0; i < numaNodesCount; i++) + { + int st = numa_node_to_cpus(i, mask); + // The only failure that can happen is that the mask is not large enough + // but that cannot happen since the mask was allocated by numa_allocate_cpumask + _ASSERTE(st == 0); + unsigned int nodeCpuCount = numa_bitmask_weight(mask); + g_cpuCount += nodeCpuCount; + unsigned int nodeGroupCount = (nodeCpuCount + MaxCpusPerGroup - 1) / MaxCpusPerGroup; + g_groupCount += nodeGroupCount; + } + + AllocateLookupArrays(); + + WORD currentGroup = 0; + int currentGroupCpus = 0; + + for (int i = 0; i < numaNodesCount; i++) { - if (numa_bitmask_isbitset(mask, j)) + int st = numa_node_to_cpus(i, 
mask); + // The only failure that can happen is that the mask is not large enough + // but that cannot happen since the mask was allocated by numa_allocate_cpumask + _ASSERTE(st == 0); + unsigned int nodeCpuCount = numa_bitmask_weight(mask); + unsigned int nodeGroupCount = (nodeCpuCount + MaxCpusPerGroup - 1) / MaxCpusPerGroup; + for (int j = 0; j < g_possibleCpuCount; j++) { - if (currentGroupCpus == MaxCpusPerGroup) + if (numa_bitmask_isbitset(mask, j)) { - g_groupToCpuCount[currentGroup] = MaxCpusPerGroup; - g_groupToCpuMask[currentGroup] = GetFullAffinityMask(MaxCpusPerGroup); - currentGroupCpus = 0; - currentGroup++; + if (currentGroupCpus == MaxCpusPerGroup) + { + g_groupToCpuCount[currentGroup] = MaxCpusPerGroup; + g_groupToCpuMask[currentGroup] = GetFullAffinityMask(MaxCpusPerGroup); + currentGroupCpus = 0; + currentGroup++; + } + g_cpuToAffinity[j].Node = i; + g_cpuToAffinity[j].Group = currentGroup; + g_cpuToAffinity[j].Number = currentGroupCpus; + g_groupAndIndexToCpu[currentGroup * MaxCpusPerGroup + currentGroupCpus] = j; + currentGroupCpus++; } - g_cpuToAffinity[j].Node = i; - g_cpuToAffinity[j].Group = currentGroup; - g_cpuToAffinity[j].Number = currentGroupCpus; - g_groupAndIndexToCpu[currentGroup * MaxCpusPerGroup + currentGroupCpus] = j; - currentGroupCpus++; } - } - if (currentGroupCpus != 0) - { - g_groupToCpuCount[currentGroup] = currentGroupCpus; - g_groupToCpuMask[currentGroup] = GetFullAffinityMask(currentGroupCpus); - currentGroupCpus = 0; - currentGroup++; + if (currentGroupCpus != 0) + { + g_groupToCpuCount[currentGroup] = currentGroupCpus; + g_groupToCpuMask[currentGroup] = GetFullAffinityMask(currentGroupCpus); + currentGroupCpus = 0; + currentGroup++; + } } - } - numa_free_cpumask(mask); + numa_free_cpumask(mask); - g_highestNumaNode = numa_max_node(); + g_highestNumaNode = numa_max_node(); + } } else #endif // HAVE_NUMA_H @@ -237,6 +265,12 @@ VOID NUMASupportCleanup() { FreeLookupArrays(); +#if HAVE_NUMA_H + if (g_numaAvailable) + { + 
dlclose(numaHandle); + } +#endif // HAVE_NUMA_H } /*++ @@ -672,7 +706,7 @@ VirtualAllocExNuma( { result = VirtualAlloc(lpAddress, dwSize, flAllocationType, flProtect); #if HAVE_NUMA_H - if (result != NULL) + if (result != NULL && g_numaAvailable) { int nodeMaskLength = (g_highestNumaNode + 1 + sizeof(unsigned long) - 1) / sizeof(unsigned long); unsigned long *nodeMask = new unsigned long[nodeMaskLength]; @@ -684,6 +718,7 @@ VirtualAllocExNuma( nodeMask[index] = mask; int st = mbind(result, dwSize, MPOL_PREFERRED, nodeMask, g_highestNumaNode, 0); + free(nodeMask); _ASSERTE(st == 0); // If the mbind fails, we still return the allocated memory since the nndPreferred is just a hint diff --git a/src/coreclr/src/pal/src/numa/numashim.h b/src/coreclr/src/pal/src/numa/numashim.h new file mode 100644 index 0000000..4d13d8a --- /dev/null +++ b/src/coreclr/src/pal/src/numa/numashim.h @@ -0,0 +1,47 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +// Enable calling numa functions through shims to make it a soft +// runtime dependency. 
+
+#ifndef __NUMASHIM_H__
+#define __NUMASHIM_H__
+
+#if HAVE_NUMA_H
+
+#include <numa.h>
+#include <numaif.h>
+
+// List of all functions from the numa library that are used
+#define FOR_ALL_NUMA_FUNCTIONS \
+    PER_FUNCTION_BLOCK(numa_available) \
+    PER_FUNCTION_BLOCK(mbind) \
+    PER_FUNCTION_BLOCK(numa_num_possible_cpus) \
+    PER_FUNCTION_BLOCK(numa_max_node) \
+    PER_FUNCTION_BLOCK(numa_allocate_cpumask) \
+    PER_FUNCTION_BLOCK(numa_node_to_cpus) \
+    PER_FUNCTION_BLOCK(numa_bitmask_weight) \
+    PER_FUNCTION_BLOCK(numa_bitmask_isbitset) \
+    PER_FUNCTION_BLOCK(numa_free_cpumask)
+
+// Declare pointers to all the used numa functions
+#define PER_FUNCTION_BLOCK(fn) extern decltype(fn)* fn##_ptr;
+FOR_ALL_NUMA_FUNCTIONS
+#undef PER_FUNCTION_BLOCK
+
+// Redefine all calls to numa functions as calls through pointers that are set
+// to the functions of libnuma in the initialization.
+#define numa_available() numa_available_ptr()
+#define mbind(...) mbind_ptr(__VA_ARGS__)
+#define numa_num_possible_cpus() numa_num_possible_cpus_ptr()
+#define numa_max_node() numa_max_node_ptr()
+#define numa_allocate_cpumask() numa_allocate_cpumask_ptr()
+#define numa_node_to_cpus(...) numa_node_to_cpus_ptr(__VA_ARGS__)
+#define numa_bitmask_weight(...) numa_bitmask_weight_ptr(__VA_ARGS__)
+#define numa_bitmask_isbitset(...) numa_bitmask_isbitset_ptr(__VA_ARGS__)
+#define numa_free_cpumask(...) numa_free_cpumask_ptr(__VA_ARGS__)
+
+#endif // HAVE_NUMA_H
+
+#endif // __NUMASHIM_H__
-- 
2.7.4