#endif
#include <pthread.h>
+#include <dlfcn.h>
+
+#include "numashim.h"
using namespace CorUnix;
int g_groupCount = 0;
// The highest NUMA node available
int g_highestNumaNode = 0;
+// Is numa available
+bool g_numaAvailable = false;
+
+// Handle of the dynamically loaded libnuma (set in NUMASupportInitialize,
+// released in NUMASupportCleanup when g_numaAvailable is true)
+void* numaHandle = nullptr;
static const int MaxCpusPerGroup = 8 * sizeof(KAFFINITY);
static const WORD NO_GROUP = 0xffff;
NUMASupportInitialize()
{
#if HAVE_NUMA_H
- if (numa_available() != -1)
+ // Load libnuma dynamically so it stays a soft (optional) runtime
+ // dependency: try the unversioned name first, then the versioned one.
+ numaHandle = dlopen("libnuma.so", RTLD_LAZY);
+ if (numaHandle == 0)
{
- struct bitmask *mask = numa_allocate_cpumask();
- int numaNodesCount = numa_max_node() + 1;
-
- g_possibleCpuCount = numa_num_possible_cpus();
- g_cpuCount = 0;
- g_groupCount = 0;
-
- for (int i = 0; i < numaNodesCount; i++)
+ numaHandle = dlopen("libnuma.so.1", RTLD_LAZY);
+ }
+ if (numaHandle != 0)
+ {
+ // Resolve every libnuma entry point we use; a missing symbol means a
+ // broken libnuma installation, so fail fast instead of crashing later.
+#define PER_FUNCTION_BLOCK(fn) \
+ fn##_ptr = (decltype(fn)*)dlsym(numaHandle, #fn); \
+ if (fn##_ptr == NULL) { fprintf(stderr, "Cannot get symbol " #fn " from libnuma\n"); abort(); }
FOR_ALL_NUMA_FUNCTIONS
#undef PER_FUNCTION_BLOCK
+
+ // numa_available() returns -1 when NUMA is NOT supported; in that case
+ // unload the library and leave g_numaAvailable false. (The check must
+ // be == -1 here: != -1 would disable NUMA exactly when it is usable.)
+ if (numa_available() == -1)
{
- int st = numa_node_to_cpus(i, mask);
- // The only failure that can happen is that the mask is not large enough
- // but that cannot happen since the mask was allocated by numa_allocate_cpumask
- _ASSERTE(st == 0);
- unsigned int nodeCpuCount = numa_bitmask_weight(mask);
- g_cpuCount += nodeCpuCount;
- unsigned int nodeGroupCount = (nodeCpuCount + MaxCpusPerGroup - 1) / MaxCpusPerGroup;
- g_groupCount += nodeGroupCount;
+ dlclose(numaHandle);
}
+ else
+ {
+ g_numaAvailable = true;
- AllocateLookupArrays();
+ struct bitmask *mask = numa_allocate_cpumask();
+ int numaNodesCount = numa_max_node() + 1;
- WORD currentGroup = 0;
- int currentGroupCpus = 0;
+ g_possibleCpuCount = numa_num_possible_cpus();
+ g_cpuCount = 0;
+ g_groupCount = 0;
- for (int i = 0; i < numaNodesCount; i++)
- {
- int st = numa_node_to_cpus(i, mask);
- // The only failure that can happen is that the mask is not large enough
- // but that cannot happen since the mask was allocated by numa_allocate_cpumask
- _ASSERTE(st == 0);
- unsigned int nodeCpuCount = numa_bitmask_weight(mask);
- unsigned int nodeGroupCount = (nodeCpuCount + MaxCpusPerGroup - 1) / MaxCpusPerGroup;
- for (int j = 0; j < g_possibleCpuCount; j++)
+ // First pass: count CPUs and groups needed per NUMA node.
+ for (int i = 0; i < numaNodesCount; i++)
+ {
+ int st = numa_node_to_cpus(i, mask);
+ // The only failure that can happen is that the mask is not large enough
+ // but that cannot happen since the mask was allocated by numa_allocate_cpumask
+ _ASSERTE(st == 0);
+ unsigned int nodeCpuCount = numa_bitmask_weight(mask);
+ g_cpuCount += nodeCpuCount;
+ unsigned int nodeGroupCount = (nodeCpuCount + MaxCpusPerGroup - 1) / MaxCpusPerGroup;
+ g_groupCount += nodeGroupCount;
+ }
+
+ AllocateLookupArrays();
+
+ WORD currentGroup = 0;
+ int currentGroupCpus = 0;
+
+ // Second pass: assign each CPU its node / group / in-group number.
+ for (int i = 0; i < numaNodesCount; i++)
{
- if (numa_bitmask_isbitset(mask, j))
+ int st = numa_node_to_cpus(i, mask);
+ // The only failure that can happen is that the mask is not large enough
+ // but that cannot happen since the mask was allocated by numa_allocate_cpumask
+ _ASSERTE(st == 0);
+ unsigned int nodeCpuCount = numa_bitmask_weight(mask);
+ unsigned int nodeGroupCount = (nodeCpuCount + MaxCpusPerGroup - 1) / MaxCpusPerGroup;
+ for (int j = 0; j < g_possibleCpuCount; j++)
{
- if (currentGroupCpus == MaxCpusPerGroup)
+ if (numa_bitmask_isbitset(mask, j))
{
- g_groupToCpuCount[currentGroup] = MaxCpusPerGroup;
- g_groupToCpuMask[currentGroup] = GetFullAffinityMask(MaxCpusPerGroup);
- currentGroupCpus = 0;
- currentGroup++;
+ if (currentGroupCpus == MaxCpusPerGroup)
+ {
+ g_groupToCpuCount[currentGroup] = MaxCpusPerGroup;
+ g_groupToCpuMask[currentGroup] = GetFullAffinityMask(MaxCpusPerGroup);
+ currentGroupCpus = 0;
+ currentGroup++;
+ }
+ g_cpuToAffinity[j].Node = i;
+ g_cpuToAffinity[j].Group = currentGroup;
+ g_cpuToAffinity[j].Number = currentGroupCpus;
+ g_groupAndIndexToCpu[currentGroup * MaxCpusPerGroup + currentGroupCpus] = j;
+ currentGroupCpus++;
}
- g_cpuToAffinity[j].Node = i;
- g_cpuToAffinity[j].Group = currentGroup;
- g_cpuToAffinity[j].Number = currentGroupCpus;
- g_groupAndIndexToCpu[currentGroup * MaxCpusPerGroup + currentGroupCpus] = j;
- currentGroupCpus++;
}
- }
- if (currentGroupCpus != 0)
- {
- g_groupToCpuCount[currentGroup] = currentGroupCpus;
- g_groupToCpuMask[currentGroup] = GetFullAffinityMask(currentGroupCpus);
- currentGroupCpus = 0;
- currentGroup++;
+ if (currentGroupCpus != 0)
+ {
+ g_groupToCpuCount[currentGroup] = currentGroupCpus;
+ g_groupToCpuMask[currentGroup] = GetFullAffinityMask(currentGroupCpus);
+ currentGroupCpus = 0;
+ currentGroup++;
+ }
}
- }
- numa_free_cpumask(mask);
+ numa_free_cpumask(mask);
- g_highestNumaNode = numa_max_node();
+ g_highestNumaNode = numa_max_node();
+ }
}
+ // NOTE(review): if libnuma loads but numa_available() reports -1, the
+ // non-NUMA fallback in the else branch below is skipped - verify the
+ // fallback initialization still runs in that case.
else
#endif // HAVE_NUMA_H
NUMASupportCleanup()
{
FreeLookupArrays();
+#if HAVE_NUMA_H
+ // Release the libnuma handle acquired in NUMASupportInitialize.
+ // g_numaAvailable is only set when dlopen and symbol resolution
+ // succeeded AND numa_available() reported support, so the handle is
+ // valid (and not yet closed) exactly when this flag is true.
+ if (g_numaAvailable)
+ {
+ dlclose(numaHandle);
+ }
+#endif // HAVE_NUMA_H
}
/*++
{
result = VirtualAlloc(lpAddress, dwSize, flAllocationType, flProtect);
#if HAVE_NUMA_H
- if (result != NULL)
+ // Only apply the NUMA node preference when libnuma was actually loaded;
+ // the shimmed numa function pointers are unset otherwise.
+ if (result != NULL && g_numaAvailable)
{
+ // NOTE(review): this length divides by sizeof(unsigned long) (bytes);
+ // a bitmask sized in bits would divide by (8 * sizeof(unsigned long)).
+ // The current form only over-allocates, but confirm the intent.
int nodeMaskLength = (g_highestNumaNode + 1 + sizeof(unsigned long) - 1) / sizeof(unsigned long);
unsigned long *nodeMask = new unsigned long[nodeMaskLength];
nodeMask[index] = mask;
+ // NOTE(review): mbind's maxnode parameter is a bit count; passing
+ // g_highestNumaNode rather than g_highestNumaNode + 1 may truncate the
+ // highest node's bit - verify against mbind(2).
int st = mbind(result, dwSize, MPOL_PREFERRED, nodeMask, g_highestNumaNode, 0);
+
- free(nodeMask);
+ // nodeMask was allocated with new[]; releasing it with free() is
+ // undefined behavior, so it must be delete[].
+ delete[] nodeMask;
_ASSERTE(st == 0);
// If the mbind fails, we still return the allocated memory since the nndPreferred is just a hint
--- /dev/null
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// Enable calling numa functions through shims to make it a soft
+// runtime dependency.
+
+#ifndef __NUMASHIM_H__
+#define __NUMASHIM_H__
+
+#if HAVE_NUMA_H
+
+#include <numa.h>
+#include <numaif.h>
+
+// List of all functions from the numa library that are used.
+// Only symbols actually exported by libnuma belong here: numa_free_cpumask
+// is a static inline wrapper around numa_bitmask_free defined in numa.h,
+// so dlsym cannot resolve it (the shim would abort at startup). Resolve
+// the exported numa_bitmask_free instead and route numa_free_cpumask
+// calls to it below.
+#define FOR_ALL_NUMA_FUNCTIONS \
+ PER_FUNCTION_BLOCK(numa_available) \
+ PER_FUNCTION_BLOCK(mbind) \
+ PER_FUNCTION_BLOCK(numa_num_possible_cpus) \
+ PER_FUNCTION_BLOCK(numa_max_node) \
+ PER_FUNCTION_BLOCK(numa_allocate_cpumask) \
+ PER_FUNCTION_BLOCK(numa_node_to_cpus) \
+ PER_FUNCTION_BLOCK(numa_bitmask_weight) \
+ PER_FUNCTION_BLOCK(numa_bitmask_isbitset) \
+ PER_FUNCTION_BLOCK(numa_bitmask_free)
+
+// Declare pointers to all the used numa functions
+#define PER_FUNCTION_BLOCK(fn) extern decltype(fn)* fn##_ptr;
+FOR_ALL_NUMA_FUNCTIONS
+#undef PER_FUNCTION_BLOCK
+
+// Redefine all calls to numa functions as calls through pointers that are set
+// to the functions of libnuma in the initialization.
+#define numa_available() numa_available_ptr()
+#define mbind(...) mbind_ptr(__VA_ARGS__)
+#define numa_num_possible_cpus() numa_num_possible_cpus_ptr()
+#define numa_max_node() numa_max_node_ptr()
+#define numa_allocate_cpumask() numa_allocate_cpumask_ptr()
+#define numa_node_to_cpus(...) numa_node_to_cpus_ptr(__VA_ARGS__)
+#define numa_bitmask_weight(...) numa_bitmask_weight_ptr(__VA_ARGS__)
+#define numa_bitmask_isbitset(...) numa_bitmask_isbitset_ptr(__VA_ARGS__)
+// numa_free_cpumask(b) is equivalent to numa_bitmask_free(b); call through
+// the resolved numa_bitmask_free pointer.
+#define numa_free_cpumask(...) numa_bitmask_free_ptr(__VA_ARGS__)
+
+#endif // HAVE_NUMA_H
+
+#endif // __NUMASHIM_H__