Remove Unix CPU groups emulation

author Jan Vorlicek <janvorli@microsoft.com>

Fri, 5 Apr 2019 00:53:32 +0000 (02:53 +0200)

committer Jan Vorlicek <janvorli@microsoft.com>

Mon, 8 Apr 2019 23:26:55 +0000 (01:26 +0200)
author Jan Vorlicek <janvorli@microsoft.com>
Fri, 5 Apr 2019 00:53:32 +0000 (02:53 +0200)
committer Jan Vorlicek <janvorli@microsoft.com>
Mon, 8 Apr 2019 23:26:55 +0000 (01:26 +0200)
diff --git a/src/classlibnative/bcltype/system.cpp b/src/classlibnative/bcltype/system.cpp

index 12397a3..38e5bba 100644 (file)
--- a/src/classlibnative/bcltype/system.cpp
+++ b/src/classlibnative/bcltype/system.cpp
@@ -325,13 +325,14 @@ INT32 QCALLTYPE SystemNative::GetProcessorCount()
  
      BEGIN_QCALL;
  
+#ifndef FEATURE_PAL
      CPUGroupInfo::EnsureInitialized();
  
      if(CPUGroupInfo::CanEnableThreadUseAllCpuGroups())
      {
          processorCount = CPUGroupInfo::GetNumActiveProcessors();
      }
-
+#endif // !FEATURE_PAL
      // Processor count will be 0 if CPU groups are disabled/not supported
      if(processorCount == 0)
      {
diff --git a/src/gc/env/gcenv.os.h b/src/gc/env/gcenv.os.h

index 05dccf7..7fa1ba7 100644 (file)
--- a/src/gc/env/gcenv.os.h
+++ b/src/gc/env/gcenv.os.h
@@ -167,11 +167,18 @@ class AffinitySet
  
  public:
  
+    static const size_t BitsetDataSize = MAX_SUPPORTED_CPUS / BitsPerBitsetEntry;
+
      AffinitySet()
      {
          memset(m_bitset, 0, sizeof(m_bitset));
      }
  
+    uintptr_t* GetBitsetData()
+    {
+        return m_bitset;
+    }
+
      // Check if the set contains a processor
      bool Contains(size_t cpuIndex) const
      {
@@ -477,9 +484,6 @@ public:
      // Is NUMA support available
      static bool CanEnableGCNumaAware();
  
-    // Gets the NUMA node for the processor
-    static bool GetNumaProcessorNode(uint16_t proc_no, uint16_t *node_no);
-
      // Get processor number and optionally its NUMA node number for the specified heap number
      // Parameters:
      //  heap_number - heap number to get the result for
diff --git a/src/gc/unix/gcenv.unix.cpp b/src/gc/unix/gcenv.unix.cpp

index c71d211..a6d56f2 100644 (file)
--- a/src/gc/unix/gcenv.unix.cpp
+++ b/src/gc/unix/gcenv.unix.cpp
@@ -55,6 +55,33 @@
  #include "globals.h"
  #include "cgroup.h"
  
+#if HAVE_NUMA_H
+
+#include <numa.h>
+#include <numaif.h>
+#include <dlfcn.h>
+
+// List of all functions from the numa library that are used
+#define FOR_ALL_NUMA_FUNCTIONS \
+    PER_FUNCTION_BLOCK(mbind) \
+    PER_FUNCTION_BLOCK(numa_available) \
+    PER_FUNCTION_BLOCK(numa_max_node) \
+    PER_FUNCTION_BLOCK(numa_node_of_cpu)
+
+// Declare pointers to all the used numa functions
+#define PER_FUNCTION_BLOCK(fn) extern decltype(fn)* fn##_ptr;
+FOR_ALL_NUMA_FUNCTIONS
+#undef PER_FUNCTION_BLOCK
+
+// Redefine all calls to numa functions as calls through pointers that are set
+// to the functions of libnuma in the initialization.
+#define mbind(...) mbind_ptr(__VA_ARGS__)
+#define numa_available() numa_available_ptr()
+#define numa_max_node() numa_max_node_ptr()
+#define numa_node_of_cpu(...) numa_node_of_cpu_ptr(__VA_ARGS__)
+
+#endif // HAVE_NUMA_H
+
  #if defined(_ARM_) || defined(_ARM64_)
  #define SYSCONF_GET_NUMPROCS _SC_NPROCESSORS_CONF
  #else
@@ -109,6 +136,74 @@ uint32_t g_pageSizeUnixInl = 0;
  
  AffinitySet g_processAffinitySet;
  
+#if HAVE_CPUSET_T
+typedef cpuset_t cpu_set_t;
+#endif
+
+// The highest NUMA node available
+int g_highestNumaNode = 0;
+// Is numa available
+bool g_numaAvailable = false;
+
+void* g_numaHandle = nullptr;
+
+#if HAVE_NUMA_H
+#define PER_FUNCTION_BLOCK(fn) decltype(fn)* fn##_ptr;
+FOR_ALL_NUMA_FUNCTIONS
+#undef PER_FUNCTION_BLOCK
+#endif // HAVE_NUMA_H
+
+
+// Load libnuma at runtime (when built with HAVE_NUMA_H), resolve the numa functions used by
+// this file and record whether NUMA is available and what the highest NUMA node is.
+// On systems with no NUMA support, g_numaAvailable stays false and the highest NUMA node
+// is reported as 0.
+void NUMASupportInitialize()
+{
+#if HAVE_NUMA_H
+    g_numaHandle = dlopen("libnuma.so", RTLD_LAZY);
+    if (g_numaHandle == 0)
+    {
+        g_numaHandle = dlopen("libnuma.so.1", RTLD_LAZY);
+    }
+    if (g_numaHandle != 0)
+    {
+#define PER_FUNCTION_BLOCK(fn) \
+    fn##_ptr = (decltype(fn)*)dlsym(g_numaHandle, #fn); \
+    if (fn##_ptr == NULL) { fprintf(stderr, "Cannot get symbol " #fn " from libnuma\n"); abort(); }
+FOR_ALL_NUMA_FUNCTIONS
+#undef PER_FUNCTION_BLOCK
+
+        if (numa_available() == -1)
+        {
+            dlclose(g_numaHandle);
+            g_numaHandle = nullptr;
+        }
+        else
+        {
+            g_numaAvailable = true;
+            g_highestNumaNode = numa_max_node();
+        }
+    }
+#endif // HAVE_NUMA_H
+    if (!g_numaAvailable)
+    {
+        // No NUMA
+        g_highestNumaNode = 0;
+    }
+}
+
+// Close the libnuma handle (g_numaHandle) when g_numaAvailable is set; otherwise do nothing
+void NUMASupportCleanup()
+{
+#if HAVE_NUMA_H
+    if (g_numaAvailable)
+    {
+        dlclose(g_numaHandle);
+    }
+#endif // HAVE_NUMA_H
+}
+
  // Initialize the interface implementation
  // Return:
  //  true if it has succeeded, false if it has failed
@@ -221,6 +316,8 @@ bool GCToOSInterface::Initialize()
  
  #endif // HAVE_SCHED_GETAFFINITY
  
+    NUMASupportInitialize();
+
      return true;
  }
  
@@ -235,6 +332,7 @@ void GCToOSInterface::Shutdown()
      munmap(g_helperPage, OS_PAGE_SIZE);
  
      CleanupCGroup();
+    NUMASupportCleanup();
  }
  
  // Get numeric id of the current thread if possible on the
@@ -468,8 +566,29 @@ void* GCToOSInterface::VirtualReserveAndCommitLargePages(size_t size)
  //  true if it has succeeded, false if it has failed
  bool GCToOSInterface::VirtualCommit(void* address, size_t size, uint16_t node)
  {
-    assert(node == NUMA_NODE_UNDEFINED && "Numa allocation is not ported to local GC on unix yet");
-    return mprotect(address, size, PROT_WRITE | PROT_READ) == 0;
+    bool success = mprotect(address, size, PROT_WRITE | PROT_READ) == 0;
+
+#if HAVE_NUMA_H
+    if (success && g_numaAvailable && (node != NUMA_NODE_UNDEFINED))
+    {
+        if ((int)node <= g_highestNumaNode)
+        {
+            int nodeMaskLength = (g_highestNumaNode + 1 + sizeof(unsigned long) - 1) / sizeof(unsigned long);
+            unsigned long *nodeMask = (unsigned long*)alloca(nodeMaskLength * sizeof(unsigned long));
+            memset(nodeMask, 0, nodeMaskLength);
+
+            int index = node / sizeof(unsigned long);
+            int mask = ((unsigned long)1) << (node & (sizeof(unsigned long) - 1));
+            nodeMask[index] = mask;
+
+            int st = mbind(address, size, MPOL_PREFERRED, nodeMask, g_highestNumaNode, 0);
+            assert(st == 0);
+            // If the mbind fails, we still return the allocated memory since the node is just a hint
+        }
+    }
+#endif // HAVE_NUMA_H
+
+    return success;
  }
  
  // Decomit virtual memory range.
@@ -775,13 +894,7 @@ uint32_t GCToOSInterface::GetTotalProcessorCount()
  
  bool GCToOSInterface::CanEnableGCNumaAware()
  {
-    return false;
-}
-
-bool GCToOSInterface::GetNumaProcessorNode(uint16_t proc_no, uint16_t *node_no)
-{
-    assert(!"Numa has not been ported to local GC for unix");
-    return false;
+    return g_numaAvailable;
  }
  
  // Get processor number and optionally its NUMA node number for the specified heap number
@@ -806,10 +919,8 @@ bool GCToOSInterface::GetProcessorForHeap(uint16_t heap_number, uint16_t* proc_n
  
                  if (GCToOSInterface::CanEnableGCNumaAware())
                  {
-                    if (!GCToOSInterface::GetNumaProcessorNode(procNumber, node_no))
-                    {
-                        *node_no = NUMA_NODE_UNDEFINED;
-                    }
+                    int result = numa_node_of_cpu(procNumber);
+                    *node_no = (result >= 0) ? (uint16_t)result : NUMA_NODE_UNDEFINED;
                  }
                  else
                  {
diff --git a/src/gc/windows/gcenv.windows.cpp b/src/gc/windows/gcenv.windows.cpp

index 86bd703..d2bcde8 100644 (file)
--- a/src/gc/windows/gcenv.windows.cpp
+++ b/src/gc/windows/gcenv.windows.cpp
@@ -1286,19 +1286,6 @@ bool GCToOSInterface::CanEnableGCNumaAware()
      return g_fEnableGCNumaAware;
  }
  
-bool GCToOSInterface::GetNumaProcessorNode(uint16_t proc_no, uint16_t *node_no)
-{
-    GroupProcNo groupProcNo(proc_no);
-
-    PROCESSOR_NUMBER procNumber;
-    procNumber.Group    = groupProcNo.GetGroup();
-    procNumber.Number   = (BYTE)groupProcNo.GetProcIndex();
-    procNumber.Reserved = 0;
-
-    assert(g_fEnableGCNumaAware);
-    return ::GetNumaProcessorNodeEx(&procNumber, node_no) != FALSE;
-}
-
  // Get processor number and optionally its NUMA node number for the specified heap number
  // Parameters:
  //  heap_number - heap number to get the result for
@@ -1310,53 +1297,67 @@ bool GCToOSInterface::GetProcessorForHeap(uint16_t heap_number, uint16_t* proc_n
  {
      bool success = false;
  
-    if (CanEnableGCCPUGroups())
+    // Locate heap_number-th available processor
+    uint16_t procNumber;
+    size_t cnt = heap_number;
+    for (uint16_t i = 0; i < GCToOSInterface::GetTotalProcessorCount(); i++)
      {
-        uint16_t gn, gpn;
-        GetGroupForProcessor((uint16_t)heap_number, &gn, &gpn);
-
-        *proc_no = GroupProcNo(gn, gpn).GetCombinedValue();
-
-        if (GCToOSInterface::CanEnableGCNumaAware())
+        if (g_processAffinitySet.Contains(i))
          {
-            if (!GCToOSInterface::GetNumaProcessorNode(*proc_no, node_no))
+            if (cnt == 0)
              {
-                *node_no = NUMA_NODE_UNDEFINED;
+                procNumber = i;
+                success = true;
+                break;
              }
+
+            cnt--;
+        }
+    }
+
+    if (success)
+    {
+        WORD gn, gpn;
+
+        if (CanEnableGCCPUGroups())
+        {
+            GetGroupForProcessor(procNumber, &gn, &gpn);
          }
          else
-        {   // no numa setting, each cpu group is treated as a node
-            *node_no = gn;
+        {
+            gn = GroupProcNo::NoGroup;
+            gpn = procNumber;
          }
  
-        success = true;
-    }
-    else
-    {
-        int bit_number = 0;
-        uint8_t proc_number = 0;
-        for (uintptr_t mask = 1; mask != 0; mask <<= 1)
+        GroupProcNo groupProcNo(gn, gpn);
+        *proc_no = groupProcNo.GetCombinedValue();
+
+        if (GCToOSInterface::CanEnableGCNumaAware())
          {
-            if (g_processAffinitySet.Contains(proc_number))
+            PROCESSOR_NUMBER procNumber;
+
+            if (CanEnableGCCPUGroups())
              {
-                if (bit_number == heap_number)
-                {
-                    *proc_no = GroupProcNo(GroupProcNo::NoGroup, proc_number).GetCombinedValue();
+                procNumber.Group = gn;
+            }
+            else
+            {
+                // Get the current processor group
+                PROCESSOR_NUMBER procNumber;
+                GetCurrentProcessorNumberEx(&procNumber);
+            }
  
-                    if (GCToOSInterface::CanEnableGCNumaAware())
-                    {
-                        if (!GCToOSInterface::GetNumaProcessorNode(proc_number, node_no))
-                        {
-                            *node_no = NUMA_NODE_UNDEFINED;
-                        }
-                    }
+            procNumber.Number   = (BYTE)gpn;
+            procNumber.Reserved = 0;
  
-                    success = true;
-                    break;
-                }
-                bit_number++;
+            if (GetNumaProcessorNodeEx(&procNumber, node_no))
+            {
+                *node_no = NUMA_NODE_UNDEFINED;
              }
-            proc_number++;
+        }
+        else
+        {   // no numa setting, each cpu group is treated as a node
+            *node_no = groupProcNo.GetGroup();
          }
      }
  
diff --git a/src/inc/utilcode.h b/src/inc/utilcode.h

index 5b222b1..cf4b8dd 100644 (file)
--- a/src/inc/utilcode.h
+++ b/src/inc/utilcode.h
@@ -1331,10 +1331,7 @@ BYTE * ClrVirtualAllocWithinRange(const BYTE *pMinAddr,
  // Allocate free memory with specific alignment                                   
  //
  LPVOID ClrVirtualAllocAligned(LPVOID lpAddress, SIZE_T dwSize, DWORD flAllocationType, DWORD flProtect, SIZE_T alignment);
-                                   
-//******************************************************************************
-// Returns the number of processors that a process has been configured to run on
-//******************************************************************************
+
  class NumaNodeInfo 
  {
  private:
@@ -1350,10 +1347,16 @@ public:         // functions
  
      static LPVOID VirtualAllocExNuma(HANDLE hProc, LPVOID lpAddr, SIZE_T size,
                                       DWORD allocType, DWORD prot, DWORD node);
+#ifndef FEATURE_PAL
      static BOOL GetNumaProcessorNodeEx(PPROCESSOR_NUMBER proc_no, PUSHORT node_no);
+#else // !FEATURE_PAL
+    static BOOL GetNumaProcessorNodeEx(USHORT proc_no, PUSHORT node_no);
+#endif // !FEATURE_PAL
  #endif
  };
  
+#ifndef FEATURE_PAL
+
  struct CPU_Group_Info 
  {
      WORD       nr_active;      // at most 64
@@ -1413,9 +1416,15 @@ public:
      }
  };
  
-int GetCurrentProcessCpuCount();
  DWORD_PTR GetCurrentProcessCpuMask();
  
+#endif // !FEATURE_PAL
+
+//******************************************************************************
+// Returns the number of processors that a process has been configured to run on
+//******************************************************************************
+int GetCurrentProcessCpuCount();
+
  uint32_t GetOsPageSize();
  
  
diff --git a/src/pal/inc/pal.h b/src/pal/inc/pal.h

index 0c9b5a7..79bc677 100644 (file)
--- a/src/pal/inc/pal.h
+++ b/src/pal/inc/pal.h
@@ -3996,88 +3996,6 @@ CreatePipe(
  // NUMA related APIs
  //
  
-typedef enum _PROCESSOR_CACHE_TYPE {
-  CacheUnified,
-  CacheInstruction,
-  CacheData,
-  CacheTrace
-} PROCESSOR_CACHE_TYPE;
-
-typedef struct _PROCESSOR_NUMBER {
-  WORD Group;
-  BYTE Number;
-  BYTE Reserved;
-} PROCESSOR_NUMBER, *PPROCESSOR_NUMBER;
-
-typedef enum _LOGICAL_PROCESSOR_RELATIONSHIP {
-  RelationProcessorCore,
-  RelationNumaNode,
-  RelationCache,
-  RelationProcessorPackage,
-  RelationGroup,
-  RelationAll               = 0xffff
-} LOGICAL_PROCESSOR_RELATIONSHIP;
-
-typedef ULONG_PTR KAFFINITY;
-
-#define ANYSIZE_ARRAY 1
-
-typedef struct _GROUP_AFFINITY {
-  KAFFINITY Mask;
-  WORD      Group;
-  WORD      Reserved[3];
-} GROUP_AFFINITY, *PGROUP_AFFINITY;
-
-typedef struct _PROCESSOR_GROUP_INFO {
-  BYTE      MaximumProcessorCount;
-  BYTE      ActiveProcessorCount;
-  BYTE      Reserved[38];
-  KAFFINITY ActiveProcessorMask;
-} PROCESSOR_GROUP_INFO, *PPROCESSOR_GROUP_INFO;
-
-typedef struct _PROCESSOR_RELATIONSHIP {
-  BYTE           Flags;
-  BYTE           EfficiencyClass;
-  BYTE           Reserved[21];
-  WORD           GroupCount;
-  GROUP_AFFINITY GroupMask[ANYSIZE_ARRAY];
-} PROCESSOR_RELATIONSHIP, *PPROCESSOR_RELATIONSHIP;
-
-typedef struct _GROUP_RELATIONSHIP {
-  WORD                 MaximumGroupCount;
-  WORD                 ActiveGroupCount;
-  BYTE                 Reserved[20];
-  PROCESSOR_GROUP_INFO GroupInfo[ANYSIZE_ARRAY];
-} GROUP_RELATIONSHIP, *PGROUP_RELATIONSHIP;
-
-typedef struct _NUMA_NODE_RELATIONSHIP {
-  DWORD          NodeNumber;
-  BYTE           Reserved[20];
-  GROUP_AFFINITY GroupMask;
-} NUMA_NODE_RELATIONSHIP, *PNUMA_NODE_RELATIONSHIP;
-
-typedef struct _CACHE_RELATIONSHIP {
-  BYTE                 Level;
-  BYTE                 Associativity;
-  WORD                 LineSize;
-  DWORD                CacheSize;
-  PROCESSOR_CACHE_TYPE Type;
-  BYTE                 Reserved[20];
-  GROUP_AFFINITY       GroupMask;
-} CACHE_RELATIONSHIP, *PCACHE_RELATIONSHIP;
-
-typedef struct _SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX {
-  LOGICAL_PROCESSOR_RELATIONSHIP Relationship;
-  DWORD                          Size;
-  union {
-    PROCESSOR_RELATIONSHIP Processor;
-    NUMA_NODE_RELATIONSHIP NumaNode;
-    CACHE_RELATIONSHIP     Cache;
-    GROUP_RELATIONSHIP     Group;
-  };
-} SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, *PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX;
-
-
  PALIMPORT
  BOOL
  PALAPI
@@ -4088,10 +4006,7 @@ GetNumaHighestNodeNumber(
  PALIMPORT
  BOOL
  PALAPI
-GetNumaProcessorNodeEx(
-  IN  PPROCESSOR_NUMBER Processor,
-  OUT PUSHORT NodeNumber
-);
+PAL_GetNumaProcessorNode(WORD procNo, WORD* node);
  
  PALIMPORT
  LPVOID
@@ -4108,61 +4023,12 @@ VirtualAllocExNuma(
  PALIMPORT
  BOOL
  PALAPI
-GetLogicalProcessorInformationEx(
-  IN LOGICAL_PROCESSOR_RELATIONSHIP RelationshipType,
-  OUT OPTIONAL PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX Buffer,
-  IN OUT PDWORD ReturnedLength
-);
-
-PALIMPORT
-DWORD_PTR
-PALAPI
-SetThreadAffinityMask(
-  IN HANDLE hThread,
-  IN DWORD_PTR dwThreadAffinityMask
-);
-
-PALIMPORT
-BOOL
-PALAPI
-SetThreadGroupAffinity(
-  IN HANDLE hThread,
-  IN const GROUP_AFFINITY *GroupAffinity,
-  OUT OPTIONAL PGROUP_AFFINITY PreviousGroupAffinity
-);
+PAL_SetCurrentThreadAffinity(WORD procNo);
  
  PALIMPORT
  BOOL
  PALAPI
-GetThreadGroupAffinity(
-  IN HANDLE hThread,
-  OUT PGROUP_AFFINITY GroupAffinity
-);
-
-PALIMPORT
-VOID
-PALAPI
-GetCurrentProcessorNumberEx(
-  OUT PPROCESSOR_NUMBER ProcNumber
-);
-
-PALIMPORT
-BOOL
-PALAPI
-GetProcessAffinityMask(
-  IN HANDLE hProcess,
-  OUT PDWORD_PTR lpProcessAffinityMask,
-  OUT PDWORD_PTR lpSystemAffinityMask
-);
-
-PALIMPORT
-BOOL
-PALAPI
-SetThreadIdealProcessorEx(
-  IN HANDLE hThread,
-  IN PPROCESSOR_NUMBER lpIdealProcessor,
-  OUT PPROCESSOR_NUMBER lpPreviousIdealProcessor
-);
+PAL_GetCurrentThreadAffinitySet(SIZE_T size, UINT_PTR* data);
  
  //
  // The types of events that can be logged.
diff --git a/src/pal/src/include/pal/palinternal.h b/src/pal/src/include/pal/palinternal.h

index 67236aa..6f64208 100644 (file)
--- a/src/pal/src/include/pal/palinternal.h
+++ b/src/pal/src/include/pal/palinternal.h
@@ -679,6 +679,9 @@ typedef enum _TimeConversionConstants
  bool
  ReadMemoryValueFromFile(const char* filename, size_t* val);
  
+DWORD
+GetTotalCpuCount();
+
  #ifdef __APPLE__
  bool
  GetApplicationContainerFolder(PathCharString& buffer, const char *applicationGroupId, int applicationGroupIdLength);
diff --git a/src/pal/src/misc/sysinfo.cpp b/src/pal/src/misc/sysinfo.cpp

index 2c14949..419c3f6 100644 (file)
--- a/src/pal/src/misc/sysinfo.cpp
+++ b/src/pal/src/misc/sysinfo.cpp
@@ -95,24 +95,11 @@ SET_DEFAULT_DEBUG_CHANNEL(MISC);
  #endif
  #endif // __APPLE__
  
-
-DWORD
-PALAPI
-PAL_GetLogicalCpuCountFromOS()
+DWORD GetTotalCpuCount()
  {
      int nrcpus = 0;
  
-#if HAVE_SCHED_GETAFFINITY
-
-    cpu_set_t cpuSet;
-    int st = sched_getaffinity(0, sizeof(cpu_set_t), &cpuSet);
-    if (st != 0)
-    {
-        ASSERT("sched_getaffinity failed (%d)\n", errno);
-    }
-
-    nrcpus = CPU_COUNT(&cpuSet);
-#elif HAVE_SYSCONF
+#if HAVE_SYSCONF
  
  #if defined(_ARM_) || defined(_ARM64_)
  #define SYSCONF_GET_NUMPROCS       _SC_NPROCESSORS_CONF
@@ -139,11 +126,36 @@ PAL_GetLogicalCpuCountFromOS()
      {
          ASSERT("sysctl failed for HW_NCPU (%d)\n", errno);
      }
+#else // HAVE_SYSCONF
+#error "Don't know how to get total CPU count on this platform"
  #endif // HAVE_SYSCONF
  
      return nrcpus;
  }
  
+// Returns the CPU count of the calling thread's affinity mask, or the total CPU count as a fallback
+DWORD
+PALAPI
+PAL_GetLogicalCpuCountFromOS()
+{
+    int nrcpus = 0;
+
+#if HAVE_SCHED_GETAFFINITY
+    cpu_set_t cpuSet;
+    if (sched_getaffinity(0, sizeof(cpu_set_t), &cpuSet) != 0)
+    {
+        ASSERT("sched_getaffinity failed (%d)\n", errno);
+        // cpuSet is not filled in on failure, so counting its bits would read garbage
+        return GetTotalCpuCount();
+    }
+    nrcpus = CPU_COUNT(&cpuSet);
+#else // HAVE_SCHED_GETAFFINITY
+    nrcpus = GetTotalCpuCount();
+#endif // HAVE_SCHED_GETAFFINITY
+
+    return nrcpus;
+}
+
  /*++
  Function:
    GetSystemInfo
diff --git a/src/pal/src/numa/numa.cpp b/src/pal/src/numa/numa.cpp

index 9283a04..0c9d409 100644 (file)
--- a/src/pal/src/numa/numa.cpp
+++ b/src/pal/src/numa/numa.cpp
@@ -47,33 +47,6 @@ using namespace CorUnix;
  typedef cpuset_t cpu_set_t;
  #endif
  
-// CPU affinity descriptor
-struct CpuAffinity
-{
-    // NUMA node
-    BYTE Node;
-    // CPU number relative to the group the CPU is in
-    BYTE Number;
-    // CPU group
-    WORD Group;
-};
-
-// Array mapping global CPU index to its affinity
-CpuAffinity *g_cpuToAffinity = NULL;
-
-// Array mapping CPU group and index in the group to the global CPU index
-short *g_groupAndIndexToCpu = NULL;
-// Array mapping CPU group to the corresponding affinity mask of the CPUs in the group
-KAFFINITY *g_groupToCpuMask = NULL;
-// Array mapping CPU group to the number of processors in the group
-BYTE *g_groupToCpuCount = NULL;
-
-// Total number of processors in the system
-int g_cpuCount = 0;
-// Total number of possible processors in the system
-int g_possibleCpuCount = 0;
-// Total number of CPU groups
-int g_groupCount = 0;
  // The highest NUMA node available
  int g_highestNumaNode = 0;
  // Is numa available
@@ -87,92 +60,6 @@ FOR_ALL_NUMA_FUNCTIONS
  #undef PER_FUNCTION_BLOCK
  #endif // HAVE_NUMA_H
  
-static const int MaxCpusPerGroup = 8 * sizeof(KAFFINITY);
-static const WORD NO_GROUP = 0xffff;
-
-/*++
-Function:
-  FreeLookupArrays
-
-Free CPU and group lookup arrays
---*/
-VOID
-FreeLookupArrays()
-{
-    free(g_groupAndIndexToCpu);
-    free(g_cpuToAffinity);
-    free(g_groupToCpuMask);
-    free(g_groupToCpuCount);
-
-    g_groupAndIndexToCpu = NULL;
-    g_cpuToAffinity = NULL;
-    g_groupToCpuMask = NULL;
-    g_groupToCpuCount = NULL;
-}
-
-/*++
-Function:
-  AllocateLookupArrays
-
-Allocate CPU and group lookup arrays
-Return TRUE if the allocation succeeded
---*/
-BOOL
-AllocateLookupArrays()
-{
-    g_groupAndIndexToCpu = (short*)malloc(g_groupCount * MaxCpusPerGroup * sizeof(short));
-    if (g_groupAndIndexToCpu == NULL)
-    {
-        goto FAILED;
-    }
-
-    g_cpuToAffinity = (CpuAffinity*)malloc(g_possibleCpuCount * sizeof(CpuAffinity));
-    if (g_cpuToAffinity == NULL)
-    {
-        goto FAILED;
-    }
-
-    g_groupToCpuMask = (KAFFINITY*)malloc(g_groupCount * sizeof(KAFFINITY));
-    if (g_groupToCpuMask == NULL)
-    {
-        goto FAILED;
-    }
-
-    g_groupToCpuCount = (BYTE*)malloc(g_groupCount * sizeof(BYTE));
-    if (g_groupToCpuCount == NULL)
-    {
-        goto FAILED;
-    }
-
-    memset(g_groupAndIndexToCpu, 0xff, g_groupCount * MaxCpusPerGroup * sizeof(short));
-    memset(g_cpuToAffinity, 0xff, g_possibleCpuCount * sizeof(CpuAffinity));
-    memset(g_groupToCpuMask, 0, g_groupCount * sizeof(KAFFINITY));
-    memset(g_groupToCpuCount, 0, g_groupCount * sizeof(BYTE));
-
-    return TRUE;
-
-FAILED:
-    FreeLookupArrays();
-
-    return FALSE;
-}
-
-/*++
-Function:
-  GetFullAffinityMask
-
-Get affinity mask for the specified number of processors with all
-the processors enabled.
---*/
-KAFFINITY GetFullAffinityMask(int cpuCount)
-{
-    if ((size_t)cpuCount < sizeof(KAFFINITY) * 8)
-    {
-        return ((KAFFINITY)1 << (cpuCount)) - 1;
-    }
-
-    return ~(KAFFINITY)0;
-}
  
  /*++
  Function:
@@ -208,73 +95,6 @@ FOR_ALL_NUMA_FUNCTIONS
          else
          {
              g_numaAvailable = true;
-
-            struct bitmask *mask = numa_allocate_cpumask();
-            int numaNodesCount = numa_max_node() + 1;
-
-            g_possibleCpuCount = numa_num_possible_cpus();
-            g_cpuCount = 0;
-            g_groupCount = 0;
-
-            for (int i = 0; i < numaNodesCount; i++)
-            {
-                int st = numa_node_to_cpus(i, mask);
-                // The only failure that can happen is that the mask is not large enough
-                // but that cannot happen since the mask was allocated by numa_allocate_cpumask
-                _ASSERTE(st == 0);
-                unsigned int nodeCpuCount = numa_bitmask_weight(mask);
-                g_cpuCount += nodeCpuCount;
-                unsigned int nodeGroupCount = (nodeCpuCount + MaxCpusPerGroup - 1) / MaxCpusPerGroup;
-                g_groupCount += nodeGroupCount;
-            }
-
-            if (!AllocateLookupArrays())
-            {
-                dlclose(numaHandle);
-                return FALSE;
-            }
-
-            WORD currentGroup = 0;
-            int currentGroupCpus = 0;
-
-            for (int i = 0; i < numaNodesCount; i++)
-            {
-                int st = numa_node_to_cpus(i, mask);
-                // The only failure that can happen is that the mask is not large enough
-                // but that cannot happen since the mask was allocated by numa_allocate_cpumask
-                _ASSERTE(st == 0);
-                unsigned int nodeCpuCount = numa_bitmask_weight(mask);
-                unsigned int nodeGroupCount = (nodeCpuCount + MaxCpusPerGroup - 1) / MaxCpusPerGroup;
-                for (int j = 0; j < g_possibleCpuCount; j++)
-                {
-                    if (numa_bitmask_isbitset(mask, j))
-                    {
-                        if (currentGroupCpus == MaxCpusPerGroup)
-                        {
-                            g_groupToCpuCount[currentGroup] = MaxCpusPerGroup;
-                            g_groupToCpuMask[currentGroup] = GetFullAffinityMask(MaxCpusPerGroup);
-                            currentGroupCpus = 0;
-                            currentGroup++;
-                        }
-                        g_cpuToAffinity[j].Node = i;
-                        g_cpuToAffinity[j].Group = currentGroup;
-                        g_cpuToAffinity[j].Number = currentGroupCpus;
-                        g_groupAndIndexToCpu[currentGroup * MaxCpusPerGroup + currentGroupCpus] = j;
-                        currentGroupCpus++;
-                    }
-                }
-
-                if (currentGroupCpus != 0)
-                {
-                    g_groupToCpuCount[currentGroup] = currentGroupCpus;
-                    g_groupToCpuMask[currentGroup] = GetFullAffinityMask(currentGroupCpus);
-                    currentGroupCpus = 0;
-                    currentGroup++;
-                }
-            }
-
-            numa_free_cpumask(mask);
-
              g_highestNumaNode = numa_max_node();
          }
      }
@@ -282,21 +102,7 @@ FOR_ALL_NUMA_FUNCTIONS
      if (!g_numaAvailable)
      {
          // No NUMA
-        g_possibleCpuCount = PAL_GetLogicalCpuCountFromOS();
-        g_cpuCount = PAL_GetLogicalCpuCountFromOS();
-        g_groupCount = 1;
          g_highestNumaNode = 0;
-
-        if (!AllocateLookupArrays())
-        {
-            return FALSE;
-        }
-
-        for (int i = 0; i < g_possibleCpuCount; i++)
-        {
-            g_cpuToAffinity[i].Number = i;
-            g_cpuToAffinity[i].Group = 0;
-        }
      }
  
      return TRUE;
@@ -311,7 +117,6 @@ Cleanup of the NUMA support data structures
  VOID
  NUMASupportCleanup()
  {
-    FreeLookupArrays();
  #if HAVE_NUMA_H
      if (g_numaAvailable)
      {
@@ -346,493 +151,35 @@ GetNumaHighestNodeNumber(
  
  /*++
  Function:
-  GetNumaProcessorNodeEx
-
-See MSDN doc.
---*/
-BOOL
-PALAPI
-GetNumaProcessorNodeEx(
-  IN  PPROCESSOR_NUMBER Processor,
-  OUT PUSHORT NodeNumber
-)
-{
-    PERF_ENTRY(GetNumaProcessorNodeEx);
-    ENTRY("GetNumaProcessorNodeEx(Processor=%p, NodeNumber=%p)\n", Processor, NodeNumber);
+  PAL_GetNumaProcessorNode
  
-    BOOL success = FALSE;
+Abstract
+  Get NUMA node of a processor
  
-    if ((Processor->Group < g_groupCount) &&
-        (Processor->Number < MaxCpusPerGroup) &&
-        (Processor->Reserved == 0))
-    {
-        short cpu = g_groupAndIndexToCpu[Processor->Group * MaxCpusPerGroup + Processor->Number];
-        if (cpu != -1)
-        {
-            *NodeNumber = g_cpuToAffinity[cpu].Node;
-            success = TRUE;
-        }
-    }
-
-    if (!success)
-    {
-        *NodeNumber = 0xffff;
-        SetLastError(ERROR_INVALID_PARAMETER);
-    }
+Parameters:
+  procNo - number of the processor to get the NUMA node for
+  node   - the resulting NUMA node
  
-    LOGEXIT("GetNumaProcessorNodeEx returns BOOL %d\n", success);
-    PERF_EXIT(GetNumaProcessorNodeEx);
-
-    return success;
-}
-
-/*++
-Function:
-  GetLogicalProcessorInformationEx
-
-See MSDN doc.
---*/
-BOOL
-PALAPI
-GetLogicalProcessorInformationEx(
-  IN LOGICAL_PROCESSOR_RELATIONSHIP RelationshipType,
-  OUT OPTIONAL PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX Buffer,
-  IN OUT PDWORD ReturnedLength
-)
-{
-    PERF_ENTRY(GetLogicalProcessorInformationEx);
-    ENTRY("GetLogicalProcessorInformationEx(RelationshipType=%d, Buffer=%p, ReturnedLength=%p)\n", RelationshipType, Buffer, ReturnedLength);
-
-    BOOL success = FALSE;
-
-    if (RelationshipType == RelationGroup)
-    {
-        size_t requiredSize = __builtin_offsetof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, Group);
-        requiredSize += __builtin_offsetof(GROUP_RELATIONSHIP, GroupInfo);
-        requiredSize += g_groupCount * sizeof(PROCESSOR_GROUP_INFO);
-
-        if (*ReturnedLength >= requiredSize)
-        {
-            Buffer->Relationship = RelationGroup;
-            Buffer->Size = requiredSize;
-            Buffer->Group.MaximumGroupCount = g_groupCount;
-            Buffer->Group.ActiveGroupCount = g_groupCount;
-            for (int i = 0; i < g_groupCount; i++)
-            {
-                Buffer->Group.GroupInfo[i].MaximumProcessorCount = MaxCpusPerGroup;
-                Buffer->Group.GroupInfo[i].ActiveProcessorCount = g_groupToCpuCount[i];
-                Buffer->Group.GroupInfo[i].ActiveProcessorMask = g_groupToCpuMask[i];
-            }
-
-            success = TRUE;
-        }
-        else
-        {
-            SetLastError(ERROR_INSUFFICIENT_BUFFER);
-        }
-
-        *ReturnedLength = requiredSize;
-    }
-    else
-    {
-        // We only support the group relationship
-        SetLastError(ERROR_INVALID_PARAMETER);
-    }
-
-    LOGEXIT("GetLogicalProcessorInformationEx returns BOOL %d\n", success);
-    PERF_EXIT(GetLogicalProcessorInformationEx);
-
-    return success;
-}
-
-/*++
-Function:
-  GetThreadGroupAffinityInternal
-
-Get the group affinity for the specified pthread
---*/
-BOOL
-GetThreadGroupAffinityInternal(
-  IN pthread_t thread,
-  OUT PGROUP_AFFINITY GroupAffinity
-)
-{
-    BOOL success = FALSE;
-
-#if HAVE_PTHREAD_GETAFFINITY_NP
-    cpu_set_t cpuSet;
-
-    int st = pthread_getaffinity_np(thread, sizeof(cpu_set_t), &cpuSet);
-
-    if (st == 0)
-    {
-        WORD group = NO_GROUP;
-        KAFFINITY mask = 0;
-
-        for (int i = 0; i < g_possibleCpuCount; i++)
-        {
-            if (CPU_ISSET(i, &cpuSet))
-            {
-                WORD g = g_cpuToAffinity[i].Group;
-                // Unless the thread affinity was already set by SetThreadGroupAffinity, it is possible that
-                // the current thread has affinity with processors from multiple groups. So we report just the
-                // first group we find.
-                if (group == NO_GROUP || g == group)
-                {
-                    group = g;
-                    mask |= ((KAFFINITY)1) << g_cpuToAffinity[i].Number;
-                }
-            }
-        }
-
-        GroupAffinity->Group = group;
-        GroupAffinity->Mask = mask;
-        success = TRUE;
-    }
-    else
-    {
-        SetLastError(ERROR_GEN_FAILURE);
-    }
-#else // HAVE_PTHREAD_GETAFFINITY_NP
-    // There is no API to manage thread affinity, so let's return a group affinity
-    // with all the CPUs on the system.
-    GroupAffinity->Group = 0;
-    GroupAffinity->Mask = GetFullAffinityMask(g_possibleCpuCount);
-    success = TRUE;
-#endif // HAVE_PTHREAD_GETAFFINITY_NP
-
-    return success;
-}
-
-/*++
-Function:
-  GetThreadGroupAffinity
-
-See MSDN doc.
---*/
-BOOL
-PALAPI
-GetThreadGroupAffinity(
-  IN HANDLE hThread,
-  OUT PGROUP_AFFINITY GroupAffinity
-)
-{
-    PERF_ENTRY(GetThreadGroupAffinity);
-    ENTRY("GetThreadGroupAffinity(hThread=%p, GroupAffinity=%p)\n", hThread, GroupAffinity);
-    CPalThread *pCurrentThread = InternalGetCurrentThread();
-    CPalThread *pTargetThread = NULL;
-    IPalObject *pTargetThreadObject = NULL;
-
-    PAL_ERROR palErr =
-        InternalGetThreadDataFromHandle(pCurrentThread, hThread,
-                                        0, // THREAD_SET_CONTEXT
-                                        &pTargetThread, &pTargetThreadObject);
-
-    if (NO_ERROR != palErr)
-    {
-        ERROR("Unable to obtain thread data for handle %p (error %x)!\n", hThread,
-              palErr);
-        return FALSE;
-    }
-
-    BOOL success = GetThreadGroupAffinityInternal(
-        pTargetThread->GetPThreadSelf(), GroupAffinity);
-    LOGEXIT("GetThreadGroupAffinity returns BOOL %d\n", success);
-    PERF_EXIT(GetThreadGroupAffinity);
-
-    return success;
-}
-
-
-/*++
-Function:
-  SetThreadGroupAffinity
-
-See MSDN doc.
+Return value:
+  TRUE if the function was able to get the NUMA node, FALSE if it has failed.
  --*/
  BOOL
  PALAPI
-SetThreadGroupAffinity(
-  IN HANDLE hThread,
-  IN const GROUP_AFFINITY *GroupAffinity,
-  OUT OPTIONAL PGROUP_AFFINITY PreviousGroupAffinity
-)
+PAL_GetNumaProcessorNode(WORD procNo, WORD* node)
  {
-    PERF_ENTRY(SetThreadGroupAffinity);
-    ENTRY("SetThreadGroupAffinity(hThread=%p, GroupAffinity=%p, PreviousGroupAffinity=%p)\n", hThread, GroupAffinity, PreviousGroupAffinity);
-
-    CPalThread *pCurrentThread = InternalGetCurrentThread();
-    CPalThread *pTargetThread = NULL;
-    IPalObject *pTargetThreadObject = NULL;
-
-    PAL_ERROR palErr =
-        InternalGetThreadDataFromHandle(pCurrentThread, hThread,
-                                        0, // THREAD_SET_CONTEXT
-                                        &pTargetThread, &pTargetThreadObject);
-
-    if (NO_ERROR != palErr)
-    {
-        ERROR("Unable to obtain thread data for handle %p (error %x)!\n", hThread,
-              palErr);
-        return FALSE;
-    }
-
-    pthread_t thread = pTargetThread->GetPThreadSelf();
-
-    if (PreviousGroupAffinity != NULL)
-    {
-        GetThreadGroupAffinityInternal(thread, PreviousGroupAffinity);
-    }
-
-#if HAVE_PTHREAD_GETAFFINITY_NP
-    int groupStartIndex = GroupAffinity->Group * MaxCpusPerGroup;
-    KAFFINITY mask = 1;
-    cpu_set_t cpuSet;
-    CPU_ZERO(&cpuSet);
-
-    for (int i = 0; i < MaxCpusPerGroup; i++, mask <<= 1)
-    {
-        if (GroupAffinity->Mask & mask)
-        {
-            int cpu = g_groupAndIndexToCpu[groupStartIndex + i];
-            if (cpu != -1)
-            {
-                CPU_SET(cpu, &cpuSet);
-            }
-        }
-    }
-
-    int st = pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuSet);
-
-    if (st != 0)
-    {
-        switch (st)
-        {
-        case EINVAL:
-            // There is no processor in the mask that is allowed to execute the process
-            SetLastError(ERROR_INVALID_PARAMETER);
-            break;
-        case ESRCH:
-            SetLastError(ERROR_INVALID_HANDLE);
-            break;
-        default:
-            SetLastError(ERROR_GEN_FAILURE);
-            break;
-        }
-    }
-
-    BOOL success = (st == 0);
-#else // HAVE_PTHREAD_GETAFFINITY_NP
-    // There is no API to manage thread affinity, so let's ignore the request
-    BOOL success = TRUE;
-#endif // HAVE_PTHREAD_GETAFFINITY_NP
-
-    LOGEXIT("SetThreadGroupAffinity returns BOOL %d\n", success);
-    PERF_EXIT(SetThreadGroupAffinity);
-
-    return success;
-}
-
-/*++
-Function:
-  SetThreadAffinityMask
-
-See MSDN doc.
---*/
-DWORD_PTR
-PALAPI
-SetThreadAffinityMask(
-  IN HANDLE hThread,
-  IN DWORD_PTR dwThreadAffinityMask
-)
-{
-    PERF_ENTRY(SetThreadAffinityMask);
-    ENTRY("SetThreadAffinityMask(hThread=%p, dwThreadAffinityMask=%p)\n", hThread, dwThreadAffinityMask);
-
-    CPalThread *pCurrentThread = InternalGetCurrentThread();
-    CPalThread *pTargetThread = NULL;
-    IPalObject *pTargetThreadObject = NULL;
-
-    PAL_ERROR palErr =
-        InternalGetThreadDataFromHandle(pCurrentThread, hThread,
-                                        0, // THREAD_SET_CONTEXT
-                                        &pTargetThread, &pTargetThreadObject);
-
-    if (NO_ERROR != palErr)
-    {
-        ERROR("Unable to obtain thread data for handle %p (error %x)!\n", hThread,
-              palErr);
-        return 0;
-    }
-
-    pthread_t thread = pTargetThread->GetPThreadSelf();
-
-#if HAVE_PTHREAD_GETAFFINITY_NP
-    cpu_set_t prevCpuSet;
-    CPU_ZERO(&prevCpuSet);
-    KAFFINITY prevMask = 0;
-
-    int st = pthread_getaffinity_np(thread, sizeof(cpu_set_t), &prevCpuSet);
-
-    if (st == 0)
-    {
-        for (int i = 0; i < std::min(8 * (int)sizeof(KAFFINITY), g_possibleCpuCount); i++)
-        {
-            if (CPU_ISSET(i, &prevCpuSet))
-            {
-                prevMask |= ((KAFFINITY)1) << i;
-            }
-        }
-    }
-
-    cpu_set_t cpuSet;
-    CPU_ZERO(&cpuSet);
-
-    int cpu = 0;
-    while (dwThreadAffinityMask)
-    {
-        if (dwThreadAffinityMask & 1)
-        {
-            CPU_SET(cpu, &cpuSet);
-        }
-        cpu++;
-        dwThreadAffinityMask >>= 1;
-    }
-
-    st = pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuSet);
-
-    if (st != 0)
-    {
-        switch (st)
-        {
-        case EINVAL:
-            // There is no processor in the mask that is allowed to execute the
-            // process
-            SetLastError(ERROR_INVALID_PARAMETER);
-            break;
-        case ESRCH:
-            SetLastError(ERROR_INVALID_HANDLE);
-            break;
-        default:
-            SetLastError(ERROR_GEN_FAILURE);
-            break;
-        }
-    }
-
-    DWORD_PTR ret = (st == 0) ? prevMask : 0;
-#else  // HAVE_PTHREAD_GETAFFINITY_NP
-    // There is no API to manage thread affinity, so let's ignore the request
-    DWORD_PTR ret = 0;
-#endif // HAVE_PTHREAD_GETAFFINITY_NP
-    LOGEXIT("SetThreadAffinityMask returns  %lu\n", ret);
-    PERF_EXIT(SetThreadAffinityMask);
-
-    return ret;
-}
-
-/*++
-Function:
-  GetCurrentProcessorNumberEx
-
-See MSDN doc.
---*/
-VOID
-PALAPI
-GetCurrentProcessorNumberEx(
-  OUT PPROCESSOR_NUMBER ProcNumber
-)
-{
-    PERF_ENTRY(GetCurrentProcessorNumberEx);
-    ENTRY("GetCurrentProcessorNumberEx(ProcNumber=%p\n", ProcNumber);
-
-    DWORD cpu = GetCurrentProcessorNumber();
-    _ASSERTE((int)cpu < g_possibleCpuCount);
-    ProcNumber->Group = g_cpuToAffinity[cpu].Group;
-    ProcNumber->Number = g_cpuToAffinity[cpu].Number;
-
-    LOGEXIT("GetCurrentProcessorNumberEx\n");
-    PERF_EXIT(GetCurrentProcessorNumberEx);
-}
-
-/*++
-Function:
-  GetProcessAffinityMask
-
-See MSDN doc.
---*/
-BOOL
-PALAPI
-GetProcessAffinityMask(
-  IN HANDLE hProcess,
-  OUT PDWORD_PTR lpProcessAffinityMask,
-  OUT PDWORD_PTR lpSystemAffinityMask
-)
-{
-    PERF_ENTRY(GetProcessAffinityMask);
-    ENTRY("GetProcessAffinityMask(hProcess=%p, lpProcessAffinityMask=%p, lpSystemAffinityMask=%p\n", hProcess, lpProcessAffinityMask, lpSystemAffinityMask);
-
-    BOOL success = FALSE;
-
-    if (hProcess == GetCurrentProcess())
+#if HAVE_NUMA_H
+    if (g_numaAvailable)
      {
-        int cpuCountInMask = (g_cpuCount > 64) ? 64 : g_cpuCount;
-
-        DWORD_PTR systemMask = GetFullAffinityMask(cpuCountInMask);
-
-#if HAVE_SCHED_GETAFFINITY
-        int pid = getpid();
-        cpu_set_t cpuSet;
-        int st = sched_getaffinity(pid, sizeof(cpu_set_t), &cpuSet);
-        if (st == 0)
+        int result = numa_node_of_cpu(procNo);
+        if (result >= 0)
          {
-            DWORD_PTR processMask = 0;
-
-            for (int i = 0; i < cpuCountInMask; i++)
-            {
-                if (CPU_ISSET(i, &cpuSet))
-                {
-                    processMask |= ((DWORD_PTR)1) << i;
-                }
-            }
-
-            success = TRUE;
-
-            *lpProcessAffinityMask = processMask;
-            *lpSystemAffinityMask = systemMask;
-        }
-        else if (errno == EINVAL)
-        {
-            // There are more processors than can fit in a cpu_set_t
-            // return all bits set for all processors (upto 64) for both masks.
-            *lpProcessAffinityMask = systemMask;
-            *lpSystemAffinityMask = systemMask;
-            success = TRUE;
-        }
-        else
-        {
-            // We should not get any of the errors that the sched_getaffinity can return since none
-            // of them applies for the current thread, so this is an unexpected kind of failure.
-            SetLastError(ERROR_GEN_FAILURE);
+            *node = (WORD)result;
+            return TRUE;
          }
-#else // HAVE_SCHED_GETAFFINITY
-        // There is no API to manage thread affinity, so let's return both affinity masks
-        // with all the CPUs on the system set.
-        *lpSystemAffinityMask = systemMask;
-        *lpProcessAffinityMask = systemMask;
-
-        success = TRUE;
-#endif // HAVE_SCHED_GETAFFINITY
-    }
-    else
-    {
-        // PAL supports getting affinity mask for the current process only
-        SetLastError(ERROR_INVALID_PARAMETER);
      }
+#endif // HAVE_NUMA_H
  
-    LOGEXIT("GetProcessAffinityMask returns BOOL %d\n", success);
-    PERF_EXIT(GetProcessAffinityMask);
-
-    return success;
+    return FALSE;
  }
  
  /*++
@@ -898,115 +245,3 @@ VirtualAllocExNuma(
  
      return result;
  }
-
-/*++
-Function:
-  SetThreadIdealProcessorEx
-
-See MSDN doc.
---*/
-BOOL
-PALAPI
-SetThreadIdealProcessorEx(
-  IN HANDLE hThread,
-  IN PPROCESSOR_NUMBER lpIdealProcessor,
-  OUT PPROCESSOR_NUMBER lpPreviousIdealProcessor)
-{
-    PERF_ENTRY(SetThreadIdealProcessorEx);
-    ENTRY("SetThreadIdealProcessorEx(hThread=%p, lpIdealProcessor=%p)\n", hThread, lpIdealProcessor);
-
-    CPalThread *pCurrentThread = InternalGetCurrentThread();
-    CPalThread *pTargetThread = NULL;
-    IPalObject *pTargetThreadObject = NULL;
-
-    PAL_ERROR palErr =
-        InternalGetThreadDataFromHandle(pCurrentThread, hThread,
-                                        0, // THREAD_SET_CONTEXT
-                                        &pTargetThread, &pTargetThreadObject);
-
-    if (NO_ERROR != palErr)
-    {
-        ERROR("Unable to obtain thread data for handle %p (error %x)!\n", hThread,
-              palErr);
-        return 0;
-    }
-
-    pthread_t thread = pTargetThread->GetPThreadSelf();
-
-#if HAVE_PTHREAD_GETAFFINITY_NP
-    int cpu = -1;
-    if ((lpIdealProcessor->Group < g_groupCount) &&
-        (lpIdealProcessor->Number < MaxCpusPerGroup) &&
-        (lpIdealProcessor->Reserved == 0))
-    {
-        cpu = g_groupAndIndexToCpu[lpIdealProcessor->Group * MaxCpusPerGroup + lpIdealProcessor->Number];
-    }
-
-    if (cpu == -1)
-    {
-        SetLastError(ERROR_INVALID_PARAMETER);
-        return FALSE;
-    }
-
-    if (lpPreviousIdealProcessor != NULL)
-    {
-        cpu_set_t prevCpuSet;
-        CPU_ZERO(&prevCpuSet);
-        DWORD prevCpu = GetCurrentProcessorNumber();
-
-        int st = pthread_getaffinity_np(thread, sizeof(cpu_set_t), &prevCpuSet);
-
-        if (st == 0)
-        {
-            for (int i = 0; i < g_possibleCpuCount; i++)
-            {
-                if (CPU_ISSET(i, &prevCpuSet))
-                {
-                    prevCpu = i;
-                    break;
-                }
-            }
-        }
-
-        _ASSERTE((int)prevCpu < g_possibleCpuCount);
-        lpPreviousIdealProcessor->Group = g_cpuToAffinity[prevCpu].Group;
-        lpPreviousIdealProcessor->Number = g_cpuToAffinity[prevCpu].Number;
-        lpPreviousIdealProcessor->Reserved = 0;
-    }
-
-    cpu_set_t cpuSet;
-    CPU_ZERO(&cpuSet);
-    CPU_SET(cpu, &cpuSet);
-
-    int st = pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuSet);
-
-    if (st != 0)
-    {
-        switch (st)
-        {
-        case EINVAL:
-            // There is no processor in the mask that is allowed to execute the
-            // process
-            SetLastError(ERROR_INVALID_PARAMETER);
-            break;
-        case ESRCH:
-            SetLastError(ERROR_INVALID_HANDLE);
-            break;
-        default:
-            SetLastError(ERROR_GEN_FAILURE);
-            break;
-        }
-    }
-
-    BOOL success = (st == 0);
-
-#else  // HAVE_PTHREAD_GETAFFINITY_NP
-    // There is no API to manage thread affinity, so let's ignore the request
-    BOOL success = FALSE;
-#endif // HAVE_PTHREAD_GETAFFINITY_NP
-
-    LOGEXIT("SetThreadIdealProcessorEx returns BOOL %d\n", success);
-    PERF_EXIT(SetThreadIdealProcessorEx);
-
-    return success;
-}
diff --git a/src/pal/src/numa/numashim.h b/src/pal/src/numa/numashim.h

index dd7f58d..e56cfab 100644 (file)
--- a/src/pal/src/numa/numashim.h
+++ b/src/pal/src/numa/numashim.h
@@ -13,19 +13,12 @@
  #include <numa.h>
  #include <numaif.h>
  
-#define numa_free_cpumask numa_bitmask_free
-
  // List of all functions from the numa library that are used
  #define FOR_ALL_NUMA_FUNCTIONS \
      PER_FUNCTION_BLOCK(numa_available) \
      PER_FUNCTION_BLOCK(mbind) \
-    PER_FUNCTION_BLOCK(numa_num_possible_cpus) \
      PER_FUNCTION_BLOCK(numa_max_node) \
-    PER_FUNCTION_BLOCK(numa_allocate_cpumask) \
-    PER_FUNCTION_BLOCK(numa_node_to_cpus) \
-    PER_FUNCTION_BLOCK(numa_bitmask_weight) \
-    PER_FUNCTION_BLOCK(numa_bitmask_isbitset) \
-    PER_FUNCTION_BLOCK(numa_bitmask_free)
+    PER_FUNCTION_BLOCK(numa_node_of_cpu)
  
  // Declare pointers to all the used numa functions
  #define PER_FUNCTION_BLOCK(fn) extern decltype(fn)* fn##_ptr;
@@ -36,13 +29,8 @@ FOR_ALL_NUMA_FUNCTIONS
  // to the functions of libnuma in the initialization.
  #define numa_available() numa_available_ptr()
  #define mbind(...) mbind_ptr(__VA_ARGS__)
-#define numa_num_possible_cpus() numa_num_possible_cpus_ptr()
  #define numa_max_node() numa_max_node_ptr()
-#define numa_allocate_cpumask() numa_allocate_cpumask_ptr()
-#define numa_node_to_cpus(...) numa_node_to_cpus_ptr(__VA_ARGS__)
-#define numa_bitmask_weight(...) numa_bitmask_weight_ptr(__VA_ARGS__)
-#define numa_bitmask_isbitset(...) numa_bitmask_isbitset_ptr(__VA_ARGS__)
-#define numa_bitmask_free(...) numa_bitmask_free_ptr(__VA_ARGS__)
+#define numa_node_of_cpu(...) numa_node_of_cpu_ptr(__VA_ARGS__)
  
  #endif // HAVE_NUMA_H
  
diff --git a/src/pal/src/thread/thread.cpp b/src/pal/src/thread/thread.cpp

index 86a0863..122e860 100644 (file)
--- a/src/pal/src/thread/thread.cpp
+++ b/src/pal/src/thread/thread.cpp
@@ -64,6 +64,7 @@ SET_DEFAULT_DEBUG_CHANNEL(THREAD); // some headers have code with asserts, so do
  #include "pal/fakepoll.h"
  #endif  // HAVE_POLL
  #include <limits.h>
+#include <algorithm>
  
  #if HAVE_SYS_LWP_H
  #include <sys/lwp.h>
@@ -2921,3 +2922,95 @@ int CorUnix::CThreadMachExceptionHandlers::GetIndexOfHandler(exception_mask_t bm
  }
  
  #endif // HAVE_MACH_EXCEPTIONS
+
+/*++
+Function:
+  PAL_SetCurrentThreadAffinity
+
+Abstract
+  Set affinity of the current thread to the specified processor.
+
+Parameters:
+  procNo - number of the processor to affinitize the current thread to
+
+Return value:
+  TRUE if the function was able to set the affinity, FALSE if it has failed.
+--*/
+BOOL
+PALAPI
+PAL_SetCurrentThreadAffinity(WORD procNo)
+{
+#if HAVE_PTHREAD_GETAFFINITY_NP
+    cpu_set_t cpuSet;
+    CPU_ZERO(&cpuSet);
+
+    int st = pthread_getaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuSet);
+
+    if (st == 0)
+    {
+        CPU_SET(procNo, &cpuSet);
+        st = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuSet);
+    }
+
+    return st == 0;
+#else  // HAVE_PTHREAD_GETAFFINITY_NP
+    // There is no API to manage thread affinity, so report failure to the caller
+    return FALSE;
+#endif // HAVE_PTHREAD_GETAFFINITY_NP
+}
+
+/*++
+Function:
+  PAL_GetCurrentThreadAffinitySet
+
+Abstract
+  Get affinity set of the current thread. The set is represented by an array of "size" entries of UINT_PTR type.
+
+Parameters:
+  size - number of entries in the "data" array
+  data - pointer to the data of the resulting set, the LSB of the first entry in the array represents processor 0
+
+Return value:
+  TRUE if the function was able to get the affinity set, FALSE if it has failed.
+--*/
+BOOL
+PALAPI
+PAL_GetCurrentThreadAffinitySet(SIZE_T size, UINT_PTR* data)
+{
+    cpu_set_t cpuSet;
+    CPU_ZERO(&cpuSet);
+
+#if HAVE_PTHREAD_GETAFFINITY_NP
+    int st = pthread_getaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuSet);
+
+    if (st == 0)
+    {
+        const SIZE_T BitsPerBitsetEntry = 8 * sizeof(UINT_PTR);
+        int nrcpus = GetTotalCpuCount();
+
+        // Get info for as many processors as can fit into the resulting set
+        SIZE_T remainingCount = std::min(size * BitsPerBitsetEntry, (SIZE_T)nrcpus);
+        SIZE_T i = 0;
+        while (remainingCount != 0)
+        {
+            UINT_PTR entry = 0;
+            SIZE_T bitsToCopy = std::min(remainingCount, BitsPerBitsetEntry);
+            SIZE_T cpuSetOffset = i * BitsPerBitsetEntry;
+            for (SIZE_T j = 0; j < bitsToCopy; j++)
+            {
+                if (CPU_ISSET(cpuSetOffset + j, &cpuSet))
+                {
+                    entry |= (UINT_PTR)1 << j;
+                }
+            }
+            remainingCount -= bitsToCopy;
+            data[i++] = entry;
+        }
+    }
+
+    return st == 0;
+#else  // HAVE_PTHREAD_GETAFFINITY_NP
+    // There is no API to query thread affinity, so report failure to the caller
+    return FALSE;
+#endif // HAVE_PTHREAD_GETAFFINITY_NP
+}
diff --git a/src/utilcode/util.cpp b/src/utilcode/util.cpp

index 61f41d7..e709160 100644 (file)
--- a/src/utilcode/util.cpp
+++ b/src/utilcode/util.cpp
@@ -733,10 +733,17 @@ BYTE * ClrVirtualAllocWithinRange(const BYTE *pMinAddr,
      return ::VirtualAllocExNuma(hProc, lpAddr, dwSize, allocType, prot, node);
  }
  
+#ifndef FEATURE_PAL
  /*static*/ BOOL NumaNodeInfo::GetNumaProcessorNodeEx(PPROCESSOR_NUMBER proc_no, PUSHORT node_no)
  {
      return ::GetNumaProcessorNodeEx(proc_no, node_no);
  }
+#else // !FEATURE_PAL
+/*static*/ BOOL NumaNodeInfo::GetNumaProcessorNodeEx(USHORT proc_no, PUSHORT node_no)
+{
+    return PAL_GetNumaProcessorNode(proc_no, node_no);
+}
+#endif // !FEATURE_PAL
  #endif
  
  /*static*/ BOOL NumaNodeInfo::m_enableGCNumaAware = FALSE;
@@ -749,15 +756,6 @@ BYTE * ClrVirtualAllocWithinRange(const BYTE *pMinAddr,
      if (CLRConfig::GetConfigValue(CLRConfig::UNSUPPORTED_GCNumaAware) == 0)
          return FALSE;
  
-#ifndef FEATURE_PAL
-    // check if required APIs are supported
-    HMODULE hMod = GetModuleHandleW(WINDOWS_KERNEL32_DLLNAME_W);
-#else
-    HMODULE hMod = GetCLRModule();
-#endif    
-    if (hMod == NULL)
-        return FALSE;
-
      // fail to get the highest numa node number
      if (!::GetNumaHighestNodeNumber(&highest) || (highest == 0))
          return FALSE;
@@ -778,8 +776,10 @@ BYTE * ClrVirtualAllocWithinRange(const BYTE *pMinAddr,
      m_enableGCNumaAware = InitNumaNodeInfoAPI();
  }
  
+#ifndef FEATURE_PAL
+
  //******************************************************************************
-// NumaNodeInfo 
+// CPUGroupInfo 
  //******************************************************************************
  #if !defined(FEATURE_REDHAWK)
  /*static*/ //CPUGroupInfo::PNTQSIEx CPUGroupInfo::m_pNtQuerySystemInformationEx = NULL;
@@ -1187,6 +1187,7 @@ BOOL CPUGroupInfo::GetCPUGroupRange(WORD group_number, WORD* group_begin, WORD*
      LIMITED_METHOD_CONTRACT;
      return m_threadUseAllCpuGroups;
  }
+#endif // !FEATURE_PAL
  
  //******************************************************************************
  // Returns the number of processors that a process has been configured to run on
@@ -1206,6 +1207,8 @@ int GetCurrentProcessCpuCount()
          return cCPUs;
  
      unsigned int count = 0;
+
+#ifndef FEATURE_PAL
      DWORD_PTR pmask, smask;
  
      if (!GetProcessAffinityMask(GetCurrentProcess(), &pmask, &smask))
@@ -1233,18 +1236,20 @@ int GetCurrentProcessCpuCount()
              count = 64;
      }
  
-#ifdef FEATURE_PAL
-    uint32_t cpuLimit;
+#else // !FEATURE_PAL
+    count = PAL_GetLogicalCpuCountFromOS();
  
+    uint32_t cpuLimit;
      if (PAL_GetCpuLimit(&cpuLimit) && cpuLimit < count)
          count = cpuLimit;
-#endif
+#endif // !FEATURE_PAL
  
      cCPUs = count;
  
      return count;
  }
  
+#ifndef FEATURE_PAL
  DWORD_PTR GetCurrentProcessCpuMask()
  {
      CONTRACTL
@@ -1266,6 +1271,7 @@ DWORD_PTR GetCurrentProcessCpuMask()
      return 0;
  #endif
  }
+#endif // !FEATURE_PAL
  
  uint32_t GetOsPageSizeUncached()
  {
diff --git a/src/vm/ceemain.cpp b/src/vm/ceemain.cpp

index 1b85649..9ac0cc6 100644 (file)
--- a/src/vm/ceemain.cpp
+++ b/src/vm/ceemain.cpp
@@ -654,8 +654,9 @@ void EEStartupHelper(COINITIEE fFlags)
          // Need to do this as early as possible. Used by creating object handle
          // table inside Ref_Initialization() before GC is initialized.
          NumaNodeInfo::InitNumaNodeInfo();
+#ifndef FEATURE_PAL
          CPUGroupInfo::EnsureInitialized();
-
+#endif // !FEATURE_PAL
  
          // Initialize global configuration settings based on startup flags
          // This needs to be done before the EE has started
diff --git a/src/vm/eeconfig.cpp b/src/vm/eeconfig.cpp

index e59a85e..6bd0edd 100644 (file)
--- a/src/vm/eeconfig.cpp
+++ b/src/vm/eeconfig.cpp
@@ -1226,7 +1226,14 @@ HRESULT EEConfig::sync()
  
      tieredCompilation_StartupTier_CallCountingDelayMs =
          CLRConfig::GetConfigValue(CLRConfig::UNSUPPORTED_TC_StartupTier_CallCountingDelayMs);
-    if (CPUGroupInfo::HadSingleProcessorAtStartup())
+
+#ifndef FEATURE_PAL
+    bool hadSingleProcessorAtStartup = g_SystemInfo.dwNumberOfProcessors == 1;//CPUGroupInfo::HadSingleProcessorAtStartup();
+#else // !FEATURE_PAL
+    bool hadSingleProcessorAtStartup = g_SystemInfo.dwNumberOfProcessors == 1;
+#endif // !FEATURE_PAL
+
+    if (hadSingleProcessorAtStartup)
      {
          DWORD delayMultiplier =
              CLRConfig::GetConfigValue(CLRConfig::UNSUPPORTED_TC_StartupTier_DelaySingleProcMultiplier);
diff --git a/src/vm/gcenv.os.cpp b/src/vm/gcenv.os.cpp

index a56215a..8f9e1ba 100644 (file)
--- a/src/vm/gcenv.os.cpp
+++ b/src/vm/gcenv.os.cpp
@@ -32,6 +32,10 @@ uint32_t g_pageSizeUnixInl = 0;
  
  static AffinitySet g_processAffinitySet;
  
+#ifdef FEATURE_PAL
+static uint32_t g_currentProcessCpuCount;
+#endif // FEATURE_PAL
+
  class GroupProcNo
  {
      uint16_t m_groupProc;
@@ -106,8 +110,21 @@ bool GCToOSInterface::Initialize()
  
  #ifdef FEATURE_PAL
      g_pageSizeUnixInl = GetOsPageSize();
-#endif
  
+    g_currentProcessCpuCount = PAL_GetLogicalCpuCountFromOS();
+    if (PAL_GetCurrentThreadAffinitySet(AffinitySet::BitsetDataSize, g_processAffinitySet.GetBitsetData()))
+    {
+        assert(g_currentProcessCpuCount == g_processAffinitySet.Count());
+    }
+    else
+    {
+        // There is no way to get affinity on the current OS, set the affinity set to reflect all processors
+        for (size_t i = 0; i < g_currentProcessCpuCount; i++)
+        {
+            g_processAffinitySet.Add(i);
+        }
+    }
+#else // FEATURE_PAL
      if (CPUGroupInfo::CanEnableGCCPUGroups())
      {
          // When CPU groups are enabled, then the process is not bound by the process affinity set at process launch.
@@ -135,6 +152,7 @@ bool GCToOSInterface::Initialize()
              }
          }
      }
+#endif // FEATURE_PAL
  
      return true;
  }
@@ -175,7 +193,7 @@ bool GCToOSInterface::SetCurrentThreadIdealAffinity(uint16_t srcProcNo, uint16_t
      LIMITED_METHOD_CONTRACT;
  
      bool success = true;
-
+#ifndef FEATURE_PAL
      GroupProcNo srcGroupProcNo(srcProcNo);
      GroupProcNo dstGroupProcNo(dstProcNo);
  
@@ -202,7 +220,6 @@ bool GCToOSInterface::SetCurrentThreadIdealAffinity(uint16_t srcProcNo, uint16_t
  
          success = !!SetThreadIdealProcessorEx(GetCurrentThread(), &proc, NULL);
      }
-#if !defined(FEATURE_PAL)
      else
      {
          if (GetThreadIdealProcessorEx(GetCurrentThread(), &proc))
@@ -211,10 +228,13 @@ bool GCToOSInterface::SetCurrentThreadIdealAffinity(uint16_t srcProcNo, uint16_t
              success = !!SetThreadIdealProcessorEx(GetCurrentThread(), &proc, &proc);
          }
      }
-#endif // !defined(FEATURE_PAL)
-#endif
-
+#endif // !FEATURE_CORESYSTEM
      return success;
+
+#else // !FEATURE_PAL
+    return GCToOSInterface::SetThreadAffinity(dstProcNo);
+
+#endif // !FEATURE_PAL
  }
  
  // Get the number of the current processor
@@ -472,7 +492,7 @@ size_t GCToOSInterface::GetCacheSizePerLogicalCpu(bool trueSize)
  bool GCToOSInterface::SetThreadAffinity(uint16_t procNo)
  {
      LIMITED_METHOD_CONTRACT;
-
+#ifndef FEATURE_PAL
      GroupProcNo groupProcNo(procNo);
  
      if (groupProcNo.GetGroup() != GroupProcNo::NoGroup)
@@ -489,6 +509,9 @@ bool GCToOSInterface::SetThreadAffinity(uint16_t procNo)
      {
          return !!SetThreadAffinityMask(GetCurrentThread(), (DWORD_PTR)1 << groupProcNo.GetProcIndex());
      }
+#else //  FEATURE_PAL
+    return PAL_SetCurrentThreadAffinity(procNo);
+#endif //  FEATURE_PAL
  }
  
  // Boosts the calling thread's thread priority to a level higher than the default
@@ -510,7 +533,9 @@ bool GCToOSInterface::BoostThreadPriority()
  //  set of enabled processors
  const AffinitySet* GCToOSInterface::SetGCThreadsAffinitySet(uintptr_t configAffinityMask, const AffinitySet* configAffinitySet)
  {
+#ifndef FEATURE_PAL
      if (CPUGroupInfo::CanEnableGCCPUGroups())
+#endif // !FEATURE_PAL
      {
          if (!configAffinitySet->IsEmpty())
          {
@@ -524,6 +549,7 @@ const AffinitySet* GCToOSInterface::SetGCThreadsAffinitySet(uintptr_t configAffi
              }
          }
      }
+#ifndef FEATURE_PAL
      else
      {
          if (configAffinityMask != 0)
@@ -538,6 +564,7 @@ const AffinitySet* GCToOSInterface::SetGCThreadsAffinitySet(uintptr_t configAffi
              }
          }
      }
+#endif // !FEATURE_PAL
  
      return &g_processAffinitySet;
  }
@@ -549,10 +576,14 @@ uint32_t GCToOSInterface::GetCurrentProcessCpuCount()
  {
      LIMITED_METHOD_CONTRACT;
  
+#ifndef FEATURE_PAL
      // GetCurrentProcessCpuCount only returns up to 64 procs.
      return CPUGroupInfo::CanEnableGCCPUGroups() ?
                  GCToOSInterface::GetTotalProcessorCount():
                  ::GetCurrentProcessCpuCount();
+#else // !FEATURE_PAL
+    return g_currentProcessCpuCount;
+#endif // !FEATURE_PAL
  }
  
  // Return the size of the user-mode portion of the virtual address space of this process.
@@ -887,6 +918,7 @@ uint32_t GCToOSInterface::GetTotalProcessorCount()
  {
      LIMITED_METHOD_CONTRACT;
  
+#ifndef FEATURE_PAL
      if (CPUGroupInfo::CanEnableGCCPUGroups())
      {
          return CPUGroupInfo::GetNumActiveProcessors();
@@ -895,6 +927,9 @@ uint32_t GCToOSInterface::GetTotalProcessorCount()
      {
          return g_SystemInfo.dwNumberOfProcessors;
      }
+#else // !FEATURE_PAL
+    return g_currentProcessCpuCount;
+#endif // !FEATURE_PAL
  }
  
  bool GCToOSInterface::CanEnableGCNumaAware()
@@ -904,20 +939,6 @@ bool GCToOSInterface::CanEnableGCNumaAware()
      return NumaNodeInfo::CanEnableGCNumaAware() != FALSE;
  }
  
-bool GCToOSInterface::GetNumaProcessorNode(uint16_t proc_no, uint16_t *node_no)
-{
-    LIMITED_METHOD_CONTRACT;
-
-    GroupProcNo groupProcNo(proc_no);
-
-    PROCESSOR_NUMBER procNumber;
-    procNumber.Group    = groupProcNo.GetGroup();
-    procNumber.Number   = (BYTE)groupProcNo.GetProcIndex();
-    procNumber.Reserved = 0;
-
-    return NumaNodeInfo::GetNumaProcessorNodeEx(&procNumber, node_no) != FALSE;
-}
-
  // Get processor number and optionally its NUMA node number for the specified heap number
  // Parameters:
  //  heap_number - heap number to get the result for
@@ -929,53 +950,76 @@ bool GCToOSInterface::GetProcessorForHeap(uint16_t heap_number, uint16_t* proc_n
  {
      bool success = false;
  
-    if (CPUGroupInfo::CanEnableGCCPUGroups())
+    // Locate heap_number-th available processor
+    uint16_t procNumber;
+    size_t cnt = heap_number;
+    for (uint16_t i = 0; i < GCToOSInterface::GetTotalProcessorCount(); i++)
      {
-        uint16_t gn, gpn;
-        CPUGroupInfo::GetGroupForProcessor((uint16_t)heap_number, &gn, &gpn);
+        if (g_processAffinitySet.Contains(i))
+        {
+            if (cnt == 0)
+            {
+                procNumber = i;
+                success = true;
+                break;
+            }
+
+            cnt--;
+        }
+    }
+
+    if (success)
+    {
+#ifndef FEATURE_PAL
+        WORD gn, gpn;
+
+        if (CPUGroupInfo::CanEnableGCCPUGroups())
+        {
+            CPUGroupInfo::GetGroupForProcessor(procNumber, &gn, &gpn);
+        }
+        else
+        {
+            gn = GroupProcNo::NoGroup;
+            gpn = procNumber;
+        }
+
+        GroupProcNo groupProcNo(gn, gpn);
+        *proc_no = groupProcNo.GetCombinedValue();
  
-        *proc_no = GroupProcNo(gn, gpn).GetCombinedValue();
          if (GCToOSInterface::CanEnableGCNumaAware())
          {
-            if (!GCToOSInterface::GetNumaProcessorNode(*proc_no, node_no))
+            PROCESSOR_NUMBER procNumber;
+
+            if (CPUGroupInfo::CanEnableGCCPUGroups())
+            {
+                procNumber.Group = gn;
+            }
+            else
+            {
+                // Get the current processor group. Fill the procNumber declared above
+                // (no local redeclaration), so that its Group field is initialized.
+                GetCurrentProcessorNumberEx(&procNumber);
+            }
+
+            procNumber.Number   = (BYTE)gpn;
+            procNumber.Reserved = 0;
+
+            if (!NumaNodeInfo::GetNumaProcessorNodeEx(&procNumber, node_no))
              {
                  *node_no = NUMA_NODE_UNDEFINED;
              }
          }
          else
          {   // no numa setting, each cpu group is treated as a node
-            *node_no = gn;
+            *node_no = groupProcNo.GetGroup();
          }
-
-        success = true;
-    }
-    else
-    {
-        int bit_number = 0;
-        uint8_t proc_number = 0;
-        for (uintptr_t mask = 1; mask != 0; mask <<= 1)
+#else // !FEATURE_PAL
+        *proc_no = procNumber;
+        if (!GCToOSInterface::CanEnableGCNumaAware() || !NumaNodeInfo::GetNumaProcessorNodeEx(procNumber, (WORD*)node_no))
          {
-            if (g_processAffinitySet.Contains(proc_number))
-            {
-                if (bit_number == heap_number)
-                {
-                    *proc_no = GroupProcNo(GroupProcNo::NoGroup, proc_number).GetCombinedValue();
-
-                    if (GCToOSInterface::CanEnableGCNumaAware())
-                    {
-                        if (!GCToOSInterface::GetNumaProcessorNode(proc_number, node_no))
-                        {
-                            *node_no = NUMA_NODE_UNDEFINED;
-                        }
-                    }
-
-                    success = true;
-                    break;
-                }
-                bit_number++;
-            }
-            proc_number++;
+            *node_no = NUMA_NODE_UNDEFINED;
          }
+#endif // !FEATURE_PAL
      }
  
      return success;
@@ -993,6 +1037,7 @@ bool GCToOSInterface::ParseGCHeapAffinitizeRangesEntry(const char** config_strin
      size_t index_offset = 0;
  
      char* number_end;
+#ifndef FEATURE_PAL
      size_t group_number = strtoul(*config_string, &number_end, 10);
  
      if ((number_end == *config_string) || (*number_end != ':'))
@@ -1011,6 +1056,7 @@ bool GCToOSInterface::ParseGCHeapAffinitizeRangesEntry(const char** config_strin
  
      index_offset = group_begin;
      *config_string = number_end + 1;
+#endif // !FEATURE_PAL
  
      size_t start, end;
      if (!ParseIndexOrRange(config_string, &start, &end))
@@ -1018,11 +1064,13 @@ bool GCToOSInterface::ParseGCHeapAffinitizeRangesEntry(const char** config_strin
          return false;
      }
  
+#ifndef FEATURE_PAL
      if ((start >= group_size) || (end >= group_size))
      {
          // Invalid CPU index values or range
          return false;
      }
+#endif // !FEATURE_PAL
  
      *start_index = index_offset + start;
      *end_index = index_offset + end;
diff --git a/src/vm/threads.cpp b/src/vm/threads.cpp

index 6890290..43976a9 100644 (file)
--- a/src/vm/threads.cpp
+++ b/src/vm/threads.cpp
@@ -451,7 +451,7 @@ void Thread::ChooseThreadCPUGroupAffinity()
          GC_TRIGGERS;
      }
      CONTRACTL_END;
-
+#ifndef FEATURE_PAL
      if (!CPUGroupInfo::CanEnableGCCPUGroups() || !CPUGroupInfo::CanEnableThreadUseAllCpuGroups()) 
           return;
  
@@ -471,6 +471,7 @@ void Thread::ChooseThreadCPUGroupAffinity()
      CPUGroupInfo::SetThreadGroupAffinity(GetThreadHandle(), &groupAffinity, NULL);
      m_wCPUGroup = groupAffinity.Group;
      m_pAffinityMask = groupAffinity.Mask;
+#endif // !FEATURE_PAL
  }
  
  void Thread::ClearThreadCPUGroupAffinity()
@@ -481,7 +482,7 @@ void Thread::ClearThreadCPUGroupAffinity()
          GC_NOTRIGGER;
      }
      CONTRACTL_END;
-
+#ifndef FEATURE_PAL
      if (!CPUGroupInfo::CanEnableGCCPUGroups() || !CPUGroupInfo::CanEnableThreadUseAllCpuGroups()) 
           return;
  
@@ -499,6 +500,7 @@ void Thread::ClearThreadCPUGroupAffinity()
  
      m_wCPUGroup = 0;
      m_pAffinityMask = 0;
+#endif // !FEATURE_PAL
  }
  
  DWORD Thread::StartThread()
@@ -1561,8 +1563,10 @@ Thread::Thread()
      
      m_fGCSpecial = FALSE;
  
+#ifndef FEATURE_PAL
      m_wCPUGroup = 0;
      m_pAffinityMask = 0;
+#endif // !FEATURE_PAL
  
      m_pAllLoggedTypes = NULL;
  
diff --git a/src/vm/threads.h b/src/vm/threads.h

index 94ce275..e5307d9 100644 (file)
--- a/src/vm/threads.h
+++ b/src/vm/threads.h
@@ -4824,9 +4824,10 @@ public:
      void SetGCSpecial(bool fGCSpecial);
  
  private:
+#ifndef FEATURE_PAL
      WORD m_wCPUGroup;
      DWORD_PTR m_pAffinityMask;
-
+#endif // !FEATURE_PAL
  public:
      void ChooseThreadCPUGroupAffinity();
      void ClearThreadCPUGroupAffinity();
diff --git a/src/vm/win32threadpool.cpp b/src/vm/win32threadpool.cpp

index 29c1d21..09a3a07 100644 (file)
--- a/src/vm/win32threadpool.cpp
+++ b/src/vm/win32threadpool.cpp
@@ -345,12 +345,16 @@ BOOL ThreadpoolMgr::Initialize()
      UnManagedPerAppDomainTPCount* pADTPCount;
      pADTPCount = PerAppDomainTPCountList::GetUnmanagedTPCount();
  
+#ifndef FEATURE_PAL
      //ThreadPool_CPUGroup
      CPUGroupInfo::EnsureInitialized();
      if (CPUGroupInfo::CanEnableGCCPUGroups() && CPUGroupInfo::CanEnableThreadUseAllCpuGroups())
          NumberOfProcessors = CPUGroupInfo::GetNumActiveProcessors();
      else
          NumberOfProcessors = GetCurrentProcessCpuCount();
+#else // !FEATURE_PAL
+    NumberOfProcessors = GetCurrentProcessCpuCount();
+#endif // !FEATURE_PAL
      InitPlatformVariables();
  
      EX_TRY
@@ -380,20 +384,15 @@ BOOL ThreadpoolMgr::Initialize()
          RetiredWorkerSemaphore = new CLRLifoSemaphore();
          RetiredWorkerSemaphore->Create(0, ThreadCounter::MaxPossibleCount);
  
+#ifndef FEATURE_PAL
          //ThreadPool_CPUGroup
          if (CPUGroupInfo::CanEnableGCCPUGroups() && CPUGroupInfo::CanEnableThreadUseAllCpuGroups())
              RecycledLists.Initialize( CPUGroupInfo::GetNumActiveProcessors() );
          else
              RecycledLists.Initialize( g_SystemInfo.dwNumberOfProcessors );
-        /*
-            {
-                SYSTEM_INFO sysInfo;
-
-                ::GetSystemInfo( &sysInfo );
-
-                RecycledLists.Initialize( sysInfo.dwNumberOfProcessors );
-            }
-        */
+#else // !FEATURE_PAL
+        RecycledLists.Initialize( g_SystemInfo.dwNumberOfProcessors );
+#endif // !FEATURE_PAL
      }
      EX_CATCH
      {
@@ -4095,9 +4094,10 @@ DWORD WINAPI ThreadpoolMgr::GateThreadStart(LPVOID lpArgs)
          return 0;
      }
  
+#ifndef FEATURE_PAL
      //GateThread can start before EESetup, so ensure CPU group information is initialized;
      CPUGroupInfo::EnsureInitialized();
-
+#endif // !FEATURE_PAL
      // initialize CPU usage information structure;
      prevCPUInfo.idleTime.QuadPart   = 0;
      prevCPUInfo.kernelTime.QuadPart = 0;
diff --git a/src/vm/win32threadpool.h b/src/vm/win32threadpool.h

index bb6ebc0..55f321c 100644 (file)
--- a/src/vm/win32threadpool.h
+++ b/src/vm/win32threadpool.h
@@ -735,12 +735,22 @@ public:
          {
              LIMITED_METHOD_CONTRACT;
  
+            DWORD processorNumber = 0;
+
+#ifndef FEATURE_PAL
                 if (CPUGroupInfo::CanEnableGCCPUGroups() && CPUGroupInfo::CanEnableThreadUseAllCpuGroups())
-                return pRecycledListPerProcessor[CPUGroupInfo::CalculateCurrentProcessorNumber()][memType];
+                processorNumber = CPUGroupInfo::CalculateCurrentProcessorNumber();
              else
                  // Turns out GetCurrentProcessorNumber can return a value greater than the number of processors reported by
                  // GetSystemInfo, if we're running in WOW64 on a machine with >32 processors.
-                   return pRecycledListPerProcessor[GetCurrentProcessorNumber()%NumberOfProcessors][memType];
+                   processorNumber = GetCurrentProcessorNumber()%NumberOfProcessors;
+#else // !FEATURE_PAL
+            if (PAL_HasGetCurrentProcessorNumber())
+            {
+                processorNumber = GetCurrentProcessorNumber();
+            }
+#endif // !FEATURE_PAL
+            return pRecycledListPerProcessor[processorNumber][memType];
         }
      };
author	Jan Vorlicek <janvorli@microsoft.com>
	Fri, 5 Apr 2019 00:53:32 +0000 (02:53 +0200)
committer	Jan Vorlicek <janvorli@microsoft.com>
	Mon, 8 Apr 2019 23:26:55 +0000 (01:26 +0200)
src/classlibnative/bcltype/system.cpp		patch \| blob \| history
src/gc/env/gcenv.os.h		patch \| blob \| history
src/gc/unix/gcenv.unix.cpp		patch \| blob \| history
src/gc/windows/gcenv.windows.cpp		patch \| blob \| history
src/inc/utilcode.h		patch \| blob \| history
src/pal/inc/pal.h		patch \| blob \| history
src/pal/src/include/pal/palinternal.h		patch \| blob \| history
src/pal/src/misc/sysinfo.cpp		patch \| blob \| history
src/pal/src/numa/numa.cpp		patch \| blob \| history
src/pal/src/numa/numashim.h		patch \| blob \| history
src/pal/src/thread/thread.cpp		patch \| blob \| history
src/utilcode/util.cpp		patch \| blob \| history
src/vm/ceemain.cpp		patch \| blob \| history
src/vm/eeconfig.cpp		patch \| blob \| history
src/vm/gcenv.os.cpp		patch \| blob \| history
src/vm/threads.cpp		patch \| blob \| history
src/vm/threads.h		patch \| blob \| history
src/vm/win32threadpool.cpp		patch \| blob \| history
src/vm/win32threadpool.h		patch \| blob \| history