Modify affinity range config format for Windows
[platform/upstream/coreclr.git] / src / gc / gc.cpp
index a26ad8d..0dc7e36 100644 (file)
@@ -75,14 +75,6 @@ BOOL bgc_heap_walk_for_etw_p = FALSE;
 #define LOH_PIN_QUEUE_LENGTH 100
 #define LOH_PIN_DECAY 10
 
-#ifdef BIT64
-// Right now we support maximum 1024 procs - meaning that we will create at most
-// that many GC threads and GC heaps. 
-#define MAX_SUPPORTED_CPUS 1024
-#else
-#define MAX_SUPPORTED_CPUS 64
-#endif // BIT64
-
 uint32_t yp_spin_count_unit = 0;
 size_t loh_size_threshold = LARGE_OBJECT_SIZE;
 
@@ -5062,8 +5054,6 @@ class heap_select
     static uint16_t proc_no_to_heap_no[MAX_SUPPORTED_CPUS];
     static uint16_t heap_no_to_proc_no[MAX_SUPPORTED_CPUS];
     static uint16_t heap_no_to_numa_node[MAX_SUPPORTED_CPUS];
-    static uint16_t heap_no_to_cpu_group[MAX_SUPPORTED_CPUS];
-    static uint16_t heap_no_to_group_proc[MAX_SUPPORTED_CPUS];
     static uint16_t numa_node_to_heap_map[MAX_SUPPORTED_CPUS+4];
 
     static int access_time(uint8_t *sniff_buffer, int heap_number, unsigned sniff_index, unsigned n_sniff_buffers)
@@ -5196,26 +5186,6 @@ public:
         heap_no_to_numa_node[heap_number] = numa_node;
     }
 
-    static uint16_t find_cpu_group_from_heap_no(int heap_number)
-    {
-        return heap_no_to_cpu_group[heap_number];
-    }
-
-    static void set_cpu_group_for_heap(int heap_number, uint16_t group_number)
-    {
-        heap_no_to_cpu_group[heap_number] = group_number;
-    }
-
-    static uint16_t find_group_proc_from_heap_no(int heap_number)
-    {
-        return heap_no_to_group_proc[heap_number];
-    }
-
-    static void set_group_proc_for_heap(int heap_number, uint16_t group_proc)
-    {
-        heap_no_to_group_proc[heap_number] = group_proc;
-    }
-
     static void init_numa_node_to_heap_map(int nheaps)
     {   // called right after GCHeap::Init() for each heap is finished
         // when numa is not enabled, heap_no_to_numa_node[] are all filled
@@ -5245,8 +5215,6 @@ unsigned heap_select::cur_sniff_index;
 uint16_t heap_select::proc_no_to_heap_no[MAX_SUPPORTED_CPUS];
 uint16_t heap_select::heap_no_to_proc_no[MAX_SUPPORTED_CPUS];
 uint16_t heap_select::heap_no_to_numa_node[MAX_SUPPORTED_CPUS];
-uint16_t heap_select::heap_no_to_cpu_group[MAX_SUPPORTED_CPUS];
-uint16_t heap_select::heap_no_to_group_proc[MAX_SUPPORTED_CPUS];
 uint16_t heap_select::numa_node_to_heap_map[MAX_SUPPORTED_CPUS+4];
 
 BOOL gc_heap::create_thread_support (unsigned number_of_heaps)
@@ -5289,79 +5257,22 @@ void gc_heap::destroy_thread_support ()
     }
 }
 
-void set_thread_group_affinity_for_heap(int heap_number, GCThreadAffinity* affinity)
+void set_thread_affinity_for_heap(int heap_number)
 {
-    affinity->Group = GCThreadAffinity::None;
-    affinity->Processor = GCThreadAffinity::None;
+    uint16_t proc_no;
+    uint16_t node_no;
 
-    uint16_t gn, gpn;
-    GCToOSInterface::GetGroupForProcessor((uint16_t)heap_number, &gn, &gpn);
-
-    int bit_number = 0;
-    for (uintptr_t mask = 1; mask !=0; mask <<=1) 
+    if (GCToOSInterface::GetProcessorForHeap(heap_number, &proc_no, &node_no))
     {
-        if (bit_number == gpn)
+        heap_select::set_proc_no_for_heap(heap_number, proc_no);
+        if (node_no != NUMA_NODE_UNDEFINED)
         {
-            dprintf(3, ("using processor group %d, mask %Ix for heap %d\n", gn, mask, heap_number));
-            affinity->Processor = gpn;
-            affinity->Group = gn;
-            heap_select::set_cpu_group_for_heap(heap_number, gn);
-            heap_select::set_group_proc_for_heap(heap_number, gpn);
-            if (GCToOSInterface::CanEnableGCNumaAware())
-            {  
-                PROCESSOR_NUMBER proc_no;
-                proc_no.Group    = gn;
-                proc_no.Number   = (uint8_t)gpn;
-                proc_no.Reserved = 0;
-
-                uint16_t node_no = 0;
-                if (GCToOSInterface::GetNumaProcessorNode(&proc_no, &node_no))
-                    heap_select::set_numa_node_for_heap(heap_number, node_no);
-            }
-            else
-            {   // no numa setting, each cpu group is treated as a node
-                heap_select::set_numa_node_for_heap(heap_number, gn);
-            }
-            return;
+            heap_select::set_numa_node_for_heap(heap_number, node_no);
         }
-        bit_number++;
-    }
-}
-
-void set_thread_affinity_mask_for_heap(int heap_number, GCThreadAffinity* affinity)
-{
-    affinity->Group = GCThreadAffinity::None;
-    affinity->Processor = GCThreadAffinity::None;
-
-    uintptr_t pmask = process_mask;
-    int bit_number = 0; 
-    uint8_t proc_number = 0;
-    for (uintptr_t mask = 1; mask != 0; mask <<= 1)
-    {
-        if ((mask & pmask) != 0)
+        if (!GCToOSInterface::SetThreadAffinity(proc_no))
         {
-            if (bit_number == heap_number)
-            {
-                dprintf (3, ("Using processor %d for heap %d", proc_number, heap_number));
-                affinity->Processor = proc_number;
-                heap_select::set_proc_no_for_heap(heap_number, proc_number);
-                if (GCToOSInterface::CanEnableGCNumaAware())
-                {
-                    uint16_t node_no = 0;
-                    PROCESSOR_NUMBER proc_no;
-                    proc_no.Group = 0;
-                    proc_no.Number = (uint8_t)proc_number;
-                    proc_no.Reserved = 0;
-                    if (GCToOSInterface::GetNumaProcessorNode(&proc_no, &node_no))
-                    {
-                        heap_select::set_numa_node_for_heap(heap_number, node_no);
-                    }
-                }
-                return;
-            }
-            bit_number++;
+            dprintf(1, ("Failed to set thread affinity for server GC thread"));
         }
-        proc_number++;
     }
 }
 
@@ -5501,7 +5412,7 @@ bool gc_heap::virtual_alloc_commit_for_heap (void* addr, size_t size, int h_numb
     {
         if (GCToOSInterface::CanEnableGCNumaAware())
         {
-            uint32_t numa_node = heap_select::find_numa_node_from_heap_no(h_number);
+            uint16_t numa_node = heap_select::find_numa_node_from_heap_no(h_number);
             if (GCToOSInterface::VirtualCommit(addr, size, numa_node))
                 return true;
         }
@@ -5854,7 +5765,7 @@ void gc_mechanisms::init_mechanisms()
     promotion = FALSE;//TRUE;
     compaction = TRUE;
 #ifdef FEATURE_LOH_COMPACTION
-    loh_compaction = gc_heap::should_compact_loh();
+    loh_compaction = gc_heap::loh_compaction_requested();
 #else
     loh_compaction = FALSE;
 #endif //FEATURE_LOH_COMPACTION
@@ -13626,43 +13537,18 @@ try_again:
                     acontext->set_alloc_heap(GCHeap::GetHeap(max_hp->heap_number));
                     if (!gc_thread_no_affinitize_p)
                     {
-                        if (GCToOSInterface::CanEnableGCCPUGroups())
-                        {   //only set ideal processor when max_hp and org_hp are in the same cpu
-                            //group. DO NOT MOVE THREADS ACROSS CPU GROUPS
-                            uint16_t org_gn = heap_select::find_cpu_group_from_heap_no(org_hp->heap_number);
-                            uint16_t max_gn = heap_select::find_cpu_group_from_heap_no(max_hp->heap_number);
-                            if (org_gn == max_gn) //only set within CPU group, so SetThreadIdealProcessor is enough
-                            {   
-                                uint16_t group_proc_no = heap_select::find_group_proc_from_heap_no(max_hp->heap_number);
-
-                                GCThreadAffinity affinity;
-                                affinity.Processor = group_proc_no;
-                                affinity.Group = org_gn;
-                                if (!GCToOSInterface::SetCurrentThreadIdealAffinity(&affinity))
-                                {
-                                    dprintf (3, ("Failed to set the ideal processor and group for heap %d.",
-                                                org_hp->heap_number));
-                                }
-                            }
-                        }
-                        else 
-                        {
-                            uint16_t proc_no = heap_select::find_proc_no_from_heap_no(max_hp->heap_number);
+                        uint16_t src_proc_no = heap_select::find_proc_no_from_heap_no(org_hp->heap_number);
+                        uint16_t dst_proc_no = heap_select::find_proc_no_from_heap_no(max_hp->heap_number);
 
-                            GCThreadAffinity affinity;
-                            affinity.Processor = proc_no;
-                            affinity.Group = GCThreadAffinity::None;
-
-                            if (!GCToOSInterface::SetCurrentThreadIdealAffinity(&affinity))
-                            {
-                                dprintf (3, ("Failed to set the ideal processor for heap %d.",
-                                            org_hp->heap_number));
-                            }
+                        if (!GCToOSInterface::SetCurrentThreadIdealAffinity(src_proc_no, dst_proc_no))
+                        {
+                            dprintf (3, ("Failed to set the ideal processor for heap %d.",
+                                        org_hp->heap_number));
                         }
                     }
                     dprintf (3, ("Switching context %p (home heap %d) ", 
                                  acontext,
-                        acontext->get_home_heap()->pGenGCHeap->heap_number));
+                                 acontext->get_home_heap()->pGenGCHeap->heap_number));
                     dprintf (3, (" from heap %d (%Id free bytes, %d contexts) ", 
                                  org_hp->heap_number,
                                  org_size,
@@ -21372,10 +21258,10 @@ retry:
     }
 }
 
-BOOL gc_heap::should_compact_loh()
+BOOL gc_heap::loh_compaction_requested()
 {
     // If hard limit is specified GC will automatically decide if LOH needs to be compacted.
-    return (heap_hard_limit || loh_compaction_always_p || (loh_compaction_mode != loh_compaction_default));
+    return (loh_compaction_always_p || (loh_compaction_mode != loh_compaction_default));
 }
 
 inline
@@ -21545,7 +21431,7 @@ BOOL gc_heap::plan_loh()
 
 void gc_heap::compact_loh()
 {
-    assert (should_compact_loh());
+    assert (loh_compaction_requested() || heap_hard_limit);
 
     generation* gen        = large_object_generation;
     heap_segment* start_seg = heap_segment_rw (generation_start_segment (gen));
@@ -24392,7 +24278,7 @@ void gc_heap::relocate_shortened_survivor_helper (uint8_t* plug, uint8_t* plug_e
 
     while (x < plug_end)
     {
-        if (check_short_obj_p && ((plug_end - x) < min_pre_pin_obj_size))
+        if (check_short_obj_p && ((plug_end - x) < (DWORD)min_pre_pin_obj_size))
         {
             dprintf (3, ("last obj %Ix is short", x));
 
@@ -25449,22 +25335,10 @@ void gc_heap::gc_thread_stub (void* arg)
     gc_heap* heap = (gc_heap*)arg;
     if (!gc_thread_no_affinitize_p)
     {
-        GCThreadAffinity affinity;
-        affinity.Group = GCThreadAffinity::None;
-        affinity.Processor = GCThreadAffinity::None;
-
         // We are about to set affinity for GC threads. It is a good place to set up NUMA and
         // CPU groups because the process mask, processor number, and group number are all
         // readily available.
-        if (GCToOSInterface::CanEnableGCCPUGroups())
-            set_thread_group_affinity_for_heap(heap->heap_number, &affinity);
-        else
-            set_thread_affinity_mask_for_heap(heap->heap_number, &affinity);
-
-        if (!GCToOSInterface::SetThreadAffinity(&affinity))
-        {
-            dprintf(1, ("Failed to set thread affinity for server GC thread"));
-        }
+        set_thread_affinity_for_heap(heap->heap_number);
     }
 
     // server GC threads run at a higher priority than normal.
@@ -34211,12 +34085,25 @@ HRESULT GCHeap::Initialize()
     uint32_t nhp_from_config = 0;
 
 #ifdef MULTIPLE_HEAPS
+    AffinitySet config_affinity_set;
+    GCConfigStringHolder cpu_index_ranges_holder(GCConfig::GetGCHeapAffinitizeRanges());
+
+    if (!ParseGCHeapAffinitizeRanges(cpu_index_ranges_holder.Get(), &config_affinity_set))
+    {
+        return CLR_E_GC_BAD_AFFINITY_CONFIG_FORMAT;
+    }
+
+    uintptr_t config_affinity_mask = static_cast<uintptr_t>(GCConfig::GetGCHeapAffinitizeMask());
+    const AffinitySet* process_affinity_set = GCToOSInterface::SetGCThreadsAffinitySet(config_affinity_mask, &config_affinity_set);
+
+    if (process_affinity_set->IsEmpty())
+    {
+        return CLR_E_GC_BAD_AFFINITY_CONFIG;
+    }
+
     nhp_from_config = static_cast<uint32_t>(GCConfig::GetHeapCount());
     
-    // GetCurrentProcessCpuCount only returns up to 64 procs.
-    uint32_t nhp_from_process = GCToOSInterface::CanEnableGCCPUGroups() ?
-                                GCToOSInterface::GetTotalProcessorCount():
-                                GCToOSInterface::GetCurrentProcessCpuCount();
+    uint32_t nhp_from_process = GCToOSInterface::GetCurrentProcessCpuCount();
 
     if (nhp_from_config)
     {
@@ -34231,63 +34118,23 @@ HRESULT GCHeap::Initialize()
 #ifndef FEATURE_REDHAWK
     gc_heap::gc_thread_no_affinitize_p = (gc_heap::heap_hard_limit ? false : (GCConfig::GetNoAffinitize() != 0));
 
-    size_t gc_thread_affinity_mask = static_cast<size_t>(GCConfig::GetGCHeapAffinitizeMask());
-
     if (gc_heap::heap_hard_limit)
     {
-        gc_heap::gc_thread_no_affinitize_p = (gc_thread_affinity_mask == 0);
+        gc_heap::gc_thread_no_affinitize_p = ((config_affinity_set.Count() == 0) && (config_affinity_mask == 0));
     }
 
     if (!(gc_heap::gc_thread_no_affinitize_p))
     {
-        if (!(GCToOSInterface::CanEnableGCCPUGroups()))
-        {
-            uintptr_t pmask, smask;
-            if (GCToOSInterface::GetCurrentProcessAffinityMask(&pmask, &smask))
-            {
-                pmask &= smask;
-
-#ifdef FEATURE_PAL
-                // GetCurrentProcessAffinityMask can return pmask=0 and smask=0 on
-                // systems with more than 1 NUMA node. The pmask decides the
-                // number of GC heaps to be used and the processors they are
-                // affinitized with. So pmask is now set to reflect that 64
-                // processors are available to begin with. The actual processors in
-                // the system may be lower and are taken into account before
-                // finalizing the number of heaps.
-                if (!pmask)
-                {
-                    pmask = SIZE_T_MAX;
-                }
-#endif // FEATURE_PAL
-
-                if (gc_thread_affinity_mask)
-                {
-                    pmask &= gc_thread_affinity_mask;
-                }
-
-                process_mask = pmask;
-
-                unsigned int set_bits_in_pmask = 0;
-                while (pmask)
-                {
-                    if (pmask & 1)
-                        set_bits_in_pmask++;
-                    pmask >>= 1;
-                }
-
-                nhp = min (nhp, set_bits_in_pmask);
+        uint32_t num_affinitized_processors = (uint32_t)process_affinity_set->Count();
 
+        if (num_affinitized_processors != 0)
+        {
+            nhp = min(nhp, num_affinitized_processors);
+        }
 #ifdef FEATURE_PAL
-                // Limit the GC heaps to the number of processors available in the system.
-                nhp = min (nhp, GCToOSInterface::GetTotalProcessorCount());
+        // Limit the GC heaps to the number of processors available in the system.
+        nhp = min (nhp, GCToOSInterface::GetTotalProcessorCount());
 #endif // FEATURE_PAL
-            }
-            else
-            {
-                gc_heap::gc_thread_no_affinitize_p = true;
-            }
-        }
     }
 #endif //!FEATURE_REDHAWK
 #endif //MULTIPLE_HEAPS