Perf followup for Pinned Object Heap (#34215)
author Vladimir Sadov <vsadov@microsoft.com>
Wed, 1 Apr 2020 20:08:53 +0000 (13:08 -0700)
committer GitHub <noreply@github.com>
Wed, 1 Apr 2020 20:08:53 +0000 (20:08 +0000)
* free list bucket skipping

* couple more tweaks

* More size buckets in POH free lists

* static_data_table

* PR feedback

* off-by-one error in BASE_LOH_ALIST_BITS

src/coreclr/src/gc/gc.cpp
src/coreclr/src/gc/gcpriv.h

src/coreclr/src/gc/gc.cpp
index 5521936..37c1798 100644
@@ -1917,7 +1917,8 @@ retry:
     }
 }
 
-inline BOOL try_enter_spin_lock(GCSpinLock* spin_lock)
+inline
+static BOOL try_enter_spin_lock(GCSpinLock* spin_lock)
 {
     return (Interlocked::CompareExchange(&spin_lock->lock, 0, -1) < 0);
 }
@@ -2411,7 +2412,6 @@ static static_data static_data_table[latency_level_last - latency_level_first +
         // loh
         {3*1024*1024, SSIZE_T_MAX, 0, 0.0f, 1.25f, 4.5f, 0, 0},
         // poh
-        // TODO: tuning https://github.com/dotnet/runtime/issues/13739
         {3*1024*1024, SSIZE_T_MAX, 0, 0.0f, 1.25f, 4.5f, 0, 0},
     },
 
@@ -2432,7 +2432,6 @@ static static_data static_data_table[latency_level_last - latency_level_first +
         // loh
         {3*1024*1024, SSIZE_T_MAX, 0, 0.0f, 1.25f, 4.5f, 0, 0},
         // poh
-        // TODO: tuning https://github.com/dotnet/runtime/issues/13739
         {3*1024*1024, SSIZE_T_MAX, 0, 0.0f, 1.25f, 4.5f, 0, 0}
     },
 };
@@ -11246,9 +11245,9 @@ gc_heap::init_gc_heap (int  h_number)
     heap_segment_allocated (pseg) = heap_segment_mem (pseg) + Align (min_obj_size, get_alignment_constant (FALSE));
     heap_segment_used (pseg) = heap_segment_allocated (pseg) - plug_skew;
 
-    generation_of (max_generation)->free_list_allocator = allocator(NUM_GEN2_ALIST, BASE_GEN2_ALIST, gen2_alloc_list);
-    generation_of (loh_generation)->free_list_allocator = allocator(NUM_LOH_ALIST, BASE_LOH_ALIST, loh_alloc_list);
-    generation_of (poh_generation)->free_list_allocator = allocator(NUM_POH_ALIST, BASE_POH_ALIST, poh_alloc_list);
+    generation_of (max_generation)->free_list_allocator = allocator(NUM_GEN2_ALIST, BASE_GEN2_ALIST_BITS, gen2_alloc_list);
+    generation_of (loh_generation)->free_list_allocator = allocator(NUM_LOH_ALIST, BASE_LOH_ALIST_BITS, loh_alloc_list);
+    generation_of (poh_generation)->free_list_allocator = allocator(NUM_POH_ALIST, BASE_POH_ALIST_BITS, poh_alloc_list);
 
     for (int gen_num = 0; gen_num < total_generation_count; gen_num++)
     {
@@ -11861,11 +11860,11 @@ void gc_heap::check_batch_mark_array_bits (uint8_t* start, uint8_t* end)
 }
 #endif //VERIFY_HEAP && BACKGROUND_GC
 
-allocator::allocator (unsigned int num_b, size_t fbs, alloc_list* b)
+allocator::allocator (unsigned int num_b, int fbb, alloc_list* b)
 {
     assert (num_b < MAX_BUCKET_COUNT);
     num_buckets = num_b;
-    frst_bucket_size = fbs;
+    first_bucket_bits = fbb;
     buckets = b;
 }
 
@@ -11921,65 +11920,38 @@ void allocator::clear()
 }
 
 //always thread to the end.
-void allocator::thread_free_item (uint8_t* item, uint8_t*& head, uint8_t*& tail)
+void allocator::thread_item (uint8_t* item, size_t size)
 {
+    unsigned int a_l_number = first_suitable_bucket(size);
+    alloc_list* al = &alloc_list_of (a_l_number);
+    uint8_t*& head = al->alloc_list_head();
+    uint8_t*& tail = al->alloc_list_tail();
+
     free_list_slot (item) = 0;
     free_list_undo (item) = UNDO_EMPTY;
     assert (item != head);
 
     if (head == 0)
     {
-       head = item;
-    }
-    //TODO: This shouldn't happen anymore - verify that's the case.
-    //the following is necessary because the last free element
-    //may have been truncated, and tail isn't updated.
-    else if (free_list_slot (head) == 0)
-    {
-        free_list_slot (head) = item;
+        head = item;
     }
     else
     {
+        assert ((free_list_slot(head) != 0) || (tail == head));
         assert (item != tail);
         assert (free_list_slot(tail) == 0);
+
         free_list_slot (tail) = item;
     }
-    tail = item;
-}
 
-void allocator::thread_item (uint8_t* item, size_t size)
-{
-    size_t sz = frst_bucket_size;
-    unsigned int a_l_number = 0;
-
-    for (; a_l_number < (num_buckets-1); a_l_number++)
-    {
-        if (size < sz)
-        {
-            break;
-        }
-        sz = sz * 2;
-    }
-    alloc_list* al = &alloc_list_of (a_l_number);
-    thread_free_item (item,
-                      al->alloc_list_head(),
-                      al->alloc_list_tail());
+    tail = item;
 }
 
 void allocator::thread_item_front (uint8_t* item, size_t size)
 {
-    //find right free list
-    size_t sz = frst_bucket_size;
-    unsigned int a_l_number = 0;
-    for (; a_l_number < (num_buckets-1); a_l_number++)
-    {
-        if (size < sz)
-        {
-            break;
-        }
-        sz = sz * 2;
-    }
+    unsigned int a_l_number = first_suitable_bucket (size);
     alloc_list* al = &alloc_list_of (a_l_number);
+
     free_list_slot (item) = al->alloc_list_head();
     free_list_undo (item) = UNDO_EMPTY;
 
@@ -12644,66 +12616,62 @@ BOOL gc_heap::a_fit_free_list_p (int gen_number,
     BOOL can_fit = FALSE;
     generation* gen = generation_of (gen_number);
     allocator* gen_allocator = generation_allocator (gen);
-    size_t sz_list = gen_allocator->first_bucket_size();
-    for (unsigned int a_l_idx = 0; a_l_idx < gen_allocator->number_of_buckets(); a_l_idx++)
+
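+    // start the search at the first bucket that could possibly fit "size"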
+    for (unsigned int a_l_idx = gen_allocator->first_suitable_bucket(size); a_l_idx < gen_allocator->number_of_buckets(); a_l_idx++)
     {
-        if ((size < sz_list) || (a_l_idx == (gen_allocator->number_of_buckets()-1)))
-        {
-            uint8_t* free_list = gen_allocator->alloc_list_head_of (a_l_idx);
-            uint8_t* prev_free_item = 0;
+        uint8_t* free_list = gen_allocator->alloc_list_head_of (a_l_idx);
+        uint8_t* prev_free_item = 0;
 
-            while (free_list != 0)
+        while (free_list != 0)
+        {
+            dprintf (3, ("considering free list %Ix", (size_t)free_list));
+            size_t free_list_size = unused_array_size (free_list);
+            if ((size + Align (min_obj_size, align_const)) <= free_list_size)
             {
-                dprintf (3, ("considering free list %Ix", (size_t)free_list));
-                size_t free_list_size = unused_array_size (free_list);
-                if ((size + Align (min_obj_size, align_const)) <= free_list_size)
-                {
-                    dprintf (3, ("Found adequate unused area: [%Ix, size: %Id",
-                                 (size_t)free_list, free_list_size));
-
-                    gen_allocator->unlink_item (a_l_idx, free_list, prev_free_item, FALSE);
-                    // We ask for more Align (min_obj_size)
-                    // to make sure that we can insert a free object
-                    // in adjust_limit will set the limit lower
-                    size_t limit = limit_from_size (size, flags, free_list_size, gen_number, align_const);
-
-                    uint8_t*  remain = (free_list + limit);
-                    size_t remain_size = (free_list_size - limit);
-                    if (remain_size >= Align(min_free_list, align_const))
-                    {
-                        make_unused_array (remain, remain_size);
-                        gen_allocator->thread_item_front (remain, remain_size);
-                        assert (remain_size >= Align (min_obj_size, align_const));
-                    }
-                    else
-                    {
-                        //absorb the entire free list
-                        limit += remain_size;
-                    }
-                    generation_free_list_space (gen) -= limit;
+                dprintf (3, ("Found adequate unused area: [%Ix, size: %Id",
+                                (size_t)free_list, free_list_size));
 
-                    adjust_limit_clr (free_list, limit, size, acontext, flags, 0, align_const, gen_number);
+                gen_allocator->unlink_item (a_l_idx, free_list, prev_free_item, FALSE);
+                // We ask for an extra Align (min_obj_size)
+                // to make sure that we can insert a free object;
+                // adjust_limit will set the limit lower
+                size_t limit = limit_from_size (size, flags, free_list_size, gen_number, align_const);
 
-                    can_fit = TRUE;
-                    goto end;
-                }
-                else if (gen_allocator->discard_if_no_fit_p())
+                uint8_t*  remain = (free_list + limit);
+                size_t remain_size = (free_list_size - limit);
+                if (remain_size >= Align(min_free_list, align_const))
                 {
-                    assert (prev_free_item == 0);
-                    dprintf (3, ("couldn't use this free area, discarding"));
-                    generation_free_obj_space (gen) += free_list_size;
-
-                    gen_allocator->unlink_item (a_l_idx, free_list, prev_free_item, FALSE);
-                    generation_free_list_space (gen) -= free_list_size;
+                    make_unused_array (remain, remain_size);
+                    gen_allocator->thread_item_front (remain, remain_size);
+                    assert (remain_size >= Align (min_obj_size, align_const));
                 }
                 else
                 {
-                    prev_free_item = free_list;
+                    //absorb the entire free list
+                    limit += remain_size;
                 }
-                free_list = free_list_slot (free_list);
+                generation_free_list_space (gen) -= limit;
+
+                adjust_limit_clr (free_list, limit, size, acontext, flags, 0, align_const, gen_number);
+
+                can_fit = TRUE;
+                goto end;
+            }
+            else if (gen_allocator->discard_if_no_fit_p())
+            {
+                assert (prev_free_item == 0);
+                dprintf (3, ("couldn't use this free area, discarding"));
+                generation_free_obj_space (gen) += free_list_size;
+
+                gen_allocator->unlink_item (a_l_idx, free_list, prev_free_item, FALSE);
+                generation_free_list_space (gen) -= free_list_size;
+            }
+            else
+            {
+                prev_free_item = free_list;
             }
+            free_list = free_list_slot (free_list);
         }
-        sz_list = sz_list * 2;
     }
 end:
     return can_fit;
@@ -12807,89 +12775,85 @@ BOOL gc_heap::a_fit_free_list_uoh_p (size_t size,
 #ifdef BACKGROUND_GC
     int cookie = -1;
 #endif //BACKGROUND_GC
-    size_t sz_list = allocator->first_bucket_size();
-    for (unsigned int a_l_idx = 0; a_l_idx < allocator->number_of_buckets(); a_l_idx++)
+
+    for (unsigned int a_l_idx = allocator->first_suitable_bucket(size); a_l_idx < allocator->number_of_buckets(); a_l_idx++)
     {
-        if ((size < sz_list) || (a_l_idx == (allocator->number_of_buckets()-1)))
+        uint8_t* free_list = allocator->alloc_list_head_of (a_l_idx);
+        uint8_t* prev_free_item = 0;
+        while (free_list != 0)
         {
-            uint8_t* free_list = allocator->alloc_list_head_of (a_l_idx);
-            uint8_t* prev_free_item = 0;
-            while (free_list != 0)
-            {
-                dprintf (3, ("considering free list %Ix", (size_t)free_list));
+            dprintf (3, ("considering free list %Ix", (size_t)free_list));
 
-                size_t free_list_size = unused_array_size(free_list);
+            size_t free_list_size = unused_array_size(free_list);
 
-                ptrdiff_t diff = free_list_size - size;
+            ptrdiff_t diff = free_list_size - size;
 
 #ifdef FEATURE_LOH_COMPACTION
-                diff -= loh_pad;
+            diff -= loh_pad;
 #endif //FEATURE_LOH_COMPACTION
 
-                // must fit exactly or leave formattable space
-                if ((diff == 0) || (diff > (ptrdiff_t)Align (min_obj_size, align_const)))
-                {
+            // must fit exactly or leave formattable space
+            if ((diff == 0) || (diff > (ptrdiff_t)Align (min_obj_size, align_const)))
+            {
 #ifdef BACKGROUND_GC
-                    cookie = bgc_alloc_lock->uoh_alloc_set (free_list);
-                    bgc_track_uoh_alloc();
+                cookie = bgc_alloc_lock->uoh_alloc_set (free_list);
+                bgc_track_uoh_alloc();
 #endif //BACKGROUND_GC
 
-                    //unlink the free_item
-                    allocator->unlink_item (a_l_idx, free_list, prev_free_item, FALSE);
+                //unlink the free_item
+                allocator->unlink_item (a_l_idx, free_list, prev_free_item, FALSE);
 
-                    // Substract min obj size because limit_from_size adds it. Not needed for LOH
-                    size_t limit = limit_from_size (size - Align(min_obj_size, align_const), flags, free_list_size,
-                                                    gen_number, align_const);
+                // Subtract min obj size because limit_from_size adds it. Not needed for LOH
+                size_t limit = limit_from_size (size - Align(min_obj_size, align_const), flags, free_list_size,
+                                                gen_number, align_const);
 
 #ifdef FEATURE_LOH_COMPACTION
-                    if (loh_pad)
-                    {
-                        make_unused_array (free_list, loh_pad);
-                        limit -= loh_pad;
-                        free_list += loh_pad;
-                        free_list_size -= loh_pad;
-                    }
+                if (loh_pad)
+                {
+                    make_unused_array (free_list, loh_pad);
+                    limit -= loh_pad;
+                    free_list += loh_pad;
+                    free_list_size -= loh_pad;
+                }
 #endif //FEATURE_LOH_COMPACTION
 
-                    uint8_t*  remain = (free_list + limit);
-                    size_t remain_size = (free_list_size - limit);
-                    if (remain_size != 0)
-                    {
-                        assert (remain_size >= Align (min_obj_size, align_const));
-                        make_unused_array (remain, remain_size);
-                    }
-                    if (remain_size >= Align(min_free_list, align_const))
-                    {
-                        loh_thread_gap_front (remain, remain_size, gen);
-                        assert (remain_size >= Align (min_obj_size, align_const));
-                    }
-                    else
-                    {
-                        generation_free_obj_space (gen) += remain_size;
-                    }
-                    generation_free_list_space (gen) -= free_list_size;
-                    dprintf (3, ("found fit on loh at %Ix", free_list));
+                uint8_t*  remain = (free_list + limit);
+                size_t remain_size = (free_list_size - limit);
+                if (remain_size != 0)
+                {
+                    assert (remain_size >= Align (min_obj_size, align_const));
+                    make_unused_array (remain, remain_size);
+                }
+                if (remain_size >= Align(min_free_list, align_const))
+                {
+                    loh_thread_gap_front (remain, remain_size, gen);
+                    assert (remain_size >= Align (min_obj_size, align_const));
+                }
+                else
+                {
+                    generation_free_obj_space (gen) += remain_size;
+                }
+                generation_free_list_space (gen) -= free_list_size;
+                dprintf (3, ("found fit on loh at %Ix", free_list));
 #ifdef BACKGROUND_GC
-                    if (cookie != -1)
-                    {
-                        bgc_uoh_alloc_clr (free_list, limit, acontext, flags, align_const, cookie, FALSE, 0);
-                    }
-                    else
+                if (cookie != -1)
+                {
+                    bgc_uoh_alloc_clr (free_list, limit, acontext, flags, align_const, cookie, FALSE, 0);
+                }
+                else
 #endif //BACKGROUND_GC
-                    {
-                        adjust_limit_clr (free_list, limit, size, acontext, flags, 0, align_const, gen_number);
-                    }
-
-                    //fix the limit to compensate for adjust_limit_clr making it too short
-                    acontext->alloc_limit += Align (min_obj_size, align_const);
-                    can_fit = TRUE;
-                    goto exit;
+                {
+                    adjust_limit_clr (free_list, limit, size, acontext, flags, 0, align_const, gen_number);
                 }
-                prev_free_item = free_list;
-                free_list = free_list_slot (free_list);
+
+                //fix the limit to compensate for adjust_limit_clr making it too short
+                acontext->alloc_limit += Align (min_obj_size, align_const);
+                can_fit = TRUE;
+                goto exit;
             }
+            prev_free_item = free_list;
+            free_list = free_list_slot (free_list);
         }
-        sz_list = sz_list * 2;
     }
 exit:
     return can_fit;
@@ -13465,7 +13429,7 @@ int bgc_allocate_spin(size_t min_gc_size, size_t bgc_begin_size, size_t bgc_size
 
     if (((bgc_begin_size / end_size) >= 2) || (bgc_size_increased >= bgc_begin_size))
     {
-        if ((bgc_begin_size / end_size) > 2)
+        if ((bgc_begin_size / end_size) >= 2)
         {
             dprintf (3, ("alloc-ed too much before bgc started"));
         }
@@ -13616,7 +13580,14 @@ BOOL gc_heap::uoh_try_fit (int gen_number,
 #ifdef BACKGROUND_GC
         if (can_allocate && recursive_gc_sync::background_running_p())
         {
-            bgc_loh_size_increased += size;
+            if (gen_number == poh_generation)
+            {
+                bgc_poh_size_increased += size;
+            }
+            else
+            {
+                bgc_loh_size_increased += size;
+            }
         }
 #endif //BACKGROUND_GC
     }
@@ -13761,7 +13732,7 @@ allocation_state gc_heap::allocate_uoh (int gen_number,
                 bgc_loh_allocate_spin() :
                 bgc_poh_allocate_spin();
 
-            if (spin_for_allocation >= 0)
+            if (spin_for_allocation > 0)
             {
                 add_saved_spinlock_info (true, me_release, mt_alloc_large);
                 leave_spin_lock (&more_space_lock_uoh);
@@ -13772,7 +13743,7 @@ allocation_state gc_heap::allocate_uoh (int gen_number,
                 add_saved_spinlock_info (true, me_acquire, mt_alloc_large);
                 dprintf (SPINLOCK_LOG, ("[%d]spin Emsl uoh", heap_number));
             }
-            else
+            else if (spin_for_allocation < 0)
             {
                 wait_for_background (awr_uoh_alloc_during_bgc, true);
             }
@@ -14809,51 +14780,46 @@ uint8_t* gc_heap::allocate_in_older_generation (generation* gen, size_t size,
     if (! (size_fit_p (size REQD_ALIGN_AND_OFFSET_ARG, generation_allocation_pointer (gen),
                        generation_allocation_limit (gen), old_loc, USE_PADDING_TAIL | pad_in_front)))
     {
-        size_t sz_list = gen_allocator->first_bucket_size();
-        for (unsigned int a_l_idx = 0; a_l_idx < gen_allocator->number_of_buckets(); a_l_idx++)
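+        // real_size * 2 preserves the removed per-bucket test (real_size < (sz_list / 2))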
+        for (unsigned int a_l_idx = gen_allocator->first_suitable_bucket(real_size * 2); a_l_idx < gen_allocator->number_of_buckets(); a_l_idx++)
         {
-            if ((real_size < (sz_list / 2)) || (a_l_idx == (gen_allocator->number_of_buckets()-1)))
+            uint8_t* free_list = gen_allocator->alloc_list_head_of (a_l_idx);
+            uint8_t* prev_free_item = 0;
+            while (free_list != 0)
             {
-                uint8_t* free_list = gen_allocator->alloc_list_head_of (a_l_idx);
-                uint8_t* prev_free_item = 0;
-                while (free_list != 0)
-                {
-                    dprintf (3, ("considering free list %Ix", (size_t)free_list));
+                dprintf (3, ("considering free list %Ix", (size_t)free_list));
 
-                    size_t free_list_size = unused_array_size (free_list);
+                size_t free_list_size = unused_array_size (free_list);
 
-                    if (size_fit_p (size REQD_ALIGN_AND_OFFSET_ARG, free_list, (free_list + free_list_size),
-                                    old_loc, USE_PADDING_TAIL | pad_in_front))
-                    {
-                        dprintf (4, ("F:%Ix-%Id",
-                                     (size_t)free_list, free_list_size));
+                if (size_fit_p (size REQD_ALIGN_AND_OFFSET_ARG, free_list, (free_list + free_list_size),
+                                old_loc, USE_PADDING_TAIL | pad_in_front))
+                {
+                    dprintf (4, ("F:%Ix-%Id",
+                                    (size_t)free_list, free_list_size));
 
-                        gen_allocator->unlink_item (a_l_idx, free_list, prev_free_item, !discard_p);
-                        generation_free_list_space (gen) -= free_list_size;
-                        remove_gen_free (gen->gen_num, free_list_size);
+                    gen_allocator->unlink_item (a_l_idx, free_list, prev_free_item, !discard_p);
+                    generation_free_list_space (gen) -= free_list_size;
+                    remove_gen_free (gen->gen_num, free_list_size);
 
-                        adjust_limit (free_list, free_list_size, gen, from_gen_number+1);
-                        generation_allocate_end_seg_p (gen) = FALSE;
-                        goto finished;
-                    }
-                    // We do first fit on bucket 0 because we are not guaranteed to find a fit there.
-                    else if (discard_p || (a_l_idx == 0))
-                    {
-                        dprintf (3, ("couldn't use this free area, discarding"));
-                        generation_free_obj_space (gen) += free_list_size;
+                    adjust_limit (free_list, free_list_size, gen, from_gen_number+1);
+                    generation_allocate_end_seg_p (gen) = FALSE;
+                    goto finished;
+                }
+                // We use first fit on bucket 0 because it is not guaranteed to contain an item that fits.
+                else if (discard_p || (a_l_idx == 0))
+                {
+                    dprintf (3, ("couldn't use this free area, discarding"));
+                    generation_free_obj_space (gen) += free_list_size;
 
-                        gen_allocator->unlink_item (a_l_idx, free_list, prev_free_item, FALSE);
-                        generation_free_list_space (gen) -= free_list_size;
-                        remove_gen_free (gen->gen_num, free_list_size);
-                    }
-                    else
-                    {
-                        prev_free_item = free_list;
-                    }
-                    free_list = free_list_slot (free_list);
+                    gen_allocator->unlink_item (a_l_idx, free_list, prev_free_item, FALSE);
+                    generation_free_list_space (gen) -= free_list_size;
+                    remove_gen_free (gen->gen_num, free_list_size);
+                }
+                else
+                {
+                    prev_free_item = free_list;
                 }
+                free_list = free_list_slot (free_list);
             }
-            sz_list = sz_list * 2;
         }
         //go back to the beginning of the segment list
         heap_segment* seg = heap_segment_rw (generation_start_segment (gen));
@@ -17413,27 +17379,22 @@ void gc_heap::record_gcs_during_no_gc()
 BOOL gc_heap::find_loh_free_for_no_gc()
 {
     allocator* loh_allocator = generation_allocator (generation_of (loh_generation));
-    size_t sz_list = loh_allocator->first_bucket_size();
     size_t size = loh_allocation_no_gc;
-    for (unsigned int a_l_idx = 0; a_l_idx < loh_allocator->number_of_buckets(); a_l_idx++)
+    for (unsigned int a_l_idx = loh_allocator->first_suitable_bucket(size); a_l_idx < loh_allocator->number_of_buckets(); a_l_idx++)
     {
-        if ((size < sz_list) || (a_l_idx == (loh_allocator->number_of_buckets()-1)))
+        uint8_t* free_list = loh_allocator->alloc_list_head_of (a_l_idx);
+        while (free_list)
         {
-            uint8_t* free_list = loh_allocator->alloc_list_head_of (a_l_idx);
-            while (free_list)
-            {
-                size_t free_list_size = unused_array_size(free_list);
+            size_t free_list_size = unused_array_size(free_list);
 
-                if (free_list_size > loh_allocation_no_gc)
-                {
-                    dprintf (3, ("free item %Ix(%Id) for no gc", (size_t)free_list, free_list_size));
-                    return TRUE;
-                }
-
-                free_list = free_list_slot (free_list);
+            if (free_list_size > size)
+            {
+                dprintf (3, ("free item %Ix(%Id) for no gc", (size_t)free_list, free_list_size));
+                return TRUE;
             }
+
+            free_list = free_list_slot (free_list);
         }
-        sz_list = sz_list * 2;
     }
 
     return FALSE;
@@ -23136,7 +23097,7 @@ void gc_heap::plan_phase (int condemned_gen_number)
                                   (1 + condemned_gen_number));
     generation*  older_gen = 0;
     generation* consing_gen = condemned_gen1;
-    alloc_list  r_free_list [MAX_BUCKET_COUNT];
+    alloc_list  r_free_list [MAX_SOH_BUCKET_COUNT];
 
     size_t r_free_list_space = 0;
     size_t r_free_obj_space = 0;
@@ -31653,26 +31614,22 @@ BOOL gc_heap::can_expand_into_p (heap_segment* seg, size_t min_free_size, size_t
         size_t largest_free_space = free_space;
         dprintf (SEG_REUSE_LOG_0, ("can_expand_into_p: gen1: testing segment [%Ix %Ix", first_address, end_address));
         //find the first free list in range of the current segment
-        size_t sz_list = gen_allocator->first_bucket_size();
-        unsigned int a_l_idx = 0;
         uint8_t* free_list = 0;
+        unsigned int a_l_idx = gen_allocator->first_suitable_bucket(eph_gen_starts);
         for (; a_l_idx < gen_allocator->number_of_buckets(); a_l_idx++)
         {
-            if ((eph_gen_starts <= sz_list) || (a_l_idx == (gen_allocator->number_of_buckets()-1)))
+            free_list = gen_allocator->alloc_list_head_of (a_l_idx);
+            while (free_list)
             {
-                free_list = gen_allocator->alloc_list_head_of (a_l_idx);
-                while (free_list)
+                if ((free_list >= first_address) &&
+                    (free_list < end_address) &&
+                    (unused_array_size (free_list) >= eph_gen_starts))
                 {
-                    if ((free_list >= first_address) &&
-                        (free_list < end_address) &&
-                        (unused_array_size (free_list) >= eph_gen_starts))
-                    {
-                        goto next;
-                    }
-                    else
-                    {
-                        free_list = free_list_slot (free_list);
-                    }
+                    goto next;
+                }
+                else
+                {
+                    free_list = free_list_slot (free_list);
                 }
             }
         }
@@ -36421,10 +36378,8 @@ HRESULT GCHeap::Initialize()
 
 #ifdef MULTIPLE_HEAPS
     gc_heap::n_heaps = nhp;
-    // TODO: tuning https://github.com/dotnet/runtime/issues/13739
     hr = gc_heap::initialize_gc (seg_size, large_seg_size /*loh_segment_size*/, large_seg_size /*poh_segment_size*/, nhp);
 #else
-    // TODO: tuning https://github.com/dotnet/runtime/issues/13739
     hr = gc_heap::initialize_gc (seg_size, large_seg_size /*loh_segment_size*/, large_seg_size /*poh_segment_size*/);
 #endif //MULTIPLE_HEAPS
 
src/coreclr/src/gc/gcpriv.h
index 4801a19..e04abda 100644
@@ -106,7 +106,7 @@ inline void FATAL_GC_ERROR()
 
 #define CARD_BUNDLE         //enable card bundle feature.(requires WRITE_WATCH)
 
-#define ALLOW_REFERENCES_IN_POH  //Allow POH objects to contain references.
+// #define ALLOW_REFERENCES_IN_POH  //Allow POH objects to contain references.
 
 // If this is defined we use a map for segments in order to find the heap for
 // a segment fast. But it does use more memory as we have to cover the whole
@@ -664,10 +664,11 @@ typedef DPTR(class CFinalize)                  PTR_CFinalize;
 #endif // FEATURE_PREMORTEM_FINALIZATION
 
 //-------------------------------------
-//generation free list. It is an array of free lists bucketed by size, starting at sizes lower than first_bucket_size
+//generation free list. It is an array of free lists bucketed by size, starting at sizes lower than (1 << (first_bucket_bits + 1))
 //and doubling each time. The last bucket (index == num_buckets - 1) is for the largest sizes with no upper limit
 
-#define MAX_BUCKET_COUNT (13)//Max number of buckets for the small generations.
+#define MAX_SOH_BUCKET_COUNT (13)//Max number of buckets for the SOH generations.
+#define MAX_BUCKET_COUNT (20)//Max number of buckets.
 class alloc_list
 {
     uint8_t* head;
@@ -693,32 +694,62 @@ public:
 
 class allocator
 {
-    size_t num_buckets;
-    size_t frst_bucket_size;
+    int first_bucket_bits;
+    unsigned int num_buckets;
     alloc_list first_bucket;
     alloc_list* buckets;
     alloc_list& alloc_list_of (unsigned int bn);
     size_t& alloc_list_damage_count_of (unsigned int bn);
 
 public:
-    allocator (unsigned int num_b, size_t fbs, alloc_list* b);
+    allocator (unsigned int num_b, int fbb, alloc_list* b);
+
     allocator()
     {
         num_buckets = 1;
-        frst_bucket_size = SIZE_T_MAX;
+        first_bucket_bits = sizeof(size_t) * 8 - 1;
+    }
+
+    unsigned int number_of_buckets()
+    {
+        return num_buckets;
+    }
+
+    // skip buckets that cannot possibly fit "size" and return the first one that can;
+    // there is always such a bucket since the last one accepts all sizes
+    unsigned int first_suitable_bucket(size_t size)
+    {
+        // sizes below (1 << (first_bucket_bits + 1)) map to bucket 0;
+        // each doubling after that maps to the next bucket. The "| 1" folds
+        // all bucket-0 sizes together and keeps the value nonzero for BitScanReverse.
+        size = (size >> first_bucket_bits) | 1;
+
+        DWORD highest_set_bit_index;
+    #ifdef HOST_64BIT
+        BitScanReverse64(&highest_set_bit_index, size);
+    #else
+        BitScanReverse(&highest_set_bit_index, size);
+    #endif
+
+        return min ((unsigned int)highest_set_bit_index, num_buckets - 1);
+    }
+
+    size_t first_bucket_size()
+    {
+        return ((size_t)1 << (first_bucket_bits + 1));
     }
-    unsigned int number_of_buckets() {return (unsigned int)num_buckets;}
 
-    size_t first_bucket_size() {return frst_bucket_size;}
     uint8_t*& alloc_list_head_of (unsigned int bn)
     {
         return alloc_list_of (bn).alloc_list_head();
     }
+
     uint8_t*& alloc_list_tail_of (unsigned int bn)
     {
         return alloc_list_of (bn).alloc_list_tail();
     }
+
     void clear();
+
     BOOL discard_if_no_fit_p()
     {
         return (num_buckets == 1);
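
The key piece above is first_suitable_bucket: rather than walking the buckets while doubling a size threshold, the allocator folds every bucket-0 size down to 1 and takes a single bit scan. Below is a minimal standalone sketch of that mapping (not the CoreCLR sources; a portable highest-set-bit loop stands in for the MSVC BitScanReverse/BitScanReverse64 intrinsics), cross-checked against the doubling loop this patch removes:

```cpp
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdio>

// zero-based index of the highest set bit of a nonzero value,
// i.e. what BitScanReverse64 reports
static unsigned highest_set_bit_index (size_t v)
{
    unsigned i = 0;
    while (v >>= 1) i++;
    return i;
}

// the new mapping: fold all bucket-0 sizes to 1, then one bit scan
static unsigned first_suitable_bucket (size_t size, int first_bucket_bits, unsigned num_buckets)
{
    size = (size >> first_bucket_bits) | 1;
    return std::min (highest_set_bit_index (size), num_buckets - 1);
}

// the doubling loop removed by this patch, kept as a reference oracle
static unsigned first_suitable_bucket_naive (size_t size, size_t first_bucket_size, unsigned num_buckets)
{
    size_t sz = first_bucket_size;
    unsigned a_l_number = 0;
    for (; a_l_number < (num_buckets - 1); a_l_number++)
    {
        if (size < sz)
            break;
        sz = sz * 2;
    }
    return a_l_number;
}

int main ()
{
    // LOH parameters from this patch: NUM_LOH_ALIST == 7 buckets,
    // BASE_LOH_ALIST_BITS == 15, so bucket 0 holds sizes below 64*1024
    const int bits = 15;
    const unsigned num_buckets = 7;
    const size_t first_bucket_size = (size_t)1 << (bits + 1);

    for (size_t size = 1; size < ((size_t)1 << 26); size += size / 3 + 1)
    {
        assert (first_suitable_bucket (size, bits, num_buckets) ==
                first_suitable_bucket_naive (size, first_bucket_size, num_buckets));
    }
    printf ("bit-scan bucket selection matches the removed doubling loop\n");
    return 0;
}
```

Since the two computations agree on every size, the call sites above can start their scan at first_suitable_bucket(size) instead of testing size < sz_list once per bucket.
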
@@ -749,7 +780,6 @@ public:
     void unlink_item (unsigned int bucket_number, uint8_t* item, uint8_t* previous_item, BOOL use_undo_p);
     void thread_item (uint8_t* item, size_t size);
     void thread_item_front (uint8_t* item, size_t size);
-    void thread_free_item (uint8_t* free_item, uint8_t*& head, uint8_t*& tail);
     void copy_to_alloc_list (alloc_list* toalist);
     void copy_from_alloc_list (alloc_list* fromalist);
     void commit_alloc_list_changes();
@@ -3982,26 +4012,27 @@ protected:
 #endif //SYNCHRONIZATION_STATS
 
 #define NUM_LOH_ALIST (7)
-#define BASE_LOH_ALIST (64*1024)
+    // bucket 0 contains sizes less than 64*1024
+    // the "BITS" number here is the zero-based index (as in BitScanReverse)
+    // of the highest set bit in (64*1024 - 1), i.e. 15.
+    // see first_suitable_bucket(size_t size) for details.
+#define BASE_LOH_ALIST_BITS (15)
     PER_HEAP
     alloc_list loh_alloc_list[NUM_LOH_ALIST-1];
 
 #define NUM_GEN2_ALIST (12)
 #ifdef HOST_64BIT
-#define BASE_GEN2_ALIST (1*256)
+    // bucket 0 contains sizes less than 256
+#define BASE_GEN2_ALIST_BITS (7)
 #else
-#define BASE_GEN2_ALIST (1*128)
+    // bucket 0 contains sizes less than 128
+#define BASE_GEN2_ALIST_BITS (6)
 #endif // HOST_64BIT
     PER_HEAP
     alloc_list gen2_alloc_list[NUM_GEN2_ALIST-1];
 
-// TODO: tuning https://github.com/dotnet/runtime/issues/13739
-#define NUM_POH_ALIST (12)
-#ifdef BIT64
-#define BASE_POH_ALIST (1*256)
-#else
-#define BASE_POH_ALIST (1*128)
-#endif // BIT64
+#define NUM_POH_ALIST (19)
+    // bucket 0 contains sizes less than 256
+#define BASE_POH_ALIST_BITS (7)
     PER_HEAP
     alloc_list poh_alloc_list[NUM_POH_ALIST-1];
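
For scale, here is a small standalone program (not part of the patch; it just expands the constants above) that prints the size ranges the 19 POH buckets now cover. The old constants (NUM_POH_ALIST of 12, BASE_POH_ALIST of 256) stopped doubling at 256 KB, while the new layout keeps the free lists size-segregated up to 32 MB:

```cpp
#include <cstddef>
#include <cstdio>

int main ()
{
    const int base_bits = 7;         // BASE_POH_ALIST_BITS
    const unsigned num_buckets = 19; // NUM_POH_ALIST

    // bucket 0 holds sizes below 1 << (base_bits + 1) == 256,
    // every following bucket doubles, and the last one is open-ended
    size_t limit = (size_t)1 << (base_bits + 1);
    printf ("bucket  0: sizes [0, %zu)\n", limit);
    for (unsigned b = 1; b < num_buckets - 1; b++, limit *= 2)
    {
        printf ("bucket %2u: sizes [%zu, %zu)\n", b, limit, limit * 2);
    }
    printf ("bucket %2u: sizes [%zu, ...)\n", num_buckets - 1, limit);
    return 0;
}
```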